Skip to content

Instantly share code, notes, and snippets.

@wassname
Last active September 11, 2022 21:28
Show Gist options
  • Select an option

  • Save wassname/a0a75f133831eed1113d052c67cf8633 to your computer and use it in GitHub Desktop.

Select an option

Save wassname/a0a75f133831eed1113d052c67cf8633 to your computer and use it in GitHub Desktop.
Simple class to append values to an HDF5 file on disk (useful for building Keras datasets)
import numpy as np
import h5py
class HDF5Store(object):
    """
    Simple class to append values to an HDF5 file on disk (useful for
    building keras datasets).

    The file is created (truncating any existing file) when the store is
    constructed; each call to ``append`` reopens the file, grows the dataset
    by one entry along the first axis and writes the new entry there.

    Params:
        datapath: filepath of h5 file
        dataset: dataset name within the file
        shape: dataset shape (not counting main/batch axis)
        dtype: numpy dtype
        compression: compression filter passed to ``create_dataset``
        chunk_len: chunk length along the main/batch axis

    Usage:
        hdf5_store = HDF5Store('/tmp/hdf5_store.h5','X', shape=(20,20,3))
        x = np.random.random(hdf5_store.shape)
        hdf5_store.append(x)
        hdf5_store.append(x)

    From https://gist.github.com/wassname/a0a75f133831eed1113d052c67cf8633
    """

    def __init__(self, datapath, dataset, shape, dtype=np.float32, compression="gzip", chunk_len=1):
        self.datapath = datapath
        self.dataset = dataset
        # Normalise to a tuple so the concatenations below also accept a list.
        self.shape = tuple(shape)
        # Index of the next entry to write (== number of entries appended so far).
        self.i = 0

        # mode='w' creates the file, truncating it if it already exists.
        with h5py.File(self.datapath, mode='w') as h5f:
            # NOTE: kept for backward compatibility only. The file is closed
            # when this ``with`` block exits, so this handle must not be used
            # afterwards; ``append`` looks the dataset up again by name.
            self.dset = h5f.create_dataset(
                dataset,
                shape=(0, ) + self.shape,
                maxshape=(None, ) + self.shape,  # unlimited along the batch axis
                dtype=dtype,
                compression=compression,
                chunks=(chunk_len, ) + self.shape)

    def append(self, values):
        """Append one entry of shape ``self.shape`` to the end of the dataset."""
        with h5py.File(self.datapath, mode='a') as h5f:
            dset = h5f[self.dataset]
            # Use the shape stored on the instance; the original referenced a
            # module-level global named ``shape`` here.
            dset.resize((self.i + 1, ) + self.shape)
            dset[self.i] = values
            self.i += 1
            h5f.flush()
# Smoke test: create a store and append ten random samples to it.
shape = (20, 20, 3)
hdf5_store = HDF5Store("/tmp/hdf5_store.h5", "X", shape=shape)
for _ in range(10):
    hdf5_store.append(np.random.random(size=shape))
@laolihaile
Copy link

Thank you for your reply. I will try the method you suggested😊

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment