Perform PCA of features on Thumos14-val.
A is a tall [n_samples, feat_dim] matrix, i.e. n_samples >> feat_dim. We compute A^T A out of core and then reduce the feature dimension (feat_dim) with an in-memory eigendecomposition.
| #!/usr/bin/env python | |
| """ | |
| PCA done via out-of-core matrix multiplication. This script is provided for | |
| reference only: its input handling is unfriendly and dependency-heavy. | |
| """ | |
| import time | |
| import h5py | |
| import hickle as hkl | |
| import numpy as np | |
| THUMOS14_VAL = 'data/thumos14/c3d/val_c3d_temporal.hdf5' | |
def _log(message):
    """Print `message` prefixed with the current wall-clock time."""
    # Single-argument call form so the output is identical on Python 2 and 3.
    print('{} {}'.format(time.ctime(), message))


def main(h5file=THUMOS14_VAL, t_size=16, t_stride=8, feat_dim=4096,
         source='c3d_features', log_loop=500000,
         output='pca_val_annot_thumos14.hkl'):
    """Compute a PCA basis of the features stored in an HDF5 file.

    The file is read twice, one top-level group at a time: the first pass
    accumulates the per-dimension mean, the second accumulates the
    mean-centred scatter matrix ``A^T A``. Only ``feat_dim``-sized arrays
    are kept in memory across groups. The scatter matrix is then
    decomposed in memory and the result is serialized with hickle.

    Parameters
    ----------
    h5file : str
        Path of the HDF5 file. Every top-level entry must contain a
        dataset called `source`.
        # NOTE(review): each dataset is assumed to be 2-D,
        # (n_samples, feat_dim) -- confirm against the feature extractor.
    t_size, t_stride : int
        Not used by this function; kept for interface compatibility.
    feat_dim : int
        Dimensionality of a single feature vector.
    source : str
        Name of the dataset read from every top-level group.
    log_loop : int
        A progress line is printed every `log_loop` processed groups.
    output : str
        Path of the hickle file that receives the dict with keys
        ``'x_mean'``, ``'U'`` and ``'S'``.

    Raises
    ------
    ValueError
        If the file holds no samples, so the mean is undefined.
    """
    _log('start: loading hdf5')
    # Context manager guarantees the file is closed, even on error.
    with h5py.File(h5file, 'r') as fid:
        _log('finish: loading hdf5')

        # Compute mean
        _log('start: compute mean')
        # Accumulate in float64: a float32 running sum over many samples
        # loses precision as the accumulator grows.
        feat_sum = np.zeros((1, feat_dim), dtype=np.float64)
        n = 0
        for name in fid:
            feat = fid[name][source][:]
            n += feat.shape[0]
            feat_sum += feat.sum(axis=0, dtype=np.float64)
        if n == 0:
            raise ValueError(
                'No samples found in {!r}; cannot compute PCA'.format(h5file))
        # Stored mean keeps the float32 dtype of the original output.
        x_mean = (feat_sum / n).astype(np.float32)
        _log('finish: compute mean')

        # Compute A.T A
        _log('start: out-of-core matrix multiplication')
        n_videos = len(fid)
        ATA = np.zeros((feat_dim, feat_dim), dtype=np.float64)
        for j, name in enumerate(fid, 1):
            feat_ = fid[name][source][:] - x_mean
            ATA += np.dot(feat_.T, feat_)
            if j % log_loop == 0:
                _log('Iteration {}/{}'.format(j, n_videos))
        _log('finish: out-of-core matrix multiplication')

    # SVD
    _log('start: SVD in memory')
    # ATA is symmetric positive semi-definite, so its left singular vectors
    # and singular values are its eigenvectors and eigenvalues, already
    # sorted in descending order. ATA is not divided by the sample count,
    # so S holds unnormalized eigenvalues (same as before).
    U, S, _ = np.linalg.svd(ATA)
    _log('finish: SVD in memory')

    _log('serializing ...')
    hkl.dump({'x_mean': x_mean,
              'U': U.astype(np.float32),
              'S': S.astype(np.float32)},
             output)
# Script entry point: run the PCA pipeline with its default arguments
# when this file is executed directly rather than imported.
if __name__ == "__main__":
    main()