# Build a 10-row frame (random floats in 'A', the constant string 'foo' in
# 'B') and write it out twice: once as CSV and once as an HDF5 table.
df = DataFrame({'A': np.random.randn(10), 'B': 'foo'})
df.to_csv('data/test_iterator.csv', mode='w')
# `key` is passed by keyword: newer pandas releases deprecate passing it
# positionally to to_hdf. format='table' is what allows the chunked and
# column-wise reads of this file later on.
df.to_hdf('data/test_iterator.h5', key='df', mode='w', format='table',
          data_columns=True)
# Read the CSV back three rows at a time. The loop variable is deliberately
# not called `df`, so this loop does not overwrite the full frame with the
# last (partial) chunk.
for chunk in pd.read_csv('data/test_iterator.csv', chunksize=3, index_col=0):
    # print() with a single argument behaves the same on Python 2 and 3.
    print(chunk)
A B 0 0.548203 foo 1 0.513688 foo 2 0.260623 foo A B 3 -1.474517 foo 4 -2.653109 foo 5 -0.201879 foo A B 6 0.850183 foo 7 -0.796159 foo 8 -0.874545 foo A B 9 -0.272888 foo
# Read the HDF5 table 'df' back three rows at a time. The loop variable is
# deliberately not called `df`, so this loop does not overwrite the full
# frame with the last (partial) chunk.
for chunk in pd.read_hdf('data/test_iterator.h5', key='df', chunksize=3):
    # print() with a single argument behaves the same on Python 2 and 3.
    print(chunk)
A B 0 0.548203 foo 1 0.513688 foo 2 0.260623 foo A B 3 -1.474517 foo 4 -2.653109 foo 5 -0.201879 foo A B 6 0.850183 foo 7 -0.796159 foo 8 -0.874545 foo A B 9 -0.272888 foo
# Load only the saved index column and 'B'. usecols must be either all
# positions or all labels (the mixed form [0, 'B'] is rejected by current
# pandas), so both columns are given by position: 0 is the saved index and
# 2 is 'B' (the file's columns are: index, 'A', 'B').
pd.read_csv('data/test_iterator.csv', usecols=[0, 2], index_col=0)
| | B |
---|---|
0 | foo |
1 | foo |
2 | foo |
3 | foo |
4 | foo |
5 | foo |
6 | foo |
7 | foo |
8 | foo |
9 | foo |
# NOTE (from the original author): selecting with `columns=` is effectively
# a reindex of the stored frame down to 'B', not a column-wise read.
# TODO confirm against the HDFStore implementation.
pd.read_hdf(
    'data/test_iterator.h5',
    key='df',
    columns=['B'],
)
| | B |
---|---|
0 | foo |
1 | foo |
2 | foo |
3 | foo |
4 | foo |
5 | foo |
6 | foo |
7 | foo |
8 | foo |
9 | foo |
# Pull the single column 'B' out of the stored table 'df' and print it.
# The `with` block closes the store when the body finishes.
with pd.HDFStore('data/test_iterator.h5') as store:
    # print() with a single argument behaves the same on Python 2 and 3.
    print(store.select_column('df', 'B'))
0 foo 1 foo 2 foo 3 foo 4 foo 5 foo 6 foo 7 foo 8 foo 9 foo dtype: object