# Prepare environment
import os, sys
# Put the parent directory first on the module search path. Presumably this is
# what lets the local `ibstract` package be imported further down -- confirm
# against the repository layout.
sys.path.insert(0, os.path.abspath('..'))
from io import StringIO
import pandas as pd
from IPython.core.interactiveshell import InteractiveShell
# Ask IPython to display the value of every top-level expression in a cell,
# not only the last one.
InteractiveShell.ast_node_interactivity = "all"
# Prepare input data files
# Five small in-memory CSV fixtures. Their headers use differing column names
# (barsize / barSize / bar, date / datetime / time, close / c, volume / vol),
# and the FB fixtures carry no symbol or bar-size column at all.
gs1_csv = StringIO("""
symbol, barsize, date, close
GS, 5 min, 2016-07-12 10:35:00-07:00, 140.05
GS, 5 min, 2016-07-12 11:20:00-07:00, 141.34
""")
gs2_csv = StringIO("""
symbol, barSize, datetime, close, volume
GS, 5 min, 2016-07-12 10:35:00-07:00, 140.05, 344428
""")
fb5min_csv = StringIO("""
time, c, vol
2016-07-21 09:30:00, 120.05, 234242
2016-07-21 09:35:00, 120.32, 410842
""")
fb1min_csv = StringIO("""
time, c, vol
2016-07-25 09:40:00, 120.47, 579638
2016-07-25 09:41:00, 120.82, 192476
""")
amzn_csv = StringIO("""
symb, bar, date, close, volume
AMZN, 1 day, 2016-07-21, 749.22, 27917
AMZN, 1 day, 2016-07-22, 738.87, 36662
AMZN, 1 day, 2016-07-23, 727.23, 8766
""")

# The fixtures put a blank after every comma. Without skipinitialspace=True
# pandas keeps that blank, producing labels such as ' barsize' and cells such
# as ' 5 min'. The empty first line of each literal is dropped by read_csv's
# default skip_blank_lines=True.
df_gs1, df_gs2, df_fb5m, df_fb1m, df_amzn = [
    pd.read_csv(f, skipinitialspace=True)
    for f in (gs1_csv, gs2_csv, fb5min_csv, fb1min_csv, amzn_csv)
]

# Show each parsed frame, in the same order as before.
print(df_gs1)
print(df_gs2)
print(df_fb5m)
print(df_fb1m)
print(df_amzn)
symbol barsize date close 0 GS 5 min 2016-07-12 10:35:00-07:00 140.05 1 GS 5 min 2016-07-12 11:20:00-07:00 141.34 symbol barSize datetime close volume 0 GS 5 min 2016-07-12 10:35:00-07:00 140.05 344428 time c vol 0 2016-07-21 09:30:00 120.05 234242 1 2016-07-21 09:35:00 120.32 410842 time c vol 0 2016-07-25 09:40:00 120.47 579638 1 2016-07-25 09:41:00 120.82 192476 symb bar date close volume 0 AMZN 1 day 2016-07-21 749.22 27917 1 AMZN 1 day 2016-07-22 738.87 36662 2 AMZN 1 day 2016-07-23 727.23 8766
Data are stored in `self.df`, a composition design. Column and index names are standardized for the DataFrame, and a `pandas.MultiIndex` is created.
import pytz
from ibstract import MarketDataBlock

# Wrap the first GS frame in a MarketDataBlock, labelled as TRADES data with
# the US/Eastern time zone, then print the resulting block.
eastern = pytz.timezone('US/Eastern')
blk = MarketDataBlock(df_gs1, datatype='TRADES', tz=eastern)
print(blk)
closing Symbol DataType BarSize TickerTime GS TRADES 5m 2016-07-12 13:35:00-04:00 140.05 2016-07-12 14:20:00-04:00 141.34
Date/time columns given as strings are converted to a `pandas.DatetimeIndex`.
When a MarketDataBlock instance is created, naive timestamps are localized, and timestamps with a fixed-offset time zone are converted to a region-based `pytz.timezone`.
# Report the type of the fourth index level (labelled TickerTime in the
# printout) and the time zone attached to the block.
ticker_level = blk.df.index.levels[3]
block_tz = blk.tzinfo
print("\nTickerTime type:", type(ticker_level))
print("Time zone:", block_tz, type(block_tz))
TickerTime type: <class 'pandas.core.indexes.datetimes.DatetimeIndex'> Time zone: US/Eastern <class 'pytz.tzfile.US/Eastern'>
Update from a `pandas.DataFrame`
Updating from a `pandas.DataFrame` combines columns. N/A values in integer columns are converted to -1.
# Merge the second GS frame (the one with a volume column) into the block,
# then display the block.
gs_tz = pytz.timezone('US/Eastern')
blk.update(df_gs2, datatype='TRADES', tz=gs_tz)
blk
closing volume Symbol DataType BarSize TickerTime GS TRADES 5m 2016-07-12 13:35:00-04:00 140.05 344428 2016-07-12 14:20:00-04:00 141.34 -1
`update()` can handle a `DataFrame` that has naive timestamps, rows with different BarSize values, or no BarSize column at all.
# The FB frames have no symbol or bar-size column, so both are passed
# explicitly; the arguments shared by the two calls are collected once.
fb_common = dict(symbol='FB', datatype='TRADES', tz=pytz.timezone('US/Eastern'))
blk.update(df_fb5m, barsize='5m', **fb_common)
blk.update(df_fb1m, barsize='1m', **fb_common)
blk
closing volume Symbol DataType BarSize TickerTime FB TRADES 1m 2016-07-25 09:40:00-04:00 120.47 579638 2016-07-25 09:41:00-04:00 120.82 192476 5m 2016-07-21 09:30:00-04:00 120.05 234242 2016-07-21 09:35:00-04:00 120.32 410842 GS TRADES 5m 2016-07-12 13:35:00-04:00 140.05 344428 2016-07-12 14:20:00-04:00 141.34 -1
**Combining with another MarketDataBlock instance is easier than updating from a DataFrame.**
# Build a separate block from the AMZN daily frame, fold it into the main
# block, then display the combined result.
amzn_tz = pytz.timezone('US/Eastern')
blk_amzn = MarketDataBlock(df_amzn, datatype='TRADES', tz=amzn_tz)
blk.combine(blk_amzn)
blk
closing volume Symbol DataType BarSize TickerTime AMZN TRADES 1d 2016-07-21 00:00:00-04:00 749.22 27917 2016-07-22 00:00:00-04:00 738.87 36662 2016-07-23 00:00:00-04:00 727.23 8766 FB TRADES 1m 2016-07-25 09:40:00-04:00 120.47 579638 2016-07-25 09:41:00-04:00 120.82 192476 5m 2016-07-21 09:30:00-04:00 120.05 234242 2016-07-21 09:35:00-04:00 120.32 410842 GS TRADES 5m 2016-07-12 13:35:00-04:00 140.05 344428 2016-07-12 14:20:00-04:00 141.34 -1