# Prepare environment
import os, sys
# Put the parent of the current working directory at the front of the module
# search path, so imports are resolved against it before any other location.
sys.path.insert(0, os.path.abspath('..'))
from io import StringIO
import pandas as pd
from IPython.core.interactiveshell import InteractiveShell
# Ask IPython to display the value of every top-level expression in a cell,
# not only the last one.
InteractiveShell.ast_node_interactivity = "all"
# Prepare input data files
# Five small in-memory CSV sources stand in for files on disk. Their headers
# differ from one another (symbol/symb, barsize/barSize/bar,
# date/datetime/time, close/c, volume/vol) and the two FB sources carry no
# symbol or bar-size column at all. Each payload begins with an empty line
# and every field after the first is preceded by a single space.
gs1_csv = StringIO(
    "\n"
    "symbol, barsize, date, close\n"
    "GS, 5 min, 2016-07-12 10:35:00-07:00, 140.05\n"
    "GS, 5 min, 2016-07-12 11:20:00-07:00, 141.34\n"
)
gs2_csv = StringIO(
    "\n"
    "symbol, barSize, datetime, close, volume\n"
    "GS, 5 min, 2016-07-12 10:35:00-07:00, 140.05, 344428\n"
)
fb5min_csv = StringIO(
    "\n"
    "time, c, vol\n"
    "2016-07-21 09:30:00, 120.05, 234242\n"
    "2016-07-21 09:35:00, 120.32, 410842\n"
)
fb1min_csv = StringIO(
    "\n"
    "time, c, vol\n"
    "2016-07-25 09:40:00, 120.47, 579638\n"
    "2016-07-25 09:41:00, 120.82, 192476\n"
)
amzn_csv = StringIO(
    "\n"
    "symb, bar, date, close, volume\n"
    "AMZN, 1 day, 2016-07-21, 749.22, 27917\n"
    "AMZN, 1 day, 2016-07-22, 738.87, 36662\n"
    "AMZN, 1 day, 2016-07-23, 727.23, 8766\n"
)

# Parse each source into its own DataFrame using pandas' default CSV options.
df_gs1 = pd.read_csv(gs1_csv)
df_gs2 = pd.read_csv(gs2_csv)
df_fb5m = pd.read_csv(fb5min_csv)
df_fb1m = pd.read_csv(fb1min_csv)
df_amzn = pd.read_csv(amzn_csv)

# Show the parsed frames one after another, each starting on a new line.
print(df_gs1, df_gs2, df_fb5m, df_fb1m, df_amzn, sep="\n")
Data are stored in self.df, following a composition design. The column and index names of the DataFrame are standardized, and a pandas.MultiIndex is created.
import pytz
from ibstract import MarketDataBlock
# Build the block from the first GS frame: the rows are declared as TRADES
# data and US/Eastern is supplied as the time zone.
blk = MarketDataBlock(
    df_gs1,
    datatype="TRADES",
    tz=pytz.timezone("US/Eastern"),
)
print(blk)
The date/time column, given as strings, is converted to a pandas.DatetimeIndex.
When a MarketDataBlock instance is created, naive timestamps are localized, and a fixed-offset time zone is converted to a region-based pytz.timezone.
# Report the type of the fourth index level, then the block's time zone
# object together with its type.
print(
    "\nTickerTime type:",
    type(blk.df.index.levels[3]),
)
print(
    "Time zone:",
    blk.tzinfo,
    type(blk.tzinfo),
)
Update from a pandas.DataFrame
Updating from a pandas.DataFrame will combine columns. N/A data in integer columns are converted to -1.
# Fold the second GS frame (which adds a volume column) into the block.
blk.update(
    df_gs2,
    datatype="TRADES",
    tz=pytz.timezone("US/Eastern"),
)
# Bare expression: left at top level so the notebook renders the block.
blk
update() can handle a DataFrame that has naive timestamps, rows with different bar sizes, or no BarSize column at all.
# The FB frames carry no symbol or bar-size column, so both are passed in
# explicitly; their timestamps have no UTC offset, and US/Eastern is given
# as the time zone.
blk.update(
    df_fb5m,
    symbol="FB",
    datatype="TRADES",
    barsize="5m",
    tz=pytz.timezone("US/Eastern"),
)
blk.update(
    df_fb1m,
    symbol="FB",
    datatype="TRADES",
    barsize="1m",
    tz=pytz.timezone("US/Eastern"),
)
# Bare expression: left at top level so the notebook renders the block.
blk
Combining with another MarketDataBlock instance is easier than updating from a DataFrame.
# Wrap the AMZN daily frame in its own block, then merge that block into
# the existing one.
blk_amzn = MarketDataBlock(
    df_amzn,
    datatype="TRADES",
    tz=pytz.timezone("US/Eastern"),
)
blk.combine(blk_amzn)
# Bare expression: left at top level so the notebook renders the block.
blk