# Exploratory script: replay an F1 practice-session timing feed (one XML
# message per line) into a DataFrame that mirrors the timing screen.
#
# IPython shell escapes from the original notebook session; they are not
# valid in a plain Python file, so they are kept here for reference only:
#   !ls data
#   !head -n 3 data/F1\ Practice.txt

import datetime

import pandas as pd

practice_f = 'data/F1 Practice.txt'

# Parsed feed messages; populated when the file is run as a script/notebook.
pl = []

# Dataframe for current state timing screen (one row per classification slot).
df_practice_pos = pd.DataFrame(
    columns=[
        "timestamp", "time",
        "classpos", "classpos_colour",
        "racingNumber", "racingNumber_colour",
        "name", "name_colour",
    ],
    index=range(50),
)

# Column mappings: feed column id -> timing-screen column name.
practiceMap = {
    '1': 'classpos',
    '2': 'racingNumber',
    '3': 'name',
    '4': 'laptime',
    '5': 'gap',
    '6': 'sector1',
    '7': 'sector2',
    '8': 'sector3',
    '9': 'laps',
    '21': 'sector1_best',
    '22': 'sector2_best',
    '23': 'sector3_best',
}

# Feed column ids that parse_practice deliberately ignores.
_SKIPPED_COLUMNS = frozenset(['10', '21', '22', '23'])


def _timestamp_to_time(timestamp):
    """Convert a feed timestamp string into a ``datetime.time``.

    The string is split on ':' and '.'; the first three fields are taken as
    hours, minutes and seconds. A fourth field, when present, is read as the
    decimal fraction of a second (so '123' is 123 ms); when absent the
    fraction is zero.
    NOTE(review): assumes stamps look like 'HH:MM:SS.mmm' -- confirm against
    the feed.
    """
    fields = timestamp.replace('.', ':').split(':')
    hours, minutes, seconds = (int(part) for part in fields[:3])
    fraction = fields[3] if len(fields) > 3 else ''
    # Right-pad/truncate to six digits so the fraction becomes microseconds.
    microseconds = int(fraction.ljust(6, '0')[:6]) if fraction else 0
    return datetime.time(hours, minutes, seconds, microseconds)


def parse_practice(p, df_practice_pos):
    """Apply one feed message to the timing-screen dataframe.

    Only messages whose ``identifier`` attribute is '101' and whose first
    child has no ``sessionstate`` attribute are applied, and only when the
    child's ``column`` is not one of the skipped ids. For such a message the
    row ``int(row) - 1`` receives the message timestamp (raw string and
    parsed time), the cell value and the cell colour.

    Args:
        p: A parsed message element exposing ``attrib`` and indexable
            children that also expose ``attrib``.
        df_practice_pos: The dataframe to update; it is modified in place.

    Returns:
        The same dataframe object that was passed in.

    Raises:
        KeyError: If the child's column id is neither skipped nor present in
            ``practiceMap``, or an expected attribute is missing.
    """
    if p.attrib['identifier'] != '101':
        return df_practice_pos

    cell = p[0].attrib
    if 'sessionstate' in cell or cell['column'] in _SKIPPED_COLUMNS:
        return df_practice_pos

    colname = practiceMap[cell['column']]
    row = int(cell['row']) - 1  # feed rows are 1-based, the index is 0-based
    timestamp = p.attrib['timestamp']

    # Single-step .loc assignment writes into the frame itself; the former
    # chained ``.ix[row][col] = ...`` could end up writing to a temporary copy.
    df_practice_pos.loc[row, 'timestamp'] = timestamp
    df_practice_pos.loc[row, 'time'] = _timestamp_to_time(timestamp)
    df_practice_pos.loc[row, colname] = cell['value']
    df_practice_pos.loc[row, colname + '_colour'] = cell['colour']
    return df_practice_pos


if __name__ == "__main__":
    # lxml is only needed to load the data file, so import it here and keep
    # the module importable without it.
    from lxml import etree

    # Hacky load and parse of each row in the datafile.
    with open(practice_f, 'r') as feed:
        pl = [etree.fromstring(line) for line in feed if line.strip()]

    # Inspect one message and its first child.
    print(pl[100].attrib)
    print(pl[100][0].attrib)

    # Replay only the first 2850 messages of the session.
    for p in pl[:2850]:
        df_practice_pos = parse_practice(p, df_practice_pos)

    print(df_practice_pos)