import pandas as pd
# Load the simulated-days file and preview its first rows; this is the
# layout the rest of the notebook is trying to reproduce.
target_path = 'ex/data/days-simulated-v2.tsv'
df = pd.read_csv(target_path)
# target structure
df.head()
day | |
---|---|
0 | 0,270,5,32,10,73,16,25,5,165,2,35,4,300,1,53,1... |
1 | 4,150,16,7,4,623,16,5,8,35,16,20,5,30,2,10,8,1... |
2 | 0,270,1,75,16,30,9,15,16,5,8,40,16,5,10,10,16,... |
3 | 0,240,1,60,10,50,16,5,5,55,2,32,16,8,10,10,16,... |
4 | 0,170,2,20,16,5,4,285,2,15,3,205,16,5,6,6,16,1... |
# Parse the HTML tables found in the survey summary page.
# NOTE: pd.read_html returns a *list* of DataFrames, so from here on `df`
# is a list (it replaces the DataFrame bound to `df` earlier in the file).
df=pd.read_html('ex/3.html')
# plt is imported for plotting; it is not used in the visible part of the file.
import matplotlib.pyplot as plt
# IPython magic (only valid inside IPython/Jupyter, not plain Python).
%matplotlib inline
# Preview the first rows of the first table parsed from the page.
df[0].head()
– | 05:30-07:00 – | 07:00-08:30 – | 08:30-10:00 – | 10:00-11:30 – | 11:30-13:00 – | 13:00-14:30 – | 14:30-16:00 – | 16:00-17:30 – | 17:30-19:00 – | 19:00-20:30 – | 20:30-22:00 – | 22:00-23:30 – | 23:30-01:00 – | 01:00-02:30 – | 02:30-04:00 – | 04:00-05:30 – | Total Respondents – | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | – Alvás | 81.32% 74 | 23.08% 21 | 5.49% 5 | 0.00% 0 | 0.00% 0 | 0.00% 0 | 0.00% 0 | 1.10% 1 | 0.00% 0 | 0.00% 0 | 1.10% 1 | 31.87% 29 | 63.74% 58 | 75.82% 69 | 78.02% 71 | 78.02% 71 | 91 |
1 | – Zuhany / Mosdó | 31.18% 29 | 40.86% 38 | 13.98% 13 | 2.15% 2 | 0.00% 0 | 1.08% 1 | 0.00% 0 | 2.15% 2 | 2.15% 2 | 5.38% 5 | 21.51% 20 | 11.83% 11 | 5.38% 5 | 0.00% 0 | 1.08% 1 | 0.00% 0 | 93 |
2 | – Étkezés | 7.69% 7 | 39.56% 36 | 23.08% 21 | 13.19% 12 | 13.19% 12 | 20.88% 19 | 19.78% 18 | 21.98% 20 | 10.99% 10 | 15.38% 14 | 20.88% 19 | 6.59% 6 | 0.00% 0 | 0.00% 0 | 2.20% 2 | 0.00% 0 | 91 |
3 | – Tanulás | 5.71% 2 | 20.00% 7 | 45.71% 16 | 48.57% 17 | 57.14% 20 | 45.71% 16 | 28.57% 10 | 28.57% 10 | 45.71% 16 | 31.43% 11 | 14.29% 5 | 8.57% 3 | 0.00% 0 | 0.00% 0 | 0.00% 0 | 0.00% 0 | 35 |
4 | – Munka (irodai) | 4.23% 3 | 30.99% 22 | 71.83% 51 | 74.65% 53 | 73.24% 52 | 67.61% 48 | 66.20% 47 | 52.11% 37 | 19.72% 14 | 5.63% 4 | 5.63% 4 | 1.41% 1 | 1.41% 1 | 1.41% 1 | 0.00% 0 | 0.00% 0 | 71 |
# Load the three raw survey exports; their header layout is identical.
df1 = pd.read_csv('ex/1.csv')
df2 = pd.read_csv('ex/2.csv')
df3 = pd.read_csv('ex/3.csv')

# The first row of each export carries header information, so it is kept
# from the first file only and skipped in the other two.
df = pd.concat([df1, df2.iloc[1:], df3.iloc[1:]])

# Cut the wide table into column ranges. reset_index() gives every piece a
# fresh 0..n-1 row index and keeps the old index as an extra first column.
# The two 480-column ranges are presumably 30 activities x 16 time slots
# each (weekday / weekend?) - TODO confirm against the questionnaire.
all_columns = df.columns
hkoz = df[all_columns[9:489]].reset_index()
hetv = df[all_columns[489:969]].reset_index()
desc = df[all_columns[969:]].reset_index()
time = df[all_columns[2:4]].reset_index()
# Top 16 activity groups (codes 0-15) plus one catch-all group (code 16).
# Every inner list holds the survey labels that are merged into one group;
# the position of the inner list is the group's numeric code.
activities = [
    [u'Alvás'],                                         # 0  sleep
    [u'Zuhany / Mosdó'],                                # 1  shower / bathroom
    [u'Étkezés', u'Étterem/Vendéglő'],                  # 2  eating
    [u'Munka (irodai)', u'Munka (kétkezi)'],            # 3  work
    [u'Internet', u'Telefon/Chat/Facebook'],            # 4  internet / phone
    [u'Vásárlás'],                                      # 5  shopping
    [u'Vallásgyakorlás'],                               # 6  religious practice
    [u'TV/Film', u'Mozi'],                              # 7  TV / cinema
    [u'Olvasás', u'Újság/Keresztrejtvény'],             # 8  reading
    [u'Házimunka/Gyerekfelügyelet'],                    # 9  housework / childcare
    [u'Hivatalos elintéznivalók'],                      # 10 official errands
    [u'Sport', u'Edzőterem/Szépségszalon'],             # 11 sport / gym
    [u'Utazás/Vezetés'],                                # 12 travel / driving
    [u'Tanulás', u'Magánóra'],                          # 13 studying
    [u'Szórakozóhely/Kávézó/Pub'],                      # 14 going out
    [u'Séta/Kutyasétáltatás', u'Természet/Kirándulás'],  # 15 walking / outdoors
    [                                                   # 16 everything else
        u'Egyéb Hobby',
        u'PC játék',
        u'Önkéntesség',
        u'Kertészkedés/Barkácsolás',
        u'Rokonlátogatás',
        u'Más',
    ],
]
'Alv\xc3\xa1s',
'Edz\xc5\x91terem/Sz\xc3\xa9ps\xc3\xa9gszalon',
'Egy\xc3\xa9b Hobby',
'Hivatalos elint\xc3\xa9znival\xc3\xb3k',
'H\xc3\xa1zimunka/Gyerekfel\xc3\xbcgyelet',
'Internet',
'Kert\xc3\xa9szked\xc3\xa9s/Bark\xc3\xa1csol\xc3\xa1s',
'Mag\xc3\xa1n\xc3\xb3ra',
'Mozi',
'Munka (irodai)',
'Munka (k\xc3\xa9tkezi)',
'M\xc3\xa1s',
'Olvas\xc3\xa1s',
'PC j\xc3\xa1t\xc3\xa9k',
'Rokonl\xc3\xa1togat\xc3\xa1s',
'Sport',
'Sz\xc3\xb3rakoz\xc3\xb3hely/K\xc3\xa1v\xc3\xa9z\xc3\xb3/Pub',
'S\xc3\xa9ta/Kutyas\xc3\xa9t\xc3\xa1ltat\xc3\xa1s',
'TV/Film',
'Tanul\xc3\xa1s',
'Telefon/Chat/Facebook',
'Term\xc3\xa9szet/Kir\xc3\xa1ndul\xc3\xa1s',
'Utaz\xc3\xa1s/Vezet\xc3\xa9s',
'Vall\xc3\xa1sgyakorl\xc3\xa1s',
'V\xc3\xa1s\xc3\xa1rl\xc3\xa1s',
'Zuhany / Mosd\xc3\xb3',
'\xc3\x89tkez\xc3\xa9s',
'\xc3\x89tterem/Vend\xc3\xa9gl\xc5\x91',
'\xc3\x96nk\xc3\xa9ntess\xc3\xa9g',
'\xc3\x9ajs\xc3\xa1g/Keresztrejtv\xc3\xa9ny'
# Reverse lookup: survey label -> numeric code of the activity group that
# contains it (the group's position in `activities`).
actidict = {
    label: code
    for code, group in enumerate(activities)
    for label in group
}
actidict
{u'Alv\xe1s': 0, u'Edz\u0151terem/Sz\xe9ps\xe9gszalon': 11, u'Egy\xe9b Hobby': 16, u'Hivatalos elint\xe9znival\xf3k': 10, u'H\xe1zimunka/Gyerekfel\xfcgyelet': 9, u'Internet': 4, u'Kert\xe9szked\xe9s/Bark\xe1csol\xe1s': 16, u'Mag\xe1n\xf3ra': 13, u'Mozi': 7, u'Munka (irodai)': 3, u'Munka (k\xe9tkezi)': 3, u'M\xe1s': 16, u'Olvas\xe1s': 8, u'PC j\xe1t\xe9k': 16, u'Rokonl\xe1togat\xe1s': 16, u'Sport': 11, u'Sz\xf3rakoz\xf3hely/K\xe1v\xe9z\xf3/Pub': 14, u'S\xe9ta/Kutyas\xe9t\xe1ltat\xe1s': 15, u'TV/Film': 7, u'Tanul\xe1s': 13, u'Telefon/Chat/Facebook': 4, u'Term\xe9szet/Kir\xe1ndul\xe1s': 15, u'Utaz\xe1s/Vezet\xe9s': 12, u'Vall\xe1sgyakorl\xe1s': 6, u'V\xe1s\xe1rl\xe1s': 5, u'Zuhany / Mosd\xf3': 1, u'\xc9tkez\xe9s': 2, u'\xc9tterem/Vend\xe9gl\u0151': 2, u'\xd6nk\xe9ntess\xe9g': 16, u'\xdajs\xe1g/Keresztrejtv\xe9ny': 8}
# run only once
# Promote the first row of hkoz to be the column header, then discard it.
# Not safe to re-run: the slice below removes row label 0, so a second
# execution no longer finds the header row.
hkoz.columns=hkoz.loc[0].values
# The extra column added by reset_index() (where hkoz was created) is now
# labelled with its row-0 value; drop(0,axis=1) relies on that value being 0.
hkoz=hkoz[1:].drop(0,axis=1)
# Preview how the first ten column labels ("<activity> - <time slot>")
# split into their two parts.
for col in hkoz.columns[:10]:
    print(col)
    dash = col.find('-')          # position of the first hyphen in the label
    activity = col[:dash - 1]     # text before the hyphen, minus the space
    timeslice = col[dash + 2:]    # text after the hyphen, minus the space
    print('%s %s' % (activity, timeslice))
Alvás - 05:30-07:00 Alvás 05:30-07:00 Alvás - 07:00-08:30 Alvás 07:00-08:30 Alvás - 08:30-10:00 Alvás 08:30-10:00 Alvás - 10:00-11:30 Alvás 10:00-11:30 Alvás - 11:30-13:00 Alvás 11:30-13:00 Alvás - 13:00-14:30 Alvás 13:00-14:30 Alvás - 14:30-16:00 Alvás 14:30-16:00 Alvás - 16:00-17:30 Alvás 16:00-17:30 Alvás - 17:30-19:00 Alvás 17:30-19:00 Alvás - 19:00-20:30 Alvás 19:00-20:30
# For every row of hkoz (keyed by its row label) collect the column labels
# whose cell is not missing, in column order.
#
# Missing cells are detected with pandas' own null check (dropna) instead of
# comparing str(value).lower() with 'nan': the string comparison raises on
# non-ASCII unicode values under Python 2 and would also discard a cell that
# literally contains the text "nan".
hkozdata = {}
for respondent, answers in hkoz.iterrows():
    hkozdata[respondent] = answers.dropna().index.tolist()
hkozdata[1]
['Alv\xc3\xa1s - 05:30-07:00', 'Alv\xc3\xa1s - 22:00-23:30', 'Alv\xc3\xa1s - 23:30-01:00', 'Alv\xc3\xa1s - 01:00-02:30', 'Alv\xc3\xa1s - 02:30-04:00', 'Alv\xc3\xa1s - 04:00-05:30', 'Zuhany / Mosd\xc3\xb3 - 05:30-07:00', '\xc3\x89tkez\xc3\xa9s - 10:00-11:30', '\xc3\x89tkez\xc3\xa9s - 16:00-17:30', '\xc3\x89tkez\xc3\xa9s - 20:30-22:00', 'Tanul\xc3\xa1s - 07:00-08:30', 'Tanul\xc3\xa1s - 08:30-10:00', 'Tanul\xc3\xa1s - 10:00-11:30', 'Tanul\xc3\xa1s - 11:30-13:00', 'Tanul\xc3\xa1s - 13:00-14:30', 'Tanul\xc3\xa1s - 14:30-16:00', 'Tanul\xc3\xa1s - 17:30-19:00', 'Tanul\xc3\xa1s - 19:00-20:30', 'Internet - 16:00-17:30', 'Internet - 20:30-22:00', 'TV/Film - 16:00-17:30', 'TV/Film - 20:30-22:00', 'PC j\xc3\xa1t\xc3\xa9k - 16:00-17:30', 'PC j\xc3\xa1t\xc3\xa9k - 20:30-22:00', 'Egy\xc3\xa9b Hobby - 16:00-17:30']
# Build, for respondent j, a mapping  time slot -> list of activity codes.
j = 1
timematrix = {}
for label in hkozdata[j]:
    # Labels look like "<activity> - <time slot>". Splitting on the full
    # ' - ' separator (first occurrence) keeps the hyphen inside the time
    # slot ("05:30-07:00") intact.
    activity, _, timeslice = label.partition(' - ')
    # Under Python 2 the column labels are UTF-8 byte strings, while the
    # keys of actidict are unicode. Looking up the raw bytes raises
    # KeyError for every activity with an accented character, so decode
    # first. Already-decoded text (e.g. on Python 3) is left untouched.
    if isinstance(activity, bytes):
        activity = activity.decode('utf-8')
    timematrix.setdefault(timeslice, []).append(actidict[activity])
--------------------------------------------------------------------------- KeyError Traceback (most recent call last) <ipython-input-107-dd5d7bf3b867> in <module>() 5 timeslice=i[i.find('-')+2:] 6 if timeslice not in timematrix:timematrix[timeslice]=[] ----> 7 timematrix[timeslice].append(actidict[activity.replace('\xc3','')]) KeyError: 'Alv\xa1s'
# Distinct activity names found in the column labels of hkoz (the text in
# front of the first hyphen, without the space that precedes it).
{col[:col.find('-') - 1] for col in hkoz.columns}
{'Alv\xc3\xa1s', 'Edz\xc5\x91terem/Sz\xc3\xa9ps\xc3\xa9gszalon', 'Egy\xc3\xa9b Hobby', 'Hivatalos elint\xc3\xa9znival\xc3\xb3k', 'H\xc3\xa1zimunka/Gyerekfel\xc3\xbcgyelet', 'Internet', 'Kert\xc3\xa9szked\xc3\xa9s/Bark\xc3\xa1csol\xc3\xa1s', 'Mag\xc3\xa1n\xc3\xb3ra', 'Mozi', 'Munka (irodai)', 'Munka (k\xc3\xa9tkezi)', 'M\xc3\xa1s', 'Olvas\xc3\xa1s', 'PC j\xc3\xa1t\xc3\xa9k', 'Rokonl\xc3\xa1togat\xc3\xa1s', 'Sport', 'Sz\xc3\xb3rakoz\xc3\xb3hely/K\xc3\xa1v\xc3\xa9z\xc3\xb3/Pub', 'S\xc3\xa9ta/Kutyas\xc3\xa9t\xc3\xa1ltat\xc3\xa1s', 'TV/Film', 'Tanul\xc3\xa1s', 'Telefon/Chat/Facebook', 'Term\xc3\xa9szet/Kir\xc3\xa1ndul\xc3\xa1s', 'Utaz\xc3\xa1s/Vezet\xc3\xa9s', 'Vall\xc3\xa1sgyakorl\xc3\xa1s', 'V\xc3\xa1s\xc3\xa1rl\xc3\xa1s', 'Zuhany / Mosd\xc3\xb3', '\xc3\x89tkez\xc3\xa9s', '\xc3\x89tterem/Vend\xc3\xa9gl\xc5\x91', '\xc3\x96nk\xc3\xa9ntess\xc3\xa9g', '\xc3\x9ajs\xc3\xa1g/Keresztrejtv\xc3\xa9ny'}
actidict
{u'Alv\xe1s': 0, u'Edz\u0151terem/Sz\xe9ps\xe9gszalon': 11, u'Egy\xe9b Hobby': 16, u'Hivatalos elint\xe9znival\xf3k': 10, u'H\xe1zimunka/Gyerekfel\xfcgyelet': 9, u'Internet': 4, u'Kert\xe9szked\xe9s/Bark\xe1csol\xe1s': 16, u'Mag\xe1n\xf3ra': 13, u'Mozi': 7, u'Munka (irodai)': 3, u'Munka (k\xe9tkezi)': 3, u'M\xe1s': 16, u'Olvas\xe1s': 8, u'PC j\xe1t\xe9k': 16, u'Rokonl\xe1togat\xe1s': 16, u'Sport': 11, u'Sz\xf3rakoz\xf3hely/K\xe1v\xe9z\xf3/Pub': 14, u'S\xe9ta/Kutyas\xe9t\xe1ltat\xe1s': 15, u'TV/Film': 7, u'Tanul\xe1s': 13, u'Telefon/Chat/Facebook': 4, u'Term\xe9szet/Kir\xe1ndul\xe1s': 15, u'Utaz\xe1s/Vezet\xe9s': 12, u'Vall\xe1sgyakorl\xe1s': 6, u'V\xe1s\xe1rl\xe1s': 5, u'Zuhany / Mosd\xf3': 1, u'\xc9tkez\xe9s': 2, u'\xc9tterem/Vend\xe9gl\u0151': 2, u'\xd6nk\xe9ntess\xe9g': 16, u'\xdajs\xe1g/Keresztrejtv\xe9ny': 8}
timematrix
{'05:30-07:00': []}