import numpy as np
from pandas import Series,DataFrame
import pandas as pd
from numpy.random import randn
# Now we'll learn about index hierarchy.
# pandas lets an axis carry several index levels; here two parallel label
# lists form the outer (1, 2) and inner ('a', 'b', 'c') levels.
ser = pd.Series(
    np.random.randn(6),
    index=pd.MultiIndex.from_arrays(
        [[1, 1, 1, 2, 2, 2], ['a', 'b', 'c', 'a', 'b', 'c']]
    ),
)
# Show the Series with its multiple index levels
ser
1 a -1.337299 b -0.690616 c 1.792962 2 a 0.457808 b 0.891199 c -1.366387 dtype: float64
# We can inspect the index itself: it is a MultiIndex holding one set of labels per level
ser.index
MultiIndex(levels=[[1, 2], [u'a', u'b', u'c']], labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]])
# Now we can select specific subsets: an outer-level label returns every row stored under it
ser[1]
a -1.337299 b -0.690616 c 1.792962 dtype: float64
# We can also select on the inner index level: the bare slice keeps every
# outer label, and 'a' picks the matching inner label
ser[:,'a']
1 -1.337299 2 0.457808 dtype: float64
# We can also create a DataFrame from a Series with multiple index levels:
# unstack() turns the inner level ('a', 'b', 'c') into columns
dframe = ser.unstack()
# Show
dframe
a | b | c | |
---|---|---|---|
1 | -1.337299 | -0.690616 | 1.792962 |
2 | 0.457808 | 0.891199 | -1.366387 |
# unstack() also works on the DataFrame: it folds the columns back into the
# index and returns a Series. Note the column labels become the OUTER level
# here, so the level order is swapped relative to ser (dframe.stack() would
# restore the original number-then-letter order).
dframe.unstack()
a 1 -1.337299 2 0.457808 b 1 -0.690616 2 0.891199 c 1 1.792962 2 -1.366387 dtype: float64
# Multiple index levels work on DataFrames too: here both the rows and the
# columns are labelled by a two-level MultiIndex built from parallel lists
dframe2 = pd.DataFrame(
    np.arange(16).reshape(4, 4),
    index=pd.MultiIndex.from_arrays([['a', 'a', 'b', 'b'], [1, 2, 1, 2]]),
    columns=pd.MultiIndex.from_arrays(
        [['NY', 'NY', 'LA', 'SF'], ['cold', 'hot', 'hot', 'cold']]
    ),
)
dframe2
NY | LA | SF | |||
---|---|---|---|---|---|
cold | hot | hot | cold | ||
a | 1 | 0 | 1 | 2 | 3 |
2 | 4 | 5 | 6 | 7 | |
b | 1 | 8 | 9 | 10 | 11 |
2 | 12 | 13 | 14 | 15 |
# We can also give these index levels names
# Name the two row-index levels (outer level first)
dframe2.index.names = ['INDEX_1','INDEX_2']
# Name the two column levels (outer level first)
dframe2.columns.names = ['Cities','Temp']
dframe2
Cities | NY | LA | SF | ||
---|---|---|---|---|---|
Temp | cold | hot | hot | cold | |
INDEX_1 | INDEX_2 | ||||
a | 1 | 0 | 1 | 2 | 3 |
2 | 4 | 5 | 6 | 7 | |
b | 1 | 8 | 9 | 10 | 11 |
2 | 12 | 13 | 14 | 15 |
# We can also interchange the order of two levels. axis=1 targets the column
# levels; a new frame is returned and dframe2 itself is left unchanged
dframe2.swaplevel('Cities','Temp',axis=1)
Temp | cold | hot | cold | ||
---|---|---|---|---|---|
Cities | NY | NY | LA | SF | |
INDEX_1 | INDEX_2 | ||||
a | 1 | 0 | 1 | 2 | 3 |
2 | 4 | 5 | 6 | 7 | |
b | 1 | 8 | 9 | 10 | 11 |
2 | 12 | 13 | 14 | 15 |
# We can also sort by a particular index level. DataFrame.sortlevel() was
# deprecated and later removed from pandas; sort_index(level=...) is its
# replacement. Level 1 is INDEX_2, and ties are broken by the remaining level.
dframe2.sort_index(level=1)
Cities | NY | LA | SF | ||
---|---|---|---|---|---|
Temp | cold | hot | hot | cold | |
INDEX_1 | INDEX_2 | ||||
a | 1 | 0 | 1 | 2 | 3 |
b | 1 | 8 | 9 | 10 | 11 |
a | 2 | 4 | 5 | 6 | 7 |
b | 2 | 12 | 13 | 14 | 15 |
#Note the change in sorting: the DataFrame index is now sorted by INDEX_2
# We can also perform operations on particular levels. The level= argument of
# sum() was removed in pandas 2.0, so group the columns by their 'Temp' level
# instead: transpose so the column levels become row levels, group and sum,
# then transpose back to get one column per temperature.
dframe2.T.groupby(level='Temp').sum().T
Temp | cold | hot | |
---|---|---|---|
INDEX_1 | INDEX_2 | ||
a | 1 | 3 | 3 |
2 | 11 | 11 | |
b | 1 | 19 | 19 |
2 | 27 | 27 |
#That's the end of this section! Next up, Section 5: Working with Data Part 1!