#!/usr/bin/env python
# coding: utf-8

# # How do I use the MultiIndex in pandas? ([video](https://www.youtube.com/watch?v=tcRGa2soc-c&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=31))
#
# - [My pandas video series](https://www.dataschool.io/easier-data-analysis-with-pandas/)
# - [GitHub repository](https://github.com/justmarkham/pandas-videos)
#
# NOTE: this file is a notebook export. Bare expressions (e.g. `stocks`) are
# the notebook's cell outputs; they are evaluated but display nothing when the
# file is run as a plain script.

# In[1]:

import pandas as pd

# In[2]:

stocks = pd.read_csv('data/stocks.csv')
stocks

# In[3]:

# Freshly read data carries the default (single-level) index.
stocks.index

# In[4]:

# Grouping by one key gives a Series with an ordinary index.
stocks.groupby('Symbol').Close.mean()

# ## Series with MultiIndex

# In[5]:

# Grouping by two keys gives a Series indexed by (Symbol, Date) pairs.
ser = stocks.groupby(['Symbol', 'Date']).Close.mean()
ser

# In[6]:

ser.index

# In[7]:

# unstack() pivots the inner index level (Date) into columns.
ser.unstack()

# In[8]:

# pivot_table builds the Symbol-by-Date frame directly from the raw data
# (its default aggregation is the mean).
df = stocks.pivot_table(values='Close', index='Symbol', columns='Date')
df

# ## Selection from Series with MultiIndex

# In[9]:

ser

# In[10]:

# Outer level only: every Date for one Symbol.
ser.loc['AAPL']

# In[11]:

# Outer and inner level: a single value.
ser.loc['AAPL', '2016-10-03']

# In[12]:

# All Symbols for one Date.
ser.loc[:, '2016-10-03']

# In[13]:

# The same three selections, expressed on the equivalent DataFrame.
df

# In[14]:

df.loc['AAPL']

# In[15]:

df.loc['AAPL', '2016-10-03']

# In[16]:

df.loc[:, '2016-10-03']

# ## DataFrame with MultiIndex

# In[17]:

# Mutates `stocks` in place: Symbol and Date move from columns to the index.
stocks.set_index(['Symbol', 'Date'], inplace=True)
stocks

# In[18]:

stocks.index

# In[19]:

# Sort the MultiIndex before the label-based selections below; pandas
# recommends a sorted MultiIndex for slicing.
stocks.sort_index(inplace=True)
stocks

# ## Selection from DataFrame with MultiIndex

# In[20]:

stocks.loc['AAPL']

# In[21]:

# A tuple addresses the index levels; the second .loc argument picks columns.
stocks.loc[('AAPL', '2016-10-03'), :]

# In[22]:

stocks.loc[('AAPL', '2016-10-03'), 'Close']

# In[23]:

# A list selects several labels on the outer level.
stocks.loc[['AAPL', 'MSFT'], :]

# In[24]:

# Lists may be placed inside the tuple, one entry per index level.
stocks.loc[(['AAPL', 'MSFT'], '2016-10-03'), :]

# In[25]:

stocks.loc[(['AAPL', 'MSFT'], '2016-10-03'), 'Close']

# In[26]:

stocks.loc[('AAPL', ['2016-10-03', '2016-10-04']), 'Close']

# In[27]:

# slice(None) stands in for ":" inside the tuple, i.e. "all Symbols".
stocks.loc[(slice(None), ['2016-10-03', '2016-10-04']), :]

# ## Merging DataFrames with MultiIndexes

# In[28]:

# usecols selects columns by position.
# NOTE(review): presumably Date, Close, Symbol -- confirm against the CSV header.
close = pd.read_csv('data/stocks.csv', usecols=[0, 1, 3], index_col=['Symbol', 'Date']).sort_index()
close

# In[29]:

# NOTE(review): presumably Date, Volume, Symbol -- confirm against the CSV header.
volume = pd.read_csv('data/stocks.csv', usecols=[0, 2, 3], index_col=['Symbol', 'Date']).sort_index()
volume

# In[30]:

# Join on the shared (Symbol, Date) MultiIndex rather than on columns.
both = pd.merge(close, volume, left_index=True, right_index=True)
both

# In[31]:

# reset_index() turns the index levels back into ordinary columns.
both.reset_index()