#!/usr/bin/env python
# coding: utf-8

# # 5 new changes in pandas you need to know about ([video](https://www.youtube.com/watch?v=te5JrSCW-LY&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=33))
#
# - [My pandas video series (30 videos)](http://www.dataschool.io/easier-data-analysis-with-pandas/)
# - [GitHub repository](https://github.com/justmarkham/pandas-videos)
# - [pandas release notes](http://pandas.pydata.org/pandas-docs/stable/whatsnew.html)

# In[1]:


import pandas as pd
pd.__version__


# ## 1. `ix` has been deprecated
#
# *New in 0.20.0*

# In[2]:


# read the drinks dataset into a DataFrame
drinks = pd.read_csv('http://bit.ly/drinksbycountry', index_col='country')
drinks.head()


# In[3]:


# loc accesses by label
drinks.loc['Angola', 'spirit_servings']


# In[4]:


# iloc accesses by position
drinks.iloc[4, 1]


# In[5]:


# ix accesses by label OR position (newly deprecated)
# NOTE: `ix` was removed outright in pandas 1.0, so the demo only runs where
# the accessor still exists; otherwise the script would stop here
if hasattr(drinks, 'ix'):
    drinks.ix['Angola', 1]


# In[6]:


# alternative: use loc
drinks.loc['Angola', drinks.columns[1]]


# In[7]:


# alternative: use iloc
drinks.iloc[drinks.index.get_loc('Angola'), 1]


# In[8]:


# ix accesses by label OR position (newly deprecated)
# NOTE: guarded for the same reason as above (`ix` is absent from pandas 1.0+)
if hasattr(drinks, 'ix'):
    drinks.ix[4, 'spirit_servings']


# In[9]:


# alternative: use loc
drinks.loc[drinks.index[4], 'spirit_servings']


# In[10]:


# alternative: use iloc
drinks.iloc[4, drinks.columns.get_loc('spirit_servings')]


# - [More information](http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated)
# - [Video: How do I select multiple rows and columns from a pandas DataFrame?](https://www.youtube.com/watch?v=xvpNA7bC8cs&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=19)

# ## 2.
# Aliases have been added for `isnull` and `notnull`
#
# *New in 0.21.0*

# In[11]:


# read the UFO dataset into a DataFrame
ufo = pd.read_csv('http://bit.ly/uforeports')
ufo.head()


# In[12]:


# check which values are missing
ufo.isnull().head()


# In[13]:


# check which values are not missing
ufo.notnull().head()


# In[14]:


# drop rows with missing values
ufo.dropna().head()


# In[15]:


# fill in missing values
ufo.fillna(value='UNKNOWN').head()


# In[16]:


# new alias for isnull
ufo.isna().head()


# In[17]:


# new alias for notnull
ufo.notna().head()


# - [More information](http://pandas.pydata.org/pandas-docs/stable/whatsnew.html#na-naming-changes)
# - [Video: How do I handle missing values in pandas?](https://www.youtube.com/watch?v=fCMrO_VzeL8&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=16)

# ## 3. `drop` now accepts "index" and "columns" keywords
#
# *New in 0.21.0*

# In[18]:


# read the UFO dataset into a DataFrame
ufo = pd.read_csv('http://bit.ly/uforeports')
ufo.head()


# In[19]:


# old way to drop rows: specify labels and axis
ufo.drop([0, 1], axis=0).head()
ufo.drop([0, 1], axis='index').head()


# In[20]:


# new way to drop rows: specify index
ufo.drop(index=[0, 1]).head()


# In[21]:


# old way to drop columns: specify labels and axis
ufo.drop(['City', 'State'], axis=1).head()
ufo.drop(['City', 'State'], axis='columns').head()


# In[22]:


# new way to drop columns: specify columns
ufo.drop(columns=['City', 'State']).head()


# - [More information](http://pandas.pydata.org/pandas-docs/stable/whatsnew.html#drop-now-also-accepts-index-columns-keywords)
# - [Video: How do I remove columns from a pandas DataFrame?](https://www.youtube.com/watch?v=gnUKkS964WQ&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=6)

# ## 4.
# `rename` and `reindex` now accept "axis" keyword
#
# *New in 0.21.0*

# In[23]:


# old way to rename columns: specify columns
ufo.rename(columns={'City':'CITY', 'State':'STATE'}).head()


# In[24]:


# new way to rename columns: specify mapper and axis
ufo.rename({'City':'CITY', 'State':'STATE'}, axis='columns').head()


# In[25]:


# note: mapper can be a function
ufo.rename(str.upper, axis='columns').head()


# - [More information](http://pandas.pydata.org/pandas-docs/stable/whatsnew.html#rename-reindex-now-also-accept-axis-keyword)
# - [Video: How do I rename columns in a pandas DataFrame?](https://www.youtube.com/watch?v=0uBirYFhizE&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=5)

# ## 5. Ordered categories must be specified independent of the data
#
# *New in 0.21.0*

# In[26]:


# create a small DataFrame
df = pd.DataFrame({'ID':[100, 101, 102, 103],
                   'quality':['good', 'very good', 'good', 'excellent']})
df


# In[27]:


# old way to create an ordered category (deprecated)
# NOTE: later pandas releases dropped the `categories`/`ordered` keywords from
# astype, which makes this call raise TypeError; the try body is limited to
# the one deprecated call so nothing else is masked
try:
    df.quality.astype('category', categories=['good', 'very good', 'excellent'], ordered=True)
except TypeError:
    pass  # keywords no longer accepted; the supported form follows in In[28]


# In[28]:


# new way to create an ordered category
from pandas.api.types import CategoricalDtype
quality_cat = CategoricalDtype(['good', 'very good', 'excellent'], ordered=True)
df['quality'] = df.quality.astype(quality_cat)
df.quality


# - [More information](http://pandas.pydata.org/pandas-docs/stable/whatsnew.html#whatsnew-0210-enhancements-categorical-dtype)
# - [Video: How do I make my pandas DataFrame smaller and faster?](https://www.youtube.com/watch?v=wDYDYGyN_cw&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=21)