#!/usr/bin/env python
# coding: utf-8

# # 5 new changes in pandas you need to know about ([video](https://www.youtube.com/watch?v=te5JrSCW-LY&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=33))
# 
# - [My pandas video series (30 videos)](http://www.dataschool.io/easier-data-analysis-with-pandas/)
# - [GitHub repository](https://github.com/justmarkham/pandas-videos)
# - [pandas release notes](http://pandas.pydata.org/pandas-docs/stable/whatsnew.html)

# In[1]:


import pandas as pd
# show the installed pandas version: the changes covered below are marked as new in 0.20.0 or 0.21.0
pd.__version__


# ## 1. `ix` has been deprecated
# 
# *New in 0.20.0*

# In[2]:


# load the drinks-by-country data, using the 'country' column as the row labels
drinks = pd.read_csv(
    'http://bit.ly/drinksbycountry',
    index_col='country',
)
drinks.head()


# In[3]:


# loc: select a single value by row label and column label
drinks.loc['Angola', 'spirit_servings']


# In[4]:


# iloc: select a single value by integer position (row 4, column 1)
drinks.iloc[4, 1]


# In[5]:


# ix accesses by label OR position (deprecated in 0.20.0, removed in pandas 1.0.0)
# The call is guarded so that the rest of the script still runs on pandas
# versions where the ix indexer no longer exists.
try:
    drinks.ix['Angola', 1]
except AttributeError:
    print('DataFrame.ix is not available in this pandas version; use loc or iloc instead')


# In[6]:


# alternative: use loc (look up the label of the column at position 1)
drinks.loc['Angola', drinks.columns[1]]


# In[7]:


# alternative: use iloc (look up the position of the row labelled 'Angola')
drinks.iloc[drinks.index.get_loc('Angola'), 1]


# In[8]:


# ix accesses by label OR position (deprecated in 0.20.0, removed in pandas 1.0.0)
try:
    drinks.ix[4, 'spirit_servings']
except AttributeError:
    print('DataFrame.ix is not available in this pandas version; use loc or iloc instead')


# In[9]:


# alternative: use loc (look up the label of the row at position 4)
drinks.loc[drinks.index[4], 'spirit_servings']


# In[10]:


# alternative: use iloc (look up the position of the 'spirit_servings' column)
drinks.iloc[4, drinks.columns.get_loc('spirit_servings')]


# - [More information](http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated)
# - [Video: How do I select multiple rows and columns from a pandas DataFrame?](https://www.youtube.com/watch?v=xvpNA7bC8cs&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=19)

# ## 2. Aliases have been added for `isnull` and `notnull`
# 
# *New in 0.21.0*

# In[11]:


# load the UFO sighting reports into a DataFrame
ufo = pd.read_csv('http://bit.ly/uforeports')
ufo.head()


# In[12]:


# isnull: boolean DataFrame marking the missing values
ufo.isnull().head()


# In[13]:


# notnull: boolean DataFrame marking the values that are present
ufo.notnull().head()


# In[14]:


# dropna: discard the rows that contain missing values
ufo.dropna().head()


# In[15]:


# fillna: substitute a placeholder for each missing value
ufo.fillna('UNKNOWN').head()


# In[16]:


# isna: the new alias for isnull
ufo.isna().head()


# In[17]:


# notna: the new alias for notnull
ufo.notna().head()


# - [More information](http://pandas.pydata.org/pandas-docs/stable/whatsnew.html#na-naming-changes)
# - [Video: How do I handle missing values in pandas?](https://www.youtube.com/watch?v=fCMrO_VzeL8&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=16)

# ## 3. `drop` now accepts "index" and "columns" keywords
# 
# *New in 0.21.0*

# In[18]:


# load the UFO sighting reports into a DataFrame
ufo = pd.read_csv('http://bit.ly/uforeports')
ufo.head()


# In[19]:


# dropping rows the old way: pass the labels together with an axis (number or name)
ufo.drop(labels=[0, 1], axis=0).head()
ufo.drop(labels=[0, 1], axis='index').head()


# In[20]:


# dropping rows the new way: pass the labels through the index keyword
ufo.drop(index=[0, 1]).head()


# In[21]:


# dropping columns the old way: pass the labels together with an axis (number or name)
ufo.drop(labels=['City', 'State'], axis=1).head()
ufo.drop(labels=['City', 'State'], axis='columns').head()


# In[22]:


# dropping columns the new way: pass the labels through the columns keyword
ufo.drop(columns=['City', 'State']).head()


# - [More information](http://pandas.pydata.org/pandas-docs/stable/whatsnew.html#drop-now-also-accepts-index-columns-keywords)
# - [Video: How do I remove columns from a pandas DataFrame?](https://www.youtube.com/watch?v=gnUKkS964WQ&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=6)

# ## 4. `rename` and `reindex` now accept "axis" keyword
# 
# *New in 0.21.0*

# In[23]:


# renaming columns the old way: pass the mapping through the columns keyword
ufo.rename(columns={'City': 'CITY', 'State': 'STATE'}).head()


# In[24]:


# renaming columns the new way: pass a mapper plus the axis it applies to
ufo.rename(mapper={'City': 'CITY', 'State': 'STATE'}, axis='columns').head()


# In[25]:


# the mapper may also be a function, applied here to each column label
ufo.rename(mapper=str.upper, axis='columns').head()


# - [More information](http://pandas.pydata.org/pandas-docs/stable/whatsnew.html#rename-reindex-now-also-accept-axis-keyword)
# - [Video: How do I rename columns in a pandas DataFrame?](https://www.youtube.com/watch?v=0uBirYFhizE&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=5)

# ## 5. Ordered categories must be specified independent of the data
# 
# *New in 0.21.0*

# In[26]:


# create a small DataFrame
df = pd.DataFrame({'ID':[100, 101, 102, 103],
                   'quality':['good', 'very good', 'good', 'excellent']})
df


# In[27]:


# old way to create an ordered category (deprecated)
# Later pandas releases reject the categories/ordered keywords with a TypeError,
# so the call is guarded to let the script continue to the new approach below.
try:
    df.quality.astype('category', categories=['good', 'very good', 'excellent'], ordered=True)
except TypeError:
    print('astype no longer accepts categories/ordered in this pandas version; use CategoricalDtype instead')


# In[28]:


# new way to create an ordered category: describe the categories and their
# order in a CategoricalDtype, independent of the data, then cast to it
from pandas.api.types import CategoricalDtype
quality_cat = CategoricalDtype(['good', 'very good', 'excellent'], ordered=True)
df['quality'] = df.quality.astype(quality_cat)
df.quality


# - [More information](http://pandas.pydata.org/pandas-docs/stable/whatsnew.html#whatsnew-0210-enhancements-categorical-dtype)
# - [Video: How do I make my pandas DataFrame smaller and faster?](https://www.youtube.com/watch?v=wDYDYGyN_cw&list=PL5-da3qGB5ICCsgW1MxlZ0Hq8LL5U3u9y&index=21)