#!/usr/bin/env python
# coding: utf-8

# ![Pandas Tutorial | Hedaro >](https://www.dropbox.com/s/220ncn0o5danuey/pandas-ipython-tutorials-hedaro.jpg?dl=1)

# # Lesson 4

# In this lesson we're going to go back to the basics. We will be working
# with a small data set so that you can easily understand what I am trying
# to explain. We will be adding columns, deleting columns, and slicing the
# data many different ways. Enjoy!
#
# NOTE: this file is a notebook export. A bare expression at the end of a
# cell (e.g. ``df``) is displayed by a notebook/IPython session; when the
# file is run as a plain script the expression is evaluated and discarded.

# In[1]:


# Import libraries
import sys

import pandas as pd


# In[2]:


print('Python version ' + sys.version)
print('Pandas version: ' + pd.__version__)


# In[3]:


# Our small data set
d = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

# Create dataframe
df = pd.DataFrame(d)
df


# In[4]:


# Let's change the name of the column
df.columns = ['Rev']
df


# In[5]:


# Let's add a column
df['NewCol'] = 5
df


# In[6]:


# Let's modify our new column
df['NewCol'] = df['NewCol'] + 1
df


# In[7]:


# We can delete columns
del df['NewCol']
df


# In[8]:


# Let's add a couple of columns
df['test'] = 3
df['col'] = df['Rev']
df


# In[9]:


# If we wanted, we could change the name of the index
i = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']
df.index = i
df


# We can now start to select pieces of the dataframe using ***loc***.

# In[10]:


df.loc['a']


# In[11]:


# df.loc[inclusive:inclusive]
df.loc['a':'d']


# In[12]:


# df.iloc[inclusive:exclusive]
# Note: .iloc is strictly integer position based. It is available from
# [version 0.11.0](http://pandas.pydata.org/pandas-docs/stable/whatsnew.html#v0-11-0-april-22-2013)
df.iloc[0:3]


# We can also select using the column name.

# In[13]:


df['Rev']


# In[14]:


df[['Rev', 'test']]


# In[15]:


# df.ix[rows,columns]
# The line below replaces the deprecated ix function, formerly written as:
#     df.ix[0:3,'Rev']
# df.index[0:3] turns the row positions into index labels so they can be
# passed to the label-based .loc.
df.loc[df.index[0:3], 'Rev']


# In[16]:


# The line below replaces the deprecated ix function, formerly written as:
#     df.ix[5:,'col']
df.loc[df.index[5:], 'col']


# In[17]:


# The line below replaces the deprecated ix function, formerly written as:
#     df.ix[:3,['col', 'test']]
df.loc[df.index[:3], ['col', 'test']]


# There are also some handy functions to select the top and bottom records
# of a dataframe.

# In[18]:


# Select top N number of records (default = 5)
df.head()


# In[19]:


# Select bottom N number of records (default = 5)
df.tail()


# This tutorial was created by HEDARO