#!/usr/bin/env python # coding: utf-8 # # Accessing a DataFrame # There are many [different choices for indexing](https://pandas.pydata.org/pandas-docs/stable/indexing.html#different-choices-for-indexing) DataFrames. # # Let's explore! # In[1]: # Setup import pandas as pd test_user_data = { 'first_name': ['Craig', 'Treasure', 'Ashley', 'Guil'], 'last_name': ['Dennis', 'Porth', 'Boucher', 'Hernandez'], 'balance': [42.42, 25.00, 2.02, 87.00] } test_user_names = ['craigsdennis', 'treasure', 'lindsay2000', 'guil'] users = pd.DataFrame(test_user_data, index=test_user_names) # ## Retrieve a specific Series # # ### By Column Name # Each column in a `DataFrame` is actually a `Series`. The `DataFrame` provides access to each of these `Series` by a column name index. # # For instance, to get the **`balance`** `Series`, you could just use that for the index. # In[2]: balances = users['balance'] balances # Note that the returned `Series` has a property of `name` set to the column label. # In[3]: balances.name # ### By Label # You can retrieve a row from a `DataFrame` by using the `loc` property and supply the label. # # Note how the returned `Series` is labelled by the existing column labels of the **`users`** `DataFrame` and the `name` property is set to the label. # In[4]: users.loc['guil'] # ### By Position # Normal list like indices are also available to get a specific row, by using the `iloc` ( `i`ndex `loc`ation) property and the appropriate index. # # For instance the second row can be accessed by using the index `1`. # In[5]: users.iloc[1] # ## Retrieve a Specific Value # # ### By Chaining # You can definitely [chain lookups together](https://pandas.pydata.org/pandas-docs/stable/indexing.html#returning-a-view-versus-a-copy), but there's a better way to do it! # # Note that this is really running multiple separate lookups. You don't want to force this extra work to happen. However, I do want you to see that you can (and you will see this!) # In[6]: # CAREFUL: This first retrieves the column `Series` # and then uses the label. users['first_name']['craigsdennis'] # In[7]: # CAREFUL: This retrieves the row `Series` # and then does a lookup for first_name users.loc['craigsdennis']['first_name'] # ### By Row and Column # The DataFrame allows access in a single operation to a specific value by specifying a row and a column. There are a few options here too. # # #### Using [`DataFrame.loc`](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.loc.html) # # # # The `loc` property on `DataFrame` allows you to work on both axes, rows and columns, by indexing with a tuple (remember a comma creates a tuple). The first element of the tuple is the row access, second is the column. # # Using a single label and single column name will retrieve that specific scalar value. # In[8]: users.loc['craigsdennis', 'first_name'] # #### Using [`DataFrame.at`](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.at.html#pandas.DataFrame.at) # # To be even more explicit (💪🏾) that you are trying to just get at a single scalar value, you should use the `DataFrame.at` property. # In[9]: users.at['craigsdennis', 'first_name'] # ## Retrieve a Specific DataFrame Through Slicing # # Using the `loc` and `iloc` properties you can slice an existing `DataFrame` into a new one. # # In the example below we use `:` in the rows axis to select all rows, and we specify which columns we want back using a list in the columns axis, ala NumPy Fancy Indexing. # In[10]: # All rows and the following ordered columns as a list (fancy, eh?) users.loc[:, ['balance', 'last_name']] # When using a slice with `loc` the results are inclusive, meaning they include the right side. # In[11]: # All rows starting with 'treasure' up to and including 'lindsay2000' # and all the columns users.loc['treasure':'lindsay2000', :] # When using a slice with `iloc` the results are exclusive, just like standard Python list slices # In[12]: # All rows starting with the 2nd up until but not including the 3rd # and all columns starting at the second until the end users.iloc[1:2, 1:]