#!/usr/bin/env python
# coding: utf-8

# # Pandas Basics
#
# A short, top-to-bottom walkthrough of the two core pandas containers
# (Series and DataFrame). Every step prints its result to stdout.

# In[26]:

import pandas as pd

print(pd.__version__)


# ## Pandas Series
#
# ### A Pandas Series is like a column in a table.
#
# *It is a one-dimensional array holding data of any type.*

# In[13]:

a = [1, 7, 2]
myvar = pd.Series(a)
print(myvar)

# In[17]:

# No index was supplied, so the labels are the default integers 0, 1, 2.
print(myvar[1])


# - **With the index argument, you can name your own labels.**

# In[14]:

# `a` still holds [1, 7, 2] from above; only the labels change here.
myvar = pd.Series(a, index=["x", "y", "z"])
print(myvar)

# In[19]:

print(myvar["y"])


# - **You can also use a key/value object, like a dictionary, when
#   creating a Series.**

# In[20]:

calories = {
    "day1": 420,
    "day2": 380,
    "day3": 390,
}
myvar = pd.Series(calories)
print(myvar)

# *The keys of the dictionary become the labels.*

# In[21]:

print(myvar["day1"])


# - **To select only some of the items in the dictionary, use the index
#   argument and specify only the items you want to include in the
#   Series.**

# In[23]:

# Same `calories` dictionary as above, restricted to two of its keys.
myvar = pd.Series(calories, index=["day1", "day2"])
print(myvar)


# ## Pandas DataFrame
#
# ### Data sets in Pandas are usually multi-dimensional tables, called
# ### DataFrames.
#
# *A Series is like a column; a DataFrame is the whole table.*

# In[48]:

data = {
    "calories": [420, 380, 390],
    "duration": [50, 40, 45],
}
df = pd.DataFrame(data)
print(df)


# ### Locate Row
#
# *Pandas uses the `loc` attribute to return one or more specified rows.*

# In[49]:

print(df.loc[0])

# *A single label returns a Pandas Series.*

# In[50]:

print(df.loc[[0, 1]])

# *A list of labels (double brackets) returns a Pandas DataFrame.*

# In[35]:

# Same `data` as above, this time with named row labels.
df = pd.DataFrame(data, index=["day1", "day2", "day3"])
print(df)

# In[36]:

print(df.loc["day2"])


# ### Load Files Into a DataFrame

# In[51]:

# The path is relative, so pokemon_data.csv is looked up in the current
# working directory.
df = pd.read_csv("pokemon_data.csv")
print(df)