# CSV file for demonstration
# Backup your own NikePlus data to CSV by using the following:
# API wrapper: https://github.com/durden/nikeplus
# Video explaining concept behind API wrapper: http://www.youtube.com/watch?v=jA0dwPtiu7c

# Preview the first lines of the CSV file. NOTE(review): this is a shell
# pipeline, not Python -- presumably it relies on the IPython notebook passing
# `cat` through to the system shell; a plain Python interpreter rejects it.
cat nikeplus.csv | head

# Let's use the pandas library to explore this data in memory
# in a data structure called a DataFrame, which you can
# think of as similar to an Excel spreadsheet.
import pandas as pd

nike = pd.read_csv('nikeplus.csv')
nike

# Use the date column as the index instead of a normal column so we can plot
# with it and anchor all data based on a date, since our data only has 1 entry
# per day. index_col=8 picks that column by its zero-based position.
nike = pd.read_csv('nikeplus.csv', index_col=8)
nike

# Pandas DataFrames support easy indexing by column name and traditional Python
# list-style slicing. We'll use this trick a lot in this demonstration just
# to avoid showing lots of data at once.
nike['miles'][:5]

# Pandas can turn our DataFrame into HTML, but the resulting markup string is
# not displayed very nicely in this notebook interface by default.
nike[:5].to_html()

# IPython is several things all rolled into one, including but not limited to:
#  1. A Python library that can do all sorts of things, including displaying HTML
#  2. The provider of this notebook interface to execute code and display results
# Import from IPython.display, the public entry point for the display helpers,
# rather than reaching into the internal IPython.core package.
from IPython.display import display_html
# raw=True: the argument is already HTML markup, not an object to be formatted.
display_html(nike[:5].to_html(), raw=True)

# Typically you could just plot a DataFrame like this, which by default
# plots each column as its own line on our graph. However, our
# data contains some non-numeric columns like 'device' and 'start_time'.

# Uncomment this line and run this cell with shift-enter to see
# the associated error.
#nike.plot()

# However, we can index by column name and plot only a single column,
# a pandas Series data structure, to focus on a single column
# of data. This will work because we know the miles column is all numerical
# data. Again, this still could be prettier; maybe there's a problem with our
# importing?
nike['miles'].plot()

# Pandas has A LOT of arguments to tweak how CSV files are imported,
# including the ability to parse dates into Python datetime objects
# instead of strings.
# parse_dates=[8] names the same column position as index_col=8, so it is the
# index values that get parsed as dates.
nike = pd.read_csv('nikeplus.csv', index_col=8, parse_dates=[8])
nike

# Note that IPython code cells keep a global state, so we don't
# need to import this function again if we run this cell after
# the cell above with the import in it.
display_html(nike[:5].to_html(), raw=True)

# With the index parsed as datetimes rather than strings, pandas and
# matplotlib can lay out the x axis properly, so the plot looks much better.

# The full series is still too much data for the default width; IPython
# lets you drag the bottom right corner of a plot to enlarge it.
miles = nike['miles']
miles.plot()

# Plotting only the first 20 entries reads even better in this window.
miles.iloc[:20].plot()

# plot() hands its extra arguments straight to matplotlib, so there are
# plenty of ways to tweak the display -- for example, adding a title.
miles.iloc[:30].plot(title='Miles')

# Maybe we don't want to deal with all those columns and are only
# interested in a DataFrame with a few columns. Selecting with a list of
# column names returns a new DataFrame holding just those columns and raises
# a KeyError on a misspelled name (reindex would silently produce an all-NaN
# column instead).
nike2 = nike[['calories', 'fuel']]
nike2

# Our new DataFrame, nike2, only has numerical columns now. So, the default
# pandas plotting will work and can automatically make a new line for
# each of our columns.
nike2.plot()

# We can further control what data is actually shown on the x/y axes.
# So, this plot effectively shows that calories and Nike's proprietary
# fuel measurement have a linear relationship.
nike2[:30].plot(x='calories', y='fuel')

# Of course, we aren't limited to just line plots; there are
# all sorts provided by matplotlib.
nike2[:30].plot(kind='bar')

# Finally, we could remove the column-selection step above and save memory and
# time up front if we know we're only interested in a few columns at the
# time of reading the data from the CSV. Remember, pandas read_csv has
# A LOT of arguments...

# Note that with usecols an integer index_col counts positions among the
# selected columns only (in file order -- the order of the usecols list is
# ignored), so we name the index column instead of relying on its position.
# parse_dates keeps the index as datetimes, matching the earlier load.
nike = pd.read_csv('nikeplus.csv',
                   usecols=['miles', 'steps', 'fuel', 'calories', 'start_time'],
                   index_col='start_time', parse_dates=['start_time'])
nike