# we import pandas <- this is a comment
import pandas as pd # <- this is code
import matplotlib.pyplot as plt
%matplotlib inline
# load data into pandas dataframe
# The file is semicolon-separated and writes decimals with a comma
# (e.g. "39,9"), so tell pandas about the decimal mark; otherwise the
# macro columns (P, C, F) are read as strings instead of numbers.
df = pd.read_csv("data/data.csv", delimiter=";", decimal=",")
# look at the first five rows of the data
df.head()
ID | Meal | Restaurant | Type | Cal | P | C | F | Like Trigger | Reg | ... | Unnamed: 17 | Unnamed: 18 | Unnamed: 19 | Unnamed: 20 | Unnamed: 21 | Unnamed: 22 | Unnamed: 23 | Unnamed: 24 | Unnamed: 25 | Unnamed: 26 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1.0 | Protein Box | Chilango | Lunch / Dinner | 480 | 50 | 16 | 20 | 1.0 | 1.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1 | 2.0 | Rice Hot Box + chicken | Chilango | Lunch / Dinner | 495 | 39,9 | 61,1 | 10,7 | 1.0 | 1.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
2 | 3.0 | Chicken & Black Beans Burrito | Chilango | Lunch / Dinner | 606 | 41 | 73 | 38 | 1.0 | 1.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
3 | 4.0 | Naked Burrito (grilled chicken & guac) | Chilango | Lunch / Dinner | 475 | 34 | 35 | 22 | 1.0 | 1.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
4 | 5.0 | Super Eggs + Smoked Salmon | Pure | Breakfast | 279 | 24,4 | 1,8 | 19,2 | 1.0 | 1.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
5 rows × 27 columns
# tally how often each distinct value appears in the "Like Trigger" column
df.loc[:, "Like Trigger"].value_counts()
1.0 192 0.0 1 Name: Like Trigger, dtype: int64
# plot the number of rows per restaurant as a bar chart
# Restaurant holds text labels, so count each label explicitly instead of
# calling hist(), which would split the labels into 10 numeric bins and
# produce bars that do not line up one-to-one with restaurants.
df["Restaurant"].value_counts().plot(kind="bar")
<AxesSubplot:>
# ... sky is the limit
# Some next steps:
# -> clean up the data: there are some columns/rows with NaNs
# -> plot some easy statistics
# -> can we make an optimization program?