# Explore post counts per tag over time: load QueryResults.csv, pivot it to
# one column per tag, then plot the raw series and a rolling-mean series.
#
# Bare expressions (df.head(), df.shape, ...) are kept for interactive /
# notebook inspection; they display nothing when run as a plain script.
import matplotlib.pyplot as plt
import pandas as pd

# header=0 replaces the file's own header row with the names given here.
df = pd.read_csv("QueryResults.csv", names=["DATE", "TAG", "POSTS"], header=0)
df.shape
df.head()
df.count()

# Total posts per tag. POSTS is selected explicitly so the DATE strings are
# not aggregated as well (summing strings would concatenate them).
df.groupby("TAG")[["POSTS"]].sum()
# Number of non-null entries per tag, for each remaining column.
df.groupby("TAG").count()

# Inspect the type of one DATE value before converting the column.
type(df["DATE"][1])
df["DATE"] = pd.to_datetime(df["DATE"])
df.head()

# One row per date, one column per tag, each cell holding POSTS.
reshaped_df = df.pivot(index="DATE", columns="TAG", values="POSTS")
reshaped_df.head()

# Date/tag combinations missing from the data are NaN after the pivot;
# treat them as zero posts.
reshaped_df = reshaped_df.fillna(0)
reshaped_df.head()
reshaped_df.shape

cols = reshaped_df.columns
print(cols)
reshaped_df.count()

# NOTE(review): outside a notebook none of the figures below is displayed or
# saved -- add plt.show() / savefig if this is meant to run as a script.

# --- Figure 1: raw counts, pyplot state-machine interface -------------------
plt.figure(figsize=(16, 10))  # larger canvas for readability
plt.xlabel("Date", fontsize=14)
plt.ylabel("Number of Posts", fontsize=14)
plt.ylim(0, 35000)

# Each tag is plotted on its own so that it gets its own legend entry.
for col in cols:
    plt.plot(reshaped_df.index, reshaped_df[col], label=col)

plt.legend(fontsize=16)

# --- Figure 2: same chart, object-oriented (Figure/Axes) interface ----------
fig, ax = plt.subplots(figsize=(16, 10))
ax.set_xlabel("Date", fontsize=14)
ax.set_ylabel("Number of Posts", fontsize=14)
ax.set_ylim(0, 35000)

for col in cols:
    ax.plot(reshaped_df.index, reshaped_df[col], label=col)

ax.legend(fontsize=16)

# --- Figure 3: smoothed counts ----------------------------------------------
# window is the number of consecutive observations averaged together; the
# first window - 1 rows of the result are NaN.
roll_df = reshaped_df.rolling(window=6).mean()

plt.figure(figsize=(16, 10))
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.xlabel("Date", fontsize=14)
plt.ylabel("Number of Posts", fontsize=14)
plt.ylim(0, 35000)

# Plot the smoothed frame instead of the raw one; the column label doubles as
# the legend entry.
for column in roll_df.columns:
    plt.plot(roll_df.index, roll_df[column], linewidth=3, label=column)

plt.legend(fontsize=16)