import pandas as pd
# Load the query results. header=0 consumes the file's own header row and
# names= replaces its labels with DATE / TAG / POSTS.
df = pd.read_csv("QueryResults.csv", names=['DATE', 'TAG', 'POSTS'], header=0)

# NOTE: the bare expressions in this section are notebook-style inspections;
# they only display something when run in an interactive session.
df.shape

df.head()

df.count()

# Total posts per tag. Only POSTS is aggregated: DATE has not been converted
# yet (presumably still text at this point), and summing it would either
# concatenate the values or depend on deprecated column dropping.
df.groupby("TAG")[["POSTS"]].sum()

# Number of rows recorded per tag.
df.groupby("TAG").count()

# Check what Python type a DATE entry has before conversion.
type(df["DATE"][1])

# Convert DATE to real timestamps so it can serve as a time axis.
df["DATE"] = pd.to_datetime(df["DATE"])
df.head()

# Go from long to wide: one row per DATE, one column per TAG, POSTS as values.
# pivot() raises ValueError if a (DATE, TAG) pair occurs more than once.
reshaped_df = df.pivot(index="DATE", columns="TAG", values="POSTS")
reshaped_df.head()

# A (DATE, TAG) combination absent from the data shows up as NaN after the
# pivot; replace those with 0.
reshaped_df = reshaped_df.fillna(0)
reshaped_df.head()

reshaped_df.shape

# Tag names, reused by the plotting code further down.
cols = reshaped_df.columns
print(cols)

reshaped_df.count()

import matplotlib.pyplot as plt

# Line chart of posts per tag, drawn through the pyplot state machine.
# A larger canvas keeps the individual lines readable.
plt.figure(figsize=(16, 10))
# One plot() call per tag so that every line carries its own legend label.
for tag in cols:
    tag_posts = reshaped_df[tag]
    plt.plot(tag_posts.index, tag_posts, label=tag)
# Fixed y-range and axis titles.
plt.ylim(0, 35000)
plt.xlabel('Date', fontsize=14)
plt.ylabel('Number of Posts', fontsize=14)
# Legend built from the labels passed to plot().
plt.legend(fontsize=16)

# Same chart as above, built with the object-oriented Axes API.
# subplots() returns the figure together with its single Axes.
fig, ax = plt.subplots(figsize=(16, 10))
# One plot() call per tag so that every line carries its own legend label.
for tag in cols:
    tag_posts = reshaped_df[tag]
    ax.plot(tag_posts.index, tag_posts, label=tag)
# Fixed y-range and axis titles, set through the Axes methods.
ax.set_ylim(0, 35000)
ax.set_xlabel('Date', fontsize=14)
ax.set_ylabel('Number of Posts', fontsize=14)
# Legend built from the labels passed to plot().
ax.legend(fontsize=16)

# Smooth each tag's series with a rolling mean; `window` is the number of
# consecutive observations averaged into each point.
roll_df = reshaped_df.rolling(window=6).mean()

plt.figure(figsize=(16, 10))
# Tick label size is set before anything is drawn.
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)

# Plot the smoothed frame rather than the raw counts, one line per tag.
for tag in roll_df.columns:
    smoothed = roll_df[tag]
    plt.plot(smoothed.index, smoothed, linewidth=3, label=smoothed.name)

# Fixed y-range and axis titles.
plt.ylim(0, 35000)
plt.xlabel('Date', fontsize=14)
plt.ylabel('Number of Posts', fontsize=14)

plt.legend(fontsize=16)