#!/usr/bin/env python
# coding: utf-8

# # Time Series Visualizations
#
# This notebook demonstrates how to use Lets-Plot to investigate time series.
#
# The data is provided by [Kaggle](https://www.kaggle.com/sumanthvrao/daily-climate-time-series-data).

# In[1]:

import pandas as pd

from lets_plot import *
from lets_plot.mapping import as_discrete

# In[2]:

LetsPlot.setup_html()

# Gradient endpoints reused by several plots below; unpacked as the
# `low=` / `high=` keyword arguments of the gradient scales.
COLD_TO_HOT = {"low": '#abd9e9', "high": '#d7191c'}
DRY_TO_HUMID = {"low": '#fdae61', "high": '#2c7bb6'}


# ### Preparation

# In[3]:

df = pd.read_csv("https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/delhi_climate.csv")

# In[4]:

# Give the measurement columns human-readable names; they are used verbatim
# as axis labels and tooltip variables further down.
df = df.rename(columns={"meantemp": "mean temperature", "wind_speed": "wind speed"})
df["date"] = pd.to_datetime(df["date"])
# Split the timestamp into the calendar parts used for grouping and faceting.
for part in ("day", "month", "year"):
    df[part] = getattr(df["date"].dt, part)
# Keep only the observations dated before 2017.
df = df[df["year"] < 2017]


# ### General Information

# In[5]:

# Daily mean temperature as a line, one facet per year, with an x-axis break
# on the first day of every month.
(
    ggplot(df, aes("date", "mean temperature"))
    + geom_line(aes(group="year", color=as_discrete("year")), size=1)
    + scale_x_datetime(breaks=df[df.date.dt.day == 1].date, format="%b %Y")
    + facet_grid(x="year", scales='free')
    + ggtitle("Mean Temperature Along Period Under Review")
    + ggsize(1000, 500)
    + theme(legend_position='bottom')
)

# In[6]:

# Box plot of mean temperature per year.
p1 = (
    ggplot()
    + geom_boxplot(
        aes(x="year", y="mean temperature", fill=as_discrete("year")),
        data=df, size=2, alpha=.5,
    )
    + scale_x_discrete(name="year")
    + ggtitle("Mean Temperature Aggregated")
    + theme(legend_position='bottom', panel_grid='blank')
)

# Box plot of mean temperature per month, faceted by year.
p2 = (
    ggplot()
    + geom_boxplot(
        aes(x="month", y="mean temperature", fill=as_discrete("year")),
        data=df, size=.75, alpha=.5,
    )
    + scale_x_continuous(breaks=list(range(1, 13)))
    + facet_grid(x="year")
    + ggtitle("Mean Temperature by Month")
    + theme(legend_position='none', panel_grid='blank')
)

# Stack the two box plots vertically, each occupying a w x h cell.
w, h = 1000, 300
bunch = GGBunch()
for row, plot in enumerate((p1, p2)):
    bunch.add_plot(plot, 0, row * h, w, h)
bunch.show()


# ### Year-to-Year Temperature Comparison

# In[7]:

# Tooltip: year as the title, then the temperature (two decimals) and the
# month/day/year date.
daily_tooltips = (
    layer_tooltips()
    .title("@year")
    .format("@{mean temperature}", ".2f")
    .line("@|@{mean temperature}")
    .line("date|@month/@day/@year")
)

# One facet row per month, one line per year, day of month on the x-axis.
(
    ggplot(df, aes("day", "mean temperature"))
    + geom_line(
        aes(group="year", color=as_discrete("year")),
        size=2,
        tooltips=daily_tooltips,
    )
    + scale_x_continuous(breaks=list(range(1, 32)))
    + facet_grid(y="month", scales='free')
    + ylab("month")
    + ggtitle("Mean Temperature for Each Month")
    + theme(legend_position='bottom')
)


# ### Most Common Temperature Values

# In[8]:

histogram_tooltips = (
    layer_tooltips()
    .line("count|@..count..")
    .format("@{mean temperature}", ".2f")
    .line("@|@{mean temperature}")
    .line("@|@month")
    .line("@|@year")
)

# Temperature histograms on a month (columns) x year (rows) facet grid; the
# axis titles are set to the names of the facet variables.
(
    ggplot(df, aes(x="mean temperature"))
    + geom_histogram(
        aes(group="year", fill=as_discrete("year")),
        color='black', bins=15, size=.5, alpha=.5,
        tooltips=histogram_tooltips,
    )
    + facet_grid(x="month", y="year")
    + xlab("month")
    + ylab("year")
    + ggtitle("Most Common Temperature")
    + ggsize(1000, 500)
    + theme_classic()
    + theme(legend_position='bottom')
)

# In[9]:

# Cast temperatures to int so that each (month, temperature) pair lands on a
# discrete tile of the heatmap.
int_mean_temp_df = df[["mean temperature", "month", "year"]].copy()
int_mean_temp_df["mean temperature"] = int_mean_temp_df["mean temperature"].astype(int)

heatmap_tooltips = (
    layer_tooltips()
    .format("@{mean temperature}", ".2f")
    .line("@|@{mean temperature}")
    .format("@month", "d")
    .line("@|@month")
    .title("@year")
)

(
    ggplot(int_mean_temp_df, aes("month", "mean temperature", fill="mean temperature"))
    + geom_bin2d(
        stat='identity', size=.5, color='white', alpha=.2,
        tooltips=heatmap_tooltips,
    )
    + scale_x_continuous(breaks=list(range(1, 13)))
    + scale_fill_gradient(**COLD_TO_HOT)
    + facet_grid(x="year")
    + coord_fixed(ratio=.5)
    + xlab("")
    + ggtitle("Heatmap of Temperatures by Year")
    + ggsize(1000, 500)
    + theme_classic()
    + theme(legend_position='bottom')
)


# ### Observing Mean Temperature and Wind Speed Correlation

# In[10]:

(
    ggplot(df, aes("wind speed", y="mean temperature"))
    + geom_point(
        aes(color="mean temperature", fill="mean temperature"),
        shape=21, size=3, alpha=.2,
    )
    + scale_color_gradient(**COLD_TO_HOT)
    + scale_fill_gradient(**COLD_TO_HOT)
    + facet_grid(x="year")
    + ggtitle("Relation Between Mean Temperature and Wind Speed")
    + ggsize(1000, 500)
    + theme_classic()
)


# ### Observing Mean Temperature and Humidity Correlation

# In[11]:

(
    ggplot(df, aes("humidity", "mean temperature"))
    + geom_point(
        aes(color="humidity", fill="humidity"),
        shape=21, size=3, alpha=.2,
    )
    + scale_color_gradient(**DRY_TO_HUMID)
    + scale_fill_gradient(**DRY_TO_HUMID)
    + facet_grid(x="year")
    + ggtitle("Relation Between Mean Temperature and Humidity")
    + ggsize(1000, 500)
    + theme_classic()
)


# ### In Search of Correlation on Lag Scatter Plots

# In[12]:

def _with_lag(days):
    """Return temperature/year rows paired with the temperature `days` rows later.

    Rows that have no later counterpart (NaN after the shift) are dropped.
    """
    lagged = df[["mean temperature", "year"]].copy()
    lagged["shifted mean temperature"] = df["mean temperature"].shift(-days)
    return lagged.dropna()


def _lag_scatter(frame, title):
    """Build the per-year scatter of temperature against its lagged value."""
    return (
        ggplot(frame, aes("mean temperature", "shifted mean temperature"))
        + geom_point(
            aes(color="mean temperature", fill="mean temperature"),
            shape=21, size=3, alpha=.2,
        )
        + scale_color_gradient(**COLD_TO_HOT)
        + scale_fill_gradient(**COLD_TO_HOT)
        + facet_grid(x="year")
        + coord_fixed(ratio=1)
        + ggtitle(title)
        + theme_classic()
    )


df_shifted_by_day = _with_lag(1)
p1 = _lag_scatter(df_shifted_by_day, "One Day Lag Scatter Plot")

df_shifted_by_month = _with_lag(30)
p2 = _lag_scatter(df_shifted_by_month, "One Month Lag Scatter Plot")

# The final remaining row is discarded as well.
# NOTE(review): presumably this removes a lone first-of-year row that would
# otherwise get a facet of its own - confirm against the data.
df_shifted_by_year = _with_lag(365)[:-1]
p3 = _lag_scatter(df_shifted_by_year, "One Year Lag Scatter Plot")

# Stack the three lag plots vertically, each occupying a w x h cell.
w, h = 1000, 300
bunch = GGBunch()
for row, plot in enumerate((p1, p2, p3)):
    bunch.add_plot(plot, 0, row * h, w, h)
bunch.show()


# ### Annual Path of Mean Temperature and Humidity

# In[13]:

# Average the numeric columns per (year, month) and keep the two of interest.
monthly_means = df.groupby(by=["year", "month"]).mean(numeric_only=True)
mean_df = monthly_means[["mean temperature", "humidity"]].reset_index()

path_tooltips = (
    layer_tooltips()
    .title("@year")
    .line("month|@month")
    .format("@humidity", ".2f")
    .line("@|@humidity")
    .format("@{mean temperature}", ".2f")
    .line("mean temperature|@{mean temperature}")
)

# A path through the monthly averages in row order, with the month encoded
# as the fill colour of each point.
(
    ggplot(mean_df, aes("humidity", "mean temperature"))
    + geom_path(color='#99d8c9', size=1)
    + geom_point(
        aes(fill="month"),
        shape=21, size=3, color='#00441b',
        tooltips=path_tooltips,
    )
    + scale_fill_gradient(name="", low='#e5f5f9', high='#2ca25f')
    + facet_grid(x="year")
    + ylab("mean temperature")
    + ggtitle("Annual Path of Mean Temperature and Humidity")
    + ggsize(1000, 500)
    + theme_classic()
)


# ### Autocorrelation Plots for Mean Temperature, Wind Speed and Humidity

# In[14]:

# Output column label -> series whose autocorrelation is computed.
acf_sources = {
    "mean temperature acf": df["mean temperature"],
    "wind speed acf": df["wind speed"],
    "humidity acf": df["humidity"],
}

# One row per lag (0 .. 3 * 365 - 1), one column per measured series.
acf_wide = pd.DataFrame(
    [
        (lag, *(series.autocorr(lag=lag) for series in acf_sources.values()))
        for lag in range(365 * 3)
    ],
    columns=["lag", *acf_sources],
)

# Reshape to long form so the series label can drive the facets.
acf_df = acf_wide.melt(
    id_vars=["lag"],
    value_vars=list(acf_sources),
    var_name="acf_type",
    value_name="acf_value",
)

(
    ggplot(acf_df, aes("lag", "acf_value"))
    + geom_point(aes(color="acf_value"), size=3)
    + scale_color_gradient(low='#fc8d59', high='#91cf60')
    + facet_grid(y="acf_type")
    + ylab("ACF value")
    + ggtitle("Autocorrelation Functions")
    + ggsize(1000, 600)
    + theme(legend_position='none')
)