Time Series Visualizations

This notebook demonstrates how to use Lets-Plot to investigate time series.

The data is provided by Kaggle.

In [1]:
import pandas as pd

from lets_plot import *
from lets_plot.mapping import as_discrete
LetsPlot.setup_html()

Preparation

In [2]:
# Source CSV: the Delhi climate data set hosted in the lets-plot-docs repository.
DELHI_CLIMATE_CSV = "https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/delhi_climate.csv"

# The file is fetched over the network each time this cell runs.
df = pd.read_csv(DELHI_CLIMATE_CSV)
In [3]:
# Tidy the raw frame in a single chain:
#   1. give two columns human-readable names (they are used verbatim as axis titles),
#   2. parse the date column into datetimes,
#   3. derive calendar parts used later for faceting and grouping,
#   4. keep only the rows dated before 2017.
df = (
    df.rename(columns={"meantemp": "mean temperature", "wind_speed": "wind speed"})
    .assign(date=lambda frame: pd.to_datetime(frame["date"]))
    .assign(
        day=lambda frame: frame["date"].dt.day,
        month=lambda frame: frame["date"].dt.month,
        year=lambda frame: frame["date"].dt.year,
    )
    .loc[lambda frame: frame["year"] < 2017]
)

General Information

In [4]:
# Dates falling on the first day of a month; used as the x-axis tick positions.
month_starts = df.loc[df["date"].dt.day == 1, "date"]

# Mean temperature over time, one panel per year with independent axis ranges.
(
    ggplot(df, aes("date", "mean temperature"))
    + geom_line(aes(group="year", color=as_discrete("year")), size=1)
    + scale_x_datetime(breaks=month_starts, format="%b %Y")
    + facet_grid(x="year", scales='free')
    + ggtitle("Mean Temperature Along Period Under Review")
    + ggsize(1000, 500)
    + theme(legend_position='bottom')
)
Out[4]:
In [5]:
# Upper panel: one box per year, summarising the whole year's mean temperatures.
p1 = (
    ggplot()
    + geom_boxplot(
        aes(x="year", y="mean temperature", fill=as_discrete("year")),
        data=df, size=2, alpha=.5,
    )
    + scale_x_discrete(name="year")
    + ggtitle("Mean Temperature Aggregated")
    + theme(legend_position='bottom', panel_grid='blank')
)

# Lower panel: one box per month, faceted by year; the legend is hidden here
# because the upper panel already shows it.
p2 = (
    ggplot()
    + geom_boxplot(
        aes(x="month", y="mean temperature", fill=as_discrete("year")),
        data=df, size=.75, alpha=.5,
    )
    + scale_x_continuous(breaks=list(range(1, 13)))
    + facet_grid(x="year")
    + ggtitle("Mean Temperature by Month")
    + theme(legend_position='none', panel_grid='blank')
)

# Stack the two panels vertically, each w x h pixels.
w, h = 1000, 300
bunch = GGBunch()
for row, panel in enumerate((p1, p2)):
    bunch.add_plot(panel, 0, row * h, w, h)
bunch.show()

Year-to-Year Temperature Comparison

In [6]:
# Tooltip: year as the title, the temperature with two decimals, and the full date.
daily_tooltips = (
    layer_tooltips()
    .title("@year")
    .format("@{mean temperature}", ".2f")
    .line("@|@{mean temperature}")
    .line("date|@month/@day/@year")
)

# One row of panels per month; within a panel, one line per year over the day of month.
(
    ggplot(df, aes("day", "mean temperature"))
    + geom_line(
        aes(group="year", color=as_discrete("year")),
        size=2,
        tooltips=daily_tooltips,
    )
    + scale_x_continuous(breaks=list(range(1, 32)))
    + facet_grid(y="month", scales='free')
    # NOTE(review): the y-axis title is set to "month" although the plotted value is
    # the mean temperature -- presumably it is meant to caption the month facets.
    + ylab("month")
    + ggtitle("Mean Temperature for Each Month")
    + theme(legend_position='bottom')
)
Out[6]:

Most Common Temperature Values

In [7]:
# Tooltip: bin count first, then temperature (two decimals), month and year.
histogram_tooltips = (
    layer_tooltips()
    .line("count|@..count..")
    .format("@{mean temperature}", ".2f")
    .line("@|@{mean temperature}")
    .line("@|@month")
    .line("@|@year")
)

# Grid of histograms: columns are months, rows are years.
(
    ggplot(df, aes(x="mean temperature"))
    + geom_histogram(
        aes(group="year", fill=as_discrete("year")),
        color='black', bins=15, size=.5, alpha=.5,
        tooltips=histogram_tooltips,
    )
    + facet_grid(x="month", y="year")
    # NOTE(review): the axis titles name the facet dimensions rather than the plotted
    # quantities (temperature / count) -- presumably intentional.
    + xlab("month")
    + ylab("year")
    + ggtitle("Most Common Temperature")
    + ggsize(1000, 500)
    + theme_classic()
    + theme(legend_position='bottom')
)
Out[7]:
In [8]:
# Work on a separate frame in which the temperature is cast to an integer, so that
# observations fall onto discrete tiles of the heatmap.
int_mean_temp_df = df[["mean temperature", "month", "year"]].astype({"mean temperature": int})

# Tooltip: temperature, month (integer format) and the year as the title.
tile_tooltips = (
    layer_tooltips()
    .format("@{mean temperature}", ".2f")
    .line("@|@{mean temperature}")
    .format("@month", "d")
    .line("@|@month")
    .title("@year")
)

# Tiles are drawn as-is (stat='identity') and are nearly transparent (alpha=.2),
# so tiles drawn on top of each other appear more saturated.
(
    ggplot(int_mean_temp_df, aes("month", "mean temperature", fill="mean temperature"))
    + geom_bin2d(
        stat='identity', size=.5, color='white', alpha=.2,
        tooltips=tile_tooltips,
    )
    + scale_x_continuous(breaks=list(range(1, 13)))
    + scale_fill_gradient(low='#abd9e9', high='#d7191c')
    + facet_grid(x="year")
    + coord_fixed(ratio=.5)
    + xlab("")
    + ggtitle("Heatmap of Temperatures by Year")
    + ggsize(1000, 500)
    + theme_classic()
    + theme(legend_position='bottom')
)
Out[8]:

Observing Mean Temperature and Wind Speed Correlation

In [9]:
# Points are coloured (outline and fill) by temperature on a blue-to-red gradient.
temperature_points = geom_point(
    aes(color="mean temperature", fill="mean temperature"),
    shape=21, size=3, alpha=.2,
)

# Wind speed against mean temperature, one panel per year.
(
    ggplot(df, aes("wind speed", "mean temperature"))
    + temperature_points
    + scale_color_gradient(low='#abd9e9', high='#d7191c')
    + scale_fill_gradient(low='#abd9e9', high='#d7191c')
    + facet_grid(x="year")
    + ggtitle("Relation Between Mean Temperature and Wind Speed")
    + ggsize(1000, 500)
    + theme_classic()
)
Out[9]:

Observing Mean Temperature and Humidity Correlation

In [10]:
# Points are coloured (outline and fill) by humidity on an orange-to-blue gradient.
humidity_points = geom_point(
    aes(color="humidity", fill="humidity"),
    shape=21, size=3, alpha=.2,
)

# Humidity against mean temperature, one panel per year.
(
    ggplot(df, aes("humidity", "mean temperature"))
    + humidity_points
    + scale_color_gradient(low='#fdae61', high='#2c7bb6')
    + scale_fill_gradient(low='#fdae61', high='#2c7bb6')
    + facet_grid(x="year")
    + ggtitle("Relation Between Mean Temperature and Humidity")
    + ggsize(1000, 500)
    + theme_classic()
)
Out[10]:

In Search of Correlation on Lag Scatter Plots

In [11]:
def _lagged_temperature(data, lag, drop_last=0):
    """Pair each mean temperature with the value found `lag` rows later.

    Parameters
    ----------
    data : DataFrame
        Must contain the columns "mean temperature" and "year".
    lag : int
        Number of rows to look ahead. The shift is positional, so it equals a
        number of days only if `data` holds one row per consecutive day
        (assumed here -- TODO confirm against the data set).
    drop_last : int, default 0
        Number of trailing rows to discard after the incomplete rows are removed.

    Returns
    -------
    DataFrame
        Columns "mean temperature", "year" and "shifted mean temperature";
        rows containing a missing value are dropped.
    """
    lagged = data[["mean temperature", "year"]].copy()
    lagged["shifted mean temperature"] = data["mean temperature"].shift(-lag)
    lagged = lagged.dropna()
    return lagged[:-drop_last] if drop_last else lagged


def _lag_scatter(lagged, title):
    """Scatter the temperature against its lagged value, one panel per year.

    `lagged` is a frame as returned by `_lagged_temperature`; `title` is the
    plot title. Returns the Lets-Plot plot specification.
    """
    return (
        ggplot(lagged, aes("mean temperature", "shifted mean temperature"))
        + geom_point(
            aes(color="mean temperature", fill="mean temperature"),
            shape=21, size=3, alpha=.2,
        )
        + scale_color_gradient(low='#abd9e9', high='#d7191c')
        + scale_fill_gradient(low='#abd9e9', high='#d7191c')
        + facet_grid(x="year")
        # Equal units on both axes, so the diagonal reads as "unchanged".
        + coord_fixed(ratio=1)
        + ggtitle(title)
        + theme_classic()
    )


# Lags are expressed in rows: 1 (day), 30 (month) and 365 (year).
df_shifted_by_day = _lagged_temperature(df, lag=1)
p1 = _lag_scatter(df_shifted_by_day, "One Day Lag Scatter Plot")

df_shifted_by_month = _lagged_temperature(df, lag=30)
p2 = _lag_scatter(df_shifted_by_month, "One Month Lag Scatter Plot")

# NOTE(review): the one-year frame additionally loses its final row -- presumably
# to avoid a facet holding a single stray point; confirm against the data.
df_shifted_by_year = _lagged_temperature(df, lag=365, drop_last=1)
p3 = _lag_scatter(df_shifted_by_year, "One Year Lag Scatter Plot")

# Stack the three panels vertically, each w x h pixels.
w, h = 1000, 300
bunch = GGBunch()
bunch.add_plot(p1, 0, 0, w, h)
bunch.add_plot(p2, 0, h, w, h)
bunch.add_plot(p3, 0, 2 * h, w, h)
bunch.show()

Annual Path of Mean Temperature and Humidity

In [12]:
# Monthly averages per year. The two columns of interest are selected BEFORE
# aggregating, so no other column (dates, day numbers, ...) is averaged needlessly
# and a non-numeric column cannot make the aggregation fail.
mean_df = (
    df.groupby(by=["year", "month"])[["mean temperature", "humidity"]]
    .mean()
    .reset_index()
)

# Tooltip: year as the title, then month, humidity and temperature (two decimals).
path_tooltips = (
    layer_tooltips()
    .title("@year")
    .line("month|@month")
    .format("@humidity", ".2f")
    .line("@|@humidity")
    .format("@{mean temperature}", ".2f")
    .line("mean temperature|@{mean temperature}")
)

# The path connects the monthly averages in row order (year, then month);
# the point fill encodes the month.
(
    ggplot(mean_df, aes("humidity", "mean temperature"))
    + geom_path(color='#99d8c9', size=1)
    + geom_point(
        aes(fill="month"), shape=21, size=3, color='#00441b',
        tooltips=path_tooltips,
    )
    + scale_fill_gradient(name="", low='#e5f5f9', high='#2ca25f')
    + facet_grid(x="year")
    + ylab("mean temperature")
    + ggtitle("Annual Path of Mean Temperature and Humidity")
    + ggsize(1000, 500)
    + theme_classic()
)
Out[12]:

Autocorrelation Plots for Mean Temperature, Wind Speed and Humidity

In [13]:
# Lags (in rows) for which the autocorrelation is evaluated: 0 .. 3*365 - 1.
lags = range(365 * 3)

# Series to analyse, keyed by the label each one gets in the long-format frame.
acf_inputs = {
    "mean temperature acf": df["mean temperature"],
    "wind speed acf": df["wind speed"],
    "humidity acf": df["humidity"],
}

# Wide frame: one row per lag, one column per series.
acf_wide = pd.DataFrame({
    "lag": list(lags),
    **{
        label: [series.autocorr(lag=k) for k in lags]
        for label, series in acf_inputs.items()
    },
})

# Long format: one row per (lag, series) pair, as needed for faceting by series.
acf_df = acf_wide.melt(
    id_vars=["lag"],
    value_vars=list(acf_inputs),
    var_name="acf_type",
    value_name="acf_value",
)

# One row of panels per series; point colour follows the ACF value.
(
    ggplot(acf_df, aes("lag", "acf_value"))
    + geom_point(aes(color="acf_value"), size=3)
    + scale_color_gradient(low='#fc8d59', high='#91cf60')
    + facet_grid(y="acf_type")
    + ylab("ACF value")
    + ggtitle("Autocorrelation Functions")
    + ggsize(1000, 600)
    + theme(legend_position='none')
)
Out[13]: