#!/usr/bin/env python
# coding: utf-8

# In[1]:


import pandas as pd

url_prefix = "https://raw.githubusercontent.com/coderefinery/data-visualization-python/main/data/"

# download the monthly tables, one CSV file per city
data_tromso = pd.read_csv(url_prefix + "tromso-monthly.csv")
data_oslo = pd.read_csv(url_prefix + "oslo-monthly.csv")

# stack both tables row-wise into a single data frame
data_monthly = pd.concat((data_tromso, data_oslo))

# show the stacked table
data_monthly


# In[2]:


# turn the "mm.yyyy" strings of the date column into real datetimes
data_monthly["date"] = pd.to_datetime(
    list(data_monthly["date"]),
    format="%m.%Y",
)


# In[3]:


# peek at the top 5 rows
data_monthly.head(5)


# In[4]:


# peek at the bottom 5 rows
data_monthly.tail(5)


# In[5]:


# column titles - an attribute, not a method, hence no parentheses
data_monthly.columns


# In[6]:


# data type that was detected for each column
data_monthly.dtypes


# In[7]:


# (rows, columns) of the table - again an attribute, no parentheses
data_monthly.shape


# In[8]:


# select a single column
data_monthly.loc[:, "max temperature"]


# In[9]:


# summary statistics of that column
data_monthly.loc[:, "max temperature"].describe()


# In[10]:


# what was the maximum temperature?
data_monthly["max temperature"].max()


# In[11]:


# print all rows where max temperature was above 20
data_monthly[data_monthly["max temperature"] > 20.0]


# In[12]:


import altair as alt


# In[13]:


# this is here for google colab to update altair
# NOTE: get_ipython() only exists when this runs under IPython/Jupyter;
# the branch is only taken when the installed altair is not a 5.x release
if not alt.__version__.startswith("5"):
    get_ipython().run_line_magic('pip', 'install altair==5.3.0')


# In[14]:


# bar chart of precipitation against the raw date, colored by "name"
alt.Chart(data_monthly).mark_bar().encode(
    x="date",
    y="precipitation",
    color="name",
)


# In[15]:


# same chart, but with the x axis expressed as a year-month time unit
alt.Chart(data_monthly).mark_bar().encode(
    x="yearmonth(date):T",
    y="precipitation",
    color="name",
)


# In[16]:


# split the chart into one column (panel) per "name"
alt.Chart(data_monthly).mark_bar().encode(
    x="yearmonth(date):T",
    y="precipitation",
    color="name",
    column="name",
)


# In[17]:


# single panel again, offsetting the bars along x by "name"
alt.Chart(data_monthly).mark_bar().encode(
    x="yearmonth(date):T",
    y="precipitation",
    color="name",
    xOffset="name",
)


# In[18]:


# swap the axes: dates on y, precipitation on x, offset along y
alt.Chart(data_monthly).mark_bar().encode(
    y="yearmonth(date):T",
    x="precipitation",
    color="name",
    yOffset="name",
)


# In[19]:


# as above, with an explicit title (including the unit) on the x axis
alt.Chart(data_monthly).mark_bar().encode(
    y="yearmonth(date):T",
    x=alt.X("precipitation").title("Precipitation (mm)"),
    color="name",
    yOffset="name",
)


# In[20]:


# semi-transparent area spanning from min temperature (y2) to max temperature (y)
alt.Chart(data_monthly).mark_area(opacity=0.5).encode(
    x="yearmonth(date):T",
    y="max temperature",
    y2="min temperature",
    color="name",
)


# In[21]:


# the same temperature band, one column (panel) per "name"
alt.Chart(data_monthly).mark_area(opacity=0.5).encode(
    x="yearmonth(date):T",
    y="max temperature",
    y2="min temperature",
    color="name",
    column="name",
)


# In[22]:


url_prefix = "https://raw.githubusercontent.com/coderefinery/data-visualization-python/main/data/"

# daily tables, one CSV file per city, stacked row-wise
data_tromso = pd.read_csv(url_prefix + "tromso-daily.csv")
data_oslo = pd.read_csv(url_prefix + "oslo-daily.csv")
data_daily = pd.concat([data_tromso, data_oslo], axis=0)


# In[23]:


# replace dd.mm.yyyy to date format
data_daily["date"] = pd.to_datetime(list(data_daily["date"]), format="%d.%m.%Y")

# we are here only interested in the range december to may:
# ">=" keeps 1 December itself, "<" stops just before 1 May
data_daily = data_daily[
    (data_daily["date"] >= "2022-12-01") & (data_daily["date"] < "2023-05-01")
]


# In[24]:
# daily snow depth as bars, one column (panel) per "name"
alt.Chart(data_daily).mark_bar().encode(
    alt.X("date"),
    alt.Y("snow depth"),
    alt.Column("name"),
)


# In[25]:


# additionally color every bar by that day's max temperature
alt.Chart(data_daily).mark_bar().encode(
    alt.X("date"),
    alt.Y("snow depth"),
    alt.Color("max temperature"),
    alt.Column("name"),
)


# In[26]:


# same chart, using the "plasma" color scheme for the temperature scale
alt.Chart(data_daily).mark_bar().encode(
    alt.X("date"),
    alt.Y("snow depth"),
    alt.Color("max temperature").scale(scheme="plasma"),
    alt.Column("name"),
)


# In[27]:


# circles instead of bars, same encodings
alt.Chart(data_daily).mark_circle().encode(
    alt.X("date"),
    alt.Y("snow depth"),
    alt.Color("max temperature").scale(scheme="plasma"),
    alt.Column("name"),
)