import pandas as pd
url_prefix = "https://raw.githubusercontent.com/coderefinery/data-visualization-python/main/data/"
# download the monthly table of each station (Tromso first, then Oslo)
data_tromso, data_oslo = (
    pd.read_csv(url_prefix + csv_name)
    for csv_name in ("tromso-monthly.csv", "oslo-monthly.csv")
)
# stack both tables on top of each other; every table keeps its own row labels
data_monthly = pd.concat([data_tromso, data_oslo], axis="index")
# display the combined table
data_monthly
| | name | station | date | max temperature | precipitation | min temperature |
---|---|---|---|---|---|---|
0 | Tromso - Langnes | SN90490 | 10.2022 | 10.7 | 187.0 | -4.2 |
1 | Tromso - Langnes | SN90490 | 11.2022 | 8.5 | 41.5 | -7.0 |
2 | Tromso - Langnes | SN90490 | 12.2022 | 5.6 | 88.8 | -11.7 |
3 | Tromso - Langnes | SN90490 | 1.2023 | 7.7 | 111.4 | -13.9 |
4 | Tromso - Langnes | SN90490 | 2.2023 | 6.6 | 171.3 | -10.7 |
5 | Tromso - Langnes | SN90490 | 3.2023 | 4.5 | 157.0 | -15.1 |
6 | Tromso - Langnes | SN90490 | 4.2023 | 9.8 | 85.0 | -7.1 |
7 | Tromso - Langnes | SN90490 | 5.2023 | 17.7 | 101.2 | -4.6 |
8 | Tromso - Langnes | SN90490 | 6.2023 | 25.4 | 43.4 | -0.4 |
9 | Tromso - Langnes | SN90490 | 7.2023 | 26.7 | 14.0 | 6.0 |
10 | Tromso - Langnes | SN90490 | 8.2023 | 25.1 | 43.4 | 5.4 |
11 | Tromso - Langnes | SN90490 | 9.2023 | 19.3 | 163.7 | 0.3 |
12 | Tromso - Langnes | SN90490 | 10.2023 | 9.8 | 64.8 | -0.6 |
0 | Oslo - Blindern | SN18700 | 10.2022 | 17.1 | 82.9 | -0.4 |
1 | Oslo - Blindern | SN18700 | 11.2022 | 15.1 | 83.4 | -2.1 |
2 | Oslo - Blindern | SN18700 | 12.2022 | 6.5 | 85.5 | -14.6 |
3 | Oslo - Blindern | SN18700 | 1.2023 | 7.2 | 100.5 | -13.4 |
4 | Oslo - Blindern | SN18700 | 2.2023 | 10.2 | 46.0 | -9.4 |
5 | Oslo - Blindern | SN18700 | 3.2023 | 9.8 | 72.6 | -12.6 |
6 | Oslo - Blindern | SN18700 | 4.2023 | 19.8 | 99.7 | -4.7 |
7 | Oslo - Blindern | SN18700 | 5.2023 | 24.2 | 17.0 | -0.8 |
8 | Oslo - Blindern | SN18700 | 6.2023 | 31.8 | 39.9 | 4.6 |
9 | Oslo - Blindern | SN18700 | 7.2023 | 28.4 | 146.9 | 8.6 |
10 | Oslo - Blindern | SN18700 | 8.2023 | 24.5 | 259.8 | 9.8 |
11 | Oslo - Blindern | SN18700 | 9.2023 | 25.1 | 105.8 | 5.3 |
12 | Oslo - Blindern | SN18700 | 10.2023 | 17.1 | 7.3 | -0.7 |
# turn the mm.yyyy values of the date column into real datetimes;
# a plain list carries no index, so the result is assigned row by row
month_year_values = list(data_monthly["date"])
data_monthly["date"] = pd.to_datetime(month_year_values, format="%m.%Y")
# show the first 5 rows (5 is also the default of head)
data_monthly.head(5)
| | name | station | date | max temperature | precipitation | min temperature |
---|---|---|---|---|---|---|
0 | Tromso - Langnes | SN90490 | 2022-10-01 | 10.7 | 187.0 | -4.2 |
1 | Tromso - Langnes | SN90490 | 2022-11-01 | 8.5 | 41.5 | -7.0 |
2 | Tromso - Langnes | SN90490 | 2022-12-01 | 5.6 | 88.8 | -11.7 |
3 | Tromso - Langnes | SN90490 | 2023-01-01 | 7.7 | 111.4 | -13.9 |
4 | Tromso - Langnes | SN90490 | 2023-02-01 | 6.6 | 171.3 | -10.7 |
# show the final 5 rows (5 is also the default of tail)
data_monthly.tail(5)
| | name | station | date | max temperature | precipitation | min temperature |
---|---|---|---|---|---|---|
8 | Oslo - Blindern | SN18700 | 2023-06-01 | 31.8 | 39.9 | 4.6 |
9 | Oslo - Blindern | SN18700 | 2023-07-01 | 28.4 | 146.9 | 8.6 |
10 | Oslo - Blindern | SN18700 | 2023-08-01 | 24.5 | 259.8 | 9.8 |
11 | Oslo - Blindern | SN18700 | 2023-09-01 | 25.1 | 105.8 | 5.3 |
12 | Oslo - Blindern | SN18700 | 2023-10-01 | 17.1 | 7.3 | -0.7 |
# list all column labels - `columns` is an attribute, not a method, so it takes no parentheses
data_monthly.columns
Index(['name', 'station', 'date', 'max temperature', 'precipitation', 'min temperature'], dtype='object')
# show the data type pandas holds for each column
# (the date column is datetime64 here because of the conversion above)
data_monthly.dtypes
name                       object
station                    object
date               datetime64[ns]
max temperature           float64
precipitation             float64
min temperature           float64
dtype: object
# table dimensions as a (rows, columns) tuple - `shape` is an attribute, so no parentheses here
data_monthly.shape
(26, 6)
# pick a single column by its label: all rows, one column
data_monthly.loc[:, "max temperature"]
0     10.7
1      8.5
2      5.6
3      7.7
4      6.6
5      4.5
6      9.8
7     17.7
8     25.4
9     26.7
10    25.1
11    19.3
12     9.8
0     17.1
1     15.1
2      6.5
3      7.2
4     10.2
5      9.8
6     19.8
7     24.2
8     31.8
9     28.4
10    24.5
11    25.1
12    17.1
Name: max temperature, dtype: float64
# summary statistics of one column: count, mean, std, min, quartiles and max
max_temperature = data_monthly["max temperature"]
max_temperature.describe()
count    26.000000
mean     15.930769
std       8.345862
min       4.500000
25%       8.825000
50%      16.100000
75%      24.425000
max      31.800000
Name: max temperature, dtype: float64
# what was the highest value in the "max temperature" column, across both stations?
data_monthly["max temperature"].max()
np.float64(31.8)
# build a True/False mask per row, then keep only the rows where it is True,
# i.e. the rows whose max temperature was above 20
warmer_than_20 = data_monthly["max temperature"] > 20.0
data_monthly[warmer_than_20]
| | name | station | date | max temperature | precipitation | min temperature |
---|---|---|---|---|---|---|
8 | Tromso - Langnes | SN90490 | 2023-06-01 | 25.4 | 43.4 | -0.4 |
9 | Tromso - Langnes | SN90490 | 2023-07-01 | 26.7 | 14.0 | 6.0 |
10 | Tromso - Langnes | SN90490 | 2023-08-01 | 25.1 | 43.4 | 5.4 |
7 | Oslo - Blindern | SN18700 | 2023-05-01 | 24.2 | 17.0 | -0.8 |
8 | Oslo - Blindern | SN18700 | 2023-06-01 | 31.8 | 39.9 | 4.6 |
9 | Oslo - Blindern | SN18700 | 2023-07-01 | 28.4 | 146.9 | 8.6 |
10 | Oslo - Blindern | SN18700 | 2023-08-01 | 24.5 | 259.8 | 9.8 |
11 | Oslo - Blindern | SN18700 | 2023-09-01 | 25.1 | 105.8 | 5.3 |
import altair as alt
# this is here for google colab to update altair
if not alt.__version__.startswith("5"):
%pip install altair==5.3.0
# Chart 1: basic bar chart of precipitation against the date column,
# with bars colored by the "name" column.
alt.Chart(data_monthly).mark_bar().encode(
x="date",
y="precipitation",
color="name",
)
# Chart 2: same data, but the x encoding applies the yearmonth() time unit
# to the date field and declares it temporal (":T").
alt.Chart(data_monthly).mark_bar().encode(
x="yearmonth(date):T",
y="precipitation",
color="name",
)
# Chart 3: additionally facet by "name", giving one panel (column) per name.
alt.Chart(data_monthly).mark_bar().encode(
x="yearmonth(date):T",
y="precipitation",
color="name",
column="name",
)
# Chart 4: instead of separate panels, use xOffset to place the bars of the
# different names next to each other within each month.
alt.Chart(data_monthly).mark_bar().encode(
x="yearmonth(date):T",
y="precipitation",
color="name",
xOffset="name",
)
# Chart 5: the same grouped bars with the axes swapped - time on y,
# precipitation on x - so the per-name shift becomes yOffset.
alt.Chart(data_monthly).mark_bar().encode(
y="yearmonth(date):T",
x="precipitation",
color="name",
yOffset="name",
)
# Chart 6: as chart 5, with the x axis title set explicitly via alt.X(...).title(...).
alt.Chart(data_monthly).mark_bar().encode(
y="yearmonth(date):T",
x=alt.X("precipitation").title("Precipitation (mm)"),
color="name",
yOffset="name",
)
# Temperature band: a half-transparent area per name whose extent is given by
# "max temperature" (y) and "min temperature" (y2) for each month.
# Both names are drawn in the same panel, so the areas can overlap.
alt.Chart(data_monthly).mark_area(opacity=0.5).encode(
x="yearmonth(date):T",
y="max temperature",
y2="min temperature",
color="name",
)
# Same temperature band, faceted by "name" into one panel (column) per name.
alt.Chart(data_monthly).mark_area(opacity=0.5).encode(
x="yearmonth(date):T",
y="max temperature",
y2="min temperature",
color="name",
column="name",
)
url_prefix = "https://raw.githubusercontent.com/coderefinery/data-visualization-python/main/data/"
# load the daily tables of both stations and stack them into one table
# (this reuses the names data_tromso / data_oslo from the monthly section)
data_tromso = pd.read_csv(url_prefix + "tromso-daily.csv")
data_oslo = pd.read_csv(url_prefix + "oslo-daily.csv")
data_daily = pd.concat([data_tromso, data_oslo], axis=0)
# convert the dd.mm.yyyy values to datetimes; a plain list carries no index,
# so the converted values are assigned row by row
data_daily["date"] = pd.to_datetime(list(data_daily["date"]), format="%d.%m.%Y")
# we are here only interested in the range from 1 December 2022 up to, but not
# including, 1 May 2023; ">=" keeps 1 December itself inside the range
data_daily = data_daily[
    (data_daily["date"] >= "2022-12-01") & (data_daily["date"] < "2023-05-01")
]
# Chart 1: bar chart of "snow depth" per date, one panel (column) per name.
alt.Chart(data_daily).mark_bar().encode(
x="date",
y="snow depth",
column="name",
)
# Chart 2: additionally color each bar by that row's "max temperature".
alt.Chart(data_daily).mark_bar().encode(
x="date",
y="snow depth",
color="max temperature",
column="name",
)
# Chart 3: same encoding, with the color scale switched to the "plasma" scheme.
alt.Chart(data_daily).mark_bar().encode(
x="date",
y="snow depth",
color=alt.Color("max temperature").scale(scheme="plasma"),
column="name",
)
# Chart 4: same encoding again, drawn with circle marks instead of bars.
alt.Chart(data_daily).mark_circle().encode(
x="date",
y="snow depth",
color=alt.Color("max temperature").scale(scheme="plasma"),
column="name",
)