import math

import pandas as pd
from lets_plot import *
from lets_plot.mapping import as_discrete
# Set up lets-plot HTML rendering before any plot is displayed.
LetsPlot.setup_html()

# Source CSV with the wind observations (columns used below: 'sped', 'drct').
PEA_WIND_CSV_URL = "https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/pea_wind.csv"
wind_df = pd.read_csv(PEA_WIND_CSV_URL)

# Row count of the raw data, taken before any filtering; it is the
# denominator of the "calm" percentage computed further down.
records_count = len(wind_df)
def is_float(x: object) -> bool:
    """Return True if ``float(x)`` succeeds, False otherwise.

    Anything ``float()`` accepts counts as a float here, including ints and
    strings such as ``"nan"`` or ``"inf"``. Values that ``float()`` rejects
    return False instead of raising: unparsable strings (``ValueError``),
    unsupported types such as ``None`` or lists (``TypeError``), and integers
    too large for a float (``OverflowError``).
    """
    try:
        float(x)
    except (TypeError, ValueError, OverflowError):
        return False
    return True
# Drop rows whose speed or direction cannot be converted to a float.
# The two filters are applied one after the other, speed first.
wind_df = wind_df[wind_df['sped'].apply(is_float)]
wind_df = wind_df[wind_df['drct'].apply(is_float)]

# Convert both columns to float. assign() builds a new frame instead of
# writing into the filtered one, which avoids pandas' SettingWithCopyWarning.
wind_df = wind_df.assign(
    sped=wind_df['sped'].astype(float),
    drct=wind_df['drct'].astype(float),
)

# Keep observations with speed >= 2.0 only; the rows removed here are what
# the later "calm" percentage accounts for. The comparison is vectorised
# (NaN compares False, so NaN speeds are dropped as before). The explicit
# copy makes wind_df an independent frame so later column assignments are
# unambiguous.
wind_df = wind_df[wind_df['sped'] >= 2.0].copy()
wind_df.head()
| | station | valid | drct | sped |
---|---|---|---|---|
0 | PEA | 2002-05-17 00:14 | 330.0 | 8.05 |
1 | PEA | 2002-05-17 00:20 | 330.0 | 8.05 |
2 | PEA | 2002-05-17 00:34 | 340.0 | 10.35 |
3 | PEA | 2002-05-17 00:40 | 340.0 | 10.35 |
4 | PEA | 2002-05-17 00:54 | 340.0 | 14.95 |
# Number of rows currently in wind_df; used both for the calm share and as
# the denominator of the per-bar percentages.
total_observations = len(wind_df)

# Calm: percentage of the originally loaded records that are no longer
# present in wind_df.
calm = 100 - total_observations / records_count * 100

# Speed bin edges. With right=False each interval is closed on the left and
# open on the right: [2, 5), [5, 7), ..., [20, inf).
bins = [2, 5, 7, 10, 15, 20, float('inf')]
# One integer label per interval (there is one fewer interval than edges).
bin_ids = list(range(len(bins) - 1))
wind_df['speed_group'] = pd.cut(wind_df['sped'], bins=bins, labels=bin_ids, right=False)

# Count observations for every (direction, speed group) pair.
# observed=False keeps pairs whose speed group never occurs, with count 0.
by_direction_and_speed = wind_df.groupby(['drct', 'speed_group'], observed=False)
grouped_counts = by_direction_and_speed.size().reset_index(name='count')

# Share of each (direction, speed group) pair relative to all rows in
# wind_df, expressed in percent.
grouped_counts['percentage_of_total'] = grouped_counts['count'].div(total_observations).mul(100)
# Wind rose: bars of 'percentage_of_total' per direction, coloured by speed
# group, drawn in polar coordinates, with the calm percentage printed in the
# inner circle.
(
    ggplot(grouped_counts)
    + geom_bar(
        aes('drct', 'percentage_of_total', fill=as_discrete('speed_group', order=1)),
        size=0, width=.8,
        stat='identity',
        tooltips=layer_tooltips().format('^y', '{.2g}%').format('^x', '{}°')
    )
    + geom_rect(
        # Visually align the width of the rectangle with the bars - widen it by 5 (half a bar width)
        xmin=5, xmax=365,
        ymin=-1, ymax=0, fill='white', size=0
    )
    + geom_hline(yintercept=0, size=2)
    # The calm label sits at y=-1, i.e. inside the inner circle created by ylim below.
    # size is a numeric aesthetic, so it is passed as a number rather than a string.
    + geom_text(x=180, y=-1, label=f'Calm\n{calm:.1f}%', hjust='middle', vjust='center', size=10)
    + scale_fill_manual(
        name='Wind Speed (mph):',
        values=['#002bff', '#03d3f8', '#7afe81', '#fde609', '#ff4404', '#780200'],
        # Keys are the integer speed_group labels assigned when binning.
        labels={
            0: '2 - 4.9',
            1: '5 - 6.9',
            2: '7 - 9.9',
            3: '10 - 14.9',
            4: '15 - 19.9',
            5: '20+'
        },
    )
    + scale_y_continuous(
        breaks=[0, 1, 2, 3, 4, 5],  # To not add automatically generated ticks for values outside of the data range
        format='{}%'
    )
    + scale_x_continuous(
        # Compass labels for the direction values given in degrees.
        labels={
            45: 'NE',
            90: 'E',
            135: 'SE',
            180: 'S',
            225: 'SW',
            270: 'W',
            315: 'NW',
            360: 'N',
        },
    )
    + labs(
        title="Wind rose for [PEA] PELLA",
        subtitle="Observations from 2002 to 2024",
        caption="Data is provided by the Iowa Environmental Mesonet of Iowa State University"
    )
    + ggsize(800, 800)
    + theme_minimal2()
    + theme(
        plot_title=element_text(size=24, face='bold'),
        plot_subtitle=element_text(size=18),
        panel_grid_minor_x=element_line(),
        panel_grid=element_line(color='#A0A0A0'),
        axis_ticks_y=element_line(),
        axis_text_x=element_text(size=18),
        axis_title=element_blank()
    )
    + coord_polar(
        ylim=[-1, None],  # -1 is to make inner circle
        # Rotate by half a bar width to make the N-S axis perpendicular:
        # the circle is split into 36 sectors and half of one sector is
        # 360 / 36 / 2 degrees. The offset is given in radians and uses the
        # exact value of pi instead of the 3.14 approximation.
        start=math.radians(360 / 36 / 2)
    )
)