USGS dataset listing every wind turbine in the United States:
from collections import OrderedDict
import bearcart
import bokeh
import bokeh.plotting as bp
from bokeh.plotting import output_notebook
import folium
import ggplot as gg
from ggplot import ggplot
from IPython.html.widgets import interact
import matplotlib.pyplot as plt
import mpld3
import numpy as np
import pandas as pd
import vincent
%matplotlib inline
# Turn on inline notebook rendering for each plotting library, in the same
# order as before: mpld3 first, then bearcart, vincent and folium.
mpld3.enable_notebook()
for _plot_lib in (bearcart, vincent, folium):
    _plot_lib.initialize_notebook()

# Color shared by the chart axes, axis titles, tick labels and legend text
# configured further down.  Alternative value kept from the original:
# axis_color = 'black'
axis_color = '#d0d0d0'
# Read the cleaned USGS turbine table into `df` and preview its first rows.
turbine_csv = 'USGS_WindTurbine_201307_cleaned.csv'
df = pd.read_csv(turbine_csv)
df.head()
Unnamed: 0 | Unique ID | Site Name | Online Year | Turbine Manufacturer | Turbine Model | Tower Type | Turbine MW | Total Height | Tower/Hub Height | Blade Length | Rotor Diameter | Rotor Swept Area | Latitude-Decimal Degrees | Longitude-Decimal Degrees | State | County | Attribute Confidence | Location Confidence | WENDI Site Name | ||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 1836 | Sand Point | 2012 | Vestas | V39 | monopole | 0.500 | 59.5 | 40 | 19.5 | 39 | 1194.59 | 55.3436 | -160.4891 | AK | Aleutians East | 2 | 2 | unknown | ... |
1 | 1 | 1837 | Sand Point | 2012 | Vestas | V39 | monopole | 0.500 | 59.5 | 40 | 19.5 | 39 | 1194.59 | 55.3448 | -160.4906 | AK | Aleutians East | 2 | 2 | unknown | ... |
2 | 2 | 1838 | St. Paul Island | 2007 | Vestas | V27 | monopole | 0.225 | 50.5 | 37 | 13.5 | 27 | 572.55 | 57.1571 | -170.2352 | AK | Aleutians West | 2 | 2 | St. Paul Island | ... |
3 | 3 | 1839 | St. Paul Island | 1999 | Vestas | V27 | monopole | 0.225 | 50.5 | 37 | 13.5 | 27 | 572.55 | 57.1576 | -170.2359 | AK | Aleutians West | 2 | 2 | St. Paul Island | ... |
4 | 4 | 1840 | St. Paul Island | 2007 | Vestas | V27 | monopole | 0.225 | 50.5 | 37 | 13.5 | 27 | 572.55 | 57.1576 | -170.2373 | AK | Aleutians West | 2 | 2 | St. Paul Island | ... |
5 rows × 27 columns
# Read the wind-speed log and index it by its timestamp column so the
# frame can be sliced by date later on.
ws = pd.read_table('CO_WS_2011_2012.txt')
ws = ws.set_index('Date & Time Stamp')
# Index.to_datetime() has been removed from pandas; the top-level
# pd.to_datetime() does the same conversion and also exists in old releases.
ws.index = pd.to_datetime(ws.index)
ws.head()
WS1_50mMean | WS1_50mStdev | WS1_50mMax | WS1_50mMin | WS2_50mMean | WS2_50mStDev | WS2_50mMax | WS2_50mMin | WS3_30mMean | WS3_30mStDev | WS3_30mMax | WS3_30mMin | WS4_40mMean | WS4_40mStDev | WS4_40mMax | WS4_40mMin | WD1_49mMean | WD1_49mStDev | WD1_49mMax | WD1_49mMin | ||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
2011-06-03 00:00:00 | 9.50 | 1.12 | 11.70 | 6.40 | 9.39 | 1.03 | 11.39 | 6.43 | 7.96 | 1.33 | 10.97 | 4.51 | 8.76 | 1.17 | 11.01 | 4.95 | 172 | 7 | 173 | 240 | ... |
2011-06-03 00:10:00 | 8.37 | 0.56 | 10.19 | 6.78 | 8.27 | 0.56 | 9.86 | 6.43 | 6.83 | 0.76 | 9.06 | 3.75 | 7.60 | 0.64 | 9.49 | 5.70 | 172 | 5 | 169 | 240 | ... |
2011-06-03 00:20:00 | 8.38 | 0.43 | 9.44 | 7.17 | 8.28 | 0.41 | 9.48 | 7.20 | 7.13 | 0.64 | 8.69 | 4.89 | 7.81 | 0.45 | 9.10 | 6.83 | 169 | 5 | 177 | 240 | ... |
2011-06-03 00:30:00 | 7.48 | 1.17 | 9.44 | 3.76 | 7.43 | 1.12 | 9.48 | 3.77 | 6.43 | 1.03 | 8.29 | 3.37 | 6.93 | 1.03 | 9.10 | 3.43 | 163 | 11 | 166 | 240 | ... |
2011-06-03 00:40:00 | 7.13 | 0.79 | 9.06 | 5.26 | 7.10 | 0.76 | 9.09 | 5.28 | 6.21 | 0.67 | 7.91 | 2.99 | 6.61 | 0.66 | 8.34 | 3.81 | 172 | 12 | 166 | 240 | ... |
5 rows × 24 columns
# Rotor Diameter vs. Turbine Manufacturer
# Group the turbines by manufacturer; `mf_grouped` and `mean_grouped` are
# reused by later charts.
mf_grouped = df.groupby('Turbine Manufacturer')
# numeric_only=True averages just the numeric columns.  pandas 2.x raises on
# text columns instead of silently skipping them as older releases did.
mean_grouped = mf_grouped.mean(numeric_only=True).dropna()
# DataFrame.sort() was removed from pandas (0.20); sort_values() replaces it.
mean_rd = mean_grouped.sort_values('Rotor Diameter')['Rotor Diameter']
rotor_diam = vincent.Bar(mean_rd)
rotor_diam.axis_titles(x='Turbine Manufacturer', y='Rotor Diameter')
# The Hard Way: style each axis of `rotor_diam` by assigning vincent
# property objects one attribute at a time.
from vincent.axes import AxisProperties
from vincent.properties import PropertySet
from vincent.values import ValueRef


def _axis_color_ref():
    """Return a new ValueRef wrapping the shared ``axis_color``."""
    return ValueRef(value=axis_color)


for axis in rotor_diam.axes:
    axis.properties = AxisProperties()
    # The tick marks and the axis line all receive the same stroke color.
    for prop in ('ticks', 'axis', 'major_ticks', 'minor_ticks'):
        setattr(axis.properties, prop, PropertySet(stroke=_axis_color_ref()))
    axis.properties.title = PropertySet(font_size=ValueRef(value=20),
                                        fill=_axis_color_ref())
    axis.properties.labels = PropertySet(fill=_axis_color_ref())

# Only the first axis gets rotated, left-aligned labels and a shifted title.
first_axis = rotor_diam.axes[0]
first_axis.properties.labels.angle = ValueRef(value=50)
first_axis.properties.labels.align = ValueRef(value='left')
first_axis.properties.title.dy = ValueRef(value=115)

# Replace the range of the third scale with a single color.
rotor_diam.scales[2].range = ['#b48ead']
rotor_diam
# Total Turbine Count
# Count the 'Unique ID' entries per manufacturer, largest count first.
# DataFrame.sort() was removed from pandas (0.20); sort_values() replaces it.
turbine_ct = (mf_grouped.count()
              .dropna()
              .sort_values('Unique ID', ascending=False)['Unique ID'])
# Chart only the first 25 entries, i.e. the 25 largest counts.
num_turbines = (vincent.Bar(turbine_ct[:25])
                .axis_titles(x='Turbine Manufacturer',
                             y='Number of Turbines in the US')
                .colors(range_=['#6a9fb5']))
# Shortcuts!
def lighten_axes(vis, x_offset=50):
    """Restyle the axes of *vis* using the module-level ``axis_color``.

    Calls ``common_axis_properties``, ``x_axis_properties`` and
    ``y_axis_properties`` in that order, each on the object returned by the
    previous call.  Nothing is returned, so callers rely on the chart being
    updated in place.

    vis: chart object exposing the three ``*_axis_properties`` methods.
    x_offset: value passed through as the x-axis ``title_offset``.
    """
    common_styled = vis.common_axis_properties(color=axis_color, title_size=20)
    x_styled = common_styled.x_axis_properties(
        label_angle=50, label_align='left', title_offset=x_offset)
    x_styled.y_axis_properties(title_offset=-40)
# Apply the shared axis styling to the turbine-count chart; the bare name on
# the last line is what the notebook renders.
# If Area Chart (left disabled; presumably only needed when num_turbines is
# built as an area chart instead of a bar chart -- confirm before enabling):
# num_turbines.scales[0].type = 'ordinal'
lighten_axes(num_turbines)
num_turbines
# Turbine Count vs. Date
# Count turbines per (online year, manufacturer) pair.
grouped_date = df.groupby(['Online Year', 'Turbine Manufacturer'])
by_year = grouped_date.count()['Unique ID'].reset_index()
# errors='coerce' replaces the removed coerce=True flag: values that cannot
# be parsed become NaT and are discarded by the dropna() on the next line.
# NOTE(review): this presumably relies on 'Online Year' holding year strings;
# integer years would be read as nanosecond offsets -- confirm the dtype.
by_year['Online Year'] = pd.to_datetime(by_year['Online Year'], errors='coerce')
by_year = by_year.rename(columns={'Unique ID': 'Turbine Count'}).dropna()
# One row per date, one column per manufacturer.
by_year = by_year.pivot(index='Online Year',
                        columns='Turbine Manufacturer',
                        values='Turbine Count')
# Keep only the ten manufacturers with the largest overall turbine counts.
by_year = by_year[turbine_ct[:10].index.tolist()]
# NOTE(review): ten manufacturers are selected above but only eight colors
# are listed -- confirm how vincent assigns colors to the remaining series.
online_by_year = (vincent.StackedArea(by_year)
                  .axis_titles(x='Date', y='Turbine Count')
                  .legend(title='Turbine Manufacturer', text_color=axis_color)
                  .colors(range_=['#ac4142', '#d28445', '#f4bf75', '#90a959',
                                  '#75b5aa', '#6a9fb5', '#aa759f', '#8f5536']))
lighten_axes(online_by_year, x_offset=30)
online_by_year
# Grouped bars of mean hub height and mean rotor diameter per manufacturer,
# ordered by descending rotor diameter, then descending hub height.
# DataFrame.sort() was removed from pandas (0.20); sort_values() replaces it.
height_diam = (vincent.GroupedBar(mean_grouped[['Tower/Hub Height', 'Rotor Diameter']]
                                  .sort_values(['Rotor Diameter', 'Tower/Hub Height'],
                                               ascending=False))
               .axis_titles(x='Turbine Manufacturer', y='Meters')
               .legend(title='Parameters', text_color=axis_color)
               .colors(range_=['#f4bf75', '#75b5aa']))
lighten_axes(height_diam, 100)
height_diam
# Line chart of the WS1_50mMean column for rows dated 2011-11-15 through
# 2011-12-01.
date_window = slice('2011-11-15', '2011-12-01')
november_2011 = ws[date_window]
mean_speed = november_2011['WS1_50mMean']

# Build the chart one step at a time; each call returns the chart object.
ws_line = vincent.Line(mean_speed)
ws_line = ws_line.axis_titles(x='Date', y='Wind Speed (m/s)')
ws_line = ws_line.colors(range_=['#d28445'])
lighten_axes(ws_line, x_offset=30)
ws_line
# Rotor Diameter vs. Power
# Keep only turbines whose rotor diameter is above 10.
has_large_rotor = df['Rotor Diameter'] > 10
min_heights = df[has_large_rotor]

# Scatter the two columns against each other, iterating on 'Turbine MW'.
scatter_columns = min_heights[['Turbine MW', 'Rotor Diameter']]
diameter_vs_mw = vincent.Scatter(scatter_columns, iter_idx='Turbine MW')
diameter_vs_mw = diameter_vs_mw.axis_titles(x='Power (MW)', y='Rotor Diameter (m)')
diameter_vs_mw = diameter_vs_mw.colors(range_=['#75b5aa'])
lighten_axes(diameter_vs_mw, x_offset=30)
diameter_vs_mw