from collections import OrderedDict import bearcart import bokeh import bokeh.plotting as bp from bokeh.plotting import output_notebook import folium import ggplot as gg from ggplot import ggplot from IPython.html.widgets import interact import matplotlib.pyplot as plt import mpld3 import numpy as np import pandas as pd import vincent %matplotlib inline mpld3.enable_notebook() bearcart.initialize_notebook() vincent.initialize_notebook() folium.initialize_notebook() # axis_color = 'black' axis_color = '#d0d0d0' df = pd.read_csv('USGS_WindTurbine_201307_cleaned.csv') df.head() ws = pd.read_table('CO_WS_2011_2012.txt') ws = ws.set_index('Date & Time Stamp') ws.index = ws.index.to_datetime() ws.head() # Rotor Diameter vs. Turbine Manufacturer mf_grouped = df.groupby('Turbine Manufacturer') mean_grouped = mf_grouped.mean().dropna() mean_rd = mean_grouped.sort('Rotor Diameter')['Rotor Diameter'] rotor_diam = vincent.Bar(mean_rd) rotor_diam.axis_titles(x='Turbine Manufacturer', y='Rotor Diameter') # The Hard Way from vincent.axes import AxisProperties from vincent.properties import PropertySet from vincent.values import ValueRef for axis in rotor_diam.axes: axis.properties = AxisProperties() for prop in ['ticks', 'axis', 'major_ticks', 'minor_ticks']: setattr(axis.properties, prop, PropertySet(stroke=ValueRef(value=axis_color))) axis.properties.title = PropertySet(font_size=ValueRef(value=20), fill=ValueRef(value=axis_color)) axis.properties.labels = PropertySet(fill=ValueRef(value=axis_color)) rotor_diam.axes[0].properties.labels.angle = ValueRef(value=50) rotor_diam.axes[0].properties.labels.align = ValueRef(value='left') rotor_diam.axes[0].properties.title.dy = ValueRef(value=115) rotor_diam.scales[2].range = ['#b48ead'] rotor_diam # Total Turbine Count turbine_ct = mf_grouped.count().dropna().sort('Unique ID', ascending=False)['Unique ID'] num_turbines = (vincent.Bar(turbine_ct[:25]) .axis_titles(x='Turbine Manufacturer', y='Number of Turbines in the US') .colors(range_=['#6a9fb5'])) # Shortcuts! def lighten_axes(vis, x_offset=50): (vis.common_axis_properties(color=axis_color, title_size=20) .x_axis_properties(label_angle=50, label_align='left', title_offset=x_offset) .y_axis_properties(title_offset=-40)) # If Area Chart # num_turbines.scales[0].type = 'ordinal' lighten_axes(num_turbines) num_turbines # Turbine Count vs. Date grouped_date = df.groupby(['Online Year', 'Turbine Manufacturer']) by_year = grouped_date.count()['Unique ID'].reset_index() by_year['Online Year'] = pd.to_datetime(by_year['Online Year'], coerce=True) by_year = by_year.rename(columns={'Unique ID': 'Turbine Count'}).dropna() by_year = by_year.pivot(index='Online Year', columns='Turbine Manufacturer', values='Turbine Count') by_year = by_year[turbine_ct[:10].index.tolist()] online_by_year = (vincent.StackedArea(by_year) .axis_titles(x='Date', y='Turbine Count') .legend(title='Turbine Manufacturer', text_color=axis_color) .colors(range_=['#ac4142', '#d28445', '#f4bf75', '#90a959', '#75b5aa', '#6a9fb5', '#aa759f', '#8f5536'])) lighten_axes(online_by_year, x_offset=30) online_by_year height_diam = (vincent.GroupedBar(mean_grouped[['Tower/Hub Height', 'Rotor Diameter']] .sort(['Rotor Diameter', 'Tower/Hub Height'], ascending=False)) .axis_titles(x='Turbine Manufacturer', y='Meters') .legend(title='Parameters', text_color=axis_color) .colors(range_=['#f4bf75', '#75b5aa'])) lighten_axes(height_diam, 100) height_diam november_2011 = ws['2011-11-15':'2011-12-01'] ws_line = (vincent.Line(november_2011['WS1_50mMean']) .axis_titles(x='Date', y='Wind Speed (m/s)') .colors(range_=['#d28445'])) lighten_axes(ws_line, x_offset=30) ws_line # Rotor Diameter vs. Power min_heights = df[df['Rotor Diameter'] > 10] diameter_vs_mw = (vincent.Scatter(min_heights[['Turbine MW', 'Rotor Diameter']], iter_idx='Turbine MW') .axis_titles(x='Power (MW)', y='Rotor Diameter (m)') .colors(range_=['#75b5aa'])) lighten_axes(diameter_vs_mw, x_offset=30) diameter_vs_mw (ggplot(gg.aes(x='Turbine MW', y='Rotor Swept Area'), data=min_heights[:500]) + gg.geom_point(color='#75b5aa', size=75) + gg.ggtitle("Rotor Swept Area vs. Power") + gg.xlab("Power (MW)") + gg.ylab("Rotor Swept Area (m^2)")) (ggplot(gg.aes(x='WS1_50mMean'), data=ws) + gg.geom_histogram(fill='#ac4142') + gg.ggtitle("Wind Speed Distribution") + gg.labs("Wind Speed (m)", "Freq")) molten = pd.melt(ws, value_vars=['WS1_50mMean', 'WS2_50mMean', 'WS3_30mMean', 'WS4_40mMean']) (ggplot(gg.aes(x='value'), data=molten) + gg.geom_histogram(fill='#aa759f') + gg.facet_wrap('variable') + gg.ggtitle("Wind Speed Distribution") + gg.labs("Wind Speed (m)", "Freq")) november_2011['Date'] = november_2011.index @interact def plot_gg_ws(column=november_2011.columns.tolist()): molten = pd.melt(november_2011, value_vars=[column], id_vars=['Date']) gg_wind_data = (ggplot(gg.aes(x='Date', y='value'), data=molten) + gg.geom_line(color="steelblue") + gg.stat_smooth(colour="red")) gg_wind_data.draw() output_notebook() import bokeh subset = min_heights[:1000] bp.figure(plot_width=800, background_fill= '#e5e5e5') plot_tools = "pan, wheel_zoom, box_zoom, reset, hover" bp.scatter(subset['Total Height'], subset['Rotor Swept Area'], tools=plot_tools, size=10, color='purple', alpha=1.0) bp.curplot().title = "Total Height (m) vs. Rotor Swept Area (m^2)" bp.xaxis().axis_label = "Rotor Swept Area (m^2)" bp.yaxis().axis_label = "Following" hover = [t for t in bp.curplot().tools if isinstance(t, bokeh.objects.HoverTool)][0] hover.tooltips = {"(Rotor Swept Area, Total Height)": "(@x, @y)"} bp.show() by_year.dropna() built_by_year = bearcart.Chart(by_year, palette='spectrum2000', height=500, width=750, plt_type='bar') built_by_year all_ws = bearcart.Chart(november_2011[['WS1_50mMean', 'WS2_50mMean', 'WS3_30mMean', 'WS4_40mMean']], height=500, width=750, plt_type='area') all_ws all_wd = bearcart.Chart(november_2011[['WD1_49mMean', 'WD2_38mMean']], height=500, width=750, plt_type='bar', colors={'WD1_49mMean': '#75b5aa', 'WD2_38mMean': '#aa759f'}) all_wd import seaborn as sb sb.set(rc={'text.color': axis_color, 'axes.labelcolor': axis_color}) rvm = df[['Rotor Diameter', 'Turbine MW', 'Turbine Manufacturer']].dropna() row_mask = rvm['Turbine Manufacturer'].isin(['Vestas', 'Siemens', 'GE']) rvm = rvm[row_mask] rvm['Rotor Diameter'] = rvm['Rotor Diameter'].apply(lambda x: int(5 * round(float(x)/5))) grouped_rvm = rvm.groupby(['Rotor Diameter', 'Turbine Manufacturer']) rvm = grouped_rvm.mean().reset_index() sb.lmplot("Rotor Diameter", "Turbine MW", col="Turbine Manufacturer", hue="Turbine Manufacturer", data=rvm) sb.set_palette('husl') nov_cols = november_2011.columns.tolist() @interact def sb_jointplot(x=nov_cols, y=nov_cols): sb.jointplot(x, y, data=november_2011, size=9) sb.jointplot('WD1_49mMean', 'WD2_38mMean', data=november_2011, size=9, kind='kde') import scipy.stats as stats dists = ['Rayleigh', 'Weibull', 'Normal'] @interact def plot_sb_dist(column=ws.columns.tolist(), dist=dists): plt.figure(figsize=(10, 8)) dist_map = { 'Rayleigh': stats.rayleigh, 'Weibull': stats.exponweib, 'Normal': stats.norm, } sb.distplot(ws[column], fit=dist_map[dist]) state_geo = r'us-states.json' state_data = df.groupby('State').count().drop('State', axis=1).reset_index() state_map = folium.Map(location=[48, -102], zoom_start=3) state_map.geo_json(geo_path=state_geo, data=state_data, columns=['State', 'Unique ID'], key_on='feature.id', fill_color='BuPu', fill_opacity=0.7, line_opacity=0.2, legend_name='Turbine Count Per State') state_map.create_map('turbines_per_state.html') state_map Solano = df[df['Site Name'].isin(['Solano Wind 1', 'Solano Wind 3'])] solano_map = folium.Map(location=[38.1067, -121.7735], zoom_start=12, tiles='Stamen Terrain') solano_map.lat_lng_popover() for row in Solano.iterrows(): wtg_id = str(row[1]['Unique ID']) lat = row[1]['Latitude-Decimal Degrees'] lng = row[1]['Longitude-Decimal Degrees'] solano_map.polygon_marker(location=[lat, lng], fill_color='#f4bf75', radius=12, popup='WTG ID: ' + wtg_id) solano_map.create_map('solano_map.html') solano_map.render_iframe = True solano_map # Let's do a little more mapping # Source: http://wind.nrel.gov/Web_nrel/ wind_map = folium.Map(location=[45.48, -121.7], zoom_start=10, tiles='Stamen Terrain') locations = { 'df_26512': (45.38, -121.52), 'df_26795': (45.48, -121.72), 'df_26798': (45.48, -121.62) } data = { 'df_26512': pd.read_csv('26512.csv'), 'df_26795': pd.read_csv('26795.csv'), 'df_26798': pd.read_csv('26798.csv') } charts = { 'df_26512': None, 'df_26795': None, 'df_26798': None } for k, df in data.items(): df['Date'] = pd.to_datetime(df['Date(YYYY-MM-DD hh:mm:ss)']) data[k] = df.drop('Date(YYYY-MM-DD hh:mm:ss)', axis=1).set_index('Date') data[k] = data[k]['2006-12-01': '2006-12-31'] for k, chart in charts.items(): charts[k] = (vincent.Line(data[k]['100m wind speed (m/s)'], width=400, height=200) .colors(range_=['#6a9fb5'])) charts[k].axes[0].ticks = 4 charts[k].axis_titles(x='Date', y='100m wind speed (m/s)') path = ''.join([k, '.json']) charts[k].to_json(path) wind_map.polygon_marker(location=locations[k], fill_color='#6a9fb5', radius=12, popup=(charts[k], path)) wind_map.create_map('wind_map.html') wind_map.render_iframe = True wind_map from IPython.core.display import HTML styles = open("custom_dark.css", "r").read() HTML(styles)