# -*- coding: utf-8 -*-
"""
@author: Nithiya M Streethran
"""
import pandas as pd #import libraries
import numpy as np
import itertools
import matplotlib.pyplot as plt
%matplotlib inline
# Load the SCADA and downtime records.
df = pd.read_csv(
    'C:/Users/nithi/Google Drive/Python/Student Data/SCADA_and_downtime.csv',
    skip_blank_lines=True,
)

# Turbines to plot. To cover every turbine in ascending order instead, use:
#   list1 = sorted(df['turbine_id'].unique(), key=int)
list1 = [1]

# Turbine categories to plot: every non-negative category id found in the
# data, cast to an integer, minus the ids listed in excluded_categories.
excluded_categories = (1, 12, 13, 14, 15, 17, 21, 22)
list2 = []
for raw_category in df['TurbineCategory_id'].unique():
    if not raw_category >= 0:  # NaN fails this comparison and is dropped
        continue
    category = raw_category.astype(np.int64)  # float -> integer
    if category not in excluded_categories:
        list2.append(category)
list2.sort(key=int)  # ascending order

# Every (turbine, category) combination, turbine-major.
list3 = [(turbine, category) for turbine in list1 for category in list2]
# For every (turbine, category) pair: label each record of turbine x with the
# time remaining until the next record whose TurbineCategory_id equals y (the
# "fault"), classify the records, and plot the resulting power curves.
for (x, y) in list3:
    df2x = df[df['turbine_id'] == x].copy()  # only data for turbine x
    if df2x.empty:  # nothing to label or plot for this turbine
        continue

    # Newest record first, so that walking down the rows moves backwards in
    # time away from each fault.
    df2x = df2x.sort_values(by="timestamp", ascending=False)
    df2x.reset_index(drop=True, inplace=True)

    # 'mins' is 0 on a fault row and grows by 10 for every row after it (in
    # this descending order) until the next fault row resets it.
    # NOTE(review): the step of 10 presumably reflects 10-minute records -
    # confirm against the data source.
    # Rows that come before the first fault row (i.e. the most recent records,
    # with no later fault) start from the sentinel 999999999 instead of 0.
    is_fault = df2x['TurbineCategory_id'] == y
    fault_group = is_fault.cumsum()  # 0 until the first fault row is reached
    rows_since_fault = is_fault.groupby(fault_group).cumcount()
    sentinel = np.where(fault_group == 0, 999999999, 0)
    df2x['mins'] = (10 * rows_since_fault + sentinel).astype(np.int64)

    df2x = df2x.sort_values(by="timestamp")  # back to ascending order
    df2x.reset_index(drop=True, inplace=True)

    # Convert to hours, rounded to the nearest hour; anything more than
    # 48 hours before a fault is labelled as normal (9999).
    hours = (df2x['mins'] / 60).round().astype(np.int64)
    df2x['hours'] = hours.where(hours <= 48, 9999)

    ws = df2x['ws_av']
    ap = df2x['ap_av']
    pitch = df2x['pitch']
    turbine_category = df2x['TurbineCategory_id']
    is_normal_period = df2x['hours'] == 9999

    # Filter out curtailment. A record is 'curtailed' only when it is in the
    # normal period, the pitch is outside 0deg <= normal <= 3.5deg, and the
    # power is strictly between 10% and 90% of this turbine's maximum.
    # Missing values fail every comparison, exactly as in a scalar test.
    ap_max = ap.max()  # computed once instead of once per row
    pitch_in_range = pitch.between(0, 3.5)
    pitch_out_of_range = (pitch > 3.5) | (pitch < 0)
    power_near_extreme = (ap <= (.1 * ap_max)) | (ap >= (.9 * ap_max))
    not_curtailed = (
        pitch_in_range
        | ~is_normal_period
        | (pitch_out_of_range & power_near_extreme)
    )
    df2x['curtailment'] = np.where(not_curtailed, 'normal', 'curtailed')

    # Filter unusual readings in the normal period: either 3 < ws_av < 25
    # together with power <= 0, runtime < 600 or one of the category flags
    # below, or ws_av < 3 together with power > 100.
    in_operating_wind = (ws > 3) & (ws < 25)
    suspect_reading = (
        (ap <= 0)
        | (df2x['runtime'] < 600)
        | (df2x['EnvironmentalCategory_id'] > 1)
        | (df2x['GridCategory_id'] > 1)
        | (df2x['InfrastructureCategory_id'] > 1)
        | (df2x['AvailabilityCategory_id'] == 2)
        | turbine_category.between(12, 15)
        | turbine_category.between(21, 22)
    )
    power_in_low_wind = (ws < 3) & (ap > 100)
    is_unusual = is_normal_period & (
        (in_operating_wind & suspect_reading) | power_in_low_wind
    )
    df2x['unusual'] = np.where(is_unusual, 'unusual', 'normal')

    # Filter data for plots.
    df3 = df2x[df2x['hours'] == 9999]  # normal w/ curtailment (all data)
    df4 = df2x[(df2x['hours'] != 9999) & (df2x['hours'] != 0)]  # before fault
    df5 = df2x[df2x['hours'] == 0]  # fault
    df6 = df3[df3['curtailment'] == 'normal']  # normal w/o curtailment
    df7 = df6[df6['unusual'] == 'normal']  # ... and w/o unusual readings

    # One panel per level of filtering of the normal data; the before-fault
    # and faulty points are the same in all three. Only the first panel
    # shows a legend.
    # NOTE(review): 18.5 x 4.5 in at dpi=1500 is a very large canvas - confirm
    # that this resolution is really needed.
    fig = plt.figure(figsize=(18.5, 4.5), dpi=1500)
    panels = (
        (df3, 'all data points', True),
        (df6, 'w/o curtailment', False),
        (df7, 'w/o curtailment and anomalies', False),
    )
    for position, (normal, title, show_legend) in enumerate(panels, start=1):
        ax = fig.add_subplot(1, 3, position)
        ax.scatter(normal['ws_av'], normal['ap_av'],
                   c='#098A63', label='normal', marker=".")
        ax.scatter(df4['ws_av'], df4['ap_av'],
                   c='#3F2B78', label='before fault', marker=".")
        ax.scatter(df5['ws_av'], df5['ap_av'],
                   c='c', label='faulty', marker=".")
        if show_legend:
            ax.legend()
        ax.set_xlabel('Wind speed (m/s)')
        ax.set_ylabel('Average active power (kW)')
        ax.set_title(title)

    fig.suptitle(
        'Power curves for turbine %s with turbine category %s' % (x, y)
    )
    fig.tight_layout()
    fig.subplots_adjust(top=0.88)  # leave room for the suptitle
    plt.show()