import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
# Per-turbine score summary for DT.csv: for each turbine, plot the mean of a
# score column as a marker with whiskers reaching to the group's min and max.
# The 'f1' column is shown as "balanced" and the 'f' column as "imbalanced".
results = pd.read_csv('DT.csv')
per_turbine = results.groupby('turbine')

# One mean/min/max table per score column; rows are indexed by turbine.
spread = {column: per_turbine[column].agg(['mean', 'min', 'max'])
          for column in ('f1', 'f')}
turbine_ids = np.asarray(spread['f1'].index)

fig, ax = plt.subplots(figsize=(10, 4), dpi=500)
for column, colour, caption in (('f1', '#098A63', 'balanced'),
                                ('f', '#3F2B78', 'imbalanced')):
    centre = np.asarray(spread[column]['mean'])
    # Asymmetric whiskers: distance down to the minimum, distance up to the maximum.
    whiskers = [centre - np.asarray(spread[column]['min']),
                np.asarray(spread[column]['max']) - centre]
    # Two calls: the first draws the labelled markers, the second adds the
    # unlabelled min-max whiskers in the same colour.
    ax.errorbar(turbine_ids, centre, linestyle='None', color=colour,
                marker='o', label=caption)
    ax.errorbar(turbine_ids, centre, whiskers, linestyle='None',
                ecolor=colour, capsize=3)

plt.xticks(list(range(1, 26)))
ax.set_xlabel('Turbine')
ax.set_ylabel('F1 score')
ax.legend(loc=4)  # loc=4 is matplotlib's numeric code for 'lower right'
plt.show()
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
# Per-fault-category score summary for DT.csv: for each 'fault' value, plot
# the mean of a score column as a marker with whiskers reaching to the group's
# min and max. The 'f1' column is shown as "balanced", 'f' as "imbalanced".
results = pd.read_csv('DT.csv')
spread = results.groupby('fault')[['f1', 'f']].agg(['mean', 'min', 'max'])

# Put the rows in numeric category order and use that single ordering for
# both the plotted values and the tick labels, so a label can never end up
# under another category's point.
categories = sorted(spread.index.tolist(), key=int)
spread = spread.loc[categories]
# One x position per category actually present (previously a fixed 1..14,
# which fails with a shape error for any other number of categories).
positions = np.arange(1, len(categories) + 1)

fig, ax = plt.subplots(figsize=(10, 4), dpi=500)
for column, colour, caption in (('f1', '#098A63', 'balanced'),
                                ('f', '#3F2B78', 'imbalanced')):
    centre = np.asarray(spread[column]['mean'])
    # Asymmetric whiskers: distance down to the minimum, distance up to the maximum.
    whiskers = [centre - np.asarray(spread[column]['min']),
                np.asarray(spread[column]['max']) - centre]
    # Two calls: the first draws the labelled markers, the second adds the
    # unlabelled min-max whiskers in the same colour.
    ax.errorbar(positions, centre, linestyle='None', color=colour,
                marker='o', label=caption)
    ax.errorbar(positions, centre, whiskers, linestyle='None',
                ecolor=colour, capsize=3)

plt.xticks(positions, categories)
ax.set_xlabel('Turbine category')
ax.set_ylabel('F1 score')
ax.legend(loc=4)  # loc=4 is matplotlib's numeric code for 'lower right'
plt.show()
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
# Per-turbine score summary for RF.csv: for each turbine, plot the mean of a
# score column as a marker with whiskers reaching to the group's min and max.
# The 'f1' column is shown as "balanced" and the 'f' column as "imbalanced".
results = pd.read_csv('RF.csv')
per_turbine = results.groupby('turbine')

# One mean/min/max table per score column; rows are indexed by turbine.
spread = {column: per_turbine[column].agg(['mean', 'min', 'max'])
          for column in ('f1', 'f')}
turbine_ids = np.asarray(spread['f1'].index)

fig, ax = plt.subplots(figsize=(10, 4), dpi=500)
for column, colour, caption in (('f1', '#098A63', 'balanced'),
                                ('f', '#3F2B78', 'imbalanced')):
    centre = np.asarray(spread[column]['mean'])
    # Asymmetric whiskers: distance down to the minimum, distance up to the maximum.
    whiskers = [centre - np.asarray(spread[column]['min']),
                np.asarray(spread[column]['max']) - centre]
    # Two calls: the first draws the labelled markers, the second adds the
    # unlabelled min-max whiskers in the same colour.
    ax.errorbar(turbine_ids, centre, linestyle='None', color=colour,
                marker='o', label=caption)
    ax.errorbar(turbine_ids, centre, whiskers, linestyle='None',
                ecolor=colour, capsize=3)

plt.xticks(list(range(1, 26)))
ax.set_xlabel('Turbine')
ax.set_ylabel('F1 score')
ax.legend(loc=4)  # loc=4 is matplotlib's numeric code for 'lower right'
plt.show()
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
# Per-fault-category score summary for RF.csv: for each 'fault' value, plot
# the mean of a score column as a marker with whiskers reaching to the group's
# min and max. The 'f1' column is shown as "balanced", 'f' as "imbalanced".
results = pd.read_csv('RF.csv')
spread = results.groupby('fault')[['f1', 'f']].agg(['mean', 'min', 'max'])

# Put the rows in numeric category order and use that single ordering for
# both the plotted values and the tick labels, so a label can never end up
# under another category's point.
categories = sorted(spread.index.tolist(), key=int)
spread = spread.loc[categories]
# One x position per category actually present (previously a fixed 1..14,
# which fails with a shape error for any other number of categories).
positions = np.arange(1, len(categories) + 1)

fig, ax = plt.subplots(figsize=(10, 4), dpi=500)
for column, colour, caption in (('f1', '#098A63', 'balanced'),
                                ('f', '#3F2B78', 'imbalanced')):
    centre = np.asarray(spread[column]['mean'])
    # Asymmetric whiskers: distance down to the minimum, distance up to the maximum.
    whiskers = [centre - np.asarray(spread[column]['min']),
                np.asarray(spread[column]['max']) - centre]
    # Two calls: the first draws the labelled markers, the second adds the
    # unlabelled min-max whiskers in the same colour.
    ax.errorbar(positions, centre, linestyle='None', color=colour,
                marker='o', label=caption)
    ax.errorbar(positions, centre, whiskers, linestyle='None',
                ecolor=colour, capsize=3)

plt.xticks(positions, categories)
ax.set_xlabel('Turbine category')
ax.set_ylabel('F1 score')
ax.legend(loc=4)  # loc=4 is matplotlib's numeric code for 'lower right'
plt.show()
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
# Per-turbine score summary for knn.csv: for each turbine, plot the mean of a
# score column as a marker with whiskers reaching to the group's min and max.
# The 'f1' column is shown as "balanced" and the 'f' column as "imbalanced".
results = pd.read_csv('knn.csv')
per_turbine = results.groupby('turbine')

# One mean/min/max table per score column; rows are indexed by turbine.
spread = {column: per_turbine[column].agg(['mean', 'min', 'max'])
          for column in ('f1', 'f')}
turbine_ids = np.asarray(spread['f1'].index)

fig, ax = plt.subplots(figsize=(10, 4), dpi=500)
for column, colour, caption in (('f1', '#098A63', 'balanced'),
                                ('f', '#3F2B78', 'imbalanced')):
    centre = np.asarray(spread[column]['mean'])
    # Asymmetric whiskers: distance down to the minimum, distance up to the maximum.
    whiskers = [centre - np.asarray(spread[column]['min']),
                np.asarray(spread[column]['max']) - centre]
    # Two calls: the first draws the labelled markers, the second adds the
    # unlabelled min-max whiskers in the same colour.
    ax.errorbar(turbine_ids, centre, linestyle='None', color=colour,
                marker='o', label=caption)
    ax.errorbar(turbine_ids, centre, whiskers, linestyle='None',
                ecolor=colour, capsize=3)

plt.xticks(list(range(1, 26)))
ax.set_xlabel('Turbine')
ax.set_ylabel('F1 score')
ax.legend(loc=4)  # loc=4 is matplotlib's numeric code for 'lower right'
plt.show()
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
# Per-fault-category score summary for knn.csv: for each 'fault' value, plot
# the mean of a score column as a marker with whiskers reaching to the group's
# min and max. The 'f1' column is shown as "balanced", 'f' as "imbalanced".
results = pd.read_csv('knn.csv')
spread = results.groupby('fault')[['f1', 'f']].agg(['mean', 'min', 'max'])

# Put the rows in numeric category order and use that single ordering for
# both the plotted values and the tick labels, so a label can never end up
# under another category's point.
categories = sorted(spread.index.tolist(), key=int)
spread = spread.loc[categories]
# One x position per category actually present (previously a fixed 1..14,
# which fails with a shape error for any other number of categories).
positions = np.arange(1, len(categories) + 1)

fig, ax = plt.subplots(figsize=(10, 4), dpi=500)
for column, colour, caption in (('f1', '#098A63', 'balanced'),
                                ('f', '#3F2B78', 'imbalanced')):
    centre = np.asarray(spread[column]['mean'])
    # Asymmetric whiskers: distance down to the minimum, distance up to the maximum.
    whiskers = [centre - np.asarray(spread[column]['min']),
                np.asarray(spread[column]['max']) - centre]
    # Two calls: the first draws the labelled markers, the second adds the
    # unlabelled min-max whiskers in the same colour.
    ax.errorbar(positions, centre, linestyle='None', color=colour,
                marker='o', label=caption)
    ax.errorbar(positions, centre, whiskers, linestyle='None',
                ecolor=colour, capsize=3)

plt.xticks(positions, categories)
ax.set_xlabel('Turbine category')
ax.set_ylabel('F1 score')
ax.legend(loc=4)  # loc=4 is matplotlib's numeric code for 'lower right'
plt.show()
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
# Per-turbine comparison of three score series, each drawn as a mean marker
# with whiskers reaching to the group's min and max:
#   * 'f1' from knn.csv    -> "balanced"
#   * 'f1' from knn-k.csv  -> "imbalanced, without k optimisation"
#   * 'f'  from knn-k.csv  -> "imbalanced, with k optimisation"
knn_k_spread = (pd.read_csv('knn-k.csv')
                .groupby('turbine')[['f1', 'f']]
                .agg(['mean', 'min', 'max']))
knn_spread = (pd.read_csv('knn.csv')
              .groupby('turbine')['f1']
              .agg(['mean', 'min', 'max']))

# The x axis comes from knn-k.csv. Align the knn.csv statistics to it by
# turbine id instead of by row position, so each point is drawn at its own
# turbine even if the two files do not list exactly the same turbines
# (a turbine absent from knn.csv becomes NaN and is simply not drawn).
knn_spread = knn_spread.reindex(knn_k_spread.index)
turbine_ids = np.asarray(knn_k_spread.index)

series = (
    (knn_spread, '#098A63', 'balanced'),
    (knn_k_spread['f1'], '#3F2B78', 'imbalanced, without k optimisation'),
    (knn_k_spread['f'], 'C0', 'imbalanced, with k optimisation'),
)

fig, ax = plt.subplots(figsize=(10, 4), dpi=500)
for table, colour, caption in series:
    centre = np.asarray(table['mean'])
    # Asymmetric whiskers: distance down to the minimum, distance up to the maximum.
    whiskers = [centre - np.asarray(table['min']),
                np.asarray(table['max']) - centre]
    # Two calls: the first draws the labelled markers, the second adds the
    # unlabelled min-max whiskers in the same colour.
    ax.errorbar(turbine_ids, centre, linestyle='None', color=colour,
                marker='o', label=caption)
    ax.errorbar(turbine_ids, centre, whiskers, linestyle='None',
                ecolor=colour, capsize=3)

plt.xticks(list(range(1, 26)))
ax.set_xlabel('Turbine')
ax.set_ylabel('F1 score')
ax.legend()
plt.show()
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
# Per-fault-category comparison of three score series, each drawn as a mean
# marker with whiskers reaching to the group's min and max:
#   * 'f1' from knn.csv    -> "balanced"
#   * 'f1' from knn-k.csv  -> "imbalanced, without k optimisation"
#   * 'f'  from knn-k.csv  -> "imbalanced, with k optimisation"
knn_k_spread = (pd.read_csv('knn-k.csv')
                .groupby('fault')[['f1', 'f']]
                .agg(['mean', 'min', 'max']))
knn_spread = (pd.read_csv('knn.csv')
              .groupby('fault')['f1']
              .agg(['mean', 'min', 'max']))

# Put the categories in numeric order and use that single ordering for the
# plotted values of every series and for the tick labels, so a label can
# never end up under another category's point.
categories = sorted(knn_k_spread.index.tolist(), key=int)
knn_k_spread = knn_k_spread.loc[categories]
# Align knn.csv by category value rather than by row position (a category
# absent from knn.csv becomes NaN and is simply not drawn).
knn_spread = knn_spread.reindex(categories)
# One x position per category actually present (previously a fixed 1..14,
# which fails with a shape error for any other number of categories).
positions = np.arange(1, len(categories) + 1)

series = (
    (knn_spread, '#098A63', 'balanced'),
    (knn_k_spread['f1'], '#3F2B78', 'imbalanced, without k optimisation'),
    (knn_k_spread['f'], 'C0', 'imbalanced, with k optimisation'),
)

fig, ax = plt.subplots(figsize=(10, 4), dpi=500)
for table, colour, caption in series:
    centre = np.asarray(table['mean'])
    # Asymmetric whiskers: distance down to the minimum, distance up to the maximum.
    whiskers = [centre - np.asarray(table['min']),
                np.asarray(table['max']) - centre]
    # Two calls: the first draws the labelled markers, the second adds the
    # unlabelled min-max whiskers in the same colour.
    ax.errorbar(positions, centre, linestyle='None', color=colour,
                marker='o', label=caption)
    ax.errorbar(positions, centre, whiskers, linestyle='None',
                ecolor=colour, capsize=3)

plt.xticks(positions, categories)
ax.set_xlabel('Turbine category')
ax.set_ylabel('F1 score')
ax.legend()
plt.show()
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
# Line plot of the three k columns stored in knn-optimal.csv ('k_p', 'k_r',
# 'k_f') against the 'turbine' column, one coloured line per column.
optimal_k = pd.read_csv('knn-optimal.csv')
turbine_ids = optimal_k['turbine']
# (values, line colour, legend text) for each curve, in drawing order.
curves = [
    (optimal_k['k_p'], 'c', 'k_precision'),
    (optimal_k['k_r'], '#098A63', 'k_recall'),
    (optimal_k['k_f'], '#3F2B78', 'k_F1-score'),
]

fig, ax = plt.subplots(figsize=(10, 4), dpi=500)
for k_values, colour, caption in curves:
    ax.plot(turbine_ids, k_values, color=colour, label=caption, marker='o')

ax.set_xlabel('Turbine')
ax.set_ylabel('Optimal k value')
plt.xticks(range(1, 26))
ax.legend()
plt.show()
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
# Line plot of the 'k_f' column of knn-optimal.csv against 'turbine'.
optimal_k = pd.read_csv('knn-optimal.csv')
turbine_ids = optimal_k['turbine']
k_for_f1 = optimal_k['k_f']

fig, ax = plt.subplots(figsize=(10, 4), dpi=500)
# NOTE(review): a label is set here but no legend is drawn in this cell.
ax.plot(turbine_ids, k_for_f1, color='#3F2B78', label='k_F1-score', marker='o')
ax.set_xlabel('Turbine')
ax.set_ylabel('Optimal k value')
plt.xticks(range(1, 26))
plt.show()