#!/usr/bin/env python
# coding: utf-8
"""Build per-segment summary-statistic features from the CSV files in ``train/``.

Each file in ``train/`` is loaded, cleaned, reduced to one feature vector
(std / mean / max / min of every column) and paired with an activity label
looked up in ``activities.csv``.
"""

# In[1]:

import pandas as pd
import numpy as np
import os
from glob import glob
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# In[29]:

from ngboost import NGBClassifier
from ngboost.distns import Bernoulli,k_categorical
from ngboost.learners import default_tree_learner

# In[2]:

from sklearn.metrics import classification_report
from display_results import confusion_matrix

# In[3]:

# Lookup table relating "segment_id" to "activity_id" (both columns are read
# by get_act_id below).
df_activity = pd.read_csv("activities.csv")


# In[4]:

def segmentation(x_data, y, overlap_rate, time_window):
    """Cut ``x_data`` into windows of ``time_window`` rows.

    Consecutive windows start ``int((1 - overlap_rate) * time_window)`` rows
    apart.  Windows at the end of the data may be shorter than
    ``time_window`` because the slice simply runs off the end.

    Args:
        x_data: Array-like exposing ``.shape[0]`` and row slicing.
        y: Label attached to every produced window.
        overlap_rate: Fraction of a window shared with the next one.
        time_window: Number of rows per window.

    Returns:
        A tuple ``(windows, labels)`` of two equally long lists.

    Raises:
        ValueError: If the resulting step between windows is smaller than
            one row (for example ``overlap_rate >= 1``).
    """
    # The original name "overlap" was misleading: this is the step between
    # the start indices of consecutive windows.
    stride = int((1 - overlap_rate) * time_window)
    if stride < 1:
        raise ValueError(
            "overlap_rate=%r with time_window=%r gives a step of %d rows; "
            "the step must be at least 1" % (overlap_rate, time_window, stride)
        )

    seg_data = [
        x_data[start:start + time_window]
        for start in range(0, x_data.shape[0], stride)
    ]
    y_segmented_list = [y] * len(seg_data)
    return seg_data, y_segmented_list


# In[5]:

def handle_missing_values(df):
    """Replace zeros in the ``x``, ``y`` and ``z`` columns with NaN.

    The frame is modified in place and also returned, so the call can be
    used either as a statement or in an assignment.
    """
    for axis in ("x", "y", "z"):
        df[axis] = df[axis].replace(0, np.nan)
    return df


# In[47]:

# Maps the activity ids found in activities.csv onto contiguous class
# labels 0..5.
change_act_id = {2: 0, 3: 1, 4: 2, 6: 3, 9: 4, 12: 5}


# In[45]:

def get_act_id(seg_id):
    """Return the class label (0..5) for the segment ``seg_id``.

    Args:
        seg_id: Value to look up in the ``segment_id`` column of
            ``df_activity``.

    Returns:
        The entry of ``change_act_id`` for the segment's ``activity_id``.

    Raises:
        ValueError: If ``seg_id`` does not match exactly one row.
        KeyError: If the activity id is not a key of ``change_act_id``.
    """
    matches = df_activity.loc[
        df_activity["segment_id"] == seg_id, "activity_id"
    ].values
    # Converting a whole array with int() only works for a single element
    # and is deprecated for 1-d arrays in recent NumPy; check explicitly and
    # convert the scalar instead.
    if len(matches) != 1:
        raise ValueError(
            "expected exactly one activity row for segment_id=%r, found %d"
            % (seg_id, len(matches))
        )
    return change_act_id[int(matches[0])]


# In[46]:

def load_data(csv_file):
    """Load one segment file and look up its class label.

    Zeros in the ``x``/``y``/``z`` columns are treated as missing, and every
    row that contains a NaN in any column is dropped.

    Args:
        csv_file: Path to a CSV whose base name is ``segment<id>`` plus an
            extension; ``<id>`` must parse as an integer.

    Returns:
        A tuple ``(x_data, act_id)``: the remaining rows as a NumPy array
        (all columns of the file) and the class label from ``get_act_id``.
    """
    csv_df = pd.read_csv(csv_file)
    csv_df = handle_missing_values(csv_df)
    csv_df.dropna(inplace=True)
    x_data = csv_df.values

    # "train/segment12.csv" -> "segment12" -> 12
    base_name = os.path.splitext(os.path.basename(csv_file))[0]
    seg_id = int(base_name.replace("segment", ""))
    return x_data, get_act_id(seg_id)


# In[48]:

def get_features(x_data):
    """Summarise every column of ``x_data`` with four statistics.

    Args:
        x_data: 2-D NumPy array (rows are samples, columns are channels).

    Returns:
        A flat list ``[std, mean, max, min, std, mean, max, min, ...]`` with
        four entries per column, in column order.  The standard deviation is
        the population one (``ddof=0``).

    Raises:
        ValueError: If ``x_data`` has no rows.
    """
    if x_data.shape[0] == 0:
        raise ValueError("cannot compute features of a segment with no rows")

    features = []
    for column in x_data.T:
        features.append(column.std(ddof=0))  # std
        features.append(np.average(column))  # avg
        features.append(np.max(column))      # max
        features.append(np.min(column))      # min
    return features


# In[42]:

# glob() returns files in arbitrary, filesystem-dependent order; sort so the
# feature rows are built in a stable order.
csv_files = sorted(glob("train/*"))

# In[43]:

X_feature_data_list = []
y_list = []
for csv_file in csv_files:
    x, y = load_data(csv_file)
    X_feature_data_list.append(get_features(x))
    y_list.append(y)

# In[51]:
# Hold out 30% of the feature rows for evaluation.  No random_state is
# given, so the split differs between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_feature_data_list, y_list, test_size=0.3
)

# In[52]:

model_ml = RandomForestClassifier(n_estimators=500, n_jobs=-1)

# In[53]:

model_ml.fit(X_train, y_train)
y_predict = model_ml.predict(X_test)

# In[54]:

print(classification_report(y_test, y_predict))

# In[55]:

print(confusion_matrix(y_test, y_predict))

# In[58]:

# Six-way categorical output distribution, one category per class label 0..5.
ngb = NGBClassifier(Dist=k_categorical(6))
ngb.fit(X_train, y_train)

# In[59]:

y_predict_ngboost = ngb.predict(X_test)

# In[60]:

# classification_report expects (y_true, y_pred); passing them the other way
# round swaps precision and recall in the printed table.
print(classification_report(y_test, y_predict_ngboost))

# In[61]:

# Same argument order as the RandomForest evaluation above, and printed
# explicitly because a bare expression shows nothing when run as a script.
print(confusion_matrix(y_test, y_predict_ngboost))