#!/usr/bin/env python
# coding: utf-8
"""Window-based feature extraction for per-segment CSV recordings.

Each input CSV is expected to have ``x``, ``y`` and ``z`` columns and to be
named ``segment<ID>.<ext>``.  The label of a segment is looked up in
``activities.csv`` through its ``segment_id`` / ``activity_id`` columns.

Running the file as a script reproduces the original notebook: it loads the
first file matched by ``train/*`` and prints the features of the whole
recording and of every sliding window.  Importing the module performs no I/O.
"""

import os
from glob import glob

import numpy as np
import pandas as pd

# Location of the segment -> activity lookup table.
ACTIVITIES_CSV = "activities.csv"
# Pattern matching the per-segment training files.
TRAIN_GLOB = "train/*"

# Tolerance added before truncating the window step, so that binary float
# noise such as (1 - 0.9) * 10 == 0.9999999999999998 does not turn an intended
# step of 1 into 0.
_STEP_EPSILON = 1e-9

# Segment -> activity table.  Populated when the file runs as a script, or
# lazily on the first lookup when the module is imported.
df_activity = None


def segmentation(x_data, y, overlap_rate, time_window):
    """Cut ``x_data`` into sliding windows along its first axis.

    Args:
        x_data: Sliceable object exposing ``shape[0]`` (e.g. a 2-D array).
        y: Label attached to every produced window.
        overlap_rate: Fraction of ``time_window`` shared by two consecutive
            windows; must be smaller than 1.
        time_window: Number of rows per window; must be at least 1.

    Returns:
        A ``(seg_data, y_segmented_list)`` pair of equally long lists.  The
        trailing windows may hold fewer than ``time_window`` rows because the
        data is not padded.

    Raises:
        ValueError: If ``time_window`` < 1 or ``overlap_rate`` >= 1.
    """
    if time_window < 1:
        raise ValueError(f"time_window must be >= 1, got {time_window!r}")
    if overlap_rate >= 1:
        raise ValueError(f"overlap_rate must be < 1, got {overlap_rate!r}")

    # Distance between two window starts; never allowed to collapse to 0,
    # which would make range() raise.
    step = max(1, int((1 - overlap_rate) * time_window + _STEP_EPSILON))

    seg_data = [
        x_data[start:start + time_window]
        for start in range(0, x_data.shape[0], step)
    ]
    y_segmented_list = [y] * len(seg_data)
    return seg_data, y_segmented_list


def handle_missing_values(df):
    """Mark zero readings in the ``x``, ``y`` and ``z`` columns as missing.

    The frame is modified in place (zeros become ``NaN``) and also returned
    so the call can be chained.
    """
    for axis in ("x", "y", "z"):
        df[axis] = df[axis].replace(0, np.nan)
    return df


def _segment_id_from_path(csv_file):
    """Return the integer id encoded in a ``segment<ID>.<ext>`` file name."""
    stem = os.path.splitext(os.path.basename(csv_file))[0]
    return int(stem.replace("segment", ""))


def load_data(csv_file, activities=None):
    """Load one segment file and look up its activity label.

    Rows containing a zero in ``x``, ``y`` or ``z`` (treated as missing), or
    any other ``NaN``, are dropped.

    Args:
        csv_file: Path of the segment CSV; its file name carries the id.
        activities: Optional lookup table forwarded to :func:`get_act_id`.

    Returns:
        ``(x_data, act_id)``: the remaining rows as a NumPy array and the
        integer activity id of the segment.
    """
    csv_df = handle_missing_values(pd.read_csv(csv_file))
    csv_df.dropna(inplace=True)
    x_data = csv_df.values
    # The id must come from the file actually being loaded, not from the
    # first entry of the global file list.
    act_id = get_act_id(_segment_id_from_path(csv_file), activities=activities)
    return x_data, act_id


def _activity_table():
    """Return the module-level activity table, reading it on first use."""
    global df_activity
    if df_activity is None:
        df_activity = pd.read_csv(ACTIVITIES_CSV)
    return df_activity


def get_act_id(seg_id, activities=None):
    """Return the activity id recorded for ``seg_id``.

    Args:
        seg_id: Value to match against the ``segment_id`` column.
        activities: Optional DataFrame with ``segment_id`` and
            ``activity_id`` columns; defaults to the table read from
            ``activities.csv``.

    Raises:
        ValueError: If ``seg_id`` matches zero or more than one row.
    """
    table = _activity_table() if activities is None else activities
    matches = table.loc[table["segment_id"] == seg_id, "activity_id"].values
    if matches.size != 1:
        raise ValueError(
            f"expected exactly one activity for segment {seg_id!r}, "
            f"found {matches.size}"
        )
    # Index explicitly: int() on a whole array is deprecated by NumPy.
    return int(matches[0])


def get_features(x_data):
    """Compute (std, avg, max, min) for every column of ``x_data``.

    Args:
        x_data: 2-D array-like laid out as rows x columns.

    Returns:
        A flat list ``[std_0, avg_0, max_0, min_0, std_1, ...]`` holding four
        statistics per column, in column order.

    Raises:
        ValueError: If ``x_data`` is not 2-D.
    """
    data = np.asarray(x_data)
    if data.ndim != 2:
        raise ValueError(f"x_data must be 2-D, got {data.ndim} dimension(s)")

    features = []
    for column in data.T:
        features.extend(
            (
                column.std(ddof=0),
                np.average(column),
                np.max(column),
                np.min(column),
            )
        )
    return features


if __name__ == "__main__":
    df_activity = pd.read_csv(ACTIVITIES_CSV)

    csv_files = glob(TRAIN_GLOB)
    if not csv_files:
        raise FileNotFoundError(f"no training files match {TRAIN_GLOB!r}")

    x_data, y = load_data(csv_files[0])

    # Overall window feature extraction.
    x_feature = get_features(x_data)
    print(x_feature)

    # Feature extraction after segmentation.
    seg_data_list, y_seg_list = segmentation(
        x_data, y, overlap_rate=0.5, time_window=10
    )
    x_feature_list = [get_features(seg_data) for seg_data in seg_data_list]

    for segment_features in x_feature_list:
        print(segment_features)