import ast
import pandas as pd
import datetime
from keras.layers import Input, Dense, Embedding, merge, Flatten, Merge, BatchNormalization
from keras.models import Model, load_model
from keras.regularizers import l2
import keras.backend as K
from keras.optimizers import SGD
import numpy as np
from sklearn.cluster import MeanShift, estimate_bandwidth
import utils
import data
from sklearn.model_selection import train_test_split
from bcolz_array_iterator import BcolzArrayIterator
import bcolz
from keras_tqdm import TQDMNotebookCallback
from keras.callbacks import ModelCheckpoint
Using Theano backend. Using gpu device 1: GeForce GTX TITAN X (CNMeM is enabled with initial size: 80.0% of memory, cuDNN 5110) /home/bckenstler/anaconda3/envs/py36/lib/python3.6/site-packages/theano/sandbox/cuda/__init__.py:600: UserWarning: Your cuDNN version is more recent than the one Theano officially supports. If you see any problems, try updating Theano or downgrading cuDNN to version 5. warnings.warn(warn)
The path below is a shared directory; replace it with your own.
data_path = "/data/datasets/taxi/"
Original repo used some bizarre tuple method of reading in data to save in a hdf5 file using fuel. The following does the same approach in that module, only using pandas and saving in a bcolz format (w/ training data as example)
meta = pd.read_csv(data_path+'metaData_taxistandsID_name_GPSlocation.csv', header=0)
meta.head()
ID | Descricao | Latitude | Longitude | |
---|---|---|---|---|
0 | 1 | Agra | 41.177146 | -8.609670 |
1 | 2 | Alameda | 41.156190 | -8.591064 |
2 | 3 | Aldoar | 41.170525 | -8.665876 |
3 | 4 | Alfândega | 41.143764 | -8.621803 |
4 | 5 | Amial | 41.183510 | -8.612726 |
train = pd.read_csv(data_path+'train/train.csv', header=0)
train.head()
TRIP_ID | CALL_TYPE | ORIGIN_CALL | ORIGIN_STAND | TAXI_ID | TIMESTAMP | DAY_TYPE | MISSING_DATA | POLYLINE | |
---|---|---|---|---|---|---|---|---|---|
0 | 1372636858620000589 | C | NaN | NaN | 20000589 | 1372636858 | A | False | [[-8.618643,41.141412],[-8.618499,41.141376],[... |
1 | 1372637303620000596 | B | NaN | 7.0 | 20000596 | 1372637303 | A | False | [[-8.639847,41.159826],[-8.640351,41.159871],[... |
2 | 1372636951620000320 | C | NaN | NaN | 20000320 | 1372636951 | A | False | [[-8.612964,41.140359],[-8.613378,41.14035],[-... |
3 | 1372636854620000520 | C | NaN | NaN | 20000520 | 1372636854 | A | False | [[-8.574678,41.151951],[-8.574705,41.151942],[... |
4 | 1372637091620000337 | C | NaN | NaN | 20000337 | 1372637091 | A | False | [[-8.645994,41.18049],[-8.645949,41.180517],[-... |
train['ORIGIN_CALL'] = pd.Series(pd.factorize(train['ORIGIN_CALL'])[0]) + 1
train['ORIGIN_STAND']=pd.Series([0 if pd.isnull(x) or x=='' else int(x) for x in train["ORIGIN_STAND"]])
train['TAXI_ID'] = pd.Series(pd.factorize(train['TAXI_ID'])[0]) + 1
train['DAY_TYPE'] = pd.Series([ord(x[0]) - ord('A') for x in train['DAY_TYPE']])
The array of long/lat coordinates per trip (row) is read in as a string. The function ast.literal_eval(x)
evaluates the string into the expression it represents (safely). This happens below
polyline = pd.Series([ast.literal_eval(x) for x in train['POLYLINE']])
Split into latitude/longitude
train['LATITUDE'] = pd.Series([np.array([point[1] for point in poly],dtype=np.float32) for poly in polyline])
train['LONGITUDE'] = pd.Series([np.array([point[0] for point in poly],dtype=np.float32) for poly in polyline])
# Persist the raw tables as plain arrays.  `.values` is used instead of
# `as_matrix()`, which newer pandas releases no longer provide.
utils.save_array(data_path+'train/train.bc', train.values)
utils.save_array(data_path+'train/meta_train.bc', meta.values)
After converting 'csv_to_hdf5.py' functionality to pandas, I saved that array and then simply constructed the rest of the features as specified in the paper using pandas. I didn't bother seeing how the author did it as it was extremely obtuse and involved the fuel module.
train = pd.DataFrame(utils.load_array(data_path+'train/train.bc'), columns=['TRIP_ID', 'CALL_TYPE', 'ORIGIN_CALL', 'ORIGIN_STAND', 'TAXI_ID',
'TIMESTAMP', 'DAY_TYPE', 'MISSING_DATA', 'POLYLINE', 'LATITUDE', 'LONGITUDE'])
train.head()
TRIP_ID | CALL_TYPE | ORIGIN_CALL | ORIGIN_STAND | TAXI_ID | TIMESTAMP | DAY_TYPE | MISSING_DATA | POLYLINE | LATITUDE | LONGITUDE | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1372636858620000589 | C | 0 | 0 | 1 | 1372636858 | 0 | False | [[-8.618643,41.141412],[-8.618499,41.141376],[... | [41.1414, 41.1414, 41.1425, 41.1438, 41.1444, ... | [-8.61864, -8.6185, -8.62033, -8.62215, -8.623... |
1 | 1372637303620000596 | B | 0 | 7 | 2 | 1372637303 | 0 | False | [[-8.639847,41.159826],[-8.640351,41.159871],[... | [41.1598, 41.1599, 41.1601, 41.1605, 41.1609, ... | [-8.63985, -8.64035, -8.6422, -8.64445, -8.646... |
2 | 1372636951620000320 | C | 0 | 0 | 3 | 1372636951 | 0 | False | [[-8.612964,41.140359],[-8.613378,41.14035],[-... | [41.1404, 41.1404, 41.1403, 41.1404, 41.1404, ... | [-8.61296, -8.61338, -8.61421, -8.61477, -8.61... |
3 | 1372636854620000520 | C | 0 | 0 | 4 | 1372636854 | 0 | False | [[-8.574678,41.151951],[-8.574705,41.151942],[... | [41.152, 41.1519, 41.1519, 41.152, 41.1519, 41... | [-8.57468, -8.57471, -8.5747, -8.57466, -8.574... |
4 | 1372637091620000337 | C | 0 | 0 | 5 | 1372637091 | 0 | False | [[-8.645994,41.18049],[-8.645949,41.180517],[-... | [41.1805, 41.1805, 41.18, 41.1789, 41.1785, 41... | [-8.64599, -8.64595, -8.64605, -8.6468, -8.649... |
The paper discusses how many categorical variables there are per category. The following all check out
train['ORIGIN_CALL'].max()
57105
train['ORIGIN_STAND'].max()
63
train['TAXI_ID'].max()
448
Self-explanatory
train['DAY_OF_WEEK'] = pd.Series([datetime.datetime.fromtimestamp(t).weekday() for t in train['TIMESTAMP']])
Quarter hour of the day, i.e. one of the 4*24 = 96 quarter-hour slots in a day
train['QUARTER_HOUR'] = pd.Series([int((datetime.datetime.fromtimestamp(t).hour*60 + datetime.datetime.fromtimestamp(t).minute)/15)
for t in train['TIMESTAMP']])
Self-explanatory
train['WEEK_OF_YEAR'] = pd.Series([datetime.datetime.fromtimestamp(t).isocalendar()[1] for t in train['TIMESTAMP']])
Target coords are the last in the sequence (final position). If there are no positions, or only 1, then mark as invalid w/ nan in order to drop later
# Target = [longitude, latitude] of the last recorded point.  Trips with
# fewer than two points get NaN so that they can be dropped later.
# (`np` is the only alias numpy is imported under in this file.)
train['TARGET'] = pd.Series([
    [lon[-1], lat[-1]] if len(lon) > 1 else np.nan
    for lon, lat in zip(train['LONGITUDE'], train['LATITUDE'])
])
This function creates the continuous inputs, which are the concatenated k first and k last coords in a sequence, as discussed in the paper.
If there aren't at least 2* k coords excluding the target, then the k first and k last overlap. In this case the sequence (excluding target) is padded at the end with the last coord in the sequence. The paper mentioned they padded front and back but didn't specify in what manner.
Also marks any invalid w/ na's
def _first_last_k(lons, lats, k):
    """Concatenate the first/last k longitudes, then the first/last k latitudes."""
    if len(lons) < 2 * k:
        # Too short for two disjoint windows: extend the trace at the end by
        # repeating its final point until it is 4*k long, so the "last k"
        # window consists purely of that final point.
        lons = np.pad(lons, (0, 4 * k - len(lons)), mode='edge')
        lats = np.pad(lats, (0, 4 * k - len(lats)), mode='edge')
    return np.concatenate(
        [lons[:k], lons[len(lons) - k:], lats[:k], lats[len(lats) - k:]]
    ).flatten()


def start_stop_inputs(k, data=None):
    """Build the continuous model input (first and last k coordinates) per trip.

    The final point of every trace is the prediction target and is therefore
    excluded before the windows are taken.

    Args:
        k: number of points taken from each end of the trace (k >= 1).
        data: frame with 'LONGITUDE' and 'LATITUDE' columns holding one
            coordinate array per row.  Defaults to the notebook-level
            ``train`` frame, which is what the original one-argument form
            always used.

    Returns:
        pandas Series with one entry per row: a flat array of length 4*k
        (lon first k, lon last k, lat first k, lat last k), or NaN for
        traces with fewer than two points.
    """
    if data is None:
        data = train

    features = []
    for lons, lats in zip(data['LONGITUDE'], data['LATITUDE']):
        if len(lons) < 2 or len(lats) < 2:
            # Nothing is left once the target point is removed.
            features.append(np.nan)
        else:
            features.append(_first_last_k(lons[:-1], lats[:-1], k))
    return pd.Series(features)
train['COORD_FEATURES'] = start_stop_inputs(5)
train.shape
(1710670, 16)
train.dropna().shape
(1674160, 16)
Drop na's
# Remove trips that were marked invalid (NaN target / features), then persist.
train = train.dropna()
# `.values` replaces `as_matrix()`, which newer pandas releases no longer provide.
utils.save_array(data_path+'train/train_features.bc', train.values)
train = pd.read_csv(data_path+'train/train.csv', header=0)
test = pd.read_csv(data_path+'test/test.csv', header=0)
def _first_last_k(lons, lats, k):
    """Concatenate the first/last k longitudes, then the first/last k latitudes."""
    if len(lons) < 2 * k:
        # Too short for two disjoint windows: extend the trace at the end by
        # repeating its final point until it is 4*k long, so the "last k"
        # window consists purely of that final point.
        lons = np.pad(lons, (0, 4 * k - len(lons)), mode='edge')
        lats = np.pad(lats, (0, 4 * k - len(lats)), mode='edge')
    return np.concatenate(
        [lons[:k], lons[len(lons) - k:], lats[:k], lats[len(lats) - k:]]
    ).flatten()


def start_stop_inputs(k, data, test):
    """Build the continuous model input (first and last k coordinates) per trip.

    Args:
        k: number of points taken from each end of the trace (k >= 1).
        data: frame with 'LONGITUDE' and 'LATITUDE' columns holding one
            coordinate array per row.
        test: False for training data, where the last point of each trace is
            the target and is excluded from the features (so at least two
            points are required).  True for test data, where the whole trace
            is used and a single point is enough.

    Returns:
        pandas Series with one entry per row: a flat array of length 4*k
        (lon first k, lon last k, lat first k, lat last k), or NaN when the
        trace is too short.
    """
    min_points = 1 if test else 2
    features = []
    for lons, lats in zip(data['LONGITUDE'], data['LATITUDE']):
        if len(lons) < min_points or len(lats) < min_points:
            features.append(np.nan)
            continue
        if not test:
            # Hide the target point from the inputs.
            lons, lats = lons[:-1], lats[:-1]
        features.append(_first_last_k(lons, lats, k))
    return pd.Series(features)
Pre-calculated below on train set
lat_mean = 41.15731
lat_std = 0.074120656
long_mean = -8.6161413
long_std = 0.057200309
def feature_ext(data, test=False):
    """Derive the model's feature columns from a raw trip table.

    The passed frame is modified in place (columns are overwritten/added in a
    fixed order: LATITUDE, LONGITUDE, TARGET (training only), COORD_FEATURES,
    DAY_OF_WEEK, QUARTER_HOUR, WEEK_OF_YEAR).  Code that re-labels the saved
    matrix by position relies on that order.

    Args:
        data: DataFrame with the columns ORIGIN_CALL, ORIGIN_STAND, TAXI_ID,
            DAY_TYPE, POLYLINE (string form of [[lon, lat], ...]) and
            TIMESTAMP (seconds since the epoch).
        test: when True no TARGET column is created; the flag is also
            forwarded to ``start_stop_inputs``.

    Returns:
        A new DataFrame: ``data`` with every row containing a missing value
        dropped.
    """
    row_index = data.index

    def as_column(values):
        # Build every new column on the frame's own index.  A Series with a
        # default RangeIndex is label-aligned on assignment and would
        # silently produce NaNs for frames that are not indexed 0..n-1.
        return pd.Series(values, index=row_index)

    # Categorical ids.  factorize codes missing values as -1, so after the +1
    # shift 0 means "missing".
    # NOTE: the codes depend on the order of appearance within *this* frame,
    # so ids from separately processed frames are not comparable.
    data['ORIGIN_CALL'] = as_column(pd.factorize(data['ORIGIN_CALL'])[0]) + 1
    data['ORIGIN_STAND'] = as_column(
        [0 if pd.isnull(x) or x == '' else int(x) for x in data['ORIGIN_STAND']])
    data['TAXI_ID'] = as_column(pd.factorize(data['TAXI_ID'])[0]) + 1
    data['DAY_TYPE'] = as_column([ord(x[0]) - ord('A') for x in data['DAY_TYPE']])

    # POLYLINE is stored as text; literal_eval parses it without executing code.
    polylines = [ast.literal_eval(x) for x in data['POLYLINE']]
    raw_lats = [np.array([point[1] for point in poly], dtype=np.float32)
                for poly in polylines]
    raw_lons = [np.array([point[0] for point in poly], dtype=np.float32)
                for poly in polylines]

    # Inputs are standardised with the pre-computed training-set statistics;
    # the target below deliberately stays in raw degrees.
    data['LATITUDE'] = as_column([(t - lat_mean) / lat_std for t in raw_lats])
    data['LONGITUDE'] = as_column([(t - long_mean) / long_std for t in raw_lons])

    if not test:
        # Last recorded point = destination; NaN marks trips too short to use.
        data['TARGET'] = as_column([
            [lon[-1], lat[-1]] if len(lon) > 1 else np.nan
            for lon, lat in zip(raw_lons, raw_lats)
        ])

    data['COORD_FEATURES'] = as_column(start_stop_inputs(5, data, test).values)

    # Convert each timestamp once.  fromtimestamp() uses the machine's local
    # timezone, so these calendar features depend on where the code runs.
    moments = [datetime.datetime.fromtimestamp(t) for t in data['TIMESTAMP']]
    data['DAY_OF_WEEK'] = as_column([m.weekday() for m in moments])
    data['QUARTER_HOUR'] = as_column([(m.hour * 60 + m.minute) // 15 for m in moments])
    data['WEEK_OF_YEAR'] = as_column([m.isocalendar()[1] for m in moments])

    return data.dropna()
train = feature_ext(train)
test = feature_ext(test, test=True)
test.head()
TRIP_ID | CALL_TYPE | ORIGIN_CALL | ORIGIN_STAND | TAXI_ID | TIMESTAMP | DAY_TYPE | MISSING_DATA | POLYLINE | LATITUDE | LONGITUDE | COORD_FEATURES | DAY_OF_WEEK | QUARTER_HOUR | WEEK_OF_YEAR | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | T1 | B | 0 | 15 | 1 | 1408039037 | 0 | False | [[-8.585676,41.148522],[-8.585712,41.148639],[... | [-0.118578, -0.116982, -0.1141, -0.113122, -0.... | [0.532604, 0.531971, 0.532454, 0.531671, 0.527... | [0.532604, 0.531971, 0.532454, 0.531671, 0.527... | 3 | 43 | 33 |
1 | T2 | B | 0 | 57 | 2 | 1408038611 | 0 | False | [[-8.610876,41.14557],[-8.610858,41.145579],[-... | [-0.158413, -0.158258, -0.155736, -0.150024, -... | [0.0920491, 0.0923659, 0.0915823, 0.0996017, 0... | [0.0920491, 0.0923659, 0.0915823, 0.0996017, 0... | 3 | 43 | 33 |
2 | T3 | B | 0 | 15 | 3 | 1408038568 | 0 | False | [[-8.585739,41.148558],[-8.58573,41.148828],[-... | [-0.118063, -0.11446, -0.112505, -0.111887, -0... | [0.531504, 0.531671, 0.531821, 0.5219, 0.52490... | [0.531504, 0.531671, 0.531821, 0.5219, 0.52490... | 3 | 43 | 33 |
3 | T4 | B | 0 | 53 | 4 | 1408039090 | 0 | False | [[-8.613963,41.141169],[-8.614125,41.141124],[... | [-0.217753, -0.21837, -0.221047, -0.222488, -0... | [0.0380801, 0.0352457, 0.0184065, 0.0151053, 0... | [0.0380801, 0.0352457, 0.0184065, 0.0151053, 0... | 3 | 43 | 33 |
4 | T5 | B | 0 | 18 | 5 | 1408039177 | 0 | False | [[-8.619903,41.148036],[-8.619894,41.148036]] | [-0.125114, -0.125114] | [-0.0657565, -0.0656064] | [-0.0657565, -0.0656064, -0.0656064, -0.065606... | 3 | 43 | 33 |
# Persist the engineered features.  The arrays carry no column names, so the
# column order produced by feature_ext() has to be repeated when reloading.
# `.values` replaces `as_matrix()`, which newer pandas releases no longer provide.
utils.save_array(data_path+'train/train_features.bc', train.values)
utils.save_array(data_path+'test/test_features.bc', test.values)
train.head()
TRIP_ID | CALL_TYPE | ORIGIN_CALL | ORIGIN_STAND | TAXI_ID | TIMESTAMP | DAY_TYPE | MISSING_DATA | POLYLINE | LATITUDE | LONGITUDE | TARGET | COORD_FEATURES | DAY_OF_WEEK | QUARTER_HOUR | WEEK_OF_YEAR | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1372636858620000589 | C | 0 | 0 | 1 | 1372636858 | 0 | False | [[-8.618643,41.141412],[-8.618499,41.141376],[... | [-0.21451, -0.214974, -0.199688, -0.182087, -0... | [-0.0437321, -0.0412145, -0.0731591, -0.105104... | [-8.63084, 41.1545] | [-0.0437321, -0.0412145, -0.0731591, -0.105104... | 6 | 68 | 26 |
1 | 1372637303620000596 | B | 0 | 7 | 2 | 1372637303 | 0 | False | [[-8.639847,41.159826],[-8.640351,41.159871],[... | [0.0339161, 0.0345337, 0.0378275, 0.0429227, 0... | [-0.414429, -0.423249, -0.455494, -0.494991, -... | [-8.66574, 41.1707] | [-0.414429, -0.423249, -0.455494, -0.494991, -... | 6 | 68 | 26 |
2 | 1372636951620000320 | C | 0 | 0 | 3 | 1372636951 | 0 | False | [[-8.612964,41.140359],[-8.613378,41.14035],[-... | [-0.228715, -0.228818, -0.229796, -0.228561, -... | [0.0555529, 0.048317, 0.0336785, 0.0239251, 0.... | [-8.61597, 41.1405] | [0.0555529, 0.048317, 0.0336785, 0.0239251, 0.... | 6 | 68 | 26 |
3 | 1372636854620000520 | C | 0 | 0 | 4 | 1372636854 | 0 | False | [[-8.574678,41.151951],[-8.574705,41.151942],[... | [-0.0723098, -0.0724127, -0.0725671, -0.072206... | [0.724872, 0.724405, 0.724572, 0.725189, 0.724... | [-8.608, 41.1429] | [0.724872, 0.724405, 0.724572, 0.725189, 0.724... | 6 | 68 | 26 |
4 | 1372637091620000337 | C | 0 | 0 | 5 | 1372637091 | 0 | False | [[-8.645994,41.18049],[-8.645949,41.180517],[-... | [0.312708, 0.313068, 0.306789, 0.291092, 0.285... | [-0.5219, -0.521117, -0.522834, -0.536055, -0.... | [-8.68727, 41.1781] | [-0.5219, -0.521117, -0.522834, -0.536055, -0.... | 6 | 68 | 26 |
Meanshift clustering as performed in the paper
# Column labels are re-attached by position, so they must follow the order in
# which feature_ext() created the columns (TARGET and COORD_FEATURES come
# before the calendar features).  The other reloads of this file use the same
# order.
train = pd.DataFrame(
    utils.load_array(data_path+'train/train_features.bc'),
    columns=['TRIP_ID', 'CALL_TYPE', 'ORIGIN_CALL', 'ORIGIN_STAND', 'TAXI_ID',
             'TIMESTAMP', 'DAY_TYPE', 'MISSING_DATA', 'POLYLINE', 'LATITUDE', 'LONGITUDE', 'TARGET',
             'COORD_FEATURES', "DAY_OF_WEEK", "QUARTER_HOUR", "WEEK_OF_YEAR"])
Clustering performed on the targets
# Stack the per-trip [longitude, latitude] targets into one (n_trips, 2) array.
# `.values` replaces `as_matrix()`, which newer pandas releases no longer provide.
y_targ = np.vstack(train["TARGET"].values)
from sklearn.cluster import MeanShift, estimate_bandwidth
You can use the commented-out code for an estimate of the bandwidth, which causes clustering to converge much quicker.
This is not mentioned in the paper but is included in the code. In order to get results similar to the paper's, they manually chose the uncommented bandwidth
#bw = estimate_bandwidth(y_targ, quantile=.1, n_samples=1000)
bw = 0.001
This takes some time
ms = MeanShift(bandwidth=bw, bin_seeding=True, min_bin_freq=5)
ms.fit(y_targ)
MeanShift(bandwidth=0.001, bin_seeding=True, cluster_all=True, min_bin_freq=5, n_jobs=1, seeds=None)
cluster_centers = ms.cluster_centers_
This is very close to the number of clusters mentioned in the paper
cluster_centers.shape
(3421, 2)
utils.save_array(data_path+"cluster_centers_bw_001.bc", cluster_centers)
train = pd.DataFrame(utils.load_array(data_path+'train/train_features.bc'),columns=['TRIP_ID', 'CALL_TYPE', 'ORIGIN_CALL', 'ORIGIN_STAND', 'TAXI_ID',
'TIMESTAMP', 'DAY_TYPE', 'MISSING_DATA', 'POLYLINE', 'LATITUDE', 'LONGITUDE', 'TARGET',
'COORD_FEATURES', "DAY_OF_WEEK", "QUARTER_HOUR", "WEEK_OF_YEAR"])
cluster_centers = utils.load_array(data_path+"cluster_centers_bw_001.bc")
long = np.array([c[0] for c in cluster_centers])
lat = np.array([c[1] for c in cluster_centers])
X_train, X_val = train_test_split(train, test_size=0.2, random_state=42)
def get_features(data, center_longs=None, center_lats=None):
    """Assemble the list of input arrays in the order the model expects.

    Args:
        data: frame with the columns COORD_FEATURES, ORIGIN_CALL, TAXI_ID,
            ORIGIN_STAND, QUARTER_HOUR, DAY_OF_WEEK and WEEK_OF_YEAR.
        center_longs: 1-D array of cluster-centre longitudes.  Defaults to
            the notebook-level ``long``.
        center_lats: 1-D array of cluster-centre latitudes.  Defaults to the
            notebook-level ``lat``.

    Returns:
        List of nine 2-D arrays with one row per trip: the coordinate
        features, six single-column categorical ids, and the cluster
        longitudes / latitudes repeated for every row.
    """
    if center_longs is None:
        center_longs = long
    if center_lats is None:
        center_lats = lat

    columns = ['COORD_FEATURES', 'ORIGIN_CALL', 'TAXI_ID', 'ORIGIN_STAND',
               'QUARTER_HOUR', 'DAY_OF_WEEK', 'WEEK_OF_YEAR']
    # `.values` replaces `as_matrix()`, which newer pandas releases no longer provide.
    features = [np.vstack(data[name].values) for name in columns]

    # The model takes the cluster centres as ordinary inputs, so the same
    # vector has to be supplied once per sample.
    n_rows = data.shape[0]
    features.append(np.tile(center_longs, (n_rows, 1)))
    features.append(np.tile(center_lats, (n_rows, 1)))
    return features
def get_target(data):
    """Return the TARGET column stacked into one 2-D array (one row per trip)."""
    # `.values` replaces `as_matrix()`, which newer pandas releases no longer provide.
    return np.vstack(data["TARGET"].values)
X_train_features = get_features(X_train)
X_train_target = get_target(X_train)
utils.save_array(data_path+'train/X_train_features.bc', get_features(X_train))
(1339328, 20)
Load training data and cluster centers
train = pd.DataFrame(utils.load_array(data_path+'train/train_features.bc'),columns=['TRIP_ID', 'CALL_TYPE', 'ORIGIN_CALL', 'ORIGIN_STAND', 'TAXI_ID',
'TIMESTAMP', 'DAY_TYPE', 'MISSING_DATA', 'POLYLINE', 'LATITUDE', 'LONGITUDE', 'TARGET',
'COORD_FEATURES', "DAY_OF_WEEK", "QUARTER_HOUR", "WEEK_OF_YEAR"])
Validation cuts
cuts = [
1376503200, # 2013-08-14 18:00
1380616200, # 2013-10-01 08:30
1381167900, # 2013-10-07 17:45
1383364800, # 2013-11-02 04:00
1387722600 # 2013-12-22 14:30
]
print(datetime.datetime.fromtimestamp(1376503200))
2013-08-14 11:00:00
train.shape
(1674160, 16)
# Hold out every trip that was in progress at one of the cut timestamps: it
# started at or before the cut and had not yet ended (the end time is
# estimated as 15 seconds per recorded point after the first).
# Row *positions* are collected because the selection below uses .iloc.
val_indices = [
    position
    for position, (start_time, latitudes) in enumerate(
        zip(train['TIMESTAMP'], train['LATITUDE']))
    if any(start_time <= ts <= start_time + 15 * (len(latitudes) - 1)
           for ts in cuts)
]
X_valid = train.iloc[val_indices]
X_valid.head()
TRIP_ID | CALL_TYPE | ORIGIN_CALL | ORIGIN_STAND | TAXI_ID | TIMESTAMP | DAY_TYPE | MISSING_DATA | POLYLINE | LATITUDE | LONGITUDE | TARGET | COORD_FEATURES | DAY_OF_WEEK | QUARTER_HOUR | WEEK_OF_YEAR | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
200153 | 1376502576620000126 | B | 0 | 36 | 247 | 1376502576 | 0 | False | [[-8.649504,41.15421],[-8.649684,41.154201],[-... | [-0.0418419, -0.0419448, -0.0449813, -0.046422... | [-0.583255, -0.586407, -0.59711, -0.589074, -0... | [-8.61122, 41.1463] | [-0.583255, -0.586407, -0.59711, -0.589074, -0... | 2 | 43 | 33 |
200186 | 1376503146620000161 | B | 0 | 35 | 19 | 1376503146 | 0 | False | [[-8.649621,41.167323],[-8.64963,41.167251],[-... | [0.135098, 0.134121, 0.126709, 0.125371, 0.124... | [-0.585306, -0.585456, -0.589241, -0.588774, -... | [-8.64504, 41.1586] | [-0.585306, -0.585456, -0.589241, -0.588774, -... | 2 | 43 | 33 |
200200 | 1376502942620000500 | B | 0 | 15 | 428 | 1376502942 | 0 | False | [[-8.585694,41.148522],[-8.585712,41.148801],[... | [-0.118578, -0.114821, -0.112402, -0.116982, -... | [0.532287, 0.531971, 0.523018, 0.524735, 0.524... | [-8.61524, 41.1418] | [0.532287, 0.531971, 0.523018, 0.524735, 0.524... | 2 | 43 | 33 |
200202 | 1376502604620000105 | C | 0 | 0 | 87 | 1376502604 | 0 | False | [[-8.61093,41.145498],[-8.610939,41.145516],[-... | [-0.15939, -0.159133, -0.153883, -0.145392, -0... | [0.0910987, 0.0909487, 0.093783, 0.108572, 0.1... | [-8.64832, 41.1648] | [0.0910987, 0.0909487, 0.093783, 0.108572, 0.1... | 2 | 43 | 33 |
200227 | 1376502611620000022 | C | 0 | 0 | 304 | 1376502611 | 0 | False | [[-8.591301,41.162715],[-8.591004,41.162562],[... | [0.0729274, 0.0708687, 0.0587228, 0.0539879, 0... | [0.43427, 0.439455, 0.42735, 0.423566, 0.41539... | [-8.60977, 41.1512] | [0.43427, 0.439455, 0.42735, 0.423566, 0.41539... | 2 | 43 | 33 |
# Sanity check: show the start time of every held-out trip.  The frame is
# called X_valid; fromtimestamp() prints in the machine's local timezone.
for d in X_valid['TIMESTAMP']:
    print(datetime.datetime.fromtimestamp(d))
2013-08-14 10:49:36 2013-08-14 10:59:06 2013-08-14 10:55:42 2013-08-14 10:50:04 2013-08-14 10:50:11 2013-08-14 10:56:57 2013-08-14 10:36:51 2013-08-14 10:44:15 2013-08-14 10:55:50 2013-08-14 10:50:35 2013-08-14 10:50:27 2013-08-14 10:43:57 2013-08-14 10:16:48 2013-08-14 10:40:47 2013-08-14 10:45:55 2013-08-14 10:43:00 2013-08-14 10:53:22 2013-08-14 10:50:03 2013-08-14 10:26:22 2013-08-14 10:59:15 2013-08-14 10:50:17 2013-08-14 10:56:34 2013-08-14 10:53:42 2013-08-14 10:47:46 2013-08-14 10:58:46 2013-08-14 10:24:23 2013-08-14 10:55:19 2013-08-14 10:57:03 2013-08-14 10:56:11 2013-08-14 10:56:52 2013-08-14 10:57:57 2013-08-14 10:08:15 2013-08-14 10:51:14 2013-08-14 10:58:31 2013-08-14 10:47:31 2013-08-14 10:30:36 2013-08-14 10:17:59 2013-08-14 10:48:03 2013-08-14 10:55:52 2013-08-14 10:49:06 2013-08-14 10:58:55 2013-08-14 10:51:24 2013-08-14 10:54:12 2013-08-14 10:54:26 2013-08-14 10:51:18 2013-08-14 10:59:56 2013-08-14 10:48:31 2013-08-14 10:51:56 2013-08-14 10:39:22 2013-08-14 10:57:25 2013-08-14 10:57:28 2013-08-14 10:57:40 2013-08-14 10:39:01 2013-08-14 10:50:39 2013-08-14 09:48:19 2013-10-01 01:16:12 2013-10-01 01:28:04 2013-10-01 01:18:37 2013-10-01 01:24:48 2013-10-01 01:23:39 2013-10-01 01:28:37 2013-10-01 01:20:16 2013-10-01 01:23:49 2013-10-01 01:27:11 2013-10-01 01:06:20 2013-10-01 01:28:08 2013-10-01 01:29:02 2013-10-01 01:24:44 2013-10-01 01:24:44 2013-10-01 01:19:06 2013-10-01 00:28:33 2013-10-01 01:29:28 2013-10-01 01:27:31 2013-10-01 01:22:13 2013-10-01 01:26:03 2013-10-01 01:28:55 2013-10-01 01:18:10 2013-10-01 01:22:13 2013-10-01 01:14:30 2013-10-01 01:24:41 2013-10-01 01:22:16 2013-10-01 01:25:35 2013-10-01 01:21:27 2013-10-01 01:11:33 2013-10-01 01:10:18 2013-10-01 01:09:33 2013-10-01 01:01:15 2013-10-01 01:17:58 2013-10-01 01:18:00 2013-10-01 01:13:26 2013-10-01 01:18:01 2013-10-01 01:25:54 2013-10-01 01:21:20 2013-10-01 01:25:31 2013-10-01 01:25:54 2013-10-01 01:23:40 2013-10-01 01:26:46 2013-10-01 01:23:31 2013-10-01 01:17:09 2013-10-01 01:21:57 
2013-10-01 00:29:09 2013-10-01 01:14:47 2013-10-01 01:04:25 2013-10-01 01:14:09 2013-10-01 01:16:59 2013-10-01 01:27:16 2013-10-01 01:16:26 2013-10-01 01:23:18 2013-10-01 01:16:05 2013-10-01 01:27:43 2013-10-01 01:08:13 2013-10-01 01:19:21 2013-10-01 01:21:19 2013-10-01 01:24:20 2013-10-01 01:26:45 2013-10-01 01:18:28 2013-10-01 01:19:45 2013-10-01 01:28:10 2013-10-01 01:22:20 2013-10-01 01:18:42 2013-10-01 01:19:52 2013-10-01 01:18:44 2013-10-01 01:15:11 2013-10-01 01:19:24 2013-10-01 01:23:58 2013-10-01 01:28:50 2013-10-01 01:13:24 2013-10-01 01:28:38 2013-10-01 01:24:50 2013-10-01 01:14:19 2013-10-01 01:10:05 2013-10-01 01:26:31 2013-10-01 01:28:01 2013-09-30 23:44:16 2013-10-01 01:21:43 2013-10-01 01:26:57 2013-10-01 01:25:25 2013-10-01 01:25:36 2013-10-01 01:16:34 2013-10-01 01:26:40 2013-10-01 01:14:56 2013-10-01 01:13:10 2013-10-01 01:28:34 2013-10-01 01:19:08 2013-10-01 01:24:57 2013-10-01 00:52:43 2013-10-01 01:25:28 2013-10-01 01:22:54 2013-10-01 01:28:49 2013-10-01 00:13:25 2013-10-07 10:34:47 2013-10-07 10:38:08 2013-10-07 10:31:10 2013-10-07 10:35:12 2013-10-07 10:41:50 2013-10-07 10:34:31 2013-10-07 10:42:02 2013-10-07 10:39:05 2013-10-07 10:31:43 2013-10-07 10:34:27 2013-10-07 10:31:48 2013-10-07 10:42:24 2013-10-07 10:38:37 2013-10-07 10:29:02 2013-10-07 10:33:55 2013-10-07 10:17:07 2013-10-07 10:44:31 2013-10-07 10:42:52 2013-10-07 10:26:05 2013-10-07 10:34:07 2013-10-07 10:40:59 2013-10-07 10:41:36 2013-10-07 10:33:47 2013-10-07 10:30:59 2013-10-07 10:38:59 2013-10-07 10:28:56 2013-10-07 10:41:24 2013-10-07 10:41:49 2013-10-07 10:42:47 2013-10-07 10:34:09 2013-10-07 10:40:31 2013-10-07 10:21:34 2013-10-07 10:43:52 2013-10-07 10:18:11 2013-10-07 10:41:47 2013-10-07 10:33:04 2013-10-07 10:40:53 2013-10-07 10:36:38 2013-10-07 10:41:46 2013-10-07 10:03:36 2013-10-07 10:44:45 2013-10-07 10:21:42 2013-10-07 10:24:07 2013-10-07 10:40:35 2013-10-07 10:41:00 2013-10-07 10:43:10 2013-10-07 10:23:55 2013-10-07 10:43:30 2013-10-07 10:25:24 2013-10-07 10:35:07 
2013-10-07 10:43:33 2013-10-07 10:39:30 2013-10-07 10:31:42 2013-10-07 10:39:17 2013-10-07 10:42:47 2013-10-07 10:39:20 2013-10-07 10:44:41 2013-10-07 10:24:22 2013-10-07 10:12:39 2013-10-07 10:37:25 2013-10-07 10:42:55 2013-10-07 10:14:35 2013-10-07 10:37:12 2013-10-07 10:32:29 2013-10-07 10:42:37 2013-10-07 10:26:52 2013-10-07 10:31:19 2013-10-07 10:44:58 2013-11-01 20:47:37 2013-11-01 20:54:00 2013-11-01 20:58:53 2013-11-01 20:56:37 2013-11-01 20:56:09 2013-11-01 20:51:05 2013-11-01 20:50:58 2013-11-01 20:55:26 2013-11-01 20:53:43 2013-11-01 20:53:46 2013-11-01 20:54:55 2013-11-01 20:59:28 2013-11-01 20:56:54 2013-11-01 20:50:37 2013-11-01 20:48:40 2013-11-01 20:55:46 2013-11-01 20:45:20 2013-11-01 20:46:22 2013-11-01 20:48:25 2013-11-01 20:47:19 2013-11-01 20:57:31 2013-11-01 20:58:14 2013-11-01 20:49:30 2013-11-01 20:43:31 2013-11-01 20:59:00 2013-11-01 20:54:23 2013-11-01 20:51:01 2013-11-01 20:38:12 2013-11-01 20:59:31 2013-11-01 20:56:46 2013-11-01 20:53:51 2013-11-01 20:48:00 2013-11-01 20:58:04 2013-11-01 20:52:50 2013-11-01 20:58:12 2013-11-01 20:57:37 2013-11-01 20:53:33 2013-11-01 20:54:11 2013-11-01 20:48:49 2013-11-01 20:42:56 2013-11-01 20:55:36 2013-11-01 20:51:36 2013-11-01 20:48:45 2013-11-01 20:49:17 2013-11-01 20:53:50 2013-11-01 20:45:28 2013-11-01 20:45:04 2013-11-01 20:52:17 2013-11-01 20:52:10 2013-11-01 20:59:16 2013-11-01 20:51:37 2013-11-01 20:50:10 2013-12-22 06:24:50 2013-12-22 06:04:12 2013-12-22 06:16:27 2013-12-22 06:23:06 2013-12-22 06:24:04 2013-12-22 06:17:33 2013-12-22 06:22:55 2013-12-22 06:24:35 2013-12-22 06:21:56 2013-12-22 06:22:49 2013-12-22 06:25:31 2013-12-22 06:21:31 2013-12-22 06:27:31 2013-12-22 06:29:45 2013-12-22 06:26:09 2013-12-22 06:17:08 2013-12-22 06:26:00 2013-12-22 06:20:56 2013-12-22 06:23:09 2013-12-22 06:22:31 2013-12-22 06:29:59 2013-12-22 06:27:43 2013-12-22 06:23:04 2013-12-22 06:25:30 2013-12-22 06:19:16 2013-12-22 06:23:06 2013-12-22 06:26:01 2013-12-22 06:19:45 2013-12-22 02:34:23 2013-12-22 06:29:54 
2013-12-22 06:28:39 2013-12-22 06:27:43 2013-12-22 06:16:23 2013-12-22 06:17:26
# Training set = every trip that was not held out for validation.
X_train = train.drop(train.index[val_indices])
# Load the centres from the same location the clustering step saved them to.
cluster_centers = utils.load_array(data_path+"cluster_centers_bw_001.bc")
long = np.array([c[0] for c in cluster_centers])
lat = np.array([c[1] for c in cluster_centers])
# `.values` replaces `as_matrix()`, which newer pandas releases no longer provide.
utils.save_array(data_path+'train/X_train.bc', X_train.values)
utils.save_array(data_path+'valid/X_val.bc', X_valid.values)
X_train = pd.DataFrame(utils.load_array(data_path+'train/X_train.bc'),columns=['TRIP_ID', 'CALL_TYPE', 'ORIGIN_CALL', 'ORIGIN_STAND', 'TAXI_ID',
'TIMESTAMP', 'DAY_TYPE', 'MISSING_DATA', 'POLYLINE', 'LATITUDE', 'LONGITUDE', 'TARGET',
'COORD_FEATURES', "DAY_OF_WEEK", "QUARTER_HOUR", "WEEK_OF_YEAR"])
X_val = pd.DataFrame(utils.load_array(data_path+'valid/X_val.bc'),columns=['TRIP_ID', 'CALL_TYPE', 'ORIGIN_CALL', 'ORIGIN_STAND', 'TAXI_ID',
'TIMESTAMP', 'DAY_TYPE', 'MISSING_DATA', 'POLYLINE', 'LATITUDE', 'LONGITUDE', 'TARGET',
'COORD_FEATURES', "DAY_OF_WEEK", "QUARTER_HOUR", "WEEK_OF_YEAR"])
The equirectangular loss function mentioned in the paper.
Note: Very important that y[0] is longitude and y[1] is latitude.
The Earth-radius constant "R" is taken as 6371 (i.e. kilometres) in the code below, since units were not given in the paper; being a constant factor, it only scales the loss and does not affect minimization.
def equirectangular_loss(y_true, y_pred):
    """Per-sample equirectangular distance between true and predicted points.

    Both tensors are indexed as [:, 0] = longitude and [:, 1] = latitude, in
    degrees.  The result is scaled by 6371.
    """
    to_radians = 3.141592653589793 / 180
    true_long, true_lat = y_true[:, 0] * to_radians, y_true[:, 1] * to_radians
    pred_long, pred_lat = y_pred[:, 0] * to_radians, y_pred[:, 1] * to_radians

    # East-west separation shrinks with the cosine of the mean latitude.
    east_west = (true_long - pred_long) * K.cos((true_lat + pred_lat) / 2.)
    north_south = true_lat - pred_lat
    return 6371 * K.sqrt(K.square(east_west) + K.square(north_south))
def embedding_input(name, n_in, n_out, reg):
    """Create a single-integer input and an embedding layer applied to it.

    Args:
        name: name given to the Input layer.
        n_in: number of distinct ids (first argument to Embedding).
        n_out: width of the embedding vector.
        reg: L2 regularisation strength for the embedding weights.

    Returns:
        Tuple (input tensor, embedded tensor).
    """
    id_input = Input(shape=(1,), dtype='int64', name=name)
    embedding = Embedding(n_in, n_out, input_length=1, W_regularizer=l2(reg))
    return id_input, embedding(id_input)
The following returns a fully-connected model as mentioned in the paper. Takes as input k as defined before, and the cluster centers.
Inputs: Embeddings for each category, concatenated w/ the 4*k continuous variables representing the first/last k coords as mentioned above.
Embeddings have no regularization, as it was not mentioned in paper, though are easily equipped to include.
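Paper mentions global normalization. Didn't specify exactly how they did that, whether they did it sequentially or whatnot. I just included a batchnorm layer for the continuous inputs. (Note: the taxi_mlp code below does not currently contain a BatchNormalization layer; the coordinates are instead standardized with pre-computed mean/std during feature extraction.)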
After concatenation, 1 hidden layer of 500 neurons as called for in paper.
Finally, output layer has as many outputs as there are cluster centers, w/ a softmax activation. Call this output P.
The prediction is the weighted sum of each cluster center c_i w/ corresponding predicted prob P_i.
To facilitate this, dotted output w/ cluster latitudes and longitudes separately. (this happens at variable y), then concatenated into single tensor.
NOTE!!: You will see that I have the cluster center coords as inputs. Ideally, this function should store the cluster longs/lats as a constant to be used in the model, but I could not figure out how to do that. As a consequence, I pass them in as a repeated input.
def taxi_mlp(k, cluster_centers):
    """Build the fully-connected destination-prediction model.

    Args:
        k: number of points taken from each end of a trace; the continuous
            input has 4*k values.
        cluster_centers: array whose first dimension is the number of
            cluster centres (only its shape is used here).

    Returns:
        Keras Model taking [continuous features, six id inputs, centre
        longitudes, centre latitudes] and producing a 2-column output: the
        softmax-weighted sum of the centre longitudes and of the centre
        latitudes.
    """
    n_centers = cluster_centers.shape[0]

    nums = Input(shape=(4 * k,))
    # The centre coordinates enter as ordinary (repeated) inputs.
    center_longs = Input(shape=(n_centers,))
    center_lats = Input(shape=(n_centers,))

    # (input name, largest id) per categorical variable; each gets a
    # 10-wide, unregularised embedding with one extra slot.
    categorical = [
        ('client_ID', 57106),
        ('taxi_ID', 448),
        ("stand_ID", 64),
        ("quarter_hour", 96),
        ("day_of_week", 7),
        ("week_of_year", 52),
    ]
    embedded = [embedding_input(name, size + 1, 10, 0) for name, size in categorical]
    flat_embeddings = [Flatten()(emb) for _, emb in embedded]

    hidden = merge([nums] + flat_embeddings, mode='concat')
    hidden = Dense(500, activation='relu')(hidden)
    probabilities = Dense(n_centers, activation='softmax')(hidden)

    # Expected coordinate = probabilities . centre coordinates.
    pred_long = merge([probabilities, center_longs], mode='dot')
    pred_lat = merge([probabilities, center_lats], mode='dot')
    y = merge([pred_long, pred_lat], mode='concat')

    id_inputs = [inp for inp, _ in embedded]
    return Model(input=[nums] + id_inputs + [center_longs, center_lats], output=y)
As mentioned, construction of repeated cluster longs/lats for input.
Iterator for the in-memory train pandas dataframe. I did this as opposed to using the bcolz iterator due to the pre-processing.
def data_iter(data, batch_size, cluster_centers):
    """Endlessly yield (inputs, targets) batches from an in-memory frame.

    Args:
        data: frame with the feature columns used below plus 'TARGET'.
        batch_size: maximum number of rows per batch.
        cluster_centers: sequence of (longitude, latitude) pairs.

    Yields:
        Tuple (list of nine input arrays, target array).  The last batch of a
        pass may be shorter than ``batch_size``; after it the iterator starts
        again from the first row.

    Raises:
        ValueError: if ``data`` has no rows.
    """
    n_rows = data.shape[0]
    if n_rows == 0:
        raise ValueError("data_iter needs at least one row")

    center_longs = np.array([c[0] for c in cluster_centers])
    center_lats = np.array([c[1] for c in cluster_centers])
    feature_columns = ['COORD_FEATURES', 'ORIGIN_CALL', 'TAXI_ID', 'ORIGIN_STAND',
                       'QUARTER_HOUR', 'DAY_OF_WEEK', 'WEEK_OF_YEAR']

    start = 0
    while True:
        batch = data.iloc[start:start + batch_size]
        n_batch = batch.shape[0]
        inputs = [np.vstack(batch[name].values) for name in feature_columns]
        # Repeat the centres once per row actually present in this batch so
        # every input has the same first dimension.
        inputs.append(np.tile(center_longs, (n_batch, 1)))
        inputs.append(np.tile(center_lats, (n_batch, 1)))
        yield inputs, np.vstack(batch["TARGET"].values)

        start += batch_size
        if start >= n_rows:
            # Wrap around instead of running past the end of the data.
            start = 0
# NOTE: leftover scratch line, apparently from an attempt to feed the cluster
# centres to the model through a Lambda layer.  `Lambda`, `thing` and `x` are
# not defined at this point, so running it raises NameError; it is kept here
# disabled for reference only.
# x = Lambda(thing)([x, long, lat])
Of course, k in the model needs to match k from feature construction. We again use 5 as they did in the paper
model = taxi_mlp(5, cluster_centers)
Paper used SGD opt w/ the following parameters
model.compile(optimizer=SGD(0.01, momentum=0.9), loss=equirectangular_loss, metrics=['mse'])
X_train_feat = get_features(X_train)
X_train_target = get_target(X_train)
X_val_feat = get_features(X_valid)
X_val_target = get_target(X_valid)
tqdm = TQDMNotebookCallback()
checkpoint = ModelCheckpoint(filepath=data_path+'models/tmp/weights.{epoch:03d}.{val_loss:.8f}.hdf5', save_best_only=True)
batch_size=256
model.fit(X_train_feat, X_train_target, nb_epoch=1, batch_size=batch_size, validation_data=(X_val_feat, X_val_target), callbacks=[tqdm, checkpoint], verbose=0)
5272/|/[loss: 0.469, mean_squared_error: 0.000] 100%|| 5272/5273 [01:54<00:00, 47.14it/s]
<keras.callbacks.History at 0x7fb2bb8a19e8>
model.fit(X_train_feat, X_train_target, nb_epoch=30, batch_size=batch_size, validation_data=(X_val_feat, X_val_target), callbacks=[tqdm, checkpoint], verbose=0)
5272/|/[loss: 0.107, mean_squared_error: 0.000] 100%|| 5272/5273 [01:54<00:00, 49.65it/s]
model = load_model(data_path+'models/weights.0.0799.hdf5', custom_objects={'equirectangular_loss':equirectangular_loss})
model.fit(X_train_feat, X_train_target, nb_epoch=100, batch_size=batch_size, validation_data=(X_val_feat, X_val_target), callbacks=[tqdm, checkpoint], verbose=0)
5231/|/[loss: 0.074, mean_squared_error: 0.000] 100%|| 5231/5232 [01:58<00:00, 50.19it/s]
<keras.callbacks.History at 0x7fced25954a8>
model.save(data_path+'models/current_model.hdf5')
model.fit(X_train_feat, X_train_target, nb_epoch=1, batch_size=batch_size, validation_data=(X_val_feat, X_val_target), callbacks=[tqdm, checkpoint], verbose=0)
<keras.callbacks.History at 0x7f82d815c550>
model.fit(X_train_feat, X_train_target, nb_epoch=400, batch_size=batch_size, validation_data=(X_val_feat, X_val_target), callbacks=[tqdm, checkpoint], verbose=0)
model.save(data_path+'/models/current_model.hdf5')
len(X_val_feat[0])
304
It works, but it seems to converge unrealistically quickly and the loss values are not the same. The paper does not mention what it's using as "error" in its results. I assume the same equirectangular? Not very clear. The difference in values could be due to the missing Earth-radius factor
best_model = load_model(data_path+'models/weights.308.0.03373993.hdf5', custom_objects={'equirectangular_loss':equirectangular_loss})
best_model.evaluate(X_val_feat, X_val_target)
32/304 [==>...........................] - ETA: 0s
[0.033743755401749363, 2.5798687967213293e-07]
test = pd.DataFrame(utils.load_array(data_path+'test/test_features.bc'),columns=['TRIP_ID', 'CALL_TYPE', 'ORIGIN_CALL', 'ORIGIN_STAND', 'TAXI_ID',
'TIMESTAMP', 'DAY_TYPE', 'MISSING_DATA', 'POLYLINE', 'LATITUDE', 'LONGITUDE',
'COORD_FEATURES', "DAY_OF_WEEK", "QUARTER_HOUR", "WEEK_OF_YEAR"])
test['ORIGIN_CALL'] = pd.read_csv(data_path+'real_origin_call.csv', header=None)
test['TAXI_ID'] = pd.read_csv(data_path+'real_taxi_id.csv',header=None)
X_test = get_features(test)
b = np.sort(X_test[1],axis=None)
test_preds = np.round(best_model.predict(X_test), decimals=6)
d = {0:test['TRIP_ID'], 1:test_preds[:,1], 2:test_preds[:,0]}
kaggle_out = pd.DataFrame(data=d)
kaggle_out.to_csv(data_path+'submission.csv', header=['TRIP_ID','LATITUDE', 'LONGITUDE'], index=False)
def hdist(a, b, radius=6371):
    """Return the haversine (great-circle) distance between paired points.

    Parameters
    ----------
    a, b : array-like of shape (n, 2)
        Coordinates in degrees. Column 0 is read as longitude and column 1
        as latitude. Rows of `a` are paired with rows of `b`.
    radius : float, optional
        Radius of the sphere. The default, 6371, is the Earth's mean radius
        in kilometres, so the result is in kilometres.

    Returns
    -------
    numpy.ndarray of shape (n,)
        Distance between each pair of points, in the units of `radius`.
    """
    a = np.asarray(a)
    b = np.asarray(b)
    deg2rad = np.pi / 180
    lat1 = a[:, 1] * deg2rad
    lon1 = a[:, 0] * deg2rad
    lat2 = b[:, 1] * deg2rad
    lon2 = b[:, 0] * deg2rad
    # The differences are only used inside sin(.)**2, which is even, so their
    # sign does not matter.
    dlat = lat1 - lat2
    dlon = lon1 - lon2
    al = np.sin(dlat / 2) ** 2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2) ** 2
    # Rounding can push `al` marginally outside [0, 1]; clip it so that
    # sqrt(1 - al) never yields NaN.
    al = np.clip(al, 0.0, 1.0)
    d = np.arctan2(np.sqrt(al), np.sqrt(1 - al))
    return 2 * radius * d
val_preds = best_model.predict(X_val_feat)
trn_preds = model.predict(X_train_feat)
--------------------------------------------------------------------------- KeyboardInterrupt Traceback (most recent call last) <ipython-input-88-7606f80b50cf> in <module>() ----> 1 trn_preds = model.predict(X_train_feat) /home/bckenstler/anaconda3/envs/py36/lib/python3.6/site-packages/keras/engine/training.py in predict(self, x, batch_size, verbose) 1270 f = self.predict_function 1271 return self._predict_loop(f, ins, -> 1272 batch_size=batch_size, verbose=verbose) 1273 1274 def train_on_batch(self, x, y, /home/bckenstler/anaconda3/envs/py36/lib/python3.6/site-packages/keras/engine/training.py in _predict_loop(self, f, ins, batch_size, verbose) 943 ins_batch = slice_X(ins, batch_ids) 944 --> 945 batch_outs = f(ins_batch) 946 if not isinstance(batch_outs, list): 947 batch_outs = [batch_outs] /home/bckenstler/anaconda3/envs/py36/lib/python3.6/site-packages/keras/backend/theano_backend.py in __call__(self, inputs) 957 def __call__(self, inputs): 958 assert isinstance(inputs, (list, tuple)) --> 959 return self.function(*inputs) 960 961 /home/bckenstler/anaconda3/envs/py36/lib/python3.6/site-packages/theano/compile/function_module.py in __call__(self, *args, **kwargs) 857 t0_fn = time.time() 858 try: --> 859 outputs = self.fn() 860 except Exception: 861 if hasattr(self.fn, 'position_of_error'): KeyboardInterrupt:
er = hdist(val_preds, X_val_target)
er.mean()
0.033741556
K.equal()
To-do: simple to extend to validation data
# Cut timestamps as Unix epoch seconds; the human-readable times are UTC.
cuts = [
    1376503200,  # 2013-08-14 18:00 UTC
    1380616200,  # 2013-10-01 08:30 UTC
    1381167900,  # 2013-10-07 17:45 UTC
    1383364800,  # 2013-11-02 04:00 UTC
    1387722600,  # 2013-12-22 14:30 UTC
]
# Does any training trip start exactly at one of the cut timestamps?
# Series.isin does the membership test in one vectorised pass instead of
# calling a Python lambda once per row.
train['TIMESTAMP'].isin(cuts).any()
False
train['TIMESTAMP']
0 1372636858 1 1372637303 2 1372636951 3 1372636854 4 1372637091 5 1372636965 6 1372637210 7 1372637299 8 1372637274 9 1372637905 10 1372636875 11 1372637984 12 1372637343 13 1372638595 14 1372638151 15 1372637610 16 1372638481 17 1372639135 18 1372637482 19 1372639181 20 1372638161 21 1372637254 22 1372638502 23 1372639960 24 1372637658 25 1372639092 26 1372639535 27 1372640499 28 1372639635 29 1372640555 ... 1710640 1404151621 1710641 1404152121 1710642 1404170192 1710643 1386603894 1710644 1401596832 1710645 1404151410 1710646 1404172198 1710647 1404155241 1710648 1404171548 1710649 1404151498 1710650 1404168899 1710651 1404153627 1710652 1401475142 1710653 1403935197 1710654 1404166892 1710655 1404143157 1710656 1404014448 1710657 1380123541 1710658 1373986578 1710659 1403941536 1710660 1384165182 1710661 1404164723 1710662 1404155105 1710663 1388660427 1710664 1390403767 1710665 1404171463 1710666 1404171367 1710667 1388745716 1710668 1404141826 1710669 1404157147 Name: TIMESTAMP, dtype: int64
np.any(train['TIMESTAMP']==1381167900)
False
# Raw numpy array of trip start timestamps. `.values` replaces the deprecated
# `.as_matrix()`, which was removed in pandas 1.0.
times = train['TIMESTAMP'].values
X_train.columns
Index(['TRIP_ID', 'CALL_TYPE', 'ORIGIN_CALL', 'ORIGIN_STAND', 'TAXI_ID', 'TIMESTAMP', 'DAY_TYPE', 'MISSING_DATA', 'POLYLINE', 'LATITUDE', 'LONGITUDE', 'TARGET', 'COORD_FEATURES', 'DAY_OF_WEEK', 'QUARTER_HOUR', 'WEEK_OF_YEAR'], dtype='object')
times
array([1372636858, 1372637303, 1372636951, ..., 1388745716, 1404141826, 1404157147])
# Count (trip, cut) pairs in which the cut timestamp falls inside the trip's
# time span.
count = 0
for index, row in X_val.iterrows():
    # Row lookups do not depend on the cut, so read them once per trip.
    time = row['TIMESTAMP']
    latitude = row['LATITUDE']
    # Trip end = start + 15 * (number of recorded points - 1);
    # presumably GPS points are sampled 15 s apart -- TODO confirm.
    trip_end = time + 15 * (len(latitude) - 1)
    for ts in cuts:
        if time <= ts <= trip_end:
            count += 1
one = count
count + one
304
import h5py
# Open the HDF5 file under original/ read-only for inspection.
h = h5py.File(data_path+'original/data.hdf5', 'r')
# NOTE(review): this dataset path raised KeyError ("Component not found") in
# the recorded run, so it is not present in this file. Inspect list(h.keys())
# to find the valid names -- TODO confirm which dataset was intended.
evrData=h['/Configure:0000/Run:0000/CalibCycle:0000/EvrData::DataV3/NoDetector.0:Evr.0/data']
--------------------------------------------------------------------------- KeyError Traceback (most recent call last) <ipython-input-15-4bc7106cf938> in <module>() ----> 1 evrData=h['/Configure:0000/Run:0000/CalibCycle:0000/EvrData::DataV3/NoDetector.0:Evr.0/data'] h5py/_objects.pyx in h5py._objects.with_phil.wrapper (/home/ilan/minonda/conda-bld/h5py_1482475225177/work/h5py/_objects.c:2856)() h5py/_objects.pyx in h5py._objects.with_phil.wrapper (/home/ilan/minonda/conda-bld/h5py_1482475225177/work/h5py/_objects.c:2814)() /home/bckenstler/anaconda3/envs/py36/lib/python3.6/site-packages/h5py/_hl/group.py in __getitem__(self, name) 164 raise ValueError("Invalid HDF5 object reference") 165 else: --> 166 oid = h5o.open(self.id, self._e(name), lapl=self._lapl) 167 168 otype = h5i.get_type(oid) h5py/_objects.pyx in h5py._objects.with_phil.wrapper (/home/ilan/minonda/conda-bld/h5py_1482475225177/work/h5py/_objects.c:2856)() h5py/_objects.pyx in h5py._objects.with_phil.wrapper (/home/ilan/minonda/conda-bld/h5py_1482475225177/work/h5py/_objects.c:2814)() h5py/h5o.pyx in h5py.h5o.open (/home/ilan/minonda/conda-bld/h5py_1482475225177/work/h5py/h5o.c:3742)() KeyError: 'Unable to open object (Component not found)'
# The file appears to be a Python 2 pickle: loading it with the default ASCII
# decoding fails under Python 3 with UnicodeDecodeError. numpy documents
# encoding='latin1' for reading such pickles. allow_pickle is passed explicitly
# because newer numpy versions refuse to unpickle by default.
# NOTE: unpickling can execute arbitrary code -- only load files you trust.
c = np.load(data_path+'original/arrival-clusters.pkl', encoding='latin1', allow_pickle=True)
--------------------------------------------------------------------------- UnicodeDecodeError Traceback (most recent call last) /home/bckenstler/anaconda3/envs/py36/lib/python3.6/site-packages/numpy/lib/npyio.py in load(file, mmap_mode, allow_pickle, fix_imports, encoding) 412 try: --> 413 return pickle.load(fid, **pickle_kwargs) 414 except: UnicodeDecodeError: 'ascii' codec can't decode byte 0xf7 in position 0: ordinal not in range(128) During handling of the above exception, another exception occurred: OSError Traceback (most recent call last) <ipython-input-13-2213758ffef0> in <module>() ----> 1 c = np.load(data_path+'original/arrival-clusters.pkl') /home/bckenstler/anaconda3/envs/py36/lib/python3.6/site-packages/numpy/lib/npyio.py in load(file, mmap_mode, allow_pickle, fix_imports, encoding) 414 except: 415 raise IOError( --> 416 "Failed to interpret file %s as a pickle" % repr(file)) 417 finally: 418 if own_fid: OSError: Failed to interpret file '/data/bckenstler/data/taxi/original/arrival-clusters.pkl' as a pickle
from fuel.utils import find_in_data_path
from fuel.datasets import H5PYDataset
# Derive the directory from data_path rather than hard-coding one user's
# location, consistent with the other reads from data_path+'original/'.
original_path = data_path+'original/'
# Load the full training split into memory.
train_set = H5PYDataset(original_path+'data.hdf5', which_sets=('train',),load_in_memory=True)
# Load the 'cuts/test_times_0' split of valid.hdf5 into memory.
valid_set = H5PYDataset(original_path+'valid.hdf5', which_sets=('cuts/test_times_0',),load_in_memory=True)
print(train_set.num_examples)
1710670
print(valid_set.num_examples)
304
# Named `train_data` (parallel to `valid_data`) so the assignment does not
# rebind `data`, which is the project module imported at the top of the file.
train_data = train_set.data_sources
train_data[0]
array([2, 1, 2, ..., 2, 1, 1], dtype=int8)
valid_data = valid_set.data_sources
valid_data[4][0]
array([ 41.1542, 41.1542, 41.154 , 41.1539, 41.1542, 41.1544, 41.1542, 41.1538, 41.1533, 41.1528, 41.1525, 41.1525, 41.1527, 41.1527, 41.1527, 41.1526, 41.1524, 41.1526, 41.1526, 41.1522, 41.1508, 41.1507, 41.1497, 41.1489, 41.1489, 41.1486, 41.1479, 41.1475, 41.1468, 41.1461, 41.1463, 41.1464, 41.146 , 41.1449, 41.1451, 41.1454, 41.1458, 41.1459, 41.1458, 41.1459, 41.146 , 41.146 ], dtype=float32)
stamps = valid_data[-3]
stamps[0]
1376502576
# For each timestamp of the reference validation set, print whether the same
# value occurs among our own validation timestamps. A set gives O(1) lookups
# instead of rescanning the whole column for every stamp. Iterating over
# `stamps` directly avoids hard-coding its length.
val_timestamps = set(X_val['TIMESTAMP'])
for stamp in stamps:
    # int() turns the numpy int32 stamp into a plain Python int before the lookup.
    print(int(stamp) in val_timestamps)
False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False False
type(X_train['TIMESTAMP'][0])
int
type(stamps[0])
numpy.int32
check = [s in stamps for s in X_val['TIMESTAMP']]
# Print each validation timestamp as a timezone-aware UTC datetime, so the
# output does not depend on the local timezone of the machine running this.
for s in X_val['TIMESTAMP']:
    print(datetime.datetime.fromtimestamp(int(s), tz=datetime.timezone.utc))
2013-08-14 10:07:32 2013-08-14 10:14:21 2013-08-14 10:28:47 2013-08-14 10:36:23 2013-08-14 10:25:13 2013-08-14 10:31:23 2013-08-14 10:14:21 2013-08-14 10:14:13 2013-08-14 10:03:40 2013-08-14 11:06:08 2013-08-14 11:00:40 2013-08-14 11:18:32 2013-08-14 10:51:01 2013-08-14 10:15:37 2013-08-14 10:42:00 2013-08-14 09:15:51 2013-08-14 10:35:23 2013-08-14 11:05:51 2013-08-14 11:16:11 2013-08-14 11:47:27 2013-08-14 11:35:11 2013-08-14 11:43:53 2013-08-14 12:01:14 2013-08-14 11:09:23 2013-08-14 10:26:21 2013-08-14 11:22:43 2013-08-14 12:07:18 2013-08-14 10:29:38 2013-08-14 11:57:18 2013-08-14 11:23:06 2013-08-14 12:15:02 2013-08-14 11:06:17 2013-08-14 12:33:55 2013-08-13 22:42:40 2013-08-14 12:07:26 2013-08-14 09:02:36 2013-08-14 13:08:03 2013-08-14 07:25:36 2013-08-14 13:37:10 2013-08-14 13:52:50 2013-08-14 14:24:04 2013-08-14 15:15:05 2013-08-14 15:41:34 2013-08-14 19:15:39 2013-08-14 20:28:13 2013-08-14 19:58:07 2013-08-14 21:43:57 2013-08-14 21:41:07 2013-08-14 22:46:27 2013-08-14 23:11:28 2013-08-15 00:01:02 2013-08-15 01:40:11 2013-08-15 01:31:05 2013-08-15 04:04:21 2013-08-29 01:54:35 2013-09-30 07:58:58 2013-10-01 00:57:30 2013-10-01 01:14:21 2013-10-01 01:07:59 2013-10-01 01:12:46 2013-10-01 01:15:23 2013-10-01 00:56:55 2013-10-01 01:34:44 2013-09-30 10:05:15 2013-10-01 01:13:14 2013-10-01 00:50:05 2013-10-01 01:12:57 2013-10-01 01:34:34 2013-09-30 08:35:34 2013-10-01 01:39:09 2013-10-01 00:58:29 2013-10-01 00:53:42 2013-09-30 08:50:42 2013-10-01 00:59:11 2013-10-01 01:46:21 2013-10-01 00:57:02 2013-10-01 01:30:39 2013-10-01 00:40:31 2013-10-01 01:49:07 2013-10-01 01:52:21 2013-10-01 00:43:41 2013-10-01 02:06:20 2013-10-01 01:54:00 2013-10-01 01:13:36 2013-10-01 00:55:21 2013-10-01 02:00:42 2013-09-30 16:02:13 2013-10-01 01:55:31 2013-10-01 01:20:36 2013-09-30 14:18:09 2013-10-01 02:12:26 2013-10-01 01:46:34 2013-10-01 01:58:42 2013-10-01 01:59:55 2013-10-01 01:48:49 2013-10-01 01:50:59 2013-10-01 00:33:27 2013-09-30 13:02:58 2013-10-01 01:20:56 2013-10-01 02:05:00 
2013-10-01 01:42:57 2013-10-01 01:37:42 2013-10-01 01:51:28 2013-10-01 01:40:01 2013-10-01 01:53:52 2013-10-01 02:18:16 2013-10-01 02:20:50 2013-10-01 02:22:04 2013-10-01 01:38:33 2013-10-01 01:53:27 2013-10-01 01:59:50 2013-10-01 00:59:27 2013-10-01 01:53:45 2013-10-01 02:11:18 2013-10-01 01:51:55 2013-10-01 01:46:14 2013-10-01 01:49:47 2013-10-01 02:17:16 2013-10-01 01:57:39 2013-10-01 02:09:57 2013-10-01 02:36:04 2013-10-01 01:51:49 2013-10-01 02:10:14 2013-10-01 02:15:34 2013-10-01 02:03:47 2013-10-01 02:01:06 2013-10-01 02:02:54 2013-10-01 02:39:46 2013-09-30 14:47:45 2013-10-01 02:34:19 2013-10-01 01:55:35 2013-10-01 02:04:15 2013-10-01 02:25:37 2013-10-01 02:53:51 2013-10-01 02:21:52 2013-10-01 02:17:23 2013-10-01 02:52:09 2013-10-01 03:10:34 2013-10-01 02:50:11 2013-10-01 02:17:02 2013-10-01 02:51:34 2013-10-01 02:47:29 2013-10-01 02:47:58 2013-10-01 02:48:11 2013-10-01 02:44:48 2013-10-01 02:55:34 2013-10-01 03:06:12 2013-10-01 04:22:22 2013-10-01 03:55:25 2013-10-01 09:55:50 2013-10-07 09:39:25 2013-10-07 10:22:21 2013-10-07 04:17:58 2013-10-07 10:25:18 2013-10-07 07:28:48 2013-10-07 09:53:31 2013-10-07 10:28:40 2013-10-07 09:43:36 2013-10-07 11:33:33 2013-10-07 09:47:13 2013-10-07 10:45:36 2013-10-07 11:36:41 2013-10-07 12:02:04 2013-10-07 11:37:48 2013-10-07 11:52:38 2013-10-07 12:06:22 2013-10-07 11:34:34 2013-10-07 10:18:22 2013-10-07 11:31:49 2013-10-07 11:54:39 2013-10-07 11:15:50 2013-10-07 11:25:14 2013-10-07 12:22:42 2013-10-07 11:58:31 2013-10-07 11:56:48 2013-10-07 11:58:08 2013-10-07 11:59:03 2013-10-07 06:53:29 2013-10-07 08:41:29 2013-10-07 12:23:19 2013-10-07 12:13:27 2013-10-07 12:52:41 2013-10-07 10:52:23 2013-10-07 11:12:36 2013-10-07 12:53:53 2013-10-07 12:45:15 2013-10-07 12:54:38 2013-10-07 10:46:32 2013-10-07 11:54:15 2013-10-07 11:52:09 2013-10-07 12:01:28 2013-10-07 11:35:00 2013-10-07 12:24:21 2013-10-07 13:07:04 2013-10-07 13:40:22 2013-10-07 13:47:05 2013-10-07 10:10:45 2013-10-07 13:28:27 2013-10-07 12:35:05 2013-10-07 13:09:15 
2013-10-07 11:44:18 2013-10-07 14:42:34 2013-10-07 13:24:59 2013-10-07 13:11:00 2013-10-07 14:10:43 2013-10-07 15:09:55 2013-10-07 22:16:07 2013-10-07 21:46:40 2013-10-07 23:43:29 2013-10-07 09:15:06 2013-10-07 19:40:37 2013-10-08 00:10:51 2013-10-07 12:39:02 2013-10-07 13:55:44 2013-10-08 00:31:15 2013-10-07 23:57:18 2013-10-08 01:08:20 2013-10-08 04:09:15 2013-11-01 21:32:47 2013-11-01 21:14:53 2013-11-01 21:39:50 2013-11-01 21:30:52 2013-11-01 21:20:27 2013-11-01 21:09:21 2013-11-01 21:22:48 2013-11-01 21:38:38 2013-11-01 21:05:58 2013-11-01 21:38:29 2013-11-01 20:24:41 2013-11-01 21:45:04 2013-11-01 21:32:47 2013-11-01 21:06:05 2013-11-01 21:32:46 2013-11-01 21:40:51 2013-11-01 21:37:10 2013-11-01 20:36:02 2013-11-01 21:45:05 2013-11-01 21:33:28 2013-11-01 21:49:08 2013-11-01 21:37:25 2013-11-01 21:51:12 2013-11-01 21:13:05 2013-11-01 21:33:50 2013-11-01 21:35:31 2013-11-01 21:46:46 2013-11-01 21:37:35 2013-11-01 21:42:36 2013-11-01 21:53:26 2013-11-01 22:01:40 2013-11-01 21:38:20 2013-11-01 21:36:27 2013-11-01 22:05:16 2013-11-01 21:59:10 2013-11-01 18:00:02 2013-11-01 22:09:29 2013-11-01 21:58:45 2013-11-01 22:16:30 2013-11-01 21:06:47 2013-11-01 22:21:46 2013-11-01 22:12:47 2013-11-01 22:10:46 2013-11-01 22:20:50 2013-11-01 21:52:14 2013-11-01 22:12:02 2013-11-01 22:12:30 2013-11-01 22:59:32 2013-11-01 22:11:17 2013-11-01 23:35:01 2013-11-01 23:27:56 2013-11-02 09:37:04 2013-12-22 06:39:00 2013-12-22 06:39:18 2013-12-22 06:56:09 2013-12-22 07:57:34 2013-12-22 07:19:53 2013-12-22 07:33:46 2013-12-22 08:01:08 2013-12-22 08:01:17 2013-12-22 08:29:30 2013-12-22 08:01:29 2013-12-22 07:45:23 2013-12-22 08:08:20 2013-12-22 08:30:08 2013-12-21 13:07:37 2013-12-22 07:51:17 2013-12-22 07:11:40 2013-12-22 08:57:33 2013-12-22 08:49:51 2013-12-22 06:49:38 2013-12-22 09:00:47 2013-12-22 09:36:42 2013-12-22 09:02:56 2013-12-22 08:21:05 2013-12-22 10:05:26 2013-12-22 04:01:53 2013-12-22 10:02:21 2013-12-22 08:54:18 2013-12-22 10:31:35 2013-12-22 10:37:30 2013-12-22 11:28:57 
2013-12-22 11:56:01 2013-12-22 15:40:59 2013-12-22 10:02:07 2013-12-23 00:48:48
# Print each reference timestamp as a timezone-aware UTC datetime, so the
# output does not depend on the local timezone of the machine running this.
for s in stamps:
    # int() turns the numpy integer into a plain Python int.
    print(datetime.datetime.fromtimestamp(int(s), tz=datetime.timezone.utc))
2013-08-14 10:49:36 2013-08-14 10:59:06 2013-08-14 10:55:42 2013-08-14 10:50:04 2013-08-14 10:50:11 2013-08-14 10:56:57 2013-08-14 10:36:51 2013-08-14 10:44:15 2013-08-14 10:55:50 2013-08-14 10:50:35 2013-08-14 10:50:27 2013-08-14 10:43:57 2013-08-14 10:16:48 2013-08-14 10:40:47 2013-08-14 10:45:55 2013-08-14 10:43:00 2013-08-14 10:53:22 2013-08-14 10:50:03 2013-08-14 10:26:22 2013-08-14 10:59:15 2013-08-14 10:50:17 2013-08-14 10:56:34 2013-08-14 10:53:42 2013-08-14 10:47:46 2013-08-14 10:58:46 2013-08-14 10:24:23 2013-08-14 10:55:19 2013-08-14 10:57:03 2013-08-14 10:56:11 2013-08-14 10:56:52 2013-08-14 10:57:57 2013-08-14 10:08:15 2013-08-14 10:51:14 2013-08-14 10:58:31 2013-08-14 10:47:31 2013-08-14 10:30:36 2013-08-14 10:17:59 2013-08-14 10:48:03 2013-08-14 10:55:52 2013-08-14 10:49:06 2013-08-14 10:58:55 2013-08-14 10:51:24 2013-08-14 10:54:12 2013-08-14 10:54:26 2013-08-14 10:51:18 2013-08-14 10:59:56 2013-08-14 10:48:31 2013-08-14 10:51:56 2013-08-14 10:39:22 2013-08-14 10:57:25 2013-08-14 10:57:28 2013-08-14 10:57:40 2013-08-14 10:39:01 2013-08-14 10:50:39 2013-08-14 09:48:19 2013-10-01 01:16:12 2013-10-01 01:28:04 2013-10-01 01:18:37 2013-10-01 01:24:48 2013-10-01 01:23:39 2013-10-01 01:28:37 2013-10-01 01:20:16 2013-10-01 01:23:49 2013-10-01 01:27:11 2013-10-01 01:06:20 2013-10-01 01:28:08 2013-10-01 01:29:02 2013-10-01 01:24:44 2013-10-01 01:24:44 2013-10-01 01:19:06 2013-10-01 00:28:33 2013-10-01 01:29:28 2013-10-01 01:27:31 2013-10-01 01:22:13 2013-10-01 01:26:03 2013-10-01 01:28:55 2013-10-01 01:18:10 2013-10-01 01:22:13 2013-10-01 01:14:30 2013-10-01 01:24:41 2013-10-01 01:22:16 2013-10-01 01:25:35 2013-10-01 01:21:27 2013-10-01 01:11:33 2013-10-01 01:10:18 2013-10-01 01:09:33 2013-10-01 01:01:15 2013-10-01 01:17:58 2013-10-01 01:18:00 2013-10-01 01:13:26 2013-10-01 01:18:01 2013-10-01 01:25:54 2013-10-01 01:21:20 2013-10-01 01:25:31 2013-10-01 01:25:54 2013-10-01 01:23:40 2013-10-01 01:26:46 2013-10-01 01:23:31 2013-10-01 01:17:09 2013-10-01 01:21:57 
2013-10-01 00:29:09 2013-10-01 01:14:47 2013-10-01 01:04:25 2013-10-01 01:14:09 2013-10-01 01:16:59 2013-10-01 01:27:16 2013-10-01 01:16:26 2013-10-01 01:23:18 2013-10-01 01:16:05 2013-10-01 01:27:43 2013-10-01 01:08:13 2013-10-01 01:19:21 2013-10-01 01:21:19 2013-10-01 01:24:20 2013-10-01 01:26:45 2013-10-01 01:18:28 2013-10-01 01:19:45 2013-10-01 01:28:10 2013-10-01 01:22:20 2013-10-01 01:18:42 2013-10-01 01:19:52 2013-10-01 01:18:44 2013-10-01 01:15:11 2013-10-01 01:19:24 2013-10-01 01:23:58 2013-10-01 01:28:50 2013-10-01 01:13:24 2013-10-01 01:28:38 2013-10-01 01:24:50 2013-10-01 01:14:19 2013-10-01 01:10:05 2013-10-01 01:26:31 2013-10-01 01:28:01 2013-09-30 23:44:16 2013-10-01 01:21:43 2013-10-01 01:26:57 2013-10-01 01:25:25 2013-10-01 01:25:36 2013-10-01 01:16:34 2013-10-01 01:26:40 2013-10-01 01:14:56 2013-10-01 01:13:10 2013-10-01 01:28:34 2013-10-01 01:19:08 2013-10-01 01:24:57 2013-10-01 00:52:43 2013-10-01 01:25:28 2013-10-01 01:22:54 2013-10-01 01:28:49 2013-10-01 00:13:25 2013-10-07 10:34:47 2013-10-07 10:38:08 2013-10-07 10:31:10 2013-10-07 10:35:12 2013-10-07 10:41:50 2013-10-07 10:34:31 2013-10-07 10:42:02 2013-10-07 10:39:05 2013-10-07 10:31:43 2013-10-07 10:34:27 2013-10-07 10:31:48 2013-10-07 10:42:24 2013-10-07 10:38:37 2013-10-07 10:29:02 2013-10-07 10:33:55 2013-10-07 10:17:07 2013-10-07 10:44:31 2013-10-07 10:42:52 2013-10-07 10:26:05 2013-10-07 10:34:07 2013-10-07 10:40:59 2013-10-07 10:41:36 2013-10-07 10:33:47 2013-10-07 10:30:59 2013-10-07 10:38:59 2013-10-07 10:28:56 2013-10-07 10:41:24 2013-10-07 10:41:49 2013-10-07 10:42:47 2013-10-07 10:34:09 2013-10-07 10:40:31 2013-10-07 10:21:34 2013-10-07 10:43:52 2013-10-07 10:18:11 2013-10-07 10:41:47 2013-10-07 10:33:04 2013-10-07 10:40:53 2013-10-07 10:36:38 2013-10-07 10:41:46 2013-10-07 10:03:36 2013-10-07 10:44:45 2013-10-07 10:21:42 2013-10-07 10:24:07 2013-10-07 10:40:35 2013-10-07 10:41:00 2013-10-07 10:43:10 2013-10-07 10:23:55 2013-10-07 10:43:30 2013-10-07 10:25:24 2013-10-07 10:35:07 
2013-10-07 10:43:33 2013-10-07 10:39:30 2013-10-07 10:31:42 2013-10-07 10:39:17 2013-10-07 10:42:47 2013-10-07 10:39:20 2013-10-07 10:44:41 2013-10-07 10:24:22 2013-10-07 10:12:39 2013-10-07 10:37:25 2013-10-07 10:42:55 2013-10-07 10:14:35 2013-10-07 10:37:12 2013-10-07 10:32:29 2013-10-07 10:42:37 2013-10-07 10:26:52 2013-10-07 10:31:19 2013-10-07 10:44:58 2013-11-01 20:47:37 2013-11-01 20:54:00 2013-11-01 20:58:53 2013-11-01 20:56:37 2013-11-01 20:56:09 2013-11-01 20:51:05 2013-11-01 20:50:58 2013-11-01 20:55:26 2013-11-01 20:53:43 2013-11-01 20:53:46 2013-11-01 20:54:55 2013-11-01 20:59:28 2013-11-01 20:56:54 2013-11-01 20:50:37 2013-11-01 20:48:40 2013-11-01 20:55:46 2013-11-01 20:45:20 2013-11-01 20:46:22 2013-11-01 20:48:25 2013-11-01 20:47:19 2013-11-01 20:57:31 2013-11-01 20:58:14 2013-11-01 20:49:30 2013-11-01 20:43:31 2013-11-01 20:59:00 2013-11-01 20:54:23 2013-11-01 20:51:01 2013-11-01 20:38:12 2013-11-01 20:59:31 2013-11-01 20:56:46 2013-11-01 20:53:51 2013-11-01 20:48:00 2013-11-01 20:58:04 2013-11-01 20:52:50 2013-11-01 20:58:12 2013-11-01 20:57:37 2013-11-01 20:53:33 2013-11-01 20:54:11 2013-11-01 20:48:49 2013-11-01 20:42:56 2013-11-01 20:55:36 2013-11-01 20:51:36 2013-11-01 20:48:45 2013-11-01 20:49:17 2013-11-01 20:53:50 2013-11-01 20:45:28 2013-11-01 20:45:04 2013-11-01 20:52:17 2013-11-01 20:52:10 2013-11-01 20:59:16 2013-11-01 20:51:37 2013-11-01 20:50:10 2013-12-22 06:24:50 2013-12-22 06:04:12 2013-12-22 06:16:27 2013-12-22 06:23:06 2013-12-22 06:24:04 2013-12-22 06:17:33 2013-12-22 06:22:55 2013-12-22 06:24:35 2013-12-22 06:21:56 2013-12-22 06:22:49 2013-12-22 06:25:31 2013-12-22 06:21:31 2013-12-22 06:27:31 2013-12-22 06:29:45 2013-12-22 06:26:09 2013-12-22 06:17:08 2013-12-22 06:26:00 2013-12-22 06:20:56 2013-12-22 06:23:09 2013-12-22 06:22:31 2013-12-22 06:29:59 2013-12-22 06:27:43 2013-12-22 06:23:04 2013-12-22 06:25:30 2013-12-22 06:19:16 2013-12-22 06:23:06 2013-12-22 06:26:01 2013-12-22 06:19:45 2013-12-22 02:34:23 2013-12-22 06:29:54 
2013-12-22 06:28:39 2013-12-22 06:27:43 2013-12-22 06:16:23 2013-12-22 06:17:26
ids = valid_data[-1]
type(ids[0])
numpy.bytes_
ids
["b'1376502576620000126'", "b'1376503146620000161'", "b'1376502942620000500'", "b'1376502604620000105'", "b'1376502611620000022'", "b'1376503017620000272'", "b'1376501811620000617'", "b'1376502255620000663'", "b'1376502950620000005'", "b'1376502635620000276'", "b'1376502627620000596'", "b'1376502237620000675'", "b'1376500608620000409'", "b'1376502047620000574'", "b'1376502355620000338'", "b'1376502180620000080'", "b'1376502802620000680'", "b'1376502603620000142'", "b'1376501182620000651'", "b'1376503155620000026'", "b'1376502617620000657'", "b'1376502994620000604'", "b'1376502822620000093'", "b'1376502466620000561'", "b'1376503126620000410'", "b'1376501063620000343'", "b'1376502919620000166'", "b'1376503023620000010'", "b'1376502971620000517'", "b'1376503012620000273'", "b'1376503077620000470'", "b'1376500095620000569'", "b'1376502674620000426'", "b'1376503111620000674'", "b'1376502451620000310'", "b'1376501436620000344'", "b'1376500679620000108'", "b'1376502483620000356'", "b'1376502952620000687'", "b'1376502546620000254'", "b'1376503135620000053'", "b'1376502684620000503'", "b'1376502852620000321'", "b'1376502866620000421'", "b'1376502678620000460'", "b'1376503196620000386'", "b'1376502511620000480'", "b'1376502716620000224'", "b'1376501962620000507'", "b'1376503045620000633'", "b'1376503048620000349'", "b'1376503060620000049'", "b'1376501941620000667'", "b'1376502639620000281'", "b'1376498899620000172'", "b'1380615372620000303'", "b'1380616084620000260'", "b'1380615517620000372'", "b'1380615888620000588'", "b'1380615819620000042'", "b'1380616117620000325'", "b'1380615616620000040'", "b'1380615829620000682'", "b'1380616031620000001'", "b'1380614780620000352'", "b'1380616088620000513'", "b'1380616142620000289'", "b'1380615884620000166'", "b'1380615884620000671'", "b'1380615546620000187'", "b'1380612513620000172'", "b'1380616168620000472'", "b'1380616051620000597'", "b'1380615733620000105'", "b'1380615963620000137'", "b'1380616135620000672'", 
"b'1380615490620000574'", "b'1380615733620000051'", "b'1380615270620000612'", "b'1380615881620000031'", "b'1380615736620000246'", "b'1380615935620000367'", "b'1380615687620000577'", "b'1380615093620000272'", "b'1380615018620000632'", "b'1380614973620000258'", "b'1380614475620000032'", "b'1380615478620000138'", "b'1380615480620000381'", "b'1380615206620000397'", "b'1380615481620000077'", "b'1380615954620000546'", "b'1380615680620000192'", "b'1380615931620000068'", "b'1380615954620000395'", "b'1380615820620000482'", "b'1380616006620000080'", "b'1380615811620000431'", "b'1380615429620000602'", "b'1380615717620000497'", "b'1380612549620000161'", "b'1380615287620000675'", "b'1380614665620000458'", "b'1380615249620000222'", "b'1380615419620000487'", "b'1380616036620000669'", "b'1380615386620000476'", "b'1380615798620000523'", "b'1380615365620000215'", "b'1380616063620000065'", "b'1380614893620000011'", "b'1380615561620000391'", "b'1380615679620000004'", "b'1380615860620000429'", "b'1380616005620000695'", "b'1380615508620000361'", "b'1380615585620000665'", "b'1380616090620000562'", "b'1380615740620000398'", "b'1380615522620000156'", "b'1380615592620000674'", "b'1380615524620000279'", "b'1380615311620000540'", "b'1380615564620000216'", "b'1380615838620000324'", "b'1380616130620000356'", "b'1380615204620000387'", "b'1380616118620000649'", "b'1380615890620000159'", "b'1380615259620000393'", "b'1380615005620000249'", "b'1380615991620000589'", "b'1380616081620000633'", "b'1380609856620000609'", "b'1380615703620000410'", "b'1380616017620000470'", "b'1380615925620000177'", "b'1380615936620000547'", "b'1380615394620000400'", "b'1380616000620000140'", "b'1380615296620000020'", "b'1380615190620000477'", "b'1380616114620000151'", "b'1380615548620000247'", "b'1380615897620000616'", "b'1380613963620000005'", "b'1380615928620000449'", "b'1380615774620000158'", "b'1380616129620000281'", "b'1380611605620000351'", "b'1381167287620000123'", "b'1381167488620000626'", 
"b'1381167070620000142'", "b'1381167312620000337'", "b'1381167710620000684'", "b'1381167271620000159'", "b'1381167722620000624'", "b'1381167545620000419'", "b'1381167103620000114'", "b'1381167267620000668'", "b'1381167108620000307'", "b'1381167744620000051'", "b'1381167517620000356'", "b'1381166942620000518'", "b'1381167235620000529'", "b'1381166227620000901'", "b'1381167871620000463'", "b'1381167772620000495'", "b'1381166765620000008'", "b'1381167247620000345'", "b'1381167659620000235'", "b'1381167696620000085'", "b'1381167227620000156'", "b'1381167059620000004'", "b'1381167539620000256'", "b'1381166936620000426'", "b'1381167684620000621'", "b'1381167709620000249'", "b'1381167767620000094'", "b'1381167249620000675'", "b'1381167631620000116'", "b'1381166494620000480'", "b'1381167832620000074'", "b'1381166291620000326'", "b'1381167707620000653'", "b'1381167184620000560'", "b'1381167653620000295'", "b'1381167398620000686'", "b'1381167706620000321'", "b'1381165416620000697'", "b'1381167885620000280'", "b'1381166502620000297'", "b'1381166647620000657'", "b'1381167635620000662'", "b'1381167660620000594'", "b'1381167790620000093'", "b'1381166635620000195'", "b'1381167810620000431'", "b'1381166724620000311'", "b'1381167307620000591'", "b'1381167813620000267'", "b'1381167570620000648'", "b'1381167102620000525'", "b'1381167557620000424'", "b'1381167767620000160'", "b'1381167560620000633'", "b'1381167881620000391'", "b'1381166662620000189'", "b'1381165959620000138'", "b'1381167445620000344'", "b'1381167775620000049'", "b'1381166075620000068'", "b'1381167432620000001'", "b'1381167149620000257'", "b'1381167757620000324'", "b'1381166812620000595'", "b'1381167079620000535'", "b'1381167898620000667'", "b'1383364057620000066'", "b'1383364440620000010'", "b'1383364733620000009'", "b'1383364597620000601'", "b'1383364569620000356'", "b'1383364265620000007'", "b'1383364258620000574'", "b'1383364526620000108'", "b'1383364423620000015'", "b'1383364426620000632'", 
"b'1383364495620000611'", "b'1383364768620000388'", "b'1383364614620000372'", "b'1383364237620000455'", "b'1383364120620000403'", "b'1383364546620000041'", "b'1383363920620000020'", "b'1383363982620000591'", "b'1383364105620000665'", "b'1383364039620000618'", "b'1383364651620000513'", "b'1383364694620000364'", "b'1383364170620000239'", "b'1383363811620000031'", "b'1383364740620000252'", "b'1383364463620000345'", "b'1383364261620000436'", "b'1383363492620000672'", "b'1383364771620000320'", "b'1383364606620000508'", "b'1383364431620000233'", "b'1383364080620000527'", "b'1383364684620000005'", "b'1383364370620000140'", "b'1383364692620000118'", "b'1383364657620000570'", "b'1383364413620000492'", "b'1383364451620000309'", "b'1383364129620000013'", "b'1383363776620000434'", "b'1383364536620000217'", "b'1383364296620000112'", "b'1383364125620000625'", "b'1383364157620000648'", "b'1383364430620000542'", "b'1383363928620000616'", "b'1383363904620000105'", "b'1383364337620000612'", "b'1383364330620000333'", "b'1383364756620000540'", "b'1383364297620000596'", "b'1383364210620000153'", "b'1387722290620000362'", "b'1387721052620000311'", "b'1387721787620000046'", "b'1387722186620000565'", "b'1387722244620000068'", "b'1387721853620000403'", "b'1387722175620000633'", "b'1387722275620000172'", "b'1387722116620000187'", "b'1387722169620000060'", "b'1387722331620000058'", "b'1387722091620000607'", "b'1387722451620000540'", "b'1387722585620000430'", "b'1387722369620000120'", "b'1387721828620000123'", "b'1387722360620000391'", "b'1387722056620000089'", "b'1387722189620000480'", "b'1387722151620000184'", "b'1387722599620000137'", "b'1387722463620000314'", "b'1387722184620000057'", "b'1387722330620000171'", "b'1387721956620000373'", "b'1387722186620000197'", "b'1387722361620000697'", "b'1387721985620000173'", "b'1387708463620000329'", "b'1387722594620000900'", "b'1387722519620000482'", "b'1387722463620000481'", "b'1387721783620000030'", "b'1387721846620000247'"]
X_val
TRIP_ID | CALL_TYPE | ORIGIN_CALL | ORIGIN_STAND | TAXI_ID | TIMESTAMP | DAY_TYPE | MISSING_DATA | POLYLINE | LATITUDE | LONGITUDE | TARGET | COORD_FEATURES | DAY_OF_WEEK | QUARTER_HOUR | WEEK_OF_YEAR | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1376500052620000184 | C | 0 | 0 | 115 | 1376500052 | 0 | False | [[-8.649891,41.154399],[-8.649981,41.154417],[... | [-0.0392686, -0.0390627, -0.0440035, -0.049458... | [-0.590024, -0.591592, -0.596627, -0.596793, -... | [-8.61043, 41.1411] | [-0.590024, -0.591592, -0.596627, -0.596793, -... | 2 | 40 | 33 |
1 | 1376500461620000525 | C | 0 | 0 | 214 | 1376500461 | 0 | False | [[-8.610876,41.145759],[-8.610849,41.145759],[... | [-0.155839, -0.155839, -0.151619, -0.14673, -0... | [0.0920491, 0.0925159, 0.0985014, 0.105587, 0.... | [-8.63072, 41.1547] | [0.0920491, 0.0925159, 0.0985014, 0.105587, 0.... | 2 | 40 | 33 |
2 | 1376501327620000095 | B | 0 | 11 | 367 | 1376501327 | 0 | False | [[-8.613243,41.166873],[-8.613252,41.166747],[... | [0.129025, 0.127327, 0.125474, 0.118835, 0.104... | [0.0506678, 0.0505178, 0.0497175, 0.0700247, 0... | [-8.61534, 41.1407] | [0.0506678, 0.0505178, 0.0497175, 0.0700247, 0... | 2 | 41 | 33 |
3 | 1376501783620000173 | B | 0 | 10 | 39 | 1376501783 | 0 | False | [[-8.606988,41.15025],[-8.607213,41.150007],[-... | [-0.0952637, -0.0985575, -0.112865, -0.113843,... | [0.160023, 0.156088, 0.148386, 0.145868, 0.144... | [-8.55426, 41.1628] | [0.160023, 0.156088, 0.148386, 0.145868, 0.144... | 2 | 42 | 33 |
4 | 1376501113620000252 | B | 0 | 13 | 364 | 1376501113 | 0 | False | [[-8.628273,41.157405],[-8.628255,41.157423],[... | [0.00128665, 0.00149252, 0.00236744, 0.0135356... | [-0.212091, -0.211775, -0.209724, -0.20894, -0... | [-8.61928, 41.1786] | [-0.212091, -0.211775, -0.209724, -0.20894, -0... | 2 | 41 | 33 |
5 | 1376501483620000424 | B | 0 | 19 | 25 | 1376501483 | 0 | False | [[-8.605818,41.153391],[-8.607339,41.153427],[... | [-0.0528556, -0.0523924, -0.0513116, -0.050694... | [0.18048, 0.153888, 0.112506, 0.0797781, 0.071... | [-8.64643, 41.1616] | [0.18048, 0.153888, 0.112506, 0.0797781, 0.071... | 2 | 42 | 33 |
6 | 1376500461620000326 | B | 0 | 14 | 240 | 1376500461 | 0 | False | [[-8.611137,41.149332],[-8.611263,41.149161],[... | [-0.107667, -0.109931, -0.110086, -0.110086, -... | [0.0874808, 0.08528, 0.0849633, 0.0848132, 0.0... | [-8.61446, 41.1422] | [0.0874808, 0.08528, 0.0849633, 0.0848132, 0.0... | 2 | 40 | 33 |
7 | 1376500453620000263 | C | 0 | 0 | 407 | 1376500453 | 0 | False | [[-8.586396,41.149224],[-8.586378,41.149026],[... | [-0.109108, -0.111784, -0.11199, -0.107873, -0... | [0.520016, 0.520333, 0.513247, 0.49249, 0.4643... | [-8.58591, 41.1486] | [0.520016, 0.520333, 0.513247, 0.49249, 0.4643... | 2 | 40 | 33 |
8 | 1376499820620000467 | C | 0 | 0 | 270 | 1376499820 | 0 | False | [[-8.625177,41.157333],[-8.625609,41.157405],[... | [0.000308796, 0.00128665, 0.00494074, 0.006021... | [-0.157972, -0.165525, -0.194935, -0.202171, -... | [-8.64726, 41.1732] | [-0.157972, -0.165525, -0.194935, -0.202171, -... | 2 | 40 | 33 |
9 | 1376503568620000213 | B | 0 | 28 | 431 | 1376503568 | 0 | False | [[-8.584335,41.163111],[-8.585127,41.162922],[... | [0.0782799, 0.0757066, 0.0835809, 0.0913522, 0... | [0.556046, 0.542208, 0.51058, 0.479736, 0.4769... | [-8.58525, 41.1689] | [0.556046, 0.542208, 0.51058, 0.479736, 0.4769... | 2 | 44 | 33 |
10 | 1376503240620000002 | B | 0 | 63 | 421 | 1376503240 | 0 | False | [[-8.609688,41.160348],[-8.609967,41.159277],[... | [0.040967, 0.0265565, 0.00370556, 0.000669059,... | [0.112823, 0.107938, 0.107938, 0.107471, 0.106... | [-8.61071, 41.1456] | [0.112823, 0.107938, 0.107938, 0.107471, 0.106... | 2 | 44 | 33 |
11 | 1376504312620000617 | C | 0 | 0 | 199 | 1376504312 | 0 | False | [[-8.624502,41.179554],[-8.624511,41.179527],[... | [0.300099, 0.299738, 0.299738, 0.299841, 0.299... | [-0.146168, -0.146318, -0.146485, -0.146318, -... | [-8.62455, 41.1796] | [-0.146168, -0.146318, -0.146485, -0.146318, -... | 2 | 45 | 33 |
12 | 1376502661620000400 | B | 0 | 29 | 117 | 1376502661 | 0 | False | [[-8.638443,41.170797],[-8.6382,41.170716],[-8... | [0.181932, 0.180852, 0.184866, 0.192174, 0.200... | [-0.389887, -0.385636, -0.36046, -0.330883, -0... | [-8.6206, 41.1739] | [-0.389887, -0.385636, -0.36046, -0.330883, -0... | 2 | 43 | 33 |
13 | 1376500537620000246 | B | 0 | 13 | 318 | 1376500537 | 0 | False | [[-8.628147,41.157198],[-8.628156,41.157198],[... | [-0.00149252, -0.00149252, -0.00128665, -0.001... | [-0.209891, -0.210041, -0.20879, -0.208473, -0... | [-8.61782, 41.1525] | [-0.209891, -0.210041, -0.20879, -0.208473, -0... | 2 | 41 | 33 |
14 | 1376502120620000557 | B | 0 | 32 | 245 | 1376502120 | 0 | False | [[-8.627643,41.157765],[-8.627958,41.1579],[-8... | [0.00612446, 0.00797724, 0.0135356, 0.0206894,... | [-0.201071, -0.206589, -0.20879, -0.228147, -0... | [-8.61148, 41.1461] | [-0.201071, -0.206589, -0.20879, -0.228147, -0... | 2 | 42 | 33 |
15 | 1376496951620000012 | A | 7 | 0 | 79 | 1376496951 | 0 | False | [[-8.604045,41.182569],[-8.604135,41.182353],[... | [0.340757, 0.337875, 0.316876, 0.295724, 0.278... | [0.211474, 0.209907, 0.197003, 0.183148, 0.161... | [-8.62064, 41.1643] | [0.211474, 0.209907, 0.197003, 0.183148, 0.161... | 2 | 37 | 33 |
16 | 1376501723620000554 | B | 0 | 53 | 183 | 1376501723 | 0 | False | [[-8.613945,41.141277],[-8.613972,41.141286],[... | [-0.216312, -0.216209, -0.221047, -0.222642, -... | [0.0383969, 0.03793, 0.0220411, 0.0168393, 0.0... | [-8.63607, 41.1592] | [0.0383969, 0.03793, 0.0220411, 0.0168393, 0.0... | 2 | 42 | 33 |
17 | 1376503551620000376 | B | 0 | 34 | 246 | 1376503551 | 0 | False | [[-8.615556,41.14071],[-8.615565,41.140692],[-... | [-0.22398, -0.224186, -0.22434, -0.22362, -0.2... | [0.0102369, 0.0100702, 0.0100702, 0.010387, 0.... | [-8.64072, 41.1612] | [0.0102369, 0.0100702, 0.0100702, 0.010387, 0.... | 2 | 44 | 33 |
18 | 1376504171620000146 | B | 0 | 10 | 338 | 1376504171 | 0 | False | [[-8.606979,41.150268],[-8.607285,41.150124],[... | [-0.0950063, -0.096962, -0.0962415, -0.0962415... | [0.160173, 0.154838, 0.148852, 0.148536, 0.128... | [-8.61805, 41.1525] | [0.160173, 0.154838, 0.148852, 0.148536, 0.128... | 2 | 45 | 33 |
19 | 1376506047620000026 | B | 0 | 57 | 167 | 1376506047 | 0 | False | [[-8.610804,41.145741],[-8.610822,41.145768],[... | [-0.156097, -0.155736, -0.155839, -0.151722, -... | [0.0933162, 0.0929994, 0.0917323, 0.0961339, 0... | [-8.60417, 41.1489] | [0.0933162, 0.0929994, 0.0917323, 0.0961339, 0... | 2 | 47 | 33 |
20 | 1376505311620000392 | A | 7 | 0 | 349 | 1376505311 | 0 | False | [[-8.583165,41.164713],[-8.583012,41.164407],[... | [0.0998956, 0.0957268, 0.0964474, 0.105557, 0.... | [0.576503, 0.579187, 0.580438, 0.580121, 0.597... | [-8.6118, 41.1429] | [0.576503, 0.579187, 0.580438, 0.580121, 0.597... | 2 | 46 | 33 |
21 | 1376505833620000120 | B | 0 | 13 | 144 | 1376505833 | 0 | False | [[-8.628345,41.15763],[-8.628345,41.157576],[-... | [0.00432315, 0.00360262, 0.00504367, 0.0026247... | [-0.213342, -0.213342, -0.206906, -0.178896, -... | [-8.61802, 41.1501] | [-0.213342, -0.213342, -0.206906, -0.178896, -... | 2 | 46 | 33 |
22 | 1376506874620000255 | B | 0 | 33 | 194 | 1376506874 | 0 | False | [[-8.600184,41.182686],[-8.600031,41.182758],[... | [0.342352, 0.34333, 0.33736, 0.335559, 0.33314... | [0.278965, 0.281649, 0.310276, 0.319096, 0.341... | [-8.56627, 41.1814] | [0.278965, 0.281649, 0.310276, 0.319096, 0.341... | 2 | 48 | 33 |
23 | 1376503763620000015 | B | 0 | 60 | 48 | 1376503763 | 0 | False | [[-8.609706,41.151276],[-8.609679,41.151294],[... | [-0.0814193, -0.081162, -0.0792063, -0.0644355... | [0.112506, 0.112973, 0.115491, 0.107788, 0.108... | [-8.61818, 41.1696] | [0.112506, 0.112973, 0.115491, 0.107788, 0.108... | 2 | 44 | 33 |
24 | 1376501181620000360 | B | 0 | 0 | 37 | 1376501181 | 0 | False | [[-8.598996,41.149026],[-8.598843,41.148873],[... | [-0.111784, -0.113843, -0.115284, -0.119195, -... | [0.299739, 0.302423, 0.303357, 0.305408, 0.304... | [-8.60023, 41.1493] | [0.299739, 0.302423, 0.303357, 0.305408, 0.304... | 2 | 41 | 33 |
25 | 1376504563620000017 | A | 954 | 0 | 335 | 1376504563 | 0 | False | [[-8.618022,41.151519],[-8.618337,41.151447],[... | [-0.0781255, -0.0791033, -0.0844558, -0.087132... | [-0.0328782, -0.0383802, -0.0624553, -0.079444... | [-8.59822, 41.1484] | [-0.0328782, -0.0383802, -0.0624553, -0.079444... | 2 | 45 | 33 |
26 | 1376507238620000114 | C | 0 | 0 | 165 | 1376507238 | 0 | False | [[-8.63028,41.157432],[-8.630505,41.157153],[-... | [0.00164691, -0.00211011, -0.00452901, 0.01085... | [-0.24717, -0.251105, -0.271246, -0.289819, -0... | [-8.65056, 41.1615] | [-0.24717, -0.251105, -0.271246, -0.289819, -0... | 2 | 48 | 33 |
27 | 1376501378620000195 | B | 0 | 60 | 67 | 1376501378 | 0 | False | [[-8.609499,41.151294],[-8.609535,41.151312],[... | [-0.081162, -0.0809046, -0.0778681, -0.0758095... | [0.116124, 0.115491, 0.117375, 0.111556, 0.100... | [-8.61674, 41.137] | [0.116124, 0.115491, 0.117375, 0.111556, 0.100... | 2 | 41 | 33 |
28 | 1376506638620000038 | B | 0 | 17 | 140 | 1376506638 | 0 | False | [[-8.632323,41.164326],[-8.632917,41.164065],[... | [0.0946461, 0.0911464, 0.0867718, 0.093205, 0.... | [-0.2829, -0.293287, -0.317345, -0.346305, -0.... | [-8.65428, 41.181] | [-0.2829, -0.293287, -0.317345, -0.346305, -0.... | 2 | 47 | 33 |
29 | 1376504586620000608 | B | 0 | 18 | 310 | 1376504586 | 0 | False | [[-8.619921,41.148018],[-8.620218,41.147712],[... | [-0.125371, -0.129489, -0.1176, -0.104013, -0.... | [-0.0660733, -0.0712751, -0.0792946, -0.084179... | [-8.61061, 41.1515] | [-0.0660733, -0.0712751, -0.0792946, -0.084179... | 2 | 45 | 33 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
274 | 1387725593620000440 | B | 0 | 42 | 233 | 1387725593 | 0 | False | [[-8.612145,41.172777],[-8.612568,41.172768],[... | [0.208643, 0.20854, 0.216569, 0.226142, 0.2362... | [0.0698579, 0.062472, 0.0542858, 0.0513014, 0.... | [-8.58568, 41.1489] | [0.0698579, 0.062472, 0.0542858, 0.0513014, 0.... | 6 | 29 | 51 |
275 | 1387726426620000621 | A | 1602 | 0 | 34 | 1387726426 | 0 | False | [[-8.648964,41.179752],[-8.648982,41.179752],[... | [0.302775, 0.302775, 0.314663, 0.316104, 0.316... | [-0.573819, -0.574136, -0.57807, -0.576336, -0... | [-8.63323, 41.1756] | [-0.573819, -0.574136, -0.57807, -0.576336, -0... | 6 | 30 | 51 |
276 | 1387728068620000012 | A | 3521 | 0 | 79 | 1387728068 | 0 | False | [[-8.658126,41.154876],[-8.657829,41.154579],[... | [-0.0328353, -0.0368497, -0.0219245, -0.001286... | [-0.733992, -0.728807, -0.743912, -0.769238, -... | [-8.65425, 41.1809] | [-0.733992, -0.728807, -0.743912, -0.769238, -... | 6 | 32 | 51 |
277 | 1387728077620000502 | B | 0 | 54 | 116 | 1387728077 | 0 | False | [[-8.630316,41.15754],[-8.629668,41.157],[-8.6... | [0.00308796, -0.00416875, -0.00844043, -0.0272... | [-0.247804, -0.236483, -0.233182, -0.24497, -0... | [-8.6304, 41.1554] | [-0.247804, -0.236483, -0.233182, -0.24497, -0... | 6 | 32 | 51 |
278 | 1387729770620000384 | A | 3184 | 0 | 225 | 1387729770 | 0 | False | [[-8.6121,41.158674],[-8.6121,41.158674],[-8.6... | [0.0183734, 0.0183734, 0.00452901, -0.0170353,... | [0.0706582, 0.0706582, 0.064356, 0.0511347, 0.... | [-8.62106, 41.151] | [0.0706582, 0.0706582, 0.064356, 0.0511347, 0.... | 6 | 33 | 51 |
279 | 1387728089620000640 | B | 0 | 26 | 218 | 1387728089 | 0 | False | [[-8.580204,41.15934],[-8.580627,41.159241],[-... | [0.0273799, 0.0260418, 0.015131, -0.005301, -0... | [0.628271, 0.620869, 0.633156, 0.637241, 0.637... | [-8.58601, 41.1486] | [0.628271, 0.620869, 0.633156, 0.637241, 0.637... | 6 | 32 | 51 |
280 | 1387727123620000055 | B | 0 | 7 | 352 | 1387727123 | 0 | False | [[-8.63991,41.15979],[-8.640693,41.159664],[-8... | [0.0334529, 0.0317546, 0.00586713, -0.0161089,... | [-0.41553, -0.429218, -0.438821, -0.447324, -0... | [-8.6178, 41.1471] | [-0.41553, -0.429218, -0.438821, -0.447324, -0... | 6 | 31 | 51 |
281 | 1387728500620000271 | B | 0 | 57 | 234 | 1387728500 | 0 | False | [[-8.610885,41.14566],[-8.610885,41.145669],[-... | [-0.157177, -0.157074, -0.157435, -0.156714, -... | [0.091899, 0.091899, 0.0917323, 0.0923659, 0.0... | [-8.66138, 41.1481] | [0.091899, 0.091899, 0.0917323, 0.0923659, 0.0... | 6 | 32 | 51 |
282 | 1387729808620000151 | B | 0 | 28 | 146 | 1387729808 | 0 | False | [[-8.584335,41.163156],[-8.584425,41.163102],[... | [0.078846, 0.0781255, 0.0776623, 0.0841985, 0.... | [0.556046, 0.554479, 0.533088, 0.506012, 0.478... | [-8.6117, 41.16] | [0.556046, 0.554479, 0.533088, 0.506012, 0.478... | 6 | 34 | 51 |
283 | 1387660057620000026 | B | 0 | 57 | 167 | 1387660057 | 0 | False | [[-8.610768,41.145642],[-8.610759,41.145642],[... | [-0.157435, -0.157435, -0.157074, -0.156354, -... | [0.0939331, 0.0940998, 0.0942499, 0.0939331, 0... | [-8.63085, 41.1466] | [0.0939331, 0.0940998, 0.0942499, 0.0939331, 0... | 5 | 52 | 51 |
284 | 1387727477620000513 | B | 0 | 53 | 366 | 1387727477 | 0 | False | [[-8.613972,41.141349],[-8.613963,41.141349],[... | [-0.215334, -0.215334, -0.216929, -0.205607, -... | [0.03793, 0.0380801, 0.029277, 0.0308442, 0.03... | [-8.61403, 41.1499] | [0.03793, 0.0380801, 0.029277, 0.0308442, 0.03... | 6 | 31 | 51 |
285 | 1387725100620000157 | A | 254 | 0 | 390 | 1387725100 | 0 | False | [[-8.676234,41.15484],[-8.676198,41.154822],[-... | [-0.03335, -0.0335559, -0.0335559, -0.0334529,... | [-1.05057, -1.04994, -1.04994, -1.04962, -1.04... | [-8.6488, 41.1486] | [-1.05057, -1.04994, -1.04994, -1.04962, -1.04... | 6 | 28 | 51 |
286 | 1387731453620000032 | A | 9559 | 0 | 371 | 1387731453 | 0 | False | [[-8.657946,41.148234],[-8.657937,41.148207],[... | [-0.122438, -0.122798, -0.122798, -0.122695, -... | [-0.730841, -0.730691, -0.730691, -0.730541, -... | [-8.65648, 41.1532] | [-0.730841, -0.730691, -0.730691, -0.730541, -... | 6 | 35 | 51 |
287 | 1387730991620000217 | A | 20908 | 0 | 321 | 1387730991 | 0 | False | [[-8.569818,41.170158],[-8.569278,41.169996],[... | [0.173338, 0.171125, 0.158052, 0.160934, 0.166... | [0.809852, 0.819288, 0.845881, 0.854534, 0.858... | [-8.572, 41.1629] | [0.809852, 0.819288, 0.845881, 0.854534, 0.858... | 6 | 35 | 51 |
288 | 1387723778620000364 | B | 0 | 21 | 419 | 1387723778 | 0 | False | [[-8.628867,41.160996],[-8.628849,41.160951],[... | [0.0497162, 0.0490986, 0.0543481, 0.0718466, 0... | [-0.222478, -0.222162, -0.209724, -0.202021, -... | [-8.71435, 41.2082] | [-0.222478, -0.222162, -0.209724, -0.202021, -... | 6 | 27 | 51 |
289 | 1387731647620000129 | B | 0 | 57 | 265 | 1387731647 | 0 | False | [[-8.610759,41.145651],[-8.610768,41.145678],[... | [-0.15728, -0.15692, -0.155839, -0.154244, -0.... | [0.0940998, 0.0939331, 0.0936163, 0.0936163, 0... | [-8.63835, 41.1592] | [0.0940998, 0.0939331, 0.0936163, 0.0936163, 0... | 6 | 36 | 51 |
290 | 1387733802620000364 | B | 0 | 21 | 419 | 1387733802 | 0 | False | [[-8.628786,41.161041],[-8.628579,41.160897],[... | [0.0503338, 0.0483781, 0.0476576, 0.0474002, 0... | [-0.221061, -0.217443, -0.21776, -0.21791, -0.... | [-8.596, 41.1696] | [-0.221061, -0.217443, -0.21776, -0.21791, -0.... | 6 | 38 | 51 |
291 | 1387731776620000207 | B | 0 | 36 | 211 | 1387731776 | 0 | False | [[-8.649423,41.154345],[-8.6499,41.154273],[-8... | [-0.0399891, -0.040967, -0.0452387, -0.0437976... | [-0.581838, -0.590191, -0.59616, -0.579487, -0... | [-8.57125, 41.1646] | [-0.581838, -0.590191, -0.59616, -0.579487, -0... | 6 | 36 | 51 |
292 | 1387729265620000068 | B | 0 | 0 | 185 | 1387729265 | 0 | False | [[-8.608779,41.147793],[-8.608734,41.147802],[... | [-0.128408, -0.128305, -0.128305, -0.128408, -... | [0.128712, 0.129496, 0.129812, 0.133114, 0.133... | [-8.62051, 41.1651] | [0.128712, 0.129496, 0.129812, 0.133114, 0.133... | 6 | 33 | 51 |
293 | 1387735526620000023 | C | 0 | 0 | 404 | 1387735526 | 0 | False | [[-8.597673,41.142681],[-8.597682,41.142681]] | [-0.197372, -0.197372] | [0.322864, 0.322714] | [-8.59768, 41.1427] | [0.322864, 0.322864, 0.322864, 0.322864, 0.322... | 6 | 40 | 51 |
294 | 1387713713620000255 | A | 34988 | 0 | 194 | 1387713713 | 0 | False | [[-8.594352,41.169375],[-8.594352,41.169375],[... | [0.162787, 0.162787, 0.16289, 0.162993, 0.1631... | [0.380934, 0.380934, 0.381084, 0.381084, 0.381... | [-8.58298, 41.1704] | [0.380934, 0.380934, 0.381084, 0.381084, 0.381... | 6 | 16 | 51 |
295 | 1387735341620000216 | B | 0 | 12 | 331 | 1387735341 | 0 | False | [[-8.630766,41.154948],[-8.631414,41.15439],[-... | [-0.0318575, -0.039423, -0.054554, -0.0752434,... | [-0.255673, -0.267011, -0.283683, -0.29422, -0... | [-8.63564, 41.1406] | [-0.255673, -0.267011, -0.283683, -0.29422, -0... | 6 | 40 | 51 |
296 | 1387731258620000486 | C | 0 | 0 | 75 | 1387731258 | 0 | False | [[-8.59698,41.171328],[-8.595054,41.172327],[-... | [0.189138, 0.20257, 0.253367, 0.308848, 0.3575... | [0.334985, 0.368663, 0.395873, 0.406426, 0.397... | [-8.33168, 41.2035] | [0.334985, 0.368663, 0.395873, 0.406426, 0.397... | 6 | 35 | 51 |
297 | 1387737095620000217 | A | 495 | 0 | 321 | 1387737095 | 0 | False | [[-8.591688,41.159556],[-8.591625,41.159421],[... | [0.0303135, 0.0284607, 0.0216672, 0.0165721, 0... | [0.427501, 0.428601, 0.428134, 0.413496, 0.402... | [-8.60578, 41.1498] | [0.427501, 0.428601, 0.428134, 0.413496, 0.402... | 6 | 42 | 51 |
298 | 1387737450620000384 | B | 0 | 52 | 225 | 1387737450 | 0 | False | [[-8.61327,41.154453],[-8.613297,41.154147],[-... | [-0.0385481, -0.0426654, -0.0465768, -0.047657... | [0.050201, 0.0497175, 0.0495675, 0.0564866, 0.... | [-8.58762, 41.1885] | [0.050201, 0.0497175, 0.0495675, 0.0564866, 0.... | 6 | 42 | 51 |
299 | 1387740537620000657 | B | 0 | 47 | 17 | 1387740537 | 0 | False | [[-8.654796,41.173551],[-8.654526,41.173668],[... | [0.219091, 0.220686, 0.236486, 0.239369, 0.233... | [-0.675771, -0.671053, -0.652646, -0.632039, -... | [-8.63023, 41.1584] | [-0.675771, -0.671053, -0.652646, -0.632039, -... | 6 | 45 | 51 |
300 | 1387742161620000503 | C | 0 | 0 | 33 | 1387742161 | 0 | False | [[-8.639487,41.167422],[-8.639424,41.16753],[-... | [0.136436, 0.137878, 0.135819, 0.12393, 0.1178... | [-0.408144, -0.407043, -0.402008, -0.397757, -... | [-8.66577, 41.2102] | [-0.408144, -0.407043, -0.402008, -0.397757, -... | 6 | 47 | 51 |
301 | 1387755659620000372 | A | 481 | 0 | 27 | 1387755659 | 0 | False | [[-8.679753,41.156559],[-8.679717,41.156568],[... | [-0.0101388, -0.0100359, -0.00308796, -0.00710... | [-1.11209, -1.11146, -1.0954, -1.07763, -1.058... | [-8.61165, 41.1461] | [-1.11209, -1.11146, -1.0954, -1.07763, -1.058... | 6 | 62 | 51 |
302 | 1387735327620000068 | B | 0 | 27 | 185 | 1387735327 | 0 | False | [[-8.608707,41.147811],[-8.608689,41.147829],[... | [-0.12815, -0.127945, -0.128665, -0.13304, -0.... | [0.129962, 0.130279, 0.13328, 0.129812, 0.1073... | [-8.62782, 41.1698] | [0.129962, 0.130279, 0.13328, 0.129812, 0.1073... | 6 | 40 | 51 |
303 | 1387788528620000010 | A | 8312 | 0 | 26 | 1387788528 | 0 | False | [[-8.609247,41.155182],[-8.60922,41.155254],[-... | [-0.0287181, -0.0277402, -0.0210496, -0.021409... | [0.120526, 0.121009, 0.117541, 0.108105, 0.106... | [-8.61635, 41.163] | [0.120526, 0.121009, 0.117541, 0.108105, 0.106... | 0 | 3 | 52 |
304 rows × 16 columns