%config Completer.use_jedi = False
%matplotlib inline
import os
import sys
import pytz
from datetime import datetime
import geopandas as gpd
from copy import copy, deepcopy
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from glob import glob
from shapely.geometry import Point, Polygon
from collections import Counter
from dask.distributed import Client
from mpl_toolkits.axes_grid1 import make_axes_locatable
import skmob
import mobilkit
sns.set_context("notebook", font_scale=1.5)
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
So far, given the implementation of `skmob`, the communication happens via the `TrajDataFrame`.
We load a sample file from csv into `skmob`, filter and analyze it, port it to `dask` and then bring it back to `skmob`.
# Accuracy column name ("acc" is also one of the CSV columns named below).
accColName = "acc"

# Read one gzip-compressed, comma-separated part of the sample dataset.
# The column names are supplied explicitly through `names`.
df = pd.read_csv(
    "/data/datiCuebiqTiled/part-00000-c02e38a8-f763-4f0f-a3f0-3ebbf4738748-c000.csv.gz",
    names=["UTC", "uid", "OS", "lat", "lng", "acc", "tz", "tile_ID"],
    sep=",",
    compression="gzip",
)

# Add the datetime column, using the "America/Mexico_City" time zone.
# NOTE(review): the return value is discarded, so this relies on
# compute_datetime_col modifying `df` in place -- confirm.
mobilkit.loader.compute_datetime_col(df, "America/Mexico_City")

# Hand the pandas frame over to skmob.
# NOTE(review): presumably the lat/lng/uid/datetime column names match the
# TrajDataFrame defaults -- confirm against the skmob documentation.
traj = skmob.TrajDataFrame(df)
from skmob.preprocessing import filtering
# Noise filtering step: filter out all points with a speed (in km/h) from the
# previous point higher than 500 km/h. `traj` is the input trajectory frame;
# the filtered result is bound to `ftdf` and used by the stop detection below.
ftdf = filtering.filter(traj, max_speed_kmh=500.)
from skmob.preprocessing import detection

# Compute the stops for each individual in the filtered TrajDataFrame.
# The thresholds used here are a 20-minute minimum duration and a 0.2 km
# spatial radius.
# NOTE(review): `leaving_time=True` presumably also records when each stop
# ends -- confirm against the skmob documentation.
# NOTE(review): newer scikit-mobility releases appear to expose this function
# as `detection.stay_locations` -- confirm against the installed version.
stdf = detection.stops(
    ftdf,
    stop_radius_factor=0.5,
    minutes_for_a_stop=20.0,
    spatial_radius_km=0.2,
    leaving_time=True,
)
Port to `mobilkit` for later analysis

import dask.dataframe as dd
from importlib import reload
# Re-execute the already-imported mobilkit package first and its loader
# submodule second (the order is deliberate: package, then submodule).
# NOTE(review): presumably done so that edits to a local development checkout
# of mobilkit are picked up without restarting the session -- confirm.
reload(mobilkit)
reload(mobilkit.loader)
<module 'mobilkit.loader' from '/home/ubi/Sandbox/mobilkit_dask/mobilkit/loader.py'>
# Pass the skmob stops table `stdf` through mobilkit's loader; the result is
# bound to `df_dask` (presumably a dask dataframe -- confirm against
# mobilkit.loader.load_from_skmob).
df_dask = mobilkit.loader.load_from_skmob(stdf)
Back to `skmob`

# Back to skmob
# Convert `df_dask` with mobilkit's dask_to_skmob loader helper; the result is
# bound to `back_df`.
back_df = mobilkit.loader.dask_to_skmob(df_dask)