import pandas as pd
import urllib
import ssl
import io
import os
from zipfile import ZipFile
from urllib.request import urlopen
from scipy.io import loadmat
# Disable SSL certificate verification for simplicity (these are public
# datasets). ssl.SSLContext() with no protocol argument is deprecated
# since Python 3.10; build a default context and explicitly turn
# verification off instead -- the documented equivalent.
context = ssl.create_default_context()
context.check_hostname = False
context.verify_mode = ssl.CERT_NONE

# Steam generator telemetry for the STUMPY "Basics" tutorial:
# whitespace-delimited file with no header row.
colnames = ['drum pressure',
            'excess oxygen',
            'water level',
            'steam flow'
            ]
url = 'https://www.cs.ucr.edu/~eamonn/iSAX/steamgen.dat'
raw_bytes = urllib.request.urlopen(url, context=context).read()
data = io.BytesIO(raw_bytes)
steam_df = pd.read_csv(data, header=None, sep="\\s+")
steam_df.columns = colnames
steam_df.to_csv('STUMPY_Basics_steamgen.csv', index=False)
# Ref - https://github.com/stanford-futuredata/ASAP
# NYC taxi passenger counts; pandas reads the URL directly.
taxi_url = ("https://raw.githubusercontent.com/stanford-futuredata/"
            "ASAP/master/Taxi.csv")
taxi_df = pd.read_csv(taxi_url, sep=',')
taxi_df.to_csv('STUMPY_Basics_Taxi.csv', index=False)
# Kohl's online sales volume, shipped as a MATLAB .mat file, for the
# time-series-chains tutorial.
url = 'https://sites.google.com/site/timeserieschain/home/Kohls_data.mat?attredirects=0&revision=1'
payload = urllib.request.urlopen(url, context=context).read()
kohls_mat = loadmat(io.BytesIO(payload))
volume = kohls_mat['VarName1']
# Preserve the array's native dtype when building the frame.
df = pd.DataFrame(volume, dtype=volume.dtype, columns=['volume'])
df.to_csv('Time_Series_Chains_Kohls_data.csv', index=False)
# Arterial blood pressure from a tilt-table experiment, for the
# semantic-segmentation tutorial; the row index becomes a 'time' column.
url = 'https://sites.google.com/site/timeserieschain/home/TiltABP_210_25000.txt'
abp_bytes = urllib.request.urlopen(url, context=context).read()
df = pd.read_csv(io.BytesIO(abp_bytes), header=None)
df = df.reset_index().rename(columns={'index': 'time', 0: 'abp'})
df.to_csv('Semantic_Segmentation_TiltABP.csv', index=False)
# Robot dog accelerometer trace for the fast-pattern-searching tutorial.
T_url = 'https://www.cs.unm.edu/~mueen/robot_dog.txt'
T_raw_bytes = urllib.request.urlopen(T_url, context=context).read()
T_data = io.BytesIO(T_raw_bytes)
# Raw string for the regex separator: '\s' in a plain string literal is
# an invalid escape sequence (SyntaxWarning on Python 3.12+). The value
# is unchanged.
T_df = pd.read_csv(T_data, header=None, sep=r'\s+', names=['Acceleration'])
T_df.to_csv('Fast_Pattern_Searching_robot_dog.csv', index=False)
# Carpet-walk query pattern, paired with the robot-dog series above.
Q_url = 'https://www.cs.unm.edu/~mueen/carpet_query.txt'
Q_raw_bytes = urllib.request.urlopen(Q_url, context=context).read()
Q_data = io.BytesIO(Q_raw_bytes)
# Raw string for the regex separator: '\s' in a plain string literal is
# an invalid escape sequence (SyntaxWarning on Python 3.12+). The value
# is unchanged.
Q_df = pd.read_csv(Q_data, header=None, sep=r'\s+', names=['Acceleration'])
Q_df.to_csv('carpet_query.csv', index=False)
# GunPoint train/test sets. Download the archive ONCE and extract both
# members from it -- the original fetched the same zip twice -- and close
# the ZipFile with a context manager.
gun_url = "http://alumni.cs.ucr.edu/~lexiangy/Shapelet/gun.zip"
with ZipFile(io.BytesIO(urlopen(gun_url).read())) as fzip:
    # training set
    train = fzip.extract("gun_train")
    # test set
    test = fzip.extract("gun_test")
train_df = pd.read_csv(train, sep="\\s+", header=None)
os.remove(train)  # the extracted temp file is no longer needed
train_df.to_csv("gun_point_train_data.csv", index=False)
test_df = pd.read_csv(test, sep="\\s+", header=None)
os.remove(test)
test_df.to_csv("gun_point_test_data.csv", index=False)
# MFCC traces of "Under Pressure" and "Ice Ice Baby" for the
# motif-comparison tutorial. Close the ZipFile via a context manager and
# remove the extracted .mat file after loading it, consistent with the
# gun.zip blocks above (the original left figure9_10/data.mat on disk).
mfcc_url = ("https://www.dropbox.com/s/ybzkw5v6h46bv22/figure9_10.zip"
            "?dl=1&sa=D&sntz=1&usg=AFQjCNEDp3G8OKGC-Zj5yucpSSCz7WRpRg")
with ZipFile(io.BytesIO(urlopen(mfcc_url).read())) as fzip:
    mat = fzip.extract("figure9_10/data.mat")
data = loadmat(mat)
os.remove(mat)  # clean up the extracted file once it is in memory
queen_df = pd.DataFrame(data['mfcc_queen'][0], columns=['under_pressure'])
vanilla_ice_df = pd.DataFrame(data['mfcc_vanilla_ice'][0], columns=['ice_ice_baby'])
queen_df.to_csv("queen.csv", index=False)
vanilla_ice_df.to_csv("vanilla_ice.csv", index=False)
# DNA series for the consensus-motifs tutorial: one CSV per species,
# each .mat member read straight out of the downloaded zip archive.
dna_url = 'https://sites.google.com/site/consensusmotifs/dna.zip?attredirects=0&d=1'
dna_bytes = urllib.request.urlopen(dna_url, context=context).read()
dna_zip = ZipFile(io.BytesIO(dna_bytes))
for animal in ('python', 'hippo', 'red_flying_fox', 'alpaca'):
    with dna_zip.open(f'dna/data/{animal}.mat') as member:
        series = loadmat(member)['ts'].flatten().astype(float)
    pd.DataFrame(series).to_csv(f"{animal}.csv", index=False)
# Multidimensional toy data from the mSTAMP repository. Columns arrive
# in the order T3, T2, T1 and are reordered to T1, T2, T3 for output.
url = "https://github.com/mcyeh/mstamp/blob/master/Python/toy_data.mat?raw=true"
payload = urllib.request.urlopen(url, context=context).read()
toy_mat = loadmat(io.BytesIO(payload))
series = toy_mat['data']
# Preserve the array's native dtype when building the frame.
df = pd.DataFrame(series, dtype=series.dtype, columns=['T3', 'T2', 'T1'])
df = df[['T1', 'T2', 'T3']]
df.to_csv("toy.csv", index=False)