# download dataset & unzip
import io
import os
import urllib
import urllib.request
import zipfile
# Fetch the UCR 2021 anomaly archive and unpack it into the current working
# directory. The archive is read fully into memory first because ZipFile
# needs a seekable file object.
DATA_URL = 'https://www.cs.ucr.edu/~eamonn/time_series_data_2018/UCR_TimeSeriesAnomalyDatasets2021.zip'
# Use the response as a context manager so the connection is always closed,
# even if reading the body fails.
with urllib.request.urlopen(DATA_URL) as response:
    bytes_io = io.BytesIO(response.read())
with zipfile.ZipFile(bytes_io) as zf:
    zf.extractall()
# Output directory for the converted CSV files; created up front so later
# writes do not fail on a missing folder.
SAVE_TO = 'UCR'
os.makedirs(SAVE_TO, exist_ok=True)

# Folder (relative to the working directory) the raw series files are read from.
DATA_PATH = os.path.join(*(
    'AnomalyDatasets_2021',
    'UCR_TimeSeriesAnomalyDatasets2021',
    'FilesAreInHere',
    'UCR_Anomaly_FullData',
))
import csv
import numpy as np
import pandas as pd
from tqdm import tqdm
# Every file name encodes the metadata of its series. Example:
#
#   012_UCR_Anomaly_tiltAPB1_100000_114283_114350.txt
#
#   012       Dataset number
#   tiltAPB1  Mnemonic name
#   100000    From 1 to X is training data
#   114283    Begin anomaly
#   114350    End anomaly


def build_df(data, start=0, step=300, initial_time=1222819200):
    """Wrap raw series values in a DataFrame with a synthetic timestamp column.

    Row ``i`` gets the timestamp ``initial_time + (start + i) * step``.

    Args:
        data: Array-like of values. A scalar is treated as a one-row series;
            a 1-D input or a single-column 2-D input is univariate; a 2-D
            input with more than one column is multivariate.
        start: Index assigned to the first row.
        step: Spacing between consecutive timestamps.
        initial_time: Timestamp of index 0 (presumably seconds since the Unix
            epoch -- confirm against the consumers of these files).

    Returns:
        pandas.DataFrame. Univariate input yields the columns ``timestamp``
        and ``value``. Multivariate input keeps pandas' default integer
        column labels and appends ``timestamp`` as the last column.
        ``timestamp`` is always cast to int64.
    """
    # Accept lists/tuples as well as arrays, and promote a 0-d input (e.g. a
    # file holding a single number) to one row so len() works.
    data = np.atleast_1d(np.asarray(data))

    # Build the index as int64 explicitly so the timestamp arithmetic cannot
    # wrap around on platforms whose default integer is 32 bits wide.
    index = np.arange(start, start + len(data), dtype=np.int64)
    timestamp = index * step + initial_time

    if data.ndim > 1 and data.shape[1] > 1:
        print("MULTIVARIATE")
        df = pd.DataFrame(data)
        df['timestamp'] = timestamp
    else:
        df = pd.DataFrame({'timestamp': timestamp, 'value': data.reshape(-1)})

    # Still needed when a non-integer step or initial_time is passed.
    df['timestamp'] = df['timestamp'].astype('int64')
    return df
# Sanity check: load one series and preview its first rows. The path is built
# with os.path.join, like every other path derived from DATA_PATH in this file.
df = build_df(np.loadtxt(os.path.join(DATA_PATH, '204_UCR_Anomaly_CHARISfive_12412_15000_15070.txt')))
df.head()
| | timestamp | value |
---|---|---|
0 | 1222819200 | 1990.0 |
1 | 1222819500 | 1996.0 |
2 | 1222819800 | 1958.0 |
3 | 1222820100 | 1958.0 |
4 | 1222820400 | 1923.0 |
# Convert every raw series in DATA_PATH into three CSV files under SAVE_TO
# (train split, test split, full series) and record, per signal, the anomalous
# interval expressed as timestamps.
#
# Only '.txt' entries are processed, so stray directory entries (hidden files,
# notes, ...) cannot break the file-name parsing below.
files = [name for name in os.listdir(DATA_PATH) if name.endswith('.txt')]
file_names, train_sizes, intervals = [], [], []
for file in tqdm(files):
    # Names follow <number>_UCR_Anomaly_<name>_<train size>_<begin>_<end>.txt
    file_num_str, _, _, file_name, train_size_str, begin_str, end_str = file.split("_")
    train_size, begin_index = int(train_size_str), int(begin_str)
    end_index = int(end_str.split('.')[0])  # drop the '.txt' extension
    file_name = file_num_str + "-" + file_name

    # get timestamp from data
    # NOTE(review): the positions from the file name are used directly as
    # 0-based iloc positions; confirm whether they are meant to be 1-based.
    df = build_df(np.loadtxt(os.path.join(DATA_PATH, file)))
    begin_anomaly = int(df.timestamp.iloc[begin_index])
    end_anomaly = int(df.timestamp.iloc[end_index])

    # train - test split
    train_df = df.iloc[:train_size]
    test_df = df.iloc[train_size:]

    # save file
    train_df.to_csv(os.path.join(SAVE_TO, '{}-train.csv'.format(file_name)), index=False)
    test_df.to_csv(os.path.join(SAVE_TO, '{}-test.csv'.format(file_name)), index=False)
    df.to_csv(os.path.join(SAVE_TO, '{}.csv'.format(file_name)), index=False)

    file_names.append(file_name)
    train_sizes.append(train_size)
    intervals.append([begin_anomaly, end_anomaly])
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 250/250 [00:19<00:00, 12.84it/s]
# One row per signal: [signal name, list of anomalous intervals]. Each signal
# has exactly one interval here, hence the single-element outer list.
# file_names and intervals are filled in lockstep, so pairing them with zip
# matches positional indexing.
rows = [[signal, [interval]] for signal, interval in zip(file_names, intervals)]
rows
[['183-qtdbSel100MLII', [[1226839200, 1226959200]]], ['194-sddb49', [[1243204200, 1243279200]]], ['069-DISTORTEDinsectEPG5', [[1225369200, 1225369500]]], ['023-DISTORTEDGP711MarkerLFM5z5', [[1225402800, 1225434000]]], ['212-Italianpowerdemand', [[1231663200, 1231670400]]], ['180-ltstdbs30791ES', [[1238599200, 1238659200]]], ['058-DISTORTEDapneaecg', [[1226491200, 1226511600]]], ['130-GP711MarkerLFM5z4', [[1224777300, 1224812700]]], ['079-DISTORTEDresperation2', [[1273294200, 1273294200]]], ['224-mit14046longtermecg', [[1280179200, 1280329200]]], ['044-DISTORTEDPowerDemand1', [[1228364700, 1228465500]]], ['233-mit14157longtermecg', [[1230169200, 1230169500]]], ['042-DISTORTEDLab2Cmac011215EPG6', [[1226476200, 1226545200]]], ['114-CIMIS44AirTemperature2', [[1224530100, 1224537300]]], ['204-CHARISfive', [[1227319200, 1227340200]]], ['241-taichidbS0715Master', [[1474039200, 1474549200]]], ['028-DISTORTEDInternalBleeding17', [[1223778600, 1223811900]]], ['026-DISTORTEDInternalBleeding15', [[1224524400, 1224575400]]], ['092-DISTORTEDtiltAPB4', [[1243217700, 1243218000]]], ['214-STAFFIIIDatabase', [[1260535200, 1260730200]]], ['070-DISTORTEDltstdbs30791AI', [[1238599200, 1238659200]]], ['006-DISTORTEDCIMIS44AirTemperature2', [[1224530100, 1224537300]]], ['036-DISTORTEDInternalBleeding9', [[1224798900, 1224823500]]], ['198-tiltAPB2', [[1260066900, 1260314700]]], ['149-Lab2Cmac011215EPG5', [[1228036200, 1228075200]]], ['032-DISTORTEDInternalBleeding4', [[1224221700, 1224329100]]], ['099-NOISEInternalBleeding6', [[1223861400, 1223907900]]], ['137-InternalBleeding18', [[1224164700, 1224195300]]], ['216-STAFFIIIDatabase', [[1271035200, 1271230200]]], ['057-DISTORTEDapneaecg4', [[1227619200, 1227649200]]], ['087-DISTORTEDsel840mECG1', [[1238230200, 1238341200]]], ['103-NOISETkeepThirdMARS', [[1224232500, 1224261900]]], ['223-mit14046longtermecg', [[1262179200, 1262329200]]], ['179-ltstdbs30791AS', [[1238599200, 1238659200]]], ['178-ltstdbs30791AI', [[1238599200, 1238659200]]], 
['189-resperation3', [[1270294200, 1270294500]]], ['225-mit14046longtermecg', [[1265719200, 1265809200]]], ['190-resperation4', [[1261348200, 1261348500]]], ['229-mit14134longtermecg', [[1240207200, 1240210200]]], ['186-resperation1', [[1255897200, 1255942800]]], ['064-DISTORTEDgaitHunt3', [[1234339200, 1234579200]]], ['219-STAFFIIIDatabase', [[1242208800, 1242274800]]], ['172-gaitHunt3', [[1234339200, 1234579200]]], ['144-InternalBleeding9', [[1224798900, 1224823500]]], ['027-DISTORTEDInternalBleeding16', [[1224075300, 1224078900]]], ['033-DISTORTEDInternalBleeding5', [[1224679200, 1224730200]]], ['005-DISTORTEDCIMIS44AirTemperature1', [[1224436500, 1224436800]]], ['003-DISTORTED3sddb40', [[1236799200, 1236889200]]], ['250-weallwalk', [[1225006200, 1225008000]]], ['039-DISTORTEDLab2Cmac011215EPG3', [[1227736200, 1227745200]]], ['111-3sddb40', [[1236799200, 1236889200]]], ['150-Lab2Cmac011215EPG6', [[1226476200, 1226545200]]], ['002-DISTORTED2sddb40', [[1239799200, 1239889200]]], ['221-STAFFIIIDatabase', [[1271908800, 1272274800]]], ['077-DISTORTEDresperation11', [[1256059200, 1256059500]]], ['227-mit14134longtermecg', [[1231519200, 1231549200]]], ['222-mit14046longtermecg', [[1250179200, 1250329200]]], ['066-DISTORTEDinsectEPG2', [[1225219200, 1225226700]]], ['013-DISTORTEDECG3', [[1227619200, 1227649200]]], ['051-DISTORTEDTkeepSecondMARS', [[1225618200, 1225621200]]], ['163-apneaecg2', [[1229104200, 1229149200]]], ['141-InternalBleeding5', [[1224679200, 1224730200]]], ['041-DISTORTEDLab2Cmac011215EPG5', [[1228036200, 1228075200]]], ['151-MesoplodonDensirostris', [[1228603200, 1228651200]]], ['213-STAFFIIIDatabase', [[1260895200, 1261030200]]], ['016-DISTORTEDECG4', [[1227889200, 1227949200]]], ['074-DISTORTEDqtdbSel1005V', [[1226539200, 1226659200]]], ['045-DISTORTEDPowerDemand2', [[1229826300, 1229934300]]], ['247-tilt12755mtable', [[1259389200, 1259413200]]], ['147-Lab2Cmac011215EPG3', [[1227736200, 1227745200]]], ['104-NOISEapneaecg4', [[1227619200, 
1227649200]]], ['062-DISTORTEDgaitHunt1', [[1232740200, 1232773200]]], ['108-NOISEresperation2', [[1273294200, 1273294200]]], ['125-ECG4', [[1227919200, 1227949200]]], ['030-DISTORTEDInternalBleeding19', [[1224075300, 1224078300]]], ['106-NOISEgaitHunt2', [[1232179200, 1232374200]]], ['146-Lab2Cmac011215EPG2', [[1231177800, 1231198800]]], ['084-DISTORTEDs20101mML2', [[1233551400, 1233581400]]], ['022-DISTORTEDGP711MarkerLFM5z4', [[1224777300, 1224812700]]], ['060-DISTORTEDgait2', [[1236769200, 1236859200]]], ['202-CHARISfive', [[1226118600, 1226127600]]], ['210-Italianpowerdemand', [[1245289200, 1245318000]]], ['242-tilt12744mtable', [[1254208200, 1254286200]]], ['126-ECG4', [[1227919200, 1227949200]]], ['055-DISTORTEDapneaecg2', [[1229104200, 1229149200]]], ['165-apneaecg4', [[1227619200, 1227649200]]], ['059-DISTORTEDgait1', [[1234369200, 1234459200]]], ['220-STAFFIIIDatabase', [[1298035200, 1298230200]]], ['199-tiltAPB3', [[1257019200, 1257130200]]], ['048-DISTORTEDTkeepFifthMARS', [[1224615600, 1224644700]]], ['193-s20101m', [[1233551400, 1233581400]]], ['232-mit14134longtermecg', [[1240078200, 1240156200]]], ['152-PowerDemand1', [[1228364700, 1228465500]]], ['164-apneaecg3', [[1226152500, 1226182500]]], ['020-DISTORTEDGP711MarkerLFM5z2', [[1224971700, 1225035600]]], ['244-tilt12754table', [[1254208200, 1254286200]]], ['173-insectEPG1', [[1224919200, 1224928200]]], ['124-ECG4', [[1227889200, 1227949200]]], ['208-CHARISten', [[1231197900, 1231215900]]], ['158-TkeepForthMARS', [[1224615600, 1224644700]]], ['053-DISTORTEDWalkingAceleration1', [[1223648400, 1223717700]]], ['031-DISTORTEDInternalBleeding20', [[1224546900, 1224594900]]], ['015-DISTORTEDECG4', [[1227859200, 1227949200]]], ['056-DISTORTEDapneaecg3', [[1226152500, 1226182500]]], ['184-resperation10', [[1262029200, 1262383200]]], ['157-TkeepFirstMARS', [[1224428700, 1224433200]]], ['035-DISTORTEDInternalBleeding8', [[1224578700, 1224611400]]], ['226-mit14046longtermecg', [[1259719200, 1259809200]]], 
['052-DISTORTEDTkeepThirdMARS', [[1224232500, 1224261900]]], ['171-gaitHunt2', [[1232179200, 1232374200]]], ['160-TkeepThirdMARS', [[1224232500, 1224261900]]], ['231-mit14134longtermecg', [[1237078200, 1237156200]]], ['001-DISTORTED1sddb40', [[1238419200, 1238605200]]], ['113-CIMIS44AirTemperature1', [[1224436500, 1224436800]]], ['040-DISTORTEDLab2Cmac011215EPG4', [[1228036200, 1228075200]]], ['093-NOISE1sddb40', [[1238419200, 1238605200]]], ['228-mit14134longtermecg', [[1237168200, 1237174200]]], ['102-NOISEMesoplodonDensirostris', [[1228603200, 1228651200]]], ['096-NOISEECG4', [[1227889200, 1227949200]]], ['136-InternalBleeding17', [[1223778600, 1223811900]]], ['082-DISTORTEDresperation4', [[1261348200, 1261348500]]], ['097-NOISEGP711MarkerLFM5z3', [[1224603600, 1224617100]]], ['012-DISTORTEDECG2', [[1227619200, 1227649200]]], ['246-tilt12755mtable', [[1304059200, 1304140200]]], ['107-NOISEinsectEPG3', [[1224919200, 1224934200]]], ['120-ECG2', [[1227619200, 1227649200]]], ['134-InternalBleeding15', [[1224524400, 1224575400]]], ['004-DISTORTEDBIDMC1', [[1224439200, 1224499200]]], ['207-CHARISten', [[1230897900, 1230915900]]], ['073-DISTORTEDpark3m', [[1244464200, 1244567700]]], ['008-DISTORTEDCIMIS44AirTemperature4', [[1224483900, 1224498300]]], ['105-NOISEgait3', [[1240789200, 1240969200]]], ['068-DISTORTEDinsectEPG4', [[1224771600, 1224786600]]], ['139-InternalBleeding20', [[1224546900, 1224594900]]], ['017-DISTORTEDECG4', [[1227919200, 1227949200]]], ['196-sel840mECG2', [[1237630200, 1237741200]]], ['086-DISTORTEDsddb49', [[1243204200, 1243279200]]], ['085-DISTORTEDs20101m', [[1233551400, 1233581400]]], ['217-STAFFIIIDatabase', [[1268035200, 1268230200]]], ['211-Italianpowerdemand', [[1234591200, 1234620000]]], ['181-park3m', [[1244464200, 1244567700]]], ['129-GP711MarkerLFM5z3', [[1224603600, 1224617100]]], ['245-tilt12754table', [[1304059200, 1304140200]]], ['234-mit14157longtermecg', [[1230199200, 1230199500]]], ['161-WalkingAceleration1', [[1223648400, 
1223717700]]], ['067-DISTORTEDinsectEPG3', [[1224919200, 1224934200]]], ['170-gaitHunt1', [[1232740200, 1232773200]]], ['118-CIMIS44AirTemperature6', [[1224621000, 1224635400]]], ['143-InternalBleeding8', [[1224578700, 1224611400]]], ['155-PowerDemand4', [[1230020700, 1230042300]]], ['162-WalkingAceleration5', [[1224595200, 1224612900]]], ['191-resperation9', [[1265842500, 1265872500]]], ['034-DISTORTEDInternalBleeding6', [[1223861400, 1223907900]]], ['110-2sddb40', [[1239799200, 1239889200]]], ['009-DISTORTEDCIMIS44AirTemperature5', [[1224274800, 1224289200]]], ['133-InternalBleeding14', [[1224501300, 1224509400]]], ['088-DISTORTEDsel840mECG2', [[1237630200, 1237741200]]], ['203-CHARISfive', [[1226117700, 1226127600]]], ['010-DISTORTEDCIMIS44AirTemperature6', [[1224621000, 1224635400]]], ['089-DISTORTEDtiltAPB1', [[1257104100, 1257124200]]], ['037-DISTORTEDLab2Cmac011215EPG1', [[1227982200, 1227997200]]], ['249-weallwalk', [[1225304700, 1225313700]]], ['081-DISTORTEDresperation3', [[1270294200, 1270294500]]], ['128-GP711MarkerLFM5z2', [[1224971700, 1225035600]]], ['201-CHARISfive', [[1227919500, 1227924000]]], ['071-DISTORTEDltstdbs30791AS', [[1238599200, 1238659200]]], ['083-DISTORTEDresperation9', [[1265842500, 1265872500]]], ['166-apneaecg', [[1226491200, 1226511600]]], ['235-mit14157longtermecg', [[1245454200, 1245454500]]], ['230-mit14134longtermecg', [[1228672200, 1228702200]]], ['091-DISTORTEDtiltAPB3', [[1257019200, 1257130200]]], ['148-Lab2Cmac011215EPG4', [[1228036200, 1228075200]]], ['049-DISTORTEDTkeepFirstMARS', [[1224428700, 1224433200]]], ['175-insectEPG3', [[1224919200, 1224934200]]], ['038-DISTORTEDLab2Cmac011215EPG2', [[1231177800, 1231198800]]], ['236-mit14157longtermecg', [[1236724200, 1236736200]]], ['167-gait1', [[1234369200, 1234459200]]], ['243-tilt12744mtable', [[1283825700, 1283839200]]], ['187-resperation2', [[1273294200, 1273294200]]], ['043-DISTORTEDMesoplodonDensirostris', [[1228603200, 1228651200]]], ['174-insectEPG2', [[1225219200, 
1225226700]]], ['182-qtdbSel1005V', [[1226539200, 1226659200]]], ['153-PowerDemand2', [[1229826300, 1229934300]]], ['014-DISTORTEDECG3', [[1227919200, 1227949200]]], ['095-NOISECIMIS44AirTemperature4', [[1224483900, 1224498300]]], ['237-mit14157longtermecg', [[1249687200, 1249930200]]], ['238-mit14157longtermecg', [[1244599200, 1244653200]]], ['078-DISTORTEDresperation1', [[1255897200, 1255942800]]], ['156-TkeepFifthMARS', [[1224615600, 1224644700]]], ['116-CIMIS44AirTemperature4', [[1224483900, 1224498300]]], ['011-DISTORTEDECG1', [[1226359200, 1226449200]]], ['007-DISTORTEDCIMIS44AirTemperature3', [[1224775200, 1224782400]]], ['169-gait3', [[1240789200, 1240969200]]], ['218-STAFFIIIDatabase', [[1286035200, 1286230200]]], ['054-DISTORTEDWalkingAceleration5', [[1224595200, 1224612900]]], ['209-Fantasia', [[1230910200, 1231000200]]], ['046-DISTORTEDPowerDemand3', [[1229840700, 1229862300]]], ['025-DISTORTEDInternalBleeding14', [[1224501300, 1224509400]]], ['094-NOISEBIDMC1', [[1224439200, 1224499200]]], ['142-InternalBleeding6', [[1223861400, 1223907900]]], ['159-TkeepSecondMARS', [[1225618200, 1225621200]]], ['206-CHARISten', [[1231543200, 1231561200]]], ['138-InternalBleeding19', [[1224075300, 1224078300]]], ['123-ECG4', [[1227859200, 1227949200]]], ['135-InternalBleeding16', [[1224075300, 1224078900]]], ['192-s20101mML2', [[1233551400, 1233581400]]], ['185-resperation11', [[1256059200, 1256059500]]], ['115-CIMIS44AirTemperature3', [[1224775200, 1224782400]]], ['140-InternalBleeding4', [[1224221700, 1224329100]]], ['101-NOISELab2Cmac011215EPG4', [[1228036200, 1228075200]]], ['080-DISTORTEDresperation2', [[1273294200, 1273294500]]], ['119-ECG1', [[1226359200, 1226449200]]], ['200-tiltAPB4', [[1243217700, 1243218000]]], ['090-DISTORTEDtiltAPB2', [[1260066900, 1260314700]]], ['177-insectEPG5', [[1225369200, 1225369500]]], ['205-CHARISfive', [[1231517700, 1231544700]]], ['061-DISTORTEDgait3', [[1240789200, 1240969200]]], ['047-DISTORTEDPowerDemand4', [[1230020700, 
1230042300]]], ['117-CIMIS44AirTemperature5', [[1224274800, 1224289200]]], ['145-Lab2Cmac011215EPG1', [[1227982200, 1227997200]]], ['019-DISTORTEDGP711MarkerLFM5z1', [[1224669600, 1224682800]]], ['131-GP711MarkerLFM5z5', [[1225402800, 1225434000]]], ['127-GP711MarkerLFM5z1', [[1224669600, 1224682800]]], ['021-DISTORTEDGP711MarkerLFM5z3', [[1224603600, 1224617100]]], ['168-gait2', [[1236769200, 1236859200]]], ['065-DISTORTEDinsectEPG1', [[1224919200, 1224928200]]], ['154-PowerDemand3', [[1229840700, 1229862300]]], ['072-DISTORTEDltstdbs30791ES', [[1238599200, 1238659200]]], ['121-ECG3', [[1227619200, 1227649200]]], ['076-DISTORTEDresperation10', [[1262029200, 1262383200]]], ['029-DISTORTEDInternalBleeding18', [[1224164700, 1224195300]]], ['248-weallwalk', [[1224229800, 1224231300]]], ['239-taichidbS0715Master', [[1400854200, 1400873400]]], ['024-DISTORTEDInternalBleeding10', [[1224177000, 1224186000]]], ['195-sel840mECG1', [[1238230200, 1238341200]]], ['188-resperation2', [[1273294200, 1273294500]]], ['112-BIDMC1', [[1224439200, 1224499200]]], ['122-ECG3', [[1227919200, 1227949200]]], ['100-NOISELab2Cmac011215EPG1', [[1227982200, 1227997200]]], ['215-STAFFIIIDatabase', [[1254835200, 1255030200]]], ['240-taichidbS0715Master', [[1488049200, 1488079200]]], ['018-DISTORTEDECG4', [[1227919200, 1227949200]]], ['197-tiltAPB1', [[1257104100, 1257124200]]], ['050-DISTORTEDTkeepForthMARS', [[1224615600, 1224644700]]], ['063-DISTORTEDgaitHunt2', [[1232179200, 1232374200]]], ['075-DISTORTEDqtdbSel100MLII', [[1226839200, 1226959200]]], ['132-InternalBleeding10', [[1224177000, 1224186000]]], ['109-1sddb40', [[1238419200, 1238605200]]], ['176-insectEPG4', [[1224771600, 1224786600]]], ['098-NOISEInternalBleeding16', [[1224075300, 1224078900]]]]
# save anomalies
anomalies_csv = SAVE_TO + '/anomalies.csv'
with open(anomalies_csv, 'w', newline='') as handle:
    csv.writer(handle).writerows(rows)

# Read the file straight back. It was written without a header row, so the
# column names are supplied here.
new_labels = pd.read_csv(
    anomalies_csv,
    header=None,
    names=['signal', 'events'],
)
new_labels
| | signal | events |
---|---|---|
0 | 183-qtdbSel100MLII | [[1226839200, 1226959200]] |
1 | 194-sddb49 | [[1243204200, 1243279200]] |
2 | 069-DISTORTEDinsectEPG5 | [[1225369200, 1225369500]] |
3 | 023-DISTORTEDGP711MarkerLFM5z5 | [[1225402800, 1225434000]] |
4 | 212-Italianpowerdemand | [[1231663200, 1231670400]] |
... | ... | ... |
245 | 075-DISTORTEDqtdbSel100MLII | [[1226839200, 1226959200]] |
246 | 132-InternalBleeding10 | [[1224177000, 1224186000]] |
247 | 109-1sddb40 | [[1238419200, 1238605200]] |
248 | 176-insectEPG4 | [[1224771600, 1224786600]] |
249 | 098-NOISEInternalBleeding16 | [[1224075300, 1224078900]] |
250 rows × 2 columns