Instead of maintaining a labels.csv and a metadata.csv that are matched via a separate numeric index, we use the clip_id as the index of the labels.csv files. This lets us match the labels directly with the Mel spectrogram .npz files, without needing the additional *metadata.csv file.
(These files are used in Tutorial Part 1)
import pandas as pd
from os.path import join
# --- Paths and file-name patterns ---
# Directory that holds both the input and the output CSV files.
METADATA_PATH = 'metadata'

# Inputs: the '%s' placeholder is filled with the task name
# ('instrumental', 'genres' or 'moods').
LABEL_FILE_PATTERN = join(
    METADATA_PATH,
    'ismir2018_tut_part_1_%s_labels_subset_post.csv',
)
META_FILE_PATTERN = join(
    METADATA_PATH,
    'ismir2018_tut_part_1_%s_metadata_subset.csv',
)

# Output: label file of the same task, re-indexed by clip_id.
OUT_LABEL_FILE_PATTERN = join(
    METADATA_PATH,
    'ismir2018_tut_part_1_%s_labels_subset_w_clipid.csv',
)
# Tasks to convert; each name is substituted into the file patterns above.
tasks = ['instrumental', 'genres', 'moods']

# For every task: load the label and metadata CSVs (both indexed by their
# first column), replace the numeric label index with the matching clip_id
# taken from the metadata, and write the re-indexed labels to a new CSV.
for task in tasks:
    label_file = LABEL_FILE_PATTERN % task
    meta_file = META_FILE_PATTERN % task
    label_file_out = OUT_LABEL_FILE_PATTERN % task

    labels = pd.read_csv(label_file, index_col=0)
    metadata = pd.read_csv(meta_file, index_col=0)

    print("Task:", task)
    print("Labels shape:", labels.shape)
    print("Metadata shape:", metadata.shape)

    print("Assigning clip id from metadata to labels")

    # Every label row needs a metadata row to take its clip_id from. Fail
    # with an explicit message instead of an opaque lookup error (or NaN
    # clip ids on older pandas versions).
    missing = labels.index.difference(metadata.index)
    if not missing.empty:
        raise KeyError(
            "{} label row(s) of task '{}' have no metadata entry, e.g. {}".format(
                len(missing), task, list(missing[:5])
            )
        )

    # Replace the numeric index in labels by the 'clip_id' column of the
    # metadata, looked up in the row order of labels. A single .loc call
    # selects rows and column at once (no chained indexing). The resulting
    # Series is named 'clip_id', which becomes the name of the new index.
    labels.index = metadata.loc[labels.index, 'clip_id']

    # write
    labels.to_csv(label_file_out)
    print("Created " + label_file_out)
Task: instrumental Labels shape: (1680, 1) Metadata shape: (1703, 10) Assigning clip id from metadata to labels Created metadata/ismir2018_tut_part_1_instrumental_labels_subset_w_clipid.csv Task: genres Labels shape: (1998, 8) Metadata shape: (1998, 10) Assigning clip id from metadata to labels Created metadata/ismir2018_tut_part_1_genres_labels_subset_w_clipid.csv Task: moods Labels shape: (719, 4) Metadata shape: (719, 10) Assigning clip id from metadata to labels Created metadata/ismir2018_tut_part_1_moods_labels_subset_w_clipid.csv
# Sanity check: preview the first rows of `labels` as left by the last loop
# iteration (task 'moods'); the index should now hold the clip_id values.
labels.head()
| | loud | quiet | soft | strange |
---|---|---|---|---|
clip_id | ||||
30064 | 0 | 0 | 0 | 1 |
5862 | 0 | 0 | 0 | 0 |
38362 | 1 | 0 | 0 | 0 |
44901 | 0 | 0 | 0 | 1 |
16246 | 0 | 0 | 1 | 1 |
# Sanity check: preview the first rows of `metadata` as left by the last loop
# iteration; its 'clip_id' column is the source of the new label index.
metadata.head()
| | clip_id | mp3_path | track_number | title | artist | album | url | segmentStart | segmentEnd | original_url |
---|---|---|---|---|---|---|---|---|---|---|
13708 | 30064 | D:/Research/Data/MIR/MagnaTagATune/mp3_full/c/... | 7 | -A Lake- | LVX Nova | LVX Nova | http://www.magnatune.com/artists/albums/lvxnov... | 59 | 88 | http://he3.magnatune.com/all/07--A%20Lake--LVX... |
2697 | 5862 | D:/Research/Data/MIR/MagnaTagATune/mp3_full/f/... | 2 | -BWV54 - II Recitative- | American Bach Soloists | J.S. Bach Solo Cantatas | http://www.magnatune.com/artists/albums/abs-so... | 30 | 59 | http://he3.magnatune.com/all/02--BWV54%20-%20I... |
17495 | 38362 | D:/Research/Data/MIR/MagnaTagATune/mp3_full/0/... | 9 | -Die Today- | Rocket City Riot | Last Of The Pleasure Seekers | http://www.magnatune.com/artists/albums/rocket... | 88 | 117 | http://he3.magnatune.com/all/09--Die%20Today--... |
20431 | 44901 | D:/Research/Data/MIR/MagnaTagATune/mp3_full/f/... | 11 | -In Gaway- | The Headroom Project | Jetuton Andawai | http://www.magnatune.com/artists/albums/headro... | 30 | 59 | http://he3.magnatune.com/all/11--In%20Gaway--T... |
7423 | 16246 | D:/Research/Data/MIR/MagnaTagATune/mp3_full/f/... | 4 | -Industrial Blues- | The Headroom Project | Jetuton Andawai | http://www.magnatune.com/artists/albums/headro... | 30 | 59 | http://he3.magnatune.com/all/04--Industrial%20... |