#!/usr/bin/env python
# coding: utf-8

# # YearPredictionMSD
#
# Predicting the release year of an audio track. The features are derived
# from the 'timbre' features of The Echo Nest API. The dataset consists of
# 90 features and 515,345 examples.

# In[31]:

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
get_ipython().run_line_magic('matplotlib', 'inline')


# In[34]:

column_names = ['year',
                *('average' + str(i) for i in range(12)),
                *('covariance' + str(i) for i in range(78))]
data = pd.read_csv("YearPredictionMSD.txt", sep=",", header=None, names=column_names)


# In[35]:

data.head()


# You should respect the following train / test split:
#
# * train: first 463,715 examples
# * test: last 51,630 examples
#
# It avoids the 'producer effect' by making sure no song from a given artist
# ends up in both the train and test set.

# In[36]:

train = data.iloc[:463715, :]
test = data.iloc[463715:, :]
len(train), len(test)


# In[37]:

first_year, last_year = 1922, 2011
num_years = last_year - first_year + 1


# In[38]:

from torch import nn


# In[112]:

model = nn.Sequential()
model.add_module('l1', nn.Linear(90, num_years))
# model.add_module('activ', nn.ReLU())
# softmax over the class dimension (dim=1), not over the batch (dim=0)
model.add_module('smax', nn.Softmax(dim=1))


# In[113]:

import torch

opt = torch.optim.Adam(model.parameters(), lr=1e-3)


# In[44]:

# check that the sampled batch indices differ every time
a = np.random.randint(0, len(train), 5)
b = np.random.randint(0, len(train), 5)
a, b


# In[52]:

X = train.iloc[:, 1:].values
Y = train.iloc[:, 0].values


# In[57]:

Y[:5]  # peek at the raw year labels


# In[60]:

# one-hot encode the years: row idx is all zeros except a 1 at (year - first_year)
Y = np.eye(num_years, dtype=np.float32)[Y - first_year]


# In[74]:

Y = torch.tensor(Y, dtype=torch.float32)


# In[114]:

history = []
batch_size = len(train) // 50  # 9,274 examples per batch

for i in range(10):
    # sample batch_size random training examples
    ix = np.random.randint(0, len(train), batch_size)
    x_batch = torch.tensor(X[ix], dtype=torch.float32)
    y_batch = Y[ix]

    # predict class probabilities
    y_predicted = model(x_batch)

    # mean squared error between predicted probabilities and one-hot targets
    loss = torch.mean((y_predicted - y_batch) ** 2)

    loss.backward()   # compute new gradients
    opt.step()        # update the weights
    opt.zero_grad()   # clear gradients

    history.append(loss.item())
    print("step #%i | mean loss = %.3f" % (i, np.mean(history[-10:])))
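
# A minimal evaluation sketch on the held-out test split (this cell is an
# addition, not one of the original cells): it maps the predicted class
# probabilities back to calendar years via argmax and reports the mean
# absolute error in years. `X_test` and `years_test` are names introduced
# here for illustration.

# In[ ]:

X_test = torch.tensor(test.iloc[:, 1:].values, dtype=torch.float32)
years_test = torch.tensor(test.iloc[:, 0].values, dtype=torch.float32)

with torch.no_grad():
    probs = model(X_test)                                  # shape (51630, num_years)
    predicted_years = probs.argmax(dim=1).float() + first_year

print("test MAE = %.2f years" % torch.mean(torch.abs(predicted_years - years_test)).item())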
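
# Design note (also an addition, not the notebook's own approach): training a
# Softmax output with MSE against one-hot targets optimizes slowly; the
# conventional classification setup is raw logits plus nn.CrossEntropyLoss on
# integer class indices, which drops both the explicit Softmax layer and the
# one-hot encoding. A hedged sketch of one update step, reusing `X`, `train`,
# `first_year` and `num_years` from above; `logit_model`, `criterion` and
# `opt_ce` are names introduced here.

# In[ ]:

logit_model = nn.Linear(90, num_years)   # no Softmax: CrossEntropyLoss expects logits
criterion = nn.CrossEntropyLoss()
opt_ce = torch.optim.Adam(logit_model.parameters(), lr=1e-3)

ix = np.random.randint(0, len(train), 256)
x_batch = torch.tensor(X[ix], dtype=torch.float32)
# integer class indices instead of one-hot rows
y_idx = torch.tensor(train.iloc[ix, 0].values - first_year, dtype=torch.long)

loss = criterion(logit_model(x_batch), y_idx)
loss.backward()
opt_ce.step()
opt_ce.zero_grad()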