#!/usr/bin/env python
# coding: utf-8

# In[1]:


import pandas as pd
from pathlib import Path
import tensorflow as tf
from tensorflow import keras


# In[2]:


# Each chorale is a list of chords; each chord holds 4 note numbers
# (one per voice), where 0 means "no note played".
data_path = Path("Data/")
train_files = sorted(data_path.glob("train/chorale_*.csv"))
valid_files = sorted(data_path.glob("valid/chorale_*.csv"))
test_files = sorted(data_path.glob("test/chorale_*.csv"))

def load_chorales(filepaths):
    return [pd.read_csv(filepath).values.tolist() for filepath in filepaths]

train_chorales = load_chorales(train_files)
valid_chorales = load_chorales(valid_files)
test_chorales = load_chorales(test_files)


# In[3]:


# Collect the set of note values used across all splits (0 = silence).
notes = set()
for chorales in (train_chorales, valid_chorales, test_chorales):
    for chorale in chorales:
        for chord in chorale:
            notes |= set(chord)

n_notes = len(notes)
min_note = min(notes - {0})
max_note = max(notes)

assert min_note == 36
assert max_note == 81


# In[4]:


def create_target(batch):
    X = batch[:, :-1]
    Y = batch[:, 1:]  # predict the next note in each arpeggio, at each step
    return X, Y

def preprocess(window):
    window = tf.where(window == 0, window, window - min_note + 1)  # shift note values
    return tf.reshape(window, [-1])  # flatten the chords into an arpeggio

def bach_dataset(chorales, batch_size=32, shuffle_buffer_size=None,
                 window_size=32, window_shift=16, cache=True):
    def batch_window(window):
        return window.batch(window_size + 1)

    def to_windows(chorale):
        dataset = tf.data.Dataset.from_tensor_slices(chorale)
        dataset = dataset.window(window_size + 1, window_shift, drop_remainder=True)
        return dataset.flat_map(batch_window)

    chorales = tf.ragged.constant(chorales, ragged_rank=1)
    dataset = tf.data.Dataset.from_tensor_slices(chorales)
    dataset = dataset.flat_map(to_windows).map(preprocess)
    if cache:
        dataset = dataset.cache()
    if shuffle_buffer_size:
        dataset = dataset.shuffle(shuffle_buffer_size)
    dataset = dataset.batch(batch_size)
    dataset = dataset.map(create_target)
    return dataset.prefetch(1)


# In[5]:


train_set = bach_dataset(train_chorales, shuffle_buffer_size=1000)
valid_set = bach_dataset(valid_chorales)
test_set = bach_dataset(test_chorales)


# In[6]:


# WaveNet-style stack of causal convolutions with doubling dilation rates,
# followed by an LSTM layer and a softmax over the possible note values.
model = keras.Sequential(name="Music_RNN", layers=[
    keras.layers.Embedding(input_dim=n_notes, output_dim=5, input_shape=[None]),
    keras.layers.Conv1D(32, kernel_size=2, padding="causal", activation="relu"),
    keras.layers.BatchNormalization(),
    keras.layers.Conv1D(48, kernel_size=2, padding="causal", activation="relu",
                        dilation_rate=2),
    keras.layers.BatchNormalization(),
    keras.layers.Conv1D(64, kernel_size=2, padding="causal", activation="relu",
                        dilation_rate=4),
    keras.layers.BatchNormalization(),
    keras.layers.Conv1D(96, kernel_size=2, padding="causal", activation="relu",
                        dilation_rate=8),
    keras.layers.BatchNormalization(),
    keras.layers.Conv1D(128, kernel_size=2, padding="causal", activation="relu",
                        dilation_rate=16),
    keras.layers.BatchNormalization(),
    keras.layers.LSTM(256, return_sequences=True),
    keras.layers.Dense(n_notes, activation="softmax")
])

model.summary()


# In[7]:


optimizer = keras.optimizers.Nadam(learning_rate=1e-3)
model.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer,
              metrics=["accuracy"])
model.fit(train_set, epochs=20, validation_data=valid_set)


# In[8]:


model.evaluate(test_set)
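

# In[ ]:


# Optional: a minimal sketch (not part of the original run) for persisting the
# trained model so generation can be rerun without retraining. Assumes a Keras
# version that supports the native .keras format; the filename is illustrative.
model.save("bach_model.keras")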


# In[9]:


def generate_chorale(model, seed_chords, length, temperature=1):
    arpeggio = preprocess(tf.constant(seed_chords, dtype=tf.int64))
    arpeggio = tf.reshape(arpeggio, [1, -1])
    for chord in range(length):
        for note in range(4):  # sample the 4 voices of the next chord, one by one
            next_note_probas = model.predict(arpeggio)[0, -1:]
            rescaled_logits = tf.math.log(next_note_probas) / temperature
            next_note = tf.random.categorical(rescaled_logits, num_samples=1)
            arpeggio = tf.concat([arpeggio, next_note], axis=1)
    arpeggio = tf.where(arpeggio == 0, arpeggio, arpeggio + min_note - 1)  # undo the shift
    return tf.reshape(arpeggio, shape=[-1, 4])


# In[10]:


import numpy as np
from IPython.display import Audio, display

def notes_to_frequencies(notes):
    # Frequency doubles when you go up one octave; there are 12 semitones
    # per octave; note A on octave 4 is 440 Hz, and it is note number 69.
    return 2 ** ((np.array(notes) - 69) / 12) * 440

def frequencies_to_samples(frequencies, tempo, sample_rate):
    note_duration = 60 / tempo  # the tempo is measured in beats per minute
    # To reduce the click sound at every beat, we round the frequencies to try
    # to get the samples close to zero at the end of each note.
    frequencies = (note_duration * frequencies).round() / note_duration
    n_samples = int(note_duration * sample_rate)
    time = np.linspace(0, note_duration, n_samples)
    sine_waves = np.sin(2 * np.pi * frequencies.reshape(-1, 1) * time)
    # Mute all notes with frequencies ≤ 9 Hz (this includes note 0 = silence).
    sine_waves *= (frequencies > 9.).reshape(-1, 1)
    return sine_waves.reshape(-1)

def chords_to_samples(chords, tempo, sample_rate):
    freqs = notes_to_frequencies(chords)
    freqs = np.r_[freqs, freqs[-1:]]  # make the last note a bit longer
    merged = np.mean([frequencies_to_samples(melody, tempo, sample_rate)
                      for melody in freqs.T], axis=0)
    n_fade_out_samples = sample_rate * 60 // tempo  # fade out the last note
    fade_out = np.linspace(1., 0., n_fade_out_samples)**2
    merged[-n_fade_out_samples:] *= fade_out
    return merged

def play_chords(chords, tempo=160, amplitude=0.1, sample_rate=44100,
                filepath=None):
    samples = amplitude * chords_to_samples(chords, tempo, sample_rate)
    if filepath:
        from scipy.io import wavfile
        samples = (2**15 * samples).astype(np.int16)
        wavfile.write(filepath, sample_rate, samples)
        return display(Audio(filepath))
    else:
        return display(Audio(samples, rate=sample_rate))


# In[11]:


# Temperatures above 1 flatten the distribution, making the sampled notes
# more random; values below 1 make the sampling more conservative.
seed_chords = test_chorales[3][:8]
new_chorale_hot = generate_chorale(model, seed_chords, 56, temperature=1.5)
play_chords(new_chorale_hot, filepath="bach.wav")
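

# In[ ]:


# Optional follow-up (not in the original run): a minimal sketch comparing
# temperatures, generating a more conservative chorale from the same seed.
# The 0.8 value and the "bach_cold.wav" filename are illustrative choices.
new_chorale_cold = generate_chorale(model, seed_chords, 56, temperature=0.8)
play_chords(new_chorale_cold, filepath="bach_cold.wav")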