This commit is contained in:
Harry Stuart 2020-01-01 15:13:30 +11:00
parent 58ee46434f
commit 69d965f1d1
68 changed files with 319 additions and 1 deletion

1
.gitignore vendored

@@ -1 +0,0 @@

3
.vs/ProjectSettings.json Normal file

@@ -0,0 +1,3 @@
{
"CurrentProjectSetting": null
}

BIN
.vs/slnx.sqlite Normal file

Binary file not shown.

2
README.md Normal file

@@ -0,0 +1,2 @@
# tfworldhackathon
GitHub repo for the TensorFlow World hackathon

BIN
models/128.h5 Normal file

Binary file not shown.

Binary files not shown.

1
models/js/model.json Normal file

File diff suppressed because one or more lines are too long

BIN
output/piano/0-0.wav Normal file

Binary file not shown.

BIN
output/piano/100-0.wav Normal file

Binary file not shown.

BIN
output/piano/104-0.wav Normal file

Binary file not shown.

BIN
output/piano/108-0.wav Normal file

Binary file not shown.

BIN
output/piano/112-0.wav Normal file

Binary file not shown.

BIN
output/piano/12-0.wav Normal file

Binary file not shown.

BIN
output/piano/16-0.wav Normal file

Binary file not shown.

BIN
output/piano/20-0.wav Normal file

Binary file not shown.

BIN
output/piano/24-0.wav Normal file

Binary file not shown.

BIN
output/piano/28-0.wav Normal file

Binary file not shown.

BIN
output/piano/32-0.wav Normal file

Binary file not shown.

BIN
output/piano/36-0.wav Normal file

Binary file not shown.

BIN
output/piano/4-0.wav Normal file

Binary file not shown.

BIN
output/piano/40-0.wav Normal file

Binary file not shown.

BIN
output/piano/44-0.wav Normal file

Binary file not shown.

BIN
output/piano/48-0.wav Normal file

Binary file not shown.

BIN
output/piano/52-0.wav Normal file

Binary file not shown.

BIN
output/piano/56-0.wav Normal file

Binary file not shown.

BIN
output/piano/60-0.wav Normal file

Binary file not shown.

BIN
output/piano/64-0.wav Normal file

Binary file not shown.

BIN
output/piano/68-0.wav Normal file

Binary file not shown.

BIN
output/piano/72-0.wav Normal file

Binary file not shown.

BIN
output/piano/76-0.wav Normal file

Binary file not shown.

BIN
output/piano/8-0.wav Normal file

Binary file not shown.

BIN
output/piano/80-0.wav Normal file

Binary file not shown.

BIN
output/piano/84-0.wav Normal file

Binary file not shown.

BIN
output/piano/88-0.wav Normal file

Binary file not shown.

BIN
output/piano/92-0.wav Normal file

Binary file not shown.

BIN
output/piano/96-0.wav Normal file

Binary file not shown.

BIN
output/piano/real-0.wav Normal file

Binary file not shown.

BIN
output/piano/real-1.wav Normal file

Binary file not shown.

BIN
output/piano/real-2.wav Normal file

Binary file not shown.

BIN
output/piano/real-3.wav Normal file

Binary file not shown.

BIN
output/piano/real-4.wav Normal file

Binary file not shown.

BIN
output/piano/real-5.wav Normal file

Binary file not shown.

BIN
output/piano/real-6.wav Normal file

Binary file not shown.

BIN
output/piano/real-7.wav Normal file

Binary file not shown.

BIN
output/piano/real-8.wav Normal file

Binary file not shown.

BIN
output/piano/real-9.wav Normal file

Binary file not shown.

124
scripts/GANModels.py Normal file

@@ -0,0 +1,124 @@
# Import modules
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Lambda, Dense, LSTM, Activation, Input, Bidirectional, Dropout
from tensorflow.keras.layers import Reshape, Conv2DTranspose, TimeDistributed, Conv1D, LeakyReLU, Layer, ReLU
from tensorflow.keras.optimizers import Adam
import tensorflow.keras.backend as K

def Generator(d, num_samples, c=16):
    input_layer = Input(shape=(100,))

    # output shape = (None, 16, 16d)
    dense_layer0 = Dense(16*c*d)(input_layer)
    reshape_layer0 = Reshape((c, c*d))(dense_layer0)
    relu_layer0 = ReLU()(reshape_layer0)

    # Upsampling
    # output shape = (None, 64, 8d)
    c //= 2
    expanded_layer0 = Lambda(lambda x: K.expand_dims(x, axis=1))(relu_layer0)
    conv1d_t_layer0 = Conv2DTranspose(c*d, (1, 25), strides=(1, 4), padding='same')(expanded_layer0)
    slice_layer0 = Lambda(lambda x: x[:, 0])(conv1d_t_layer0)
    relu_layer1 = ReLU()(slice_layer0)

    # output shape = (None, 256, 4d)
    c //= 2
    expanded_layer1 = Lambda(lambda x: K.expand_dims(x, axis=1))(relu_layer1)
    conv1d_t_layer1 = Conv2DTranspose(c*d, (1, 25), strides=(1, 4), padding='same')(expanded_layer1)
    slice_layer1 = Lambda(lambda x: x[:, 0])(conv1d_t_layer1)
    relu_layer2 = ReLU()(slice_layer1)

    # output shape = (None, 1024, 2d)
    c //= 2
    expanded_layer2 = Lambda(lambda x: K.expand_dims(x, axis=1))(relu_layer2)
    conv1d_t_layer2 = Conv2DTranspose(c*d, (1, 25), strides=(1, 4), padding='same')(expanded_layer2)
    slice_layer2 = Lambda(lambda x: x[:, 0])(conv1d_t_layer2)
    relu_layer3 = ReLU()(slice_layer2)

    # output shape = (None, 4096, d)
    c //= 2
    expanded_layer3 = Lambda(lambda x: K.expand_dims(x, axis=1))(relu_layer3)
    conv1d_t_layer3 = Conv2DTranspose(c*d, (1, 25), strides=(1, 4), padding='same')(expanded_layer3)
    slice_layer3 = Lambda(lambda x: x[:, 0])(conv1d_t_layer3)
    relu_layer4 = ReLU()(slice_layer3)

    # output shape = (None, 16384, 1)
    expanded_layer4 = Lambda(lambda x: K.expand_dims(x, axis=1))(relu_layer4)
    conv1d_t_layer4 = Conv2DTranspose(1, (1, 25), strides=(1, 4), padding='same')(expanded_layer4)
    slice_layer4 = Lambda(lambda x: x[:, 0])(conv1d_t_layer4)

    #### The number of transposed convolution operations should be modified
    #### in accordance with num_samples. This current architecture expects
    #### num_samples == 16384

    # Squeeze values between (-1, 1)
    tanh_layer0 = Activation('tanh')(slice_layer4)

    model = Model(inputs=input_layer, outputs=tanh_layer0)

    return model

# Makes critic invariant to upsampling artifacts of generator to avoid the critic learning to
# easily identify generated audio from said artifacts
def _apply_phaseshuffle(x, rad=2, pad_type='reflect'):
    b, x_len, nch = x.get_shape().as_list()

    phase = tf.random.uniform([], minval=-rad, maxval=rad + 1, dtype=tf.int32)
    pad_l = tf.maximum(phase, 0)
    pad_r = tf.maximum(-phase, 0)
    phase_start = pad_r
    x = tf.pad(x, [[0, 0], [pad_l, pad_r], [0, 0]], mode=pad_type)

    x = x[:, phase_start:phase_start+x_len]
    x.set_shape([b, x_len, nch])

    return x

def Critic(d, num_samples, c=1):
    input_layer = Input(shape=(num_samples, 1))

    # Downsampling
    # output shape = (None, 4096, d)
    conv1d_layer0 = Conv1D(c*d, 25, strides=4, padding='same')(input_layer)
    LReLU_layer0 = LeakyReLU(alpha=0.2)(conv1d_layer0)
    phaseshuffle_layer0 = Lambda(lambda x: _apply_phaseshuffle(x))(LReLU_layer0)

    # output shape = (None, 1024, 2d)
    c *= 2
    conv1d_layer1 = Conv1D(c*d, 25, strides=4, padding='same')(phaseshuffle_layer0)
    LReLU_layer1 = LeakyReLU(alpha=0.2)(conv1d_layer1)
    phaseshuffle_layer1 = Lambda(lambda x: _apply_phaseshuffle(x))(LReLU_layer1)

    # output shape = (None, 256, 4d)
    c *= 2
    conv1d_layer2 = Conv1D(c*d, 25, strides=4, padding='same')(phaseshuffle_layer1)
    LReLU_layer2 = LeakyReLU(alpha=0.2)(conv1d_layer2)
    phaseshuffle_layer2 = Lambda(lambda x: _apply_phaseshuffle(x))(LReLU_layer2)

    # output shape = (None, 64, 8d)
    c *= 2
    conv1d_layer3 = Conv1D(c*d, 25, strides=4, padding='same')(phaseshuffle_layer2)
    LReLU_layer3 = LeakyReLU(alpha=0.2)(conv1d_layer3)
    phaseshuffle_layer3 = Lambda(lambda x: _apply_phaseshuffle(x))(LReLU_layer3)

    # output shape = (None, 16, 16d)
    c *= 2
    conv1d_layer4 = Conv1D(c*d, 25, strides=4, padding='same')(phaseshuffle_layer3)
    LReLU_layer4 = LeakyReLU(alpha=0.2)(conv1d_layer4)

    #### The number of convolution operations should be modified
    #### in accordance with num_samples. This current architecture expects
    #### num_samples == 16384

    # output shape = (None, 256d)
    reshape_layer0 = Reshape((16*c*d,))(LReLU_layer4)

    # Output a critic score
    dense_layer1 = Dense(1)(reshape_layer0)

    model = Model(inputs=input_layer, outputs=dense_layer1)

    return model
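
A quick way to sanity-check the two builders above is a shape smoke test. The following is a minimal sketch, not part of this commit, assuming TensorFlow 2.x with GANModels.py importable; d=64 and num_samples=16384 mirror MODEL_DIMS and NUM_SAMPLES in scripts/WGAN-GP.py below:

import tensorflow as tf
import GANModels

# Build both networks at the sizes used by the training script
G = GANModels.Generator(d=64, num_samples=16384)
D = GANModels.Critic(d=64, num_samples=16384)

z = tf.random.normal((2, 100))   # batch of 2 latent vectors
fake = G(z, training=False)      # -> (2, 16384, 1), values in (-1, 1) from the tanh
score = D(fake, training=False)  # -> (2, 1), unbounded critic score
print(fake.shape, score.shape)

_apply_phaseshuffle can be probed the same way; on a ramp signal the random shift of up to rad samples is easy to see (also a hedged sketch, run in eager mode):

from GANModels import _apply_phaseshuffle

x = tf.reshape(tf.range(8, dtype=tf.float32), (1, 8, 1))  # ramp 0..7
y = _apply_phaseshuffle(x, rad=2)
print(tf.squeeze(x).numpy())  # [0. 1. 2. 3. 4. 5. 6. 7.]
print(tf.squeeze(y).numpy())  # same ramp shifted by -2..2 samples, reflect-padded at the edge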

189
scripts/WGAN-GP.py Normal file

@@ -0,0 +1,189 @@
# Import modules
import tensorflow as tf
from tensorflow.keras.optimizers import Adam, RMSprop
import numpy as np
import librosa
import random
import os
import sys
import time

import GANModels

# Set up GPU settings
gpus = tf.config.experimental.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(gpus[0], True)

# Define hyperparameters
MODEL_DIMS = 64
NUM_SAMPLES = 16384
D_UPDATES_PER_G_UPDATE = 5
GRADIENT_PENALTY_WEIGHT = 10.0
NOISE_LEN = 100
EPOCHS = 128
EPOCHS_PER_SAMPLE = 2
BATCH_SIZE = 16
Fs = 16000

# Define class that contains GAN infrastructure
class GAN:
    def __init__(self, model_dims=MODEL_DIMS, num_samples=NUM_SAMPLES,
                 gradient_penalty_weight=GRADIENT_PENALTY_WEIGHT,
                 noise_len=NOISE_LEN, batch_size=BATCH_SIZE, sr=Fs):
        self.model_dims = model_dims
        self.num_samples = num_samples
        self.noise_dims = (noise_len,)
        self.batch_size = batch_size

        self.G = GANModels.Generator(self.model_dims, num_samples)
        print(self.G.summary())

        self.D = GANModels.Critic(self.model_dims, num_samples)
        print(self.D.summary())

        self.G_optimizer = Adam(learning_rate=1e-4, beta_1=0.5, beta_2=0.9)
        self.D_optimizer = Adam(learning_rate=1e-4, beta_1=0.5, beta_2=0.9)

        self.gradient_penalty_weight = gradient_penalty_weight

        self.sr = sr

    # Loss function for critic
    def _d_loss_fn(self, r_logit, f_logit):
        r_loss = -tf.reduce_mean(r_logit)
        f_loss = tf.reduce_mean(f_logit)
        return r_loss, f_loss

    # Loss function for generator
    def _g_loss_fn(self, f_logit):
        f_loss = -tf.reduce_mean(f_logit)
        return f_loss

    # Calculates gradient penalty
    def _gradient_penalty(self, real, fake):
        def _interpolate(a, b):
            shape = [tf.shape(a)[0]] + [1] * (a.shape.ndims - 1)
            alpha = tf.random.uniform(shape=shape, minval=0., maxval=1.)
            inter = a + alpha * (b - a)
            inter.set_shape(a.shape)
            return inter

        x = _interpolate(real, fake)
        with tf.GradientTape() as t:
            t.watch(x)
            pred = self.D(x, training=True)
        grad = t.gradient(pred, x)
        norm = tf.norm(tf.reshape(grad, [tf.shape(grad)[0], -1]), axis=1)
        gp = tf.reduce_mean((norm - 1.)**2)

        return gp

    # Trains generator by keeping critic constant
    @tf.function
    def train_G(self):
        with tf.GradientTape() as t:
            z = tf.random.normal(shape=(self.batch_size,) + self.noise_dims)
            x_fake = self.G(z, training=True)
            x_fake_d_logit = self.D(x_fake, training=True)
            G_loss = self._g_loss_fn(x_fake_d_logit)

        G_grad = t.gradient(G_loss, self.G.trainable_variables)
        self.G_optimizer.apply_gradients(zip(G_grad, self.G.trainable_variables))

        return {'g_loss': G_loss}

    # Trains critic by keeping generator constant
    @tf.function
    def train_D(self, x_real):
        with tf.GradientTape() as t:
            z = tf.random.normal(shape=(x_real.shape[0],) + self.noise_dims)
            x_fake = self.G(z, training=True)

            x_real_d_logit = self.D(x_real, training=True)
            x_fake_d_logit = self.D(x_fake, training=True)

            x_real_d_loss, x_fake_d_loss = self._d_loss_fn(x_real_d_logit, x_fake_d_logit)
            gp = self._gradient_penalty(x_real, x_fake)

            D_loss = (x_real_d_loss + x_fake_d_loss) + gp * self.gradient_penalty_weight

        D_grad = t.gradient(D_loss, self.D.trainable_variables)
        self.D_optimizer.apply_gradients(zip(D_grad, self.D.trainable_variables))

        return {'d_loss': x_real_d_loss + x_fake_d_loss, 'gp': gp}

    # Creates music samples and saves current generator model
    def sample(self, epoch, num_samples=10):
        self.G.save(f"models/{epoch}.h5")
        z = tf.random.normal(shape=(num_samples,) + self.noise_dims)
        result = self.G(z, training=False)
        for i in range(num_samples):
            audio = result[i, :, :]
            audio = np.reshape(audio, (self.num_samples,))
            librosa.output.write_wav(f"output/piano/{epoch}-{i}.wav", audio, sr=self.sr)

###############################################################################################

# Instantiate model
gan = GAN()

# Create training data
X_train = []
for file in os.listdir(r"D:\ML_Datasets\mancini_piano\piano\train"):
    with open(r"D:\ML_Datasets\mancini_piano\piano\train" + fr"\{file}", "rb") as f:
        samples, _ = librosa.load(f, Fs)

        # Pad short audio files to NUM_SAMPLES duration
        if len(samples) < NUM_SAMPLES:
            audio = np.array([np.array([sample]) for sample in samples])
            padding = np.zeros(shape=(NUM_SAMPLES - len(samples), 1), dtype='float32')
            X_train.append(np.append(audio, padding, axis=0))
        # Create slices of length NUM_SAMPLES from long audio
        else:
            p = len(samples) // NUM_SAMPLES
            for i in range(p - 1):
                sample = np.expand_dims(samples[i*NUM_SAMPLES:(i+1)*NUM_SAMPLES], axis=1)
                X_train.append(sample)

print(f"X_train shape = {(len(X_train),) + X_train[0].shape}")

# Save some random training data slices and create baseline generated data for comparison
for i in range(10):
    librosa.output.write_wav(f"output/piano/real-{i}.wav",
                             X_train[random.randint(0, len(X_train) - 1)], sr=Fs)

gan.sample("fake")

train_summary_writer = tf.summary.create_file_writer("logs/train")

# Train GAN
with train_summary_writer.as_default():
    steps_per_epoch = len(X_train) // BATCH_SIZE
    for e in range(EPOCHS):
        for i in range(steps_per_epoch):
            D_loss_sum = 0

            # Update critic a set number of times for each update of the generator
            for n in range(D_UPDATES_PER_G_UPDATE):
                gan.D.reset_states()
                D_loss_dict = gan.train_D(np.array(random.sample(X_train, BATCH_SIZE)))
                D_loss_sum += D_loss_dict['d_loss']

            # Calculate average loss of critic for current step
            D_loss = D_loss_sum / D_UPDATES_PER_G_UPDATE

            G_loss_dict = gan.train_G()
            G_loss = G_loss_dict['g_loss']

            # Write logs
            tf.summary.scalar('d_loss', D_loss, step=(e*steps_per_epoch)+i)
            tf.summary.scalar('g_loss', G_loss, step=(e*steps_per_epoch)+i)

            print(f"step {(e*steps_per_epoch)+i}: d_loss = {D_loss} g_loss = {G_loss}")

        # Periodically sample generator
        if e % EPOCHS_PER_SAMPLE == 0:
            gan.sample(e)
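
_gradient_penalty above is the WGAN-GP regulariser of Gulrajani et al.: the critic is evaluated at a random point on the straight line between each real/fake pair, and the squared deviation of its input-gradient norm from 1 is penalised. A standalone sketch of the same computation follows; toy_critic is a hypothetical stand-in for the real critic, not part of this commit:

import tensorflow as tf

# Hypothetical stand-in: any model mapping (batch, 16384, 1) -> (batch, 1) works here
toy_critic = tf.keras.Sequential([
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(1, use_bias=False),
])

real = tf.random.normal((4, 16384, 1))
fake = tf.random.normal((4, 16384, 1))

# Random point on the line between each real/fake pair, mirroring _interpolate()
alpha = tf.random.uniform((4, 1, 1))
x_hat = real + alpha * (fake - real)

with tf.GradientTape() as t:
    t.watch(x_hat)                      # x_hat is a plain tensor, so watch it explicitly
    pred = toy_critic(x_hat)
grad = t.gradient(pred, x_hat)
norm = tf.norm(tf.reshape(grad, [4, -1]), axis=1)
gp = tf.reduce_mean((norm - 1.0) ** 2)  # penalises ||grad||_2 far from 1
print(float(gp))

In train_D this term is scaled by GRADIENT_PENALTY_WEIGHT (10.0) and added to the Wasserstein critic loss.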
