First, import the libraries and load the data. This part is identical to lecture.html.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sklearn
import time
cmap='tab10'  # colormap used when plotting
def load_csv(csv):
    # first column is the digit label; the remaining 784 columns are pixel values
    xx = np.array(pd.read_csv(csv))
    x_data = xx[:, 1:].astype('float32') / 255  # scale pixels to [0, 1]
    y_data = xx[:, 0]
    return x_data, y_data
x_train, y_train=load_csv('./mnist_train.csv')
x_test, y_test=load_csv('./mnist_test.csv')
n = int(x_train.shape[0] / 10)  # keep one tenth of the data (x_train.shape[0] divided by 10, cast to int)
x_train2 = x_train[:n]
y_train2 = y_train[:n]
n = int(x_test.shape[0] / 10)
x_test2 = x_test[:n]
y_test2 = y_test[:n]
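As a quick sanity check (illustrative only, not part of lecture.html), you can print the shapes of the reduced arrays to confirm that one tenth of the rows was kept; the exact numbers depend on the CSV files.
# illustrative check of the reduced datasets
print(x_train2.shape, y_train2.shape)  # e.g. (6000, 784) (6000,) for a 60000-row training CSV
print(x_test2.shape, y_test2.shape)    # e.g. (1000, 784) (1000,)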
import keras
from keras.layers import Lambda, Input, Dense, Dropout
from keras.models import Model
from keras.losses import mse, binary_crossentropy
from keras.models import Sequential
from keras.layers import MaxPooling2D
from keras import layers
from keras.layers import Conv2D, Flatten
from keras.layers import Reshape, Conv2DTranspose
from keras import backend as K
pixel_size=28
The function vae_cnn below runs a VAE that uses a CNN for the transformation. It is used in the same way as vae_mlp.
def vae_cnn(x_train, x_test, latent_dim=2, epochs=10, pixel_size=pixel_size):
    x_train = x_train.reshape((x_train.shape[0], pixel_size, pixel_size, 1))
    x_test = x_test.reshape((x_test.shape[0], pixel_size, pixel_size, 1))
    # network parameters
    input_shape = (pixel_size, pixel_size, 1)
    batch_size = 128
    kernel_size = 3
    filters = 16

    # VAE model = encoder + decoder
    # build encoder model
    inputs = Input(shape=input_shape, name='encoder_input')
    x = inputs
    x = Conv2D(filters=32,
               kernel_size=kernel_size,
               activation='relu',
               strides=2,
               padding='same')(x)
    x = Conv2D(filters=64,
               kernel_size=kernel_size,
               activation='relu',
               strides=2,
               padding='same')(x)
    # shape info needed to build decoder model
    shape = K.int_shape(x)
    # generate latent vector Q(z|X)
    x = Flatten()(x)
    x = Dense(16, activation='relu')(x)
    z_mean = Dense(latent_dim, name='z_mean')(x)
    z_log_var = Dense(latent_dim, name='z_log_var')(x)
    # use reparameterization trick to push the sampling out as input
    # note that "output_shape" isn't necessary with the TensorFlow backend
    z = Lambda(sampling, output_shape=(latent_dim,), name='z')([z_mean, z_log_var])
    # instantiate encoder model
    encoder = Model(inputs, [z_mean, z_log_var, z], name='encoder')
    encoder.summary()
    #plot_model(encoder, to_file='vae_cnn_encoder.png', show_shapes=True)

    # build decoder model
    latent_inputs = Input(shape=(latent_dim,), name='z_sampling')
    x = Dense(shape[1] * shape[2] * shape[3], activation='relu')(latent_inputs)
    x = Reshape((shape[1], shape[2], shape[3]))(x)
    x = Conv2DTranspose(filters=64,
                        kernel_size=kernel_size,
                        activation='relu',
                        strides=2,
                        padding='same')(x)
    x = Conv2DTranspose(filters=32,
                        kernel_size=kernel_size,
                        activation='relu',
                        strides=2,
                        padding='same')(x)
    outputs = Conv2DTranspose(filters=1,
                              kernel_size=kernel_size,
                              activation='sigmoid',
                              padding='same',
                              name='decoder_output')(x)
    # instantiate decoder model
    decoder = Model(latent_inputs, outputs, name='decoder')
    decoder.summary()
    #plot_model(decoder, to_file='vae_cnn_decoder.png', show_shapes=True)

    # instantiate VAE model
    outputs = decoder(encoder(inputs)[2])
    vae = Model(inputs, outputs, name='vae')
    #models = (encoder, decoder)
    #data = (x_test, y_test)

    # VAE loss = mse_loss or xent_loss + kl_loss
    reconstruction_loss = binary_crossentropy(K.flatten(inputs),
                                              K.flatten(outputs))
    reconstruction_loss *= pixel_size * pixel_size
    kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
    kl_loss = K.sum(kl_loss, axis=-1)
    kl_loss *= -0.5
    vae_loss = K.mean(reconstruction_loss + kl_loss)
    vae.add_loss(vae_loss)
    vae.compile(optimizer='rmsprop')
    vae.summary()

    # train the autoencoder
    result = vae.fit(x_train,
                     epochs=epochs,
                     batch_size=batch_size,
                     validation_data=(x_test, None))
    return encoder, decoder, result
def sampling(args):
    # reparameterization trick: draw z = z_mean + sigma * epsilon with epsilon ~ N(0, I)
    z_mean, z_log_var = args
    batch = K.shape(z_mean)[0]
    dim = K.int_shape(z_mean)[1]
    epsilon = K.random_normal(shape=(batch, dim))
    return z_mean + K.exp(0.5 * z_log_var) * epsilon
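As an aside, the reparameterization trick used in sampling can be mimicked with plain NumPy to check that the sampled z indeed has mean z_mean and variance exp(z_log_var). This is a standalone illustration with arbitrarily chosen values and is not part of the model.
# standalone NumPy sketch of the reparameterization trick (illustration only, arbitrary values)
z_mean_demo = np.array([0.5, -1.0])
z_log_var_demo = np.array([0.0, -2.0])
eps = np.random.normal(size=(100000, 2))
z_demo = z_mean_demo + np.exp(0.5 * z_log_var_demo) * eps
print(z_demo.mean(axis=0))  # close to [0.5, -1.0]
print(z_demo.var(axis=0))   # close to exp(z_log_var_demo) = [1.0, 0.135]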
To run this function, proceed as follows. The cell below feeds the reduced dataset x_train2; to use the full dataset, simply pass the corresponding arrays instead.
encoder, decoder, result= vae_cnn(x_train2, x_test2, latent_dim = 2, epochs = 30, pixel_size=pixel_size)
As in lecture.html, the loss can be plotted as follows.
plt.plot(result.history['loss'],label='loss',color='r')
plt.plot(result.history['val_loss'],label='val_loss',color='b')
plt.legend()
plt.xlabel('epoch')
plt.ylabel('loss')
plt.show()
Next, we draw the latent space. To obtain z_mean, the input must be reshaped to the 28x28 format expected by the CNN, so the input to encoder.predict is prepared as follows.
z_mean, _, _ = encoder.predict(x_train2.reshape((x_train2.shape[0],pixel_size,pixel_size,1)))
plt.figure(figsize=(6, 5))
plt.scatter(z_mean[:, 0], z_mean[:, 1], s=3, c=y_train2, cmap=cmap)
plt.colorbar()
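For comparison, the test data can be projected into the same latent space. This is an extra illustrative step, reshaped in the same way as above.
# illustrative: project the reduced test set into the latent space learned by the encoder
z_mean_test, _, _ = encoder.predict(x_test2.reshape((x_test2.shape[0], pixel_size, pixel_size, 1)))
plt.figure(figsize=(6, 5))
plt.scatter(z_mean_test[:, 0], z_mean_test[:, 1], s=3, c=y_test2, cmap=cmap)
plt.colorbar()
plt.show()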
Furthermore, to see what kind of shapes are output as the value of z is varied across the latent space, the following function can be used.
def plot_latent(decoder):
    # decode an n x n grid of points spanning the latent space and tile the resulting images
    n = 30
    figure = np.zeros((pixel_size * n, pixel_size * n))
    grid_x = np.linspace(-4, 4, n)
    grid_y = np.linspace(-4, 4, n)[::-1]
    for i, yi in enumerate(grid_y):
        for j, xi in enumerate(grid_x):
            z_sample = np.array([[xi, yi]])
            x_decoded = decoder.predict(z_sample)
            digit = x_decoded[0].reshape(pixel_size, pixel_size)
            figure[i * pixel_size: (i + 1) * pixel_size,
                   j * pixel_size: (j + 1) * pixel_size] = digit
    plt.figure(figsize=(10, 10))
    # label the axes with the corresponding z values
    start_range = pixel_size // 2
    end_range = (n - 1) * pixel_size + start_range + 1
    pixel_range = np.arange(start_range, end_range, pixel_size)
    sample_range_x = np.round(grid_x, 1)
    sample_range_y = np.round(grid_y, 1)
    plt.xticks(pixel_range, sample_range_x)
    plt.yticks(pixel_range, sample_range_y)
    plt.xlabel("z[0]")
    plt.ylabel("z[1]")
    plt.imshow(figure, cmap='Greys_r')
    plt.show()
plot_latent(decoder)
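The grid range (-4 to 4) and the resolution n = 30 are hardcoded in plot_latent. If you only want to see the digit generated at one chosen point of the latent space, a single z value can be decoded directly; the point (1.0, -0.5) below is arbitrary.
# illustrative: decode a single, arbitrarily chosen point in the latent space
z_sample = np.array([[1.0, -0.5]])
digit = decoder.predict(z_sample)[0].reshape(pixel_size, pixel_size)
plt.imshow(digit, cmap='Greys_r')
plt.show()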
The difference between a VAE that uses convolutions and one that does not may not come out clearly on the MNIST data reduced to one tenth. Using the full dataset, compare how val_loss decreases and what the reconstruction over the latent space looks like. This may take a little while.
The results obtained with the full dataset are shown below.
t1 = time.time()
encoder, decoder, result= vae_cnn(x_train, x_test, latent_dim = 2, epochs = 30, pixel_size=pixel_size)
t2 = time.time()
print(f"経過時間:{t2-t1}")
plot_latent(decoder)
z_mean, _, _ = encoder.predict(x_train2.reshape((x_train2.shape[0],pixel_size,pixel_size,1)))
plt.figure(figsize=(6, 5))
plt.scatter(z_mean[:, 0], z_mean[:, 1], s=3, c=y_train2, cmap=cmap)
plt.colorbar()
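To carry out the comparison suggested above, the val_loss history of this CNN-based VAE can be overlaid with that of the MLP version. The following is a minimal sketch that assumes vae_mlp from lecture.html is defined in the same session and returns (encoder, decoder, result) in the same way as vae_cnn; adjust the call if its signature differs.
# assumption: vae_mlp from lecture.html is available and returns (encoder, decoder, result) like vae_cnn
_, _, result_mlp = vae_mlp(x_train, x_test, latent_dim=2, epochs=30)
plt.plot(result.history['val_loss'], label='val_loss (CNN)', color='r')
plt.plot(result_mlp.history['val_loss'], label='val_loss (MLP)', color='b')
plt.legend()
plt.xlabel('epoch')
plt.ylabel('val_loss')
plt.show()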