In [5]:
import sys
import numpy as np
import numpy.random as rnd
import tensorflow as tf
from functools import partial
from sklearn.preprocessing import StandardScaler
from tensorflow.examples.tutorials.mnist import input_data
from __future__ import division, print_function, unicode_literals

# to make this notebook's output stable across runs
def reset_graph(seed=42):
    """Reset TensorFlow's default graph and re-seed TensorFlow and NumPy.

    Args:
        seed: Value passed to both ``tf.set_random_seed`` and
            ``np.random.seed``. Defaults to 42.
    """
    tf.reset_default_graph()
    # Seed TensorFlow first, then NumPy's global RNG, with the same value.
    for apply_seed in (tf.set_random_seed, np.random.seed):
        apply_seed(seed)

Building PCA with a linear autoencoder.

In [None]:
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt

# Linear autoencoder (no activation functions, MSE loss) used to project a
# 3D dataset onto a 2D coding space.
reset_graph()

n_inputs = 3
n_hidden = 2  # codings
n_outputs = n_inputs
learning_rate = 0.01
n_epochs = 1000

########################################
# building a 3D dataset and normalizing it
########################################
rnd.seed(4)
m = 200
w1, w2 = 0.1, 0.3
noise = 0.1

angles = rnd.rand(m) * 3 * np.pi / 2 - 0.5
data = np.empty((m, 3))
data[:, 0] = np.cos(angles) + np.sin(angles)/2 + noise * rnd.randn(m) / 2
data[:, 1] = np.sin(angles) * 0.7 + noise * rnd.randn(m) / 2
# the third coordinate is a noisy linear combination of the first two
data[:, 2] = data[:, 0] * w1 + data[:, 1] * w2 + noise * rnd.randn(m)

# the scaler is fitted on the first 100 samples only and then re-used,
# unchanged, to transform the remaining samples
scaler = StandardScaler()
X_train = scaler.fit_transform(data[:100])
X_test = scaler.transform(data[100:])

########################################
# defining placeholders
########################################
X = tf.placeholder(tf.float32, shape=[None, n_inputs])

########################################
# building the model
########################################
with tf.name_scope("autoenc"):
    # no activation is passed, so both layers are purely linear
    hidden = tf.layers.dense(X, n_hidden) # the coding layer
    outputs = tf.layers.dense(hidden, n_outputs)

########################################
# defining the cost function
########################################
with tf.name_scope("loss"):
    loss = tf.reduce_mean(tf.square(outputs - X)) # MSE

########################################
# training the model
########################################
with tf.name_scope("train"):
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    training_op = optimizer.minimize(loss)

########################################
# executing the model
########################################
init = tf.global_variables_initializer()

with tf.Session() as sess:
    init.run()
    # full-batch training: the whole training set is fed at every step
    for epoch in range(n_epochs):
        training_op.run(feed_dict={X: X_train})
    # Project the test set once, after training has finished. Only the final
    # codings are plotted, so evaluating them at every epoch was wasted work.
    codings_val = hidden.eval(feed_dict={X: X_test})

########################################
# plotting the result
########################################
fig = plt.figure(figsize=(4,3))
plt.plot(codings_val[:,0], codings_val[:, 1], "b.")
plt.xlabel("$z_1$", fontsize=18)
plt.ylabel("$z_2$", fontsize=18, rotation=0)

plt.show()

Building a stacked autoencoder for MNIST, using He initialization, the ELU activation function, and L2 regularization.

In [2]:
# Stacked autoencoder on MNIST with layer sizes 784 -> 300 -> 150 -> 300 -> 784.
reset_graph()

n_inputs = 28 * 28
n_hidden1 = 300
n_hidden2 = 150  # codings
n_hidden3 = n_hidden1
n_outputs = n_inputs
learning_rate = 0.01
l2_reg = 0.0001
n_epochs = 100
batch_size = 150

########################################
# loading dataset
########################################
mnist = input_data.read_data_sets("/tmp/data/")
# NOTE: X_test is assigned here but not used anywhere else in this cell.
X_test = mnist.test.images

########################################
# defining placeholders
########################################
X = tf.placeholder(tf.float32, shape=[None, n_inputs])

########################################
# building the model
########################################
with tf.name_scope("autoenc"):
    he_init = tf.contrib.layers.variance_scaling_initializer() # He initialization
    l2_regularizer = tf.contrib.layers.l2_regularizer(l2_reg)
    # shared layer factory: ELU activation, He init and L2 kernel regularization
    # are applied to every layer unless overridden at the call site
    my_dense_layer = partial(tf.layers.dense, activation=tf.nn.elu, kernel_initializer=he_init,
                             kernel_regularizer=l2_regularizer)
    hidden1 = my_dense_layer(X, n_hidden1)
    hidden2 = my_dense_layer(hidden1, n_hidden2)
    hidden3 = my_dense_layer(hidden2, n_hidden3)
    # the output layer overrides the default activation and stays linear
    outputs = my_dense_layer(hidden3, n_outputs, activation=None)

########################################
# defining the cost function
########################################
with tf.name_scope("loss"):
    reconstruction_loss = tf.reduce_mean(tf.square(outputs - X))
    # regularization terms are read back from the graph collection
    # (presumably registered there by tf.layers.dense via kernel_regularizer)
    reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    # total loss = reconstruction MSE + all collected regularization terms
    loss = tf.add_n([reconstruction_loss] + reg_losses)
    
########################################
# training the model
########################################
with tf.name_scope("train"):
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    training_op = optimizer.minimize(loss)

########################################
# executing the model
########################################
init = tf.global_variables_initializer()

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        # integer division: a trailing partial batch is not counted
        for iteration in range(mnist.train.num_examples // batch_size):
            # y_batch (the labels) is returned by next_batch but not used
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict={X: X_batch})
        # reported value is the reconstruction MSE only (no regularization term),
        # measured on the last mini-batch of the epoch rather than the full set
        loss_train = reconstruction_loss.eval(feed_dict={X: X_batch})
        print(epoch, "MSE:", loss_train)

Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting /tmp/data/train-images-idx3-ubyte.gz
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
0 MSE: 0.020708656
1 MSE: 0.011428968
2 MSE: 0.010215068
3 MSE: 0.009897818
4 MSE: 0.010377758


Now we implement the same network, but this time with tied weights. In the following code, `weights3` and `weights4` are not variables: they are the transposes of `weights2` and `weights1`, respectively. Since they are not variables, there is no point in regularizing them, so we only regularize `weights1` and `weights2`.

In [6]:
# Stacked autoencoder on MNIST whose decoder kernels are the transposes of the
# encoder kernels (tied weights); layers are built by hand with tf.matmul.
reset_graph()

n_inputs = 28 * 28
n_hidden1 = 300
n_hidden2 = 150  # codings
n_hidden3 = n_hidden1
n_outputs = n_inputs
learning_rate = 0.01
l2_reg = 0.0005
n_epochs = 100
batch_size = 150

########################################
# loading dataset
########################################
mnist = input_data.read_data_sets("/tmp/data/")
# NOTE: X_test is assigned here but not used anywhere else in this cell.
X_test = mnist.test.images

########################################
# defining variables and placeholders
########################################
initializer = tf.contrib.layers.variance_scaling_initializer()

X = tf.placeholder(tf.float32, shape=[None, n_inputs])

# initial values for the two encoder kernels only
weights1_init = initializer([n_inputs, n_hidden1])
weights2_init = initializer([n_hidden1, n_hidden2])

weights1 = tf.Variable(weights1_init, dtype=tf.float32, name="weights1")
weights2 = tf.Variable(weights2_init, dtype=tf.float32, name="weights2")
# the decoder kernels are plain transpose ops on the encoder variables,
# not tf.Variable objects of their own
weights3 = tf.transpose(weights2, name="weights3")  # tied weights
weights4 = tf.transpose(weights1, name="weights4")  # tied weights

# biases are not tied: each of the four layers has its own zero-initialized variable
biases1 = tf.Variable(tf.zeros(n_hidden1), name="biases1")
biases2 = tf.Variable(tf.zeros(n_hidden2), name="biases2")
biases3 = tf.Variable(tf.zeros(n_hidden3), name="biases3")
biases4 = tf.Variable(tf.zeros(n_outputs), name="biases4")

########################################
# building the model
########################################
with tf.name_scope("autoenc"):
    activation = tf.nn.elu
    hidden1 = activation(tf.matmul(X, weights1) + biases1)
    hidden2 = activation(tf.matmul(hidden1, weights2) + biases2)
    hidden3 = activation(tf.matmul(hidden2, weights3) + biases3)
    # the output layer has no activation
    outputs = tf.matmul(hidden3, weights4) + biases4

########################################
# defining the loss function
########################################
with tf.name_scope("loss"):
    regularizer = tf.contrib.layers.l2_regularizer(l2_reg)
    reconstruction_loss = tf.reduce_mean(tf.square(outputs - X))
    # only the two kernel variables are regularized; weights3/weights4 are
    # transposes of them and the biases are left unregularized
    reg_loss = regularizer(weights1) + regularizer(weights2)
    loss = reconstruction_loss + reg_loss
    
########################################
# training the model
########################################
with tf.name_scope("train"):
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    training_op = optimizer.minimize(loss)

########################################
# executing the model
########################################
init = tf.global_variables_initializer()

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        # integer division: a trailing partial batch is not counted
        for iteration in range(mnist.train.num_examples // batch_size):
            # y_batch (the labels) is returned by next_batch but not used
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict={X: X_batch})
        # reported value is the reconstruction MSE only (reg_loss excluded),
        # measured on the last mini-batch of the epoch
        loss_train = reconstruction_loss.eval(feed_dict={X: X_batch})
        print(epoch, "MSE:", loss_train)

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz
0 MSE: 0.014426452
1 MSE: 0.0155632505
2 MSE: 0.016616063
3 MSE: 0.016558
4 MSE: 0.017175082


Below, we implement a stacked denoising autoencoder, using Gaussian noise.

In [4]:
# Stacked denoising autoencoder on MNIST: Gaussian noise is added to the input
# inside the graph and the network is trained to reconstruct the clean input.
# This cell uses `sys`, which is imported in the notebook's first cell.
reset_graph()

n_inputs = 28 * 28
n_hidden1 = 300
n_hidden2 = 150  # codings
n_hidden3 = n_hidden1
n_outputs = n_inputs
learning_rate = 0.01
noise_level = 1.0
n_epochs = 10
batch_size = 150

########################################
# loading dataset
########################################
mnist = input_data.read_data_sets("/tmp/data/")
# NOTE: X_test is assigned here but not used anywhere else in this cell.
X_test = mnist.test.images

########################################
# defining variables and placeholders
########################################
X = tf.placeholder(tf.float32, shape=[None, n_inputs])
# the noise is a graph op with the same dynamic shape as X; it is part of
# every tensor downstream of X_noisy, including `loss`
X_noisy = X + noise_level * tf.random_normal(tf.shape(X))

########################################
# building the model
########################################
with tf.name_scope("autoenc"):
    # the encoder reads the corrupted input, not X itself
    hidden1 = tf.layers.dense(X_noisy, n_hidden1, activation=tf.nn.relu, name="hidden1")
    hidden2 = tf.layers.dense(hidden1, n_hidden2, activation=tf.nn.relu, name="hidden2")
    hidden3 = tf.layers.dense(hidden2, n_hidden3, activation=tf.nn.relu, name="hidden3")
    # no activation on the output layer
    outputs = tf.layers.dense(hidden3, n_outputs, name="outputs")

########################################
# defining the loss function
########################################
with tf.name_scope("loss"):
    # the target is the clean input X, not X_noisy
    loss = tf.reduce_mean(tf.square(outputs - X)) # MSE
    
########################################
# training the model
########################################
with tf.name_scope("train"):
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    training_op = optimizer.minimize(loss)

########################################
# executing the model
########################################
init = tf.global_variables_initializer()

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        # integer division: a trailing partial batch is not counted
        n_batches = mnist.train.num_examples // batch_size
        for iteration in range(n_batches):
            # "\r" plus end="" rewrites the same output line with the progress
            # percentage; the explicit flush pushes it out immediately
            print("\r{}%".format(100 * iteration // n_batches), end="")
            sys.stdout.flush()
            # y_batch (the labels) is returned by next_batch but not used
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict={X: X_batch})
        # measured on the last mini-batch of the epoch; since `loss` depends on
        # X_noisy, this evaluation also goes through the noise op
        loss_train = loss.eval(feed_dict={X: X_batch})
        print("\r{}".format(epoch), "Train MSE:", loss_train)

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz
0%

NameError: name 'sys' is not defined

Finally, we implement a variational autoencoder.

In [None]:
# Variational autoencoder on MNIST with a 20-dimensional Gaussian coding layer.
# This cell uses `sys`, which is imported in the notebook's first cell.
reset_graph()

n_inputs = 28 * 28
n_hidden1 = 500
n_hidden2 = 500
n_hidden3 = 20  # codings
n_hidden4 = n_hidden2
n_hidden5 = n_hidden1
n_outputs = n_inputs
learning_rate = 0.001
n_epochs = 50
batch_size = 150

########################################
# loading dataset
########################################
mnist = input_data.read_data_sets("/tmp/data/")
# NOTE: X_test is assigned here but not used anywhere else in this cell.
X_test = mnist.test.images

########################################
# defining variables and placeholders
########################################
X = tf.placeholder(tf.float32, [None, n_inputs])

########################################
# building the model
########################################
with tf.name_scope("autoenc"):
    initializer = tf.contrib.layers.variance_scaling_initializer()

    # shared layer factory: ELU activation unless overridden at the call site
    my_dense_layer = partial(tf.layers.dense, activation=tf.nn.elu, kernel_initializer=initializer)
    
    hidden1 = my_dense_layer(X, n_hidden1)
    hidden2 = my_dense_layer(hidden1, n_hidden2)
    # two separate linear heads on hidden2 produce the coding mean and sigma;
    # sigma has no activation, so its sign is unconstrained
    hidden3_mean = my_dense_layer(hidden2, n_hidden3, activation=None)
    hidden3_sigma = my_dense_layer(hidden2, n_hidden3, activation=None)
    # codings are sampled as mean + sigma * noise with standard normal noise,
    # which keeps the sample differentiable w.r.t. mean and sigma
    noise = tf.random_normal(tf.shape(hidden3_sigma), dtype=tf.float32)
    hidden3 = hidden3_mean + hidden3_sigma * noise
    hidden4 = my_dense_layer(hidden3, n_hidden4)
    hidden5 = my_dense_layer(hidden4, n_hidden5)
    logits = my_dense_layer(hidden5, n_outputs, activation=None)
    # `outputs` is the sigmoid of the logits; the loss below is computed from
    # `logits` directly and `outputs` is not used again in this cell
    outputs = tf.sigmoid(logits)

########################################
# defining the loss function
########################################
with tf.name_scope("loss"):
    eps = 1e-10 # smoothing term: keeps the argument of tf.log strictly positive (log(0) is -inf, not NaN)
    # 0.5 * sum(sigma^2 + mean^2 - 1 - log(sigma^2)): the KL divergence between
    # N(mean, sigma^2) and N(0, 1), summed over all codings and all instances
    latent_loss = 0.5 * tf.reduce_sum(tf.square(hidden3_sigma) + tf.square(hidden3_mean) 
                                      - 1 - tf.log(eps + tf.square(hidden3_sigma)))
    # per-pixel sigmoid cross-entropy, with the input X itself as the target
    xentropy = tf.nn.sigmoid_cross_entropy_with_logits(labels=X, logits=logits)
    # both loss terms are sums (not means), so their scale grows with batch size
    reconstruction_loss = tf.reduce_sum(xentropy)
    loss = reconstruction_loss + latent_loss
    
########################################
# training the model
########################################
with tf.name_scope("train"):
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    training_op = optimizer.minimize(loss)

########################################
# executing the model
########################################
init = tf.global_variables_initializer()

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        # integer division: a trailing partial batch is not counted
        n_batches = mnist.train.num_examples // batch_size
        for iteration in range(n_batches):
            # "\r" plus end="" rewrites the same output line with the progress
            # percentage; the explicit flush pushes it out immediately
            print("\r{}%".format(100 * iteration // n_batches), end="")
            sys.stdout.flush()
            # y_batch (the labels) is returned by next_batch but not used
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict={X: X_batch})
        # all three values come from a single run on the last mini-batch of the
        # epoch, so they share the same noise sample
        loss_val, reconstruction_loss_val, latent_loss_val = sess.run([loss, reconstruction_loss, latent_loss], feed_dict={X: X_batch})
        print("\r{}".format(epoch), "Train total loss:", loss_val, "\tReconstruction loss:", reconstruction_loss_val, "\tLatent loss:", latent_loss_val)
