In [None]:
from __future__ import division, print_function, unicode_literals

import numpy as np
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

def reset_graph(seed=42):
    """Make this notebook's output stable across runs.

    Re-seeds NumPy, discards the current default TensorFlow graph and sets the
    graph-level random seed on the fresh one.

    Args:
        seed: integer seed used for both NumPy and TensorFlow (default 42).
    """
    np.random.seed(seed)
    # The graph must be reset *before* seeding: the seed is set on the new default graph.
    tf.reset_default_graph()
    tf.set_random_seed(seed)

Let's first build an RNN manually.

In [None]:
# manual rnn: a two-step recurrent layer unrolled by hand
reset_graph()

n_inputs = 3
n_neurons = 5

# ---------------------------------------
# input mini-batches (4 instances, n_inputs features), one array per time step
# ---------------------------------------
X0_batch = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 0, 1]])  # t = 0
X1_batch = np.array([[9, 8, 7], [0, 0, 0], [6, 5, 4], [3, 2, 1]])  # t = 1

# ---------------------------------------
# placeholders (one per time step) and shared parameters
# ---------------------------------------
X0 = tf.placeholder(tf.float32, [None, n_inputs])
X1 = tf.placeholder(tf.float32, [None, n_inputs])

# Wx: input -> hidden weights, Wy: hidden(t-1) -> hidden(t) weights, b: bias.
Wx = tf.Variable(tf.random_normal([n_inputs, n_neurons], dtype=tf.float32))
Wy = tf.Variable(tf.random_normal([n_neurons, n_neurons], dtype=tf.float32))
b = tf.Variable(tf.zeros([1, n_neurons], dtype=tf.float32))

# ---------------------------------------
# the model: the same Wx, Wy and b are reused at both time steps
# ---------------------------------------
h0 = tf.tanh(tf.matmul(X0, Wx) + b)                       # t = 0: no previous state
h1 = tf.tanh(tf.matmul(h0, Wy) + tf.matmul(X1, Wx) + b)   # t = 1: previous state + input

# ---------------------------------------
# forward pass only (nothing is trained in this cell)
# ---------------------------------------
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    step_inputs = {X0: X0_batch, X1: X1_batch}
    h0_val, h1_val = sess.run([h0, h1], feed_dict=step_inputs)
    print(h0_val, h1_val, sep="\n")

Now, we use `dynamic_rnn` to build the above RNN model.

In [None]:
# using dynamic_rnn
reset_graph()

n_inputs = 3
n_neurons = 5
n_steps = 2

########################################
# reading the dataset
########################################
# Shape: (4 instances, n_steps, n_inputs) -- the same values as the manual
# example, but with both time steps stacked into a single array.
X_batch = np.array([
        # t = 0      t = 1
        [[0, 1, 2], [9, 8, 7]], # instance 1
        [[3, 4, 5], [0, 0, 0]], # instance 2
        [[6, 7, 8], [6, 5, 4]], # instance 3
        [[9, 0, 1], [3, 2, 1]], # instance 4
    ])

########################################
# defining variables and placeholders
########################################
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])

########################################
# building the model
########################################
basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)
outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32)

########################################
# running the model (forward pass only, nothing is trained here)
########################################
init = tf.global_variables_initializer()

with tf.Session() as sess:
    init.run()
    # Fetch both tensors in a single run: calling .eval() on each one separately
    # would execute the whole graph twice for the same feed.
    outputs_val, states_val = sess.run([outputs, states], feed_dict={X: X_batch})
    print(outputs_val)
    print(states_val)

Let's train an RNN to classify MNIST images. We will treat each image as a sequence of 28 rows of 28 pixels each (since each MNIST image is 28×28 pixels). Assume we will use cells of 100 recurrent neurons, plus a fully connected layer containing 10 neurons connected to the output of the last time step, followed by a softmax layer.

In [None]:
# multi-layer RNN network for mnist
reset_graph()

# ---------------------------------------
# hyperparameters
# ---------------------------------------
n_steps = 28      # each image row is one time step
n_inputs = 28     # each pixel of a row is one input
n_neurons = 100
n_layers = 3
n_outputs = 10
learning_rate = 0.001
n_epochs = 20
batch_size = 50

# ---------------------------------------
# dataset: test images are reshaped once into (instances, n_steps, n_inputs)
# ---------------------------------------
mnist = input_data.read_data_sets("/tmp/data/")
X_test = mnist.test.images.reshape((-1, n_steps, n_inputs))
y_test = mnist.test.labels.astype("int")

# ---------------------------------------
# graph inputs
# ---------------------------------------
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.int32, [None])

# ---------------------------------------
# model: stacked ReLU RNN cells, then a dense layer on the concatenated states
# ---------------------------------------
with tf.name_scope("rnn"):
    layers = [
        tf.contrib.rnn.BasicRNNCell(num_units=n_neurons, activation=tf.nn.relu)
        for layer_idx in range(n_layers)
    ]
    multi_layer_cell = tf.contrib.rnn.MultiRNNCell(layers)
    outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)
    # `states` is concatenated along axis 1 before being fed to the output layer.
    states_concat = tf.concat(values=states, axis=1)
    logits = tf.layers.dense(states_concat, n_outputs)

# ---------------------------------------
# loss: mean sparse softmax cross-entropy
# ---------------------------------------
with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy)

# ---------------------------------------
# optimizer
# ---------------------------------------
with tf.name_scope("train"):
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    training_op = optimizer.minimize(loss)

# ---------------------------------------
# metric: fraction of instances whose top-1 prediction matches the label
# ---------------------------------------
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

# ---------------------------------------
# training loop
# ---------------------------------------
init = tf.global_variables_initializer()
n_batches = mnist.train.num_examples // batch_size

with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        for iteration in range(n_batches):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            X_batch = X_batch.reshape((-1, n_steps, n_inputs))
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        # Train accuracy is measured on the last mini-batch of the epoch only.
        last_batch_feed = {X: X_batch, y: y_batch}
        test_feed = {X: X_test, y: y_test}
        acc_train = sess.run(accuracy, feed_dict=last_batch_feed)
        acc_test = sess.run(accuracy, feed_dict=test_feed)
        print(epoch, "Train accuracy:", acc_train, "Test accuracy:", acc_test)

We build the above network again for the MNIST dataset, but this time we use an LSTM instead of a regular RNN.

In [None]:
# building an LSTM RNN
reset_graph()

n_steps = 28      # each image row is one time step
n_inputs = 28     # each pixel of a row is one input
n_neurons = 150
n_outputs = 10
n_layers = 3
learning_rate = 0.001
n_epochs = 10
batch_size = 150

########################################
# loading dataset
########################################
mnist = input_data.read_data_sets("/tmp/data/")
X_test = mnist.test.images.reshape((-1, n_steps, n_inputs))
y_test = mnist.test.labels.astype("int")

########################################
# defining variables and placeholders
########################################
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.int32, [None])

########################################
# building the model
########################################
with tf.name_scope("rnn"):
    lstm_cells = [tf.contrib.rnn.BasicLSTMCell(num_units=n_neurons) for layer in range(n_layers)]
    multi_cell = tf.contrib.rnn.MultiRNNCell(lstm_cells)
    outputs, states = tf.nn.dynamic_rnn(multi_cell, X, dtype=tf.float32)
    # states[-1] is the final state of the top layer; element 1 of that state
    # is used as the h state (element 0 presumably being the c state of the
    # LSTM state tuple -- see the BasicLSTMCell documentation).
    top_layer_h_state = states[-1][1]
    logits = tf.layers.dense(top_layer_h_state, n_outputs, name="softmax")

########################################
# defining the loss function
########################################
with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")

########################################
# training the model
########################################
with tf.name_scope("train"):
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    training_op = optimizer.minimize(loss)

########################################
# defining the evaluation metrics
########################################
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

########################################
# executing the model
########################################
init = tf.global_variables_initializer()

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for iteration in range(mnist.train.num_examples // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            # Infer the batch dimension with -1 (as the RNN cell above does)
            # instead of hard-coding batch_size: the reshape then never fails
            # if a returned batch is not exactly batch_size long.
            X_batch = X_batch.reshape((-1, n_steps, n_inputs))
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        # Train accuracy is measured on the last mini-batch of the epoch only.
        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})
        print(epoch, "Train accuracy:", acc_train, "Test accuracy:", acc_test)