Feedforward Neural Network

Fan Gong 03/12/2019

This notebook constructs a simple feedforward neural network from scratch using TensorFlow.

1. Load Data and Libraries

In [2]:
# Import Libraries
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.examples.tutorials.mnist import input_data
In [3]:
# Load Data
mnist = input_data.read_data_sets('../data/mnist')
Extracting ../data/mnist/train-images-idx3-ubyte.gz
Extracting ../data/mnist/train-labels-idx1-ubyte.gz
Extracting ../data/mnist/t10k-images-idx3-ubyte.gz
Extracting ../data/mnist/t10k-labels-idx1-ubyte.gz
In [4]:
mnist.train.images.shape
Out[4]:
(55000, 784)
In [24]:
mnist.train.labels
Out[24]:
array([7, 3, 7, ..., 1, 8, 5], dtype=uint8)
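
Note that read_data_sets was called without one_hot=True, so the labels come back as integer class ids from 0 to 9. They are turned into one-hot rows with tf.one_hot before being fed into the network; the same call appears again inside the training session below.

y_one_hot_train = tf.one_hot(mnist.train.labels, depth=10)   # shape (55000, 10)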
In [5]:
plt.imshow(mnist.train.images[2].reshape(28,28),cmap = 'gray')
plt.show()

2. NN Structure

Here I am going to construct a three-layer neural network with 256 nodes in the first hidden layer, 128 nodes in the second hidden layer, and 10 nodes in the output layer (one per class).
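
Concretely, the forward pass implemented below is two tanh layers followed by a linear output layer (the softmax is applied later, inside the loss):

layer_1 = tanh(X · W_h1 + b_h1)        # (batch, 784) -> (batch, 256)
layer_2 = tanh(layer_1 · W_h2 + b_h2)  # (batch, 256) -> (batch, 128)
logits  = layer_2 · W_out + b_out      # (batch, 128) -> (batch, 10)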

In [54]:
# Hyperparameter
learning_rate = 0.001
training_epochs = 10
batch_size = 3000
display_step = 1

# Network Parameters
n_hidden_1 = 256 # first layer number of neurons 
n_hidden_2 = 128 # second layer number of neurons 
n_input = 784 
n_classes = 10

# tf Graph input
X = tf.placeholder('float', [None, n_input])
Y = tf.placeholder('float', [None, n_classes])

# Layers weights
weights = {
    'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
    'h2': tf.Variable(tf.random_normal([n_hidden_1,n_hidden_2])),
    'out': tf.Variable(tf.random_normal([n_hidden_2, n_classes]))
}

# Layers Bias
bias = {
    'h1': tf.Variable(tf.random_normal([n_hidden_1])),
    'h2': tf.Variable(tf.random_normal([n_hidden_2])),
    'out' : tf.Variable(tf.random_normal([n_classes]))
}
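
One thing worth flagging: tf.random_normal defaults to stddev=1.0, which is a fairly large initial scale for layers of this size and can slow training down. A variant not used in this notebook, shown only as a sketch, would pass a smaller standard deviation:

# hedged variant (not used above; the name weights_small is just illustrative)
weights_small = {
    'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1], stddev=0.1)),
    'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2], stddev=0.1)),
    'out': tf.Variable(tf.random_normal([n_hidden_2, n_classes], stddev=0.1))
}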
In [55]:
# Create Model
def feedforward_nn(x):
    layer_1 = tf.nn.tanh(tf.matmul(x, weights['h1'])+bias['h1'], name = 'layer_1')
    layer_2 = tf.nn.tanh(tf.matmul(layer_1, weights['h2'])+bias['h2'], name = 'layer_2')
    layer_out = tf.matmul(layer_2, weights['out'])+bias['out']
    
    return layer_out
In [56]:
logits = feedforward_nn(X)
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = logits, labels = Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)
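
For intuition, softmax_cross_entropy_with_logits computes, per example, the cross-entropy between the softmax of the logits and the one-hot label, and reduce_mean averages it over the batch. A small NumPy sketch of the same quantity (the function name here is illustrative, not part of TensorFlow):

def softmax_cross_entropy_np(logits, labels_one_hot):
    # subtract the row max for numerical stability before exponentiating
    z = logits - logits.max(axis=1, keepdims=True)
    log_softmax = z - np.log(np.exp(z).sum(axis=1, keepdims=True))
    # per-example cross-entropy -sum_c y_c * log(p_c), averaged over the batch
    return -(labels_one_hot * log_softmax).sum(axis=1).mean()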
In [58]:
with tf.Session() as sess:
    init = tf.global_variables_initializer()
    y_one_hot_train = tf.one_hot(mnist.train.labels, depth = 10)
    y_one_hot_train = y_one_hot_train.eval(session=sess)
    y_one_hot_test = tf.one_hot(mnist.test.labels, depth = 10)
    y_one_hot_test = y_one_hot_test.eval(session=sess)
    sess.run(init)
    
    for epoch in range(training_epochs):
        avg_cost = 0.
        total_batch = int(mnist.train.num_examples/batch_size)
        # loop over all batches
        for i in range(total_batch):
            batch_x = mnist.train.images[i*batch_size:(i+1)*batch_size, :]
            batch_y = y_one_hot_train[i*batch_size:(i+1)*batch_size, :]
            # run train_op for its side effect; we only keep the loss value
            _, c = sess.run([train_op,loss_op], feed_dict = {X: batch_x, 
                                                             Y: batch_y})
            
            #compute average loss
            avg_cost += c/total_batch
            
        #Display logs per epoch step
        if epoch % display_step == 0:
            print('Epoch: ' + str(epoch + 1) + '; cost={}'.format(avg_cost))
    
    print('Optimization Finished')
    
    # Test the Model
    pred = tf.nn.softmax(logits)
    correct_prediction = tf.equal(tf.argmax(pred,1), tf.argmax(Y,1))
    
    # Calculate Accuracy 
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))
    print("Accuracy:", accuracy.eval({X: mnist.test.images, Y: y_one_hot_test}))
Epoch: 1; cost=13.611111852857803
Epoch: 2; cost=7.916272216373019
Epoch: 3; cost=5.179198768403794
Epoch: 4; cost=3.8036623928282003
Epoch: 5; cost=2.975706868701511
Epoch: 6; cost=2.4367065959506573
Epoch: 7; cost=2.0588350892066956
Epoch: 8; cost=1.7766193019019236
Epoch: 9; cost=1.5598017440901866
Epoch: 10; cost=1.387758874230915
Optimization Finished
Accuracy: 0.691
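
A quick note on batching: the loop above walks through the training set in fixed consecutive slices. The MNIST helper also provides mnist.train.next_batch, which shuffles the data and returns successive mini-batches; a minimal sketch of the inner loop using it (this assumes the data were loaded with one_hot=True, otherwise the labels would still need the tf.one_hot conversion):

for i in range(total_batch):
    batch_x, batch_y = mnist.train.next_batch(batch_size)   # shuffled mini-batch
    _, c = sess.run([train_op, loss_op], feed_dict={X: batch_x, Y: batch_y})
    avg_cost += c / total_batch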

Others

In [23]:
def tanh(z):
    return((np.exp(z) - np.exp(-z))/(np.exp(z)+np.exp(-z))) 

def relu(z):
    return([max(zi,0) for zi in z]) 
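
As written, relu returns a Python list and relies on looping over a 1-D input; an equivalent vectorized form that works on arrays of any shape is simply:

def relu_vec(z):
    # element-wise max(z, 0) without a Python loop
    return np.maximum(z, 0)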
In [24]:
z = np.arange(-10,10,0.01)
In [46]:
fig = plt.figure()
ax1 = fig.add_subplot(111)
ax1.plot(z,tanh(z))
ax1.set_title('Tanh Function')
plt.show()
In [45]:
fig = plt.figure()
ax2 = fig.add_subplot(111)
ax2.plot(z,relu(z))
ax2.set_title('Relu Function')
plt.show()