# A simple neural network using NumPy
# Created for (self-) education purposes
# Configured to be run in Jupyter
# Author: Andrew M. Chap
# Last edited August 2018

import numpy as np
import random
import time

# -----------------------------
# Define training function
# -----------------------------
def train_function(x):
    """Evaluate the piecewise target function the network is trained on.

    Each row ``(x0, x1)`` of ``x`` is mapped to a single value:

    * ``x0 + x1 < 1``  and ``x0 > x1``:   ``x1 / x0``
    * ``x0 + x1 < 1``  otherwise:         ``x0 + x1``
    * ``x0 + x1 >= 1`` and ``x0 > x1``:   ``x0 - x1``
    * ``x0 + x1 >= 1`` otherwise:         ``x0 * x1``

    Parameters
    ----------
    x : ndarray of shape (N, 2) (only the first two columns are read)

    Returns
    -------
    ndarray of shape (N, 1), dtype float
    """
    x0 = x[:, 0]
    x1 = x[:, 1]
    below_line = (x0 + x1) < 1
    x0_larger = x0 > x1

    y = np.zeros((x.shape[0], 1))
    out = y[:, 0]  # 1-D view: writes below land directly in y

    # Each region is computed only on its own rows, so the division is
    # never evaluated for rows belonging to another branch.  Negated
    # masks (rather than >=, <=) keep NaN rows in the same "else"
    # branches an if/else chain would send them to.
    region = below_line & x0_larger
    out[region] = x1[region] / x0[region]

    region = below_line & ~x0_larger
    out[region] = x0[region] + x1[region]

    region = ~below_line & x0_larger
    out[region] = x0[region] - x1[region]

    region = ~below_line & ~x0_larger
    out[region] = x0[region] * x1[region]

    return y

# -----------------------------
# Set network parameters
# -----------------------------
# Number of nodes in each neural layer, including 
# input layer (first) and output layer (last)
layers = [2, 40, 40, 40, 1]
# More suboptimal heuristics follow:
# Learning rate for gradient descent.
# NOTE(review): the Network further down is constructed with a literal
# epsilon=0.1, so this value does not appear to reach the training code;
# confirm which learning rate is intended.
epsilon = 0.02
numPasses = 50000   # index of the last training iteration
batchSize = 10000   # random samples drawn per iteration
regLambda = 1/(200*batchSize)

# Number of frames in our output video
numPlots = 201
# Fraction of training completed at each frame ...
plotTimes = np.linspace(0, 1, numPlots)
# ... and the (rounded) training iteration that fraction corresponds to
plotIterations = np.round(numPasses*plotTimes)

# Regular nx-by-nx grid over the unit square on which the model output
# is sampled for plotting
nx = 100
_grid_axis = np.linspace(0, 1, nx)
x0_grid, x1_grid = np.meshgrid(_grid_axis, _grid_axis)
# Same grid flattened to one (x0, x1) row per grid point
X_mesh = np.column_stack([x0_grid.ravel(), x1_grid.ravel()])

# Per-frame storage filled in while training:
# network output on the grid,
Y_hypothesis = np.empty((nx, nx, numPlots))
# running training error,
Y_error = np.empty(numPlots)
# and elapsed wall-clock time
compTime = np.zeros(numPlots)

# True function values on the grid, for plotting the error
Y_train = train_function(X_mesh).reshape(nx, nx)

# -----------------------------
# Create neural network
# -----------------------------
# Seed NumPy's global RNG so that the weight initialization
# (np.random.randn) and the random training batches (np.random.rand)
# are repeatable from run to run
np.random.seed(0)
class Network:
    """Fully connected network with tanh hidden layers and a linear output.

    Training is batch gradient descent; for every batch call
    ``initialize_batch``, ``forward_propogate``, ``back_propogate`` and
    ``update_weights`` in that order.

    Parameters
    ----------
    layers : sequence of int
        Number of nodes in each layer, input layer first, output layer last.
    regLambda : float
        Coefficient of the weight-decay term applied in ``update_weights``.
    epsilon : float
        Gradient-descent step size.
    """

    def __init__(self,
                 layers,
                 regLambda=0.0,
                 epsilon=0.10):
        self.layers = layers
        self.N = len(layers)  # Number of layers
        self.W = []    # Weight matrices, one per pair of adjacent layers
        self.B = []    # Bias row vectors, one per non-input layer
        self.A = []    # Hidden-layer activations cached by the forward pass
        self.dW = []   # Accumulated gradient w.r.t. each weight matrix
        self.dB = []   # Accumulated gradient w.r.t. each bias vector
        self.epsilon = epsilon
        self.regLambda = regLambda
        self.eDumps = 0  # back_propogate calls since the last printError
        self.error = 0   # running mean of the per-batch mean |G - Y|
        for index, (layer0, layer1) in enumerate(
                zip(self.layers[:-1], self.layers[1:])):
            # Gaussian weights scaled by 1/sqrt(fan-in); zero biases
            self.W.append(np.random.randn(
                layer0, layer1)/np.sqrt(layer0))
            self.B.append(np.zeros((1, layer1)))
            self.dW.append(np.zeros((layer0, layer1)))
            self.dB.append(np.zeros((1, layer1)))
            # The input layer has no activation slot; every hidden
            # layer gets one
            if index > 0:
                self.A.append(np.zeros((1, layer0)))

    def initialize_batch(self):
        """Zero the gradient accumulators before processing a batch."""
        for grad_w, grad_b in zip(self.dW, self.dB):
            # fill() instead of "*= 0": multiplying would keep any
            # NaN/inf left over from a diverged step
            grad_w.fill(0.0)
            grad_b.fill(0.0)

    def forward_propogate(self, X):
        """Run the network on ``X`` and cache the hidden activations.

        ``X`` has one sample per row (``layers[0]`` columns).  The tanh
        activations of every hidden layer are stored in ``self.A`` for
        use by ``back_propogate``.  Returns the linear output layer,
        one row per sample.
        """
        Z = X.dot(self.W[0]) + self.B[0]
        for i in range(1, self.N-1):
            self.A[i-1] = np.tanh(Z)
            Z = self.A[i-1].dot(self.W[i]) + self.B[i]
        return Z

    def back_propogate(self, X, Y, G, oneOverB):
        """Accumulate weight and bias gradients for one batch.

        Parameters
        ----------
        X : batch inputs that were passed to ``forward_propogate``
        Y : target outputs for the batch
        G : network outputs returned by ``forward_propogate(X)``
        oneOverB : scale applied to the summed gradients (the caller
            passes 1/batch size)

        Relies on ``self.A`` still holding the activations from the
        forward pass on ``X``.  Also folds the batch's mean absolute
        error into the running mean ``self.error``.
        """
        delta = G-Y  # error for every output of batch
        # Incremental mean over the batches seen since printError last
        # reset eDumps
        self.error = (self.error*self.eDumps +
                      np.mean(np.abs(delta)))/ \
                      (self.eDumps+1)
        self.eDumps += 1
        # Walk from the output layer back to the first hidden layer
        for ii in range(self.N-2, 0, -1):
            self.dW[ii] += oneOverB*(
                           (self.A[ii-1]).T).dot(delta)
            self.dB[ii] += oneOverB*np.sum(
                           delta, axis=0, keepdims=True)
            # Push the error through W[ii] and the tanh derivative
            # (1 - tanh^2) of the layer below
            delta = delta.dot(self.W[ii].T) * \
                        (1 - self.A[ii-1]*self.A[ii-1])
        # First layer: its input is X itself
        self.dW[0] += oneOverB*(X.T).dot(delta)
        self.dB[0] += oneOverB*np.sum(delta, axis=0,
                                      keepdims=True)

    def update_weights(self):
        """Take one gradient-descent step using the accumulated gradients.

        Weights additionally decay by ``regLambda * W``; biases are not
        regularized.
        """
        for ii in range(0, self.N-1):
            self.W[ii] -= self.epsilon*(
                          self.dW[ii] +
                          self.regLambda*self.W[ii])
            self.B[ii] -= self.epsilon*self.dB[ii]

    def test(self, X):
        """Evaluate the network on ``X`` without touching training state.

        Parameters
        ----------
        X : ndarray of shape (n_samples, layers[0])

        Returns
        -------
        ndarray of shape (n_samples, layers[-1]).  The output is sized
        from the network's output layer, not from ``X``, so networks
        whose output width differs from the input width work too.
        Column 0 holds the first output.

        The whole array is evaluated in one pass and the cached
        activations in ``self.A`` are left unmodified.
        """
        Z = X.dot(self.W[0]) + self.B[0]
        for weights, bias in zip(self.W[1:], self.B[1:]):
            Z = np.tanh(Z).dot(weights) + bias
        return Z

    def printError(self, i):
        """Print the running error and restart the running mean.

        ``i`` is the current iteration; the total shown is read from the
        module-level ``numPasses``.
        """
        print("i = {} of {}, error = {:0.3f}".
              format(i, numPasses, self.error))
        self.eDumps = 0

# Build the network from the layer sizes and weight-decay setting above.
# NOTE(review): the learning rate is hard-coded to 0.1 here; the
# module-level `epsilon` is not passed in -- confirm which is intended.
network = Network(layers, regLambda=regLambda, epsilon=0.1)

# -----------------------------
# Train neural network
# -----------------------------
tb = 0  # not referenced elsewhere in this file; kept so globals are unchanged
plotNumber = 0  # index of the next animation frame to record
tStart = time.time()
for i in range(numPasses + 1):

    network.initialize_batch()

    # Use random training data input
    X = np.random.rand(batchSize, layers[0])

    # Neural network guess
    G = network.forward_propogate(X=X)

    # Generate training data output
    Y = train_function(X)

    # Train network
    network.back_propogate(X=X,
                           Y=Y,
                           G=G,
                           oneOverB=1./batchSize)
    network.update_weights()

    # Output progress.  Frames are keyed on plotIterations[plotNumber]
    # rather than on "i in plotIterations": if rounding ever makes two
    # entries of plotIterations equal, every one of those frames is
    # still filled, so frame k always holds the state at iteration
    # plotIterations[k] and no np.empty slot is left uninitialized.
    if plotNumber < numPlots and plotIterations[plotNumber] == i:
        # Elapsed time is taken before the (untimed) grid evaluation
        elapsed = time.time() - tStart
        snapshot = network.test(X_mesh)[:, 0].reshape(x1_grid.shape)
        while (plotNumber < numPlots
               and plotIterations[plotNumber] == i):
            compTime[plotNumber] = elapsed
            Y_error[plotNumber] = network.error
            Y_hypothesis[:, :, plotNumber] = snapshot
            plotNumber += 1
        # Prints the running error and restarts its running mean
        network.printError(i)

# -----------------------------
# Set up plot
# -----------------------------
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.animation
# Render all text through LaTeX (the labels below use \textrm etc.)
matplotlib.rc('text', usetex=True)
# Display the animation in Jupyter
matplotlib.rcParams['animation.html'] = 'html5'


plt.close()
labelsize = 14
fig, axes = plt.subplots(1, 3)
fig.dpi = 120
# Contour levels shared by every contour plot (and hence the colorbar)
c_space = np.linspace(0, 1, 11)
fig.set_size_inches(5, 2.7)
contourplots = []
for column, ax in enumerate(axes):

    # Every panel starts out showing the training data; the animation
    # later replaces the contents of the second and third panels
    contourplots.append(
        ax.contourf(x0_grid, x1_grid, Y_train, c_space))

    ax.set_xlim([0, 1])
    ax.set_ylim([0, 1])
    ax.set_xticks([0, 1])

    # Only the leftmost panel carries y ticks and the y label,
    # to save real estate
    if column == 0:
        ax.set_yticks([0, 1])
        ax.set_ylabel(r'$X_1$', rotation=0,
                      fontsize=labelsize,
                      verticalalignment='center')
    else:
        ax.set_yticks([])
        ax.tick_params(labelleft=False)

    # Only the middle panel carries the x label
    if column == 1:
        ax.set_xlabel(r'$X_0$',
                      fontsize=labelsize,
                      labelpad=-8)

axes[0].set_title(r'$\textrm{Training data}$')
axes[1].set_title(r'$\textrm{Hypothesis}$')
axes[2].set_title(r'$\left|\textrm{Error}\right|$')
fig.tight_layout()

# make space for colorbar, error plot, and annotation
subplots_adjust_right = 0.85
subplots_adjust_bottom = 0.45
fig.subplots_adjust(bottom=subplots_adjust_bottom,
                    right=subplots_adjust_right)

# Add in a colorbar with same top and bottom as subplots.
# Panel positions (in figure fractions) are read through the public
# Axes.get_position() Bbox instead of the private _position._points.
first_box = axes[0].get_position()
second_box = axes[1].get_position()
boxbottom = first_box.y0
boxtop = first_box.y1
leftmostside = first_box.x0
boxheight = boxtop - boxbottom
# Horizontal gap between neighbouring panels; the colorbar is placed
# the same distance to the right of the subplot area
boxgap = second_box.x0 - first_box.x1
colorbaroffset = subplots_adjust_right + boxgap
colorbarwidth = 0.03
cbar_ax = fig.add_axes([colorbaroffset, boxbottom,
                        colorbarwidth, boxheight])
cbar = fig.colorbar(contourplots[0], cax=cbar_ax,
                    boundaries=[0.0, 0.5, 1.0])
# Label placed just above the colorbar
cbar.ax.text(subplots_adjust_right, 1.05, r'$Y$',
             rotation=0, fontsize=labelsize,
             horizontalalignment='right')

# lower and upper bounds for error plot
emin = 0.01
emax = 1
# Add in error plot: a small log-scale axes below the contour panels
# (position given as [left, bottom, width, height] in figure fractions)
eplot = fig.add_axes([0.18, 0.17, .45, .15])
eplot.set_xlabel(r'$\textrm{Epoch}$',labelpad=-4)
eplot.set_ylabel(r'$\langle \textrm{Error} \rangle$',
                 rotation=0,labelpad=14,
                 verticalalignment='center')
# x values are the training iterations at which frames were recorded.
# This creates eplot.lines[0], whose y-data update() replaces per frame.
eplot.semilogy(plotIterations,Y_error)
eplot.set_xlim([0,plotIterations[-1]])
eplot.set_ylim([emin,emax])
# NOTE(review): labels are assigned without fixing the tick positions
# with set_yticks; presumably the leading '' lines up with an automatic
# tick below emin -- verify against the matplotlib version in use
eplot.set_yticklabels(['','0.01','0.1','1'])
# Five evenly spaced x ticks from 0 to numPasses
xticks = np.round(np.linspace(0,numPasses,5)).astype(int)
eplot.set_xticks(xticks)
xticklabels = xticks.astype(str)
# Blank the middle tick label (presumably to leave room for the
# "Epoch" axis label, which is pulled upward by labelpad=-4)
xticklabels[2] = ''
eplot.set_xticklabels(xticklabels)
eplot.grid(True)

# Add in computation time annotation
# Anchor (right edge) and vertical positions of the two text lines,
# in figure fractions
ctx = 0.97
cty0 = 0.32
cty1 = cty0 - 0.08
# First line: static underlined heading
eplot.annotate(r'$\underline{\textrm{Computation time:}}$',
                xy=(ctx, cty0), xycoords='figure fraction',
                horizontalalignment='right',
                verticalalignment='top',
                fontsize=13,color=[0,0,0])
# Template for the second line; after str.format the doubled braces
# collapse, giving $\textrm{<time> \small{(HH:MM:SS)}}$
comptimestring = r'$\textrm{{{} \small{{(HH:MM:SS)}}}}$'
# Second line: starts at 00:00:00 (gmtime(0)).  update() rewrites
# eplot.texts[1] with this same template, so this annotation is
# expected to be the second text artist on eplot -- keep the order of
# the two annotate calls.
eplot.annotate(comptimestring.format(
                time.strftime('%H:%M:%S',time.gmtime(0))),
                xy=(ctx, cty1), xycoords='figure fraction',
                horizontalalignment='right', 
                verticalalignment='top',
                fontsize=13,color=[0,0,0])

# Show the static figure before the animation is built
plt.show()

# -----------------------------
# Animate progress and results
# -----------------------------
def _contour_artists(contour_set):
    """Return the artists that make up one contourf result as a list."""
    # Newer matplotlib: the ContourSet is itself a single Artist
    if isinstance(contour_set, matplotlib.artist.Artist):
        return [contour_set]
    # Older matplotlib: one collection per contour level
    return list(contour_set.collections)


def update(i):
    """Draw animation frame ``i`` and return the artists that changed.

    Replaces the hypothesis and |error| contour plots with the data
    stored for frame ``i``, reveals the error curve up to that frame and
    updates the computation-time text.

    Returns a list of every artist created or modified, which is what
    FuncAnimation needs when blitting is enabled.
    """
    print('animating {} of {}'.format(i,numPlots))
    # Remove the contours drawn for the previous frame first; otherwise
    # every frame stacks another full contour set onto the axes
    for panel in (1, 2):
        for artist in _contour_artists(contourplots[panel]):
            artist.remove()
    # replace hypothesis and error contour plots
    # with new data
    contourplots[1] = axes[1].contourf(x0_grid,x1_grid,
                                       Y_hypothesis[:,:,i],
                                       c_space)
    contourplots[2] = axes[2].contourf(
                          x0_grid,x1_grid,
                          np.abs(Y_hypothesis[:,:,i]
                                 -Y_train),
                          c_space)
    # Update y-data for error plot
    Y_error_plot = Y_error.copy()
    # nan out the values we "haven't gotten to yet"
    Y_error_plot[i+1:] = np.nan
    error_line = eplot.lines[0]
    error_line.set_ydata(Y_error_plot)
    # Update computation time string
    time_text = eplot.texts[1]
    time_text.set_text(comptimestring.format(
                       time.strftime('%H:%M:%S',
                       time.gmtime(compTime[i]))))

    return (_contour_artists(contourplots[1]) +
            _contour_artists(contourplots[2]) +
            [error_line, time_text])


# One update() call per stored frame, 60 ms between frames
anim = matplotlib.animation.FuncAnimation(
    fig,
    update,
    frames=numPlots,
    interval=60,
    blit=True,
    repeat=True,
)
# Leave the animation as the final expression so Jupyter renders it
# (as an HTML5 video, per the 'animation.html' rc setting)
anim