Workshop 06

Workshop 06 - Implementing an Artificial Neural Network (ANN)

← Home →

Neuron implementation

Note that the following examples focus on pedagogy, not on efficiency.

The neuron contains a list of "dendrites" and an activation function, injected using the strategy pattern.

		
// One weighted input connection of a neuron.
public class Dendrite
{
	// Last value presented on this connection (written by the Neuron.Input setter).
	public double Input { get; set; }
	// Factor the input is multiplied by when the neuron sums its dendrites.
	public double Weight { get; set; }
}
	
// A single artificial neuron: the weighted sum of its dendrite inputs plus a threshold
// is passed through an activation function obtained from a factory.
public class Neuron
{
	protected List<Dendrite> _dendrites;
	protected IActivationFunctionFactory _activationFunctionFactory = null;
	protected IActivationFunction _activationFunction = null;

	// Backing field for ActivationFunctionType.
	private ActivationFunctionType _activationFunctionType;

	// Constant term added to the weighted sum before the activation function is applied.
	public double Threshold { get; set; }

	// Kind of activation function this neuron uses.
	// Changing it discards the cached function so the next evaluation uses the new type.
	public ActivationFunctionType ActivationFunctionType
	{
		get
		{
			return _activationFunctionType;
		}
		set
		{
			_activationFunctionType = value;
			_activationFunction = null;
		}
	}

	// Weighted sum of the inputs plus the threshold, as computed by the last Input assignment.
	public double Activation { get; protected set; }

	// Activation function applied to Activation, as computed by the last Input assignment.
	public double Output { get; protected set; }

	// Creates a neuron with `dimension` dendrites (all inputs and weights start at 0).
	public Neuron(int dimension, ActivationFunctionType activationFunctionType)
	{
		ActivationFunctionType = activationFunctionType;
		_dendrites = new List<Dendrite>(dimension);
		for (int index = 0; index < dimension; index++)
		{
			_dendrites.Add(new Dendrite());
		}
	}

	// Factory used to create the activation function; a default ActivationFunctionFactory
	// is created on first read when none was assigned.
	// Assigning a factory discards the cached function so the new factory is actually used.
	public IActivationFunctionFactory ActivationFunctionFactory
	{
		get
		{
			return _activationFunctionFactory ?? (_activationFunctionFactory = new ActivationFunctionFactory());
		}
		set
		{
			_activationFunctionFactory = value;
			_activationFunction = null;
		}
	}

	// Activation function instance, created lazily through the factory and then cached.
	public IActivationFunction ActivationFunction
	{
		get
		{
			return _activationFunction ?? (_activationFunction = ActivationFunctionFactory.GetActivationFunction(ActivationFunctionType));
		}
	}

	public List<Dendrite> Dendrites
	{
		get
		{
			return _dendrites;
		}
	}

	// Number of dendrites, i.e. the dimension of the input space.
	public int Dimension
	{
		get
		{
			return _dendrites.Count;
		}
	}

	// Write-only: stores the given values on the dendrites (in order) and recomputes
	// Activation and Output.
	// Throws ArgumentNullException when the sequence is null.
	// A sequence longer than Dimension fails on the list indexer (ArgumentOutOfRangeException);
	// a shorter one only updates and sums the dendrites it covers.
	public IEnumerable<double> Input
	{
		set
		{
			if (value == null)
			{
				throw new ArgumentNullException("value");
			}

			double sum = 0;
			int index = 0;
			foreach (var input in value)
			{
				var dendrite = _dendrites[index];
				dendrite.Input = input;
				sum += dendrite.Input * dendrite.Weight;
				// Keep Activation in step with the running sum, exactly as before,
				// so a failure mid-sequence leaves the same partial state.
				Activation = sum;
				index++;
			}
			Activation = sum + Threshold;
			Output = ActivationFunction.f(Activation);
		}
	}
}

The responsibility for instantiating activation functions is delegated to a factory.

	
// Selects which activation function a neuron uses.
// Note: no member has the value 0, so default(ActivationFunctionType) is not a named member.
public enum ActivationFunctionType
{
	// Step (Heaviside) function
	Step = 1,
	// Sigmoid function
	Sigmoid = 2
}

// Factory abstraction: turns an ActivationFunctionType into an IActivationFunction instance.
public interface IActivationFunctionFactory
{
	// Returns the activation function that corresponds to the given type.
	IActivationFunction GetActivationFunction(ActivationFunctionType function);
}

// Default factory: maps each ActivationFunctionType member to a new instance of the
// matching IActivationFunction implementation.
public class ActivationFunctionFactory : IActivationFunctionFactory
{
    // Returns a new activation function for the requested type.
    // Throws ArgumentOutOfRangeException for values that are not handled here
    // (this includes 0, the enum's default value), instead of handing back null.
    public IActivationFunction GetActivationFunction(ActivationFunctionType function)
    {
        switch (function)
        {
            case ActivationFunctionType.Step:
                return new StepActivationFunction();
            case ActivationFunctionType.Sigmoid:
                return new SigmoidActivationFunction();
            default:
                throw new ArgumentOutOfRangeException("function", function, "Unsupported activation function type.");
        }
    }
}

Two activation functions were implemented: Step (Heaviside) and Sigmoid.

	
// Strategy interface for a neuron's activation function.
public interface IActivationFunction
{
	// the activation function, evaluated at x
	double f(double x);

	// the derivative of the activation function, evaluated at x
	double df(double x);
}

// Step (Heaviside) activation: the output is either 0 or 1.
public class StepActivationFunction : IActivationFunction
{
	// 1 for x >= 0, otherwise 0.
	public double f(double x)
	{
		return x >= 0 ? 1.0 : 0.0;
	}

	// This implementation reports a derivative of 0 for every x.
	public double df(double x)
	{
		return 0.0;
	}
}

// Sigmoid activation: f(x) = 1 / (1 + e^(-K x)).
public class SigmoidActivationFunction : IActivationFunction
{
	// Slope factor applied to x inside the exponential.
	private double K = 1;

	// Evaluates the sigmoid at x.
	public double f(double x)
	{
		return 1.0 / (1 + Math.Exp(-K * x));
	}

	// Derivative of f at x, expressed through f itself: f'(x) = K * f(x) * (1 - f(x)).
	// The factor K comes from the chain rule; with K == 1 the result is unchanged.
	public double df(double x)
	{
		var fx = f(x);
		return K * fx * (1 - fx);
	}
}

The Sigmoid activation function is preferred as it is continuous and differentiable.

Neurons are separating the hyperspace by a hyperplane.
For example, in 2D, there are an infinite number of solutions for the OR problem:

Neurons are limited to linearly separable problems.
For example, the XOR values cannot be separated by a line.

Multi-Layer Perceptron (MLP) Implementation

For problems that are not linearly separable (e.g. XOR), one can interconnect multiple neurons (the connectionist paradigm). There are various Artificial Neural Network topologies (e.g. completely interconnected).
We'll discuss the Multi-Layer Perceptron => a feed-forward ANN composed of locally interconnected neural layers.

The Layer class represents a set of neurons that are not connected to each other and share the same input space dimension.

	
// A set of neurons that all receive the same input vector.
// Enumerating a Layer yields the current Output of each neuron, in order.
public class Layer : IEnumerable<double>
{
	protected List<Neuron> _neurons = new List<Neuron>();
	public List<Neuron> Neurons
	{
		get
		{
			return _neurons;
		}
	}

	// Creates `neuronsCount` neurons, each with `inputDimension` dendrites.
	// The activation type is optional so existing two-argument calls keep working;
	// it defaults to Sigmoid, which has a usable (non-zero) derivative.
	public Layer(
		int inputDimension,
		int neuronsCount,
		ActivationFunctionType activationFunctionType = ActivationFunctionType.Sigmoid)
	{
		for (int index = 0; index < neuronsCount; index++)
		{
			// Neuron has no single-argument constructor: the activation type must be supplied.
			var neuron = new Neuron(inputDimension, activationFunctionType);
			_neurons.Add(neuron);
		}
	}

	// Write-only: hands the same input sequence to every neuron, which makes each
	// neuron recompute its output.
	public IEnumerable<double> Input
	{
		set
		{
			foreach (var neuron in Neurons)
			{
				neuron.Input = value;
			}
		}
	}

	// Number of neurons in the layer.
	public int Count
	{
		get
		{
			return _neurons.Count;
		}
	}

	// The layer output is the sequence of its neurons' outputs; it is this same object
	// viewed as IEnumerable<double>, so values are read from the neurons when enumerated.
	public IEnumerable<double> Output
	{
		get
		{
			return this;
		}
	}

	#region Output IEnumerable

	public IEnumerator<double> GetEnumerator()
	{
		return new LayerEnumerator(this);
	}

	System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator()
	{
		// IEnumerator<double> already is a System.Collections.IEnumerator; no cast needed.
		return GetEnumerator();
	}

	#endregion
}

The Layer enumerator (the Layer class implements the IEnumerable interface, and this class provides the matching IEnumerator):

	
// Enumerator over the outputs of a layer's neurons.
public class LayerEnumerator : IEnumerator<double>
{
	private List<Neuron> _neurons;

	// -1 means "before the first element"; _neurons.Count means "past the last element".
	private int _currentIndex = -1;

	public LayerEnumerator(Layer layer)
	{
		_neurons = layer.Neurons;
	}

	// Output of the neuron at the current position.
	// Throws InvalidOperationException when positioned before the first or after the last
	// neuron. The position is checked explicitly because List<T>'s indexer reports a bad
	// index with ArgumentOutOfRangeException, not IndexOutOfRangeException.
	public double Current
	{
		get
		{
			if (_currentIndex < 0 || _currentIndex >= _neurons.Count)
			{
				throw new InvalidOperationException();
			}
			return _neurons[_currentIndex].Output;
		}
	}

	// Nothing to release.
	public void Dispose()
	{
	}

	object System.Collections.IEnumerator.Current
	{
		get { return Current; }
	}

	// Advances to the next neuron; returns false once the end has been passed.
	public bool MoveNext()
	{
		// Stop advancing at the end so repeated calls cannot grow the index without bound.
		if (_currentIndex < _neurons.Count)
		{
			_currentIndex++;
		}
		return _currentIndex < _neurons.Count;
	}

	// Moves back to the position before the first neuron.
	public void Reset()
	{
		_currentIndex = -1;
	}
}

MLP is a list of neural layers:

// Feed-forward network: an ordered list of layers where each layer's output
// is the next layer's input.
public class MultiLayerPerceptron
{
	protected List<Layer> _layers = new List<Layer>();

	// Dimension of the vectors fed to the first layer.
	public int InputDimension { get; protected set; }

	// Flag recording whether training reached its target (set from outside this class).
	public bool IsTrained { get; set; }

	public MultiLayerPerceptron(int inputDimension)
	{
		InputDimension = inputDimension;
	}

	public List<Layer> Layers
	{
		get { return _layers; }
	}

	// Appends a layer of `neuronsNumber` neurons. Its input dimension is the network's
	// InputDimension for the first layer, otherwise the neuron count of the layer before it.
	public void AddLayer(int neuronsNumber)
	{
		int inputCount = _layers.Count == 0
			? InputDimension
			: _layers[_layers.Count - 1].Neurons.Count;

		_layers.Add(new Layer(inputCount, neuronsNumber));
	}

	// First layer of the network.
	public Layer InputLayer
	{
		get { return _layers[0]; }
	}

	// Last layer of the network.
	public Layer OutputLayer
	{
		get
		{
			int lastIndex = _layers.Count - 1;
			return _layers[lastIndex];
		}
	}

	// Write-only: feeds the value to the first layer, then chains every following layer
	// to the output of the one before it, which also computes the network output.
	public IEnumerable<double> Input
	{
		set
		{
			Layer previous = _layers[0];
			previous.Input = value;

			for (int position = 1; position < _layers.Count; position++)
			{
				Layer current = _layers[position];
				current.Input = previous.Output;
				previous = current;
			}
		}
	}

	// The network output is the output of its last layer.
	public IEnumerable<double> Output
	{
		get
		{
			int lastIndex = _layers.Count - 1;
			return _layers[lastIndex].Output;
		}
	}
}

The Backpropagation Training Algorithm

Supervised Learning algorithm => the training set containing labeled data is presented multiple times to the ANN.
In the discussed implementation, the training set is represented as a generic list of TrainingObject instances.

// One labeled training sample: an input vector and the output vector expected for it.
public class TrainingObject
{
	protected double[] _inputForm;
	protected double[] _expectedOutput;

	// Allocates zero-filled arrays of the requested sizes; callers fill them in
	// through InputForm and ExpectedOutput.
	public TrainingObject(int formDimension, int numberOfClasses)
	{
		_expectedOutput = new double[numberOfClasses];
		_inputForm = new double[formDimension];
	}

	// Length of the input vector.
	public int FormDimension
	{
		get { return _inputForm.Length; }
	}

	// Length of the expected-output vector.
	public int NumberOfClasses
	{
		get { return _expectedOutput.Length; }
	}

	// The input vector itself (the live array, not a copy).
	public double[] InputForm
	{
		get { return _inputForm; }
	}

	// The expected-output vector itself (the live array, not a copy).
	public double[] ExpectedOutput
	{
		get { return _expectedOutput; }
	}
}

The ANN weights are initialized with random small values.
Backpropagation is a gradient descent algorithm => minimizing the classification error = criteria expressed as a function J of all ANN weights.
Requires differentiable activation functions (e.g. Sigmoid).
Risk: getting stuck in a local minimum.

The ANN weights are adjusted from the output to the input layer (back-propagation) proportionally with the anti-gradient of criteria function, J.

To implement MLP training algorithms, one can start from the template method design pattern.

// Template method pattern: Train() fixes the order of the training steps,
// subclasses provide the steps themselves.
public abstract class TrainAlgorithmBase
{
	// Raised by subclasses (through OnTrainingProgress) to report how training is going.
	public event EventHandler<TrainingProgressEventArgs> TrainingProgress;

	// Runs the algorithm: weight initialization first, then weight adjustment.
	public void Train()
	{
		InitializeWeights();
		AdjustWeights();
	}

	// Step 1: give the weights their starting values.
	protected abstract void InitializeWeights();

	// Step 2: adjust the weights.
	protected abstract void AdjustWeights();

	// Raises TrainingProgress if anyone is subscribed.
	protected virtual void OnTrainingProgress(TrainingProgressEventArgs e)
	{
		var handler = TrainingProgress;
		if (handler == null)
		{
			return;
		}
		handler(this, e);
	}
}

The TrainingProgress event provides information on how training is evolving and allows the process to be interrupted (set Cancel to true).

// Data passed with the TrainingProgress event.
public class TrainingProgressEventArgs : EventArgs
{
	// Error value reported by the training algorithm at the time the event is raised.
	public double Error { get; set; }
	// Set to true by a handler to ask the algorithm to stop training.
	public bool Cancel { get; set; }
}

The BackPropagation class inherits from the TrainAlgorithmBase base class.

// Backpropagation training for a MultiLayerPerceptron.
// Only Perceptron.InputLayer (treated as the hidden layer) and Perceptron.OutputLayer are
// adjusted, so this implementation targets a two-layer network.
public class BackPropagation : TrainAlgorithmBase
{
	// Upper bound on the number of passes over the training set
	public long MaxNumberOfSteps { get; set; }

	// The weights are adjusted proportionally with the gradient of J => Learning Constant parameter
	public double LearningConstant { get; set; }

	// Training stops when the mean error over the training set is at or below this value
	public double TargetError { get; set; }

	// MLP to be trained
	public MultiLayerPerceptron Perceptron { get; set; }

	// Supervised learning => the training set will be presented multiple times to MLP
	public List<TrainingObject> TrainingSet { get; set; }

	// Constructor; MaxNumberOfSteps starts at 500000 and can be changed afterwards.
	public BackPropagation(
		MultiLayerPerceptron perceptron,
		List<TrainingObject> trainingSet,
		double learningConstant = 0.01,
		double targetError = 0.01)
	{
		MaxNumberOfSteps = 500000;
		Perceptron = perceptron;
		TrainingSet = trainingSet;
		LearningConstant = learningConstant;
		TargetError = targetError;
	}

	// Error of one layer for one sample: half the sum of squared differences between
	// each neuron's output and the corresponding expected value.
	public double GetError(Layer layer, TrainingObject trainingObject)
	{
		int index = 0;
		double error = 0;
		foreach (var neuron in layer.Neurons)
		{
			error += 0.5 * Math.Pow(neuron.Output - trainingObject.ExpectedOutput[index], 2);
			index++;
		}
		return error;
	}

	// Compute the MLP error for one element of the training set (error of the output layer)
	public double GetError(MultiLayerPerceptron perceptron, TrainingObject trainingObject)
	{
		return GetError(perceptron.OutputLayer, trainingObject);
	}

	// Initial weights => recommended to be in the [-2/n,2/n] range
	//    where n is the number of input values for one neuron
	//    see: Retele Neuronale - Teorie si Aplicatii p.224
	// The threshold of each neuron is drawn from the same range, after its weights.
	protected override void InitializeWeights()
	{
		var random = new Random();
		foreach (var layer in Perceptron.Layers)
		{
			foreach (var neuron in layer.Neurons)
			{
				var dendritesCount = neuron.Dendrites.Count;
				var scale = 2.0 / dendritesCount;

				foreach (var dendrite in neuron.Dendrites)
				{
					// 2 * NextDouble() - 1 is a random number in [-1,1)
					dendrite.Weight = scale * (2 * random.NextDouble() - 1);
				}
				neuron.Threshold = scale * (2 * random.NextDouble() - 1);
			}
		}
	}

	// Backpropagation: repeatedly presents the training set and adjusts the weights until
	// the mean error reaches TargetError, MaxNumberOfSteps passes have run, or a
	// TrainingProgress handler sets Cancel. The outcome is stored in Perceptron.IsTrained.
	// The weights are expected to be initialized already: Train() calls InitializeWeights()
	// right before this method, so they are not re-initialized here.
	protected override void AdjustWeights()
	{
		if (TrainingSet.Count == 0)
		{
			// Without samples the mean error is 0/0 (NaN) and can never reach the target;
			// report "not trained" right away instead of looping MaxNumberOfSteps times.
			Perceptron.IsTrained = false;
			return;
		}

		bool isTrained = false;
		long currentStep = 0;
		do
		{
			double totalError = 0;
			// The training set is presented multiple times to MLP
			foreach (var trainingObject in TrainingSet)
			{
				// forward pass
				Perceptron.Input = trainingObject.InputForm;

				// Both error signals are computed before any weight changes, so the
				// hidden layer sees the output weights that produced this forward pass.
				var dOutput = ComputeOutputDeltas(Perceptron.OutputLayer, trainingObject);
				var dInput = ComputeHiddenDeltas(Perceptron.InputLayer, Perceptron.OutputLayer, dOutput);

				// adjust weights proportionally with the anti-gradient of J
				ApplyDeltas(Perceptron.OutputLayer, dOutput);
				ApplyDeltas(Perceptron.InputLayer, dInput);

				// The outputs are not recomputed after the adjustment, so this is the
				// error of the forward pass above.
				totalError += GetError(Perceptron, trainingObject);
			}

			// Check if the target error was reached
			double meanError = totalError / TrainingSet.Count;
			isTrained = meanError <= TargetError;

			currentStep++;

			// Training Progress Event, raised every 100 passes
			if ((currentStep % 100) == 0)
			{
				var trainingProgressArgs = new TrainingProgressEventArgs { Error = meanError, Cancel = false };
				OnTrainingProgress(trainingProgressArgs);
				if (trainingProgressArgs.Cancel) break;
			}

			// Mix the training set in order to minimize the risk of finding a local minimum
			TrainingSet.Mix();
		} while (!isTrained && currentStep < MaxNumberOfSteps);

		Perceptron.IsTrained = isTrained;
	}

	// Output layer "error signal": (expected - actual) * f'(activation), one entry per neuron.
	private static double[] ComputeOutputDeltas(Layer outputLayer, TrainingObject trainingObject)
	{
		var deltas = new double[outputLayer.Neurons.Count];
		int index = 0;
		foreach (var neuron in outputLayer.Neurons)
		{
			var neuronError = trainingObject.ExpectedOutput[index] - neuron.Output;
			deltas[index] = neuronError * neuron.ActivationFunction.df(neuron.Activation);
			index++;
		}
		return deltas;
	}

	// Hidden layer "error signal": every output neuron's signal is propagated back to each
	// hidden neuron through the weight connecting them, then multiplied by f'(activation).
	private static double[] ComputeHiddenDeltas(Layer hiddenLayer, Layer outputLayer, double[] outputDeltas)
	{
		var deltas = new double[hiddenLayer.Neurons.Count];
		int index = 0;
		foreach (var neuron in hiddenLayer.Neurons)
		{
			double neuronError = 0;
			int outputIndex = 0;
			foreach (var outputNeuron in outputLayer.Neurons)
			{
				// dendrite `index` of an output neuron is its connection to hidden neuron `index`
				neuronError += outputNeuron.Dendrites[index].Weight * outputDeltas[outputIndex];
				outputIndex++;
			}
			deltas[index] = neuronError * neuron.ActivationFunction.df(neuron.Activation);
			index++;
		}
		return deltas;
	}

	// Moves every weight and threshold of the layer by LearningConstant times the neuron's
	// error signal (times the dendrite input, for weights).
	private void ApplyDeltas(Layer layer, double[] deltas)
	{
		int neuronIndex = 0;
		foreach (var neuron in layer.Neurons)
		{
			foreach (var dendrite in neuron.Dendrites)
			{
				dendrite.Weight += LearningConstant * dendrite.Input * deltas[neuronIndex];
			}
			neuron.Threshold += LearningConstant * deltas[neuronIndex];
			neuronIndex++;
		}
	}
}

Example: MLP for XOR

To solve XOR, an MLP with 2 layers and 3 neurons is needed.

The XOR training set is presented to the ANN multiple times (until the expected error is reached)

At each step, the neural weights are adjusted proportionally with the anti-gradient of the criteria function J.

// XOR example: a 2-input network with a 2-neuron layer followed by a 1-neuron layer,
// trained with backpropagation on the four rows of the XOR truth table.
public class MLPXOR : Singleton<MLPXOR>
{
	private MultiLayerPerceptron _perceptron = null;
	private TrainAlgorithmBase _algorithm = null;

	// Builds the XOR training set and the training algorithm for MLP.
	public MLPXOR()
	{
		var samples = new List<TrainingObject>
		{
			CreateSample(0, 0, 0),
			CreateSample(0, 1, 1),
			CreateSample(1, 0, 1),
			CreateSample(1, 1, 0)
		};

		_algorithm = new BackPropagation(MLP, samples);
	}

	// One truth-table row: two inputs and the single expected output.
	private static TrainingObject CreateSample(double first, double second, double expected)
	{
		var sample = new TrainingObject(2, 1);
		sample.InputForm[0] = first;
		sample.InputForm[1] = second;
		sample.ExpectedOutput[0] = expected;
		return sample;
	}

	// The network being trained; created on first read if none has been assigned.
	public MultiLayerPerceptron MLP
	{
		get
		{
			if (_perceptron != null)
			{
				return _perceptron;
			}

			_perceptron = new MultiLayerPerceptron(2);
			_perceptron.AddLayer(2);
			_perceptron.AddLayer(1);
			return _perceptron;
		}
		set
		{
			_perceptron = value;
		}
	}

	// Runs training with the given learning constant and target error;
	// returns the network's IsTrained flag afterwards.
	public bool Train(double learningConst, double expectedError)
	{
		var trainer = (BackPropagation)_algorithm;
		trainer.LearningConstant = learningConst;
		trainer.TargetError = expectedError;
		trainer.Train();
		return MLP.IsTrained;
	}
}

The loop ends when the expected value of J is reached.

When getting stuck in a local minimum, one possible strategy is relaunching the training process.

References:

D. Dumitrescu, H. Costin, Retele Neuronale Teorie si Aplicatii, Teora, Romania, 1996
Single qubit neural quantum circuit for solving Exclusive-OR

← Home →