Implementing layers in NN (part 1)
This first article discusses the implementation of the Sequential module and of the Dense and Input layers for a multi-layer NN, using only the NumPy library.
This article is aimed at the practical implementation of layers with a minimum of theory, and it is assumed that the reader is familiar with the basic theory of learning neural networks.
Let’s start by importing libraries:
import numpy as np
Implementing a Dense layer
class DenseLayer():
    """Fully connected layer whose parameters are created on the first call."""

    def __init__(self, units=1, activation='relu', weights=None, b=None):
        """Store the layer configuration.

        Args:
            units: number of neurons in the layer.
            activation: name of the activation function.
            weights: already adjusted weights to start from, or None when the
                layer should create its own initial weights.
            b: already adjusted biases to start from, or None.
        """
        self.units = units
        # True until the initial weights and biases have been created.
        self.fl_init = True
        self.activation = activation
        # An empty array means "nothing supplied". Building it per instance
        # (instead of in the signature) keeps instances from sharing one
        # default object, and asarray lets callers pass plain lists as well.
        self.weights = np.array([]) if weights is None else np.asarray(weights)
        self.b_new = np.array([]) if b is None else np.asarray(b)
        # Parameters actually used in the forward pass; filled in on first use.
        self.w, self.b = np.array([]), np.array([])
Let me explain the parameters:
units – number of neurons
activation – activation function
weights and b_new – adjusted weights and biases, which we will later pass back to the model
w, b – initial weights and biases
fl_init – flag indicating whether initial weights and offsets have been created
Next, we’ll use the magic method __call__ to turn our class into a functor (a callable object):
def __call__(self, x):
    """Apply the layer to ``x`` and return the output with layer metadata.

    On the first call the parameters are taken from the arrays given to the
    constructor if there are any; otherwise they are initialised randomly.

    Args:
        x: input array whose last axis holds the features.

    Returns:
        The tuple ``(output, w, b, 1, units, activation)``. The constant ``1``
        is the tag by which ``Sequential.fit`` recognises a dense layer.

    Raises:
        ValueError: if ``self.activation`` is not a supported name.
    """
    fan_in = x.shape[-1]
    if self.weights.size != 0:
        # Already adjusted weights were supplied: they replace the initial ones.
        self.weights = self.weights.reshape((fan_in, self.units))
        self.w = self.weights
        if self.b_new.size != 0:
            self.b_new = self.b_new.reshape((self.units, ))
            self.b = self.b_new
        elif self.b.size == 0:
            # Weights given without biases: fall back to the default biases
            # instead of failing on the reshape of an empty array.
            self.b = np.ones(shape=(self.units, ), dtype=np.float32)
        self.fl_init = False
    elif self.fl_init:
        # He initialisation: N(0, 1) samples scaled BY sqrt(2 / fan_in).
        # Dividing by that factor would make the weights grow with fan_in.
        self.w = np.random.normal(loc=0.0, scale=1.0, size=(fan_in, self.units)) * np.sqrt(2.0 / fan_in)
        self.b = np.ones(shape=(self.units, ), dtype=np.float32)
        self.fl_init = False

    y = x.dot(self.w) + self.b
    if self.activation == 'relu':
        out = np.maximum(0.0, y)
    elif self.activation == 'Leaky_relu':
        out = np.maximum(0.01 * y, y)
    elif self.activation == 'softmax':
        # Normalise over the unit axis (not over the batch) and subtract the
        # row maximum first so that exp() cannot overflow.
        exp = np.exp(y - np.max(y, axis=-1, keepdims=True))
        out = exp / np.sum(exp, axis=-1, keepdims=True)
    elif self.activation == 'sigmoid':
        out = 1 / (1 + np.exp(-y))
    elif self.activation == 'tanh':
        # np.tanh stays finite where (exp(2y) - 1) / (exp(2y) + 1) gives nan.
        out = np.tanh(y)
    elif self.activation == 'linear':
        out = y
    else:
        raise ValueError(f"Unknown activation: {self.activation!r}")
    return out, self.w, self.b, 1, self.units, self.activation
The operating principle is as follows:
First, we check that the initial weights have not been created yet and that no already adjusted weights were passed in.
If so, the initial weights are drawn from a zero-mean normal distribution and rescaled according to the number of inputs, the initial biases are set to ones, and the flag is set to False.
If adjusted weights were passed in, we use them in place of the initial weights.
We compute y = x·w + b.
We pass y through the chosen activation function (‘relu‘ is the default).
Let’s implement a simple Input class:
class Input():
    """Pass-through first layer that can reshape its input to a fixed shape."""

    def __init__(self, shape=None):
        """Remember the target shape; None means the input is left as it is."""
        self.shape = shape

    def __call__(self, x):
        """Return ``(x, 0)``, reshaping ``x`` first when a shape was requested.

        The second element is the layer tag: 0 for the input layer.
        """
        if self.shape is not None and x.shape != self.shape:
            # ndarray.reshape takes the shape positionally; passing it as
            # `shape=` raises TypeError.
            return x.reshape(self.shape), 0
        return x, 0
Let’s move on to writing the module Sequential:
class Sequential():
    """Container that chains an input layer and the layers that follow it."""

    def __init__(self, layers):
        """Store the layers of the NN, in the order they are applied."""
        # Keep a private list: training replaces entries of self.layers in
        # place, which must not modify the caller's sequence and would fail
        # outright if a tuple had been stored.
        self.layers = list(layers)
Let’s start with the method fit:
First, we implement the helper function predict, which returns, for every layer, its output, activation function, weights and biases, the layer type and the number of neurons. We will need it later for the error backpropagation (BP) step.
def predict(x):
    """Run a forward pass and record what backpropagation needs.

    Returns six lists: the output of every layer (input layer included), and
    for the layers after the first one their activation names, weights and
    biases, followed by the layer tags (input layer included) and the unit
    counts of the layers after the first one.
    """
    first = self.layers[0](x)
    predict_for_layers = [first[0]]
    layer_2 = [first[1]]
    activations, weights, b_coef, units = [], [], [], []

    current = first[0]
    for layer in self.layers[1:]:
        # Each of these layers returns
        # (output, weights, bias, tag, units, activation).
        result = layer(current)
        current = result[0]
        predict_for_layers.append(current)
        weights.append(result[1])
        b_coef.append(result[2])
        layer_2.append(result[3])
        units.append(result[4])
        activations.append(result[-1])
    return predict_for_layers, activations, weights, b_coef, layer_2, units
Next, we implement the gradient calculation functions:
# Every helper below receives the OUTPUT of the activation (the value stored
# for the layer during the forward pass), not its pre-activation input.

def sigmoid_gradient(output):
    """Derivative of the sigmoid written in terms of its output s: s * (1 - s)."""
    return output * (1 - output)


def tanh_gradient(out):
    """Derivative of tanh written in terms of its output t: 1 - t**2."""
    return 1 - out ** 2


def relu_gradient(x):
    """Derivative of ReLU: 1 where the output is positive, 0 elsewhere."""
    return (x > 0) * 1


def leaky_relu_gradient(x):
    """Derivative of leaky ReLU: 1 for positive outputs, 0.01 otherwise."""
    return (x > 0) * 1 + (x <= 0) * 0.01


def linear_gradient(x):
    """Derivative of the identity activation: the constant 1."""
    return 1
Let’s move on to the implementation of backpropagation itself:
# Derivative helper for every activation that can be trained.
gradients = {
    'linear': linear_gradient,
    'Leaky_relu': leaky_relu_gradient,
    'relu': relu_gradient,
    'sigmoid': sigmoid_gradient,
    'tanh': tanh_gradient,
}
trainable = len(self.layers) - 1  # every layer except the first (input) one

# NOTE(review): each sample is trained for `epochs` steps before the next
# sample is taken, so later samples dominate the result - confirm that this
# ordering is intended.
for elem in range(x_input.shape[0]):
    x, y = x_input[elem].reshape(1, -1), y_input[elem]
    for epoch in range(epochs):
        # predict() returns: outputs, activations, weights, biases,
        # layer tags, number of neurons.
        outputs, activations, weights, b_coef, layers, units = predict(x)
        # Reverse the per-layer lists so that index 0 is the last layer.
        predict_for_layers = outputs[::-1]
        activations = activations[::-1]
        weights = weights[::-1]
        b_coef = b_coef[::-1]

        layer_error = predict_for_layers[0] - y
        if len(layer_error.shape) == 1:
            layer_error = layer_error.reshape(1, -1)

        for ind in range(trainable):
            if activations[ind] not in gradients:
                raise ValueError(
                    f"No gradient implemented for activation {activations[ind]!r}")
            # Local gradient: error times the derivative of the activation.
            delta_weights = layer_error * gradients[activations[ind]](predict_for_layers[ind])
            # The bias gradient is the local gradient summed over the batch axis.
            b_coef[ind] -= alpha * np.sum(delta_weights, axis=0)
            # Propagate the error with the weights as they were before this
            # step's update.
            layer_error = delta_weights.dot(np.transpose(weights[ind]))
            weights[ind] -= alpha * (np.transpose(predict_for_layers[ind + 1]).dot(delta_weights))

        # Back to forward order, then rebuild the dense layers (tag 1) from
        # the adjusted parameters.
        weights_inp = weights[::-1]
        b_inp = b_coef[::-1]
        activations_inp = activations[::-1]
        for indx in range(1, len(self.layers)):
            if layers[indx] == 1:
                self.layers[indx] = DenseLayer(units=units[indx - 1], weights=weights_inp[indx - 1], b=b_inp[indx - 1], activation=activations_inp[indx - 1])
I will describe the principle of operation:
A single (input, label) pair is taken.
In a loop over the number of epochs:
The previously written helper function predict computes the output of every layer (the lists are reversed so that they start from the last layer).
We compute the error on the last layer.
delta_weights – the local gradient: the layer error multiplied element-wise by the derivative of the activation function. Where the derivative (the slope of the tangent line) is small, the neuron is saturated – its value is either very large or very small – so predictions made with high confidence receive only a small correction.
Next, we overwrite layer_error with the error propagated back to the previous layer.
We update the weights according to the following rule: $W \leftarrow W - \alpha \, a_{\text{prev}}^{T} \, \delta$, where $a_{\text{prev}}$ is the input of the layer, $\delta$ is delta_weights and $\alpha$ is the learning rate.
We go through the layers and overwrite them with the new weights and biases.
It remains to write the predict method of the class, which only runs the forward pass without collecting the auxiliary lists:
def predict(self, x):
    """Run a forward pass and return the tuple produced by the last layer.

    Element 0 of the returned tuple is the network output; the remaining
    elements are whatever metadata the last layer returns.

    Args:
        x: input samples; array-likes such as nested lists are accepted.
    """
    # asarray is a no-op for arrays and lets plain lists through to layers
    # that read `x.shape`.
    result = self.layers[0](np.asarray(x))
    for layer in self.layers[1:]:
        result = layer(result[0])
    return result
Complete code:
class Sequential():
    """Minimal sequential model: an input layer followed by further layers.

    Every layer is a callable. The first one returns ``(output, tag)``; each
    following one returns ``(output, weights, bias, tag, units, activation)``.
    """

    def __init__(self, layers):
        """Store the layers of the NN, in the order they are applied."""
        # Keep a private list: fit() replaces entries in place, which must not
        # modify the caller's sequence and would fail outright for a tuple.
        self.layers = list(layers)

    def fit(self, x_input, y_input, epochs=50, alpha=0.01):
        """Train the network with per-sample gradient descent.

        Args:
            x_input: training inputs, one sample per row.
            y_input: labels, one per sample.
            epochs: number of update steps performed on every sample.
            alpha: learning rate.

        Raises:
            ValueError: if a layer uses an activation without a gradient.
        """
        def predict(x):
            """Forward pass that records what backpropagation needs."""
            first = self.layers[0](x)
            predict_for_layers = [first[0]]
            layer_2 = [first[1]]
            activations, weights, b_coef, units = [], [], [], []
            current = first[0]
            for layer in self.layers[1:]:
                result = layer(current)
                current = result[0]
                predict_for_layers.append(current)
                weights.append(result[1])
                b_coef.append(result[2])
                layer_2.append(result[3])
                units.append(result[4])
                activations.append(result[-1])
            return predict_for_layers, activations, weights, b_coef, layer_2, units

        # The gradient helpers receive the OUTPUT of the activation.
        def sigmoid_gradient(output):
            return output * (1 - output)

        def tanh_gradient(out):
            # d tanh(z) / dz expressed through the output t = tanh(z).
            return 1 - out ** 2

        def relu_gradient(x):
            return (x > 0) * 1

        def leaky_relu_gradient(x):
            return (x > 0) * 1 + (x <= 0) * 0.01

        def linear_gradient(x):
            return 1

        gradients = {
            'linear': linear_gradient,
            'Leaky_relu': leaky_relu_gradient,
            'relu': relu_gradient,
            'sigmoid': sigmoid_gradient,
            'tanh': tanh_gradient,
        }
        trainable = len(self.layers) - 1  # every layer except the first one

        # NOTE(review): each sample is trained for `epochs` steps before the
        # next sample is taken, so later samples dominate the result - confirm
        # that this ordering is intended.
        for sample, y in zip(np.asarray(x_input), y_input):
            x = sample.reshape(1, -1)
            for epoch in range(epochs):
                # predict() returns: outputs, activations, weights, biases,
                # layer tags, number of neurons.
                outputs, activations, weights, b_coef, layers, units = predict(x)
                # Reverse the per-layer lists so that index 0 is the last layer.
                predict_for_layers = outputs[::-1]
                activations = activations[::-1]
                weights = weights[::-1]
                b_coef = b_coef[::-1]

                layer_error = predict_for_layers[0] - y
                if len(layer_error.shape) == 1:
                    layer_error = layer_error.reshape(1, -1)

                for ind in range(trainable):
                    if activations[ind] not in gradients:
                        raise ValueError(
                            f"No gradient implemented for activation {activations[ind]!r}")
                    # Local gradient: error times the activation derivative.
                    delta_weights = layer_error * gradients[activations[ind]](predict_for_layers[ind])
                    # Bias gradient: local gradient summed over the batch axis.
                    b_coef[ind] -= alpha * np.sum(delta_weights, axis=0)
                    # Propagate the error with the weights as they were
                    # before this step's update.
                    layer_error = delta_weights.dot(np.transpose(weights[ind]))
                    weights[ind] -= alpha * (np.transpose(predict_for_layers[ind + 1]).dot(delta_weights))

                # Back to forward order, then rebuild the dense layers (tag 1)
                # from the adjusted parameters.
                weights_inp = weights[::-1]
                b_inp = b_coef[::-1]
                activations_inp = activations[::-1]
                for indx in range(1, len(self.layers)):
                    if layers[indx] == 1:
                        self.layers[indx] = DenseLayer(units=units[indx - 1], weights=weights_inp[indx - 1], b=b_inp[indx - 1], activation=activations_inp[indx - 1])

    # Predicting values
    def predict(self, x):
        """Run a forward pass and return the tuple produced by the last layer.

        Element 0 of the returned tuple is the network output.
        """
        result = self.layers[0](np.asarray(x))
        for layer in self.layers[1:]:
            result = layer(result[0])
        return result
Here’s a simple example of use:
x = np.array([[3., 2.], [2., 2.], [3., 3.], [4., 4.]])
y = [5, 4, 6, 8]
# NOTE(review): the original snippet never created `model`, so it failed with
# a NameError. The architecture below (one linear neuron) is an assumption
# made to keep the example runnable - replace it with the intended one.
model = Sequential([Input(), DenseLayer(units=1, activation='linear')])
model.fit(x, y, epochs=40)
x_test = np.array([[3., 4.], [4., 4.]])
# predict() returns the last layer's tuple; element 0 is the network output.
print(model.predict(x_test)[0])
Output:
[[7.13999622]
[8. ]]
Conclusions
As you can see, after training, the neural network was able to produce an almost correct answer for an example that it did not encounter during training.
The implementation has many shortcomings, so if you have comments on what should be fixed, please write.