be2c193341
Next up is dropout layers. Remember that it can be used to see uncertainty in multiple outputs, which is not in the book.
110 lines
3.2 KiB
C++
110 lines
3.2 KiB
C++
#pragma once
|
|
|
|
#include "core/omp_config.h"
|
|
|
|
#include "utils/vector.h"
|
|
#include "utils/matrix.h"
|
|
#include "utils/random.h"
|
|
|
|
|
|
namespace neural_networks{
|
|
|
|
template <typename T>
|
|
struct Dense_Layer{
|
|
|
|
T weight_regularizer_l1 = {0};
|
|
T weight_regularizer_l2 = {0};
|
|
|
|
T bias_regularizer_l1 = {0};
|
|
T bias_regularizer_l2 = {0};
|
|
|
|
utils::Matrix<T> _inputs;
|
|
utils::Matrix<T> weights;
|
|
utils::Vector<T> biases;
|
|
utils::Matrix<T> outputs;
|
|
|
|
utils::Matrix<T> dweights;
|
|
utils::Vector<T> dbiases;
|
|
utils::Matrix<T> dinputs;
|
|
|
|
// Variables for optimizers
|
|
utils::Matrix<T> weight_momentums;
|
|
utils::Vector<T> bias_momentums;
|
|
utils::Matrix<T> weight_cache;
|
|
utils::Vector<T> bias_cache;
|
|
|
|
// Default Constructor
|
|
Dense_Layer() = default;
|
|
|
|
// Constructor
|
|
Dense_Layer(const uint64_t n_inputs, const uint64_t n_neurons,
|
|
const T weight_regularizer_l1=T{0},
|
|
const T weight_regularizer_l2=T{0},
|
|
const T bias_regularizer_l1=T{0},
|
|
const T bias_regularizer_l2=T{0}){
|
|
|
|
this->weight_regularizer_l1 = weight_regularizer_l1;
|
|
this->weight_regularizer_l2 = weight_regularizer_l2;
|
|
this->bias_regularizer_l1 = bias_regularizer_l1;
|
|
this->bias_regularizer_l2 = bias_regularizer_l2;
|
|
|
|
weights.random(n_inputs, n_neurons, -1, 1);
|
|
//weights = numerics::random_matrix(n_inputs, n_neurons, -1, 1);
|
|
biases.resize(n_neurons, T{0});
|
|
|
|
}
|
|
|
|
void forward(const utils::Matrix<T>& inputs){
|
|
_inputs = inputs;
|
|
//std::cout << "HERE" << std::endl;
|
|
outputs = numerics::add_rowwise(numerics::matmul(inputs, weights), biases);
|
|
}
|
|
|
|
void backward(const utils::Matrix<T>& dvalues){
|
|
// Gradients on parameters
|
|
dweights = numerics::matmul(numerics::transpose(_inputs), dvalues);
|
|
dbiases = numerics::sum_rowwise(dvalues);
|
|
|
|
|
|
// Gradients on regularization
|
|
// L1 on weights
|
|
if(weight_regularizer_l1){
|
|
utils::Matrix<T> dL1(weights.rows(), weights.cols(), T{1});
|
|
for (uint64_t i = 0; i < weights.rows(); ++i){
|
|
for (uint64_t j = 0; j < weights.cols(); ++j){
|
|
if (weights(i,j) < 0){
|
|
dL1(i,j) = -1;
|
|
}
|
|
}
|
|
}
|
|
dweights = numerics::add(dweights, numerics::mul(dL1, weight_regularizer_l1));
|
|
}
|
|
// L2 on weights
|
|
if(weight_regularizer_l2){
|
|
dweights = numerics::add(dweights, numerics::mul(numerics::mul(weights, weight_regularizer_l2),T{2}));
|
|
}
|
|
// L1 on biases
|
|
if(bias_regularizer_l1){
|
|
utils::Vector<T> dL1(biases.size(), T{1});
|
|
for (uint64_t i = 0; i < dL1.size(); ++i){
|
|
if (biases[i] < 0){
|
|
dL1[i] = -1;
|
|
}
|
|
}
|
|
dbiases = numerics::add(dbiases, numerics::mul(dL1, bias_regularizer_l1));
|
|
}
|
|
// L2 on biases
|
|
if (bias_regularizer_l2){
|
|
dbiases = numerics::add(dbiases, numerics::mul(numerics::mul(biases, bias_regularizer_l2),T{2}));
|
|
}
|
|
|
|
//Gradient on values
|
|
dinputs = numerics::matmul(dvalues, numerics::transpose(weights));
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
} // end namespace neural_networks
|