diff --git a/build/CMakeFiles/Progress/1 b/build/CMakeFiles/Progress/1 deleted file mode 100644 index 7b4d68d..0000000 --- a/build/CMakeFiles/Progress/1 +++ /dev/null @@ -1 +0,0 @@ -empty \ No newline at end of file diff --git a/build/CMakeFiles/Progress/count.txt b/build/CMakeFiles/Progress/count.txt deleted file mode 100644 index 0cfbf08..0000000 --- a/build/CMakeFiles/Progress/count.txt +++ /dev/null @@ -1 +0,0 @@ -2 diff --git a/build/bin/dense-neural-network b/build/bin/dense-neural-network index 07c37c8..1b94a40 100755 Binary files a/build/bin/dense-neural-network and b/build/bin/dense-neural-network differ diff --git a/build/examples/dense-neural-network/CMakeFiles/dense-neural-network.dir/compiler_depend.internal b/build/examples/dense-neural-network/CMakeFiles/dense-neural-network.dir/compiler_depend.internal index f52bf4c..f1880e0 100644 --- a/build/examples/dense-neural-network/CMakeFiles/dense-neural-network.dir/compiler_depend.internal +++ b/build/examples/dense-neural-network/CMakeFiles/dense-neural-network.dir/compiler_depend.internal @@ -61,6 +61,8 @@ examples/dense-neural-network/CMakeFiles/dense-neural-network.dir/main.cpp.o /home/newton/Documents/Git/Flux/include/modules/neural_networks/datasets/vertical.h /home/newton/Documents/Git/Flux/include/modules/neural_networks/layers/Dense_Layer.h /home/newton/Documents/Git/Flux/include/modules/neural_networks/neural_networks.h + /home/newton/Documents/Git/Flux/include/modules/neural_networks/optimizers/Optimizer_Adagrad.h + /home/newton/Documents/Git/Flux/include/modules/neural_networks/optimizers/Optimizer_SGD.h /home/newton/Documents/Git/Flux/include/numerics/numerics.h /home/newton/Documents/Git/Flux/include/utils/matrix.h /home/newton/Documents/Git/Flux/include/utils/utils.h @@ -160,6 +162,7 @@ examples/dense-neural-network/CMakeFiles/dense-neural-network.dir/main.cpp.o /usr/include/c++/13/istream /usr/include/c++/13/limits /usr/include/c++/13/locale + /usr/include/c++/13/math.h 
/usr/include/c++/13/new /usr/include/c++/13/numeric /usr/include/c++/13/ostream diff --git a/build/examples/dense-neural-network/CMakeFiles/dense-neural-network.dir/compiler_depend.make b/build/examples/dense-neural-network/CMakeFiles/dense-neural-network.dir/compiler_depend.make index ca38ecb..2c8e2a7 100644 --- a/build/examples/dense-neural-network/CMakeFiles/dense-neural-network.dir/compiler_depend.make +++ b/build/examples/dense-neural-network/CMakeFiles/dense-neural-network.dir/compiler_depend.make @@ -60,6 +60,8 @@ examples/dense-neural-network/CMakeFiles/dense-neural-network.dir/main.cpp.o: /h /home/newton/Documents/Git/Flux/include/modules/neural_networks/datasets/vertical.h \ /home/newton/Documents/Git/Flux/include/modules/neural_networks/layers/Dense_Layer.h \ /home/newton/Documents/Git/Flux/include/modules/neural_networks/neural_networks.h \ + /home/newton/Documents/Git/Flux/include/modules/neural_networks/optimizers/Optimizer_Adagrad.h \ + /home/newton/Documents/Git/Flux/include/modules/neural_networks/optimizers/Optimizer_SGD.h \ /home/newton/Documents/Git/Flux/include/numerics/numerics.h \ /home/newton/Documents/Git/Flux/include/utils/matrix.h \ /home/newton/Documents/Git/Flux/include/utils/utils.h \ @@ -159,6 +161,7 @@ examples/dense-neural-network/CMakeFiles/dense-neural-network.dir/main.cpp.o: /h /usr/include/c++/13/istream \ /usr/include/c++/13/limits \ /usr/include/c++/13/locale \ + /usr/include/c++/13/math.h \ /usr/include/c++/13/new \ /usr/include/c++/13/numeric \ /usr/include/c++/13/ostream \ @@ -322,12 +325,22 @@ examples/dense-neural-network/CMakeFiles/dense-neural-network.dir/main.cpp.o: /h /usr/include/x86_64-linux-gnu/c++/13/bits/c++allocator.h: +/usr/include/x86_64-linux-gnu/c++/13/bits/atomic_word.h: + +/usr/include/x86_64-linux-gnu/bits/wordsize.h: + +/usr/include/x86_64-linux-gnu/bits/wctype-wchar.h: + +/usr/include/x86_64-linux-gnu/bits/wchar.h: + /usr/include/c++/13/bits/functexcept.h: 
/usr/include/x86_64-linux-gnu/bits/libc-header-start.h: /usr/include/c++/13/bits/stl_construct.h: +/home/newton/Documents/Git/Flux/include/modules/neural_networks/optimizers/Optimizer_SGD.h: + /home/newton/Documents/Git/Flux/include/decomp/decomp.h: /usr/include/c++/13/bits/stl_bvector.h: @@ -384,10 +397,6 @@ examples/dense-neural-network/CMakeFiles/dense-neural-network.dir/main.cpp.o: /h /usr/include/c++/13/bits/locale_facets.h: -/usr/include/c++/13/bits/functional_hash.h: - -/usr/include/errno.h: - /usr/include/c++/13/bits/exception_ptr.h: /usr/include/c++/13/bits/locale_conv.h: @@ -420,8 +429,6 @@ examples/dense-neural-network/CMakeFiles/dense-neural-network.dir/main.cpp.o: /h /usr/include/c++/13/bits/basic_string.tcc: -/usr/include/c++/13/bits/stringfwd.h: - /usr/include/c++/13/backward/binders.h: /usr/include/alloca.h: @@ -432,6 +439,10 @@ examples/dense-neural-network/CMakeFiles/dense-neural-network.dir/main.cpp.o: /h /usr/include/wchar.h: +/usr/include/c++/13/bits/functional_hash.h: + +/usr/include/errno.h: + /usr/include/c++/13/bits/ios_base.h: /home/newton/Documents/Git/Flux/include/numerics/veclog.h: @@ -446,6 +457,8 @@ examples/dense-neural-network/CMakeFiles/dense-neural-network.dir/main.cpp.o: /h /usr/include/x86_64-linux-gnu/bits/uintn-identity.h: +/home/newton/Documents/Git/Flux/include/modules/neural_networks/optimizers/Optimizer_Adagrad.h: + /usr/include/c++/13/bits/cxxabi_init_exception.h: /usr/include/c++/13/typeinfo: @@ -486,10 +499,6 @@ examples/dense-neural-network/CMakeFiles/dense-neural-network.dir/main.cpp.o: /h /usr/include/x86_64-linux-gnu/bits/struct_rwlock.h: -/usr/include/c++/13/bits/uniform_int_dist.h: - -/usr/include/c++/13/bits/locale_classes.tcc: - /home/newton/Documents/Git/Flux/include/numerics/inverse.h: /usr/include/c++/13/bits/stl_numeric.h: @@ -502,10 +511,6 @@ examples/dense-neural-network/CMakeFiles/dense-neural-network.dir/main.cpp.o: /h /home/newton/Documents/Git/Flux/include/utils/random.h: 
-/usr/include/x86_64-linux-gnu/c++/13/bits/atomic_word.h: - -/usr/include/x86_64-linux-gnu/bits/wordsize.h: - /home/newton/Documents/Git/Flux/include/numerics/matsubtract.h: /home/newton/Documents/Git/Flux/include/numerics/matdot.h: @@ -544,6 +549,8 @@ examples/dense-neural-network/CMakeFiles/dense-neural-network.dir/main.cpp.o: /h /usr/include/c++/13/bits/ostream_insert.h: +/usr/include/c++/13/math.h: + /home/newton/Documents/Git/Flux/include/numerics/max.h: /usr/include/x86_64-linux-gnu/bits/pthread_stack_min-dynamic.h: @@ -560,16 +567,6 @@ examples/dense-neural-network/CMakeFiles/dense-neural-network.dir/main.cpp.o: /h /usr/include/c++/13/bits/codecvt.h: -/usr/include/x86_64-linux-gnu/bits/flt-eval-method.h: - -/home/newton/Documents/Git/Flux/include/modules/neural_networks/activation_functions/Activation_Softmax_Loss_CategoricalCrossentropy.h: - -/usr/include/c++/13/bits/streambuf_iterator.h: - -/usr/include/x86_64-linux-gnu/c++/13/bits/opt_random.h: - -/usr/include/c++/13/ostream: - /usr/include/c++/13/bits/cpp_type_traits.h: /home/newton/Documents/Git/Flux/include/modules/neural_networks/layers/Dense_Layer.h: @@ -630,6 +627,22 @@ examples/dense-neural-network/CMakeFiles/dense-neural-network.dir/main.cpp.o: /h /home/newton/Documents/Git/Flux/include/modules/neural_networks/datasets/vertical.h: +/usr/include/x86_64-linux-gnu/bits/flt-eval-method.h: + +/home/newton/Documents/Git/Flux/include/modules/neural_networks/activation_functions/Activation_Softmax_Loss_CategoricalCrossentropy.h: + +/usr/include/c++/13/bits/streambuf_iterator.h: + +/usr/include/x86_64-linux-gnu/c++/13/bits/opt_random.h: + +/usr/include/c++/13/ostream: + +/usr/include/c++/13/bits/stringfwd.h: + +/usr/include/c++/13/bits/locale_classes.tcc: + +/usr/include/c++/13/bits/uniform_int_dist.h: + /usr/include/c++/13/bits/vector.tcc: /usr/include/c++/13/cctype: @@ -816,12 +829,8 @@ examples/dense-neural-network/CMakeFiles/dense-neural-network.dir/main.cpp.o: /h 
/usr/include/x86_64-linux-gnu/bits/types/cookie_io_functions_t.h: -/usr/include/x86_64-linux-gnu/bits/wchar.h: - /usr/include/x86_64-linux-gnu/bits/types/locale_t.h: -/usr/include/x86_64-linux-gnu/bits/wctype-wchar.h: - /home/newton/Documents/Git/Flux/include/modules/neural_networks/loss/Loss_CategoricalCrossentrophy.h: /usr/include/x86_64-linux-gnu/bits/types/clockid_t.h: diff --git a/build/examples/dense-neural-network/CMakeFiles/dense-neural-network.dir/main.cpp.o b/build/examples/dense-neural-network/CMakeFiles/dense-neural-network.dir/main.cpp.o index d7bde42..adbf7d1 100644 Binary files a/build/examples/dense-neural-network/CMakeFiles/dense-neural-network.dir/main.cpp.o and b/build/examples/dense-neural-network/CMakeFiles/dense-neural-network.dir/main.cpp.o differ diff --git a/build/examples/dense-neural-network/CMakeFiles/dense-neural-network.dir/main.cpp.o.d b/build/examples/dense-neural-network/CMakeFiles/dense-neural-network.dir/main.cpp.o.d index 9b7cce6..e8d91ba 100644 --- a/build/examples/dense-neural-network/CMakeFiles/dense-neural-network.dir/main.cpp.o.d +++ b/build/examples/dense-neural-network/CMakeFiles/dense-neural-network.dir/main.cpp.o.d @@ -238,4 +238,7 @@ examples/dense-neural-network/CMakeFiles/dense-neural-network.dir/main.cpp.o: \ /home/newton/Documents/Git/Flux/include/modules/neural_networks/activation_functions/Activation_Softmax_Loss_CategoricalCrossentropy.h \ /home/newton/Documents/Git/Flux/include/./modules/neural_networks/loss/Loss_CategoricalCrossentrophy.h \ /home/newton/Documents/Git/Flux/include/./modules/neural_networks/loss/./Loss.h \ - /home/newton/Documents/Git/Flux/include/./numerics/vecmean.h + /home/newton/Documents/Git/Flux/include/./numerics/vecmean.h \ + /home/newton/Documents/Git/Flux/include/modules/neural_networks/optimizers/Optimizer_SGD.h \ + /home/newton/Documents/Git/Flux/include/modules/neural_networks/optimizers/Optimizer_Adagrad.h \ + /usr/include/c++/13/math.h diff --git 
a/examples/dense-neural-network/main.cpp b/examples/dense-neural-network/main.cpp
index b62dd53..e435424 100644
--- a/examples/dense-neural-network/main.cpp
+++ b/examples/dense-neural-network/main.cpp
@@ -19,131 +19,96 @@
 
 int main(int argc, char const *argv[])
 {
-    utils::Mf X(10,2, 0);
-    utils::Matrix y(10,1, 0);
+    uint64_t number_of_classes = 5;
+    uint64_t number_of_samples = 100;
+    uint64_t number_of_epochs = 100;
+
+
+    utils::Mf X;
+    utils::Matrix y;
     utils::Vector class_targets;
     float loss;
     float accuracy;
 
 
 
-    //neural_networks::create_spital_data(10000, 3, X, y);
-    neural_networks::create_vertical_data(100, 3, X, y);
-
-    neural_networks::Dense_Layer dense1(2, 3);
-    neural_networks::Activation_ReLU activation1;
-    neural_networks::Dense_Layer dense2(3, 3);
-    neural_networks::Activation_Softmax activation2;
-
-    neural_networks::Loss_CategoricalCrossentrophy loss_funtion;
-
-    float lowest_loss = 9999999;
-    utils::Mf best_dense_1_weights = dense1.weights;
-    utils::Vf best_dense_1_biases = dense1.biases;
-    utils::Mf best_dense_2_weights = dense2.weights;
-    utils::Vf best_dense_2_biases = dense2.biases;
-
-    utils::Vf vectRND;
-    utils::Vector predections;
+    // Create dataset
+    neural_networks::create_spital_data(number_of_samples, number_of_classes, X, y);
+    //neural_networks::create_vertical_data(number_of_samples, number_of_classes, X, y);
+
+    // Create the first Dense layer with 2 input features and 64 output values
+    neural_networks::Dense_Layer dense1(2, 64);
+
+    // Create ReLU activation (to be used with the first Dense layer)
+    neural_networks::Activation_ReLU activation1;
+
+    // Create a second Dense layer with 64 inputs (the outputs of the first layer)
+    // and one output value per class
+    neural_networks::Dense_Layer dense2(64, number_of_classes);
+
+    // Create the Softmax classifier's combined activation and loss
+    neural_networks::Activation_Softmax_Loss_CategoricalCrossentropy loss_activation;
+
+    // Create optimizer
+    //neural_networks::Optimizer_SGD optimizer(1, 1e-3, 0.5);
+    neural_networks::Optimizer_Adagrad optimizer(1, 1e-3, 1e-6);
+
+    // Train in loop
+    for (uint64_t epoch = 0; epoch < number_of_epochs+1; ++epoch){
 
 
-    for (uint64_t i = 0; i < 10; ++i){
+        // Perform a forward pass of our training data through the first layer
+        dense1.forward(X);
 
+        // Perform a forward pass through the activation function,
+        // which takes the output of the first layer
+        activation1.forward(dense1.outputs);
 
 
-        // Generate a new set of weights for iteration
-        numerics::inplace_matrandom_mul(dense1.weights,0.98f, 1.02f);
-        numerics::inplace_vecrandom_mul(dense1.biases,0.98f, 1.02f);
+        // Perform a forward pass through the second Dense layer,
+        // which takes the output of the first layer's activation as input
+        dense2.forward(activation1.outputs);
 
 
-        numerics::inplace_matrandom_mul(dense2.weights,0.98f, 1.02f);
-        numerics::inplace_vecrandom_mul(dense2.biases,0.98f, 1.02f);
-
-        // Perform a forward pass of the training data through this layer
-        dense1.forward(X);
-        activation1.forward(dense1.outputs);
-        dense2.forward(activation1.outputs);
-        activation2.forward(dense2.outputs);
-
-        // Perform a farward pass through activation function
-        // it takes the output of the second dense layer here and returns loss
-        loss = loss_funtion.calculate(activation2.outputs, y);
-
-        predections = numerics::matargmax_row(activation2.outputs);
-
-        if (y.cols() < 1){
-            class_targets = numerics::matargmax_row(y);
-        }else{
-            class_targets = y.get_col(0);
-        }
-
-        accuracy = numerics::vecmean_equal(predections, class_targets);
-
-        if (loss < lowest_loss){
-            //std::cout << "New set of weights found, iteration:" << i << ", loss:" << loss << ", acc:" << accuracy << std::endl;
-            best_dense_1_weights = dense1.weights;
-            best_dense_1_biases = dense1.biases;
-            best_dense_2_weights = dense2.weights;
-            best_dense_2_biases = dense2.biases;
-            lowest_loss = loss;
-        } else{
-            //std::cout << "HERE" << std::endl;
-            dense1.weights = best_dense_1_weights;
-            dense1.biases = best_dense_1_biases;
-            dense2.weights = best_dense_2_weights;
-            dense2.biases = best_dense_2_biases;
-        }
+        // Perform a forward pass through the combined activation/loss function,
+        // which takes the output of the second Dense layer and returns the loss
+        loss = loss_activation.forward(dense2.outputs, y);
 
+        // Calculate accuracy from the softmax outputs and the targets
+        auto predections = numerics::matargmax_row(loss_activation.outputs); // declared here: the old declaration is removed above
 
+        if (y.cols() > 1){ // one-hot targets: reduce each row to its class index
+            class_targets = numerics::matargmax_row(y);
+        }else{
+            class_targets = y.get_col(0);
         }
 
 
-    //std::cout << loss << std::endl;
-    //std::cout << accuracy << std::endl;
-
-    utils::Matrix softmax_outputs{{0.7, 0.1, 0.2},
-                                  {0.1, 0.5, 0.4},
-                                  {0.02, 0.9, 0.08}};
-    utils::Matrix clas_targets{{0},{1},{1}};
-
-    neural_networks::Activation_Softmax_Loss_CategoricalCrossentropy softmax_loss;
-    softmax_loss.backward(softmax_outputs, clas_targets);
-    utils::Matrix dvalues1 = softmax_loss.dinputs;
-
-    neural_networks::Activation_Softmax activation;
-    activation.outputs = softmax_outputs;
-
-    //neural_networks::Loss_CategoricalCrossentrophy loss;
+        accuracy = numerics::vecmean_equal(predections, class_targets);
 
 
-    dvalues1.print();
+        if (!(epoch%100)){
+            std::cout << "epoch: " << epoch;
+            std::cout << ", acc: " << accuracy;
+            std::cout << ", loss: " << loss;
+            std::cout << ", lr: " << optimizer.current_learning_rate;
+            std::cout << std::endl;
+        }
+
+        // Backward pass
+        loss_activation.backward(loss_activation.outputs, y);
+        dense2.backward(loss_activation.dinputs);
+        activation1.backward(dense2.dinputs);
+        dense1.backward(activation1.dinputs);
 
+        // Update weights and biases
+        optimizer.pre_update_params();
+        optimizer.update_params(dense1);
+        optimizer.update_params(dense2);
+        optimizer.post_update_params();
 
 
-
-
-    /*
-    utils::Vd a = utils::linspace(1, 10, 10, true);
-    a.print();
-    mesh::Mesh1D mesh(a);
-    mesh.generate_vertices(0.5, 10.5);
-    double Gamma = 1.0;
-
-
-    utils::Md A;
-    utils::Vd b, s(10,1);
-
-
-    core::Configs& cfg = core::Configs::defaults();
-    cfg.grid =
core::GridKind::Uniform;
-    cfg.left = {core::FDKind::Forward, core::BCKind::Neumann, 0.0};
-    cfg.right = {core::FDKind::Backward, core::BCKind::Neumann, 0.0};
-    cfg.solver = core::SolverKind::LU;
-
-
-    fluids::Diffusion1D diffusion(cfg, mesh, Gamma);
-    diffusion.assemble(A, b, s);
-*/
+    }
 
     return 0;
 }
\ No newline at end of file
diff --git a/include/modules/neural_networks/layers/Dense_Layer.h b/include/modules/neural_networks/layers/Dense_Layer.h
index ecf9faa..3afecfa 100644
--- a/include/modules/neural_networks/layers/Dense_Layer.h
+++ b/include/modules/neural_networks/layers/Dense_Layer.h
@@ -21,6 +21,11 @@ namespace neural_networks{
         utils::Vector dbiases;
         utils::Matrix dinputs;
 
+        // Variables for optimizers
+        utils::Matrix weight_momentums;
+        utils::Vector bias_momentums;
+        utils::Matrix weight_cache;
+        utils::Vector bias_cache;
 
         // Default Constructor
         Dense_Layer() = default;
diff --git a/include/modules/neural_networks/neural_networks.h b/include/modules/neural_networks/neural_networks.h
index 4260399..2418b71 100644
--- a/include/modules/neural_networks/neural_networks.h
+++ b/include/modules/neural_networks/neural_networks.h
@@ -4,6 +4,7 @@
 #include "datasets/spiral.h"
 #include "datasets/vertical.h"
 
+
 #include "layers/Dense_Layer.h"
 
 
@@ -11,5 +12,10 @@
 #include "activation_functions/Activation_Softmax.h"
 #include "activation_functions/Activation_Softmax_Loss_CategoricalCrossentropy.h"
 
+
 #include "loss/Loss.h" // Base
 #include "loss/Loss_CategoricalCrossentrophy.h"
+
+
+#include "optimizers/Optimizer_SGD.h"
+#include "optimizers/Optimizer_Adagrad.h"
diff --git a/include/modules/neural_networks/optimizers/Optimizer_Adagrad.h b/include/modules/neural_networks/optimizers/Optimizer_Adagrad.h
new file mode 100644
index 0000000..2e02089
--- /dev/null
+++ b/include/modules/neural_networks/optimizers/Optimizer_Adagrad.h
@@ -0,0 +1,93 @@
+#pragma once
+
+#include "./core/omp_config.h"
+
+#include "./utils/vector.h"
+#include "./utils/matrix.h"
+
+#include "./numerics/matmul.h"
+
+#include <cmath>
+
+
+namespace neural_networks{
+
+    // Adagrad optimizer.
+    //
+    // Keeps a running sum of squared gradients for every parameter (stored on
+    // the layer as weight_cache / bias_cache) and divides each step by the
+    // square root of that sum, so parameters that have already received large
+    // gradients take smaller steps. The learning rate can additionally decay
+    // as learning_rate / (1 + decay * iterations).
+    //
+    // Per training step: call pre_update_params(), then update_params(layer)
+    // for every layer, then post_update_params().
+    template <typename T>
+    struct Optimizer_Adagrad{
+
+        T learning_rate = T{1};                  // initial learning rate
+        T current_learning_rate = learning_rate; // learning rate after decay
+        T decay = T{0};                          // decay factor, 0 disables decay
+        T epsilon = T{1e-7};                     // keeps the divisor away from zero
+        uint64_t iterations = 0;                 // number of completed steps
+
+        // Default Constructor
+        Optimizer_Adagrad() = default;
+
+        // Constructor
+        explicit Optimizer_Adagrad(const T lr, const T lr_decay, const T epsilons): learning_rate(lr), current_learning_rate{lr}, decay(lr_decay), epsilon(epsilons) {}
+
+        // Recompute current_learning_rate from the decay schedule.
+        void pre_update_params(){
+            if(decay){
+                current_learning_rate = learning_rate * (T{1}/(T{1}+(decay*iterations)));
+                //std::cout << current_learning_rate << std::endl;
+            }
+        }
+
+        // Apply one Adagrad step to `layer`. The layer must provide weights,
+        // biases, dweights, dbiases, weight_cache and bias_cache.
+        template <typename Layer>
+        void update_params(Layer& layer){
+
+            // If the layer's cache arrays do not match its parameters in shape
+            // (e.g. on first use), create them filled with zeros.
+            if ((layer.weight_cache.rows() != layer.weights.rows()) || (layer.weight_cache.cols() != layer.weights.cols())){
+                layer.weight_cache.resize(layer.weights.rows(), layer.weights.cols(), T{0});
+            }
+            if (layer.bias_cache.size() != layer.biases.size()){
+                layer.bias_cache.resize(layer.biases.size(), T{0});
+            }
+
+            // Accumulate the squared current gradients into the cache. This must
+            // be a running sum: overwriting the cache would turn the step into
+            // lr * g / (|g| + epsilon), a fixed-size signed step.
+            for (uint64_t i = 0; i < layer.weights.rows(); ++i){
+                for (uint64_t j = 0; j < layer.weights.cols(); ++j){
+                    layer.weight_cache(i,j) += layer.dweights(i,j)*layer.dweights(i,j);
+                }
+            }
+
+            for (uint64_t i = 0; i < layer.biases.size(); ++i){ // can maybe be included when updating weights (saves time)
+                layer.bias_cache[i] += layer.dbiases[i]*layer.dbiases[i];
+            }
+
+            // Vanilla SGD parameter update + normalization with squared rooted cache
+            for (uint64_t i = 0; i < layer.weights.rows(); ++i){
+                for (uint64_t j = 0; j < layer.weights.cols(); ++j){
+                    layer.weights(i,j) -= (current_learning_rate*layer.dweights(i,j)) / (std::sqrt(layer.weight_cache(i,j)) + epsilon);
+                }
+            }
+            for (uint64_t i = 0; i < layer.biases.size(); ++i){
+                layer.biases[i] -= (current_learning_rate*layer.dbiases[i]) / (std::sqrt(layer.bias_cache[i]) + epsilon);
+            }
+        }
+
+        // Count the finished step; used by the decay schedule.
+        void post_update_params(){
+            iterations++;
+        }
+
+    };
+
+} // end namespace neural_networks
\ No newline at end of file
diff --git a/include/modules/neural_networks/optimizers/Optimizer_SGD.h b/include/modules/neural_networks/optimizers/Optimizer_SGD.h
new file mode 100644
index 0000000..329a8f1
--- /dev/null
+++ b/include/modules/neural_networks/optimizers/Optimizer_SGD.h
@@ -0,0 +1,110 @@
+#pragma once
+
+#include "./core/omp_config.h"
+
+#include "./utils/vector.h"
+#include "./utils/matrix.h"
+
+#include "./numerics/matmul.h"
+
+
+namespace neural_networks{
+
+    // Stochastic gradient descent optimizer with optional learning-rate decay
+    // (learning_rate / (1 + decay * iterations)) and optional momentum.
+    // Momentum state is stored on the layer (weight_momentums / bias_momentums).
+    //
+    // Per training step: call pre_update_params(), then update_params(layer)
+    // for every layer, then post_update_params().
+    template <typename T>
+    struct Optimizer_SGD{
+
+        T learning_rate = T{1};                  // initial learning rate
+        T current_learning_rate = learning_rate; // learning rate after decay
+        T decay = T{0};                          // decay factor, 0 disables decay
+        T momentum = T{0};                       // momentum factor, 0 disables momentum
+        uint64_t iterations = 0;                 // number of completed steps
+
+        // Scratch buffers for the step of the layer currently being updated
+        utils::Matrix<T> weight_updates;
+        utils::Vector<T> bias_updates;
+
+        // Default Constructor
+        Optimizer_SGD() = default;
+
+        // Constructor
+        explicit Optimizer_SGD(const T lr, const T lr_decay, const T momentums): learning_rate(lr), current_learning_rate{lr}, decay(lr_decay), momentum(momentums) {}
+
+        // Recompute current_learning_rate from the decay schedule.
+        void pre_update_params(){
+            if(decay){
+                current_learning_rate = learning_rate * (T{1}/(T{1}+(decay*iterations)));
+                //std::cout << current_learning_rate << std::endl;
+            }
+        }
+
+        // Apply one SGD step to `layer`. The layer must provide weights, biases,
+        // dweights and dbiases, plus weight_momentums and bias_momentums.
+        template <typename Layer>
+        void update_params(Layer& layer){
+
+            // Size the scratch buffers for this layer; every element is assigned
+            // below, so values left over from a previous call never leak in.
+            weight_updates.resize(layer.weights.rows(),layer.weights.cols());
+            bias_updates.resize(layer.biases.size());
+
+            // if we use momentum
+            if(momentum){
+                // if layer does not contain momentum arrays, create them filled with zeros.
+                if ((layer.weight_momentums.rows() != layer.weights.rows()) || (layer.weight_momentums.cols() != layer.weights.cols())){
+                    layer.weight_momentums.resize(layer.weights.rows(), layer.weights.cols(), T{0});
+                }
+                if (layer.bias_momentums.size() != layer.biases.size()){
+                    layer.bias_momentums.resize(layer.biases.size(), T{0});
+                }
+                // Build weight updates with momentum - take previous updates,
+                // multiplied by retain factor and update with current gradients
+                for (uint64_t i = 0; i < layer.weights.rows(); ++i){
+                    for (uint64_t j = 0; j < layer.weights.cols(); ++j){
+                        weight_updates(i,j) = (momentum*layer.weight_momentums(i,j)) - (current_learning_rate*layer.dweights(i,j));
+                    }
+                }
+                layer.weight_momentums = weight_updates;
+
+                // Build bias updates
+                for (uint64_t i = 0; i < layer.biases.size(); ++i){
+                    bias_updates[i] = (momentum*layer.bias_momentums[i]) - (current_learning_rate*layer.dbiases[i]);
+                }
+                layer.bias_momentums = bias_updates;
+            }else{
+                // Plain SGD: update = -lr * gradient. The value is assigned, not
+                // subtracted from the buffer, because the buffer is reused.
+                for (uint64_t i = 0; i < layer.weights.rows(); ++i){
+                    for (uint64_t j = 0; j < layer.weights.cols(); ++j){
+                        weight_updates(i,j) = -(current_learning_rate*layer.dweights(i,j));
+                    }
+                }
+                for (uint64_t i = 0; i < layer.biases.size(); ++i){
+                    bias_updates[i] = -(current_learning_rate*layer.dbiases[i]);
+                }
+            }
+
+            // Apply the step to the weights and to the biases
+            for (uint64_t i = 0; i < layer.weights.rows(); ++i){
+                for (uint64_t j = 0; j < layer.weights.cols(); ++j){
+                    layer.weights(i,j) += weight_updates(i,j);
+                }
+            }
+            for (uint64_t i = 0; i < layer.biases.size(); ++i){
+                layer.biases[i] += bias_updates[i];
+            }
+        }
+
+        // Count the finished step; used by the decay schedule.
+        void post_update_params(){
+            iterations++;
+        }
+
+    };
+
+} // end namespace neural_networks
\ No newline at end of file