Regularization

Started on regularization in Loss.h. I need to refactor matsum.h, since I need a total sum over the matrix. Also, matmul needs an elementwise matmul function, which is the next thing in the regularization work.
2026-01-03 22:10:50 +01:00
parent 32ba0518fa
commit 48f329feef
17 changed files with 881 additions and 510 deletions
@@ -0,0 +1,81 @@
+#pragma once
+
+#include "./core/omp_config.h"
+
+#include "./utils/vector.h"
+#include "./utils/matrix.h"
+
+#include "./numerics/matmul.h"
+
+#include <math.h>
+
+
+
+
+namespace neural_networks{
+
+	/// RMSprop optimizer with optional inverse-time learning-rate decay.
+	///
+	/// For every parameter it keeps a running average of the squared gradient
+	/// (stored on the layer itself as `weight_cache` / `bias_cache`) and scales
+	/// each step by `1 / (sqrt(cache) + epsilon)`.
+	///
+	/// Presumably driven once per training step as
+	/// `pre_update_params()`, `update_params(layer)` for each layer, then
+	/// `post_update_params()` -- confirm against the training loop.
+	///
+	/// @tparam T scalar type used for the hyper-parameters and the arithmetic.
+	template <typename T>
+	struct Optimizer_RMSprop{
+
+			T learning_rate = T{1};                   ///< Base step size.
+			T current_learning_rate = learning_rate;  ///< Step size actually applied by update_params().
+			T decay = T{0};                           ///< Decay rate; zero disables the decay schedule.
+			T epsilon = T{1e-7};                      ///< Added to the denominator to avoid division by zero.
+			T rho = T{0.9};                           ///< Weight of the previous cache value in the running average.
+			uint64_t iterations = 0;                  ///< Number of completed post_update_params() calls.
+
+			/// Default constructor: uses the in-class defaults above.
+			Optimizer_RMSprop() = default;
+
+			/// Construct with explicit hyper-parameters.
+			///
+			/// @param lr        base learning rate (also the initial current_learning_rate)
+			/// @param lr_decay  decay rate; zero disables decay
+			/// @param epsilons  value added to the denominator of the update
+			/// @param rhos      running-average weight for the squared-gradient cache
+			explicit Optimizer_RMSprop(const T lr, const T lr_decay, const T epsilons, const T rhos): learning_rate(lr), current_learning_rate{lr}, decay(lr_decay), epsilon(epsilons), rho(rhos) {}
+
+			/// Recompute `current_learning_rate` as
+			/// `learning_rate / (1 + decay * iterations)`.
+			///
+			/// Only runs when `decay` is non-zero; with `decay == 0` the current
+			/// rate is left untouched and keeps whatever value it was last given.
+			void pre_update_params(){
+				if(decay){
+					// Explicit cast documents the integer -> T conversion the formula relies on.
+					current_learning_rate = learning_rate * (T{1}/(T{1}+(decay*static_cast<T>(iterations))));
+				}
+			}
+
+			/// Apply one RMSprop step to `layer`, updating its caches and parameters in place.
+			///
+			/// `Layer` must expose:
+			///  - `weights`, `dweights`, `weight_cache`: indexable as `(i, j)`; `weights` and
+			///    `weight_cache` provide `rows()` / `cols()`, and `weight_cache` provides
+			///    `resize(rows, cols, value)`.
+			///  - `biases`, `dbiases`, `bias_cache`: indexable as `[i]`; `biases` and
+			///    `bias_cache` provide `size()`, and `bias_cache` provides `resize(n, value)`.
+			///
+			/// `dweights` / `dbiases` are assumed to have the same shape as
+			/// `weights` / `biases`; this is not checked here.
+			template <typename Layer>
+			void update_params(Layer& layer){
+
+				// If the layer's caches do not match the parameter shapes yet, (re)size them with zeros.
+				if ((layer.weight_cache.rows() != layer.weights.rows()) || (layer.weight_cache.cols() != layer.weights.cols())){
+					layer.weight_cache.resize(layer.weights.rows(), layer.weights.cols(), T{0});
+				}
+				if (layer.bias_cache.size() != layer.biases.size()){
+					layer.bias_cache.resize(layer.biases.size(), T{0});
+				}
+
+				// Loop-invariant values, hoisted out of the element loops.
+				const T one_minus_rho = T{1} - rho;
+				const auto n_rows = layer.weights.rows();
+				const auto n_cols = layer.weights.cols();
+				const auto n_bias = layer.biases.size();
+
+				// Weights: refresh the squared-gradient cache, then take the normalized step.
+				// Both statements touch only element (i, j), so one fused pass gives the same
+				// result as two separate passes over the matrix.
+				for (uint64_t i = 0; i < n_rows; ++i){
+					for (uint64_t j = 0; j < n_cols; ++j){
+						layer.weight_cache(i,j) = (rho*layer.weight_cache(i,j)) + (one_minus_rho * (layer.dweights(i,j)*layer.dweights(i,j)));
+						layer.weights(i,j) -= (current_learning_rate*layer.dweights(i,j)) / (std::sqrt(layer.weight_cache(i,j)) + epsilon);
+					}
+				}
+
+				// Biases: same fused cache refresh + normalized step.
+				for (uint64_t i = 0; i < n_bias; ++i){
+					layer.bias_cache[i] = (rho*layer.bias_cache[i]) + (one_minus_rho * (layer.dbiases[i]*layer.dbiases[i]));
+					layer.biases[i] -= (current_learning_rate*layer.dbiases[i]) / (std::sqrt(layer.bias_cache[i]) + epsilon);
+				}
+			}
+
+			/// Advance the step counter that pre_update_params() uses for the decay schedule.
+			void post_update_params(){
+				++iterations;
+			}
+
+	};
+
+} // end namespace neural_networks