Dropout Layer

Implemented rng::uniform and rng::binomial for single values, vectors, and matrices. Implemented dropout layers and tested them. Also fixed the validation code: previously it used y in one place; now it uses y_test, as it should.
2026-05-21 15:38:49 +02:00
parent be2c193341
commit eb0a49591e
14 changed files with 395 additions and 116 deletions
@@ -5,6 +5,7 @@
 #include "decomp/decomp.h"

 #include "modules/neural_networks/neural_networks.h"
+#include "random/random.h"



@@ -20,9 +21,8 @@ int main(int argc, char const *argv[])
 {   

    uint64_t number_of_classes = 3;
-    uint64_t number_of_samples = 100;
-    uint64_t number_of_epochs = 1000;
-
+    uint64_t number_of_samples = 1000;
+    uint64_t number_of_epochs = 500;

    utils::Mf X;
    utils::Mf X_test;
@@ -43,26 +43,27 @@ int main(int argc, char const *argv[])

    // Create Dense layer with 2 input featues and 3 output values
    neural_networks::Dense_Layer<float> dense1(
-                                            2, 8,  // input/output
-                                            1e-4f,  // weight L1
-                                            1e-4f,  // weight L2
+                                            2, 16,  // input/output
+                                            0.0f,  // weight L1
+                                            5e-4f,  // weight L2
                                            0.0f,   // bias L1
-                                            0.0f    // bias L2
+                                            5e-4f    // bias L2
                                            );

    // Create ReLU activation (to be used with Dense layer)
    neural_networks::Activation_ReLU<float> activation1;
+    neural_networks::Dropout_Layer<float> dropout1(0.1);



    // Create a second Dense layer with 16 inputs (as we take the vlaues from the last layer)
    // and 16 output values
    neural_networks::Dense_Layer<float> dense2(
-                                            8, 8,  // input/output
-                                            1e-4f,  // weight L1
-                                            1e-4f,  // weight L2
+                                            16, 16,  // input/output
+                                            0.0f,  // weight L1
+                                            5e-4f,  // weight L2
                                            0.0f,   // bias L1
-                                            0.0f    // bias L2
+                                            5e-4f    // bias L2
                                            );
    // Create Softmax activation (to be used with Dense layer)
    neural_networks::Activation_Softmax<float> activation2;
@@ -71,11 +72,11 @@ int main(int argc, char const *argv[])
    // Create a second Dense layer with 3 inputs (as we take the vlaues from the last layer)
    // and 3 output values
    neural_networks::Dense_Layer<float> dense3(
-                                            8, number_of_classes,  // input/output
-                                            1e-4f,  // weight L1
-                                            1e-4f,  // weight L2
+                                            16, number_of_classes,  // input/output
+                                            0.0f,  // weight L1
+                                            5e-4f,  // weight L2
                                            0.0f,   // bias L1
-                                            0.0f    // bias L2
+                                            5e-4f    // bias L2
                                            );

    // Create a Sfotmax classifier's combined loss and activation
@@ -85,7 +86,13 @@ int main(int argc, char const *argv[])
    //neural_networks::Optimizer_SGD<float> optimizer(1, 1e-3, 0.5);
    //neural_networks::Optimizer_Adagrad<float> optimizer(1, 1e-3, 1e-6);
    //neural_networks::Optimizer_RMSprop<float> optimizer(1, 1e-3, 1e-6, 0.9);
-    neural_networks::Optimizer_Adam<float> optimizer(1, 1e-3, 1e-6, 0.9, 0.999);
+    neural_networks::Optimizer_Adam<float> optimizer(
+                                                    0.05,      // Learning-rate
+                                                    5e-5,   // Learning-rate decay
+                                                    1e-6,   // epsilons
+                                                    0.9,    // beta 1 
+                                                    0.999   // beta 2
+                                                    );
    


@@ -98,10 +105,12 @@ int main(int argc, char const *argv[])
        // Perform a forward pass thourgh activation function
        // takes the output fo the first layer here
        activation1.forward(dense1.outputs);
+        dropout1.forward(activation1.outputs);

        // Perform a forward pass through second Dense layer
        // takes output of activation function of the first layer as input
-        dense2.forward(activation1.outputs);
+        dense2.forward(dropout1.outputs);
+

        // Perform a forward pass thourgh activation function
        // takes the output fo the first layer here
@@ -126,7 +135,7 @@ int main(int argc, char const *argv[])
        //predections = numerics::matargmax_row <int64_t, float>(loss_activation.outputs);
        predections = numerics::argmax_rowwise(loss_activation.outputs);

-        if (y.cols() < 1){
+        if (y.cols() > 1){
            class_targets = numerics::argmax_rowwise(y);
        }else{
            class_targets = utils::veccast <uint64_t, int64_t> (y.get_col(0));
@@ -151,7 +160,8 @@ int main(int argc, char const *argv[])
        dense3.backward(loss_activation.dinputs);
        activation2.backward(dense3.dinputs);
        dense2.backward(activation2.dinputs);
-        activation1.backward(dense2.dinputs);
+        dropout1.backward(dense2.dinputs);
+        activation1.backward(dropout1.dinputs);
        dense1.backward(activation1.dinputs);


@@ -192,7 +202,7 @@ int main(int argc, char const *argv[])

    // Perform a foard pass through the activation/loss function
    // takes the output of the second dense layer here and returns loss
-    data_loss = loss_activation.forward(dense3.outputs, y);
+    data_loss = loss_activation.forward(dense3.outputs, y_test);

    // Calculate regularization penalty
    regularization_loss = loss_activation.loss.regularization_loss(dense1) + loss_activation.loss.regularization_loss(dense2) + loss_activation.loss.regularization_loss(dense3);
@@ -202,7 +212,7 @@ int main(int argc, char const *argv[])
    // Calculate accuracy from output of activation2 and targets
    predections = numerics::argmax_rowwise(loss_activation.outputs);

-    if (y.cols() == 1){
+    if (y.cols() > 1){
        class_targets = numerics::argmax_rowwise(y_test);
    }else{
        class_targets = utils::veccast <uint64_t, int64_t> (y_test.get_col(0));