Backpass on reg

Implemented the backward pass for regularization and made it easy to see in main.cpp
2026-05-16 21:34:34 +02:00
parent d2fe8aa65c
commit eb2374eaf5
5 changed files with 180 additions and 104 deletions
@@ -28,6 +28,8 @@ int main(int argc, char const *argv[])
    utils::Mf X_test;
    utils::Matrix<int64_t> y;
    utils::Matrix<int64_t> y_test;
+    float data_loss;
+    float regularization_loss;
    float loss;
    float accuracy;

@@ -40,7 +42,13 @@ int main(int argc, char const *argv[])
    //neural_networks::create_vertical_data<float, int64_t>(number_of_samples, number_of_classes, X, y);

    // Create Dense layer with 2 input features and 8 output values
-    neural_networks::Dense_Layer<float> dense1(2, 16);
+    neural_networks::Dense_Layer<float> dense1(
+                                            2, 8,  // input/output
+                                            1e-4f,  // weight L1
+                                            1e-4f,  // weight L2
+                                            0.0f,   // bias L1
+                                            0.0f    // bias L2
+                                            );

    // Create ReLU activation (to be used with Dense layer)
    neural_networks::Activation_ReLU<float> activation1;
@@ -49,14 +57,26 @@ int main(int argc, char const *argv[])

    // Create a second Dense layer with 8 inputs (as we take the values from the previous layer)
    // and 8 output values
-    neural_networks::Dense_Layer<float> dense2(16, 16);
+    neural_networks::Dense_Layer<float> dense2(
+                                            8, 8,  // input/output
+                                            1e-4f,  // weight L1
+                                            1e-4f,  // weight L2
+                                            0.0f,   // bias L1
+                                            0.0f    // bias L2
+                                            );
    // Create Softmax activation (to be used with Dense layer)
    neural_networks::Activation_Softmax<float> activation2;


    // Create a third Dense layer with 8 inputs (as we take the values from the previous layer)
    // and number_of_classes output values
-    neural_networks::Dense_Layer<float> dense3(16, number_of_classes);
+    neural_networks::Dense_Layer<float> dense3(
+                                            8, number_of_classes,  // input/output
+                                            1e-4f,  // weight L1
+                                            1e-4f,  // weight L2
+                                            0.0f,   // bias L1
+                                            0.0f    // bias L2
+                                            );

    // Create a Softmax classifier's combined loss and activation
    neural_networks::Activation_Softmax_Loss_CategoricalCrossentropy<float, int64_t> loss_activation;
@@ -95,8 +115,12 @@ int main(int argc, char const *argv[])

        // Perform a forward pass through the activation/loss function;
        // it takes the output of the third dense layer here and returns the data loss
-        loss = loss_activation.forward(dense2.outputs, y);
-        loss_activation.loss.regularization_loss(dense1);
+        data_loss = loss_activation.forward(dense3.outputs, y);
+
+        // Calculate regularization penalty
+        regularization_loss = loss_activation.loss.regularization_loss(dense1) + loss_activation.loss.regularization_loss(dense2) + loss_activation.loss.regularization_loss(dense3);
+
+        loss = data_loss + regularization_loss;

        // Calculate accuracy from output of activation2 and targets
        //predections = numerics::matargmax_row <int64_t, float>(loss_activation.outputs);
@@ -116,13 +140,17 @@ int main(int argc, char const *argv[])
            std::cout << "epoch: " << epoch;
            std::cout << ", acc: " << accuracy;
            std::cout << ", loss: " << loss;
+            std::cout << ", data_loss: " << data_loss;
+            std::cout << ", regularization_loss: " << regularization_loss;
            std::cout << ", lr: " << optimizer.current_learning_rate;
            std::cout << std::endl;
        }

        // Backward pass
        loss_activation.backward(loss_activation.outputs, y);
-        dense2.backward(loss_activation.dinputs);
+        dense3.backward(loss_activation.dinputs);
+        activation2.backward(dense3.dinputs);
+        dense2.backward(activation2.dinputs);
        activation1.backward(dense2.dinputs);
        dense1.backward(activation1.dinputs);

@@ -131,6 +159,7 @@ int main(int argc, char const *argv[])
        optimizer.pre_update_params();
        optimizer.update_params(dense1);
        optimizer.update_params(dense2);
+        optimizer.update_params(dense3);
        optimizer.post_update_params();

    }
@@ -163,7 +192,12 @@ int main(int argc, char const *argv[])

    // Perform a forward pass through the activation/loss function;
    // it takes the output of the third dense layer here and returns the data loss
-    loss = loss_activation.forward(dense3.outputs, y_test);
+    data_loss = loss_activation.forward(dense3.outputs, y);
+
+    // Calculate regularization penalty
+    regularization_loss = loss_activation.loss.regularization_loss(dense1) + loss_activation.loss.regularization_loss(dense2) + loss_activation.loss.regularization_loss(dense3);
+
+    loss = data_loss + regularization_loss;

    // Calculate accuracy from output of activation2 and targets
    predections = numerics::argmax_rowwise(loss_activation.outputs);