Optimizers - Part 1
Sync public mirror / sync (push) Failing after 24s

Done with SGD and Adagrad; they still need to be optimized, but they work.
This commit is contained in:
2026-01-01 19:23:48 +01:00
parent bd2edea8ef
commit e5f8c91be4
12 changed files with 299 additions and 133 deletions
+67 -102
View File
@@ -19,131 +19,96 @@
// Example driver: builds a small two-layer classifier on a generated dataset
// and runs a training loop over it.
// NOTE(review): this listing looks like a rendered diff hunk whose +/- markers
// were lost, so lines from the earlier random-perturbation search and from the
// newer optimizer-based training loop appear interleaved (duplicate
// declarations, unbalanced braces). Confirm against the repository before
// trying to compile it as written.
int main(int argc, char const *argv[])
{
utils::Mf X(10,2, 0);
utils::Matrix<int64_t> y(10,1, 0);
uint64_t number_of_classes = 5;
uint64_t number_of_samples = 100;
uint64_t number_of_epochs = 100;
utils::Mf X;
utils::Matrix<int64_t> y;
utils::Vector<int64_t> class_targets;
float loss;
float accuracy;
//neural_networks::create_spital_data<float, uint64_t>(10000, 3, X, y);
neural_networks::create_vertical_data<float, int64_t>(100, 3, X, y);
neural_networks::Dense_Layer<float> dense1(2, 3);
neural_networks::Activation_ReLU<float> activation1;
neural_networks::Dense_Layer<float> dense2(3, 3);
neural_networks::Activation_Softmax<float> activation2;
neural_networks::Loss_CategoricalCrossentrophy<float, int64_t> loss_funtion;
// Lowest loss seen so far; starts at a large sentinel so that the first
// computed loss replaces it.
float lowest_loss = 9999999;
// Snapshots of the best parameters found so far.
utils::Mf best_dense_1_weights = dense1.weights;
utils::Vf best_dense_1_biases = dense1.biases;
utils::Mf best_dense_2_weights = dense2.weights;
utils::Vf best_dense_2_biases = dense2.biases;
utils::Vf vectRND;
utils::Vector<int64_t> predections;
// Create dataset
neural_networks::create_spital_data<float, int64_t>(number_of_samples, number_of_classes, X, y);
//neural_networks::create_vertical_data<float, int64_t>(number_of_samples, number_of_classes, X, y);
// Create Dense layer with 2 input features and 64 output values
neural_networks::Dense_Layer<float> dense1(2, 64);
// Create ReLU activation (to be used with Dense layer)
neural_networks::Activation_ReLU<float> activation1;
// Create a second Dense layer with 64 inputs (as we take the values from the previous layer)
// and number_of_classes output values
neural_networks::Dense_Layer<float> dense2(64, number_of_classes);
// Create a Softmax classifier's combined loss and activation
neural_networks::Activation_Softmax_Loss_CategoricalCrossentropy<float, int64_t> loss_activation;
// Create optimizer
//neural_networks::Optimizer_SGD<float> optimizer(1, 1e-3, 0.5);
neural_networks::Optimizer_Adagrad<float> optimizer(1, 1e-3, 1e-6);
// Train in loop (epochs 0..number_of_epochs inclusive)
for (uint64_t epoch = 0; epoch < number_of_epochs+1; ++epoch){
for (uint64_t i = 0; i < 10; ++i){
// Perform a forward pass of our training data through this layer
dense1.forward(X);
// Perform a forward pass through the activation function
// takes the output of the first layer here
activation1.forward(dense1.outputs);
// Generate a new set of weights for iteration
// NOTE(review): presumably scales each entry by a random factor in
// [0.98, 1.02] -- confirm against the numerics helpers.
numerics::inplace_matrandom_mul(dense1.weights,0.98f, 1.02f);
numerics::inplace_vecrandom_mul(dense1.biases,0.98f, 1.02f);
// Perform a forward pass through second Dense layer
// takes output of activation function of the first layer as input
dense2.forward(activation1.outputs);
numerics::inplace_matrandom_mul(dense2.weights,0.98f, 1.02f);
numerics::inplace_vecrandom_mul(dense2.biases,0.98f, 1.02f);
// Perform a forward pass of the training data through this layer
dense1.forward(X);
activation1.forward(dense1.outputs);
dense2.forward(activation1.outputs);
activation2.forward(dense2.outputs);
// Perform a forward pass through the activation function
// it takes the output of the second dense layer here and returns loss
loss = loss_funtion.calculate(activation2.outputs, y);
predections = numerics::matargmax_row<int64_t, float>(activation2.outputs);
// NOTE(review): one-hot targets would have more than one column, so the
// `< 1` test looks inverted (expected `> 1`?) -- confirm.
if (y.cols() < 1){
class_targets = numerics::matargmax_row<int64_t, int64_t>(y);
}else{
class_targets = y.get_col(0);
}
accuracy = numerics::vecmean_equal<float>(predections, class_targets);
// Keep the current parameters when the loss improved; otherwise restore the
// best set seen so far.
if (loss < lowest_loss){
//std::cout << "New set of weights found, iteration:" << i << ", loss:" << loss << ", acc:" << accuracy << std::endl;
best_dense_1_weights = dense1.weights;
best_dense_1_biases = dense1.biases;
best_dense_2_weights = dense2.weights;
best_dense_2_biases = dense2.biases;
lowest_loss = loss;
} else{
//std::cout << "HERE" << std::endl;
dense1.weights = best_dense_1_weights;
dense1.biases = best_dense_1_biases;
dense2.weights = best_dense_2_weights;
dense2.biases = best_dense_2_biases;
}
// Perform a forward pass through the activation/loss function
// takes the output of the second dense layer here and returns loss
loss = loss_activation.forward(dense2.outputs, y);
// Calculate accuracy from output of loss_activation and targets
predections = numerics::matargmax_row<int64_t, float>(loss_activation.outputs);
// NOTE(review): same `< 1` column check as above -- looks inverted for
// one-hot targets; confirm.
if (y.cols() < 1){
class_targets = numerics::matargmax_row<int64_t, int64_t>(y);
}else{
class_targets = y.get_col(0);
}
//std::cout << loss << std::endl;
//std::cout << accuracy << std::endl;
// Fixed softmax outputs and class targets fed to the combined
// softmax/loss backward pass; the resulting dinputs are printed below.
utils::Matrix<float> softmax_outputs{{0.7, 0.1, 0.2},
{0.1, 0.5, 0.4},
{0.02, 0.9, 0.08}};
utils::Matrix<int64_t> clas_targets{{0},{1},{1}};
neural_networks::Activation_Softmax_Loss_CategoricalCrossentropy<float, int64_t> softmax_loss;
softmax_loss.backward(softmax_outputs, clas_targets);
utils::Matrix<float> dvalues1 = softmax_loss.dinputs;
neural_networks::Activation_Softmax<float> activation;
activation.outputs = softmax_outputs;
//neural_networks::Loss_CategoricalCrossentrophy<float, int64_t> loss;
accuracy = numerics::vecmean_equal<float>(predections, class_targets);
dvalues1.print();
// Report progress whenever epoch is a multiple of 100
if (!(epoch%100)){
std::cout << "epoch: " << epoch;
std::cout << ", acc: " << accuracy;
std::cout << ", loss: " << loss;
std::cout << ", lr: " << optimizer.current_learning_rate;
std::cout << std::endl;
}
// Backward pass
loss_activation.backward(loss_activation.outputs, y);
dense2.backward(loss_activation.dinputs);
activation1.backward(dense2.dinputs);
dense1.backward(activation1.dinputs);
// Update weights and biases
optimizer.pre_update_params();
optimizer.update_params(dense1);
optimizer.update_params(dense2);
optimizer.post_update_params();
/*
utils::Vd a = utils::linspace<double>(1, 10, 10, true);
a.print();
mesh::Mesh1D<double> mesh(a);
mesh.generate_vertices(0.5, 10.5);
double Gamma = 1.0;
utils::Md A;
utils::Vd b, s(10,1);
core::Configs<double>& cfg = core::Configs<double>::defaults();
cfg.grid = core::GridKind::Uniform;
cfg.left = {core::FDKind::Forward, core::BCKind::Neumann, 0.0};
cfg.right = {core::FDKind::Backward, core::BCKind::Neumann, 0.0};
cfg.solver = core::SolverKind::LU;
fluids::Diffusion1D<double> diffusion(cfg, mesh, Gamma);
diffusion.assemble(A, b, s);
*/
}
return 0;
}