diff --git a/bin/abc_lab b/bin/abc_lab index 45555df..1223353 100755 Binary files a/bin/abc_lab and b/bin/abc_lab differ diff --git a/include/modules/neural_networks/activation_functions/Activation_Softmax.h b/include/modules/neural_networks/activation_functions/Activation_Softmax.h index 2be0034..ae23ca2 100644 --- a/include/modules/neural_networks/activation_functions/Activation_Softmax.h +++ b/include/modules/neural_networks/activation_functions/Activation_Softmax.h @@ -11,30 +11,47 @@ #include "./numerics/matdiv.h" + namespace neural_networks{ template struct Activation_Softmax{ - utils::Matrix exp_values; - utils::Matrix probabilities; + //utils::Matrix exp_values; + //utils::Matrix probabilities; utils::Matrix outputs; + utils::Matrix dinputs; + void forward(const utils::Matrix& inputs){ // Get unnormalized probabilities - exp_values = numerics::matexp(numerics::matsubtract(inputs, numerics::matmax(inputs, "rows"), "col")); + utils::Matrix exp_values = numerics::matexp(numerics::matsubtract(inputs, numerics::matmax(inputs, "rows"), "col")); // Normalize them for each sample - probabilities = numerics::matdiv(exp_values, numerics::matsum(exp_values, "col"), "col"); - - + utils::Matrix probabilities = numerics::matdiv(exp_values, numerics::matsum(exp_values, "col"), "col"); outputs = probabilities; - } + void backward(const utils::Matrix& dvalues){ + + const uint64_t rows = dvalues.rows(); + const uint64_t cols = dvalues.cols(); + + if ((dinputs.rows() != rows) || dinputs.cols() != cols){ + dinputs.resize(rows, cols); + } + + for (uint64_t i = 0; i < rows; ++i){ + T dot = T{0}; + for (uint64_t j = 0; j < cols; ++j){ + dot += outputs(i,j) * dvalues(i,j); + } + for (uint64_t j = 0; j < cols; ++j){ + dinputs(i,j) = outputs(i,j) * (dvalues(i,j) - dot); + } + } + } }; - - } // end namespace neural_networks \ No newline at end of file diff --git a/include/modules/neural_networks/activation_functions/Activation_Softmax_Loss_CategoricalCrossentropy.h b/include/modules/neural_networks/activation_functions/Activation_Softmax_Loss_CategoricalCrossentropy.h new file mode 100644 index 0000000..8816960 --- /dev/null +++ b/include/modules/neural_networks/activation_functions/Activation_Softmax_Loss_CategoricalCrossentropy.h @@ -0,0 +1,68 @@ +#pragma once + +#include "./core/omp_config.h" + +#include "./utils/vector.h" +#include "./utils/matrix.h" + +#include "./numerics/matmax.h" +#include "./numerics/matsubtract.h" +#include "./numerics/matexp.h" +#include "./numerics/matdiv.h" + +#include "./modules/neural_networks/activation_functions/Activation_Softmax.h" +#include "./modules/neural_networks/loss/Loss_CategoricalCrossentrophy.h" + + +namespace neural_networks{ + + template + struct Activation_Softmax_Loss_CategoricalCrossentropy{ + + neural_networks::Activation_Softmax activation; + neural_networks::Loss_CategoricalCrossentrophy loss; + + //utils::Matrix exp_values; + //utils::Matrix probabilities; + utils::Matrix outputs; + utils::Matrix dinputs; + + utils::Vector forward(const utils::Matrix& inputs, const utils::Matrix& y_true){ + + // Output layer's activation function + activation.forward(inputs); + // Set the output + outputs = activation.outputs; + // Calculate and return loss value + Td data_loss = loss.calculate(inputs, y_true); + return data_loss; + } + + void backward(const utils::Matrix& dvalues, const utils::Matrix& y_true){ + + // Number of samples + const uint64_t samples = y_true.rows(); + + // If the labels are one-hot encoded, + // turn them into discrete values + + const uint64_t rows = dvalues.rows(); + const uint64_t cols = dvalues.cols(); + + if ((dinputs.rows() != rows) || dinputs.cols() != cols){ + dinputs.resize(rows, cols); + } + + for (uint64_t i = 0; i < rows; ++i){ + Td dot = Td{0}; + for (uint64_t j = 0; j < cols; ++j){ + dot += outputs(i,j) * dvalues(i,j); + } + for (uint64_t j = 0; j < cols; ++j){ + dinputs(i,j) = outputs(i,j) * (dvalues(i,j) - dot); + } + } + } + }; + +} // end namespace neural_networks \ No newline at end of file diff --git a/include/modules/neural_networks/loss/Loss.h b/include/modules/neural_networks/loss/Loss.h index 5b3b634..5f72a35 100644 --- a/include/modules/neural_networks/loss/Loss.h +++ b/include/modules/neural_networks/loss/Loss.h @@ -13,9 +13,11 @@ namespace neural_networks{ struct Loss{ utils::Vector sample_losses; + utils::Matrix dinputs; Td data_loss; virtual utils::Vector forward(const utils::Matrix& output, const utils::Matrix& y) = 0; + virtual void backward(const utils::Matrix& dvalues, const utils::Matrix& y) = 0; Td calculate(const utils::Matrix& output, const utils::Matrix& y){ diff --git a/include/modules/neural_networks/loss/Loss_CategoricalCrossentrophy.h b/include/modules/neural_networks/loss/Loss_CategoricalCrossentrophy.h index 33f07ea..94c9a0d 100644 --- a/include/modules/neural_networks/loss/Loss_CategoricalCrossentrophy.h +++ b/include/modules/neural_networks/loss/Loss_CategoricalCrossentrophy.h @@ -16,38 +16,67 @@ namespace neural_networks{ template struct Loss_CategoricalCrossentrophy : Loss { - - utils::Vector forward(const utils::Matrix& y_pred, const utils::Matrix& y_true) override{ - - utils::Vector correct_confidences(y_true.rows(), Td{0}); - utils::Matrix cast_y_true = utils::matcast(y_true); - // Number of samles in a batch - const uint64_t samples = y_true.rows(); + utils::Matrix dinputs; - // Clip data to prevent dividning by 0 - // Clip both sides to not drag mean towards any value - utils::Matrix y_pred_clipped = numerics::matclip(y_pred, Td{1e-7}, Td{1.0} - Td{1e-7}); + + utils::Vector forward(const utils::Matrix& y_pred, const utils::Matrix& y_true) override{ + + utils::Vector correct_confidences(y_true.rows(), Td{0}); + utils::Matrix cast_y_true = utils::matcast(y_true); - // Probabilities for taget values - // only if categorical labes - if (y_true.cols() == 1){ - for (uint64_t i = 0; i < y_true.rows(); ++i){ - const uint64_t idx = static_cast(y_true(i, 0)); - correct_confidences[i] = y_pred_clipped(i, idx); - } - }else{ // Mask values - only for one-hot encoded labels - correct_confidences = numerics::matdot_row(y_pred_clipped, cast_y_true); + // Number of samles in a batch + const uint64_t samples = y_true.rows(); + // Clip data to prevent dividning by 0 + // Clip both sides to not drag mean towards any value + utils::Matrix y_pred_clipped = numerics::matclip(y_pred, Td{1e-7}, Td{1.0} - Td{1e-7}); + + // Probabilities for taget values + // only if categorical labes + if (y_true.cols() == 1){ + for (uint64_t i = 0; i < y_true.rows(); ++i){ + const uint64_t idx = static_cast(y_true(i, 0)); + correct_confidences[i] = y_pred_clipped(i, idx); } - // Losses - utils::Vector negative_log_likelihoods(samples, Td{0}); - for (uint64_t i = 0; i < samples; ++i){ - negative_log_likelihoods[i] = -std::log(static_cast(correct_confidences[i])); - } + }else{ // Mask values - only for one-hot encoded labels + correct_confidences = numerics::matdot_row(y_pred_clipped, cast_y_true); - return negative_log_likelihoods; } + // Losses + utils::Vector negative_log_likelihoods(samples, Td{0}); + for (uint64_t i = 0; i < samples; ++i){ + negative_log_likelihoods[i] = -std::log(static_cast(correct_confidences[i])); + } + + return negative_log_likelihoods; + } + + void backward(const utils::Matrix& dvalues, const utils::Matrix& y_true) override{ + + // Number of samples + const Td samples = static_cast (y_true.rows()); + // Number of labels in every sample + // We'll use the first samle to count them + const Ti labels = dvalues.cols(); + + utils::Matrix y_temp; + + if (y_true.cols() == 1){ + + y_temp = utils::eye(labels, y_true.get_col(0)); + }else{ + y_temp = y_true; + } + + + // Calculate the gradient + numerics::inplace_matscalar(y_temp,Ti{-1}); + dinputs = numerics::matdiv(utils::matcast(y_temp), dvalues); + numerics::inplace_matdiv(dinputs, samples); + + + } }; diff --git a/include/modules/neural_networks/neural_networks.h b/include/modules/neural_networks/neural_networks.h index 85523ed..4260399 100644 --- a/include/modules/neural_networks/neural_networks.h +++ b/include/modules/neural_networks/neural_networks.h @@ -9,6 +9,7 @@ #include "activation_functions/Activation_ReLU.h" #include "activation_functions/Activation_Softmax.h" +#include "activation_functions/Activation_Softmax_Loss_CategoricalCrossentropy.h" #include "loss/Loss.h" // Base #include "loss/Loss_CategoricalCrossentrophy.h" diff --git a/include/numerics/matdiv.h b/include/numerics/matdiv.h index c8e1312..c4c175e 100644 --- a/include/numerics/matdiv.h +++ b/include/numerics/matdiv.h @@ -8,7 +8,6 @@ namespace numerics{ -// ---------------- Serial baseline ---------------- template utils::Matrix matdiv(const utils::Matrix& A, const utils::Vector& b, std::string method){ @@ -33,6 +32,60 @@ namespace numerics{ } + + + template + void inplace_matdiv(utils::Matrix& A, const utils::Matrix& B){ + + const uint64_t rows = A.rows(); + const uint64_t cols = A.cols(); + + if ((rows != B.rows()) || (cols != B.cols())){ + throw std::runtime_error("inplace_matdiv: rows and cols are not the same'"); + } + + for (uint64_t i = 0; i < rows; ++i){ + for (uint64_t j = 0; j < cols; ++j){ + A(i,j) /= B(i,j); + } + } + } + + + template + utils::Matrix matdiv(const utils::Matrix& A, const utils::Matrix& B){ + + const uint64_t rows = A.rows(); + const uint64_t cols = A.cols(); + + if ((rows != B.rows()) || (cols != B.cols())){ + throw std::runtime_error("matdiv: choose div by: 'row' or 'col'"); + } + + utils::Matrix C = A; + + inplace_matdiv(C, B); + + return C; + } + + template + void inplace_matdiv(utils::Matrix& A, const T b){ + + const uint64_t rows = A.rows(); + const uint64_t cols = A.cols(); + + + for (uint64_t i = 0; i < rows; ++i){ + for (uint64_t j = 0; j < cols; ++j){ + A(i,j) /= b; + } + } + } + + + + } // namespace numerics #endif // _matdiv_n_ \ No newline at end of file diff --git a/include/utils/generators.h b/include/utils/generators.h index 3369d29..e1b8ae8 100644 --- a/include/utils/generators.h +++ b/include/utils/generators.h @@ -2,3 +2,4 @@ #pragma once #include "./utils/generators/linspace.h" +#include "./utils/generators/eye.h" diff --git a/include/utils/generators/eye.h b/include/utils/generators/eye.h new file mode 100644 index 0000000..dbe3227 --- /dev/null +++ b/include/utils/generators/eye.h @@ -0,0 +1,24 @@ +#pragma once + +#include "utils/vector.h" +#include "utils/matrix.h" + + +namespace utils{ + + template + utils::Matrix eye(const T a, const utils::Vector& b){ + + const uint64_t N = b.size(); + utils::Matrix C(N, a, T{0}); + + for (uint64_t i = 0; i < N; ++i){ + C(i, b[i]) = T{1}; + } + return C; + } + + + + +} // end namespace utils \ No newline at end of file diff --git a/obj/main.d b/obj/main.d index 7fffdb3..941cdc8 100644 --- a/obj/main.d +++ b/obj/main.d @@ -2,7 +2,8 @@ obj/main.o: src/main.cpp include/./core/omp_config.h \ include/./utils/utils.h include/./utils/vector.h \ include/./utils/random.h include/./utils/matrix.h \ include/./utils/generators.h include/./utils/generators/linspace.h \ - include/utils/vector.h include/./utils/matcast.h \ + include/utils/vector.h include/./utils/generators/eye.h \ + include/utils/matrix.h include/./utils/matcast.h \ include/./numerics/numerics.h include/./numerics/max.h \ include/./numerics/exp.h include/./numerics/log.h \ include/./numerics/vecclip.h include/./numerics/vecexp.h \ @@ -30,16 +31,16 @@ obj/main.o: src/main.cpp include/./core/omp_config.h \ include/./decomp/decomp.h include/./modules/mesh/mesh.h \ include/modules/mesh/mesh1d.h include/modules/fluids/fluids.h \ include/modules/fluids/diffusion1d.h include/core/global_config.h \ - include/utils/matrix.h \ include/./modules/neural_networks/neural_networks.h \ include/./modules/neural_networks/datasets/spiral.h \ include/./modules/neural_networks/datasets/vertical.h \ include/./modules/neural_networks/layers/Dense_Layer.h \ include/./modules/neural_networks/activation_functions/Activation_ReLU.h \ include/./modules/neural_networks/activation_functions/Activation_Softmax.h \ - include/./modules/neural_networks/loss/Loss.h \ - include/./numerics/vecmean.h \ - include/./modules/neural_networks/loss/Loss_CategoricalCrossentrophy.h + include/./modules/neural_networks/activation_functions/Activation_Softmax_Loss_CategoricalCrossentropy.h \ + include/./modules/neural_networks/loss/Loss_CategoricalCrossentrophy.h \ + include/./modules/neural_networks/loss/./Loss.h \ + include/./numerics/vecmean.h include/./core/omp_config.h: include/./utils/utils.h: include/./utils/vector.h: @@ -48,6 +49,8 @@ include/./utils/matrix.h: include/./utils/generators.h: include/./utils/generators/linspace.h: include/utils/vector.h: +include/./utils/generators/eye.h: +include/utils/matrix.h: include/./utils/matcast.h: include/./numerics/numerics.h: include/./numerics/max.h: @@ -95,13 +98,13 @@ include/modules/mesh/mesh1d.h: include/modules/fluids/fluids.h: include/modules/fluids/diffusion1d.h: include/core/global_config.h: -include/utils/matrix.h: include/./modules/neural_networks/neural_networks.h: include/./modules/neural_networks/datasets/spiral.h: include/./modules/neural_networks/datasets/vertical.h: include/./modules/neural_networks/layers/Dense_Layer.h: include/./modules/neural_networks/activation_functions/Activation_ReLU.h: include/./modules/neural_networks/activation_functions/Activation_Softmax.h: -include/./modules/neural_networks/loss/Loss.h: -include/./numerics/vecmean.h: +include/./modules/neural_networks/activation_functions/Activation_Softmax_Loss_CategoricalCrossentropy.h: include/./modules/neural_networks/loss/Loss_CategoricalCrossentrophy.h: +include/./modules/neural_networks/loss/./Loss.h: +include/./numerics/vecmean.h: diff --git a/obj/main.o b/obj/main.o index 8511dde..93e6749 100644 Binary files a/obj/main.o and b/obj/main.o differ diff --git a/src/main.cpp b/src/main.cpp index e85ed5f..0a77c81 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -24,20 +24,20 @@ int main(int argc, char const *argv[]) { utils::Mf X(10,2, 0); - utils::Matrix y(10,1, 0); - utils::Vector class_targets; + utils::Matrix y(10,1, 0); + utils::Vector class_targets; float loss; float accuracy; //neural_networks::create_spital_data(10000, 3, X, y); - neural_networks::create_vertical_data(100, 3, X, y); + neural_networks::create_vertical_data(100, 3, X, y); neural_networks::Dense_Layer dense1(2, 3); neural_networks::Activation_ReLU activation1; neural_networks::Dense_Layer dense2(3, 3); neural_networks::Activation_Softmax activation2; - neural_networks::Loss_CategoricalCrossentrophy loss_funtion; + neural_networks::Loss_CategoricalCrossentrophy loss_funtion; float lowest_loss = 9999999; utils::Mf best_dense_1_weights = dense1.weights; @@ -47,7 +47,7 @@ int main(int argc, char const *argv[]) utils::Vf vectRND; - utils::Vector predections; + utils::Vector predections; @@ -73,10 +73,10 @@ int main(int argc, char const *argv[]) // it takes the output of the second dense layer here and returns loss loss = loss_funtion.calculate(activation2.outputs, y); - predections = numerics::matargmax_row(activation2.outputs); + predections = numerics::matargmax_row(activation2.outputs); if (y.cols() < 1){ - class_targets = numerics::matargmax_row(y); + class_targets = numerics::matargmax_row(y); }else{ class_targets = y.get_col(0); }