diff --git a/include/modules/neural_networks/activation_functions/Activation_Softmax.h b/include/modules/neural_networks/activation_functions/Activation_Softmax.h
index 2be0034..ae23ca2 100644
--- a/include/modules/neural_networks/activation_functions/Activation_Softmax.h
+++ b/include/modules/neural_networks/activation_functions/Activation_Softmax.h
@@ -11,30 +11,47 @@
 #include "./numerics/matdiv.h"
 
+
 namespace neural_networks{
 
     template <typename T>
     struct Activation_Softmax{
 
-        utils::Matrix<T> exp_values;
-        utils::Matrix<T> probabilities;
+        //utils::Matrix<T> exp_values;
+        //utils::Matrix<T> probabilities;
         utils::Matrix<T> outputs;
+        utils::Matrix<T> dinputs;
+
 
         void forward(const utils::Matrix<T>& inputs){
 
             // Get unnormalized probabilities
-            exp_values = numerics::matexp(numerics::matsubtract(inputs, numerics::matmax(inputs, "rows"), "col"));
+            utils::Matrix<T> exp_values = numerics::matexp(numerics::matsubtract(inputs, numerics::matmax(inputs, "rows"), "col"));
             // Normalize them for each sample
-            probabilities = numerics::matdiv(exp_values, numerics::matsum(exp_values, "col"), "col");
-
-
+            utils::Matrix<T> probabilities = numerics::matdiv(exp_values, numerics::matsum(exp_values, "col"), "col");
             outputs = probabilities;
-
         }
 
+        void backward(const utils::Matrix<T>& dvalues){
+
+            const uint64_t rows = dvalues.rows();
+            const uint64_t cols = dvalues.cols();
+
+            if ((dinputs.rows() != rows) || (dinputs.cols() != cols)){
+                dinputs.resize(rows, cols);
+            }
+
+            // Row-wise softmax Jacobian product without materializing the Jacobian:
+            // dinputs(i,j) = s(i,j) * (dvalues(i,j) - sum_k s(i,k) * dvalues(i,k))
+            for (uint64_t i = 0; i < rows; ++i){
+                T dot = T{0};
+                for (uint64_t j = 0; j < cols; ++j){
+                    dot += outputs(i,j) * dvalues(i,j);
+                }
+                for (uint64_t j = 0; j < cols; ++j){
+                    dinputs(i,j) = outputs(i,j) * (dvalues(i,j) - dot);
+                }
+            }
+        }
     };
-
-
 } // end namespace neural_networks
\ No newline at end of file
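Note on the backward pass above: it applies the softmax Jacobian J = diag(s) - s*s^T one row at a time without ever building J, using the identity (J*d)_j = s_j * (d_j - s.d). Below is a minimal standalone check of that identity; it uses plain arrays and made-up numbers, so it is independent of utils::Matrix.

#include <array>
#include <cstddef>
#include <cstdio>

int main(){
    // One softmax row s and one upstream gradient row d (made-up values)
    const std::array<double, 3> s{0.7, 0.2, 0.1};
    const std::array<double, 3> d{0.5, -1.0, 2.0};

    // Shortcut used in Activation_Softmax::backward: s_j * (d_j - s.d)
    double dot = 0.0;
    for (std::size_t k = 0; k < 3; ++k){ dot += s[k] * d[k]; }

    for (std::size_t j = 0; j < 3; ++j){
        // Explicit Jacobian product: sum_k (delta_jk * s_j - s_j * s_k) * d_k
        double full = 0.0;
        for (std::size_t k = 0; k < 3; ++k){
            const double J_jk = (j == k ? s[j] : 0.0) - s[j] * s[k];
            full += J_jk * d[k];
        }
        const double shortcut = s[j] * (d[j] - dot);
        // Both columns print the same numbers
        std::printf("j=%zu  jacobian=%+.6f  shortcut=%+.6f\n", j, full, shortcut);
    }
    return 0;
}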
diff --git a/include/modules/neural_networks/activation_functions/Activation_Softmax_Loss_CategoricalCrossentropy.h b/include/modules/neural_networks/activation_functions/Activation_Softmax_Loss_CategoricalCrossentropy.h
new file mode 100644
index 0000000..8816960
--- /dev/null
+++ b/include/modules/neural_networks/activation_functions/Activation_Softmax_Loss_CategoricalCrossentropy.h
@@ -0,0 +1,68 @@
+#pragma once
+
+#include "./core/omp_config.h"
+
+#include "./utils/vector.h"
+#include "./utils/matrix.h"
+
+#include "./numerics/matmax.h"
+#include "./numerics/matsubtract.h"
+#include "./numerics/matexp.h"
+#include "./numerics/matdiv.h"
+
+#include "./modules/neural_networks/activation_functions/Activation_Softmax.h"
+#include "./modules/neural_networks/loss/Loss_CategoricalCrossentrophy.h"
+
+
+namespace neural_networks{
+
+    template <typename Td, typename Ti>
+    struct Activation_Softmax_Loss_CategoricalCrossentropy{
+
+        neural_networks::Activation_Softmax<Td> activation;
+        neural_networks::Loss_CategoricalCrossentrophy<Td, Ti> loss;
+
+        //utils::Matrix<Td> exp_values;
+        //utils::Matrix<Td> probabilities;
+        utils::Matrix<Td> outputs;
+        utils::Matrix<Td> dinputs;
+
+        Td forward(const utils::Matrix<Td>& inputs, const utils::Matrix<Ti>& y_true){
+
+            // Output layer's activation function
+            activation.forward(inputs);
+            // Set the output
+            outputs = activation.outputs;
+            // Calculate and return the loss value on the softmax outputs
+            Td data_loss = loss.calculate(outputs, y_true);
+            return data_loss;
+        }
+
+        void backward(const utils::Matrix<Td>& dvalues, const utils::Matrix<Ti>& y_true){
+
+            // Number of samples
+            const uint64_t samples = y_true.rows();
+
+            const uint64_t rows = dvalues.rows();
+            const uint64_t cols = dvalues.cols();
+
+            if ((dinputs.rows() != rows) || (dinputs.cols() != cols)){
+                dinputs.resize(rows, cols);
+            }
+
+            // Simplified combined gradient: dinputs = (dvalues - y_true) / samples,
+            // where dvalues are the softmax outputs
+            for (uint64_t i = 0; i < rows; ++i){
+                // If the labels are one-hot encoded,
+                // turn them into discrete values
+                uint64_t idx = 0;
+                if (y_true.cols() == 1){
+                    idx = static_cast<uint64_t>(y_true(i, 0));
+                }else{
+                    for (uint64_t j = 1; j < cols; ++j){
+                        if (y_true(i, j) > y_true(i, idx)){ idx = j; }
+                    }
+                }
+                for (uint64_t j = 0; j < cols; ++j){
+                    dinputs(i, j) = dvalues(i, j);
+                }
+                dinputs(i, idx) -= Td{1};
+            }
+            numerics::inplace_matdiv(dinputs, static_cast<Td>(samples));
+        }
+    };
+
+} // end namespace neural_networks
\ No newline at end of file
diff --git a/include/modules/neural_networks/loss/Loss.h b/include/modules/neural_networks/loss/Loss.h
index 5b3b634..5f72a35 100644
--- a/include/modules/neural_networks/loss/Loss.h
+++ b/include/modules/neural_networks/loss/Loss.h
@@ -13,9 +13,11 @@ namespace neural_networks{
     struct Loss{
 
         utils::Vector<Td> sample_losses;
+        utils::Matrix<Td> dinputs;
         Td data_loss;
 
         virtual utils::Vector<Td> forward(const utils::Matrix<Td>& output, const utils::Matrix<Ti>& y) = 0;
+        virtual void backward(const utils::Matrix<Td>& dvalues, const utils::Matrix<Ti>& y) = 0;
 
         Td calculate(const utils::Matrix<Td>& output, const utils::Matrix<Ti>& y){
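The Loss_CategoricalCrossentrophy::backward() added below computes the cross-entropy gradient dL/dy_pred = -y_true / y_pred, averaged over the batch. With one-hot targets the gradient is non-zero only at the true class, which this tiny standalone sketch (plain arrays, made-up numbers) makes concrete:

#include <cstdio>

int main(){
    // One predicted distribution and its one-hot target (made-up values)
    const double y_pred[3] = {0.7, 0.2, 0.1};
    const double y_true[3] = {1.0, 0.0, 0.0};
    const double samples = 1.0; // batch size used for normalization

    // Same formula backward() evaluates via inplace_matscalar / matdiv / inplace_matdiv
    for (int j = 0; j < 3; ++j){
        const double grad = -y_true[j] / y_pred[j] / samples;
        std::printf("j=%d  grad=%+.6f\n", j, grad); // non-zero only at the true class
    }
    return 0;
}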
diff --git a/include/modules/neural_networks/loss/Loss_CategoricalCrossentrophy.h b/include/modules/neural_networks/loss/Loss_CategoricalCrossentrophy.h
index 33f07ea..94c9a0d 100644
--- a/include/modules/neural_networks/loss/Loss_CategoricalCrossentrophy.h
+++ b/include/modules/neural_networks/loss/Loss_CategoricalCrossentrophy.h
@@ -16,38 +16,67 @@ namespace neural_networks{
 
     template <typename Td, typename Ti>
     struct Loss_CategoricalCrossentrophy : Loss<Td, Ti> {
-
-        utils::Vector<Td> forward(const utils::Matrix<Td>& y_pred, const utils::Matrix<Ti>& y_true) override{
-
-            utils::Vector<Td> correct_confidences(y_true.rows(), Td{0});
-            utils::Matrix<Td> cast_y_true = utils::matcast<Td>(y_true);
-            // Number of samles in a batch
-            const uint64_t samples = y_true.rows();
-
-            // Clip data to prevent dividning by 0
-            // Clip both sides to not drag mean towards any value
-            utils::Matrix<Td> y_pred_clipped = numerics::matclip(y_pred, Td{1e-7}, Td{1.0} - Td{1e-7});
-
-            // Probabilities for taget values
-            // only if categorical labes
-            if (y_true.cols() == 1){
-                for (uint64_t i = 0; i < y_true.rows(); ++i){
-                    const uint64_t idx = static_cast<uint64_t>(y_true(i, 0));
-                    correct_confidences[i] = y_pred_clipped(i, idx);
-                }
-            }else{ // Mask values - only for one-hot encoded labels
-                correct_confidences = numerics::matdot_row(y_pred_clipped, cast_y_true);
-            }
-
-            // Losses
-            utils::Vector<Td> negative_log_likelihoods(samples, Td{0});
-            for (uint64_t i = 0; i < samples; ++i){
-                negative_log_likelihoods[i] = -std::log(static_cast<double>(correct_confidences[i]));
-            }
-
-            return negative_log_likelihoods;
-        }
+
+        utils::Vector<Td> forward(const utils::Matrix<Td>& y_pred, const utils::Matrix<Ti>& y_true) override{
+
+            utils::Vector<Td> correct_confidences(y_true.rows(), Td{0});
+            utils::Matrix<Td> cast_y_true = utils::matcast<Td>(y_true);
+
+            // Number of samples in a batch
+            const uint64_t samples = y_true.rows();
+
+            // Clip data to prevent dividing by 0
+            // Clip both sides to not drag the mean towards any value
+            utils::Matrix<Td> y_pred_clipped = numerics::matclip(y_pred, Td{1e-7}, Td{1.0} - Td{1e-7});
+
+            // Probabilities for target values -
+            // only if categorical labels
+            if (y_true.cols() == 1){
+                for (uint64_t i = 0; i < y_true.rows(); ++i){
+                    const uint64_t idx = static_cast<uint64_t>(y_true(i, 0));
+                    correct_confidences[i] = y_pred_clipped(i, idx);
+                }
+            }else{ // Mask values - only for one-hot encoded labels
+                correct_confidences = numerics::matdot_row(y_pred_clipped, cast_y_true);
+            }
+
+            // Losses
+            utils::Vector<Td> negative_log_likelihoods(samples, Td{0});
+            for (uint64_t i = 0; i < samples; ++i){
+                negative_log_likelihoods[i] = -std::log(static_cast<double>(correct_confidences[i]));
+            }
+
+            return negative_log_likelihoods;
+        }
+
+        void backward(const utils::Matrix<Td>& dvalues, const utils::Matrix<Ti>& y_true) override{
+
+            // Number of samples
+            const Td samples = static_cast<Td>(y_true.rows());
+            // Number of labels in every sample
+            const Ti labels = static_cast<Ti>(dvalues.cols());
+
+            utils::Matrix<Ti> y_temp;
+
+            // If the labels are sparse class indices, expand them to one-hot rows
+            if (y_true.cols() == 1){
+                y_temp = utils::eye(labels, y_true.get_col(0));
+            }else{
+                y_temp = y_true;
+            }
+
+            // Calculate the gradient: dinputs = -y_true / dvalues, normalized by the sample count
+            numerics::inplace_matscalar(y_temp, Ti{-1});
+            // this-> is needed to reach the member of the dependent base Loss<Td, Ti>
+            this->dinputs = numerics::matdiv(utils::matcast<Td>(y_temp), dvalues);
+            numerics::inplace_matdiv(this->dinputs, samples);
+        }
     };
diff --git a/include/modules/neural_networks/neural_networks.h b/include/modules/neural_networks/neural_networks.h
index 85523ed..4260399 100644
--- a/include/modules/neural_networks/neural_networks.h
+++ b/include/modules/neural_networks/neural_networks.h
@@ -9,6 +9,7 @@
 
 #include "activation_functions/Activation_ReLU.h"
 #include "activation_functions/Activation_Softmax.h"
+#include "activation_functions/Activation_Softmax_Loss_CategoricalCrossentropy.h"
 
 #include "loss/Loss.h" // Base
 #include "loss/Loss_CategoricalCrossentrophy.h"
diff --git a/include/numerics/matdiv.h b/include/numerics/matdiv.h
index c8e1312..c4c175e 100644
--- a/include/numerics/matdiv.h
+++ b/include/numerics/matdiv.h
@@ -8,7 +8,6 @@
 
 namespace numerics{
 
-// ---------------- Serial baseline ----------------
     template <typename T>
     utils::Matrix<T> matdiv(const utils::Matrix<T>& A, const utils::Vector<T>& b, std::string method){
 
@@ -33,6 +32,60 @@
     }
 
+
+    // Element-wise in-place division: A(i,j) /= B(i,j)
+    template <typename T>
+    void inplace_matdiv(utils::Matrix<T>& A, const utils::Matrix<T>& B){
+
+        const uint64_t rows = A.rows();
+        const uint64_t cols = A.cols();
+
+        if ((rows != B.rows()) || (cols != B.cols())){
+            throw std::runtime_error("inplace_matdiv: A and B must have the same dimensions");
+        }
+
+        for (uint64_t i = 0; i < rows; ++i){
+            for (uint64_t j = 0; j < cols; ++j){
+                A(i,j) /= B(i,j);
+            }
+        }
+    }
+
+
+    // Element-wise division returning a new matrix
+    template <typename T>
+    utils::Matrix<T> matdiv(const utils::Matrix<T>& A, const utils::Matrix<T>& B){
+
+        const uint64_t rows = A.rows();
+        const uint64_t cols = A.cols();
+
+        if ((rows != B.rows()) || (cols != B.cols())){
+            throw std::runtime_error("matdiv: A and B must have the same dimensions");
+        }
+
+        utils::Matrix<T> C = A;
+
+        inplace_matdiv(C, B);
+
+        return C;
+    }
+
+
+    // Divide every element of A by the scalar b, in place
+    template <typename T>
+    void inplace_matdiv(utils::Matrix<T>& A, const T b){
+
+        const uint64_t rows = A.rows();
+        const uint64_t cols = A.cols();
+
+        for (uint64_t i = 0; i < rows; ++i){
+            for (uint64_t j = 0; j < cols; ++j){
+                A(i,j) /= b;
+            }
+        }
+    }
+
+
 } // namespace numerics
 
 #endif // _matdiv_n_
\ No newline at end of file
diff --git a/include/utils/generators.h b/include/utils/generators.h
index 3369d29..e1b8ae8 100644
--- a/include/utils/generators.h
+++ b/include/utils/generators.h
@@ -2,3 +2,4 @@
 #pragma once
 
 #include "./utils/generators/linspace.h"
+#include "./utils/generators/eye.h"
diff --git a/include/utils/generators/eye.h b/include/utils/generators/eye.h
new file mode 100644
index 0000000..dbe3227
--- /dev/null
+++ b/include/utils/generators/eye.h
@@ -0,0 +1,24 @@
+#pragma once
+
+#include "utils/vector.h"
+#include "utils/matrix.h"
+
+
+namespace utils{
+
+    // Builds a one-hot matrix: row i has a single 1 at column b[i].
+    // 'a' is the number of columns (classes); b holds the class index of each row.
+    template <typename T>
+    utils::Matrix<T> eye(const T a, const utils::Vector<T>& b){
+
+        const uint64_t N = b.size();
+        utils::Matrix<T> C(N, static_cast<uint64_t>(a), T{0});
+
+        for (uint64_t i = 0; i < N; ++i){
+            C(i, static_cast<uint64_t>(b[i])) = T{1};
+        }
+        return C;
+    }
+
+} // end namespace utils
\ No newline at end of file
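A rough usage sketch for the new combined class. The construction and filling of utils::Matrix are not part of this diff, so the (rows, cols, fill) constructor and operator()(i,j) below are assumed from their use in eye.h and the loss code; the include path and the choice Ti = int are likewise illustrative, not prescribed by the patch.

#include "./modules/neural_networks/neural_networks.h" // assumed umbrella include

int main(){
    // 3 samples x 3 classes of logits, plus sparse class labels (3 x 1)
    utils::Matrix<double> logits(3, 3, 0.0);
    utils::Matrix<int> labels(3, 1, 0);
    logits(0, 0) = 2.0; logits(1, 1) = 1.5; logits(2, 2) = 0.5;
    labels(0, 0) = 0;   labels(1, 0) = 1;   labels(2, 0) = 2;

    neural_networks::Activation_Softmax_Loss_CategoricalCrossentropy<double, int> loss_activation;

    // Forward: softmax + categorical cross-entropy in one step; returns the mean loss
    double loss = loss_activation.forward(logits, labels);

    // Backward uses the simplified (softmax - y_true) / samples gradient, so it is
    // called with the softmax outputs themselves rather than an upstream gradient
    loss_activation.backward(loss_activation.outputs, labels);

    // loss_activation.dinputs now holds the gradient with respect to the logits
    (void)loss;
    return 0;
}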