Activation Softmax with CategoricalCrossentropy is almost done (from page 234).
This commit is contained in:
BIN
Binary file not shown.
@@ -11,30 +11,47 @@
|
|||||||
#include "./numerics/matdiv.h"
|
#include "./numerics/matdiv.h"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
namespace neural_networks{
|
namespace neural_networks{
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
struct Activation_Softmax{
|
struct Activation_Softmax{
|
||||||
|
|
||||||
utils::Matrix<T> exp_values;
|
//utils::Matrix<T> exp_values;
|
||||||
utils::Matrix<T> probabilities;
|
//utils::Matrix<T> probabilities;
|
||||||
utils::Matrix<T> outputs;
|
utils::Matrix<T> outputs;
|
||||||
|
|
||||||
|
utils::Matrix<T> dinputs;
|
||||||
|
|
||||||
void forward(const utils::Matrix<T>& inputs){
|
void forward(const utils::Matrix<T>& inputs){
|
||||||
|
|
||||||
// Get unnormalized probabilities
|
// Get unnormalized probabilities
|
||||||
exp_values = numerics::matexp(numerics::matsubtract(inputs, numerics::matmax(inputs, "rows"), "col"));
|
utils::Matrix<T> exp_values = numerics::matexp(numerics::matsubtract(inputs, numerics::matmax(inputs, "rows"), "col"));
|
||||||
|
|
||||||
// Normalize them for each sample
|
// Normalize them for each sample
|
||||||
probabilities = numerics::matdiv(exp_values, numerics::matsum(exp_values, "col"), "col");
|
utils::Matrix<T> probabilities = numerics::matdiv(exp_values, numerics::matsum(exp_values, "col"), "col");
|
||||||
|
|
||||||
|
|
||||||
outputs = probabilities;
|
outputs = probabilities;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void backward(const utils::Matrix<T>& dvalues){
|
||||||
|
|
||||||
|
const uint64_t rows = dvalues.rows();
|
||||||
|
const uint64_t cols = dvalues.cols();
|
||||||
|
|
||||||
|
if ((dinputs.rows() != rows) || dinputs.cols() != cols){
|
||||||
|
dinputs.resize(rows, cols);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (uint64_t i = 0; i < rows; ++i){
|
||||||
|
T dot = T{0};
|
||||||
|
for (uint64_t j = 0; j < cols; ++j){
|
||||||
|
dot += outputs(i,j) * dvalues(i,j);
|
||||||
|
}
|
||||||
|
for (uint64_t j = 0; j < cols; ++j){
|
||||||
|
dinputs(i,j) = outputs(i,j) * (dvalues(i,j) - dot);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
} // end namespace neural_networks
|
} // end namespace neural_networks
|
||||||
+68
@@ -0,0 +1,68 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "./core/omp_config.h"
|
||||||
|
|
||||||
|
#include "./utils/vector.h"
|
||||||
|
#include "./utils/matrix.h"
|
||||||
|
|
||||||
|
#include "./numerics/matmax.h"
|
||||||
|
#include "./numerics/matsubtract.h"
|
||||||
|
#include "./numerics/matexp.h"
|
||||||
|
#include "./numerics/matdiv.h"
|
||||||
|
|
||||||
|
#include "./modules/neural_networks/activation_functions/Activation_Softmax.h"
|
||||||
|
#include "./modules/neural_networks/loss/Loss_CategoricalCrossentrophy.h"
|
||||||
|
|
||||||
|
|
||||||
|
namespace neural_networks{
|
||||||
|
|
||||||
|
template <typename Td, typename Ti>
|
||||||
|
struct Activation_Softmax_Loss_CategoricalCrossentropy{
|
||||||
|
|
||||||
|
neural_networks::Activation_Softmax<Td> activation;
|
||||||
|
neural_networks::Loss_CategoricalCrossentrophy<Td, Ti> loss;
|
||||||
|
|
||||||
|
//utils::Matrix<T> exp_values;
|
||||||
|
//utils::Matrix<T> probabilities;
|
||||||
|
utils::Matrix<Td> outputs;
|
||||||
|
utils::Matrix<Td> dinputs;
|
||||||
|
|
||||||
|
utils::Vector<Td> forward(const utils::Matrix<Td>& inputs, const utils::Matrix<Ti>& y_true){
|
||||||
|
|
||||||
|
// Output layer's activation function
|
||||||
|
activation.forward(inputs);
|
||||||
|
// Set the output
|
||||||
|
outputs = activation.outputs;
|
||||||
|
// Calculate and return loss value
|
||||||
|
Td data_loss = loss.calculate(inputs, y_true);
|
||||||
|
return data_loss;
|
||||||
|
}
|
||||||
|
|
||||||
|
void backward(const utils::Matrix<Td>& dvalues, const utils::Matrix<Ti>& y_true){
|
||||||
|
|
||||||
|
// Number of samples
|
||||||
|
const uint64_t samples = y_true.rows();
|
||||||
|
|
||||||
|
// If the labels are one-hot encoded,
|
||||||
|
// turn them into discrete values
|
||||||
|
|
||||||
|
const uint64_t rows = dvalues.rows();
|
||||||
|
const uint64_t cols = dvalues.cols();
|
||||||
|
|
||||||
|
if ((dinputs.rows() != rows) || dinputs.cols() != cols){
|
||||||
|
dinputs.resize(rows, cols);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (uint64_t i = 0; i < rows; ++i){
|
||||||
|
Td dot = Td{0};
|
||||||
|
for (uint64_t j = 0; j < cols; ++j){
|
||||||
|
dot += outputs(i,j) * dvalues(i,j);
|
||||||
|
}
|
||||||
|
for (uint64_t j = 0; j < cols; ++j){
|
||||||
|
dinputs(i,j) = outputs(i,j) * (dvalues(i,j) - dot);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
} // end namespace neural_networks
|
||||||
@@ -13,9 +13,11 @@ namespace neural_networks{
|
|||||||
struct Loss{
|
struct Loss{
|
||||||
|
|
||||||
utils::Vector<Td> sample_losses;
|
utils::Vector<Td> sample_losses;
|
||||||
|
utils::Matrix<Td> dinputs;
|
||||||
Td data_loss;
|
Td data_loss;
|
||||||
|
|
||||||
virtual utils::Vector<Td> forward(const utils::Matrix<Td>& output, const utils::Matrix<Ti>& y) = 0;
|
virtual utils::Vector<Td> forward(const utils::Matrix<Td>& output, const utils::Matrix<Ti>& y) = 0;
|
||||||
|
virtual void backward(const utils::Matrix<Td>& dvalues, const utils::Matrix<Ti>& y) = 0;
|
||||||
|
|
||||||
Td calculate(const utils::Matrix<Td>& output, const utils::Matrix<Ti>& y){
|
Td calculate(const utils::Matrix<Td>& output, const utils::Matrix<Ti>& y){
|
||||||
|
|
||||||
|
|||||||
@@ -17,6 +17,9 @@ namespace neural_networks{
|
|||||||
template <typename Td, typename Ti>
|
template <typename Td, typename Ti>
|
||||||
struct Loss_CategoricalCrossentrophy : Loss<Td, Ti> {
|
struct Loss_CategoricalCrossentrophy : Loss<Td, Ti> {
|
||||||
|
|
||||||
|
utils::Matrix<Td> dinputs;
|
||||||
|
|
||||||
|
|
||||||
utils::Vector<Td> forward(const utils::Matrix<Td>& y_pred, const utils::Matrix<Ti>& y_true) override{
|
utils::Vector<Td> forward(const utils::Matrix<Td>& y_pred, const utils::Matrix<Ti>& y_true) override{
|
||||||
|
|
||||||
utils::Vector<Td> correct_confidences(y_true.rows(), Td{0});
|
utils::Vector<Td> correct_confidences(y_true.rows(), Td{0});
|
||||||
@@ -48,6 +51,32 @@ namespace neural_networks{
|
|||||||
|
|
||||||
return negative_log_likelihoods;
|
return negative_log_likelihoods;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Computes the loss gradient with respect to the predictions and stores it in `dinputs`.
///
/// @param dvalues predicted values; its column count is used as the number of labels.
/// @param y_true  labels: a single column of class indices (expanded with
///                utils::eye) or an already expanded matrix, which is used as is.
void backward(const utils::Matrix<Td>& dvalues, const utils::Matrix<Ti>& y_true) override{

    // Sample count, converted to Td because it is used as the final divisor.
    const Td sample_count = static_cast<Td>(y_true.rows());

    // Number of labels per sample, taken from the width of dvalues.
    const Ti label_count = dvalues.cols();

    // Working copy of the targets, one column per label.
    utils::Matrix<Ti> targets;
    if (y_true.cols() != 1){
        targets = y_true;
    }else{
        targets = utils::eye(label_count, y_true.get_col(0));
    }

    // Gradient: targets combined with Ti{-1}, divided element-wise by dvalues,
    // then divided by the number of samples.
    // NOTE(review): inplace_matscalar presumably multiplies by the scalar,
    // i.e. negates the targets -- confirm against its definition.
    numerics::inplace_matscalar(targets, Ti{-1});
    dinputs = numerics::matdiv(utils::matcast<Td, Ti>(targets), dvalues);
    numerics::inplace_matdiv(dinputs, sample_count);
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -9,6 +9,7 @@
|
|||||||
|
|
||||||
#include "activation_functions/Activation_ReLU.h"
|
#include "activation_functions/Activation_ReLU.h"
|
||||||
#include "activation_functions/Activation_Softmax.h"
|
#include "activation_functions/Activation_Softmax.h"
|
||||||
|
#include "activation_functions/Activation_Softmax_Loss_CategoricalCrossentropy.h"
|
||||||
|
|
||||||
#include "loss/Loss.h" // Base
|
#include "loss/Loss.h" // Base
|
||||||
#include "loss/Loss_CategoricalCrossentrophy.h"
|
#include "loss/Loss_CategoricalCrossentrophy.h"
|
||||||
|
|||||||
@@ -8,7 +8,6 @@
|
|||||||
|
|
||||||
namespace numerics{
|
namespace numerics{
|
||||||
|
|
||||||
// ---------------- Serial baseline ----------------
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
utils::Matrix<T> matdiv(const utils::Matrix<T>& A, const utils::Vector<T>& b, std::string method){
|
utils::Matrix<T> matdiv(const utils::Matrix<T>& A, const utils::Vector<T>& b, std::string method){
|
||||||
|
|
||||||
@@ -33,6 +32,60 @@ namespace numerics{
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
void inplace_matdiv(utils::Matrix<T>& A, const utils::Matrix<T>& B){
|
||||||
|
|
||||||
|
const uint64_t rows = A.rows();
|
||||||
|
const uint64_t cols = A.cols();
|
||||||
|
|
||||||
|
if ((rows != B.rows()) || (cols != B.cols())){
|
||||||
|
throw std::runtime_error("inplace_matdiv: rows and cols are not the same'");
|
||||||
|
}
|
||||||
|
|
||||||
|
for (uint64_t i = 0; i < rows; ++i){
|
||||||
|
for (uint64_t j = 0; j < cols; ++j){
|
||||||
|
A(i,j) /= B(i,j);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
utils::Matrix<T> matdiv(const utils::Matrix<T>& A, const utils::Matrix<T>& B){
|
||||||
|
|
||||||
|
const uint64_t rows = A.rows();
|
||||||
|
const uint64_t cols = A.cols();
|
||||||
|
|
||||||
|
if ((rows != B.rows()) || (cols != B.cols())){
|
||||||
|
throw std::runtime_error("matdiv: choose div by: 'row' or 'col'");
|
||||||
|
}
|
||||||
|
|
||||||
|
utils::Matrix<T> C = A;
|
||||||
|
|
||||||
|
inplace_matdiv(C, B);
|
||||||
|
|
||||||
|
return C;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
void inplace_matdiv(utils::Matrix<T>& A, const T b){
|
||||||
|
|
||||||
|
const uint64_t rows = A.rows();
|
||||||
|
const uint64_t cols = A.cols();
|
||||||
|
|
||||||
|
|
||||||
|
for (uint64_t i = 0; i < rows; ++i){
|
||||||
|
for (uint64_t j = 0; j < cols; ++j){
|
||||||
|
A(i,j) /= b;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
} // namespace numerics
|
} // namespace numerics
|
||||||
|
|
||||||
#endif // _matdiv_n_
|
#endif // _matdiv_n_
|
||||||
@@ -2,3 +2,4 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "./utils/generators/linspace.h"
|
#include "./utils/generators/linspace.h"
|
||||||
|
#include "./utils/generators/eye.h"
|
||||||
|
|||||||
@@ -0,0 +1,24 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <stdexcept>

#include "utils/vector.h"
#include "utils/matrix.h"
|
||||||
|
|
||||||
|
|
||||||
|
namespace utils{
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
utils::Matrix<T> eye(const T a, const utils::Vector<T>& b){
|
||||||
|
|
||||||
|
const uint64_t N = b.size();
|
||||||
|
utils::Matrix<T> C(N, a, T{0});
|
||||||
|
|
||||||
|
for (uint64_t i = 0; i < N; ++i){
|
||||||
|
C(i, b[i]) = T{1};
|
||||||
|
}
|
||||||
|
return C;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
} // end namespace utils
|
||||||
+11
-8
@@ -2,7 +2,8 @@ obj/main.o: src/main.cpp include/./core/omp_config.h \
|
|||||||
include/./utils/utils.h include/./utils/vector.h \
|
include/./utils/utils.h include/./utils/vector.h \
|
||||||
include/./utils/random.h include/./utils/matrix.h \
|
include/./utils/random.h include/./utils/matrix.h \
|
||||||
include/./utils/generators.h include/./utils/generators/linspace.h \
|
include/./utils/generators.h include/./utils/generators/linspace.h \
|
||||||
include/utils/vector.h include/./utils/matcast.h \
|
include/utils/vector.h include/./utils/generators/eye.h \
|
||||||
|
include/utils/matrix.h include/./utils/matcast.h \
|
||||||
include/./numerics/numerics.h include/./numerics/max.h \
|
include/./numerics/numerics.h include/./numerics/max.h \
|
||||||
include/./numerics/exp.h include/./numerics/log.h \
|
include/./numerics/exp.h include/./numerics/log.h \
|
||||||
include/./numerics/vecclip.h include/./numerics/vecexp.h \
|
include/./numerics/vecclip.h include/./numerics/vecexp.h \
|
||||||
@@ -30,16 +31,16 @@ obj/main.o: src/main.cpp include/./core/omp_config.h \
|
|||||||
include/./decomp/decomp.h include/./modules/mesh/mesh.h \
|
include/./decomp/decomp.h include/./modules/mesh/mesh.h \
|
||||||
include/modules/mesh/mesh1d.h include/modules/fluids/fluids.h \
|
include/modules/mesh/mesh1d.h include/modules/fluids/fluids.h \
|
||||||
include/modules/fluids/diffusion1d.h include/core/global_config.h \
|
include/modules/fluids/diffusion1d.h include/core/global_config.h \
|
||||||
include/utils/matrix.h \
|
|
||||||
include/./modules/neural_networks/neural_networks.h \
|
include/./modules/neural_networks/neural_networks.h \
|
||||||
include/./modules/neural_networks/datasets/spiral.h \
|
include/./modules/neural_networks/datasets/spiral.h \
|
||||||
include/./modules/neural_networks/datasets/vertical.h \
|
include/./modules/neural_networks/datasets/vertical.h \
|
||||||
include/./modules/neural_networks/layers/Dense_Layer.h \
|
include/./modules/neural_networks/layers/Dense_Layer.h \
|
||||||
include/./modules/neural_networks/activation_functions/Activation_ReLU.h \
|
include/./modules/neural_networks/activation_functions/Activation_ReLU.h \
|
||||||
include/./modules/neural_networks/activation_functions/Activation_Softmax.h \
|
include/./modules/neural_networks/activation_functions/Activation_Softmax.h \
|
||||||
include/./modules/neural_networks/loss/Loss.h \
|
include/./modules/neural_networks/activation_functions/Activation_Softmax_Loss_CategoricalCrossentropy.h \
|
||||||
include/./numerics/vecmean.h \
|
include/./modules/neural_networks/loss/Loss_CategoricalCrossentrophy.h \
|
||||||
include/./modules/neural_networks/loss/Loss_CategoricalCrossentrophy.h
|
include/./modules/neural_networks/loss/./Loss.h \
|
||||||
|
include/./numerics/vecmean.h
|
||||||
include/./core/omp_config.h:
|
include/./core/omp_config.h:
|
||||||
include/./utils/utils.h:
|
include/./utils/utils.h:
|
||||||
include/./utils/vector.h:
|
include/./utils/vector.h:
|
||||||
@@ -48,6 +49,8 @@ include/./utils/matrix.h:
|
|||||||
include/./utils/generators.h:
|
include/./utils/generators.h:
|
||||||
include/./utils/generators/linspace.h:
|
include/./utils/generators/linspace.h:
|
||||||
include/utils/vector.h:
|
include/utils/vector.h:
|
||||||
|
include/./utils/generators/eye.h:
|
||||||
|
include/utils/matrix.h:
|
||||||
include/./utils/matcast.h:
|
include/./utils/matcast.h:
|
||||||
include/./numerics/numerics.h:
|
include/./numerics/numerics.h:
|
||||||
include/./numerics/max.h:
|
include/./numerics/max.h:
|
||||||
@@ -95,13 +98,13 @@ include/modules/mesh/mesh1d.h:
|
|||||||
include/modules/fluids/fluids.h:
|
include/modules/fluids/fluids.h:
|
||||||
include/modules/fluids/diffusion1d.h:
|
include/modules/fluids/diffusion1d.h:
|
||||||
include/core/global_config.h:
|
include/core/global_config.h:
|
||||||
include/utils/matrix.h:
|
|
||||||
include/./modules/neural_networks/neural_networks.h:
|
include/./modules/neural_networks/neural_networks.h:
|
||||||
include/./modules/neural_networks/datasets/spiral.h:
|
include/./modules/neural_networks/datasets/spiral.h:
|
||||||
include/./modules/neural_networks/datasets/vertical.h:
|
include/./modules/neural_networks/datasets/vertical.h:
|
||||||
include/./modules/neural_networks/layers/Dense_Layer.h:
|
include/./modules/neural_networks/layers/Dense_Layer.h:
|
||||||
include/./modules/neural_networks/activation_functions/Activation_ReLU.h:
|
include/./modules/neural_networks/activation_functions/Activation_ReLU.h:
|
||||||
include/./modules/neural_networks/activation_functions/Activation_Softmax.h:
|
include/./modules/neural_networks/activation_functions/Activation_Softmax.h:
|
||||||
include/./modules/neural_networks/loss/Loss.h:
|
include/./modules/neural_networks/activation_functions/Activation_Softmax_Loss_CategoricalCrossentropy.h:
|
||||||
include/./numerics/vecmean.h:
|
|
||||||
include/./modules/neural_networks/loss/Loss_CategoricalCrossentrophy.h:
|
include/./modules/neural_networks/loss/Loss_CategoricalCrossentrophy.h:
|
||||||
|
include/./modules/neural_networks/loss/./Loss.h:
|
||||||
|
include/./numerics/vecmean.h:
|
||||||
|
|||||||
BIN
Binary file not shown.
+7
-7
@@ -24,20 +24,20 @@ int main(int argc, char const *argv[])
|
|||||||
{
|
{
|
||||||
|
|
||||||
utils::Mf X(10,2, 0);
|
utils::Mf X(10,2, 0);
|
||||||
utils::Matrix<uint64_t> y(10,1, 0);
|
utils::Matrix<int64_t> y(10,1, 0);
|
||||||
utils::Vector<uint64_t> class_targets;
|
utils::Vector<int64_t> class_targets;
|
||||||
float loss;
|
float loss;
|
||||||
float accuracy;
|
float accuracy;
|
||||||
|
|
||||||
//neural_networks::create_spital_data<float, uint64_t>(10000, 3, X, y);
|
//neural_networks::create_spital_data<float, uint64_t>(10000, 3, X, y);
|
||||||
neural_networks::create_vertical_data<float, uint64_t>(100, 3, X, y);
|
neural_networks::create_vertical_data<float, int64_t>(100, 3, X, y);
|
||||||
|
|
||||||
neural_networks::Dense_Layer<float> dense1(2, 3);
|
neural_networks::Dense_Layer<float> dense1(2, 3);
|
||||||
neural_networks::Activation_ReLU<float> activation1;
|
neural_networks::Activation_ReLU<float> activation1;
|
||||||
neural_networks::Dense_Layer<float> dense2(3, 3);
|
neural_networks::Dense_Layer<float> dense2(3, 3);
|
||||||
neural_networks::Activation_Softmax<float> activation2;
|
neural_networks::Activation_Softmax<float> activation2;
|
||||||
|
|
||||||
neural_networks::Loss_CategoricalCrossentrophy<float, uint64_t> loss_funtion;
|
neural_networks::Loss_CategoricalCrossentrophy<float, int64_t> loss_funtion;
|
||||||
|
|
||||||
float lowest_loss = 9999999;
|
float lowest_loss = 9999999;
|
||||||
utils::Mf best_dense_1_weights = dense1.weights;
|
utils::Mf best_dense_1_weights = dense1.weights;
|
||||||
@@ -47,7 +47,7 @@ int main(int argc, char const *argv[])
|
|||||||
|
|
||||||
utils::Vf vectRND;
|
utils::Vf vectRND;
|
||||||
|
|
||||||
utils::Vector<uint64_t> predections;
|
utils::Vector<int64_t> predections;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -73,10 +73,10 @@ int main(int argc, char const *argv[])
|
|||||||
// it takes the output of the second dense layer here and returns loss
|
// it takes the output of the second dense layer here and returns loss
|
||||||
loss = loss_funtion.calculate(activation2.outputs, y);
|
loss = loss_funtion.calculate(activation2.outputs, y);
|
||||||
|
|
||||||
predections = numerics::matargmax_row<uint64_t, float>(activation2.outputs);
|
predections = numerics::matargmax_row<int64_t, float>(activation2.outputs);
|
||||||
|
|
||||||
if (y.cols() < 1){
|
if (y.cols() < 1){
|
||||||
class_targets = numerics::matargmax_row<uint64_t, uint64_t>(y);
|
class_targets = numerics::matargmax_row<int64_t, int64_t>(y);
|
||||||
}else{
|
}else{
|
||||||
class_targets = y.get_col(0);
|
class_targets = y.get_col(0);
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user