#pragma once

#include <cmath>    // std::log
#include <cstdint>  // uint64_t

#include "./core/omp_config.h"

#include "./utils/vector.h"
#include "./utils/matrix.h"
#include "./utils/matcast.h"

#include "./numerics/matclip.h"
#include "./numerics/veclog.h"

#include "./Loss.h"

namespace neural_networks{

template <typename Td, typename Ti>
struct Loss_CategoricalCrossentrophy : Loss<Td, Ti> {

    // Gradient with respect to the inputs; filled in by backward()
    utils::Matrix<Td> dinputs;

    utils::Vector<Td> forward(const utils::Matrix<Td>& y_pred, const utils::Matrix<Ti>& y_true) override{

        utils::Vector<Td> correct_confidences(y_true.rows(), Td{0});
        utils::Matrix<Td> cast_y_true = utils::matcast<Td, Ti>(y_true);

        // Number of samples in the batch
        const uint64_t samples = y_true.rows();

        // Clip data to prevent division by zero;
        // clip both sides so the mean is not dragged towards either edge
        utils::Matrix<Td> y_pred_clipped = numerics::matclip(y_pred, Td{1e-7}, Td{1.0} - Td{1e-7});

        // Probabilities for target values
        if (y_true.cols() == 1){
            // Sparse categorical labels: pick the confidence of the correct class
            for (uint64_t i = 0; i < y_true.rows(); ++i){
                const uint64_t idx = static_cast<uint64_t>(y_true(i, 0));
                correct_confidences[i] = y_pred_clipped(i, idx);
            }
        }else{
            // One-hot encoded labels: mask values via a row-wise dot product
            correct_confidences = numerics::matdot_row(y_pred_clipped, cast_y_true);
        }

        // Per-sample losses
        utils::Vector<Td> negative_log_likelihoods(samples, Td{0});
        for (uint64_t i = 0; i < samples; ++i){
            negative_log_likelihoods[i] = -std::log(correct_confidences[i]);
        }

        return negative_log_likelihoods;
    }
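
    // Illustrative example (made-up numbers, not part of this header):
    // for a single sample with y_pred = [0.7, 0.2, 0.1] and the sparse
    // label y_true = [0], forward() picks the confidence of the correct
    // class (0.7) and returns -log(0.7) ~= 0.357 as the sample loss.
    // A one-hot label [1, 0, 0] selects the same entry via the row-wise
    // dot product branch.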

    void backward(const utils::Matrix<Td>& dvalues, const utils::Matrix<Ti>& y_true) override{

        // Number of samples in the batch
        const Td samples = static_cast<Td>(y_true.rows());
        // Number of labels in every sample (taken from the gradient's columns)
        const Ti labels = dvalues.cols();

        utils::Matrix<Ti> y_temp;

        // If labels are sparse class indices, turn them into one-hot vectors
        if (y_true.cols() == 1){
            y_temp = utils::eye(labels, y_true.get_col(0));
        }else{
            y_temp = y_true;
        }

        // Calculate the gradient: d/dy_hat of -sum(y_true * log(y_hat)) is
        // -y_true / y_hat, normalized by the number of samples so its scale
        // does not depend on the batch size
        numerics::inplace_matscalar(y_temp, Ti{-1});
        dinputs = numerics::matdiv(utils::matcast<Td, Ti>(y_temp), dvalues);
        numerics::inplace_matdiv(dinputs, samples);
    }
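
    // Illustrative example (made-up numbers): for one sample with one-hot
    // y_true = [0, 1, 0] and dvalues = [0.1, 0.7, 0.2], the raw gradient is
    // -y_true / dvalues = [0, -1/0.7, 0] ~= [0, -1.43, 0]; only the
    // true-class entry is nonzero, and it is then divided by the batch size.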
};

} // end namespace neural_networks
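
// Usage sketch (illustrative only; `y_pred` stands for the network's softmax
// output and `y_true` for the integer class labels, neither is defined here):
//
//   neural_networks::Loss_CategoricalCrossentrophy<double, uint64_t> loss;
//   utils::Vector<double> sample_losses = loss.forward(y_pred, y_true);
//   loss.backward(y_pred, y_true);   // per-input gradient ends up in loss.dinputs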