Sync public subset from Flux

2025-10-20 12:24:21 +00:00
parent 9a69d64d79
commit a334b74935
8 changed files with 230 additions and 35 deletions
--- a/include/modules/neural_networks/loss/Loss_CategoricalCrossentrophy.h
+++ b/include/modules/neural_networks/loss/Loss_CategoricalCrossentrophy.h
@@ -16,38 +16,67 @@ namespace neural_networks{

 	template <typename Td, typename Ti>
 	struct Loss_CategoricalCrossentrophy : Loss<Td, Ti>  {
-		
-			utils::Vector<Td> forward(const utils::Matrix<Td>& y_pred, const utils::Matrix<Ti>& y_true) override{
-				
-				utils::Vector<Td> correct_confidences(y_true.rows(), Td{0});
-				utils::Matrix<Td> cast_y_true = utils::matcast<Td, Ti>(y_true);

-				// Number of samles in a batch
-				const uint64_t samples = y_true.rows();
+		utils::Matrix<Td> dinputs;

-				// Clip data to prevent dividning by 0
-				// Clip both sides to not drag mean towards any value
-				utils::Matrix<Td> y_pred_clipped = numerics::matclip(y_pred, Td{1e-7}, Td{1.0} - Td{1e-7});
+	
+		utils::Vector<Td> forward(const utils::Matrix<Td>& y_pred, const utils::Matrix<Ti>& y_true) override{
+			
+			utils::Vector<Td> correct_confidences(y_true.rows(), Td{0});
+			utils::Matrix<Td> cast_y_true = utils::matcast<Td, Ti>(y_true);

-				// Probabilities for taget values
-				// only if categorical labes
-				if (y_true.cols() == 1){
-					for (uint64_t i = 0; i < y_true.rows(); ++i){
-						const uint64_t idx = static_cast<uint64_t>(y_true(i, 0));
-						correct_confidences[i] = y_pred_clipped(i, idx);
-					}
-				}else{ // Mask values - only for one-hot encoded labels
-					correct_confidences = numerics::matdot_row(y_pred_clipped, cast_y_true);
+			// Number of samples in a batch
+			const uint64_t samples = y_true.rows();

+			// Clip data to prevent dividing by 0
+			// Clip both sides to not drag mean towards any value
+			utils::Matrix<Td> y_pred_clipped = numerics::matclip(y_pred, Td{1e-7}, Td{1.0} - Td{1e-7});
+
+			// Probabilities for target values
+			// only if categorical labels
+			if (y_true.cols() == 1){
+				for (uint64_t i = 0; i < y_true.rows(); ++i){
+					const uint64_t idx = static_cast<uint64_t>(y_true(i, 0));
+					correct_confidences[i] = y_pred_clipped(i, idx);
 				}
-				// Losses			
-				utils::Vector<Td> negative_log_likelihoods(samples, Td{0});
-				for (uint64_t i = 0; i < samples; ++i){
-				    negative_log_likelihoods[i] = -std::log(static_cast<Td>(correct_confidences[i]));
-				}
+			}else{ // Mask values - only for one-hot encoded labels
+				correct_confidences = numerics::matdot_row(y_pred_clipped, cast_y_true);

-				return negative_log_likelihoods;
 			}
+			// Losses			
+			utils::Vector<Td> negative_log_likelihoods(samples, Td{0});
+			for (uint64_t i = 0; i < samples; ++i){
+			    negative_log_likelihoods[i] = -std::log(static_cast<Td>(correct_confidences[i]));
+			}
+
+			return negative_log_likelihoods;
+		}
+		
+		void backward(const utils::Matrix<Td>& dvalues, const utils::Matrix<Ti>& y_true) override{
+			// Backward pass: fills the member `dinputs` from dvalues and y_true; returns nothing.
+			// Number of samples, taken from the row count of y_true
+			const Td samples = static_cast<Td> (y_true.rows());
+			// Number of labels in every sample,
+			// taken from the column count of dvalues
+			const Ti labels = dvalues.cols(); // NOTE(review): column count is stored in the label type Ti - confirm it cannot narrow
+			// Working copy of the targets, kept in the label type Ti
+			utils::Matrix<Ti> y_temp;
+			// A single-column y_true is passed through utils::eye; anything else is copied as-is
+			if (y_true.cols() == 1){
+			 	// presumably eye() builds one one-hot row per class index found in column 0 - TODO confirm
+			 	y_temp = utils::eye(labels, y_true.get_col(0));
+			 }else{
+			 	y_temp = y_true;
+			 }
+
+
+			 // Calculate the gradient
+			 numerics::inplace_matscalar(y_temp,Ti{-1}); // presumably scales y_temp by -1; NOTE(review): Ti{-1} is ill-formed (narrowing) if Ti is unsigned
+			 dinputs = numerics::matdiv(utils::matcast<Td, Ti>(y_temp), dvalues); // presumably element-wise (-targets) / dvalues - TODO confirm
+			 numerics::inplace_matdiv(dinputs, samples); // presumably divides every entry by the sample count - TODO confirm
+			// The result is left in dinputs for the caller to read
+
+		}
 	};