Sync public subset from Flux
This commit is contained in:
@@ -16,38 +16,67 @@ namespace neural_networks{
|
||||
|
||||
template <typename Td, typename Ti>
|
||||
struct Loss_CategoricalCrossentrophy : Loss<Td, Ti> {
|
||||
|
||||
utils::Vector<Td> forward(const utils::Matrix<Td>& y_pred, const utils::Matrix<Ti>& y_true) override{
|
||||
|
||||
utils::Vector<Td> correct_confidences(y_true.rows(), Td{0});
|
||||
utils::Matrix<Td> cast_y_true = utils::matcast<Td, Ti>(y_true);
|
||||
|
||||
// Number of samles in a batch
|
||||
const uint64_t samples = y_true.rows();
|
||||
utils::Matrix<Td> dinputs;
|
||||
|
||||
// Clip data to prevent dividning by 0
|
||||
// Clip both sides to not drag mean towards any value
|
||||
utils::Matrix<Td> y_pred_clipped = numerics::matclip(y_pred, Td{1e-7}, Td{1.0} - Td{1e-7});
|
||||
|
||||
utils::Vector<Td> forward(const utils::Matrix<Td>& y_pred, const utils::Matrix<Ti>& y_true) override{
|
||||
|
||||
utils::Vector<Td> correct_confidences(y_true.rows(), Td{0});
|
||||
utils::Matrix<Td> cast_y_true = utils::matcast<Td, Ti>(y_true);
|
||||
|
||||
// Probabilities for taget values
|
||||
// only if categorical labes
|
||||
if (y_true.cols() == 1){
|
||||
for (uint64_t i = 0; i < y_true.rows(); ++i){
|
||||
const uint64_t idx = static_cast<uint64_t>(y_true(i, 0));
|
||||
correct_confidences[i] = y_pred_clipped(i, idx);
|
||||
}
|
||||
}else{ // Mask values - only for one-hot encoded labels
|
||||
correct_confidences = numerics::matdot_row(y_pred_clipped, cast_y_true);
|
||||
// Number of samles in a batch
|
||||
const uint64_t samples = y_true.rows();
|
||||
|
||||
// Clip data to prevent dividning by 0
|
||||
// Clip both sides to not drag mean towards any value
|
||||
utils::Matrix<Td> y_pred_clipped = numerics::matclip(y_pred, Td{1e-7}, Td{1.0} - Td{1e-7});
|
||||
|
||||
// Probabilities for taget values
|
||||
// only if categorical labes
|
||||
if (y_true.cols() == 1){
|
||||
for (uint64_t i = 0; i < y_true.rows(); ++i){
|
||||
const uint64_t idx = static_cast<uint64_t>(y_true(i, 0));
|
||||
correct_confidences[i] = y_pred_clipped(i, idx);
|
||||
}
|
||||
// Losses
|
||||
utils::Vector<Td> negative_log_likelihoods(samples, Td{0});
|
||||
for (uint64_t i = 0; i < samples; ++i){
|
||||
negative_log_likelihoods[i] = -std::log(static_cast<Td>(correct_confidences[i]));
|
||||
}
|
||||
}else{ // Mask values - only for one-hot encoded labels
|
||||
correct_confidences = numerics::matdot_row(y_pred_clipped, cast_y_true);
|
||||
|
||||
return negative_log_likelihoods;
|
||||
}
|
||||
// Losses
|
||||
utils::Vector<Td> negative_log_likelihoods(samples, Td{0});
|
||||
for (uint64_t i = 0; i < samples; ++i){
|
||||
negative_log_likelihoods[i] = -std::log(static_cast<Td>(correct_confidences[i]));
|
||||
}
|
||||
|
||||
return negative_log_likelihoods;
|
||||
}
|
||||
|
||||
void backward(const utils::Matrix<Td>& dvalues, const utils::Matrix<Ti>& y_true) override{
|
||||
|
||||
// Number of samples
|
||||
const Td samples = static_cast<Td> (y_true.rows());
|
||||
// Number of labels in every sample
|
||||
// We'll use the first samle to count them
|
||||
const Ti labels = dvalues.cols();
|
||||
|
||||
utils::Matrix<Ti> y_temp;
|
||||
|
||||
if (y_true.cols() == 1){
|
||||
|
||||
y_temp = utils::eye(labels, y_true.get_col(0));
|
||||
}else{
|
||||
y_temp = y_true;
|
||||
}
|
||||
|
||||
|
||||
// Calculate the gradient
|
||||
numerics::inplace_matscalar(y_temp,Ti{-1});
|
||||
dinputs = numerics::matdiv(utils::matcast<Td, Ti>(y_temp), dvalues);
|
||||
numerics::inplace_matdiv(dinputs, samples);
|
||||
|
||||
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user