// Flux-openbuild/include/numerics/matadd.h

#ifndef _matadd_n_
#define _matadd_n_

#include "./utils/vector.h"
#include "./utils/matrix.h"
#include "./core/omp_config.h"

namespace numerics{

	/// @brief Adds a column vector to every column of a matrix, in place.
	///
	/// After the call, A(r, c) has been incremented by x[r] for every valid (r, c).
	///
	/// @tparam T Element type shared by the matrix and the vector.
	/// @param A  Matrix that is modified in place.
	/// @param x  Vector whose length must equal A.rows().
	/// @throws std::runtime_error if x.size() differs from A.rows().
	template <typename T>
	void inplace_matadd_colvec(utils::Matrix<T>& A, const utils::Vector<T>& x) {

	    const uint64_t rows = A.rows();
	    const uint64_t cols = A.cols();

	    if (rows != x.size()) {
	        throw std::runtime_error("inplace_matadd_colvec: dimension mismatch");
	    }

	    // Row-outer traversal: every element update is independent, so the loop
	    // order does not affect the result. Walking a full row in the inner loop
	    // keeps accesses contiguous if utils::Matrix is stored row-major
	    // (assumed here -- TODO confirm against utils/matrix.h).
	    for (uint64_t r = 0; r < rows; ++r) {
	        // The addend is constant along a row; look it up once per row.
	        const auto& addend = x[r];
	        for (uint64_t c = 0; c < cols; ++c) {
	            A(r, c) += addend;
	        }
	    }
	}

	/// @brief Adds a row vector to every row of a matrix, in place.
	///
	/// After the call, A(r, c) has been incremented by x[c] for every valid (r, c).
	///
	/// @tparam T Element type shared by the matrix and the vector.
	/// @param A  Matrix that is modified in place.
	/// @param x  Vector whose length must equal A.cols().
	/// @throws std::runtime_error if x.size() differs from A.cols().
	template <typename T>
	void inplace_matadd_rowvec(utils::Matrix<T>& A, const utils::Vector<T>& x) {

	    const uint64_t rows = A.rows();
	    const uint64_t cols = A.cols();

	    if (cols != x.size()) {
	        throw std::runtime_error("inplace_matadd_rowvec: dimension mismatch");
	    }

	    // Row-outer traversal: every element update is independent, so the loop
	    // order does not affect the result. The inner loop then walks both the
	    // matrix row and x front to back, which is contiguous if utils::Matrix is
	    // stored row-major (assumed here -- TODO confirm against utils/matrix.h).
	    for (uint64_t r = 0; r < rows; ++r) {
	        for (uint64_t c = 0; c < cols; ++c) {
	            A(r, c) += x[c];
	        }
	    }
	}

	/// @brief Out-of-place variant of inplace_matadd_colvec.
	///
	/// Copies A, adds x to every column of the copy, and returns the copy;
	/// A itself is left untouched.
	///
	/// @tparam T Element type shared by the matrix and the vector.
	/// @param A  Source matrix (not modified).
	/// @param x  Vector forwarded to inplace_matadd_colvec together with the copy.
	/// @return   The modified copy of A.
	template <typename T>
	utils::Matrix<T> matadd_colvec(const utils::Matrix<T>& A, const utils::Vector<T>& x) {
	    utils::Matrix<T> result(A);        // work on a private copy of the input
	    inplace_matadd_colvec(result, x);  // size validation happens in the in-place routine
	    return result;
	}

	/// @brief Out-of-place variant of inplace_matadd_rowvec.
	///
	/// Copies A, adds x to every row of the copy, and returns the copy;
	/// A itself is left untouched.
	///
	/// @tparam T Element type shared by the matrix and the vector.
	/// @param A  Source matrix (not modified).
	/// @param x  Vector forwarded to inplace_matadd_rowvec together with the copy.
	/// @return   The modified copy of A.
	template <typename T>
	utils::Matrix<T> matadd_rowvec(const utils::Matrix<T>& A, const utils::Vector<T>& x) {
	    utils::Matrix<T> result(A);        // work on a private copy of the input
	    inplace_matadd_rowvec(result, x);  // size validation happens in the in-place routine
	    return result;
	}

	/// @brief Adds a vector to a matrix, broadcasting it along rows or columns.
	///
	/// @tparam T     Element type shared by the matrix and the vector.
	/// @param A      Source matrix (not modified).
	/// @param x      Vector to broadcast.
	/// @param method Broadcasting mode:
	///               - "row":  delegate to matadd_rowvec (requires x.size() == A.cols());
	///               - "col":  delegate to matadd_colvec (requires x.size() == A.rows());
	///               - "auto": pick whichever of the two modes matches x.size().
	/// @return A new matrix holding the broadcast sum.
	/// @throws std::runtime_error if `method` is not one of the values above, if
	///         "auto" finds that both modes fit (x.size() == A.rows() == A.cols())
	///         or that neither fits, or if the selected helper rejects the sizes.
	template <typename T>
	utils::Matrix<T> matadd(const utils::Matrix<T>& A, const utils::Vector<T>& x, std::string method = "auto"){

	    if (method == "row") {
	        return matadd_rowvec(A, x);
	    }
	    if (method == "col") {
	        return matadd_colvec(A, x);
	    }
	    if (method != "auto") {
	        throw std::runtime_error("matadd: undefined fault - defined method");
	    }

	    const uint64_t rows = A.rows();
	    const uint64_t cols = A.cols();
	    const uint64_t N = x.size();

	    // A column vector has one entry per row; a row vector has one per column.
	    const bool fits_as_colvec = (rows == N);
	    const bool fits_as_rowvec = (cols == N);

	    if (fits_as_colvec && fits_as_rowvec) {
	        // Square matrix with a matching vector: both interpretations are valid,
	        // so the caller has to choose "row" or "col" explicitly.
	        throw std::runtime_error("matadd: too many options for dimensions");
	    }
	    if (fits_as_colvec) {
	        return matadd_colvec(A, x);
	    }
	    if (fits_as_rowvec) {
	        return matadd_rowvec(A, x);
	    }
	    throw std::runtime_error("matadd: undefined fault - auto");
	}


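	/*
	// NOTE: everything from here to the closing comment marker is disabled (commented-out)
	// matvec/vecmat code and is not compiled as part of this header.
	// -------------- Row-parallel OpenMP (one dot product per row) ----------------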
	template <typename T>
	utils::Vector<T> matvec_omp(const utils::Matrix<T>& A, const utils::Vector<T>& x) {
	    if (A.cols() != x.size()) {
	        throw std::runtime_error("matvec: dimension mismatch");
	    }

	    const uint64_t m = A.rows();
	    const uint64_t n = A.cols();

	    utils::Vector<T> y(m, T{0}); // <-- y has length m (rows)


        const T* xptr = x.data();
        const T* Aptr = A.data();	// row-major: A(i,j) == Aptr[i*n + j]

        // Each row i is an independent dot product: y[i] = dot(A[i,*], x)
	    #pragma omp parallel for schedule(static)
	    for (uint64_t i = 0; i < m; ++i) {
            const T* row = Aptr + i * n;     // contiguous row i
            T acc = T{0};
            #pragma omp simd reduction(+:acc)
	        for (uint64_t j = 0; j < n; ++j) {
	            acc += row[j] * xptr[j];
	        }
	        y[i] = acc;
		}

	    return y;
	}

	// -------------- Auto OpenMP ----------------
	template <typename T>
	utils::Vector<T> matvec_auto(const utils::Matrix<T>& A,
	                             const utils::Vector<T>& x) {


	    uint64_t work = A.rows() * A.cols();

	    bool can_parallel = omp_config::omp_parallel_allowed();
		#ifdef _OPENMP
		    int threads = omp_get_max_threads();
		#else
		    int threads = 1;
		#endif

	    if (can_parallel || work > static_cast<uint64_t>(threads) * 4ull) {
	        return matvec_omp(A,x);
	    }
	    else{
	    	// Safe fallback
	    	return matvec(A,x);
	    }

	}

// =================================================
//   y = x * A    (Vector–Matrix product)
// =================================================
	template <typename T>
	utils::Vector<T> vecmat(const utils::Vector<T>& x, const utils::Matrix<T>& A) {
	    if (x.size() != A.rows()) {
	        throw std::runtime_error("vecmat: dimension mismatch");
	    }
	    const uint64_t m = A.rows();
	    const uint64_t n = A.cols();

	    utils::Vector<T> y(n, T{0});

	    for (uint64_t j = 0; j < n; ++j) {
	        for (uint64_t i = 0; i < m; ++i) {
	            y[j] += x[i] * A(i, j);
	        }
	    }
	    return y;
	}

	// -------------- Column-parallel OpenMP (one dot product per column) ----------------
	template <typename T>
	utils::Vector<T> vecmat_omp(const utils::Vector<T>& x, const utils::Matrix<T>& A) {
	    if (x.size() != A.rows()) {
	        throw std::runtime_error("vecmat: dimension mismatch");
	    }
	    const uint64_t m = A.rows();
	    const uint64_t n = A.cols();

	    utils::Vector<T> y(n, T{0});
	    #pragma omp parallel for schedule(static)
	    for (uint64_t j = 0; j < n; ++j) {
	        T acc = T{0};
	        for (uint64_t i = 0; i < m; ++i) {
	            acc += x[i] * A(i, j);
	        }
	        y[j] = acc;
	    }
	    return y;
	}

	// -------------- Auto OpenMP ----------------
	template <typename T>
	utils::Vector<T> vecmat_auto(const utils::Vector<T>& x,
								 const utils::Matrix<T>& A) {

	    uint64_t work = A.rows() * A.cols();

	    bool can_parallel = omp_config::omp_parallel_allowed();
		#ifdef _OPENMP
		    int threads = omp_get_max_threads();
		#else
		    int threads = 1;
		#endif

	    if (can_parallel || work > static_cast<uint64_t>(threads) * 4ull) {
	        return vecmat_omp(x,A);
	    }
	    else{
	    	// Safe fallback
	    	return vecmat(x,A);
	    }

	}
*/

} // namespace numerics

#endif // _matadd_n_