226 lines
5.6 KiB
C++
226 lines
5.6 KiB
C++
#ifndef _matadd_n_
|
||
#define _matadd_n_
|
||
|
||
#include "./utils/vector.h"
|
||
#include "./utils/matrix.h"
|
||
#include "./core/omp_config.h"
|
||
|
||
namespace numerics{
|
||
|
||
template <typename T>
|
||
void inplace_matadd_colvec(utils::Matrix<T>& A, const utils::Vector<T>& x) {
|
||
|
||
const uint64_t rows = A.rows();
|
||
const uint64_t cols = A.cols();
|
||
|
||
if (rows != x.size()) {
|
||
throw std::runtime_error("inplace_matadd_colvec: dimension mismatch");
|
||
}
|
||
|
||
for (uint64_t i = 0; i < cols; ++i) {
|
||
for (uint64_t j = 0; j < rows; ++j) {
|
||
A(j, i) += x[j];
|
||
}
|
||
}
|
||
}
|
||
|
||
template <typename T>
|
||
void inplace_matadd_rowvec(utils::Matrix<T>& A, const utils::Vector<T>& x) {
|
||
|
||
const uint64_t rows = A.rows();
|
||
const uint64_t cols = A.cols();
|
||
|
||
if (cols != x.size()) {
|
||
throw std::runtime_error("inplace_matadd_rowvec: dimension mismatch");
|
||
}
|
||
|
||
for (uint64_t i = 0; i < cols; ++i) {
|
||
for (uint64_t j = 0; j < rows; ++j) {
|
||
A(j, i) += x[i];
|
||
}
|
||
}
|
||
}
|
||
|
||
template <typename T>
|
||
utils::Matrix<T> matadd_colvec(const utils::Matrix<T>& A, const utils::Vector<T>& x) {
|
||
|
||
//const uint64_t rows = A.rows();
|
||
//const uint64_t cols = A.cols();
|
||
|
||
utils::Matrix<T> B = A;
|
||
|
||
inplace_matadd_colvec(B, x);
|
||
|
||
return B;
|
||
}
|
||
|
||
template <typename T>
|
||
utils::Matrix<T> matadd_rowvec(const utils::Matrix<T>& A, const utils::Vector<T>& x) {
|
||
|
||
//const uint64_t rows = A.rows();
|
||
//const uint64_t cols = A.cols();
|
||
|
||
utils::Matrix<T> B = A;
|
||
|
||
inplace_matadd_rowvec(B, x);
|
||
|
||
return B;
|
||
}
|
||
|
||
template <typename T>
|
||
utils::Matrix<T> matadd(const utils::Matrix<T>& A, const utils::Vector<T>& x, std::string method = "auto"){
|
||
|
||
const uint64_t rows = A.rows();
|
||
const uint64_t cols = A.cols();
|
||
const uint64_t N = x.size();
|
||
|
||
if (method=="auto"){
|
||
|
||
if (rows==cols){
|
||
throw std::runtime_error("matadd: too many options for dimensions");
|
||
} else if (rows == N){
|
||
return matadd_rowvec(A, x);
|
||
} else if (cols == N){
|
||
return matadd_colvec(A, x);
|
||
}else{
|
||
throw std::runtime_error("matadd: undefined fault - auto");
|
||
}
|
||
}else if(method=="row"){
|
||
return matadd_rowvec(A, x);
|
||
} else if (method=="col"){
|
||
return matadd_colvec(A, x);
|
||
}else{
|
||
throw std::runtime_error("matadd: undefined fault - defined method");
|
||
}
|
||
}
|
||
|
||
|
||
|
||
|
||
/*
|
||
// -------------- OpenMP: rows in parallel, SIMD reduction per row (no collapse) ----------------
|
||
template <typename T>
|
||
utils::Vector<T> matvec_omp(const utils::Matrix<T>& A, const utils::Vector<T>& x) {
|
||
if (A.cols() != x.size()) {
|
||
throw std::runtime_error("matvec: dimension mismatch");
|
||
}
|
||
|
||
const uint64_t m = A.rows();
|
||
const uint64_t n = A.cols();
|
||
|
||
utils::Vector<T> y(m, T{0}); // <-- y has length m (rows)
|
||
|
||
|
||
const T* xptr = x.data();
|
||
const T* Aptr = A.data(); // row-major: A(i,j) == Aptr[i*n + j]
|
||
|
||
// Each row i is an independent dot product: y[i] = dot(A[i,*], x)
|
||
#pragma omp parallel for schedule(static)
|
||
for (uint64_t i = 0; i < m; ++i) {
|
||
const T* row = Aptr + i * n; // contiguous row i
|
||
T acc = T{0};
|
||
#pragma omp simd reduction(+:acc)
|
||
for (uint64_t j = 0; j < n; ++j) {
|
||
acc += row[j] * xptr[j];
|
||
}
|
||
y[i] = acc;
|
||
}
|
||
|
||
return y;
|
||
}
|
||
|
||
// -------------- Auto OpenMP ----------------
|
||
template <typename T>
|
||
utils::Vector<T> matvec_auto(const utils::Matrix<T>& A,
|
||
const utils::Vector<T>& x) {
|
||
|
||
|
||
uint64_t work = A.rows() * A.cols();
|
||
|
||
bool can_parallel = omp_config::omp_parallel_allowed();
|
||
#ifdef _OPENMP
|
||
int threads = omp_get_max_threads();
|
||
#else
|
||
int threads = 1;
|
||
#endif
|
||
|
||
if (can_parallel || work > static_cast<uint64_t>(threads) * 4ull) {
|
||
return matvec_omp(A,x);
|
||
}
|
||
else{
|
||
// Safe fallback
|
||
return matvec(A,x);
|
||
}
|
||
|
||
}
|
||
|
||
// =================================================
|
||
// y = x * A (Vector–Matrix product)
|
||
// =================================================
|
||
template <typename T>
|
||
utils::Vector<T> vecmat(const utils::Vector<T>& x, const utils::Matrix<T>& A) {
|
||
if (x.size() != A.rows()) {
|
||
throw std::runtime_error("vecmat: dimension mismatch");
|
||
}
|
||
const uint64_t m = A.rows();
|
||
const uint64_t n = A.cols();
|
||
|
||
utils::Vector<T> y(n, T{0});
|
||
|
||
for (uint64_t j = 0; j < n; ++j) {
|
||
for (uint64_t i = 0; i < m; ++i) {
|
||
y[j] += x[i] * A(i, j);
|
||
}
|
||
}
|
||
return y;
|
||
}
|
||
|
||
// -------------- OpenMP: columns in parallel (no collapse) ----------------
|
||
template <typename T>
|
||
utils::Vector<T> vecmat_omp(const utils::Vector<T>& x, const utils::Matrix<T>& A) {
|
||
if (x.size() != A.rows()) {
|
||
throw std::runtime_error("vecmat: dimension mismatch");
|
||
}
|
||
const uint64_t m = A.rows();
|
||
const uint64_t n = A.cols();
|
||
|
||
utils::Vector<T> y(n, T{0});
|
||
#pragma omp parallel for schedule(static)
|
||
for (uint64_t j = 0; j < n; ++j) {
|
||
T acc = T{0};
|
||
for (uint64_t i = 0; i < m; ++i) {
|
||
acc += x[i] * A(i, j);
|
||
}
|
||
y[j] = acc;
|
||
}
|
||
return y;
|
||
}
|
||
|
||
// -------------- Auto OpenMP ----------------
|
||
template <typename T>
|
||
utils::Vector<T> vecmat_auto(const utils::Vector<T>& x,
|
||
const utils::Matrix<T>& A) {
|
||
|
||
uint64_t work = A.rows() * A.cols();
|
||
|
||
bool can_parallel = omp_config::omp_parallel_allowed();
|
||
#ifdef _OPENMP
|
||
int threads = omp_get_max_threads();
|
||
#else
|
||
int threads = 1;
|
||
#endif
|
||
|
||
if (can_parallel || work > static_cast<uint64_t>(threads) * 4ull) {
|
||
return vecmat_omp(x,A);
|
||
}
|
||
else{
|
||
// Safe fallback
|
||
return vecmat(x,A);
|
||
}
|
||
|
||
}
|
||
*/
|
||
|
||
} // namespace numerics
|
||
|
||
#endif // _matadd_n_
|