Sync public subset from Flux (private)

This commit is contained in:
Gitea CI
2025-10-06 20:14:13 +00:00
parent 272e77c536
commit b2d00af0e1
390 changed files with 152131 additions and 0 deletions

23
include/numerics/abs.h Normal file
View File

@@ -0,0 +1,23 @@
#pragma once
#include "./utils/vector.h"
#include "./utils/matrix.h"
namespace numerics{
// Absolute value of a scalar.
// Returns `a` unchanged when it is not below zero, otherwise its negation.
// Any type supporting `a < 0` and unary minus can be used.
template <typename T>
T abs(const T a){
    if (!(a < 0)){
        return a;
    }
    return -a;
}
} // namespace numerics

View File

@@ -0,0 +1,39 @@
#pragma once
#include <cmath>
#include "./utils/vector.h"
#include "./utils/matrix.h"
namespace numerics{
// Scalar exponential: evaluates std::exp on `a` and converts the result to T.
template <typename T>
T exponential(const T a){
    const T result = std::exp(a);
    return result;
}
// Element-wise exponential of a vector.
// Returns a new vector of the same length whose i-th entry is
// numerics::exponential(a[i]); the input is left untouched.
template <typename T>
utils::Vector<T> exponential(const utils::Vector<T>& a){
    utils::Vector<T> result = a;
    const uint64_t count = a.size();
    for (uint64_t idx = 0; idx != count; ++idx){
        result[idx] = numerics::exponential(a[idx]);
    }
    return result;
}
// Element-wise exponential of a matrix.
// Returns a new matrix with the same shape as A where every entry (r,c) is
// numerics::exponential(A(r,c)). This is NOT the matrix exponential.
template <typename T>
utils::Matrix<T> exponential(const utils::Matrix<T>& A){
    utils::Matrix<T> result = A;
    const uint64_t row_count = A.rows();
    const uint64_t col_count = A.cols();
    for (uint64_t r = 0; r != row_count; ++r){
        for (uint64_t c = 0; c != col_count; ++c){
            result(r,c) = numerics::exponential(A(r,c));
        }
    }
    return result;
}
} // namespace numerics

View File

@@ -0,0 +1,138 @@
#pragma once
#include "./utils/matrix.h"
#include "./core/omp_config.h"
namespace numerics {
// Turns A into an identity matrix (serial version).
//   N != 0 : A is resized to N x N with fill value T{0}, then only the
//            diagonal is written.
//            NOTE(review): this relies on resize(N, N, T{0}) leaving every
//            off-diagonal entry zero - confirm against utils::Matrix::resize.
//   N == 0 : A keeps its current size; every entry is overwritten.
// Throws std::runtime_error when N == 0 and A is not square.
template <typename T>
void inplace_eye(utils::Matrix<T>& A, uint64_t N = 0){
    if (N != 0){
        A.resize(N,N,T{0});
        for (uint64_t d = 0; d < N; ++d){
            A(d,d) = T{1};
        }
        return;
    }
    N = A.rows();
    if (N != A.cols()) {
        throw std::runtime_error("inplace_eye: non-square matrix");
    }
    // No resize happened, so every entry has to be rewritten explicitly.
    for (uint64_t r = 0; r < N; ++r){
        for (uint64_t c = 0; c < N; ++c){
            A(r,c) = (r == c) ? T{1} : T{0};
        }
    }
}
// Turns A into an identity matrix, with the loops annotated for OpenMP.
//   N != 0 : A is resized to N x N with fill value T{0}.
//   N == 0 : A keeps its size and its whole buffer (A.data(), N*N entries)
//            is zeroed first.
// In both cases the diagonal is then set to T{1}.
// Throws std::runtime_error when N == 0 and A is not square.
template <typename T>
void inplace_eye_omp(utils::Matrix<T>& A, uint64_t N = 0){
    const bool resized = (N != 0);
    if (resized){
        A.resize(N,N,T{0});
    }else{
        N = A.rows();
        if (N != A.cols()) {
            throw std::runtime_error("inplace_eye_omp: non-square matrix");
        }
        // 1) No zero-filling resize happened: clear the flat buffer.
        T* const flat = A.data();
        const uint64_t total = N*N;
        #pragma omp parallel for schedule(static)
        for (uint64_t idx = 0; idx < total; ++idx){
            flat[idx] = T{0};
        }
    }
    // 2) Write the ones on the diagonal.
    #pragma omp parallel for schedule(static)
    for (uint64_t d = 0; d < N; ++d){
        A(d,d) = T{1};
    }
}
// Builds an identity matrix by default-constructing a matrix and handing it
// to inplace_eye together with the requested size N.
template <typename T>
utils::Matrix<T> eye(uint64_t N){
    utils::Matrix<T> identity;
    inplace_eye(identity, N);
    return identity;
}
// Builds an identity matrix by default-constructing a matrix and handing it
// to inplace_eye_omp together with the requested size N.
template <typename T>
utils::Matrix<T> eye_omp(uint64_t N){
    utils::Matrix<T> identity;
    inplace_eye_omp(identity, N);
    return identity;
}
// Returns an N x N identity matrix, choosing between the serial and the
// OpenMP initializer.
//
// The OpenMP variant is used only when parallel execution is allowed AND the
// matrix has more than 4 entries per available thread; otherwise the serial
// initializer runs. (The previous `||` test selected the OpenMP path even
// when omp_parallel_allowed() reported false, which is inconsistent with
// matmul_auto.) Both paths produce the same matrix.
template <typename T>
utils::Matrix<T> eye_omp_auto(uint64_t N){
    const uint64_t work = N*N;
    utils::Matrix<T> A(N,N,T{0});
#ifdef _OPENMP
    const bool can_parallel = omp_config::omp_parallel_allowed();
    const uint64_t threads = static_cast<uint64_t>(omp_get_max_threads());
#else
    const bool can_parallel = false;
    const uint64_t threads = 1;
#endif
    if (can_parallel && work > threads * 4ull) {
        // N == 0 here means "keep the current size of A".
        inplace_eye_omp(A, 0);
    }
    else{
        // Safe fallback
        inplace_eye(A, 0);
    }
    return A;
}
// Untested:
// Turns A into an identity matrix, choosing between the serial and the
// OpenMP initializer.
//   N != 0 : A is first resized to N x N.
//   N == 0 : A keeps its current size and must be square.
// Throws std::runtime_error (from the selected initializer) when A is not
// square.
//
// Fixes relative to the earlier draft:
//   * N is now honoured; it used to be ignored because 0 was always
//     forwarded to the initializer.
//   * The workload is taken from the actual matrix size instead of N*N,
//     which was 0 for the default N.
//   * The OpenMP path needs parallelism to be allowed AND enough work
//     (`&&` instead of `||`), matching matmul_auto.
template <typename T>
void inplace_eye_omp_auto(utils::Matrix<T>& A, uint64_t N = 0){
    if (N != 0){
        A.resize(N,N,T{0});
    }
    // For a non-square A this value is only a heuristic input; the selected
    // initializer performs the square check and throws.
    const uint64_t dim = A.rows();
    const uint64_t work = dim*dim;
#ifdef _OPENMP
    const bool can_parallel = omp_config::omp_parallel_allowed();
    const uint64_t threads = static_cast<uint64_t>(omp_get_max_threads());
#else
    const bool can_parallel = false;
    const uint64_t threads = 1;
#endif
    // Forwarding 0 makes the initializer rewrite every entry, so the result
    // does not depend on what resize() left in the buffer.
    if (can_parallel && work > threads * 4ull) {
        inplace_eye_omp(A, 0);
    }
    else{
        // Safe fallback
        inplace_eye(A, 0);
    }
}
} // namespace numerics

View File

@@ -0,0 +1,9 @@
#pragma once
//#include "./numerics/interpolation1d/interpolation1d_base.h"
#include "./numerics/interpolation1d/interpolation1d_barycentric.h"
#include "./numerics/interpolation1d/interpolation1d_cubic_spline.h"
#include "./numerics/interpolation1d/interpolation1d_linear.h"
#include "./numerics/interpolation1d/interpolation1d_polynomial.h"
#include "./numerics/interpolation1d/interpolation1d_rational.h"

View File

@@ -0,0 +1,88 @@
#pragma once
#include "./numerics/interpolation1d/interpolation1d_base.h"
#include "./utils/vector.h"
#include "./numerics/min.h"
#include "./numerics/max.h"
namespace numerics{
// Barycentric interpolation object built on Base_interp.
// The constructor precomputes one weight per table point (stored in `w`);
// evaluation is a weighted sum over all n points.
// The object keeps pointers into xv / yv (through Base_interp), so both
// vectors must outlive it.
template <typename T>
struct interp_barycentric : Base_interp<T> {
    using Base = Base_interp<T>;
    // Make the dependent base members visible without `this->`.
    using Base::xx;
    using Base::yy;
    using Base::n;
    utils::Vector<T> w;   // weights, one per table point
    int64_t d;            // order of the desired approximation

    // xv, yv : table abscissas / ordinates (length n).
    // dd     : order d of the desired approximation; must satisfy d < n.
    // Throws std::invalid_argument when n <= d (in addition to whatever the
    // Base_interp constructor rejects).
    interp_barycentric(const utils::Vector<T> &xv, const utils::Vector<T> &yv, uint64_t dd)
    : Base_interp<T>(xv, &yv[0], xv.size()), w(n,T{0}), d(dd) {
        if (n <= d){
            throw std::invalid_argument("d too large for number of points in interp_barycentric");
        }
        for (int64_t k = 0; k < n; ++k){
            // Range of window start indices i whose window [i, i+d] contains k.
            const int64_t first = numerics::max(k-d, static_cast<int64_t>(0));
            const int64_t last = (k >= n - d) ? (n - d - 1) : k;
            // Alternating sign, starting from the parity of `first`.
            T sign = ((first & 1) != 0) ? T{-1} : T{1};
            T acc = T{0};
            for (int64_t i = first; i <= last; ++i){
                const int64_t stop = numerics::min(i+d, n-1);
                T prod = T{1};
                for (int64_t j = i; j <= stop; ++j){
                    if (j != k){
                        prod *= (xx[k] - xx[j]);
                    }
                }
                acc += sign/prod;
                sign = -sign;
            }
            w[k] = acc;
        }
    }

    // Evaluates the interpolant at x using all n points; `jl` is not used.
    // When x coincides exactly with a table abscissa the tabulated value is
    // returned directly (avoids the division by zero).
    T rawinterp(int64_t jl, T x) override{
        T num{T{0}}, den{T{0}};
        for (int64_t i = 0; i < n; ++i){
            const T h = x - xx[i];
            if (h == T{0}){
                return yy[i];
            }
            const T q = w[i]/h;
            num += q*yy[i];
            den += q;
        }
        return num/den;
    }

    // Hides Base_interp::interp: no table search is performed, the location
    // argument passed to rawinterp is a fixed dummy value.
    T interp(T x) {
        return rawinterp(1, x);
    }
};
} // namespace numerics

View File

@@ -0,0 +1,151 @@
#pragma once
#include "./numerics/min.h"
#include "./numerics/max.h"
#include "./numerics/abs.h"
#include "./utils/vector.h"
namespace numerics{
// Common state and table-search logic shared by the 1-D interpolators.
//
// The object does NOT own its data: xx and yy point into storage supplied by
// the caller, which must stay alive (and unmodified in size) for as long as
// the interpolator is used.
//
//   n    : number of tabulated points
//   mm   : number of points used by one local interpolation
//   jsav : bracket index found by the previous search
//   dj   : jump threshold used to decide between hunt() and locate()
//   cor  : true when consecutive searches were close, so the next interp()
//          call uses hunt()
template <typename T>
struct Base_interp{
    int64_t n, mm;
    int64_t jsav, dj;
    bool cor;
    const T *xx, *yy;

    // x : table abscissas, strictly monotonic (increasing or decreasing).
    // y : pointer to the table ordinates.
    //     NOTE(review): presumably at least x.size() values - not checked here.
    // m : number of points per local interpolation, 2 <= m <= x.size().
    // Throws std::invalid_argument when the table has fewer than 2 points,
    // a data pointer is null, m is out of range or x is not strictly
    // monotonic.
    Base_interp(const utils::Vector<T>& x, const T *y, uint64_t m)
    :n(x.size()), mm(m), jsav(0), dj(1), cor(false),
     xx(x.size() > 0 ? &x[0] : nullptr), yy(y){
        // Validate before touching the data: an empty x must not be indexed.
        if (n < 2) throw std::invalid_argument("Base_interp: need at least 2 points");
        if (!xx || !yy) throw std::invalid_argument("Base_interp: null data pointers");
        if (mm < 2 || n < mm) throw std::invalid_argument("Base_interp: invalid mm or n");

        // At least 1, otherwise roughly n^(1/4).
        dj = numerics::max(static_cast<int64_t>(1), static_cast<int64_t>(std::pow(static_cast<T>(n), 0.25)));

        // The first pair fixes the expected direction; every following pair
        // must agree with it strictly.
        const bool asc = (xx[0] < xx[1]);
        for (int64_t i = 1; i < n; ++i){
            if (!(xx[i] > xx[i-1]) && asc) {
                throw std::invalid_argument("x must be strictly increasing");
            } else if (!(xx[i] < xx[i-1]) && !asc){
                throw std::invalid_argument("x must be strictly decreasing");
            }
        }
    }

    // Polymorphic base: make destruction through a Base_interp pointer well
    // defined. Copying stays available (the members are plain values/pointers).
    virtual ~Base_interp() = default;
    Base_interp(const Base_interp&) = default;
    Base_interp& operator=(const Base_interp&) = default;

    // Finds the table position for x (hunt() when the previous lookups were
    // correlated, locate() otherwise) and evaluates the derived interpolant.
    T interp(T x){
        int64_t jlo;
        if (cor){
            jlo = hunt(x);
        }
        else{
            jlo = locate(x);
        }
        return rawinterp(jlo,x);
    }

    // Derived classes provide this as the actual interpolation method.
    virtual T rawinterp(int64_t jlo, T x) = 0;

    // Bisection search over the whole table.
    // Returns the start index of the mm-point window used for x, clamped to
    // [0, n-mm]. Updates jsav and cor as a side effect.
    // Throws std::runtime_error when n / mm are inconsistent.
    int64_t locate(const T x){
        int64_t ju, jl;
        int64_t jm;
        if (n < 2 || mm < 2 || mm > n){
            throw std::runtime_error("Interpolate: locate size error");
        }
        bool ascnd = (xx[n-1] >= xx[0]); // True if ascending order of table, false otherwise.
        jl = 0;    // Initialize lower
        ju = n-1;  // and upper limits.
        while (ju - jl > 1) {       // If we are not yet done,
            jm = (ju+jl) >> 1;      // compute a midpoint,
            if ((x >= xx[jm]) == ascnd){
                jl=jm;              // and replace either the lower limit
            }else{
                ju=jm;              // or the upper limit, as appropriate.
            }
        }                           // Repeat until the test condition is satisfied.
        // Decide whether to use hunt or locate next time (same helper as hunt()).
        if (numerics::abs(jl - jsav) > dj){
            cor = false;
        }else{
            cor = true;
        }
        jsav = jl;
        return numerics::max(static_cast<int64_t>(0), numerics::min(n-mm, jl-((mm-2)>>1)));
    }

    // Search that starts from the previous result jsav, expanding the bracket
    // with a doubling step before finishing with bisection.
    // Returns the same kind of window start index as locate() and updates
    // jsav and cor as a side effect.
    // Throws std::runtime_error when n / mm are inconsistent.
    int64_t hunt(const T x){
        int64_t jl=jsav, jm, ju, inc=1;
        if (n < 2 || mm < 2 || mm > n){
            throw std::runtime_error("Interpolate: hunt size error");
        }
        bool ascnd=(xx[n-1] >= xx[0]); // True if ascending order of table, false otherwise.
        if (jl < 0 || jl > n-1) {      // Input guess not useful. Go immediately to bisection.
            jl=0;
            ju=n-1;
        }else{
            if ((x >= xx[jl]) == ascnd){ // Hunt up:
                for (;;){
                    ju = jl + inc;
                    if (ju >= n-1){
                        ju = n-1;
                        break;           // Off end of table.
                    }else if((x < xx[ju]) == ascnd){
                        break;           // Found bracket.
                    }else{               // Not done, so double the increment and try again.
                        jl = ju;
                        inc += inc;
                    }
                }
            }else{                       // Hunt down:
                ju = jl;
                for (;;){
                    jl = jl - inc;
                    if (jl <= 0){        // Off end of table.
                        jl = 0;
                        break;
                    }else if((x >= xx[jl]) == ascnd){
                        break;           // Found bracket.
                    }
                    else{                // Not done, so double the increment and try again.
                        ju = jl;
                        inc += inc;
                    }
                }
            }
        }
        while(ju-jl > 1){ // Hunt is done, so begin the final bisection phase:
            jm = (ju+jl) >> 1;
            if ((x >= xx[jm]) == ascnd){
                jl =jm;
            }else{
                ju=jm;
            }
        }
        if (numerics::abs(jl-jsav) > dj){
            cor = false;
        }else{
            cor = true;
        }
        jsav = jl;
        return numerics::max(static_cast<int64_t>(0), numerics::min(n-mm, jl-((mm-2)>>1)));
    }
};
} // namespace numerics

View File

@@ -0,0 +1,86 @@
#pragma once
#include "./numerics/interpolation1d/interpolation1d_base.h"
//#include "./numerics/abs.h"
#include "./utils/vector.h"
namespace numerics{
// Cubic-spline interpolation object built on Base_interp (2-point window).
// The constructor computes the second derivatives of the spline at the table
// points (stored in y2); rawinterp evaluates the cubic on one interval.
//
// The object keeps pointers into the caller's x / y storage (through
// Base_interp), so that storage must outlive the interpolator.
//
// Boundary conditions: a value of yp1 / ypn larger than 0.99e99 selects the
// "natural" condition at that end; any other value is used as the prescribed
// first derivative there.
// NOTE(review): the 1e99 sentinel is not representable for T = float.
template <typename T>
struct interp_cubic_spline : Base_interp<T> {
    using Base = Base_interp<T>;
    // Make the dependent base members visible without `this->`.
    using Base::xx;
    using Base::yy;
    utils::Vector<T> y2;   // second derivatives at the table points

    // Table given as two vectors. The inputs are only read, so they are taken
    // by const reference like in the other interpolators (const vectors can
    // now be used).
    interp_cubic_spline(const utils::Vector<T> &xv, const utils::Vector<T> &yv, T yp1=T{1.e99}, T ypn=T{1.e99})
    : Base_interp<T>(xv, &yv[0], 2), y2(xv.size(),T{0}) {
        sety2(&xv[0], &yv[0], yp1, ypn);
    }

    // Table given as a vector of abscissas and a raw pointer to the ordinates.
    // NOTE(review): yv presumably points at xv.size() values - not checked.
    interp_cubic_spline(const utils::Vector<T> &xv, const T *yv, T yp1=T{1.e99}, T ypn=T{1.e99})
    : Base_interp<T>(xv, yv, 2), y2(xv.size(),T{0}) {
        sety2(&xv[0], yv, yp1, ypn);
    }

    // Fills y2 by solving the tridiagonal system of the spline.
    // xv, yv  : table data, y2.size() entries each.
    // yp1,ypn : boundary conditions at the first / last point (see above).
    void sety2(const T *xv, const T *yv, T yp1, T ypn){
        T p, qn, sig, un;
        const uint64_t count = y2.size();
        utils::Vector<T> u(count-1, T{0});
        if (yp1 > static_cast<T>(0.99e99)){ // The lower boundary condition is set either to be "natural"
            y2[0] = u[0] = T{0};
        }else{                              // or else to have a specified first derivative.
            y2[0] = T{-0.5};
            u[0] = (T{3}/(xv[1]-xv[0]))*(((yv[1]-yv[0])/(xv[1]-xv[0]))-yp1);
        }
        for (uint64_t i = 1; i < count-1; ++i){ // This is the decomposition loop of the tridiagonal algorithm
            sig = (xv[i]-xv[i-1])/(xv[i+1]-xv[i-1]);
            p = sig*y2[i-1]+T{2};
            y2[i] = (sig - T{1})/p; // y2 and u are used for temporary storage of the decomposed factors.
            u[i]=((yv[i+1]-yv[i])/(xv[i+1]-xv[i])) - ((yv[i]-yv[i-1])/(xv[i]-xv[i-1]));
            u[i]=((T{6}*u[i]/(xv[i+1]-xv[i-1])) - sig*u[i-1])/p;
        }
        if (ypn > static_cast<T>(0.99e99)){ // The upper boundary condition is set either to be "natural"
            qn = un = T{0};
        }else{                              // or else to have a specified first derivative.
            qn = T{0.5};
            un = (T{3}/(xv[count-1]-xv[count-2]))*(ypn-((yv[count-1]-yv[count-2])/(xv[count-1]-xv[count-2])));
        }
        y2[count-1] = (un-(qn*u[count-2]))/((qn*y2[count-2])+T{1});
        // Back-substitution, from the second-to-last point down to the first.
        for (int64_t k = static_cast<int64_t>(count)-2; k >= 0; --k){
            y2[k] = y2[k] * y2[k+1]+u[k];
        }
    }

    // Evaluates the spline at x on the interval [xx[jl], xx[jl+1]].
    // Throws std::invalid_argument when the two abscissas coincide.
    T rawinterp(int64_t jl, T x) override{
        int64_t klo=jl, khi=jl+1;
        T y, h, b, a;
        h = xx[khi] - xx[klo];
        if (h == T{0}){ // The xa's must be distinct.
            throw std::invalid_argument("interp_cubic_spline: Bad input to routine splint");
        }
        a = (xx[khi] - x)/h; // Cubic spline polynomial is now evaluated.
        b = (x - xx[klo])/h;
        y = a*yy[klo] + b*yy[khi] + ( ((a*a*a) - a)*y2[klo] + ((b*b*b) - b)*y2[khi] ) * (h*h) / T{6};
        return y;
    }
};
} // namespace numerics

View File

@@ -0,0 +1,29 @@
#pragma once
#include "./numerics/interpolation1d/interpolation1d_base.h"
namespace numerics{
// Piecewise-linear interpolation object built on Base_interp (2-point window).
// The object keeps pointers into xv / yv (through Base_interp), so both
// vectors must outlive it.
template <typename T>
struct interp_linear : Base_interp<T> {
    using Base = Base_interp<T>;
    // Make the dependent base members visible without `this->`.
    using Base::xx;
    using Base::yy;

    interp_linear(const utils::Vector<T> &xv, const utils::Vector<T> &yv): Base_interp<T>(xv, &yv[0], 2){}

    // Linear interpolation between table points j and j+1.
    T rawinterp(int64_t j, T x) override{
        const T x_lo = xx[j];
        const T x_hi = xx[j+1];
        if (x_lo == x_hi){
            // Table is defective, but we can recover.
            return yy[j];
        }
        return (yy[j] + ((x-x_lo)/(x_hi-x_lo))*(yy[j+1]-yy[j]));
    }
};
} // namespace numerics

View File

@@ -0,0 +1,75 @@
#pragma once
#include "./numerics/interpolation1d/interpolation1d_base.h"
#include "./numerics/abs.h"
#include "./utils/vector.h"
namespace numerics{
// Polynomial interpolation object built on Base_interp.
// Each evaluation uses a window of mm table points and also stores an error
// indication in `dy` (the last correction added to the result).
// The object keeps pointers into xv / yv (through Base_interp), so both
// vectors must outlive it.
template <typename T>
struct interp_polynomial : Base_interp<T> {
    using Base = Base_interp<T>;
    // Make the dependent base members visible without `this->`.
    using Base::xx;
    using Base::yy;
    using Base::mm;
    T dy;   // last correction applied by rawinterp

    // m : number of points used per evaluation.
    interp_polynomial(const utils::Vector<T> &xv, const utils::Vector<T> &yv, uint64_t m)
    : Base_interp<T>(xv, &yv[0], m), dy(T{0}){}

    // Evaluates the interpolating polynomial through the mm points starting
    // at index jl, at position x.
    // Throws std::invalid_argument when two abscissas in the window coincide.
    T rawinterp(int64_t jl, T x) override{
        const T *xa = &xx[jl], *ya = &yy[jl];
        utils::Vector<T> c(mm,0), d(mm,0);

        // Find the index ns of the closest table entry and initialise the
        // tableau of c's and d's.
        int64_t ns = 0;
        T dif = numerics::abs(x-xa[0]);
        for (int64_t i = 0; i < mm; ++i){
            const T dift = numerics::abs(x-xa[i]);
            if (dift < dif){
                ns = i;
                dif = dift;
            }
            c[i] = ya[i];
            d[i] = ya[i];
        }

        T y = ya[ns];   // This is the initial approximation to y.
        --ns;
        for (int64_t m = 1; m < mm; ++m){           // For each column of the tableau,
            for (int64_t i = 0; i < mm-m; ++i){     // update the current c's and d's.
                const T ho = xa[i]-x;
                const T hp = xa[i+m]-x;
                const T w = c[i+1]-d[i];
                T den = ho-hp;
                if (den == T{0.0}){
                    // This error can occur only if two input xa's are (to within roundoff) identical.
                    throw std::invalid_argument("interp_polynomial error");
                }
                den = w/den;
                d[i] = hp*den;
                c[i] = ho*den;
            }
            // Pick the correction (c or d) that keeps the path through the
            // tableau as centred as possible on the target.
            if (2 * (ns + 1) < (mm - m)) {
                dy = c[ns + 1];
            } else {
                dy = d[ns];
                --ns;
            }
            y += dy;
        }
        return y;
    }
};
} // namespace numerics

View File

@@ -0,0 +1,79 @@
#pragma once
#include "./numerics/interpolation1d/interpolation1d_base.h"
#include "./utils/vector.h"
#include "./numerics/abs.h"
namespace numerics{
// Rational-function interpolation object built on Base_interp.
// Each evaluation uses a window of mm table points and also stores an error
// indication in `dy` (the last correction added to the result).
// The object keeps pointers into xv / yv (through Base_interp), so both
// vectors must outlive it.
template <typename T>
struct interp_rational : Base_interp<T> {
    using Base = Base_interp<T>;
    // Make the dependent base members visible without `this->`.
    using Base::xx;
    using Base::yy;
    using Base::mm;
    T dy;   // last correction applied by rawinterp

    // m : number of points used per evaluation.
    interp_rational(const utils::Vector<T> &xv, const utils::Vector<T> &yv, uint64_t m)
    : Base_interp<T>(xv, &yv[0], m), dy(T{0}){}

    // Evaluates the interpolating rational function through the mm points
    // starting at index jl, at position x.
    // Returns the tabulated value directly (and sets dy to 0) when x hits a
    // table abscissa exactly.
    // Throws std::invalid_argument when the interpolating function has a pole
    // at x.
    T rawinterp(int64_t jl, T x) override{
        const T TINY = T{1.0e-99};
        const T *xa = &xx[jl], *ya = &yy[jl];
        utils::Vector<T> c(mm, T{0}), d(mm, T{0});

        int64_t ns = 0;
        T hh = numerics::abs(x - xa[0]);
        for (int64_t i = 0; i < mm; ++i){
            const T h = numerics::abs(x-xa[i]);
            if (h == T{0}){
                dy = T{0};
                return ya[i];
            }
            if (h < hh){
                ns = i;
                hh = h;
            }
            c[i] = ya[i];
            d[i] = ya[i] + TINY; // The TINY part is needed to prevent a rare zero-over-zero condition.
        }

        T y = ya[ns];
        --ns;
        for (int64_t m = 1; m < mm; ++m){
            for (int64_t i = 0; i < mm-m; ++i){
                const T w = c[i+1] - d[i];
                const T h = xa[i+m] - x; // h will never be zero, since this was tested in the initializing loop.
                const T t = (xa[i] - x)*d[i]/h;
                T dd = t - c[i+1];
                if (dd == T{0}){
                    // The interpolating function has a pole at the requested value of x.
                    throw std::invalid_argument("Error in routine interp_rational");
                }
                dd = w/dd;
                d[i] = c[i+1]*dd;
                c[i] = t*dd;
            }
            if (2 * (ns + 1) < (mm - m)) {
                dy = c[ns + 1];
            } else {
                dy = d[ns];
                --ns;
            }
            y += dy;
        }
        return y;
    }
};
} // namespace numerics

View File

@@ -0,0 +1,45 @@
#ifndef _inverse_n_
#define _inverse_n_
#include "./utils/vector.h"
#include "./utils/matrix.h"
#include "./numerics/inverse/inverse_gauss_jordan.h"
#include "./numerics/inverse/inverse_lu.h"
#include <omp.h>
namespace numerics{
// Replaces A by its inverse using the selected algorithm.
//   method : "Gauss-Jordan" (default) or "LU".
// Throws std::runtime_error when A is not square or the method name is
// unknown; the selected routine may throw as well.
template <typename T>
void inplace_inverse(utils::Matrix<T>& A, std::string method = "Gauss-Jordan"){
    const bool square = (A.rows() == A.cols());
    if (!square) {
        throw std::runtime_error("inplace_inverse: non-square matrix");
    }
    if (method == "Gauss-Jordan"){
        inverse_gj(A);
        return;
    }
    if (method == "LU"){
        inplace_inverse_lu(A);
        return;
    }
    throw std::runtime_error("numerics::inplace_inverse(" + method + ") - Not implemented yet \r \nImplemented: 'Gauss-Jordan', 'LU'");
}
// Returns the inverse of A, leaving A untouched.
//   method : forwarded to inplace_inverse ("Gauss-Jordan" by default, or "LU").
// A is only copied, so it is taken by const reference; const matrices and
// temporaries can be inverted as well.
// Throws whatever inplace_inverse throws (std::runtime_error for non-square
// input or an unknown method).
template <typename T>
utils::Matrix<T> inverse(const utils::Matrix<T>& A, std::string method = "Gauss-Jordan"){
    utils::Matrix<T> result = A;
    inplace_inverse(result, method);
    return result;
}
} // namespace numerics
#endif // _inverse_n_

View File

@@ -0,0 +1,100 @@
#ifndef _inverse_gj_n_
#define _inverse_gj_n_
#include "./utils/vector.h"
#include "./utils/matrix.h"
#include "./numerics/initializers/eye.h"
#include <omp.h>
namespace numerics{
// In-place matrix inversion by Gauss-Jordan elimination with full pivoting.
// On return A holds its own inverse.
//
// Throws std::runtime_error when A is not square or when the largest
// remaining pivot candidate is <= 1e-14 / a pivot is exactly zero (matrix
// treated as singular).
//
// Changes relative to the previous version:
//   * The companion matrix B (initialised to the identity and updated in
//     lock-step with A) was never read, so it and all work on it are removed.
//     The result in A is unchanged.
//   * The magnitude of an entry is computed locally instead of through an
//     unqualified abs() call, whose meaning depended on which headers
//     happened to be included first.
//   * A non-square input is rejected up front instead of being indexed out
//     of range.
// NOTE(review): the temporaries are `double` regardless of T, as before.
template <typename T>
void inverse_gj(utils::Matrix<T>& A){
    const uint64_t n = A.rows();
    if (n != A.cols()){
        throw std::runtime_error("numerics inverse_gj: non-square matrix");
    }
    uint64_t icol{0}, irow{0};
    double big, dum, pivinv, temp;
    // indxr/indxc remember the pivot position of every step; ipiv marks the
    // columns that have already been used as a pivot.
    utils::Vi indxc(n,0), indxr(n,0), ipiv(n,0);
    for (uint64_t i = 0; i < n; i++){
        // Search the not-yet-reduced part of A for the largest entry.
        big = 0.0;
        for (uint64_t j = 0; j < n; j++){
            if (ipiv[j] != 1){
                for (uint64_t k = 0; k < n; k++){
                    if (ipiv[k] == 0){
                        const double value = A(j,k);
                        const double magnitude = (value < 0.0) ? -value : value;
                        if (magnitude >= big){
                            big = magnitude;
                            irow = j;
                            icol = k;
                        }
                    }
                }
            }
        }
        if (big <= T{1e-14}){
            throw std::runtime_error("utill:inplace_inverse('Gauss-Jordan' - Singular Matrix");
        }
        ipiv[icol]++;
        // Bring the pivot onto the diagonal by swapping rows.
        if (irow != icol){
            for (uint64_t l = 0; l < n; l++){
                temp = A(irow,l);
                A(irow,l) = A(icol,l);
                A(icol,l) = temp;
            }
        }
        indxr[i] = irow;
        indxc[i] = icol;
        if (A(icol,icol) == 0.0){
            throw std::runtime_error("utill:inplace_inverse('Gauss-Jordan' - Singular Matrix");
        }
        // Normalise the pivot row.
        pivinv= 1.0/A(icol,icol);
        A(icol,icol)=1.0;
        for (uint64_t l = 0; l < n; l++){
            A(icol,l) *= pivinv;
        }
        // Eliminate the pivot column from every other row.
        for (uint64_t ll = 0; ll < n; ll++){
            if (ll != icol){
                dum = A(ll,icol);
                A(ll,icol) = 0;
                for (uint64_t l = 0; l < n; l++){
                    A(ll,l) -= A(icol,l)*dum;
                }
            }
        }
    }
    // Undo the row swaps by swapping the corresponding columns in reverse
    // order.
    for (int64_t l = static_cast<int64_t>(n)-1; l >= 0; l--){
        if (indxr[l] != indxc[l]){
            for (uint64_t k = 0; k < n; k++){
                temp = A(k,indxr[l]);
                A(k,indxr[l]) = A(k,indxc[l]);
                A(k,indxc[l]) = temp;
            }
        }
    }
}
} // namespace numerics
#endif // _inverse_gj_n_

View File

@@ -0,0 +1,18 @@
#pragma once
#include "./decomp/lu.h"
namespace numerics{
// Replaces A by its inverse using an LU decomposition object
// (decomp::LUdcmp) constructed from A.
// Throws std::runtime_error when A is not square.
template <typename T>
void inplace_inverse_lu(utils::Matrix<T>& A){
    const bool square = (A.rows() == A.cols());
    if (!square){
        throw std::runtime_error("numerics inverse_lu: non-square matrix");
    }
    decomp::LUdcmp<T> lu(A);
    lu.inplace_inverse(A);
}
}

226
include/numerics/matadd.h Normal file
View File

@@ -0,0 +1,226 @@
#ifndef _matadd_n_
#define _matadd_n_
#include "./utils/vector.h"
#include "./utils/matrix.h"
#include "./core/omp_config.h"
namespace numerics{
// Adds a column vector to every column of A in place: A(r,c) += x[r].
// Throws std::runtime_error when x.size() differs from the number of rows.
template <typename T>
void inplace_matadd_colvec(utils::Matrix<T>& A, const utils::Vector<T>& x) {
    const uint64_t rows = A.rows();
    const uint64_t cols = A.cols();
    if (x.size() != rows) {
        throw std::runtime_error("inplace_matadd_colvec: dimension mismatch");
    }
    for (uint64_t r = 0; r < rows; ++r) {
        for (uint64_t c = 0; c < cols; ++c) {
            A(r, c) += x[r];
        }
    }
}
// Adds a row vector to every row of A in place: A(r,c) += x[c].
// Throws std::runtime_error when x.size() differs from the number of columns.
template <typename T>
void inplace_matadd_rowvec(utils::Matrix<T>& A, const utils::Vector<T>& x) {
    const uint64_t rows = A.rows();
    const uint64_t cols = A.cols();
    if (x.size() != cols) {
        throw std::runtime_error("inplace_matadd_rowvec: dimension mismatch");
    }
    for (uint64_t r = 0; r < rows; ++r) {
        for (uint64_t c = 0; c < cols; ++c) {
            A(r, c) += x[c];
        }
    }
}
// Returns a copy of A with the column vector x added to every column
// (see inplace_matadd_colvec); A itself is not modified.
template <typename T>
utils::Matrix<T> matadd_colvec(const utils::Matrix<T>& A, const utils::Vector<T>& x) {
    utils::Matrix<T> result = A;
    inplace_matadd_colvec(result, x);
    return result;
}
// Returns a copy of A with the row vector x added to every row
// (see inplace_matadd_rowvec); A itself is not modified.
template <typename T>
utils::Matrix<T> matadd_rowvec(const utils::Matrix<T>& A, const utils::Vector<T>& x) {
    utils::Matrix<T> result = A;
    inplace_matadd_rowvec(result, x);
    return result;
}
// Adds a vector to a matrix with broadcasting and returns the result.
//   method == "row"  : x is a row vector    (x.size() == A.cols()).
//   method == "col"  : x is a column vector (x.size() == A.rows()).
//   method == "auto" : the orientation is deduced from x.size(); this is
//                      refused for square matrices because both orientations
//                      would fit.
// Throws std::runtime_error for an ambiguous or non-matching size in "auto"
// mode, for an unknown method, and (from the helpers) for a size mismatch.
//
// Fix: in "auto" mode the two branches were swapped - a vector with one entry
// per row was sent to matadd_rowvec (which requires one entry per column) and
// vice versa, so "auto" always ended in a dimension-mismatch exception.
template <typename T>
utils::Matrix<T> matadd(const utils::Matrix<T>& A, const utils::Vector<T>& x, std::string method = "auto"){
    const uint64_t rows = A.rows();
    const uint64_t cols = A.cols();
    const uint64_t N = x.size();
    if (method=="auto"){
        if (rows==cols){
            throw std::runtime_error("matadd: too many options for dimensions");
        } else if (rows == N){
            // One entry per row -> column vector.
            return matadd_colvec(A, x);
        } else if (cols == N){
            // One entry per column -> row vector.
            return matadd_rowvec(A, x);
        }else{
            throw std::runtime_error("matadd: undefined fault - auto");
        }
    }else if(method=="row"){
        return matadd_rowvec(A, x);
    } else if (method=="col"){
        return matadd_colvec(A, x);
    }else{
        throw std::runtime_error("matadd: undefined fault - defined method");
    }
}
/*
// -------------- Collapse(2) OpenMP ----------------
template <typename T>
utils::Vector<T> matvec_omp(const utils::Matrix<T>& A, const utils::Vector<T>& x) {
if (A.cols() != x.size()) {
throw std::runtime_error("matvec: dimension mismatch");
}
const uint64_t m = A.rows();
const uint64_t n = A.cols();
utils::Vector<T> y(m, T{0}); // <-- y has length m (rows)
const T* xptr = x.data();
const T* Aptr = A.data(); // row-major: A(i,j) == Aptr[i*n + j]
// Each row i is an independent dot product: y[i] = dot(A[i,*], x)
#pragma omp parallel for schedule(static)
for (uint64_t i = 0; i < m; ++i) {
const T* row = Aptr + i * n; // contiguous row i
T acc = T{0};
#pragma omp simd reduction(+:acc)
for (uint64_t j = 0; j < n; ++j) {
acc += row[j] * xptr[j];
}
y[i] = acc;
}
return y;
}
// -------------- Auto OpenMP ----------------
template <typename T>
utils::Vector<T> matvec_auto(const utils::Matrix<T>& A,
const utils::Vector<T>& x) {
uint64_t work = A.rows() * A.cols();
bool can_parallel = omp_config::omp_parallel_allowed();
#ifdef _OPENMP
int threads = omp_get_max_threads();
#else
int threads = 1;
#endif
if (can_parallel || work > static_cast<uint64_t>(threads) * 4ull) {
return matvec_omp(A,x);
}
else{
// Safe fallback
return matvec(A,x);
}
}
// =================================================
// y = x * A (VectorMatrix product)
// =================================================
template <typename T>
utils::Vector<T> vecmat(const utils::Vector<T>& x, const utils::Matrix<T>& A) {
if (x.size() != A.rows()) {
throw std::runtime_error("vecmat: dimension mismatch");
}
const uint64_t m = A.rows();
const uint64_t n = A.cols();
utils::Vector<T> y(n, T{0});
for (uint64_t j = 0; j < n; ++j) {
for (uint64_t i = 0; i < m; ++i) {
y[j] += x[i] * A(i, j);
}
}
return y;
}
// -------------- Collapse(2) OpenMP ----------------
template <typename T>
utils::Vector<T> vecmat_omp(const utils::Vector<T>& x, const utils::Matrix<T>& A) {
if (x.size() != A.rows()) {
throw std::runtime_error("vecmat: dimension mismatch");
}
const uint64_t m = A.rows();
const uint64_t n = A.cols();
utils::Vector<T> y(n, T{0});
#pragma omp parallel for schedule(static)
for (uint64_t j = 0; j < n; ++j) {
T acc = T{0};
for (uint64_t i = 0; i < m; ++i) {
acc += x[i] * A(i, j);
}
y[j] = acc;
}
return y;
}
// -------------- Auto OpenMP ----------------
template <typename T>
utils::Vector<T> vecmat_auto(const utils::Vector<T>& x,
const utils::Matrix<T>& A) {
uint64_t work = A.rows() * A.cols();
bool can_parallel = omp_config::omp_parallel_allowed();
#ifdef _OPENMP
int threads = omp_get_max_threads();
#else
int threads = 1;
#endif
if (can_parallel || work > static_cast<uint64_t>(threads) * 4ull) {
return vecmat_omp(x,A);
}
else{
// Safe fallback
return vecmat(x,A);
}
}
*/
} // namespace numerics
#endif // _matadd_n_

38
include/numerics/matdiv.h Normal file
View File

@@ -0,0 +1,38 @@
#ifndef _matdiv_n_
#define _matdiv_n_
#include "./utils/matrix.h"
#include "./core/omp_config.h"
namespace numerics{
// ---------------- Serial baseline ----------------
// Divides a matrix element-wise by a broadcast vector and returns the result.
//   method == "row" : b is a row vector,    C(i,j) = A(i,j) / b[j].
//   method == "col" : b is a column vector, C(i,j) = A(i,j) / b[i].
// Throws std::runtime_error for an unknown method or when b.size() does not
// match the corresponding matrix dimension (previously the vector was indexed
// without any length check).
// No check for zero divisors is performed.
template <typename T>
utils::Matrix<T> matdiv(const utils::Matrix<T>& A, const utils::Vector<T>& b, std::string method){
    const uint64_t rows = A.rows();
    const uint64_t cols = A.cols();
    const bool by_row = (method == "row");
    const bool by_col = (method == "col");
    if (!by_row && !by_col){
        throw std::runtime_error("matdiv: choose div by: 'row' or 'col'");
    }
    // Same validation style as matadd / matsubtract.
    if (b.size() != (by_row ? cols : rows)){
        throw std::runtime_error("matdiv: dimension mismatch");
    }
    utils::Matrix<T> C = A;
    for (uint64_t i = 0; i < rows; ++i){
        for (uint64_t j = 0; j < cols; ++j){
            C(i,j) /= by_row ? b[j] : b[i];
        }
    }
    return C;
}
} // namespace numerics
#endif // _matdiv_n_

View File

@@ -0,0 +1,85 @@
#pragma once
#include "./core/omp_config.h"
#include "./utils/matrix.h"
#include "./numerics/abs.h"
namespace numerics{
// -------------- Serial ----------------
// Compares two matrices element by element (serial version).
//   Floating-point T : entries are equal when |A(i,j) - B(i,j)| <= tol.
//   Other T          : entries are compared with ==.
// Returns false immediately when the shapes differ or at the first mismatch.
//
// Fix: the floating-point test is now written as "not (diff <= tol)" instead
// of "diff > tol". A NaN difference therefore counts as a mismatch, which is
// what matequal_omp already reports; previously the serial and the parallel
// version disagreed on matrices containing NaN.
template <typename T>
bool matequal(const utils::Matrix<T>& A, const utils::Matrix<T>& B, double tol = 1e-9) {
    if (A.rows() != B.rows() || A.cols() != B.cols()) {
        return false;
    }
    const bool decimal = std::is_floating_point<T>::value;
    const uint64_t rows=A.rows(), cols=A.cols();
    for (uint64_t i = 0; i < rows; ++i){
        for (uint64_t j = 0; j < cols; ++j){
            if (decimal) {
                if (!(numerics::abs(A(i,j) - B(i,j)) <= static_cast<T>(tol))){
                    return false;
                }
            } else {
                if (A(i,j) != B(i,j)){
                    return false;
                }
            }
        }
    }
    return true;
}
// -------------- Parallel ----------------
// Compares two matrices element by element, with the loop nest annotated for
// OpenMP (collapsed over both indices, logical-AND reduction).
//   Floating-point T : entries are equal when |A(r,c) - B(r,c)| <= tol.
//   Other T          : entries are compared with ==.
// Returns false when the shapes differ. Unlike the serial version there is no
// early exit: every entry is visited.
template <typename T>
bool matequal_omp(const utils::Matrix<T>& A, const utils::Matrix<T>& B, double tol = 1e-9) {
    const bool same_shape = (A.rows() == B.rows()) && (A.cols() == B.cols());
    if (!same_shape) {
        return false;
    }
    const bool is_float = std::is_floating_point<T>::value;
    const uint64_t rows=A.rows(), cols=A.cols();
    bool all_equal = true;
    #pragma omp parallel for collapse(2) schedule(static) reduction(&&:all_equal)
    for (uint64_t r = 0; r < rows; ++r)
        for (uint64_t c = 0; c < cols; ++c)
            all_equal = all_equal && (is_float
                ? (numerics::abs(A(r,c) - B(r,c)) <= static_cast<T>(tol))
                : (A(r,c) == B(r,c)));
    return all_equal;
}
// -------------- Auto OpenMP ----------------
// Compares two matrices, choosing between the serial and the OpenMP
// implementation. Returns false when the shapes differ.
//
// The OpenMP version is used only when parallel execution is allowed AND the
// matrices have more than 4 entries per available thread; otherwise the
// serial version (which can exit early) runs. The previous `||` test selected
// the OpenMP path even when omp_parallel_allowed() reported false, which is
// inconsistent with matmul_auto.
template <typename T>
bool matequal_auto(const utils::Matrix<T>& A, const utils::Matrix<T>& B, double tol = 1e-9) {
    if (A.rows() != B.rows() || A.cols() != B.cols()) {
        return false;
    }
    const uint64_t work = A.rows() * A.cols();
#ifdef _OPENMP
    const bool can_parallel = omp_config::omp_parallel_allowed();
    const uint64_t threads = static_cast<uint64_t>(omp_get_max_threads());
#else
    const bool can_parallel = false;
    const uint64_t threads = 1;
#endif
    if (can_parallel && work > threads * 4ull) {
        return matequal_omp(A,B,tol);
    }
    else{
        // Safe fallback
        return matequal(A,B,tol);
    }
}
} // namespace numerics

124
include/numerics/matmul.h Normal file
View File

@@ -0,0 +1,124 @@
#ifndef _matmul_n_
#define _matmul_n_
#include "./utils/matrix.h"
#include "./core/omp_config.h"
namespace numerics{
// ---------------- Serial baseline ----------------
// Serial matrix product C = A * B.
// The loops run over (row of A, column of A, column of B), so the innermost
// loop walks along a row of B and a row of C.
// Throws std::runtime_error when A.cols() != B.rows().
template <typename T>
utils::Matrix<T> matmul(const utils::Matrix<T>& A, const utils::Matrix<T>& B){
    if (A.cols() != B.rows()){
        throw std::runtime_error("matmul: dimension mismatch");
    }
    const uint64_t m = A.rows();
    const uint64_t n = A.cols();   // also B.rows()
    const uint64_t p = B.cols();
    utils::Matrix<T> C(m, p, T{0});
    for (uint64_t row = 0; row < m; ++row){
        for (uint64_t inner = 0; inner < n; ++inner){
            const T a_val = A(row,inner);
            for (uint64_t col = 0; col < p; ++col){
                C(row,col) += a_val * B(inner,col);
            }
        }
    }
    return C;
}
// ---------------- Rows-only OpenMP ----------------
// Matrix product C = A * B with the outer (row) loop annotated for OpenMP.
// Each entry of C is accumulated in a local variable and written once.
// Throws std::runtime_error when A.cols() != B.rows().
template <typename T>
utils::Matrix<T> matmul_rows_omp(const utils::Matrix<T>& A,
                                 const utils::Matrix<T>& B) {
    if (A.cols() != B.rows()) {
        throw std::runtime_error("matmul_rows_omp: dim mismatch");
    }
    const uint64_t m = A.rows();
    const uint64_t n = A.cols();
    const uint64_t p = B.cols();
    utils::Matrix<T> C(m, p, T{0});
    #pragma omp parallel for schedule(static)
    for (uint64_t row = 0; row < m; ++row) {
        for (uint64_t col = 0; col < p; ++col) {
            T dot = T{0};
            for (uint64_t inner = 0; inner < n; ++inner) {
                dot += A(row,inner)*B(inner,col);
            }
            C(row,col) = dot;
        }
    }
    return C;
}
// -------------- Collapse(2) OpenMP ----------------
// Matrix product C = A * B with the two outer loops (row, column of C)
// collapsed into one OpenMP iteration space.
// Each entry of C is accumulated in a local variable and written once.
// Throws std::runtime_error when A.cols() != B.rows().
template <typename T>
utils::Matrix<T> matmul_collapse_omp(const utils::Matrix<T>& A,
                                     const utils::Matrix<T>& B) {
    if (A.cols() != B.rows()) {
        throw std::runtime_error("matmul_collapse_omp: dim mismatch");
    }
    const uint64_t m = A.rows();
    const uint64_t n = A.cols();
    const uint64_t p = B.cols();
    utils::Matrix<T> C(m, p, T{0});
    #pragma omp parallel for collapse(2) schedule(static)
    for (uint64_t row = 0; row < m; ++row) {
        for (uint64_t col = 0; col < p; ++col) {
            T dot = T{0};
            for (uint64_t inner = 0; inner < n; ++inner){
                dot += A(row,inner)*B(inner,col);
            }
            C(row,col) = dot;
        }
    }
    return C;
}
// -------------------- Auto selector ---------------------
// Matrix product C = A * B that picks an implementation from the output size
// (work = rows of A * columns of B) and the number of available threads:
//   * parallelism not allowed, or work < 4 * threads -> serial matmul
//   * work >= 8 * threads                            -> matmul_collapse_omp
//   * at least `threads` rows and at most 4 columns  -> matmul_rows_omp
//   * anything else                                  -> serial matmul
// Dimension checking is left to the selected implementation.
template <typename T>
utils::Matrix<T> matmul_auto(const utils::Matrix<T>& A,
                             const utils::Matrix<T>& B) {
    const uint64_t m=A.rows(), p=B.cols();
    const uint64_t work = m * p;
#ifdef _OPENMP
    bool can_parallel = omp_config::omp_parallel_allowed();
    uint64_t threads = static_cast<uint64_t>(omp_get_max_threads());
#else
    bool can_parallel = false;
    uint64_t threads = 1;
#endif
    // Tiny problems: serial is cheapest.
    if (!can_parallel || work < threads*4ull) {
        return matmul(A,B);
    }
    // Plenty of (i,j) work: collapse(2) is a good default.
    if (work >= 8ull * threads) {
        return matmul_collapse_omp(A,B);
    }
    // Many rows and very few columns: rows-only has less overhead.
    if (m >= static_cast<uint64_t>(threads) && p <= 4) {
        return matmul_rows_omp(A,B);
    }
    // Safe fallback
    return matmul(A,B);
}
} // namespace numerics
#endif // _matmul_n_

View File

@@ -0,0 +1,102 @@
#ifndef _matsubtract_n_
#define _matsubtract_n_
#include "./utils/vector.h"
#include "./utils/matrix.h"
#include "./core/omp_config.h"
namespace numerics{
// Subtracts a column vector from every column of A in place: A(r,c) -= x[r].
// Throws std::runtime_error when x.size() differs from the number of rows.
template <typename T>
void inplace_matsubtract_colvec(utils::Matrix<T>& A, const utils::Vector<T>& x) {
    const uint64_t rows = A.rows();
    const uint64_t cols = A.cols();
    if (x.size() != rows) {
        throw std::runtime_error("inplace_matsubtract_colvec: dimension mismatch");
    }
    for (uint64_t r = 0; r < rows; ++r) {
        for (uint64_t c = 0; c < cols; ++c) {
            A(r, c) -= x[r];
        }
    }
}
// Subtracts a row vector from every row of A in place: A(r,c) -= x[c].
// Throws std::runtime_error when x.size() differs from the number of columns.
template <typename T>
void inplace_matsubtract_rowvec(utils::Matrix<T>& A, const utils::Vector<T>& x) {
    const uint64_t rows = A.rows();
    const uint64_t cols = A.cols();
    if (x.size() != cols) {
        throw std::runtime_error("inplace_matsubtract_rowvec: dimension mismatch");
    }
    for (uint64_t r = 0; r < rows; ++r) {
        for (uint64_t c = 0; c < cols; ++c) {
            A(r, c) -= x[c];
        }
    }
}
// Returns a copy of A with the column vector x subtracted from every column
// (see inplace_matsubtract_colvec); A itself is not modified.
template <typename T>
utils::Matrix<T> matsubtract_colvec(const utils::Matrix<T>& A, const utils::Vector<T>& x) {
    utils::Matrix<T> result = A;
    inplace_matsubtract_colvec(result, x);
    return result;
}
// Returns a copy of A with the row vector x subtracted from every row
// (see inplace_matsubtract_rowvec); A itself is not modified.
template <typename T>
utils::Matrix<T> matsubtract_rowvec(const utils::Matrix<T>& A, const utils::Vector<T>& x) {
    utils::Matrix<T> result = A;
    inplace_matsubtract_rowvec(result, x);
    return result;
}
// Subtracts a vector from a matrix with broadcasting and returns the result.
//   method == "row"  : x is a row vector    (x.size() == A.cols()).
//   method == "col"  : x is a column vector (x.size() == A.rows()).
//   method == "auto" : the orientation is deduced from x.size(); this is
//                      refused for square matrices because both orientations
//                      would fit.
// Throws std::runtime_error for an ambiguous or non-matching size in "auto"
// mode, for an unknown method, and (from the helpers) for a size mismatch.
//
// Fix: in "auto" mode the two branches were swapped - a vector with one entry
// per row was sent to matsubtract_rowvec (which requires one entry per
// column) and vice versa, so "auto" always ended in a dimension-mismatch
// exception.
template <typename T>
utils::Matrix<T> matsubtract(const utils::Matrix<T>& A, const utils::Vector<T>& x, std::string method = "auto"){
    const uint64_t rows = A.rows();
    const uint64_t cols = A.cols();
    const uint64_t N = x.size();
    if (method=="auto"){
        if (rows==cols){
            throw std::runtime_error("matsubtract: too many options for dimensions");
        } else if (rows == N){
            // One entry per row -> column vector.
            return matsubtract_colvec(A, x);
        } else if (cols == N){
            // One entry per column -> row vector.
            return matsubtract_rowvec(A, x);
        }else{
            throw std::runtime_error("matsubtract: undefined fault - auto");
        }
    }else if(method=="row"){
        return matsubtract_rowvec(A, x);
    } else if (method=="col"){
        return matsubtract_colvec(A, x);
    }else{
        throw std::runtime_error("matsubtract: undefined fault - defined method");
    }
}
} // namespace numerics
#endif // _matsubtract_n_

39
include/numerics/matsum.h Normal file
View File

@@ -0,0 +1,39 @@
#ifndef _matsum_n_
#define _matsum_n_
#include "./utils/vector.h"
#include "./utils/matrix.h"
#include "./core/omp_config.h"
namespace numerics{
// Sums the entries of a matrix along one direction.
//   method == "row" : result has A.cols() entries, b[c] = sum over r of A(r,c)
//                     (the result is shaped like a row vector).
//   method == "col" : result has A.rows() entries, b[r] = sum over c of A(r,c)
//                     (the result is shaped like a column vector).
// Throws std::runtime_error for any other method.
// A is only read, so it is taken by const reference; const matrices and
// temporaries can be summed as well.
template <typename T>
utils::Vector<T> matsum(const utils::Matrix<T>& A, std::string method) {
    utils::Vector<T> b;
    if (method == "row"){
        b.resize(A.cols(), T{0});
        for (uint64_t i = 0; i < A.cols(); ++i){
            for (uint64_t j = 0; j < A.rows(); ++j){
                b[i] += A(j, i);
            }
        }
    }else if (method == "col"){
        b.resize(A.rows(), T{0});
        for (uint64_t i = 0; i < A.cols(); ++i){
            for (uint64_t j = 0; j < A.rows(); ++j){
                b[j] += A(j, i);
            }
        }
    }else{
        throw std::runtime_error("matsum: choose sum by: 'row' or 'col'");
    }
    return b;
}
} // namespace numerics
#endif // _matsum_n_

156
include/numerics/matvec.h Normal file
View File

@@ -0,0 +1,156 @@
#ifndef _matvec_n_
#define _matvec_n_
#include "./utils/matrix.h"
#include "./core/omp_config.h"
namespace numerics{
// =================================================
// y = A * x (MatrixVector product)
// =================================================
// Serial dense product y = A * x.
// Returns a vector with A.rows() entries, where entry r is the dot product of
// row r of A with x.
// Throws std::runtime_error when A.cols() differs from x.size().
template <typename T>
utils::Vector<T> matvec(const utils::Matrix<T>& A, const utils::Vector<T>& x) {
    if (x.size() != A.cols()) {
        throw std::runtime_error("matvec: dimension mismatch");
    }

    const uint64_t n_rows = A.rows();
    const uint64_t n_cols = A.cols();

    utils::Vector<T> result(n_rows, T{0});
    for (uint64_t r = 0; r < n_rows; ++r) {
        for (uint64_t c = 0; c < n_cols; ++c) {
            result[r] += A(r, c) * x[c];
        }
    }
    return result;
}
// -------------- Row-parallel OpenMP (SIMD inner dot product) ----------------
// OpenMP variant of y = A * x.
// Rows are distributed across threads; each row's dot product with x is
// accumulated in a thread-local scalar using a SIMD reduction, then stored.
// Reads A through its raw data pointer, treating storage as row-major
// (A(i,j) == data[i * cols + j]).
// Throws std::runtime_error when A.cols() differs from x.size().
template <typename T>
utils::Vector<T> matvec_omp(const utils::Matrix<T>& A, const utils::Vector<T>& x) {
    if (x.size() != A.cols()) {
        throw std::runtime_error("matvec: dimension mismatch");
    }

    const uint64_t n_rows = A.rows();
    const uint64_t n_cols = A.cols();

    utils::Vector<T> result(n_rows, T{0}); // one entry per row of A

    const T* vec = x.data();
    const T* mat = A.data();

    // Every output entry is written by exactly one iteration, so rows can be
    // processed independently.
    #pragma omp parallel for schedule(static)
    for (uint64_t r = 0; r < n_rows; ++r) {
        const T* row_begin = mat + r * n_cols; // start of row r
        T dot = T{0};
        #pragma omp simd reduction(+:dot)
        for (uint64_t c = 0; c < n_cols; ++c) {
            dot += row_begin[c] * vec[c];
        }
        result[r] = dot;
    }
    return result;
}
// -------------- Auto OpenMP ----------------
// Pick the serial or the OpenMP matrix-vector kernel for y = A * x.
//
// The OpenMP kernel is used only when BOTH hold:
//   - omp_config::omp_parallel_allowed() reports that parallel regions may be
//     opened, and
//   - the amount of work (rows * cols) exceeds 4 elements per available thread.
// Otherwise the serial kernel is used as the safe fallback.
//
// The two conditions are combined with && (not ||): the parallel kernel must
// never be chosen when parallelism is disallowed, and tiny problems should not
// pay the thread start-up cost. This matches inplace_transpose_square_auto.
//
// Dimension checking (and the resulting std::runtime_error) is done by the
// selected kernel.
template <typename T>
utils::Vector<T> matvec_auto(const utils::Matrix<T>& A,
                             const utils::Vector<T>& x) {
    const uint64_t work = A.rows() * A.cols();
    const bool can_parallel = omp_config::omp_parallel_allowed();
#ifdef _OPENMP
    const int threads = omp_get_max_threads();
#else
    const int threads = 1;
#endif
    if (can_parallel && work > static_cast<uint64_t>(threads) * 4ull) {
        return matvec_omp(A, x);
    }
    // Safe fallback
    return matvec(A, x);
}
// =================================================
// y = x * A (Vector-Matrix product)
// =================================================
// Serial dense product y = x * A (row vector times matrix).
// Returns a vector with A.cols() entries, where entry j is the sum over i of
// x[i] * A(i, j).
// Throws std::runtime_error when x.size() differs from A.rows().
template <typename T>
utils::Vector<T> vecmat(const utils::Vector<T>& x, const utils::Matrix<T>& A) {
    if (x.size() != A.rows()) {
        throw std::runtime_error("vecmat: dimension mismatch");
    }
    const uint64_t m = A.rows();
    const uint64_t n = A.cols();
    utils::Vector<T> y(n, T{0});
    // Rows in the outer loop: A is then read row by row (contiguous if storage
    // is row-major, as matvec_omp assumes) instead of striding down columns.
    // Each y[j] still receives its terms in ascending i order, so the result
    // is identical to the column-outer formulation.
    for (uint64_t i = 0; i < m; ++i) {
        const T xi = x[i]; // invariant across the inner loop
        for (uint64_t j = 0; j < n; ++j) {
            y[j] += xi * A(i, j);
        }
    }
    return y;
}
// -------------- Column-parallel OpenMP ----------------
// OpenMP variant of y = x * A.
// Output entries (one per column of A) are distributed across threads; each
// thread accumulates its column's sum in a local scalar and stores it once.
// Throws std::runtime_error when x.size() differs from A.rows().
template <typename T>
utils::Vector<T> vecmat_omp(const utils::Vector<T>& x, const utils::Matrix<T>& A) {
    if (A.rows() != x.size()) {
        throw std::runtime_error("vecmat: dimension mismatch");
    }

    const uint64_t n_rows = A.rows();
    const uint64_t n_cols = A.cols();

    utils::Vector<T> result(n_cols, T{0});

    // Each result[c] is written by exactly one iteration.
    #pragma omp parallel for schedule(static)
    for (uint64_t c = 0; c < n_cols; ++c) {
        T sum = T{0};
        for (uint64_t r = 0; r < n_rows; ++r) {
            sum += x[r] * A(r, c);
        }
        result[c] = sum;
    }
    return result;
}
// -------------- Auto OpenMP ----------------
// Pick the serial or the OpenMP vector-matrix kernel for y = x * A.
//
// The OpenMP kernel is used only when BOTH hold:
//   - omp_config::omp_parallel_allowed() reports that parallel regions may be
//     opened, and
//   - the amount of work (rows * cols) exceeds 4 elements per available thread.
// Otherwise the serial kernel is used as the safe fallback.
//
// The two conditions are combined with && (not ||): the parallel kernel must
// never be chosen when parallelism is disallowed, and tiny problems should not
// pay the thread start-up cost. This matches inplace_transpose_square_auto.
//
// Dimension checking (and the resulting std::runtime_error) is done by the
// selected kernel.
template <typename T>
utils::Vector<T> vecmat_auto(const utils::Vector<T>& x,
                             const utils::Matrix<T>& A) {
    const uint64_t work = A.rows() * A.cols();
    const bool can_parallel = omp_config::omp_parallel_allowed();
#ifdef _OPENMP
    const int threads = omp_get_max_threads();
#else
    const int threads = 1;
#endif
    if (can_parallel && work > static_cast<uint64_t>(threads) * 4ull) {
        return vecmat_omp(x, A);
    }
    // Safe fallback
    return vecmat(x, A);
}
} // namespace numerics
#endif // _matvec_n_

76
include/numerics/max.h Normal file
View File

@@ -0,0 +1,76 @@
#pragma once
#include "./utils/vector.h"
#include "./utils/matrix.h"
namespace numerics{
// Return the larger of two values, compared with operator<.
// When neither value is less than the other, the first argument is returned.
template <typename T>
T max(const T a, const T b){
    return (a < b) ? b : a;
}
// Clamp A from below, in place: every entry that b exceeds is overwritten
// with b; all other entries are left untouched.
template <typename T>
void inplace_max(utils::Matrix<T>& A, const T b){
    const uint64_t n_rows = A.rows();
    const uint64_t n_cols = A.cols();
    for (uint64_t r = 0; r < n_rows; ++r){
        for (uint64_t c = 0; c < n_cols; ++c){
            if (!(b > A(r,c))){
                continue; // entry is already at least b
            }
            A(r,c) = b;
        }
    }
}
// Element-wise maximum of a matrix and a scalar.
// Returns a copy of A on which inplace_max(copy, b) has been applied;
// A itself is not modified.
template <typename T>
utils::Matrix<T> max(const utils::Matrix<T>& A, const T b){
    utils::Matrix<T> clamped(A);
    inplace_max(clamped, b);
    return clamped;
}
// Maximum of A along one axis, returned as a vector.
//
//   method == "cols": result has length A.cols(); entry i is the largest
//                     element of column i.
//   method == "rows": result has length A.rows(); entry i is the largest
//                     element of row i.
//
// Throws std::runtime_error for any other method string.
//
// Each running maximum is seeded with the first element of its row/column
// rather than with T{0}; seeding with zero would report 0 for a row/column
// whose entries are all negative. If the reduced dimension is empty there is
// nothing to compare and the corresponding entries stay T{0}.
//
// Ties and unordered comparisons follow the scalar max(a, b): the candidate
// replaces the running value unless it compares less than it.
template <typename T>
utils::Vector<T> max(const utils::Matrix<T>& A, std::string method){
    const uint64_t rows = A.rows();
    const uint64_t cols = A.cols();
    utils::Vector<T> b;
    if (method == "cols"){
        b.resize(cols, T{0});
        if (rows > 0){
            // Seed with the first row, then fold in the remaining rows.
            for (uint64_t i = 0; i < cols; ++i){
                b[i] = A(0, i);
            }
            for (uint64_t j = 1; j < rows; ++j){
                for (uint64_t i = 0; i < cols; ++i){
                    const T candidate = A(j, i);
                    if (!(candidate < b[i])){
                        b[i] = candidate;
                    }
                }
            }
        }
    }else if (method == "rows"){
        b.resize(rows, T{0});
        if (cols > 0){
            for (uint64_t i = 0; i < rows; ++i){
                // Seed with the first column of this row.
                T best = A(i, 0);
                for (uint64_t j = 1; j < cols; ++j){
                    const T candidate = A(i, j);
                    if (!(candidate < best)){
                        best = candidate;
                    }
                }
                b[i] = best;
            }
        }
    }else{
        throw std::runtime_error("max: choose 'rows or 'cols'");
    }
    return b;
}
} // namespace numerics

31
include/numerics/mean.h Normal file
View File

@@ -0,0 +1,31 @@
#ifndef _mean_n_
#define _mean_n_
#include "./utils/vector.h"
#include "./utils/matrix.h"
#include "./core/omp_config.h"
namespace numerics{
template <typename T>
T mean(utils::Vector<T>& A) {
T mean(T{0});
const uint64_t rows = A.rows();
const uint64_t cols = A.cols();
for (uint64_t i = 0; i < cols; ++i) {
for (uint64_t j = 0; j < rows; ++j) {
mean += A(j, i);
}
}
mean /= (static_cast<T>(rows)* static_cast<T>(cols));
return mean;
}
} // namespace numerics
#endif // _mean_n_

21
include/numerics/min.h Normal file
View File

@@ -0,0 +1,21 @@
#pragma once
#include "./utils/vector.h"
#include "./utils/matrix.h"
namespace numerics{
// Return the smaller of two values, compared with operator<.
// When the first value is not less than the second, the second is returned.
template <typename T>
T min(const T a, const T b){
    return (a < b) ? a : b;
}
} // namespace numerics

View File

@@ -0,0 +1,21 @@
// "./numerics/numerics.h"
#pragma once
#include "./numerics/initializers/eye.h"
#include "./numerics/matequal.h"
#include "./numerics/transpose.h"
#include "./numerics/inverse.h"
#include "./numerics/matmul.h"
#include "./numerics/matdiv.h"
#include "./numerics/matvec.h"
#include "./numerics/matadd.h"
#include "./numerics/matsubtract.h"
#include "./numerics/matsum.h"
#include "./numerics/min.h"
#include "./numerics/max.h"
#include "./numerics/abs.h"
#include "./numerics/mean.h"
#include "./numerics/exponential.h"
#include "./numerics/interpolation1d.h" // base

View File

@@ -0,0 +1,156 @@
#ifndef _transpose_n_
#define _transpose_n_
#include "./utils/matrix.h"
#include "./core/omp_config.h"
namespace numerics{
// Transpose a square matrix in place (serial).
// Walks the strict upper triangle and exchanges every A(i,j) with its mirror
// A(j,i); diagonal entries are not touched.
// Throws std::runtime_error when A is not square.
template <typename T>
void inplace_transpose_square(utils::Matrix<T>& A){
    const uint64_t n_rows = A.rows();
    const uint64_t n_cols = A.cols();
    if (n_rows != n_cols){
        throw std::runtime_error("inplace_transpose only valid for square matrices");
    }

    for (uint64_t r = 0; r < n_rows; ++r){
        for (uint64_t c = r + 1; c < n_cols; ++c){
            // Manual three-step exchange of the mirrored pair.
            T upper = A(r,c);
            A(r,c) = A(c,r);
            A(c,r) = upper;
        }
    }
}
// Transpose a square matrix in place, with the outer (row) loop distributed
// across OpenMP threads.
// Each mirrored pair (i,j)/(j,i) with j > i is exchanged only by the iteration
// that owns row i, so no pair is touched by two iterations.
// Throws std::runtime_error when A is not square.
template <typename T>
void inplace_transpose_square_omp(utils::Matrix<T>& A){
    const uint64_t n_rows = A.rows();
    const uint64_t n_cols = A.cols();
    if (n_rows != n_cols){
        throw std::runtime_error("inplace_transpose only valid for square matrices");
    }

    #pragma omp parallel for schedule(static)
    for (uint64_t r = 0; r < n_rows; ++r){
        for (uint64_t c = r + 1; c < n_cols; ++c){
            // Manual three-step exchange of the mirrored pair.
            T upper = A(r,c);
            A(r,c) = A(c,r);
            A(c,r) = upper;
        }
    }
}
// Return the transpose of A as a new matrix (serial).
// The result has A.cols() rows and A.rows() columns, with result(j,i) == A(i,j).
// A is not modified.
template <typename T>
utils::Matrix<T> transpose(const utils::Matrix<T>& A){
    const uint64_t src_rows = A.rows();
    const uint64_t src_cols = A.cols();

    utils::Matrix<T> result(src_cols, src_rows, T{0});
    for (uint64_t r = 0; r < src_rows; ++r){
        for (uint64_t c = 0; c < src_cols; ++c){
            result(c,r) = A(r,c);
        }
    }
    return result;
}
// Return the transpose of A as a new matrix, using OpenMP.
// The two nested loops are collapsed into a single iteration space that is
// split statically across threads; every result element is written by exactly
// one iteration. A is not modified.
template <typename T>
utils::Matrix<T> transpose_omp(const utils::Matrix<T>& A){
    const uint64_t src_rows = A.rows();
    const uint64_t src_cols = A.cols();

    utils::Matrix<T> result(src_cols, src_rows, T{0});

    #pragma omp parallel for collapse(2) schedule(static)
    for (uint64_t r = 0; r < src_rows; ++r){
        for (uint64_t c = 0; c < src_cols; ++c){
            result(c,r) = A(r,c);
        }
    }
    return result;
}
// -------- Auto selectors --------
// In-place transpose of a square matrix, choosing between the serial and the
// OpenMP kernel.
// The OpenMP kernel is used only when the build has OpenMP, the project's
// omp_config allows parallel regions, and the number of element swaps
// (n * (n - 1) / 2) exceeds 4 per available thread. Otherwise the serial
// kernel runs.
// Throws std::runtime_error when A is not square.
template <typename T>
void inplace_transpose_square_auto(utils::Matrix<T>& A) {
    const uint64_t n_rows = A.rows();
    const uint64_t n_cols = A.cols();
    if (n_rows != n_cols) {
        throw std::runtime_error("inplace_transpose_auto: only valid for square matrices");
    }

    // Number of mirrored pairs in the strict upper triangle.
    const std::uint64_t swap_count = static_cast<std::uint64_t>((n_rows * (n_rows - 1)) / 2);

#ifdef _OPENMP
    const bool parallel_ok = omp_config::omp_parallel_allowed();
    const uint64_t max_threads = static_cast<std::uint64_t>(omp_get_max_threads());
#else
    const bool parallel_ok = false;
    const uint64_t max_threads = 1;
#endif

    const bool use_parallel = parallel_ok && (swap_count > max_threads * 4ull);
    if (!use_parallel) {
        inplace_transpose_square(A);
        return;
    }
    inplace_transpose_square_omp(A);
}
// Return the transpose of A, choosing the implementation automatically.
//
// Square input: A is copied and the copy is transposed in place through
// inplace_transpose_square_auto (which makes its own serial/parallel choice).
//
// Rectangular input: the OpenMP kernel is used only when the build has
// OpenMP, omp_config allows parallel regions, AND the element count
// (rows * cols) exceeds 4 per available thread; otherwise the serial kernel
// runs. The parallel kernel must never be selected when parallelism is not
// allowed, hence `can_parallel && ...` rather than `!can_parallel || ...`.
//
// A is not modified.
template <typename T>
utils::Matrix<T> transpose_auto(const utils::Matrix<T>& A) {
    const uint64_t rows = A.rows();
    const uint64_t cols = A.cols();
    if (rows == cols){
        utils::Matrix<T> B = A;
        inplace_transpose_square_auto(B);
        return B;
    }
    const uint64_t work = rows * cols; // elements to move
#ifdef _OPENMP
    const bool can_parallel = omp_config::omp_parallel_allowed();
    const uint64_t threads = static_cast<std::uint64_t>(omp_get_max_threads());
#else
    const bool can_parallel = false;
    const uint64_t threads = 1;
#endif
    if (can_parallel && work > threads * 4ull) {
        return transpose_omp(A);
    }
    return transpose(A);
}
} // namespace numerics
#endif // _transpose_n_