Sync public subset from Flux (private)

2025-10-06 20:14:13 +00:00
parent 272e77c536
commit b2d00af0e1
390 changed files with 152131 additions and 0 deletions
--- a/include/numerics/abs.h
+++ b/include/numerics/abs.h
@@ -0,0 +1,23 @@
+#pragma once
+
+
+#include "./utils/vector.h"
+#include "./utils/matrix.h"
+
+
+namespace numerics{
+	
+	// Magnitude of a scalar: yields -a when a compares below zero, otherwise a itself.
+	template <typename T>
+	T abs(const T a){
+		return (a < 0) ? -a : a;
+	}
+
+
+
+} // namespace numerics
+
--- a/include/numerics/exponential.h
+++ b/include/numerics/exponential.h
@@ -0,0 +1,39 @@
+#pragma once
+
+#include <cmath>
+
+#include "./utils/vector.h"
+#include "./utils/matrix.h"
+
+
+namespace numerics{
+	
+	// Scalar e^a, computed with std::exp and converted back to T.
+	template <typename T>
+	T exponential(const T a){
+		const T value = static_cast<T>(std::exp(a));
+		return value;
+	}
+
+	// Element-wise exponential of a vector. The argument is not modified; a new vector is returned.
+	template <typename T>
+	utils::Vector<T>  exponential(const utils::Vector<T>& a){
+		utils::Vector<T> result = a;				// the copy fixes the result length
+		const uint64_t count = a.size();
+		uint64_t idx = 0;
+		while (idx < count){
+			result[idx] = numerics::exponential(a[idx]);
+			++idx;
+		}
+		return result;
+	}
+
+	// Element-wise exponential of a matrix. The argument is not modified; a new matrix is returned.
+	template <typename T>
+	utils::Matrix<T>  exponential(const utils::Matrix<T>& A){
+		utils::Matrix<T> result = A;				// the copy fixes the result shape
+		const uint64_t n_rows = A.rows();
+		const uint64_t n_cols = A.cols();
+		for (uint64_t r = 0; r < n_rows; ++r){
+			for (uint64_t c = 0; c < n_cols; ++c){
+				result(r,c) = numerics::exponential(A(r,c));
+			}
+		}
+		return result;
+	}
+
+
+
+} // namespace numerics
+
--- a/include/numerics/initializers/eye.h
+++ b/include/numerics/initializers/eye.h
@@ -0,0 +1,138 @@
+#pragma once
+#include "./utils/matrix.h"
+#include "./core/omp_config.h"
+
+
+namespace numerics {
+
+	// Overwrites A with an identity matrix.
+	//
+	//   A : matrix to overwrite.
+	//   N : when non-zero, A is first resized with A.resize(N, N, T{0});
+	//       when zero (the default), A keeps its current size and must be square.
+	//
+	// Throws std::runtime_error if N is zero and A.rows() != A.cols().
+	template <typename T>
+	void inplace_eye(utils::Matrix<T>& A, uint64_t N = 0){
+
+		if (N != 0){
+			// After the resize only the diagonal is written.
+			// NOTE(review): assumes resize(N, N, T{0}) leaves every element equal to T{0} - confirm against utils::Matrix.
+			A.resize(N,N,T{0});
+			for (uint64_t d = 0; d < N; ++d){
+				A(d,d) = T{1};
+			}
+			return;
+		}
+
+		N = A.rows();
+		if (N != A.cols()) {
+			throw std::runtime_error("inplace_eye: non-square matrix");
+		}
+
+		// Existing storage: write every element, one on the diagonal and zero elsewhere.
+		for (uint64_t r = 0; r < N; ++r){
+			for (uint64_t c = 0; c < N; ++c){
+				A(r,c) = (r == c) ? T{1} : T{0};
+			}
+		}
+
+	}
+
+
+	// OpenMP-annotated variant of inplace_eye: overwrites A with an identity matrix.
+	//
+	//   N != 0 : A is resized with A.resize(N, N, T{0}) and only the diagonal is written afterwards.
+	//   N == 0 : A keeps its size (must be square); all N*N elements reachable through A.data()
+	//            are set to zero before the diagonal is written.
+	//
+	// Throws std::runtime_error if N is zero and A.rows() != A.cols().
+	template <typename T>
+	void inplace_eye_omp(utils::Matrix<T>& A, uint64_t N = 0){
+
+		const bool resized = (N != 0);
+
+		if (resized){
+			A.resize(N,N,T{0});
+		}else{
+			N = A.rows();
+			if (N != A.cols()) {
+				throw std::runtime_error("inplace_eye_omp: non-square matrix");
+			}
+		}
+
+		// Step 1: clear the whole buffer unless the resize above was asked to fill with zeros.
+		if (!resized){
+			T* flat = A.data();
+			const uint64_t total = N*N;
+			#pragma omp parallel for schedule(static)
+			for (uint64_t e = 0; e < total; ++e){
+				flat[e] = T{0};
+			}
+		}
+
+		// Step 2: ones on the diagonal.
+		#pragma omp parallel for schedule(static)
+		for (uint64_t d = 0; d < N; ++d){
+			A(d,d) = T{1};
+		}
+
+	}
+
+	// Returns an identity matrix built by inplace_eye on a default-constructed matrix.
+	// N is forwarded unchanged; inplace_eye treats N == 0 as "keep the matrix's current size",
+	// so eye<T>(0) returns the default-constructed matrix as inplace_eye leaves it.
+	template <typename T>
+	utils::Matrix<T> eye(uint64_t N){
+		utils::Matrix<T> A;
+		inplace_eye(A, N);
+		return A;
+
+	}
+
+	// Same as eye(), but fills the matrix through inplace_eye_omp (OpenMP-annotated loops).
+	// N is forwarded unchanged; inplace_eye_omp treats N == 0 as "keep the matrix's current size".
+	template <typename T>
+	utils::Matrix<T> eye_omp(uint64_t N){
+		utils::Matrix<T> A;
+		inplace_eye_omp(A, N);
+		return A;
+	}
+
+	// Builds an N x N identity matrix, picking between inplace_eye_omp and inplace_eye.
+	//
+	// The OpenMP variant is chosen only when omp_config::omp_parallel_allowed() returns true
+	// AND the element count N*N exceeds four per available thread; otherwise the serial
+	// variant runs. When _OPENMP is not defined the serial variant is always used.
+	template <typename T>
+	utils::Matrix<T> eye_omp_auto(uint64_t N){
+
+	    const uint64_t work = N*N;
+	    utils::Matrix<T> A(N,N,T{0});
+
+		#ifdef _OPENMP
+			const bool can_parallel = omp_config::omp_parallel_allowed();
+		    const uint64_t threads = static_cast<uint64_t>(omp_get_max_threads());
+		#else
+		    const bool can_parallel = false;
+		    const uint64_t threads = 1;
+		#endif
+
+	    // Both conditions are required: with `||` a large matrix would take the OpenMP
+	    // path even when parallel execution is reported as not allowed.
+	    if (can_parallel && work > threads * 4ull) {
+	        inplace_eye_omp(A, 0);		// 0 = keep the N x N size chosen above
+	    }
+	    else{
+	    	// Safe fallback
+	    	inplace_eye(A, 0);
+	    }
+
+		return A;
+	}
+	// Untested.
+	// Overwrites A with an identity matrix, picking between inplace_eye_omp and inplace_eye.
+	//
+	//   N != 0 : forwarded to the chosen routine, which resizes A to N x N.
+	//   N == 0 : A keeps its current size (the chosen routine throws std::runtime_error
+	//            if A is not square).
+	//
+	// The OpenMP variant is chosen only when omp_config::omp_parallel_allowed() returns true
+	// AND the number of elements exceeds four per available thread.
+	template <typename T>
+	void inplace_eye_omp_auto(utils::Matrix<T>& A, uint64_t N = 0){
+
+	    // Size the decision on the dimension that will actually be filled, not on the
+	    // raw argument (which is 0 whenever the caller keeps A's current size).
+	    const uint64_t dim = (N != 0) ? N : A.rows();
+	    const uint64_t work = dim*dim;
+
+		#ifdef _OPENMP
+			const bool can_parallel = omp_config::omp_parallel_allowed();
+		    const uint64_t threads = static_cast<uint64_t>(omp_get_max_threads());
+		#else
+		    const bool can_parallel = false;
+		    const uint64_t threads = 1;
+		#endif
+
+	    if (can_parallel && work > threads * 4ull) {
+	        inplace_eye_omp(A, N);
+	    }
+	    else{
+	    	// Safe fallback
+	    	inplace_eye(A, N);
+	    }
+	}
+
+
+
+} // namespace numerics
--- a/include/numerics/interpolation1d.h
+++ b/include/numerics/interpolation1d.h
@@ -0,0 +1,9 @@
+#pragma once
+
+
+//#include "./numerics/interpolation1d/interpolation1d_base.h"
+#include "./numerics/interpolation1d/interpolation1d_barycentric.h"
+#include "./numerics/interpolation1d/interpolation1d_cubic_spline.h"
+#include "./numerics/interpolation1d/interpolation1d_linear.h"
+#include "./numerics/interpolation1d/interpolation1d_polynomial.h"
+#include "./numerics/interpolation1d/interpolation1d_rational.h"
--- a/include/numerics/interpolation1d/.gitkeep
+++ b/include/numerics/interpolation1d/.gitkeep
--- a/include/numerics/interpolation1d/interpolation1d_barycentric.h
+++ b/include/numerics/interpolation1d/interpolation1d_barycentric.h
@@ -0,0 +1,88 @@
+#pragma once
+
+#include "./numerics/interpolation1d/interpolation1d_base.h"
+
+#include "./utils/vector.h"
+#include "./numerics/min.h"
+#include "./numerics/max.h"
+
+
+namespace numerics{
+	
+	// Barycentric rational interpolation over a fixed table of points.
+	//
+	// The weights w are computed once in the constructor; every evaluation then sums over
+	// all n table points. Through Base_interp only raw pointers to xv and yv are kept, so
+	// both vectors must outlive this object.
+	template <typename T>
+	struct interp_barycentric : Base_interp<T> {
+		using Base   = Base_interp<T>;
+
+	  	// Base members made visible without this-> qualification.
+	  	using Base::xx;
+	  	using Base::yy;
+	  	using Base::n;
+
+	  	utils::Vector<T> w;		// one weight per table point
+	  	int64_t d;				// order of the approximation (constructor argument dd)
+
+		// xv, yv : abscissas and ordinates of the table.
+		// dd     : desired order; must be smaller than the number of points.
+		// Throws std::invalid_argument when n <= d (and whatever Base_interp's constructor throws).
+		interp_barycentric(const utils::Vector<T> &xv, const utils::Vector<T> &yv, uint64_t dd)
+		: Base_interp<T>(xv, &yv[0], xv.size()), w(n,T{0}), d(dd) {
+			if (n <= d){
+				throw std::invalid_argument("d too large for number of points in interp_barycentric");
+			}
+			for (int64_t k = 0; k < n; ++k){
+				const int64_t first = numerics::max(k-d, static_cast<int64_t>(0));
+				const int64_t last  = (k >= n - d) ? (n - d - 1) : k;
+
+				// The sign alternates from term to term, starting from the parity of `first`.
+				T sign = ((first & 1) != 0) ? T{-1} : T{1};
+				T acc = T{0};
+
+				for (int64_t i = first; i <= last; ++i){
+					const int64_t stop = numerics::min(i+d, n-1);
+					T prod = T{1};
+					for (int64_t j = i; j <= stop; ++j){
+						if (j != k){
+							prod *= (xx[k] - xx[j]);
+						}
+					}
+					acc += sign/prod;
+					sign = -sign;
+				}
+				w[k] = acc;
+			}
+		}
+
+		// Evaluates the interpolant at x. jl is not used: the sum runs over the whole table.
+		// If x coincides exactly with a table abscissa the stored ordinate is returned.
+		T rawinterp(int64_t jl, T x) override{
+
+			T num{T{0}}, den{T{0}};
+
+			for (int64_t i = 0; i < n; ++i){
+				const T h = x - xx[i];
+				if (h == T{0}){
+					return yy[i];
+				}
+				const T q = w[i]/h;
+				num += q*yy[i];
+				den += q;
+			}
+			return num/den;
+		}
+
+
+		// Replaces Base_interp::interp: no table search is performed before evaluating.
+		T interp(T x) {
+			return rawinterp(1, x);
+		}
+
+	};
+
+} // namespace numerics
+
--- a/include/numerics/interpolation1d/interpolation1d_base.h
+++ b/include/numerics/interpolation1d/interpolation1d_base.h
@@ -0,0 +1,151 @@
+#pragma once
+
+#include "./numerics/min.h"
+#include "./numerics/max.h"
+#include "./numerics/abs.h"
+
+#include "./utils/vector.h"
+
+
+namespace numerics{
+	
+	// Common machinery for the 1-D table interpolators: keeps a view of the abscissas and
+	// ordinates and finds the table window that brackets a query point. Derived classes
+	// supply rawinterp().
+	//
+	// Only raw pointers to the caller's data are stored (xx, yy); the data passed to the
+	// constructor must stay alive for as long as the interpolator is used.
+	template <typename T>
+	struct Base_interp{
+
+		int64_t n, mm;			// n: number of table points; mm: points used per local interpolation
+		int64_t jsav, dj;		// jsav: bracket found by the previous search; dj: threshold for "close" searches
+		bool cor;				// true when the last two searches were close, so hunt() is used next
+		const T *xx, *yy;		// non-owning views of the abscissas and ordinates
+
+
+		// x : abscissas, strictly increasing or strictly decreasing.
+		// y : pointer to the ordinates (at least x.size() values are read by derived classes).
+		// m : number of points used by each local interpolation (2 <= m <= x.size()).
+		// Throws std::invalid_argument when the sizes are inconsistent, a pointer is null,
+		// or x is not strictly monotonic.
+		Base_interp(const utils::Vector<T>& x, const T *y, uint64_t m)
+			:n(x.size()), mm(m), jsav(0), dj(1), cor(false),
+			 xx(x.size() > 0 ? &x[0] : nullptr),	// never index an empty vector
+			 yy(y){
+			//dj = numerics::min(static_cast<int64_t>(1), static_cast<int64_t>(std::pow(static_cast<T>(n), 0.25))); // from NR
+			dj = numerics::max(static_cast<int64_t>(1), static_cast<int64_t>(std::pow(static_cast<T>(n), 0.25))); // from chatbot
+
+			if (mm < 2 || n < mm) throw std::invalid_argument("Base_interp: invalid mm or n");
+		  	if (!xx || !yy)       throw std::invalid_argument("Base_interp: null data pointers");
+		  	if (n < 2)            throw std::invalid_argument("Base_interp: need at least 2 points");
+
+			// The direction is decided by the first pair; every later pair must agree with it.
+			const bool asc = (xx[0] < xx[1]);
+			for (int64_t i = 1; i < n; ++i){
+				if (!(xx[i] > xx[i-1]) && asc) {
+					throw std::invalid_argument("x must be strictly increasing");
+				} else if (!(xx[i] < xx[i-1]) && !asc){
+        			throw std::invalid_argument("x must be strictly decreasing");
+				}
+			}
+		}
+
+		// Derived interpolators are polymorphic (rawinterp is virtual), so destruction
+		// through a Base_interp pointer must reach the derived destructor.
+		virtual ~Base_interp() = default;
+
+		// Finds the table window for x (hunt() or locate(), depending on cor) and
+		// returns the derived class's interpolated value.
+		T interp(T x){
+			int64_t jlo;
+			if (cor){
+				jlo = hunt(x);
+			}
+			else{
+				jlo = locate(x);
+			}
+			return rawinterp(jlo,x);
+		}
+
+		// Derived classes provide this as the actual interpolation method.
+		T virtual rawinterp(int64_t jlo, T x) = 0;
+
+
+		// Bisection search over the whole table.
+		// Returns the start index of the mm-point window to use for x, clamped to [0, n-mm].
+		// Also records the bracket in jsav and updates cor for the next call.
+		// Throws std::runtime_error if n/mm are inconsistent.
+		int64_t locate(const T x){
+			int64_t ju, jl;
+			int64_t jm;
+
+			if (n < 2 || mm < 2 || mm > n){
+				throw std::runtime_error("Interpolate: locate size error");	
+			}
+
+			bool ascnd = (xx[n-1] >= xx[0]);			// True if ascending order of table, false otherwise.
+			jl = 0;										// Initialize lower
+			ju = n-1;									// and upper limits.
+			while (ju - jl > 1) {						// If we are not yet done,
+				jm = (ju+jl) >> 1;						// compute a midpoint,
+				if ((x >= xx[jm]) == ascnd){
+					jl=jm;								// and replace either the lower limit
+				}else{
+					ju=jm;								// or the upper limit, as appropriate.
+				}
+			}											// Repeat until the test condition is satisfied.
+
+			// Decide whether to use hunt or locate next time (same helper as hunt()).
+			cor = !(numerics::abs(jl - jsav) > dj);
+			jsav = jl;
+			return numerics::max(static_cast<int64_t>(0), numerics::min(n-mm, jl-((mm-2)>>1)));
+		}
+
+		// Search that starts from the previous bracket jsav, expanding the step by doubling
+		// until x is bracketed, then bisecting. Same return value and bookkeeping as locate().
+		// Throws std::runtime_error if n/mm are inconsistent.
+		int64_t hunt(const T x){
+			int64_t jl=jsav, jm, ju, inc=1;
+
+			if (n < 2 || mm < 2 || mm > n){
+				throw std::runtime_error("Interpolate: hunt size error");	
+			}
+			bool ascnd=(xx[n-1] >= xx[0]);				// True if ascending order of table, false otherwise.
+			if (jl < 0 || jl > n-1) {					// Input guess not useful. Go immediately to bisection.
+				jl=0;
+				ju=n-1;
+			}else{
+				if ((x >= xx[jl]) == ascnd){			// Hunt up:
+					for (;;){
+						ju = jl + inc;
+						if (ju >= n-1){
+							ju = n-1;
+							break;						// Off end of table.
+						}else if((x < xx[ju]) == ascnd){
+							break;						// Found bracket.
+						}else{							// Not done, so double the increment and try again.
+							jl = ju;
+							inc += inc;
+						}
+					}	
+				}else{									// Hunt down:
+					ju = jl;
+					for (;;){
+						jl = jl - inc;
+						if (jl <= 0){					//Off end of table.
+							jl = 0;	
+							break;
+						}else if((x >= xx[jl]) == ascnd){
+							break;						// Found bracket.
+						}
+						else{							// Not done, so double the increment and try again.
+							ju = jl;
+							inc += inc;
+						}
+					}
+				}
+			}
+
+
+			while(ju-jl > 1){							// Hunt is done, so begin the final bisection phase:
+				jm = (ju+jl) >> 1;
+				if ((x >= xx[jm]) == ascnd){
+					jl =jm;
+				}else{
+					ju=jm;
+				}
+			}
+			cor = !(numerics::abs(jl-jsav) > dj);		// Decide whether to use hunt or locate next time.
+			jsav = jl;
+			return numerics::max(static_cast<int64_t>(0), numerics::min(n-mm, jl-((mm-2)>>1)));
+
+		}
+
+	};
+
+} // namespace numerics
+
--- a/include/numerics/interpolation1d/interpolation1d_cubic_spline.h
+++ b/include/numerics/interpolation1d/interpolation1d_cubic_spline.h
@@ -0,0 +1,86 @@
+#pragma once
+
+#include "./numerics/interpolation1d/interpolation1d_base.h"
+
+//#include "./numerics/abs.h"
+#include "./utils/vector.h"
+
+
+namespace numerics{
+	
+	// Cubic-spline interpolation over a table of points.
+	//
+	// The second derivatives at the table points (y2) are computed once in the constructor;
+	// rawinterp() then evaluates the cubic on the bracketing interval. Through Base_interp
+	// only raw pointers to the x/y data are kept, so that data must outlive this object.
+	template <typename T>
+	struct interp_cubic_spline : Base_interp<T> {
+		using Base   = Base_interp<T>;
+	  	// bring base data members into scope (or use this->xx / this->yy below)
+	  	using Base::xx;
+	  	using Base::yy;
+	  	//using Base::mm;
+
+	  	utils::Vector<T> y2;	// second derivatives of the spline at the table points
+
+
+		// xv, yv   : abscissas and ordinates (taken by const reference, like the other interpolators).
+		// yp1, ypn : first derivatives at the first / last point. A value above 0.99e99
+		//            (the default 1e99) selects the "natural" condition y2 = 0 at that end.
+		interp_cubic_spline(const utils::Vector<T> &xv, const utils::Vector<T> &yv, T yp1=T{1.e99}, T ypn=T{1.e99})
+		: Base_interp<T>(xv, &yv[0], 2), y2(xv.size(),T{0}) {
+			sety2(&xv[0], &yv[0], yp1, ypn);
+		}
+
+		// Same as above with the ordinates supplied as a raw pointer.
+		interp_cubic_spline(const utils::Vector<T> &xv, const T *yv, T yp1=T{1.e99}, T ypn=T{1.e99})
+		: Base_interp<T>(xv, yv, 2), y2(xv.size(),T{0}) {
+			sety2(&xv[0], yv, yp1, ypn);
+		}
+
+
+		// Fills y2 by solving the tridiagonal system for the spline's second derivatives.
+		// Reads y2.size() entries from xv and yv.
+		void sety2(const T *xv, const T *yv, T yp1, T ypn){
+
+			T p, qn, sig, un;
+			const uint64_t count = y2.size();
+			utils::Vector<T> u(count-1, T{0});
+
+			if (yp1 > static_cast<T>(0.99e99)){						// The lower boundary condition is set either to be "natural"
+				y2[0] = u[0] = T{0};
+			}else{													// or else to have a specified first derivative.
+				y2[0] = T{-0.5};
+				u[0] = (T{3}/(xv[1]-xv[0]))*(((yv[1]-yv[0])/(xv[1]-xv[0]))-yp1);
+			}
+			for (uint64_t i = 1; i < count-1; ++i){					// This is the decomposition loop of the tridiagonal algorithm
+				sig = (xv[i]-xv[i-1])/(xv[i+1]-xv[i-1]);
+				p = sig*y2[i-1]+T{2};
+				y2[i] = (sig - T{1})/p;								// y2 and u are used for temporary storage of the decomposed factors.
+				u[i]=((yv[i+1]-yv[i])/(xv[i+1]-xv[i])) - ((yv[i]-yv[i-1])/(xv[i]-xv[i-1]));
+				u[i]=((T{6}*u[i]/(xv[i+1]-xv[i-1])) - sig*u[i-1])/p;
+			}
+			if (ypn > static_cast<T>(0.99e99)){						// The upper boundary condition is set either to be "natural"
+				qn = un = T{0};
+			}else{													// or else to have a specified first derivative.
+				qn = T{0.5};
+				un = (T{3}/(xv[count-1]-xv[count-2]))*(ypn-((yv[count-1]-yv[count-2])/(xv[count-1]-xv[count-2])));
+			}
+			y2[count-1] = (un-(qn*u[count-2]))/((qn*y2[count-2])+T{1});
+			// Back-substitution, from the second-to-last point down to the first.
+			for (int64_t k = static_cast<int64_t>(count)-2; k >= 0; --k){
+				y2[k] = y2[k] * y2[k+1]+u[k];
+			}
+		}
+
+
+		// Evaluates the spline at x on the interval [xx[jl], xx[jl+1]].
+		// Throws std::invalid_argument if the two abscissas coincide.
+		T rawinterp(int64_t jl, T x) override{
+
+			int64_t klo=jl, khi=jl+1;
+			T y, h, b, a;
+
+			h = xx[khi] - xx[klo];
+			if (h == T{0}){											// The xa's must be distinct.
+				throw std::invalid_argument("interp_cubic_spline: Bad input to routine splint");
+			}
+
+			a = (xx[khi] - x)/h;									// Cubic spline polynomial is now evaluated.
+			b = (x - xx[klo])/h;
+			y = a*yy[klo] + b*yy[khi] + ( ((a*a*a) - a)*y2[klo] + ((b*b*b) - b)*y2[khi] ) * (h*h) / T{6};
+
+
+			return y;
+		}
+
+	};
+
+} // namespace numerics
+
--- a/include/numerics/interpolation1d/interpolation1d_linear.h
+++ b/include/numerics/interpolation1d/interpolation1d_linear.h
@@ -0,0 +1,29 @@
+#pragma once
+
+#include "./numerics/interpolation1d/interpolation1d_base.h"
+
+
+namespace numerics{
+	
+	// Piecewise-linear interpolation between neighbouring table points.
+	// Through Base_interp only raw pointers to xv and yv are kept; both must outlive this object.
+	template <typename T>
+	struct interp_linear : Base_interp<T> {
+		using Base   = Base_interp<T>;
+	  	// Base members made visible without this-> qualification.
+	  	using Base::xx;
+	  	using Base::yy;
+
+
+		// Two-point windows: the base class is told to use mm = 2.
+		interp_linear(const utils::Vector<T> &xv, const utils::Vector<T> &yv): Base_interp<T>(xv, &yv[0], 2){}
+
+		// Value at x of the straight line through (xx[j], yy[j]) and (xx[j+1], yy[j+1]).
+		T rawinterp(int64_t j, T x) override{
+			if (xx[j]==xx[j+1]){
+				// Degenerate interval: fall back to the left ordinate instead of dividing by zero.
+				return yy[j];
+			}
+			const T rise = yy[j+1]-yy[j];
+			return (yy[j] + ((x-xx[j])/(xx[j+1]-xx[j]))*rise);
+		}
+
+	};
+
+} // namespace numerics
+
--- a/include/numerics/interpolation1d/interpolation1d_polynomial.h
+++ b/include/numerics/interpolation1d/interpolation1d_polynomial.h
@@ -0,0 +1,75 @@
+#pragma once
+
+#include "./numerics/interpolation1d/interpolation1d_base.h"
+
+#include "./numerics/abs.h"
+#include "./utils/vector.h"
+
+
+namespace numerics{
+	
+	// Polynomial interpolation through mm neighbouring table points (tableau of c/d corrections).
+	// After each evaluation dy holds the last correction that was added to the result.
+	// Through Base_interp only raw pointers to xv and yv are kept; both must outlive this object.
+	template <typename T>
+	struct interp_polynomial : Base_interp<T> {
+		using Base   = Base_interp<T>;
+	  	// Base members made visible without this-> qualification.
+	  	using Base::xx;
+	  	using Base::yy;
+	  	using Base::mm;
+
+	  	T dy;		// last correction applied by rawinterp()
+
+
+		// m : number of table points used per evaluation.
+		interp_polynomial(const utils::Vector<T> &xv, const utils::Vector<T> &yv, uint64_t m)
+		: Base_interp<T>(xv, &yv[0], m), dy(T{0}){}
+
+		// Evaluates the polynomial through the mm points starting at index jl.
+		// Throws std::invalid_argument if two abscissas in the window are identical.
+		T rawinterp(int64_t jl, T x) override{
+
+			const T *xa = &xx[jl];
+			const T *ya = &yy[jl];
+			utils::Vector<T> c(mm,0), d(mm,0);
+
+			// Find the window entry closest to x while seeding the tableau.
+			int64_t ns = 0;
+			T dif = numerics::abs(x-xa[0]);
+			for (int64_t i = 0; i < mm; ++i){
+				const T dift = numerics::abs(x-xa[i]);
+				if (dift < dif){
+					ns = i;
+					dif = dift;
+				}
+				c[i] = ya[i];
+				d[i] = ya[i];
+			}
+
+			T y = ya[ns];							// initial approximation: the closest ordinate
+			--ns;
+
+			for (int64_t m = 1; m < mm; ++m){		// one pass per tableau column
+				const int64_t span = mm - m;
+				for (int64_t i = 0; i < span; ++i){
+					const T ho = xa[i]-x;
+					const T hp = xa[i+m]-x;
+					const T w = c[i+1]-d[i];
+					const T den = ho-hp;
+					if (den == T{0.0}){
+						// Only possible when two abscissas are identical (to within roundoff).
+						throw std::invalid_argument("interp_polynomial error");
+					}
+					const T ratio = w/den;
+					d[i] = hp*ratio;
+					c[i] = ho*ratio;
+				}
+
+				// Pick the c or d correction depending on where ns sits in the remaining column.
+				if (2 * (ns + 1) < span) {
+					dy = c[ns + 1];
+				} else {
+					dy = d[ns];
+					--ns;
+				}
+				y += dy;
+			}
+			return y;
+		}
+
+	};
+
+} // namespace numerics
+
--- a/include/numerics/interpolation1d/interpolation1d_rational.h
+++ b/include/numerics/interpolation1d/interpolation1d_rational.h
@@ -0,0 +1,79 @@
+#pragma once
+
+#include "./numerics/interpolation1d/interpolation1d_base.h"
+
+#include "./utils/vector.h"
+#include "./numerics/abs.h"
+
+namespace numerics{
+	
+	// Rational-function interpolation through mm neighbouring table points.
+	// After each evaluation dy holds the last correction that was added to the result.
+	// Through Base_interp only raw pointers to xv and yv are kept; both must outlive this object.
+	template <typename T>
+	struct interp_rational : Base_interp<T> {
+		using Base   = Base_interp<T>;
+
+	  	// Base members made visible without this-> qualification.
+	  	using Base::xx;
+	  	using Base::yy;
+	  	using Base::mm;
+
+	  	T dy;		// last correction applied by rawinterp()
+
+
+
+		// m : number of table points used per evaluation.
+		interp_rational(const utils::Vector<T> &xv, const utils::Vector<T> &yv, uint64_t m)
+		: Base_interp<T>(xv, &yv[0], m), dy(T{0}){}
+
+		// Evaluates the rational interpolant through the mm points starting at index jl.
+		// Returns the stored ordinate (and sets dy to zero) when x hits a table abscissa exactly.
+		// Throws std::invalid_argument when the interpolating function has a pole at x.
+		T rawinterp(int64_t jl, T x) override{
+
+			const T TINY = T{1.0e-99};
+			const T *xa = &xx[jl];
+			const T *ya = &yy[jl];
+			utils::Vector<T> c(mm, T{0}), d(mm, T{0});
+
+			int64_t ns = 0;
+			T hh = numerics::abs(x - xa[0]);
+
+			for (int64_t i = 0; i < mm; ++i){
+				const T dist = numerics::abs(x-xa[i]);
+				if (dist == T{0}){
+					dy = T{0};
+					return ya[i];
+				}
+				if (dist < hh){
+					ns = i;
+					hh = dist;
+				}
+				c[i] = ya[i];
+				d[i] = ya[i] + TINY;					// TINY guards against a rare zero-over-zero condition.
+			}
+
+			T y = ya[ns];
+			--ns;
+
+			for (int64_t m = 1; m < mm; ++m){
+				const int64_t span = mm - m;
+				for (int64_t i = 0; i < span; ++i){
+					const T w = c[i+1] - d[i];
+					const T h = xa[i+m] - x;			// never zero: exact hits returned in the loop above
+					const T t = (xa[i] - x)*d[i]/h;
+					const T diff = t - c[i+1];
+					if (diff == T{0}){					// pole of the interpolating function at x
+						throw std::invalid_argument("Error in routine interp_rational");
+					}
+					const T ratio = w/diff;
+					d[i] = c[i+1]*ratio;
+					c[i] = t*ratio;
+				}
+
+				if (2 * (ns + 1) < span) {
+					dy = c[ns + 1];
+				} else {
+					dy = d[ns];
+					--ns;
+				}
+				y += dy;
+			}
+			return y;
+		}
+
+	};
+
+} // namespace numerics
+
--- a/include/numerics/inverse.h
+++ b/include/numerics/inverse.h
@@ -0,0 +1,45 @@
+#ifndef _inverse_n_
+#define _inverse_n_
+
+
+#include "./utils/vector.h"
+#include "./utils/matrix.h"
+
+#include "./numerics/inverse/inverse_gauss_jordan.h"
+#include "./numerics/inverse/inverse_lu.h"
+
+#include <omp.h>
+
+
+namespace numerics{
+	
+	// Replaces A by its inverse using the selected algorithm.
+	//
+	//   method : "Gauss-Jordan" (default, calls inverse_gj) or "LU" (calls inplace_inverse_lu).
+	//
+	// Throws std::runtime_error if A is not square or the method name is not recognised.
+	template <typename T>
+   	void inplace_inverse(utils::Matrix<T>& A, std::string method = "Gauss-Jordan"){
+
+		const bool square = (A.rows() == A.cols());
+		if (!square) {
+		    throw std::runtime_error("inplace_inverse: non-square matrix");
+		}
+
+		if (method == "Gauss-Jordan"){
+			inverse_gj(A);
+			return;
+		}
+		if (method == "LU"){
+			inplace_inverse_lu(A);
+			return;
+		}
+
+		throw std::runtime_error("numerics::inplace_inverse(" + method + ") - Not implemented yet \r \nImplemented: 'Gauss-Jordan', 'LU'");
+	}
+
+
+
+	// Returns the inverse of A, leaving A untouched (the work is done on a copy by inplace_inverse).
+	//
+	//   method : forwarded to inplace_inverse ("Gauss-Jordan" by default, or "LU").
+	//
+	// A is taken by const reference since it is only copied; const matrices are accepted.
+	// Throws whatever inplace_inverse throws (non-square input, unknown method, ...).
+	template <typename T>
+	utils::Matrix<T> inverse(const utils::Matrix<T>& A, std::string method = "Gauss-Jordan"){
+		utils::Matrix<T> B = A;
+		inplace_inverse(B, method);
+		return B;
+	}
+
+} // namespace numerics
+
+#endif // _inverse_n_
--- a/include/numerics/inverse/inverse_gauss_jordan.h
+++ b/include/numerics/inverse/inverse_gauss_jordan.h
@@ -0,0 +1,100 @@
+#ifndef _inverse_gj_n_
+#define _inverse_gj_n_
+
+
+#include "./utils/vector.h"
+#include "./utils/matrix.h"
+
+#include "./numerics/initializers/eye.h"
+
+#include <omp.h>
+
+namespace numerics{
+	
+	// Replaces A by its inverse using Gauss-Jordan elimination with full pivoting.
+	//
+	// The inverse is accumulated in A itself: each pivot column is overwritten with the
+	// corresponding column of the inverse, and the column interchanges implied by the row
+	// swaps are undone at the end. All arithmetic is carried out in T.
+	//
+	// Throws std::runtime_error if A is not square or a pivot is too small (magnitude <= 1e-14).
+	template <typename T>
+   	void inverse_gj(utils::Matrix<T>& A){
+
+		const uint64_t rows{A.rows()};
+		if (rows != A.cols()){
+			throw std::runtime_error("inverse_gj: non-square matrix");
+		}
+
+		uint64_t icol{0}, irow{0};
+		// indxr/indxc record the row/column of each pivot; ipiv marks columns already used.
+		utils::Vi indxc(rows,0), indxr(rows,0), ipiv(rows,0);
+
+		for (uint64_t i = 0; i < rows; i++){
+			// Search the not-yet-reduced part of A for the element of largest magnitude.
+			T big = T{0};
+			for (uint64_t j = 0; j < rows; j++){
+				if (ipiv[j] != 1){
+					for (uint64_t k = 0; k < rows; k++){
+						if (ipiv[k] == 0){
+							const T value = A(j,k);
+							// Magnitude computed locally so no particular abs overload is relied on.
+							const T magnitude = (value < T{0}) ? -value : value;
+							if (magnitude >= big){
+								big = magnitude;
+								irow = j;
+								icol = k;
+							}
+						}
+					}
+				}
+			}
+			if (big <= T{1e-14}){
+				throw std::runtime_error("utill:inplace_inverse('Gauss-Jordan' - Singular Matrix");
+			}
+			ipiv[icol]++;
+
+			// Bring the pivot onto the diagonal by swapping rows irow and icol.
+			if (irow != icol){
+				for (uint64_t l = 0; l < rows; l++){
+					const T held = A(irow,l);
+					A(irow,l) = A(icol,l);
+					A(icol,l) = held;
+				}
+			}
+			indxr[i] = irow;
+			indxc[i] = icol;
+			if (A(icol,icol) == T{0}){
+				throw std::runtime_error("utill:inplace_inverse('Gauss-Jordan' - Singular Matrix");
+			}
+
+			// Normalise the pivot row. The pivot slot is set to one first so that, after
+			// scaling, it holds the inverse's entry instead of the reduced value.
+			const T pivinv = T{1}/A(icol,icol);
+			A(icol,icol) = T{1};
+			for (uint64_t l = 0; l < rows; l++){
+				A(icol,l) *= pivinv;
+			}
+
+			// Eliminate the pivot column from every other row.
+			for (uint64_t ll = 0; ll < rows; ll++){
+				if (ll != icol){
+					const T dum = A(ll,icol);
+					A(ll,icol) = T{0};
+					for (uint64_t l = 0; l < rows; l++){
+						A(ll,l) -= A(icol,l)*dum;
+					}
+				}
+			}
+
+		}
+
+		// Undo the column permutation, in reverse order of the pivots.
+		for (int64_t l = static_cast<int64_t>(rows)-1; l >= 0; l--){
+			if (indxr[l] != indxc[l]){
+				for (uint64_t k = 0; k < rows; k++){
+					const T held = A(k,indxr[l]);
+					A(k,indxr[l]) = A(k,indxc[l]);
+					A(k,indxc[l]) = held;
+				}
+			}
+		}
+	}
+
+} // namespace numerics
+
+#endif // _inverse_gj_n_
--- a/include/numerics/inverse/inverse_lu.h
+++ b/include/numerics/inverse/inverse_lu.h
@@ -0,0 +1,18 @@
+#pragma once
+
+#include "./decomp/lu.h"
+
+
+namespace numerics{
+	
+	// Replaces A by its inverse using an LU decomposition.
+	// Throws std::runtime_error if A is not square.
+	template <typename T>
+   	void inplace_inverse_lu(utils::Matrix<T>& A){
+   		if (A.rows() != A.cols()){
+   			throw std::runtime_error("numerics inverse_lu: non-square matrix");
+   		}
+
+   		// The decomposition object is built from A and then asked to write the inverse into A.
+   		// NOTE(review): presumably LUdcmp keeps its own copy of the factors, so overwriting A is safe - confirm in decomp/lu.h.
+   		decomp::LUdcmp<T> lu(A);
+   		lu.inplace_inverse(A);
+   	}
+
+}
--- a/include/numerics/matadd.h
+++ b/include/numerics/matadd.h
@@ -0,0 +1,226 @@
+#ifndef _matadd_n_
+#define _matadd_n_
+
+#include "./utils/vector.h"
+#include "./utils/matrix.h"
+#include "./core/omp_config.h"
+
+namespace numerics{
+	
+	// Adds the vector x to every column of A in place: A(r, c) += x[r].
+	// Throws std::runtime_error unless x.size() equals the number of rows of A.
+	template <typename T>
+	void inplace_matadd_colvec(utils::Matrix<T>& A, const utils::Vector<T>& x) {
+
+	    const uint64_t n_rows = A.rows();
+	    const uint64_t n_cols = A.cols();
+
+	    const bool sizes_match = (n_rows == x.size());
+	    if (!sizes_match) {
+	        throw std::runtime_error("inplace_matadd_colvec: dimension mismatch");
+	    }
+
+	    for (uint64_t c = 0; c < n_cols; ++c) {
+	        uint64_t r = 0;
+	        while (r < n_rows) {
+	            A(r, c) +=  x[r];
+	            ++r;
+	        }
+	    }
+	}
+
+	// Adds the vector x to every row of A in place: A(r, c) += x[c].
+	// Throws std::runtime_error unless x.size() equals the number of columns of A.
+	template <typename T>
+	void inplace_matadd_rowvec(utils::Matrix<T>& A, const utils::Vector<T>& x) {
+
+	    const uint64_t n_rows = A.rows();
+	    const uint64_t n_cols = A.cols();
+
+	    const bool sizes_match = (n_cols == x.size());
+	    if (!sizes_match) {
+	        throw std::runtime_error("inplace_matadd_rowvec: dimension mismatch");
+	    }
+
+	    for (uint64_t c = 0; c < n_cols; ++c) {
+	        uint64_t r = 0;
+	        while (r < n_rows) {
+	            A(r, c) +=  x[c];
+	            ++r;
+	        }
+	    }
+	}
+
+	// Returns a copy of A with x added to every column (B(r, c) = A(r, c) + x[r]); A is not modified.
+	// Throws std::runtime_error (from inplace_matadd_colvec) unless x.size() == A.rows().
+	template <typename T>
+	utils::Matrix<T> matadd_colvec(const utils::Matrix<T>& A, const utils::Vector<T>& x) {
+	    
+	    //const uint64_t rows = A.rows();
+	    //const uint64_t cols = A.cols();
+
+	    // Work on a copy so the input stays untouched.
+	    utils::Matrix<T> B = A;
+
+	    inplace_matadd_colvec(B, x);
+
+	    return B;
+	}
+
+	// Returns a copy of A with x added to every row (B(r, c) = A(r, c) + x[c]); A is not modified.
+	// Throws std::runtime_error (from inplace_matadd_rowvec) unless x.size() == A.cols().
+	template <typename T>
+	utils::Matrix<T> matadd_rowvec(const utils::Matrix<T>& A, const utils::Vector<T>& x) {
+	    
+	    //const uint64_t rows = A.rows();
+	    //const uint64_t cols = A.cols();
+
+	    // Work on a copy so the input stays untouched.
+	    utils::Matrix<T> B = A;
+
+	    inplace_matadd_rowvec(B, x);
+
+	    return B;
+	}
+
+	// Adds a vector to a matrix, broadcasting it over rows or columns, and returns the sum.
+	//
+	//   method : "row"  - x is added to every row    (requires x.size() == A.cols()),
+	//            "col"  - x is added to every column (requires x.size() == A.rows()),
+	//            "auto" - picks whichever of the two matches x.size(). Square matrices are
+	//                     rejected in this mode because both orientations could apply.
+	//
+	// Throws std::runtime_error for an unknown method, for a square matrix or a vector of
+	// unusable length in "auto" mode, and for a size mismatch in "row"/"col" mode.
+	template <typename T>
+	utils::Matrix<T> matadd(const utils::Matrix<T>& A, const utils::Vector<T>& x, std::string method = "auto"){
+
+		if (method == "row"){
+			return matadd_rowvec(A, x);
+		}
+		if (method == "col"){
+			return matadd_colvec(A, x);
+		}
+		if (method != "auto"){
+			throw std::runtime_error("matadd: undefined fault - defined method");
+		}
+
+		const uint64_t rows = A.rows();
+	    const uint64_t cols = A.cols();
+	    const uint64_t N = x.size();
+
+		if (rows == cols){
+			throw std::runtime_error("matadd: too many options for dimensions");
+		}
+		// A vector as long as a column (one entry per row) is added to every column;
+		// a vector as long as a row (one entry per column) is added to every row.
+		if (rows == N){
+			return matadd_colvec(A, x);
+		}
+		if (cols == N){
+			return matadd_rowvec(A, x);
+		}
+		throw std::runtime_error("matadd: undefined fault - auto");
+    }
+
+
+
+
+	/*
+	// -------------- Collapse(2) OpenMP ----------------
+	template <typename T>
+	utils::Vector<T> matvec_omp(const utils::Matrix<T>& A, const utils::Vector<T>& x) {
+	    if (A.cols() != x.size()) {
+	        throw std::runtime_error("matvec: dimension mismatch");
+	    }
+
+	    const uint64_t m = A.rows();
+	    const uint64_t n = A.cols();
+
+	    utils::Vector<T> y(m, T{0}); // <-- y has length m (rows)
+
+
+        const T* xptr = x.data();
+        const T* Aptr = A.data();	// row-major: A(i,j) == Aptr[i*n + j]
+
+        // Each row i is an independent dot product: y[i] = dot(A[i,*], x)
+	    #pragma omp parallel for schedule(static)
+	    for (uint64_t i = 0; i < m; ++i) {
+            const T* row = Aptr + i * n;     // contiguous row i
+            T acc = T{0};
+            #pragma omp simd reduction(+:acc)
+	        for (uint64_t j = 0; j < n; ++j) {
+	            acc += row[j] * xptr[j];
+	        }
+	        y[i] = acc;
+		}
+
+	    return y;
+	}
+
+	// -------------- Auto OpenMP ----------------
+	template <typename T>
+	utils::Vector<T> matvec_auto(const utils::Matrix<T>& A,
+	                             const utils::Vector<T>& x) {
+
+	    
+	    uint64_t work = A.rows() * A.cols();
+
+	    bool can_parallel = omp_config::omp_parallel_allowed();
+		#ifdef _OPENMP
+		    int threads = omp_get_max_threads();
+		#else
+		    int threads = 1;
+		#endif
+
+	    if (can_parallel || work > static_cast<uint64_t>(threads) * 4ull) {
+	        return matvec_omp(A,x);
+	    }
+	    else{
+	    	// Safe fallback
+	    	return matvec(A,x);
+	    }
+	    
+	}
+
+// =================================================
+//   y = x * A    (Vector–Matrix product)
+// =================================================
+	template <typename T>
+	utils::Vector<T> vecmat(const utils::Vector<T>& x, const utils::Matrix<T>& A) {
+	    if (x.size() != A.rows()) {
+	        throw std::runtime_error("vecmat: dimension mismatch");
+	    }
+	    const uint64_t m = A.rows();
+	    const uint64_t n = A.cols();
+
+	    utils::Vector<T> y(n, T{0});
+
+	    for (uint64_t j = 0; j < n; ++j) {
+	        for (uint64_t i = 0; i < m; ++i) {
+	            y[j] += x[i] * A(i, j);
+	        }
+	    }
+	    return y;
+	}
+
+	// -------------- Collapse(2) OpenMP ----------------
+	template <typename T>
+	utils::Vector<T> vecmat_omp(const utils::Vector<T>& x, const utils::Matrix<T>& A) {
+	    if (x.size() != A.rows()) {
+	        throw std::runtime_error("vecmat: dimension mismatch");
+	    }
+	    const uint64_t m = A.rows();
+	    const uint64_t n = A.cols();
+
+	    utils::Vector<T> y(n, T{0});
+	    #pragma omp parallel for schedule(static)
+	    for (uint64_t j = 0; j < n; ++j) {
+	        T acc = T{0};
+	        for (uint64_t i = 0; i < m; ++i) {
+	            acc += x[i] * A(i, j);
+	        }
+	        y[j] = acc;
+	    }
+	    return y;
+	}
+
+	// -------------- Auto OpenMP ----------------
+	template <typename T>
+	utils::Vector<T> vecmat_auto(const utils::Vector<T>& x,
+								 const utils::Matrix<T>& A) {
+	    
+	    uint64_t work = A.rows() * A.cols();
+
+	    bool can_parallel = omp_config::omp_parallel_allowed();
+		#ifdef _OPENMP
+		    int threads = omp_get_max_threads();
+		#else
+		    int threads = 1;
+		#endif
+
+	    if (can_parallel || work > static_cast<uint64_t>(threads) * 4ull) {
+	        return vecmat_omp(x,A);
+	    }
+	    else{
+	    	// Safe fallback
+	    	return vecmat(x,A);
+	    }
+	    
+	}
+*/
+
+} // namespace numerics
+
+#endif // _matadd_n_
--- a/include/numerics/matdiv.h
+++ b/include/numerics/matdiv.h
@@ -0,0 +1,38 @@
+#ifndef _matdiv_n_
+#define _matdiv_n_
+
+
+#include "./utils/matrix.h"
+#include "./core/omp_config.h"
+
+
+namespace numerics{
+
+// ---------------- Serial baseline ----------------
+	// Divides a matrix element-wise by a broadcast vector and returns the result; A is not modified.
+	//
+	//   method : "row" - C(i, j) = A(i, j) / b[j]  (requires b.size() == A.cols()),
+	//            "col" - C(i, j) = A(i, j) / b[i]  (requires b.size() == A.rows()).
+	//
+	// Throws std::runtime_error for any other method name or when b has the wrong length.
+	// Entries of b are not checked for zero.
+	template <typename T>
+	utils::Matrix<T> matdiv(const utils::Matrix<T>& A, const utils::Vector<T>& b, std::string method){
+
+        const bool by_row = (method == "row");
+        const bool by_col = (method == "col");
+
+        // Validate before copying so a bad call costs nothing.
+        if (!by_row && !by_col){
+            throw std::runtime_error("matdiv: choose div by: 'row' or 'col'");
+        }
+
+        const uint64_t rows = A.rows();
+        const uint64_t cols = A.cols();
+
+        // b is indexed by column in "row" mode and by row in "col" mode.
+        const uint64_t expected = by_row ? cols : rows;
+        if (b.size() != expected){
+            throw std::runtime_error("matdiv: dimension mismatch");
+        }
+
+        utils::Matrix<T> C = A;
+
+        for (uint64_t i = 0; i < rows; ++i){
+            for (uint64_t j = 0; j < cols; ++j){
+                C(i,j) /= by_row ? b[j] : b[i];
+            }
+        }
+        return C;
+	}
+
+
+} // namespace numerics
+
+#endif // _matdiv_n_
--- a/include/numerics/matequal.h
+++ b/include/numerics/matequal.h
@@ -0,0 +1,85 @@
+#pragma once
+
+#include "./core/omp_config.h"
+
+#include "./utils/matrix.h"
+#include "./numerics/abs.h"
+
+
+namespace numerics{
+	
+	// -------------- Serial ----------------
+	// Serial element-wise comparison of two matrices.
+	//
+	// Matrices of different shape are never equal. For floating-point T two elements match
+	// when |A(i,j) - B(i,j)| <= tol; for every other T they must compare equal with ==.
+	// A NaN difference never satisfies the tolerance, so matrices containing NaN compare
+	// unequal - the same answer matequal_omp gives.
+	template <typename T>
+	bool matequal(const utils::Matrix<T>& A, const utils::Matrix<T>& B, double tol = 1e-9) {
+
+	    if (A.rows() != B.rows() || A.cols() != B.cols()) {
+	        return false;
+	    }
+
+	    const bool decimal = std::is_floating_point<T>::value;
+	    const uint64_t rows=A.rows(), cols=A.cols();
+
+	    for (uint64_t i = 0; i < rows; ++i){
+	        for (uint64_t j = 0; j < cols; ++j){
+	            if (decimal) {
+	                // Written as "not within tolerance" so that NaN fails the test.
+	                if (!(numerics::abs(A(i,j) - B(i,j)) <= static_cast<T>(tol))){
+	                	return false;
+	                }
+	            } else {
+	                if (A(i,j) != B(i,j)){
+	                	return false;
+	                }
+	            }
+	        }
+	    }
+	    return true;
+	}
+
+	// -------------- Parallel ----------------
+	// OpenMP-annotated element-wise comparison of two matrices.
+	//
+	// Same rule as matequal: shapes must agree; floating-point elements match when
+	// |A(i,j) - B(i,j)| <= tol, other types when they compare equal with ==.
+	// Every element is visited (no early exit); the per-element results are combined
+	// with a logical-and reduction.
+	template <typename T>
+	bool matequal_omp(const utils::Matrix<T>& A, const utils::Matrix<T>& B, double tol = 1e-9) {
+
+	    const bool same_shape = (A.rows() == B.rows()) && (A.cols() == B.cols());
+	    if (!same_shape) {
+	        return false;
+	    }
+
+	    bool decimal = std::is_floating_point<T>::value;
+	    bool eq = true;
+	    const uint64_t rows=A.rows(), cols=A.cols();
+
+	    #pragma omp parallel for collapse(2) schedule(static) reduction(&&:eq)
+	    for (uint64_t r = 0; r < rows; ++r){
+	        for (uint64_t c = 0; c < cols; ++c){
+	            const bool match = decimal
+	                ? (numerics::abs(A(r,c) - B(r,c)) <= static_cast<T>(tol))
+	                : (A(r,c) == B(r,c));
+	            eq = eq && match;
+	        }
+	    }
+	    return eq;
+	}
+
+	// -------------- Auto OpenMP ----------------
+	// Compares two matrices, picking between matequal_omp and matequal.
+	//
+	// The OpenMP variant is chosen only when omp_config::omp_parallel_allowed() returns true
+	// AND the element count exceeds four per available thread; otherwise the serial variant
+	// runs. When _OPENMP is not defined the serial variant is always used.
+	// Matrices of different shape compare unequal without looking at any element.
+	template <typename T>
+	bool matequal_auto(const utils::Matrix<T>& A, const utils::Matrix<T>& B, double tol = 1e-9) {
+
+	    if (A.rows() != B.rows() || A.cols() != B.cols()) {
+	        return false;
+	    }
+
+	    const uint64_t work = A.rows() * A.cols();
+
+		#ifdef _OPENMP
+			const bool can_parallel = omp_config::omp_parallel_allowed();
+		    const uint64_t threads = static_cast<uint64_t>(omp_get_max_threads());
+		#else
+		    const bool can_parallel = false;
+		    const uint64_t threads = 1;
+		#endif
+
+	    // Both conditions are required: with `||` a large comparison would take the OpenMP
+	    // path even when parallel execution is reported as not allowed.
+	    if (can_parallel && work > threads * 4ull) {
+	        return matequal_omp(A,B,tol);
+	    }
+	    else{
+	    	// Safe fallback
+	    	return matequal(A,B,tol);
+	    }
+	}
+} // namespace numerics
--- a/include/numerics/matmul.h
+++ b/include/numerics/matmul.h
@@ -0,0 +1,124 @@
+#ifndef _matmul_n_
+#define _matmul_n_
+
+
+#include "./utils/matrix.h"
+#include "./core/omp_config.h"
+
+
+namespace numerics{
+
+// ---------------- Serial baseline ----------------
+	// Serial matrix product C = A * B.
+	//   A : (m x n) matrix, B : (n x p) matrix; returns the (m x p) product.
+	//   Throws std::runtime_error when A.cols() != B.rows().
+	// The innermost loop walks one row of B and one row of C: each A(r,s)
+	// is read once and its scaled row of B is added into row r of C.
+	template <typename T>
+	utils::Matrix<T> matmul(const utils::Matrix<T>& A, const utils::Matrix<T>& B){
+
+		if (A.cols() != B.rows()){
+			throw std::runtime_error("matmul: dimension mismatch");
+		}
+
+		const uint64_t out_rows = A.rows();
+		const uint64_t inner    = A.cols(); // equals B.rows() after the check above
+		const uint64_t out_cols = B.cols();
+
+		utils::Matrix<T> C(out_rows, out_cols, T{0});
+
+		for (uint64_t r = 0; r < out_rows; ++r){
+			for (uint64_t s = 0; s < inner; ++s){
+				const T scale = A(r,s);
+				for (uint64_t c = 0; c < out_cols; ++c){
+					C(r,c) += scale * B(s,c);
+				}
+			}
+		}
+		return C;
+	}
+
+// ---------------- Rows-only OpenMP ----------------
+// Matrix product C = A * B with the loop over rows of C marked for OpenMP.
+//   A : (m x n) matrix, B : (n x p) matrix; returns the (m x p) product.
+//   Throws std::runtime_error when A.cols() != B.rows().
+// Each C(r,c) is accumulated in a local variable and stored once.
+template <typename T>
+utils::Matrix<T> matmul_rows_omp(const utils::Matrix<T>& A,
+                                 const utils::Matrix<T>& B) {
+    if (A.cols() != B.rows()) {
+        throw std::runtime_error("matmul_rows_omp: dim mismatch");
+    }
+
+    const uint64_t out_rows = A.rows();
+    const uint64_t inner    = A.cols();
+    const uint64_t out_cols = B.cols();
+
+    utils::Matrix<T> C(out_rows, out_cols, T{0});
+
+    #pragma omp parallel for schedule(static)
+    for (uint64_t r = 0; r < out_rows; ++r) {
+        for (uint64_t c = 0; c < out_cols; ++c) {
+            T dot = T{0};
+            for (uint64_t s = 0; s < inner; ++s) {
+                dot += A(r,s) * B(s,c);
+            }
+            C(r,c) = dot;
+        }
+    }
+    return C;
+}
+
+// -------------- Collapse(2) OpenMP ----------------
+// Matrix product C = A * B with the (row, column) loop nest of C collapsed
+// into a single OpenMP iteration space.
+//   A : (m x n) matrix, B : (n x p) matrix; returns the (m x p) product.
+//   Throws std::runtime_error when A.cols() != B.rows().
+// Each C(r,c) is accumulated in a local variable and stored once.
+template <typename T>
+utils::Matrix<T> matmul_collapse_omp(const utils::Matrix<T>& A,
+                                     const utils::Matrix<T>& B) {
+    if (A.cols() != B.rows()) {
+        throw std::runtime_error("matmul_collapse_omp: dim mismatch");
+    }
+
+    const uint64_t out_rows = A.rows();
+    const uint64_t inner    = A.cols();
+    const uint64_t out_cols = B.cols();
+
+    utils::Matrix<T> C(out_rows, out_cols, T{0});
+
+    #pragma omp parallel for collapse(2) schedule(static)
+    for (uint64_t r = 0; r < out_rows; ++r) {
+        for (uint64_t c = 0; c < out_cols; ++c) {
+            T dot = T{0};
+            for (uint64_t s = 0; s < inner; ++s) {
+                dot += A(r,s) * B(s,c);
+            }
+            C(r,c) = dot;
+        }
+    }
+    return C;
+}
+
+
+// -------------------- Auto selector ---------------------
+// Picks a matmul implementation from the number of output cells
+// (A.rows() * B.cols()); the inner dimension is not part of the heuristic.
+//   - serial matmul        : parallel execution not allowed, or fewer than
+//                            4 * threads output cells
+//   - matmul_collapse_omp  : at least 8 * threads output cells
+//   - matmul_rows_omp      : in between, with at least `threads` rows and
+//                            at most 4 columns
+//   - serial matmul        : everything else
+// Dimension checking is left to the selected implementation.
+template <typename T>
+utils::Matrix<T> matmul_auto(const utils::Matrix<T>& A,
+                             const utils::Matrix<T>& B) {
+    const uint64_t out_rows  = A.rows();
+    const uint64_t out_cols  = B.cols();
+    const uint64_t out_cells = out_rows * out_cols;
+
+    #ifdef _OPENMP
+      const bool can_parallel = omp_config::omp_parallel_allowed();
+      const uint64_t threads = static_cast<uint64_t>(omp_get_max_threads());
+    #else
+      const bool can_parallel = false;
+      const uint64_t threads = 1;
+    #endif
+
+    // Tiny problems (or no parallelism available): serial is cheapest.
+    if (!can_parallel || out_cells < threads * 4ull) {
+        return matmul(A, B);
+    }
+
+    // Plenty of (i,j) work: collapse(2) variant.
+    if (out_cells >= 8ull * threads) {
+        return matmul_collapse_omp(A, B);
+    }
+
+    // Only reached for 4*threads <= out_cells < 8*threads:
+    // many rows and very few columns go to the rows-only variant.
+    if (out_rows >= threads && out_cols <= 4) {
+        return matmul_rows_omp(A, B);
+    }
+
+    // Safe fallback
+    return matmul(A, B);
+}
+
+
+
+
+
+} // namespace numerics
+
+#endif // _matmul_n_
--- a/include/numerics/matsubtract.h
+++ b/include/numerics/matsubtract.h
@@ -0,0 +1,102 @@
+#ifndef _matsubtract_n_
+#define _matsubtract_n_
+
+#include "./utils/vector.h"
+#include "./utils/matrix.h"
+#include "./core/omp_config.h"
+
+namespace numerics{
+	
+	// Subtracts x from every column of A, in place: A(r,c) -= x[r].
+	//   A : matrix modified in place.
+	//   x : vector whose length must equal A.rows().
+	//   Throws std::runtime_error when the lengths disagree.
+	template <typename T>
+	void inplace_matsubtract_colvec(utils::Matrix<T>& A, const utils::Vector<T>& x) {
+
+	    const uint64_t n_rows = A.rows();
+	    const uint64_t n_cols = A.cols();
+
+	    if (n_rows != x.size()) {
+	        throw std::runtime_error("inplace_matsubtract_colvec: dimension mismatch");
+	    }
+
+	    for (uint64_t r = 0; r < n_rows; ++r) {
+	        // One vector entry serves the whole row r.
+	        const T offset = x[r];
+	        for (uint64_t c = 0; c < n_cols; ++c) {
+	            A(r, c) -= offset;
+	        }
+	    }
+	}
+
+	// Subtracts x from every row of A, in place: A(r,c) -= x[c].
+	//   A : matrix modified in place.
+	//   x : vector whose length must equal A.cols().
+	//   Throws std::runtime_error when the lengths disagree.
+	template <typename T>
+	void inplace_matsubtract_rowvec(utils::Matrix<T>& A, const utils::Vector<T>& x) {
+
+	    const uint64_t n_rows = A.rows();
+	    const uint64_t n_cols = A.cols();
+
+	    if (n_cols != x.size()) {
+	        throw std::runtime_error("inplace_matsubtract_rowvec: dimension mismatch");
+	    }
+
+	    for (uint64_t r = 0; r < n_rows; ++r) {
+	        for (uint64_t c = 0; c < n_cols; ++c) {
+	            A(r, c) -= x[c];
+	        }
+	    }
+	}
+
+	// Out-of-place counterpart of inplace_matsubtract_colvec: copies A,
+	// applies the in-place routine to the copy and returns it. A itself is
+	// left untouched; errors raised by the in-place routine propagate.
+	template <typename T>
+	utils::Matrix<T> matsubtract_colvec(const utils::Matrix<T>& A, const utils::Vector<T>& x) {
+
+	    utils::Matrix<T> result(A);
+	    inplace_matsubtract_colvec(result, x);
+	    return result;
+	}
+
+	// Out-of-place counterpart of inplace_matsubtract_rowvec: copies A,
+	// applies the in-place routine to the copy and returns it. A itself is
+	// left untouched; errors raised by the in-place routine propagate.
+	template <typename T>
+	utils::Matrix<T> matsubtract_rowvec(const utils::Matrix<T>& A, const utils::Vector<T>& x) {
+
+	    utils::Matrix<T> result(A);
+	    inplace_matsubtract_rowvec(result, x);
+	    return result;
+	}
+
+	// Returns a copy of A with the vector x subtracted along one dimension.
+	//   method == "row"  : matsubtract_rowvec (x.size() must equal A.cols())
+	//   method == "col"  : matsubtract_colvec (x.size() must equal A.rows())
+	//   method == "auto" : the variant is chosen from the sizes; a square A
+	//                      is rejected because both variants would fit.
+	// Throws std::runtime_error for a square A in "auto" mode, when x fits
+	// neither dimension in "auto" mode, or for an unknown method string.
+	template <typename T>
+	utils::Matrix<T> matsubtract(const utils::Matrix<T>& A, const utils::Vector<T>& x, std::string method = "auto"){
+
+		if (method == "row"){
+			return matsubtract_rowvec(A, x);
+		}
+		if (method == "col"){
+			return matsubtract_colvec(A, x);
+		}
+		if (method != "auto"){
+			throw std::runtime_error("matsubtract: undefined fault - defined method");
+		}
+
+		const uint64_t rows = A.rows();
+		const uint64_t cols = A.cols();
+		const uint64_t N = x.size();
+
+		if (rows == cols){
+			throw std::runtime_error("matsubtract: too many options for dimensions");
+		}
+		// The colvec variant requires x.size() == A.rows() and the rowvec
+		// variant requires x.size() == A.cols(); dispatch accordingly (the
+		// two were previously swapped, so "auto" always hit a mismatch).
+		if (rows == N){
+			return matsubtract_colvec(A, x);
+		}
+		if (cols == N){
+			return matsubtract_rowvec(A, x);
+		}
+		throw std::runtime_error("matsubtract: undefined fault - auto");
+	}
+
+
+
+
+} // namespace numerics
+
+#endif // _matsubtract_n_
--- a/include/numerics/matsum.h
+++ b/include/numerics/matsum.h
@@ -0,0 +1,39 @@
+#ifndef _matsum_n_
+#define _matsum_n_
+
+#include "./utils/vector.h"
+#include "./utils/matrix.h"
+#include "./core/omp_config.h"
+
+namespace numerics{
+	
+	// Sums the entries of A along one dimension.
+	//   method == "row" : result has A.cols() entries; entry c is the sum of
+	//                     A(r,c) over all rows r.
+	//   method == "col" : result has A.rows() entries; entry r is the sum of
+	//                     A(r,c) over all columns c.
+	// Throws std::runtime_error for any other method string.
+	// A is only read, so it is taken by const reference (const matrices and
+	// temporaries are accepted).
+	template <typename T>
+	utils::Vector<T> matsum(const utils::Matrix<T>& A, std::string method) {
+
+		const bool over_rows = (method == "row");
+		if (!over_rows && method != "col"){
+			throw std::runtime_error("matsum: choose sum by: 'row' or 'col'");
+		}
+
+		const uint64_t rows = A.rows();
+		const uint64_t cols = A.cols();
+
+		utils::Vector<T> b;
+		b.resize(over_rows ? cols : rows, T{0});
+
+		// A single row-by-row pass serves both modes; each output entry
+		// still receives its terms in increasing index order.
+		for (uint64_t r = 0; r < rows; ++r){
+			for (uint64_t c = 0; c < cols; ++c){
+				if (over_rows){
+					b[c] += A(r, c);
+				}else{
+					b[r] += A(r, c);
+				}
+			}
+		}
+		return b;
+	}
+
+
+} // namespace numerics
+
+#endif // _matsum_n_
--- a/include/numerics/matvec.h
+++ b/include/numerics/matvec.h
@@ -0,0 +1,156 @@
+#ifndef _matvec_n_
+#define _matvec_n_
+
+
+#include "./utils/matrix.h"
+#include "./core/omp_config.h"
+
+namespace numerics{
+	
+// =================================================
+//   y = A * x    (Matrix–Vector product)
+// =================================================
+	// Serial matrix-vector product y = A * x.
+	//   A : (m x n) matrix, x : vector of length n; returns y of length m.
+	//   Throws std::runtime_error when A.cols() != x.size().
+	template <typename T>
+	utils::Vector<T> matvec(const utils::Matrix<T>& A, const utils::Vector<T>& x) {
+
+	    const uint64_t n_rows = A.rows();
+	    const uint64_t n_cols = A.cols();
+
+	    if (n_cols != x.size()) {
+	        throw std::runtime_error("matvec: dimension mismatch");
+	    }
+
+	    utils::Vector<T> y(n_rows, T{0});
+
+	    for (uint64_t r = 0; r < n_rows; ++r) {
+	        // Dot product of row r with x, accumulated left to right.
+	        T dot = T{0};
+	        for (uint64_t c = 0; c < n_cols; ++c) {
+	            dot += A(r, c) * x[c];
+	        }
+	        y[r] = dot;
+	    }
+	    return y;
+	}
+	// -------------- Row-parallel OpenMP ----------------
+	// Matrix-vector product y = A * x with the loop over rows marked for
+	// OpenMP and the inner dot product marked as a simd reduction.
+	//   A : (m x n) matrix, x : vector of length n; returns y of length m.
+	//   Throws std::runtime_error when A.cols() != x.size().
+	// Elements are read through A.data() as a_base[r * n + c], i.e. this
+	// routine relies on A.data() exposing contiguous row-major storage.
+	template <typename T>
+	utils::Vector<T> matvec_omp(const utils::Matrix<T>& A, const utils::Vector<T>& x) {
+
+	    const uint64_t n_rows = A.rows();
+	    const uint64_t n_cols = A.cols();
+
+	    if (n_cols != x.size()) {
+	        throw std::runtime_error("matvec: dimension mismatch");
+	    }
+
+	    utils::Vector<T> y(n_rows, T{0}); // one entry per row of A
+
+	    const T* x_base = x.data();
+	    const T* a_base = A.data();
+
+	    // Rows are independent: y[r] = dot(A[r,*], x)
+	    #pragma omp parallel for schedule(static)
+	    for (uint64_t r = 0; r < n_rows; ++r) {
+	        const T* a_row = a_base + r * n_cols;
+	        T dot = T{0};
+	        #pragma omp simd reduction(+:dot)
+	        for (uint64_t c = 0; c < n_cols; ++c) {
+	            dot += a_row[c] * x_base[c];
+	        }
+	        y[r] = dot;
+	    }
+
+	    return y;
+	}
+
+	// -------------- Auto OpenMP ----------------
+	// Computes y = A * x, picking the OpenMP or the serial implementation.
+	// matvec_omp is used only when BOTH conditions hold:
+	//   - omp_config::omp_parallel_allowed() returns true, and
+	//   - A.rows() * A.cols() exceeds 4 * (maximum OpenMP thread count).
+	// Otherwise the serial matvec is used. Dimension checking is left to
+	// the selected implementation.
+	template <typename T>
+	utils::Vector<T> matvec_auto(const utils::Matrix<T>& A,
+	                             const utils::Vector<T>& x) {
+
+	    const uint64_t work = A.rows() * A.cols();
+
+	    const bool can_parallel = omp_config::omp_parallel_allowed();
+		#ifdef _OPENMP
+		    const int threads = omp_get_max_threads();
+		#else
+		    const int threads = 1;
+		#endif
+
+	    // Both requirements must hold; with `||` the parallel path was taken
+	    // even when parallel execution was not allowed or the input was tiny.
+	    if (can_parallel && work > static_cast<uint64_t>(threads) * 4ull) {
+	        return matvec_omp(A, x);
+	    }
+
+	    // Safe fallback
+	    return matvec(A, x);
+	}
+
+// =================================================
+//   y = x * A    (Vector–Matrix product)
+// =================================================
+	// Serial vector-matrix product y = x * A.
+	//   x : vector of length m, A : (m x n) matrix; returns y of length n.
+	//   Throws std::runtime_error when x.size() != A.rows().
+	template <typename T>
+	utils::Vector<T> vecmat(const utils::Vector<T>& x, const utils::Matrix<T>& A) {
+
+	    const uint64_t n_rows = A.rows();
+	    const uint64_t n_cols = A.cols();
+
+	    if (x.size() != n_rows) {
+	        throw std::runtime_error("vecmat: dimension mismatch");
+	    }
+
+	    utils::Vector<T> y(n_cols, T{0});
+
+	    // Row r of A, scaled by x[r], is added into y; every y[c] therefore
+	    // receives its terms in increasing r order.
+	    for (uint64_t r = 0; r < n_rows; ++r) {
+	        for (uint64_t c = 0; c < n_cols; ++c) {
+	            y[c] += x[r] * A(r, c);
+	        }
+	    }
+	    return y;
+	}
+
+	// -------------- Column-parallel OpenMP ----------------
+	// Vector-matrix product y = x * A with the loop over output entries
+	// (columns of A) marked for OpenMP.
+	//   x : vector of length m, A : (m x n) matrix; returns y of length n.
+	//   Throws std::runtime_error when x.size() != A.rows().
+	template <typename T>
+	utils::Vector<T> vecmat_omp(const utils::Vector<T>& x, const utils::Matrix<T>& A) {
+
+	    const uint64_t n_rows = A.rows();
+	    const uint64_t n_cols = A.cols();
+
+	    if (x.size() != n_rows) {
+	        throw std::runtime_error("vecmat: dimension mismatch");
+	    }
+
+	    utils::Vector<T> y(n_cols, T{0});
+
+	    #pragma omp parallel for schedule(static)
+	    for (uint64_t c = 0; c < n_cols; ++c) {
+	        // Dot product of x with column c, kept in a local accumulator.
+	        T dot = T{0};
+	        for (uint64_t r = 0; r < n_rows; ++r) {
+	            dot += x[r] * A(r, c);
+	        }
+	        y[c] = dot;
+	    }
+	    return y;
+	}
+
+	// -------------- Auto OpenMP ----------------
+	// Computes y = x * A, picking the OpenMP or the serial implementation.
+	// vecmat_omp is used only when BOTH conditions hold:
+	//   - omp_config::omp_parallel_allowed() returns true, and
+	//   - A.rows() * A.cols() exceeds 4 * (maximum OpenMP thread count).
+	// Otherwise the serial vecmat is used. Dimension checking is left to
+	// the selected implementation.
+	template <typename T>
+	utils::Vector<T> vecmat_auto(const utils::Vector<T>& x,
+								 const utils::Matrix<T>& A) {
+
+	    const uint64_t work = A.rows() * A.cols();
+
+	    const bool can_parallel = omp_config::omp_parallel_allowed();
+		#ifdef _OPENMP
+		    const int threads = omp_get_max_threads();
+		#else
+		    const int threads = 1;
+		#endif
+
+	    // Both requirements must hold; with `||` the parallel path was taken
+	    // even when parallel execution was not allowed or the input was tiny.
+	    if (can_parallel && work > static_cast<uint64_t>(threads) * 4ull) {
+	        return vecmat_omp(x, A);
+	    }
+
+	    // Safe fallback
+	    return vecmat(x, A);
+	}
+
+
+} // namespace numerics
+
+#endif // _matvec_n_
--- a/include/numerics/max.h
+++ b/include/numerics/max.h
@@ -0,0 +1,76 @@
+#pragma once
+
+
+#include "./utils/vector.h"
+#include "./utils/matrix.h"
+
+
+namespace numerics{
+	
+	// Returns the larger of two values; when neither compares less than
+	// the other, the first argument is returned.
+	template <typename T>
+	T max(const T a, const T b){
+		return (a < b) ? b : a;
+	}
+
+	// Clamps A from below, in place: every element smaller than b is
+	// replaced by b; all other elements are left as they are.
+	template <typename T>
+	void inplace_max(utils::Matrix<T>& A, const T b){
+
+		const uint64_t n_rows = A.rows();
+		const uint64_t n_cols = A.cols();
+
+		for (uint64_t r = 0; r < n_rows; ++r){
+			for (uint64_t c = 0; c < n_cols; ++c){
+				if (!(b > A(r,c))){
+					continue; // element already at least b
+				}
+				A(r,c) = b;
+			}
+		}
+	}
+
+	// Out-of-place counterpart of inplace_max: returns a copy of A in which
+	// every element smaller than b has been replaced by b.
+	template <typename T>
+	utils::Matrix<T> max(const utils::Matrix<T>& A, const T b){
+
+		utils::Matrix<T> clamped(A);
+		inplace_max(clamped, b);
+		return clamped;
+	}
+
+	// Maximum of A along one dimension.
+	//   method == "cols" : result has A.cols() entries; entry c is the
+	//                      largest element of column c.
+	//   method == "rows" : result has A.rows() entries; entry r is the
+	//                      largest element of row r.
+	// Throws std::runtime_error for any other method string.
+	// Each maximum is seeded with the first element of its column/row, so
+	// all-negative data yields the true maximum rather than 0. When the
+	// reduced dimension is empty, the entries stay at T{0}.
+	template <typename T>
+	utils::Vector<T> max(const utils::Matrix<T>& A, std::string method){
+
+		const uint64_t rows = A.rows();
+		const uint64_t cols = A.cols();
+
+		utils::Vector<T> b;
+
+		if (method == "cols"){
+			b.resize(cols, T{0});
+			if (rows > 0){
+				// Seed with the first row, then fold in the remaining rows.
+				for (uint64_t c = 0; c < cols; ++c){
+					b[c] = A(0, c);
+				}
+				for (uint64_t r = 1; r < rows; ++r){
+					for (uint64_t c = 0; c < cols; ++c){
+						b[c] = max(A(r, c), b[c]);
+					}
+				}
+			}
+		}else if (method == "rows"){
+			b.resize(rows, T{0});
+			if (cols > 0){
+				for (uint64_t r = 0; r < rows; ++r){
+					// Seed with the first column of this row.
+					T best = A(r, 0);
+					for (uint64_t c = 1; c < cols; ++c){
+						best = max(A(r, c), best);
+					}
+					b[r] = best;
+				}
+			}
+		}else{
+			throw std::runtime_error("max: choose 'rows' or 'cols'");
+		}
+		return b;
+
+	}
+
+
+
+} // namespace numerics
+
--- a/include/numerics/mean.h
+++ b/include/numerics/mean.h
@@ -0,0 +1,31 @@
+#ifndef _mean_n_
+#define _mean_n_
+
+#include "./utils/vector.h"
+#include "./utils/matrix.h"
+#include "./core/omp_config.h"
+
+namespace numerics{
+	
+	// Arithmetic mean of all entries of a vector.
+	//   A : input vector (only read).
+	//   Returns sum(A) / A.size(), computed in T.
+	//   Throws std::runtime_error when A is empty.
+	// NOTE(review): the previous body took a utils::Vector but used the
+	// matrix API (rows(), cols(), A(j,i)); this version relies only on
+	// size() and operator[] -- confirm against utils::Vector.
+	template <typename T>
+	T mean(const utils::Vector<T>& A) {
+
+		const uint64_t count = A.size();
+		if (count == 0) {
+			throw std::runtime_error("mean: empty input");
+		}
+
+		T total(T{0});
+		for (uint64_t i = 0; i < count; ++i) {
+			total += A[i];
+		}
+		total /= static_cast<T>(count);
+		return total;
+	}
+
+	// Arithmetic mean of all entries of a matrix.
+	//   A : input matrix (only read).
+	//   Returns sum(A) / (A.rows() * A.cols()), computed in T.
+	//   Throws std::runtime_error when A has no elements.
+	template <typename T>
+	T mean(const utils::Matrix<T>& A) {
+
+		const uint64_t rows = A.rows();
+		const uint64_t cols = A.cols();
+
+		if (rows == 0 || cols == 0) {
+			throw std::runtime_error("mean: empty input");
+		}
+
+		T total(T{0});
+		for (uint64_t i = 0; i < cols; ++i) {
+			for (uint64_t j = 0; j < rows; ++j) {
+				total += A(j, i);
+			}
+		}
+		total /= (static_cast<T>(rows) * static_cast<T>(cols));
+		return total;
+	}
+
+
+} // namespace numerics
+
+#endif // _mean_n_
--- a/include/numerics/min.h
+++ b/include/numerics/min.h
@@ -0,0 +1,21 @@
+#pragma once
+
+
+#include "./utils/vector.h"
+#include "./utils/matrix.h"
+
+
+namespace numerics{
+	
+	// Returns the smaller of two values; when the first does not compare
+	// less than the second, the second argument is returned.
+	template <typename T>
+	T min(const T a, const T b){
+		return (a < b) ? a : b;
+	}
+
+} // namespace numerics
+
--- a/include/numerics/numerics.h
+++ b/include/numerics/numerics.h
@@ -0,0 +1,21 @@
+// "./numerics/numerics.h"
+#pragma once
+
+#include "./numerics/initializers/eye.h"
+#include "./numerics/matequal.h"
+#include "./numerics/transpose.h"
+#include "./numerics/inverse.h"
+#include "./numerics/matmul.h"
+#include "./numerics/matdiv.h"
+#include "./numerics/matvec.h"
+#include "./numerics/matadd.h"
+#include "./numerics/matsubtract.h"
+#include "./numerics/matsum.h"
+#include "./numerics/min.h"
+#include "./numerics/max.h"
+#include "./numerics/abs.h"
+#include "./numerics/mean.h"
+#include "./numerics/exponential.h"
+#include "./numerics/interpolation1d.h"                       		// base
+
+
--- a/include/numerics/transpose.h
+++ b/include/numerics/transpose.h
@@ -0,0 +1,156 @@
+#ifndef _transpose_n_
+#define _transpose_n_
+
+
+#include "./utils/matrix.h"
+#include "./core/omp_config.h"
+
+
+namespace numerics{
+	
+	// Transposes a square matrix in place by exchanging every element above
+	// the diagonal with its mirror image below it.
+	//   Throws std::runtime_error when A is not square.
+	template <typename T>
+	void inplace_transpose_square(utils::Matrix<T>& A){
+
+		const uint64_t n_rows = A.rows();
+		const uint64_t n_cols = A.cols();
+
+		if (n_rows != n_cols){
+			throw std::runtime_error("inplace_transpose only valid for square matrices");
+		}
+
+		for (uint64_t r = 0; r < n_rows; ++r){
+			// Only the strict upper triangle is visited, so each pair is
+			// exchanged exactly once.
+			for (uint64_t c = r + 1; c < n_cols; ++c){
+				const T upper = A(r,c);
+				A(r,c) = A(c,r);
+				A(c,r) = upper;
+			}
+		}
+	}
+
+	// Transposes a square matrix in place with the loop over rows marked
+	// for OpenMP. Iteration r exchanges A(r,c) with A(c,r) for c > r only,
+	// so no element pair is handled by two different iterations.
+	//   Throws std::runtime_error when A is not square.
+	template <typename T>
+	void inplace_transpose_square_omp(utils::Matrix<T>& A){
+
+		const uint64_t n_rows = A.rows();
+		const uint64_t n_cols = A.cols();
+
+		if (n_rows != n_cols){
+			throw std::runtime_error("inplace_transpose only valid for square matrices");
+		}
+
+		#pragma omp parallel for schedule(static)
+		for (uint64_t r = 0; r < n_rows; ++r){
+			for (uint64_t c = r + 1; c < n_cols; ++c){
+				const T upper = A(r,c);
+				A(r,c) = A(c,r);
+				A(c,r) = upper;
+			}
+		}
+	}
+
+
+
+	// Returns the transpose of A as a new (A.cols() x A.rows()) matrix;
+	// A itself is not modified.
+	template <typename T>
+	utils::Matrix<T> transpose(const utils::Matrix<T>& A){
+
+		const uint64_t src_rows = A.rows();
+		const uint64_t src_cols = A.cols();
+
+		utils::Matrix<T> result(src_cols, src_rows, T{0});
+
+		for (uint64_t r = 0; r < src_rows; ++r){
+			for (uint64_t c = 0; c < src_cols; ++c){
+				result(c,r) = A(r,c);
+			}
+		}
+		return result;
+	}
+
+	// Returns the transpose of A as a new (A.cols() x A.rows()) matrix,
+	// with the copy loop nest collapsed into one OpenMP iteration space.
+	// A itself is not modified.
+	template <typename T>
+	utils::Matrix<T> transpose_omp(const utils::Matrix<T>& A){
+
+		const uint64_t src_rows = A.rows();
+		const uint64_t src_cols = A.cols();
+
+		utils::Matrix<T> result(src_cols, src_rows, T{0});
+
+		#pragma omp parallel for collapse(2) schedule(static)
+		for (uint64_t r = 0; r < src_rows; ++r){
+			for (uint64_t c = 0; c < src_cols; ++c){
+				result(c,r) = A(r,c);
+			}
+		}
+		return result;
+	}
+
+
+    // -------- Auto selectors --------
+    // Transposes a square matrix in place, picking the OpenMP or the serial
+    // implementation. The OpenMP variant is used only when
+    // omp_config::omp_parallel_allowed() returns true (OpenMP builds only)
+    // and the number of element swaps, n*(n-1)/2, exceeds 4 * threads.
+    //   Throws std::runtime_error when A is not square.
+    template <typename T>
+    void inplace_transpose_square_auto(utils::Matrix<T>& A) {
+        const uint64_t n = A.rows();
+
+        if (n != A.cols()) {
+            throw std::runtime_error("inplace_transpose_auto: only valid for square matrices");
+        }
+
+        // Size of the strict upper triangle, i.e. the number of swaps.
+        const std::uint64_t swaps = static_cast<std::uint64_t>((n * (n - 1)) / 2);
+
+	    #ifdef _OPENMP
+	        const bool can_parallel = omp_config::omp_parallel_allowed();
+	        const uint64_t threads = static_cast<std::uint64_t>(omp_get_max_threads());
+	    #else
+	        const bool can_parallel = false;
+	        const uint64_t threads = 1;
+		#endif
+
+        const bool go_parallel = can_parallel && swaps > threads * 4ull;
+
+        if (go_parallel) {
+            inplace_transpose_square_omp(A);
+            return;
+        }
+        inplace_transpose_square(A);
+    }
+
+    // Returns the transpose of A, picking an implementation automatically.
+    //   - Square A : A is copied and the copy is transposed in place via
+    //                inplace_transpose_square_auto.
+    //   - Otherwise: transpose_omp is used only when
+    //                omp_config::omp_parallel_allowed() returns true
+    //                (OpenMP builds only) AND the element count exceeds
+    //                4 * threads; the serial transpose is used in every
+    //                other case.
+    template <typename T>
+    utils::Matrix<T> transpose_auto(const utils::Matrix<T>& A) {
+
+        const uint64_t rows = A.rows();
+        const uint64_t cols = A.cols();
+
+        if (rows == cols){
+            utils::Matrix<T> B = A;
+            inplace_transpose_square_auto(B);
+            return B;
+        }
+
+        const uint64_t work = rows * cols;
+
+	    #ifdef _OPENMP
+	        const bool can_parallel = omp_config::omp_parallel_allowed();
+	        const uint64_t threads = static_cast<std::uint64_t>(omp_get_max_threads());
+	    #else
+	        const bool can_parallel = false;
+	        const uint64_t threads = 1;
+		#endif
+
+        // The test used to read `!can_parallel || ...`, which selected the
+        // OpenMP variant precisely when parallel execution was NOT allowed.
+        if (can_parallel && work > threads * 4ull) {
+            return transpose_omp(A);
+        }
+        return transpose(A);
+    }
+
+
+
+
+
+
+
+
+
+
+} // namespace numerics
+
+#endif // _transpose_n_