LU and LU inverse is done

2025-09-14 18:35:37 +02:00
parent 88087ea6a6
commit 92437e5ef1
23 changed files with 503 additions and 978 deletions
@@ -7,55 +7,172 @@ namespace decomp{
 	// Stores PA = LU with partial pivoting (row permutations).
 	template <typename T>
-	struct LU{
+	struct LUdcmp{
-		utils::Matrix<T> LUmat; 	// combined L (unit diagonal implied) and U
+		uint64_t rows;				// Stores number of rows.
-		std::vector<uint64_t> piv;	// pivot indices (row permutations)
+		utils::Matrix<T> lu; 		// Stores the decomposition.
-		bool singular= false;		// set true if zero (or near-zero) pivots encountered
+		std::vector<uint64_t> indx;	// Stores the permutation.
 		T d;						// Used by det.
-		LU() = default;
+		// Default Constructor
 		LUdcmp() = default;
-    	explicit LU(const utils::Matrix<T>& A) {
+		// Constructor
-        	factor(A);
+    	LUdcmp(const utils::Matrix<T>& A){
        	decomp(A);
        }
-    	void factor(const utils::Matrix<T>&A){
+    	void decomp(const utils::Matrix<T>&A){
    		rows = A.rows();
    		const uint64_t rows = A.rows();
    		if (rows != A.cols()){
-    			throw std::runtime_error("LU: factor non-square");
+    			throw std::runtime_error("LUdcmp: decomp non-square");
    		}
-    		if (rows == 0){
+    		uint64_t imax{0};
-    			LUmat = A;
+    		lu = A;
-    			piv.clear();
+    		indx.resize(rows);
-    			singular = false;
+    		std::vector<T> vv(rows);	// vv stores the implicit scaling of each row.
-    			return;
+    		T big{T{0}}, tmp{T{0}};// TINY{T{1.0e-40}};
    		}
-    		T big, tmp;
+    		d = T{1}; 					// No row interchanges yet.
-    		LUmat = A;
+    		// Loop over rows to get the implicit scaling information.
-    		piv.resize(rows);		// piv stores the implicit scaling of each row.
+    		for (uint64_t i = 0; i < rows; ++i){ 
    		//double d = 1.0;			// No row interchanges yet.
    		for (uint64_t i = 0; i < rows; ++i){	// Loop over rows to get the implicit scaling information.
    			big = T{0};
    			for (uint64_t j = 0; j < rows; ++j){
-    				tmp=std::abs(LUmat(i,j));
+    				tmp = std::abs(lu(i,j));
    				if (tmp > big){
    					big = tmp;
    				}
    			}
    			if (big == T{0}){
-    				throw std::runtime_error("Singular matrix in LU.factor()");
+    				throw std::runtime_error("LUdcmp: Singular matrix");
    			}
    			// No nonzero largest element.
    			vv[i] = T{1}/big; 							// Save the scaling.
    		}
    		// This is the outermost kij loop. Initialize for the search for largest pivot element.
    		for (uint64_t k = 0; k < rows; ++k){
    			big = T{0};
    			imax = k;
    			for (uint64_t i = k; i < rows; ++i){
    				tmp = vv[i] * static_cast<T>(std::fabs(static_cast<double>(lu(i,k))));
    				if (tmp > big){ 						// Is the figure of merit for the pivot better than the best so far?
    					big = tmp;
    					imax = i;
    				}
    			}
    			if (k != imax){ 							// Do we need to interchange rows?
 					lu.swap_rows(imax, k);				// Yes, do so...
    				d = -d;									// ...and change the parity of d.
    				vv[imax] = vv[k];						// Also interchange the scale factor.
    			}
    			indx[k] = imax;
    			if (lu(k,k) == T{0.0}){						// if the pivot element is zero, the matrix is singular (at least to the precision of thealgorithm).
    				throw std::runtime_error("LUdcmp: Singular matrix");
    				//lu(k,k) = TINY;							// For some applications on singular matrices, it is desirable to substitute TINY for zero.
    			}
    			for (uint64_t i = k+1; i < rows; ++i){
    				tmp = lu(i,k) /= lu(k,k);				// Divide by the pivot element.
    				for (uint64_t j = k+1; j < rows; ++j){	// Innermost loop: reduce remaining submatrix.
    					lu(i,j) -= tmp*lu(k,j);
    				}
    			}
    		}
    	} // end void decomp(const utils::Matrix<T>&A)
    	// Solves the set of n linear equations A*x=b using the stored LU decomposition of A.
    	void inplace_solve(const utils::Vector<T>& b, utils::Vector<T>& x){
    		T sum{T{0}};
    		uint64_t ii{0}, ip{0};
    		if (b.size() != rows || x.size() != rows){
    			throw std::runtime_error("LUdcmp: inplace_solve bad sizes");
    		}
    		x = b;
    		for (uint64_t i = 0; i < rows; ++i){				// When ii is set to a positive value, it will become the index of the first nonvanishing element of b.
    			ip = indx[i];
    			sum = x[ip];
    			x[ip] = x[i];
    			if (ii >= 0){
    				for (uint64_t j = ii; j < i; ++j){
    					sum -= lu(i,j)*x[j];
    				}
    			}else if (sum != T{0}) {						// A nonzero element was encountered, so from now on we will have to do the sums in the loop above.
    				ii = i+1;
    			}
    			x[i] = sum;
    		}
    		for (int64_t i = static_cast<int64_t>(rows)-1; i >= 0; --i){				// Now we do the backsubstitution.
    			sum=x[i];
    			for (uint64_t j = static_cast<uint64_t>(i)+1; j < rows; ++j){			
    				sum -= lu(static_cast<uint64_t>(i),j)*x[j];
    			}
    			x[static_cast<uint64_t>(i)] = sum/lu(static_cast<uint64_t>(i),static_cast<uint64_t>(i));		// Store a component of the solution vector x.
    		}
    	} // end inplace_solve(utils::Vector<T>& b, utils::Vector<T>& x)
    	// SSolves m sets of n linear equations A*X=B using the stored LU decomposition of A.
    	void inplace_solve(const utils::Matrix<T>& B, utils::Matrix<T>& X) {
 	 		uint64_t m{B.cols()};
 	 		if (B.rows() != rows || X.rows() != rows || B.cols() != X.cols()){
 	 			throw std::runtime_error("LUdcmp: inplace_solve bad sizes");
 	 		}
 	 		utils::Vector<T> xx(rows);
 	 		for (uint64_t j = 0; j < m; ++j){		// Copy and solve each column in turn.
 	 			xx = B.get_col(j);
 	 			inplace_solve(xx,xx);
 	 			X.set_col(j, xx);
 	 		}
    	} // end inplace_solve(utils::Matrix<T>& B, utils::Matrix<T>& X)
    	// Solves the set of n linear equations A*x=b using the stored LU decomposition of A.
    	utils::Vector<T> solve(const utils::Vector<T>& b) {
    		utils::Vector<T> x(rows,T{0});
    		inplace_solve(b, x);
    		return x;
    	}
    	// Solves the set of n linear equations A*X=B using the stored LU decomposition of A.
    	utils::Matrix<T> solve(const utils::Matrix<T>& B) {
    		utils::Matrix<T> X(rows,B.cols(),T{0});
    		inplace_solve(B, X);
    		return X;
    	}
    	void inplace_inverse(utils::Matrix<T>& Ainv){
    		Ainv.eye(rows);
    		inplace_solve(Ainv, Ainv);
    	}
    	utils::Matrix<T> inverse(){
    		utils::Matrix<T> Ainv;
    		inplace_inverse(Ainv);
    		return Ainv;
    	}
    	T det(){
    		T dd = d;
    		for (uint64_t i = 0; i < rows; ++i){
    			dd *= lu(i,i);
    		}
    		return dd;
    	}
 	}; // struct LU
-	typedef LU<float> LUf;
+	typedef LUdcmp<float> LUdcmpf;
-	typedef LU<double> LUd;
+	typedef LUdcmp<double> LUdcmpd;
 } // namespace decomp
@@ -23,8 +23,11 @@ namespace numerics{
   		if (method == "Gauss-Jordan"){
   			inverse_gj(A);
 		}
 		else if(method == "LU"){
 			inplace_inverse_lu(A);
 		}
 		else{
-			throw std::runtime_error("numerics::inplace_inverse(" + method + ") - Not implemented yet \r \nImplemented: 'Gauss-Jordan',");
+			throw std::runtime_error("numerics::inplace_inverse(" + method + ") - Not implemented yet \r \nImplemented: 'Gauss-Jordan', 'LU'");
 		}
 	}
@@ -32,21 +35,11 @@ namespace numerics{
 	template <typename T>
 	utils::Matrix<T> inverse(utils::Matrix<T>& A, std::string method = "Gauss-Jordan"){
 		utils::Matrix<T> B = A;
 		inplace_inverse(B, method);
 		return B;
 	}
 } // namespace numerics
 #endif // _inverse_n_
@@ -7,12 +7,13 @@
 #include <omp.h>
 namespace numerics{
 	template <typename T>
   	void inverse_gj(utils::Matrix<T>& A){
-		utils::Matrix<T> B(A.rows(),A.cols(), T{0});
+		//utils::Matrix<T> B(A.rows(),A.cols(), T{0});
 		utils::Matrix<T> B;
 		B.eye(A.rows());
 		uint64_t icol{0}, irow{0}, rows{A.rows()}, cols{A.cols()};
@@ -36,6 +37,9 @@ namespace numerics{
 					}
 				}
 			}
 			if (big <= T{1e-14}){
 				throw std::runtime_error("utill:inplace_inverse('Gauss-Jordan' - Singular Matrix");
 			}
 			ipiv[icol]++;
 			if (irow != icol){
 				for (uint64_t l = 0; l < rows; l++){ // SWAP 
@@ -0,0 +1,20 @@
 #pragma once
 #include "./decomp/lu.h"
 namespace numerics{
 	template <typename T>
   	void inplace_inverse_lu(utils::Matrix<T>& A){
   		if (A.rows() != A.cols()){
   			throw std::runtime_error("numerics inverse_lu: non-square matrix");
   		}
   		decomp::LUdcmp<T> lu(A);
   		lu.inplace_inverse(A);
   	}
 }
@@ -45,7 +45,7 @@ public:
 		    return !(*this == A);
 		}
 		bool nearly_equal(const Matrix<T>& A, T tol = static_cast<T>(1e-9)) const {
-		    if (rows_ != A.rows_ || cols_ != A.cols_) return false;
+		    if (rows_ != A.rows() || cols_ != A.cols()) return false;
 		    for (uint64_t i = 0; i < rows_; ++i)
 		        for (uint64_t j = 0; j < cols_; ++j) {
 		            T a = (*this)(i,j);
@@ -59,6 +59,22 @@ public:
 		    return true;
 		}
 		void eye(uint64_t rows_cols){
 			rows_ = cols_ = rows_cols;
 			data_.clear();
 			data_.resize(rows_cols*rows_cols, T{0});
 			for (uint64_t i = 0; i < rows_; ++i){
 				data_[i * cols_ + i] = T{1};
 			}
 		}
 		void resize(uint64_t rows, uint64_t cols, const T& value = T(0)){
 			rows_ = rows;
 			cols_ = cols;
 			data_.resize(rows_*cols_, value);
 		}
 		//#            MATRIX: row helpers (copy out)            #
@@ -27,6 +27,7 @@ public:
 	    }
 	    //##########################################################
 		//#            VECTOR: --- basic properties ---            #
 		//##########################################################
@@ -399,13 +400,37 @@ public:
 		void print() const{
 			std::cout << *this << std::endl;
 		}
 		bool nearly_equal(const Vector<T>& a, T tol = static_cast<T>(1e-9)) const {
 		    if (v.size() != a.size()){
 		    	return false;
 		    }
 		    for (uint64_t i = 0; i < v.size(); ++i){
 		            T val1 = v[i];
 		            T val2 = a[i];
 				if (std::is_floating_point<T>::value) {
 	                if (std::fabs(val1 - val2) > tol) return false;
 	            } else {
 	                if (val1 != val2) return false;
 				}
 			}
 			return true;
 		}
 	};
 	typedef Vector<int> Vi;
 	typedef Vector<float> Vf;
 	typedef Vector<double> Vd;
-
+/*
-
+if (std::is_floating_point<T>::value) {
 		                if (std::fabs(a - b) > tol) return false;
 		            } else {
 		                if (a != b) return false;
 }*/
 } // namespace utils
 #endif // _vector_n_
@@ -14,8 +14,7 @@ int main(int argc, char const *argv[])
 {    
    utils::Md A;
-    decomp::LUd lu;
+    decomp::LUdcmpd lu(A);
    lu.factor(A);
    // Single-level, 16 threads, runtime may adjust
    //omp_configure(/*max_levels=*/1, /*dynamic=*/true, /*threads_per_level=*/{16});
@@ -1,813 +0,0 @@
 #include "./utils/numerics.h"
 namespace numeric{
 	//#######################################
 	//#   UTILITY FUNCTIONS FOR VECTOR      #
 	//#######################################
 	Vd vector_fill_RNG(const Vd& vect, const double min, const double max){
 		Vd vect_out;
 		vect_out.fill(vect_out.v.size(), 0);
 		for (uint64_t i = 0; i < vect_out.v.size(); i++){
 				vect_out.v[i] = random(min, max);
 		}
 		return vect_out;
 	}
 	double vector_dot(const Vd& v1, const Vd& v2){
 		double dot_product = 0;
 		if (v1.v.size() != v2.v.size()){
 			LOG("numerics::vector_dot");
 			throw std::exception();
 		}
 		else{
 			for (uint64_t i = 0; i < v1.v.size(); i++){
 				dot_product += v1.v[i]*v2.v[i];
 			}
 		}
 		return dot_product;
 	}
 	Vd vector_max_cap(const Vd& vect, const double value){
 		Vd vect_out;
 		vect_out.fill(vect.v.size(), 0);	
 		for (uint64_t i = 0; i < vect.v.size(); i++){
 			vect_out.v[i] = MIN(vect.v[i], value);
 		}
 		return vect_out;
 	}
 	Vd vector_min_cap(const Vd& vect, const double value){
 		Vd vect_out;
 		vect_out.fill(vect.v.size(), 0);
 		for (uint64_t i = 0; i < vect.v.size(); i++){
 			vect_out.v[i] = MAX(vect.v[i], value);
 		}
 		return vect_out;
 	}
 	Vd vector_clip(const Vd& vect, const double min_value, const double max_value){
 		Vd vect_out;
 		vect_out.fill(vect.v.size(), 0);
 		for (uint64_t i = 0; i < vect.v.size(); i++){
 			vect_out = vector_max_cap(vect, max_value);
 			vect_out = vector_min_cap(vect_out, min_value);
 		}
 		return vect_out;
 	}
 	Vd vector_normalize(const Vd& vect, const double value){
 		Vd vect_out;
 		vect_out.fill(vect.v.size(), 0);
 		double vect_sum = vect.get_sum();
 		for (uint64_t i = 0; i < vect.v.size(); i++){
 			vect_out.v[i] = vect.v[i]/vect_sum;
 		}
 		return vect_out;
 	}
 	//#######################################
 	//#   UTILITY FUNCTIONS FOR Matrix      #
 	//#######################################
 	Md matrix_dot(const Md& M, const Md& N){
 		Md matr_out;
 		matr_out.fill(M.m.size(), N.m[0].v.size(),0);
 		if (M.m[0].v.size() != N.m.size()){
 			LOG("numerics::matrix_dot");
 			throw std::exception();
 		}
 		else{
 			for (uint64_t i = 0; i < M.m.size(); i++){
 				for (uint64_t j = 0; j < N.m[0].v.size(); j++){
 					for (uint64_t k = 0; k < N.m.size(); k++)
 					{
 						matr_out.m[i].v[j] += M.m[i].v[k] * N.m[k].v[j];
 					}
 				}
 			}
 		}
 		return matr_out;
 	}
 	Md matrix_add_vector(const Md& M, const Vd& vect, bool axis){
 		Md matr_out;
 		matr_out.fill(M.m.size(), vect.v.size(), 0);
 		if (axis == 0 && matr_out.m[0].v.size() == vect.v.size()){
 			for (uint64_t i = 0; i < matr_out.m.size(); i++){
 				for (uint64_t j = 0; j < matr_out.m[0].v.size(); j++){
 					matr_out.m[i].v[j] = M.m[i].v[j] + vect.v[j];
 				}
 			}
 		}
 		else if (axis == 1 && matr_out.m.size() == vect.v.size()){
 			for (uint64_t i = 0; i < matr_out.m.size(); i++){
 				for (uint64_t j = 0; j < matr_out.m[0].v.size(); j++){
 					matr_out.m[i].v[j] = M.m[i].v[j] + vect.v[i];
 				}
 			}
 		}
 		else{
 			LOG("numerics::matrix_add_vector");
 			throw std::exception();
 		}
 		return matr_out;
 	}
 	Md matrix_add_matrix(const Md& M, const Md& N){
 		Md matr_out;
 		matr_out.fill(M.m.size(), M.m[0].v.size(), 0);
 		for (uint64_t i = 0; i < matr_out.m.size(); i++){
 			for (uint64_t j = 0; j < matr_out.m[0].v.size(); j++){
 				matr_out.m[i].v[j] = M.m[i].v[j] + N.m[i].v[j];
 			}
 		}
 		return matr_out;
 	}
 	Md matrix_sub_vector(const Md& M, const Vd& vect, bool axis){
 		Md matr_out;
 		matr_out.fill(M.m.size(), M.m[0].v.size(), 0);
 		if (axis == 0 && matr_out.m[0].v.size() == vect.v.size()){
 			for (uint64_t i = 0; i < matr_out.m.size(); i++){
 				for (uint64_t j = 0; j < matr_out.m[0].v.size(); j++){
 					matr_out.m[i].v[j] = M.m[i].v[j] - vect.v[j];
 				}
 			}
 		}
 		else if (axis == 1 && matr_out.m.size() == vect.v.size()){
 			for (uint64_t i = 0; i < matr_out.m.size(); i++){
 				for (uint64_t j = 0; j < matr_out.m[0].v.size(); j++){
 					matr_out.m[i].v[j] = M.m[i].v[j] - vect.v[i];
 				}
 			}
 		}
 		else{
 			LOG("numerics::matrix_sub_vector");
 			throw std::exception();
 		}
 		return matr_out;
 	}
 	Md matrix_max_cap(const Md& M, const double value){
 		Md matr_out;
 		matr_out.fill(M.m.size(), M.m[0].v.size(), 0);
 		for (uint64_t i = 0; i < M.m.size(); i++){
 			for (uint64_t j = 0; j < M.m[0].v.size(); j++){
 				matr_out.m[i].v[j] = MIN(M.m[i].v[j], value);
 			}
 		}
 		return matr_out;
 	}
 	Md matrix_min_cap(const Md& M, const double value){
 		Md matr_out;
 		matr_out.fill(M.m.size(), M.m[0].v.size(), 0);
 		for (uint64_t i = 0; i < M.m.size(); i++){
 			for (uint64_t j = 0; j < M.m[0].v.size(); j++){
 				matr_out.m[i].v[j] = MAX(M.m[i].v[j], value);
 			}
 		}
 		return matr_out;
 	}
 	Md matrix_clip(const Md& M, const double min_value, const double max_value){
 		Md matr_out;
 		matr_out.fill(M.m.size(), M.m[0].v.size(), 0);
 		for (uint64_t i = 0; i < M.m.size(); i++){
 			for (uint64_t j = 0; j < M.m[0].v.size(); j++){
 				matr_out.m[i].v[j] = MAX(M.m[i].v[j], min_value);
 				matr_out.m[i].v[j] = MIN(M.m[i].v[j], max_value);
 			}
 		}
 		return matr_out;
 	}
 	Vd matrix_get_max(const Md& M, bool axis){
 		Vd vect_out;
 		if (axis == 0){
 			if(M.m[0].v.size() != vect_out.v.size()){
 				vect_out.fill(M.m[0].v.size(), 0);
 			}
 			for (uint64_t i = 0; i < M.m.size(); i++){
 				for (uint64_t j = 0; j < M.m[0].v.size(); j++){
 					vect_out.v[j] = MAX(vect_out.v[j], M.m[i].v[j]);
 				}
 			}
 		}
 		else if(axis == 1){
 			if (M.m.size() != vect_out.v.size()){
 				vect_out.fill(M.m.size(), 0);
 			}
 			for (uint64_t i = 0; i < M.m.size(); i++){
 				for (uint64_t j = 0; j < M.m[0].v.size(); j++){
 					vect_out.v[i] = MAX(vect_out.v[i], M.m[i].v[j]);
 				}
 			}
 		}
 		else{
 			LOG("numerics::matrix_get_max");
 			throw std::exception();
 		}
 		return vect_out;
 	}
 	Vd matrix_get_min(const Md& M, bool axis){
 		Vd vect_out;
 		if (axis == 0 && M.m[0].v.size() != vect_out.v.size()){
 			vect_out.fill(M.m[0].v.size(), 0);
 			for (uint64_t i = 0; i < M.m.size(); i++){
 				for (uint64_t j = 0; j < M.m[0].v.size(); j++){
 					vect_out.v[j] = MAX(vect_out.v[j], M.m[i].v[j]);
 				}
 			}
 		}
 		else if(axis == 1 && M.m.size() != vect_out.v.size()){
 			vect_out.fill(M.m.size(), 0);
 			for (uint64_t i = 0; i < M.m.size(); i++){
 				for (uint64_t j = 0; j < M.m[0].v.size(); j++){
 					vect_out.v[i] = MIN(vect_out.v[i], M.m[i].v[j]);
 				}
 			}
 		}
 		else{
 			LOG("numerics::matrix_get_min");
 			throw std::exception();
 		}
 		return vect_out;
 	}
 	Vd matrix_get_sum(const Md& M, bool axis){
 		Vd vect_out;
 		if (axis == 0 && M.m[0].v.size() != vect_out.v.size()){
 			vect_out.fill(M.m[0].v.size(), 0);
 			for (uint64_t i = 0; i < M.m.size(); i++){
 				for (uint64_t j = 0; j < M.m[0].v.size(); j++){
 					vect_out.v[j] += M.m[i].v[j];
 				}
 			}
 		}
 		else if(axis == 1 && M.m.size() != vect_out.v.size()){
 			vect_out.fill(M.m.size(), 0);
 			for (uint64_t i = 0; i < M.m.size(); i++){
 				vect_out.v[i] = 0;
 				for (uint64_t j = 0; j < M.m[0].v.size(); j++){
 					vect_out.v[i] += M.m[i].v[j];
 				}
 			}
 		}
 		else{
 			LOG("numerics::matrix_get_sum");
 			throw std::exception();
 		}
 		return vect_out;
 	}
 	Md matrix_normalize(const Md& M, bool axis){
 		Md matr_out;
 		matr_out.fill(M.m.size(), M.m[0].v.size(), 0);
 		Vd vect_sum;
 		vect_sum = matrix_get_sum(M, axis);
 		if (axis == 0){
 			for (uint64_t i = 0; i < M.m.size(); i++){
 				for (uint64_t j = 0; j < M.m[0].v.size(); j++){
 					matr_out.m[i].v[j] = M.m[i].v[j] / vect_sum.v[j];
 				}
 			}
 		}
 		else if(axis == 1){
 			for (uint64_t i = 0; i < M.m.size(); i++){
 				for (uint64_t j = 0; j < M.m[0].v.size(); j++){
 					matr_out.m[i].v[j] = M.m[i].v[j] / vect_sum.v[i];
 				}
 			}
 		}
 		else{
 			LOG("numerics::matrix_normalize");
 			throw std::exception();
 		}
 		return matr_out;
 	}
 	Md matrix_exp(const Md& M){
 		Md matr_out;
 		matr_out.fill(M.m.size(), M.m[0].v.size(), 0);
 		for (uint64_t i = 0; i < M.m.size(); i++){
 			for (uint64_t j = 0; j < M.m[0].v.size(); j++){
 				matr_out.m[i].v[j] = pow(EULERS_NUMBER, M.m[i].v[j]);
 			}
 		}
 		return matr_out;
 	}
 	Vd matrix_argmax(const Md& M){
 		Vd vect_out;
 		vect_out.fill(M.m.size(), 0);
 		uint64_t idx = 0;
 		for (uint64_t i = 0; i < M.m.size(); i++){
 			idx = 0;
 			for (uint64_t j = 1; j < M.m[0].v.size(); j++){
 				if (M.m[i].v[j-1] < M.m[i].v[j]){
 					idx = j;
 				}
 			}
 			vect_out.v[i] = idx;
 		}
 		return vect_out;
 	}
 	Md matrix_transpose(const Md& M){
 		Md matr_out;
 		uint64_t rows = M.m.size();
 		uint64_t cols = M.m[0].v.size();
 		for (uint64_t i = 0; i < cols; i++){
 			Vd temp_vec;
 			for (uint64_t j = 0; j < rows; j++){
 				temp_vec.v.push_back(M.m[j].v[i]);
 			}
 			matr_out.m.push_back(temp_vec);
 		}
 		return matr_out;
    }
   	double matrix_determinant(const Md& M){
   		//https://stackoverflow.com/questions/60300482/c-calculating-the-inverse-of-a-matrix
   		if (M.m.size() != M.m[0].v.size()){
   			throw std::runtime_error("numeric::matrix_determinant - Matrix is not quadratic");
   		}
 		uint64_t dims = M.m.size();
 		double determinant = 0;
 		double sign = 1;
 		uint64_t temp = 0;
 		if(dims == 0){
 			determinant = 1.0f;
 		}
 		else if(dims == 1){
 			determinant = M.m[0].v[0];
 		}
 		else if(dims == 2){
 			determinant = (M.m[0].v[0]*M.m[1].v[1] - M.m[0].v[1]*M.m[1].v[0]);
 		}
 		else{
 			Md matr_temp;
 			matr_temp.fill(dims-1, dims-1, 0);
 			for (uint64_t i = 0; i < dims; i++){
 				for (uint64_t m = 1; m < dims; m++){
 					temp = 0;
 					for (uint64_t n = 0; n < dims; n++){						
 						if(n != i){
 							matr_temp.m[m-1].v[temp] = M.m[m].v[n];
 							temp++;
 						}
 					}
 				}
 				// recursice call
 				determinant += sign * M.m[0].v[i] * matrix_determinant(matr_temp);
 				sign = -sign;
 			}
 		}
 		return determinant;
    }
   	Md matrix_cofactor(const Md& M){
   		//https://stackoverflow.com/questions/60300482/c-calculating-the-inverse-of-a-matrix
   		if (M.m.size() != M.m[0].v.size()){
   			throw std::runtime_error("numeric::matrix_determinant - Matrix is not quadratic");
   		}
 		uint64_t dims = M.m.size();
 		Md cofactor;
 		Md matr_temp;
 		uint64_t temp_p = 0;
 		uint64_t temp_q = 0;
 		cofactor.fill(dims, dims, 0);
 		matr_temp.fill(dims-1, dims-1, 0);
 		for (uint64_t i = 0; i < dims; i++){
 			for(uint64_t j = 0; j < dims; j++){
 				temp_p = 0;
 				for (uint64_t x = 0; x < dims; x++){
 					if(x == i){
 						continue;
 					}
 					temp_q = 0;
 					for (uint64_t y = 0; y < dims; y++){
 						if (y == j){
 							continue;
 						}
 						matr_temp.m[temp_p].v[temp_q] = M.m[x].v[y];
 						temp_q++; 
 					}
 					temp_p++;
 				}
 				cofactor.m[i].v[j] = pow(-1, (i + j)) * matrix_determinant(matr_temp);
 			}
 		}
 		return cofactor;
    }
   	Md matrix_inverse(const Md& M){
   		//https://stackoverflow.com/questions/60300482/c-calculating-the-inverse-of-a-matrix
   		if (M.m.size() != M.m[0].v.size()){
   			throw std::runtime_error("numeric::matrix_determinant - Matrix is not quadratic");
   		}
   		double det = 1.0/matrix_determinant(M);
   		uint64_t dims = M.m.size();
 		Md matr_out;
 		matr_out.fill(dims, dims, 0);
 		for (uint64_t i = 0; i < dims; i++){
 			for (uint64_t j = 0; j < dims; j++){
 				matr_out.m[i].v[j] = M.m[i].v[j] * det;
 			}
 		}
 		return matrix_transpose(matrix_cofactor(matr_out));
    }
   	Md matrix_scalar_element_wise_division(const Md& M, const double& C){
 		Md matr_out;
 		matr_out.fill(M.m.size(), M.m[0].v.size(), 0);
 		for (uint64_t i = 0; i < M.m.size(); i++){
 			for (uint64_t j = 0; j < M.m[0].v.size(); j++){
 				matr_out.m[i].v[j] = M.m[i].v[j] / C;
 			}
 		}
 		return matr_out;
    }
   	Md matrix_element_wise_division(const Md& M, const Md& N){
   		if (M.m.size() != N.m.size() || M.m[0].v.size() != N.m[0].v.size()){
   			throw std::runtime_error("numeric::matrix_element_wise_division - Matrix is not same size");
   		}
 		Md matr_out;
 		matr_out.fill(M.m.size(), M.m[0].v.size(), 0);
 		for (uint64_t i = 0; i < M.m.size(); i++){
 			for (uint64_t j = 0; j < M.m[0].v.size(); j++){
 				matr_out.m[i].v[j] = M.m[i].v[j] / N.m[i].v[j];
 			}
 		}
 		return matr_out;
    }
   	Md matrix_sign(const Md& M){
 		Md matr_out;
 		matr_out.fill(M.m.size(), M.m[0].v.size(), 0);
 		for (uint64_t i = 0; i < M.m.size(); i++){
 			for (uint64_t j = 0; j < M.m[0].v.size(); j++){
 				matr_out.m[i].v[j] = -M.m[i].v[j];
 			}
 		}
 		return matr_out;
    }
 	double random(const double& min, const double& max){
    std::mt19937_64 rng{}; 
    rng.seed( std::random_device{}());
    return std::uniform_real_distribution<>{min, max}(rng);
 	}
 }
 void print_matrix(const std::vector<std::vector<double>> *const M){
 	uint64_t rows = (*M).size();
 	uint64_t cols = (*M)[0].size();
 	//printf("Rows: %ld\r\n", rows);
 	//printf("Cols:%ld\r\n", cols);
 	if (cols == 1)
 	{	
 		printf("[");
 		for (uint64_t i = 0; i < rows; ++i)
 		{
 			if (i == rows-1)
 			{
 				printf("[%f]]\r\n", (*M)[i][0]);
 			}
 			else
 			{
 				printf("[%f]\r\n", (*M)[i][0]);
 			}
 		}
 	}
 	else
 	{
 		for (uint64_t i = 0; i < rows; ++i)
 		{
 			for (uint64_t j = 0; j < cols; ++j)
 			{
 				if (j == cols-1 && i == rows-1)
 				{
 					printf("%f]]\r\n", (*M)[rows-1][cols-1]);
 				}
 				else if (j == 0 && i == 0)
 				{
 					printf("[[%f, ", (*M)[i][j]);
 				}
 				else if (j == 0)
 				{
 					printf("[%f, ", (*M)[i][j]);
 				}
 				else if (j == cols-1)
 				{
 					printf("%f]\r\n", (*M)[i][cols-1]);
 				}
 				else
 				{
 					printf("%f, ", (*M)[i][j]);
 				}
 			}
 		}
 	}
 }
 std::vector<std::vector<double>> transpose(const std::vector<std::vector<double>> *const M){
 	try{
 		if ((*M).size() != 0)
 		{
 			uint64_t rows = (*M).size();
 			uint64_t cols = (*M)[0].size();
 		    std::vector<std::vector<double>> trans_matr(cols, std::vector<double>(rows, 0));
 		    for (uint64_t i = 0; i < rows; i++)
 		    {
 		        for (uint64_t j = 0; j < cols; j++)
 		        {
 		            trans_matr[j][i] = ((*M)[i][j]);
 		        }
 		    }
 		    return trans_matr;
 		}
 		else{
 			throw std::invalid_argument("Dimensions in transpose is not valid");
 		}
 	}
    catch (std::invalid_argument& e){
        std::cerr << e.what() << std::endl;
        std::vector<std::vector<double>> trans_matr(1, std::vector<double>(1, -1));
        return trans_matr;
    }
 }
 //passing vectors by reference avoids unnecessary copies
 std::vector<std::vector<double>> dot(const std::vector<std::vector<double>> *const M, const std::vector<std::vector<double>> *const N){
 	try{
 		uint64_t M_rows = (*M).size();
 		uint64_t M_cols = (*M)[0].size();
 		uint64_t N_rows = (*N).size();
 		uint64_t N_cols = (*N)[0].size();
 		if (M_cols == N_rows)
 		{
 		    std::vector<std::vector<double>> dot_matr(M_rows, std::vector<double>(N_cols, 0));
 		    // Loop over rows
 		    for (uint64_t i = 0; i < M_rows; i++)
 		    {
 		    	// Loop over columns
 		    	for (uint64_t j = 0; j < N_cols; j++)
 		    	{
 		    		for (uint64_t k = 0; k < M_cols; k++)
 		    		{
 		    			dot_matr[i][j] += (*M)[i][k] * (*N)[k][j];
 		    		}
 		    	}
 		    }
 		    return dot_matr;
 		}
 		else{
 			throw std::invalid_argument("Dimensions in dot() does not match");
 		}
 	}
    catch (std::invalid_argument& e){
        std::cerr << e.what() << std::endl;
        std::vector<std::vector<double>> dot_matr(1, std::vector<double>(1, -1));
        return dot_matr;
    }
 }
 std::vector<std::vector<double>> matr_add(const std::vector<std::vector<double>> *const M, const std::vector<std::vector<double>> *const N){
 	try{
 		uint64_t M_rows = (*M).size();
 		uint64_t M_cols = (*M)[0].size();
 		uint64_t N_rows = (*N).size();
 		uint64_t N_cols = (*N)[0].size();
 		printf("cols :%ld\r\n", N_cols);
 		if (M_rows == N_rows && M_cols == N_cols)
 		{
 		    std::vector<std::vector<double>> add_matr(M_rows, std::vector<double>(M_cols, 0));
 		    // Loop over rows
 		    for (uint64_t i = 0; i < M_rows; i++)
 		    {
 		    	// Loop over columns
 		    	for (uint64_t j = 0; j < M_cols; j++)
 		    	{
 		    			add_matr[i][j] += (*M)[i][j] + (*N)[i][j];
 		    	}
 		    }
 		    return add_matr;
 		}
 		else{
 			throw std::invalid_argument("Dimensions in matr_add() does not match");
 		}
 	}
    catch (std::invalid_argument& e){
        std::cerr << e.what() << std::endl;
        std::vector<std::vector<double>> add_matr(1, std::vector<double>(1, -1));
        return add_matr;
    }
 }
 std::vector<std::vector<double>> matr_add_vect(const std::vector<std::vector<double>> *const M, const std::vector<double> *const v){
 	try{
 		uint64_t M_rows = (*M).size();
 		uint64_t M_cols = (*M)[0].size();
 		uint64_t size = (*v).size();
 		if (M_cols == size)
 		{
 			std::vector<std::vector<double>> matr_add_vect(M_rows, std::vector<double>(M_cols, 0));
 		    // Loop over columns
 		    for (uint64_t i = 0; i < M_rows; i++)
 		    {
 			    for (uint64_t j = 0; j < M_cols; j++)
 			    {
 			    	matr_add_vect[i][j] += (*M)[i][j] + (*v)[j];
 			    }
 		    }
 		    return matr_add_vect;
 		}
 		else{
 			throw std::invalid_argument("Dimensions in matr_add_vect does not match");
 		}
 	}
    catch (std::invalid_argument& e){
        std::cerr << e.what() << std::endl;
        std::vector<std::vector<double>> matr_add_vect(1, std::vector<double>(1, -1));
        return matr_add_vect;
    }
 }
@@ -1,15 +0,0 @@
 #include "utils/numerics.h"
 #include <random>
 namespace utils{
 	double random(const double& min, const double& max){
    std::mt19937_64 rng{}; 
    rng.seed( std::random_device{}());
    return std::uniform_real_distribution<>{min, max}(rng);
 	}
 }
@@ -4,123 +4,112 @@
 #include "./numerics/matmul.h"
-TEST_CASE(Inverse_2x2_WellConditioned) {
+TEST_CASE(Inverse_GJ_Basic_3x3) {
    using T = double;
    // A = [[4,7],[2,6]]  inverse = (1/10) * [[6,-7],[-2,4]]
    utils::Matrix<T> A(2,2, T{0});
    A(0,0)=4; A(0,1)=7;
    A(1,0)=2; A(1,1)=6;
    auto Ainv = numerics::inverse<T>(A);  // out-of-place
    // Check A * Ainv ≈ I and Ainv * A ≈ I
    auto Ileft  = numerics::matmul(A, Ainv);
    auto Iright = numerics::matmul(Ainv, A);
    utils::Md Iref(2,2, T{0});
    for (uint64_t i=0;i<Iref.rows();++i) Iref(i,i)=T{1};
    //auto Iref   = eye<T>(2);
    CHECK((Ileft.nearly_equal(Iref, 1e-12)), "A * inverse(A) ≠ I");
    CHECK((Iright.nearly_equal(Iref, 1e-12)), "inverse(A) * A ≠ I");
 }
 TEST_CASE(Inverse_InPlace_Equals_OutOfPlace) {
    using T = double;
    utils::Matrix<T> A(3,3, T{0});
-    // A = [[3, 0, 2],
+    // Well-conditioned 3x3
-    //      [2, 0, -2],
+    A(0,0)=3;   A(0,1)=0.2; A(0,2)=-0.1;
-    //      [0, 1, 1]]
+    A(1,0)=0.1; A(1,1)=2.5; A(1,2)=0.3;
-    A(0,0)=3; A(0,1)=0;  A(0,2)= 2;
+    A(2,0)=-0.2;A(2,1)=0.4; A(2,2)=2.0;
    A(1,0)=2; A(1,1)=0;  A(1,2)=-2;
    A(2,0)=0; A(2,1)=1;  A(2,2)= 1;
-    auto Ainv_ref = numerics::inverse<T>(A);  // copy path
+    auto Ainv = numerics::inverse<T>(A, "Gauss-Jordan");
    utils::Matrix<T> I;
    I.eye(3);
    auto prod = numerics::matmul<T>(A, Ainv);
-    auto A_inp = A;
+    CHECK(prod.nearly_equal(I, (T)1e-10), "inverse(GJ): A*A^{-1} ≈ I");
-    numerics::inplace_inverse<T>(A_inp);      // in-place path
+}
-    CHECK((A_inp.nearly_equal(Ainv_ref, 1e-12)), "in-place inverse differs from out-of-place");
+TEST_CASE(Inverse_LU_Basic_3x3) {
    using T = double;
    utils::Matrix<T> A(3,3, T{0});
    A(0,0)=3;   A(0,1)=0.2; A(0,2)=-0.1;
    A(1,0)=0.1; A(1,1)=2.5; A(1,2)=0.3;
    A(2,0)=-0.2;A(2,1)=0.4; A(2,2)=2.0;
    auto Ainv = numerics::inverse<T>(A, "LU");
    utils::Matrix<T> I;
    I.eye(3);
    auto prod = numerics::matmul<T>(A, Ainv);
    CHECK(prod.nearly_equal(I, (T)1e-10), "inverse(LU): A*A^{-1} ≈ I");
 }
 TEST_CASE(Inverse_GJ_vs_LU_Consistency) {
    using T = double;
    utils::Matrix<T> A(3,3, T{0});
    A(0,0)=4; A(0,1)=1;  A(0,2)=2;
    A(1,0)=0; A(1,1)=3;  A(1,2)=-1;
    A(2,0)=0; A(2,1)=0;  A(2,2)=2;
    auto GJ = numerics::inverse<T>(A, "Gauss-Jordan");
    auto LU = numerics::inverse<T>(A, "LU");
    CHECK(GJ.nearly_equal(LU, (T)1e-12), "inverse: GJ and LU produce nearly the same result");
 }
 TEST_CASE(Inplace_Inverse_LU) {
    using T = double;
    utils::Matrix<T> A(3,3, T{0});
    A(0,0)=3;   A(0,1)=0.2; A(0,2)=-0.1;
    A(1,0)=0.1; A(1,1)=2.5; A(1,2)=0.3;
    A(2,0)=-0.2;A(2,1)=0.4; A(2,2)=2.0;
    auto Ainv_ref = numerics::inverse<T>(A, "LU"); // out-of-place
    auto A_copy   = A;
    numerics::inplace_inverse<T>(A_copy, "LU");    // in-place
    CHECK(A_copy.nearly_equal(Ainv_ref, (T)1e-12), "inplace_inverse(LU) equals out-of-place");
 }
 TEST_CASE(Inplace_Inverse_GJ) {
    using T = double;
    utils::Matrix<T> A(2,2, T{0});
    A(0,0)=2; A(0,1)=1;
    A(1,0)=1; A(1,1)=3;
    auto Ainv_ref = numerics::inverse<T>(A, "Gauss-Jordan");
    auto A_copy   = A;
    numerics::inplace_inverse<T>(A_copy, "Gauss-Jordan");
    CHECK(A_copy.nearly_equal(Ainv_ref, (T)1e-12), "inplace_inverse(GJ) equals out-of-place");
 }
 TEST_CASE(Inverse_Identity) {
    using T = double;
    utils::Matrix<T> I;
    I.eye(3);
    auto invI = numerics::inverse<T>(I, "LU");
    CHECK(invI.nearly_equal(I, (T)0), "inverse(I) == I");
 }
 TEST_CASE(Inverse_NonSquare_Throws) {
    using T = double;
    utils::Matrix<T> A(2,3, T{0}); // non-square
    bool threw1=false, threw2=false;
    try { auto X = numerics::inverse<T>(A, "LU"); (void)X; } catch(...) { threw1=true; }
    try { numerics::inplace_inverse<T>(A, "Gauss-Jordan"); } catch(...) { threw2=true; }
    CHECK(threw1 && threw2, "inverse throws on non-square for both methods");
 }
 TEST_CASE(Inverse_Singular_Throws) {
    using T = double;
-    utils::Matrix<T> S(2,2, T{0});
+    utils::Matrix<T> S(3,3, T{0});
-    // Singular: rows are multiples → det = 0
+    S(0,0)=1; S(0,1)=2; S(0,2)=3;
-    S(0,0)=1; S(0,1)=2;
+    S(1,0)=1; S(1,1)=2; S(1,2)=3; // duplicate row -> singular
-    S(1,0)=2; S(1,1)=4;
+    S(2,0)=0; S(2,1)=1; S(2,2)=0;
-    bool threw=false;
+    bool threw_gj=false, threw_lu=false;
-    try {
+    try { auto X = numerics::inverse<T>(S, "Gauss-Jordan"); (void)X; } catch(...) { threw_gj=true; }
-        auto _ = numerics::inverse<T>(S);
+    try { auto X = numerics::inverse<T>(S, "LU");           (void)X; } catch(...) { threw_lu=true;  }
-        (void)_;
+    CHECK(threw_gj && threw_lu, "inverse throws on singular for both methods");
    } catch (const std::runtime_error&) { threw = true; }
    CHECK(threw, "inverse should throw on singular matrix");
    threw=false;
    try {
        numerics::inplace_inverse<T>(S);
    } catch (const std::runtime_error&) { threw = true; }
    CHECK(threw, "inplace_inverse should throw on singular matrix");
 }
 TEST_CASE(Inverse_RoundTrip_DiagonallyDominant_5x5) {
    // Build a well-conditioned 5x5: diagonally dominant
    utils::Md A(5,5,0.0);
    for (uint64_t i=0;i<5;++i) {
        double rowsum = 0.0;
        for (uint64_t j=0;j<5;++j) {
            if (i==j) continue;
            A(i,j) = 0.01 * double(1 + ((i+1)*(j+3)) % 7);
            rowsum += std::fabs(A(i,j));
        }
        A(i,i) = rowsum + 1.0; // strictly diagonally dominant
    }
    utils::Md A_copy = A; // ensure wrapper doesn't mutate input
    utils::Md Ainv = numerics::inverse<double>(A);
    // Input must be unchanged by the non-inplace wrapper
    CHECK(A.nearly_equal(A_copy, 0.0), "inverse wrapper modified input");
    utils::Md I(5,5, 0);
    for (uint64_t i=0;i<I.rows();++i) I(i,i)=1;
    auto L  = numerics::matmul<double>(A, Ainv);
    auto R  = numerics::matmul<double>(Ainv, A);
    CHECK(L.nearly_equal(I, 1e-10), "A * Ainv not close to I");
    CHECK(R.nearly_equal(I, 1e-10), "Ainv * A not close to I");
 }
 TEST_CASE(Inverse_NonSquare_Throws) {
    // Non-square: 2x3 — algorithm expects square; should throw
    utils::Md A(2,3,0.0);
    bool threw = false;
    try {
        numerics::inplace_inverse<double>(A);
    } catch (const std::runtime_error&) {
        threw = true;
    } catch (...) {
        threw = true; // any failure is fine; must not silently succeed
    }
    CHECK(threw, "inplace_inverse should throw on non-square matrix");
 }
 TEST_CASE(Inverse_Unknown_Method_Throws) {
-
+    using T = double;
-    utils::Md A(3,3, 0);
+    utils::Matrix<T> A(2,2, T{0});
-    for (uint64_t i=0;i<A.rows();++i) A(i,i)=1;
+    A(0,0)=1; A(1,1)=1;
    bool threw=false;
-    try {
+    try { auto X = numerics::inverse<T>(A, "Foobar"); (void)X; } catch(...) { threw=true; }
-        numerics::inplace_inverse<double>(A, "NotARealMethod");
+    CHECK(threw, "inverse unknown method throws");
    } catch (const std::runtime_error&) {
        threw = true;
    }
    CHECK(threw, "should throw for unknown inverse method");
 }
@@ -0,0 +1,190 @@
 #include "test_common.h"
 #include "./utils/utils.h"           // brings in vector.h, matrix.h, etc.
 #include "./numerics/matmul.h"       // numerics::matmul
 #include "./decomp/lu.h"
 //#include <chrono>
 TEST_CASE(LU_Solve_Vector_Basic) {
    using T = double;
    // A * x = b  with exact solution x = [1, 1, 2]^T
    utils::Matrix<T> A(3,3, T{0});
    A(0,0)=2;  A(0,1)=1;  A(0,2)=1;
    A(1,0)=4;  A(1,1)=-6; A(1,2)=0;
    A(2,0)=-2; A(2,1)=7;  A(2,2)=2;
    utils::Vector<T> b(3, T{0});
    b[0]=5; b[1]=-2; b[2]=9;
    decomp::LUdcmpd lu(A);
    auto x = lu.solve(b);
    utils::Vector<T> x_true(3, T{0});
    x_true[0]=1; x_true[1]=1; x_true[2]=2;
    CHECK( (x.nearly_equal(x_true,1e-12)), "LU solve (vector RHS) failed" );
 }
 TEST_CASE(LU_Solve_MatrixRHS_TwoColumns) {
    using T = double;
    // Same A, solve two RHS at once
    utils::Matrix<T> A(3,3, T{0});
    A(0,0)=2;  A(0,1)=1;  A(0,2)=1;
    A(1,0)=4;  A(1,1)=-6; A(1,2)=0;
    A(2,0)=-2; A(2,1)=7;  A(2,2)=2;
    utils::Matrix<T> B(3,2, T{0});
    // First column b1 (same as previous test)
    B(0,0)=5;  B(1,0)=-2; B(2,0)=9;
    // Second column b2 → choose solution x2 = [0, 2, 1]^T
    // Compute b2 = A * x2 by hand:
    // Row0: 2*0 + 1*2 + 1*1 = 3
    // Row1: 4*0 -6*2 + 0*1 = -12
    // Row2: -2*0 +7*2 + 2*1 = 16
    B(0,1)=3;  B(1,1)=-12; B(2,1)=16;
    decomp::LUdcmpd lu(A);
    auto X = lu.solve(B);
    // Check A*X ≈ B
    auto AX = numerics::matmul(A, X);
    CHECK( AX.nearly_equal(B, 1e-12), "A * X does not match B for matrix RHS" );
 }
 TEST_CASE(LU_Determinant_Known) {
    using T = double;
    // Determinant of:
    // [[1,2,3],[0,1,4],[5,6,0]] is 1
    utils::Matrix<T> A(3,3, T{0});
    A(0,0)=1; A(0,1)=2; A(0,2)=3;
    A(1,0)=0; A(1,1)=1; A(1,2)=4;
    A(2,0)=5; A(2,1)=6; A(2,2)=0;
    decomp::LUdcmpd lu(A);
    T det = lu.det();
    CHECK( std::fabs(det - T{1}) < 1e-12, "det(A) should be 1" );
 }
 TEST_CASE(LU_Pivoting_Handles_Zero_Leading) {
    using T = double;
    // Requires pivoting (A(0,0)=0); system has solution x=[1,2]^T, b = A*x = [2,3]^T
    utils::Matrix<T> A(2,2, T{0});
    A(0,0)=0; A(0,1)=1;
    A(1,0)=1; A(1,1)=1;
    utils::Vector<T> b(2, T{0});
    b[0]=2; b[1]=3;
    decomp::LUdcmpd lu(A);
    auto x = lu.solve(b);
    utils::Vector<T> x_true(2, T{0}); x_true[0]=1; x_true[1]=2;
    CHECK( (x.nearly_equal(x_true,1e-12)), "Pivoting failed on zero-leading matrix" );
 }
 TEST_CASE(LU_Input_Unchanged_By_NonInplace_Path) {
    using T = double;
    utils::Matrix<T> A(4,4, T{0});
    for (uint64_t i=0;i<4;++i) {
        for (uint64_t j=0;j<4;++j) {
            A(i,j) = (i==j) ? 3.0 : 0.1 * ((i+1)*(j+2) % 5 + 1);
        }
    }
    utils::Matrix<T> A_copy = A;
    decomp::LUdcmpd lu(A); // constructor should not mutate input A
    CHECK( A.nearly_equal(A_copy, 0.0), "LU constructor modified input matrix" );
    // Also check solve doesn't mutate RHS copy when using out-of-place convenience
    utils::Vector<T> b(4, 0.0);
    for (uint64_t i=0;i<4;++i) b[i] = double(i+1);
    auto b_copy = b;
    auto x = lu.solve(b);
    (void)x;
    CHECK( (b.nearly_equal(b_copy, 1e-300)), "solve(b) modified its input vector" );
 }
 TEST_CASE(LU_Inplace_Equals_OutOfPlace_Solve_Vector) {
    using T = double;
    utils::Matrix<T> A(3,3, T{0});
    A(0,0)=4;  A(0,1)=1;  A(0,2)=2;
    A(1,0)=0;  A(1,1)=3;  A(1,2)=-1;
    A(2,0)=0;  A(2,1)=0;  A(2,2)=2;
    utils::Vector<T> b(3, T{0}); b[0]=7; b[1]=5; b[2]=4;
    decomp::LUdcmpd lu(A);
    auto x1 = lu.solve(b);
    utils::Vector<T> x2(3, T{0});
    lu.inplace_solve(b, x2);
    CHECK( (x1.nearly_equal(x2,1e-12)), "inplace_solve(b,x) differs from solve(b)" );
 }
 TEST_CASE(LU_Singular_Throws) {
    using T = double;
    // Singular (row2 = 2 * row1)
    utils::Matrix<T> S(2,2, T{0});
    S(0,0)=1; S(0,1)=2;
    S(1,0)=2; S(1,1)=4;
    bool threw=false;
    try {
        decomp::LUdcmpd lu(S);
        (void)lu;
    } catch (const std::runtime_error&) { threw = true; }
    CHECK(threw, "LU should throw on singular matrix");
 }
 TEST_CASE(LU_NonSquare_Throws) {
    using T = double;
    utils::Matrix<T> A(3,2, T{0});
    bool threw = false;
    try {
        decomp::LUdcmpd lu(A);
        (void)lu;
    } catch (const std::runtime_error&) { threw = true; }
    CHECK(threw, "LU should throw on non-square input");
 }
 TEST_CASE(LU_Inverse_RoundTrip) {
    using T = double;
    // Build a strictly diagonally dominant 5x5
    utils::Matrix<T> A(5,5, T{0});
    for (uint64_t i=0;i<5;++i) {
        T rowsum = 0;
        for (uint64_t j=0;j<5;++j) {
            if (i==j) continue;
            A(i,j) = T(0.01 * double(1 + ((i+2)*(j+3)) % 7));
            rowsum += std::fabs(A(i,j));
        }
        A(i,i) = rowsum + T{1};
    }
    decomp::LUdcmpd lu(A);
    auto Ainv = lu.inverse();
    utils::Md I(5,5, 0.0);
    for (uint64_t i=0;i<I.rows();++i) I(i,i)=1.0;
    auto L = numerics::matmul(A, Ainv);
    auto R = numerics::matmul(Ainv, A);
    CHECK(L.nearly_equal(I, 1e-10), "A * inverse(A) not close to I");
    CHECK(R.nearly_equal(I, 1e-10), "inverse(A) * A not close to I");
 }