Finishing up and starting LU decomposition

2025-09-13 21:44:20 +02:00
parent 320436ce98
commit 88087ea6a6
24 changed files with 1502 additions and 699 deletions
@@ -5,100 +5,39 @@
 #include "./utils/vector.h"
 #include "./utils/matrix.h"

+#include "./numerics/inverse/inverse_gauss_jordan.h"
+#include "./numerics/inverse/inverse_lu.h"
+
+#include <omp.h>
+

 namespace numerics{
 	
+	/// Replaces the square matrix A by its inverse, using the algorithm named by `method`.
+	///
+	/// @param A       matrix to invert; it is overwritten by the selected routine
+	/// @param method  algorithm name; only "Gauss-Jordan" (the default) is handled here
+	/// @throws std::runtime_error if A.rows() != A.cols(), or if `method` is not "Gauss-Jordan"
 	template <typename T>
   	void inplace_inverse(utils::Matrix<T>& A, std::string method = "Gauss-Jordan"){
+
+		if (A.rows() != A.cols()) {
+		    throw std::runtime_error("inplace_inverse: non-square matrix");
+		}
+
   		if (method == "Gauss-Jordan"){
-
-   			utils::Matrix<T> B(A.rows(),A.cols(), T{0});
-
-
-   			uint64_t icol{0}, irow{0}, rows{A.rows()}, cols{A.cols()};
-   			double big, dum, pivinv, temp;
-   			utils::Vi indxc(rows,0), indxr(rows,0), ipiv(rows,0);
-
-   			//for (uint64_t j = 0; j < N; ++j){ ipiv[j] = 0;}
-				for (uint64_t i = 0; i < rows; i++){
-					big = 0.0;
-					for (uint64_t j = 0; j < rows; j++){
-						if (ipiv[j] != 1){
-							for (uint64_t k = 0; k < rows; k++){
-								if (ipiv[k] == 0){
-									if (abs(A(j,k)) >= big){
-										big = abs(A(j,k));
-										irow = j;
-										icol = k;
-									}
-								}
-							}
-						}
-					}
-					ipiv[icol]++;
-					if (irow != icol){
-						for (uint64_t l = 0; l < rows; l++){ // SWAP 
-							temp = A(irow,l);
-							A(irow,l) = A(icol,l);
-							A(icol,l) = temp;
-						}
-						for (uint64_t l = 0; l < cols; l++){ // SWAP temp matrix
-							temp = B(irow,l);
-							B(irow,l) = B(icol,l);
-							B(icol,l) = temp;
-						}
-					}
-
-					indxr[i] = irow;
-					indxc[i] = icol;
-					if (A(icol,icol) == 0.0){
-						throw std::runtime_error("utill:inplace_inverse('Gauss-Jordan' - Singular Matrix");
-					}
-					pivinv= 1.0/A(icol,icol);
-					A(icol,icol)=1.0;
-				
-					for (uint64_t l = 0; l < rows; l++){
-						A(icol,l) *= pivinv;
-					}
-					for (uint64_t l = 0; l < cols; l++){
-						B(icol,l) *= pivinv;
-					}
-					for (uint64_t ll = 0; ll < rows; ll++){
-						if (ll != icol){
-							dum = A(ll,icol);
-							A(ll,icol) = 0;
-							for (uint64_t l = 0; l < rows; l++){
-								A(ll,l) -= A(icol,l)*dum;
-							}
-							for (uint64_t l = 0; l < rows; l++){
-								B(ll,l) -= B(icol,l)*dum;
-							}
-						}
-					}
-
-				}
-				//m = temp_m;
-				for (int64_t l = rows-1; l >= 0; l--){
-					if (indxr[l] != indxc[l]){
-						for (uint64_t k = 0; k < rows; k++){
-							temp = A(k,indxr[l]);
-						A(k,indxr[l]) = A(k,indxc[l]);
-						A(k,indxc[l]) = temp;
-						}
-					}
-				}
-   		}
+   			// The elimination itself lives in inverse_gj; this function only validates and dispatches.
+   			inverse_gj(A);
+		}
 		else{
 			throw std::runtime_error("numerics::inplace_inverse(" + method + ") - Not implemented yet \r \nImplemented: 'Gauss-Jordan',");
 		}
-    }
+	}



+	/// Returns the inverse of A as a new matrix.
+	///
+	/// A is only read to initialise the working copy B; the inversion itself is
+	/// performed on B by inplace_inverse, whose exceptions propagate to the caller.
+	///
+	/// @param A       matrix to invert
+	/// @param method  algorithm name forwarded unchanged to inplace_inverse
+	/// @return        the copy B after inplace_inverse has processed it
 	template <typename T>
 	utils::Matrix<T> inverse(utils::Matrix<T>& A, std::string method = "Gauss-Jordan"){
+
+
 		utils::Matrix<T> B = A;
+
 		inplace_inverse(B, method);
+
 		return B;
 	}

@@ -0,0 +1,94 @@
+#ifndef _inverse_gj_n_
+#define _inverse_gj_n_
+
+
+#include "./utils/vector.h"
+#include "./utils/matrix.h"
+
+#include <omp.h>
+
+#include <cmath>
+#include <stdexcept>
+
+
+namespace numerics{
+	
+	/// In-place matrix inversion by Gauss-Jordan elimination with full pivoting.
+	///
+	/// Each of the `rows` passes picks the largest-magnitude entry among the rows
+	/// and columns that have not yet hosted a pivot, swaps it onto the diagonal,
+	/// normalises the pivot row and eliminates the pivot column from every other
+	/// row. Because the row swaps permute the result, the columns of A are swapped
+	/// back in reverse pivot order at the end. Only A is transformed; no separate
+	/// right-hand-side matrix is carried along.
+	///
+	/// @tparam T  element type of the matrix
+	/// @param  A  square matrix, overwritten with its inverse
+	/// @throws std::runtime_error if A is not square, or if a selected pivot is
+	///         exactly zero (singular matrix)
+	template <typename T>
+	void inverse_gj(utils::Matrix<T>& A){
+		const uint64_t rows{A.rows()};
+
+		// Every loop below indexes rows and columns with the same bound, so a
+		// rectangular matrix would be read out of range.
+		if (rows != A.cols()){
+			throw std::runtime_error("inverse_gj: non-square matrix");
+		}
+
+		uint64_t icol{0}, irow{0};
+		// indxr[i] / indxc[i]: row and column of the pivot chosen in pass i.
+		// ipiv[k]: non-zero once index k has been used as a pivot position.
+		utils::Vi indxc(rows,0), indxr(rows,0), ipiv(rows,0);
+
+		for (uint64_t i = 0; i < rows; i++){
+			// --- pivot search over all not-yet-reduced rows and columns ---
+			double big = 0.0;
+			for (uint64_t j = 0; j < rows; j++){
+				if (ipiv[j] == 1){
+					continue;
+				}
+				for (uint64_t k = 0; k < rows; k++){
+					if (ipiv[k] != 0){
+						continue;
+					}
+					// std::abs: the unqualified abs may bind to the C integer
+					// overload and truncate fractional magnitudes.
+					const double mag = static_cast<double>(std::abs(A(j,k)));
+					if (mag >= big){
+						big = mag;
+						irow = j;
+						icol = k;
+					}
+				}
+			}
+			ipiv[icol]++;
+
+			// --- bring the pivot onto the diagonal (row swap) ---
+			if (irow != icol){
+				for (uint64_t l = 0; l < rows; l++){
+					const T held = A(irow,l);
+					A(irow,l) = A(icol,l);
+					A(icol,l) = held;
+				}
+			}
+			indxr[i] = irow;
+			indxc[i] = icol;
+
+			if (A(icol,icol) == 0.0){
+				throw std::runtime_error("utill:inplace_inverse('Gauss-Jordan' - Singular Matrix");
+			}
+
+			// --- normalise the pivot row ---
+			// The diagonal is set to 1 first so that, after scaling, it holds
+			// 1/pivot: the entry of the inverse being built in place.
+			const T pivinv = T{1}/A(icol,icol);
+			A(icol,icol) = T{1};
+			for (uint64_t l = 0; l < rows; l++){
+				A(icol,l) *= pivinv;
+			}
+
+			// --- eliminate the pivot column from all other rows ---
+			for (uint64_t ll = 0; ll < rows; ll++){
+				if (ll == icol){
+					continue;
+				}
+				const T factor = A(ll,icol);
+				A(ll,icol) = T{0};
+				for (uint64_t l = 0; l < rows; l++){
+					A(ll,l) -= A(icol,l)*factor;
+				}
+			}
+		}
+
+		// --- undo the row permutation: swap columns back, last pivot first ---
+		// Unsigned countdown; the body never runs for an empty matrix.
+		for (uint64_t l = rows; l-- > 0;){
+			if (indxr[l] != indxc[l]){
+				for (uint64_t k = 0; k < rows; k++){
+					const T held = A(k,indxr[l]);
+					A(k,indxr[l]) = A(k,indxc[l]);
+					A(k,indxc[l]) = held;
+				}
+			}
+		}
+	}
+
+} // namespace numerics
+
+#endif // _inverse_gj_n_
@@ -3,10 +3,12 @@


 #include "./utils/matrix.h"
+#include "./core/omp_config.h"


 namespace numerics{
-	
+
+// ---------------- Serial baseline ----------------
 	template <typename T>
 	utils::Matrix<T> matmul(const utils::Matrix<T>& A, const utils::Matrix<T>& B){

@@ -19,10 +21,8 @@ namespace numerics{
 		const uint64_t p = B.cols();
 		T tmp;

-		utils::Matrix<T> C(m, n, T{0});
+		utils::Matrix<T> C(m, p, T{0});

-		//#pragma omp parallel for collapse(2) schedule(static)
-		#pragma omp parallel for
 		for (uint64_t i = 0; i < m; ++i){
 			for (uint64_t j = 0; j < n; ++j){
 				tmp = A(i,j);
@@ -34,6 +34,85 @@ namespace numerics{
 		return C;
 	}

+// ---------------- Rows-only OpenMP ----------------
+/// Matrix product C = A * B with the outer (row) loop shared among OpenMP threads.
+///
+/// @param A  left operand
+/// @param B  right operand; B.rows() must equal A.cols()
+/// @return   matrix of size A.rows() x B.cols()
+/// @throws std::runtime_error when A.cols() != B.rows()
+template <typename T>
+utils::Matrix<T> matmul_rows_omp(const utils::Matrix<T>& A,
+                                 const utils::Matrix<T>& B) {
+    if (A.cols() != B.rows()) {
+        throw std::runtime_error("matmul_rows_omp: dim mismatch");
+    }
+
+    const uint64_t n_rows  = A.rows();
+    const uint64_t n_inner = A.cols();
+    const uint64_t n_cols  = B.cols();
+
+    utils::Matrix<T> C(n_rows, n_cols, T{0});
+
+    // Only the row loop is distributed: each iteration writes C(row, *) only.
+    #pragma omp parallel for schedule(static)
+    for (uint64_t row = 0; row < n_rows; ++row) {
+        for (uint64_t col = 0; col < n_cols; ++col) {
+            T dot = T{0};
+            for (uint64_t k = 0; k < n_inner; ++k) {
+                dot += A(row, k) * B(k, col);
+            }
+            C(row, col) = dot;
+        }
+    }
+    return C;
+}
+
+// -------------- Collapse(2) OpenMP ----------------
+/// Matrix product C = A * B where the row and column loops are merged into one
+/// OpenMP iteration space via collapse(2).
+///
+/// @param A  left operand
+/// @param B  right operand; B.rows() must equal A.cols()
+/// @return   matrix of size A.rows() x B.cols()
+/// @throws std::runtime_error when A.cols() != B.rows()
+template <typename T>
+utils::Matrix<T> matmul_collapse_omp(const utils::Matrix<T>& A,
+                                     const utils::Matrix<T>& B) {
+    if (A.cols() != B.rows()) {
+        throw std::runtime_error("matmul_collapse_omp: dim mismatch");
+    }
+
+    const uint64_t out_rows = A.rows();
+    const uint64_t depth    = A.cols();
+    const uint64_t out_cols = B.cols();
+
+    utils::Matrix<T> C(out_rows, out_cols, T{0});
+
+    // The two outer loops must stay perfectly nested for collapse(2).
+    #pragma omp parallel for collapse(2) schedule(static)
+    for (uint64_t r = 0; r < out_rows; ++r) {
+        for (uint64_t c = 0; c < out_cols; ++c) {
+            T sum = T{0};
+            for (uint64_t d = 0; d < depth; ++d) {
+                sum += A(r, d) * B(d, c);
+            }
+            C(r, c) = sum;
+        }
+    }
+    return C;
+}
+
+
+// -------------------- Auto selector ---------------------
+/// Chooses a matmul kernel from the output size and the OpenMP configuration.
+///
+/// Work is estimated as A.rows() * B.cols() (number of output elements):
+///   1. parallelism not allowed, or work < 4 * threads    -> matmul (serial)
+///   2. work >= 8 * threads                                -> matmul_collapse_omp
+///   3. A.rows() >= threads and B.cols() <= 4              -> matmul_rows_omp
+///   4. anything else                                      -> matmul (serial)
+///
+/// NOTE(review): because rule 2 is tested before rule 3, the rows-only kernel
+/// is only reached for work in [4 * threads, 8 * threads) — confirm this is intended.
+///
+/// @param A  left operand
+/// @param B  right operand
+/// @return   the product as computed by the selected kernel
+template <typename T>
+utils::Matrix<T> matmul_auto(const utils::Matrix<T>& A,
+                             const utils::Matrix<T>& B) {
+    const uint64_t out_rows  = A.rows();
+    const uint64_t out_cols  = B.cols();
+    const uint64_t out_elems = out_rows * out_cols;
+
+    const bool parallel_ok = omp_config::omp_parallel_allowed();
+
+    #ifdef _OPENMP
+      const int threads = omp_get_max_threads();
+    #else
+      const int threads = 1;
+    #endif
+    const uint64_t n_threads = static_cast<uint64_t>(threads);
+
+    // Tiny problems: serial is cheapest.
+    if (!parallel_ok || out_elems < n_threads * 4ull) {
+        return matmul(A,B);
+    }
+
+    // Plenty of (i,j) work: collapse(2) kernel.
+    if (out_elems >= 8ull * n_threads) {
+        return matmul_collapse_omp(A,B);
+    }
+
+    // Many rows and very few columns: rows-only kernel.
+    if (out_rows >= n_threads && out_cols <= 4) {
+        return matmul_rows_omp(A,B);
+    }
+
+    // Safe fallback
+    return matmul(A,B);
+}
+
+



@@ -3,11 +3,13 @@


 #include "./utils/matrix.h"
-
+#include "./core/omp_config.h"

 namespace numerics{
 	
-	// y = A * x, where A is (m×n) and x is length n and y is length m
+// =================================================
+//   y = A * x    (Matrix–Vector product)
+// =================================================
 	template <typename T>
 	utils::Vector<T> matvec(const utils::Matrix<T>& A, const utils::Vector<T>& x) {
 	    if (A.cols() != x.size()) {
@@ -18,6 +20,27 @@ namespace numerics{
 	    const uint64_t n = A.cols();

 	    utils::Vector<T> y(m, T{0});
+
+	    for (uint64_t i = 0; i < m; ++i) {
+	        for (uint64_t j = 0; j < n; ++j) {
+	            y[i] += A(i, j) * x[j];
+	        }
+	    }
+	    return y;
+	}
+	// -------------- Row-parallel OpenMP ----------------
+	template <typename T>
+	utils::Vector<T> matvec_omp(const utils::Matrix<T>& A, const utils::Vector<T>& x) {
+	    if (A.cols() != x.size()) {
+	        throw std::runtime_error("matvec: dimension mismatch");
+	    }
+
+	    const uint64_t m = A.rows();
+	    const uint64_t n = A.cols();
+
+	    utils::Vector<T> y(m, T{0});
+
+	    #pragma omp parallel for schedule(static)
 	    for (uint64_t i = 0; i < m; ++i) {
 	        T acc = T{0};
 	        for (uint64_t j = 0; j < n; ++j) {
@@ -28,7 +51,34 @@ namespace numerics{
 	    return y;
 	}

-	// y = x * A, where x is length m and A is (m×n) -> y is length n
+	// -------------- Auto OpenMP ----------------
+	/// Chooses between the serial and the OpenMP matrix-vector product.
+	///
+	/// matvec_omp is selected only when omp_config::omp_parallel_allowed() is
+	/// true AND the matrix holds more than 4 elements per available thread;
+	/// in every other case the serial matvec is used. The two conditions are
+	/// combined with &&: with || the parallel kernel would run when parallelism
+	/// is disallowed (large input) or for tiny inputs (parallelism allowed).
+	///
+	/// @param A  matrix operand
+	/// @param x  vector operand; both kernels throw std::runtime_error when
+	///           A.cols() != x.size()
+	/// @return   the product as computed by the selected kernel
+	template <typename T>
+	utils::Vector<T> matvec_auto(const utils::Matrix<T>& A,
+	                             const utils::Vector<T>& x) {
+
+	    // Work estimate: number of matrix elements.
+	    const uint64_t work = A.rows() * A.cols();
+
+	    const bool can_parallel = omp_config::omp_parallel_allowed();
+		#ifdef _OPENMP
+		    const int threads = omp_get_max_threads();
+		#else
+		    const int threads = 1;
+		#endif
+
+	    if (can_parallel && work > static_cast<uint64_t>(threads) * 4ull) {
+	        return matvec_omp(A,x);
+	    }
+
+	    // Safe fallback
+	    return matvec(A,x);
+	}
+
+// =================================================
+//   y = x * A    (Vector–Matrix product)
+// =================================================
 	template <typename T>
 	utils::Vector<T> vecmat(const utils::Vector<T>& x, const utils::Matrix<T>& A) {
 	    if (x.size() != A.rows()) {
@@ -38,6 +88,26 @@ namespace numerics{
 	    const uint64_t n = A.cols();

 	    utils::Vector<T> y(n, T{0});
+
+	    for (uint64_t j = 0; j < n; ++j) {
+	        for (uint64_t i = 0; i < m; ++i) {
+	            y[j] += x[i] * A(i, j);
+	        }
+	    }
+	    return y;
+	}
+
+	// -------------- Column-parallel OpenMP ----------------
+	template <typename T>
+	utils::Vector<T> vecmat_omp(const utils::Vector<T>& x, const utils::Matrix<T>& A) {
+	    if (x.size() != A.rows()) {
+	        throw std::runtime_error("vecmat: dimension mismatch");
+	    }
+	    const uint64_t m = A.rows();
+	    const uint64_t n = A.cols();
+
+	    utils::Vector<T> y(n, T{0});
+	    #pragma omp parallel for schedule(static)
 	    for (uint64_t j = 0; j < n; ++j) {
 	        T acc = T{0};
 	        for (uint64_t i = 0; i < m; ++i) {
@@ -48,6 +118,30 @@ namespace numerics{
 	    return y;
 	}

+	// -------------- Auto OpenMP ----------------
+	/// Chooses between the serial and the OpenMP vector-matrix product.
+	///
+	/// vecmat_omp is selected only when omp_config::omp_parallel_allowed() is
+	/// true AND the matrix holds more than 4 elements per available thread;
+	/// in every other case the serial vecmat is used. The two conditions are
+	/// combined with &&: with || the parallel kernel would run when parallelism
+	/// is disallowed (large input) or for tiny inputs (parallelism allowed).
+	///
+	/// @param x  vector operand; both kernels throw std::runtime_error when
+	///           x.size() != A.rows()
+	/// @param A  matrix operand
+	/// @return   the product as computed by the selected kernel
+	template <typename T>
+	utils::Vector<T> vecmat_auto(const utils::Vector<T>& x,
+								 const utils::Matrix<T>& A) {
+
+	    // Work estimate: number of matrix elements.
+	    const uint64_t work = A.rows() * A.cols();
+
+	    const bool can_parallel = omp_config::omp_parallel_allowed();
+		#ifdef _OPENMP
+		    const int threads = omp_get_max_threads();
+		#else
+		    const int threads = 1;
+		#endif
+
+	    if (can_parallel && work > static_cast<uint64_t>(threads) * 4ull) {
+	        return vecmat_omp(x,A);
+	    }
+
+	    // Safe fallback
+	    return vecmat(x,A);
+	}
+

 } // namespace numerics

@@ -43,28 +43,6 @@ namespace numerics{
 	}


-
-
-
-
-
-
-
-
-
-
-
 } // namespace numerics

-
-
-
-
-
-
-
-
-
-
-
 #endif // _transpose_n_