Ready for fvm steady case

2025-09-21 20:57:02 +02:00
parent 3a53b6ebf7
commit 513f071748
59 changed files with 1813 additions and 983 deletions
@@ -0,0 +1,138 @@
+#pragma once
+#include "./utils/matrix.h"
+#include "./core/omp_config.h"
+
+
+namespace numerics {
+
+	/// Turn A into an identity matrix in place (serial version).
+	///
+	/// @param A  Matrix to overwrite.
+	/// @param N  Requested dimension. When non-zero, A is resized to N x N via
+	///           A.resize(N, N, T{0}) and only the diagonal is written afterwards
+	///           (this assumes resize leaves the off-diagonal entries at zero --
+	///           TODO confirm against utils::Matrix). When zero, A keeps its
+	///           current size and every entry is overwritten.
+	/// @throws std::runtime_error if N == 0 and A is not square.
+	template <typename T>
+	void inplace_eye(utils::Matrix<T>& A, uint64_t N = 0){
+
+		if (N != 0){
+			// Explicit size: resize supplies the zeros, we only add the ones.
+			A.resize(N, N, T{0});
+			for (uint64_t d = 0; d < N; ++d){
+				A(d,d) = T{1};
+			}
+			return;
+		}
+
+		// No size given: reuse A's own dimension, which must be square.
+		const uint64_t dim = A.rows();
+		if (dim != A.cols()) {
+			throw std::runtime_error("inplace_eye: non-square matrix");
+		}
+
+		// Single sweep that writes both the zeros and the unit diagonal.
+		for (uint64_t r = 0; r < dim; ++r){
+			for (uint64_t c = 0; c < dim; ++c){
+				A(r,c) = (r == c) ? T{1} : T{0};
+			}
+		}
+	}
+
+
+	/// Turn A into an identity matrix in place using OpenMP loops.
+	///
+	/// @param A  Matrix to overwrite.
+	/// @param N  Requested dimension. Non-zero: A.resize(N, N, T{0}) is called and
+	///           only the diagonal is written. Zero: A keeps its size, the first
+	///           N*N entries reachable through A.data() are zeroed, then the
+	///           diagonal is set.
+	/// @throws std::runtime_error if N == 0 and A is not square.
+	template <typename T>
+	void inplace_eye_omp(utils::Matrix<T>& A, uint64_t N = 0){
+
+		const bool resized = (N != 0);
+
+		if (resized){
+			A.resize(N, N, T{0});
+		}else{
+			N = A.rows();
+			if (A.cols() != N) {
+				throw std::runtime_error("inplace_eye_omp: non-square matrix");
+			}
+		}
+
+		// Clear the storage only when resize did not already provide zeros.
+		if (!resized){
+			T* const flat = A.data();
+			const uint64_t total = N*N;
+			#pragma omp parallel for schedule(static)
+			for (uint64_t k = 0; k < total; ++k){
+				flat[k] = T{0};
+			}
+		}
+
+		// Unit diagonal; each iteration writes a distinct element.
+		#pragma omp parallel for schedule(static)
+		for (uint64_t d = 0; d < N; ++d){
+			A(d,d) = T{1};
+		}
+	}
+
+	/// Build an identity matrix by default-constructing a matrix and handing
+	/// it to inplace_eye together with the requested dimension N.
+	template <typename T>
+	utils::Matrix<T> eye(uint64_t N){
+		utils::Matrix<T> identity;
+		inplace_eye(identity, N);
+		return identity;
+	}
+
+	/// OpenMP counterpart of eye(): default-constructs a matrix and lets
+	/// inplace_eye_omp size and fill it for dimension N.
+	template <typename T>
+	utils::Matrix<T> eye_omp(uint64_t N){
+		utils::Matrix<T> identity;
+		inplace_eye_omp(identity, N);
+		return identity;
+	}
+
+	/// Return an N x N identity matrix, choosing between the serial and the
+	/// OpenMP fill.
+	///
+	/// The OpenMP path is taken only when omp_config::omp_parallel_allowed()
+	/// returns true AND the matrix has more than four entries per available
+	/// thread. In every other case, including builds without _OPENMP, the
+	/// serial fill is used.
+	///
+	/// @param N  Dimension of the identity matrix.
+	template <typename T>
+	utils::Matrix<T> eye_omp_auto(uint64_t N){
+
+		const uint64_t work = N*N;	// number of entries to initialise
+		utils::Matrix<T> A(N,N,T{0});
+
+		#ifdef _OPENMP
+			const bool can_parallel = omp_config::omp_parallel_allowed();
+			const uint64_t threads = static_cast<uint64_t>(omp_get_max_threads());
+		#else
+			const bool can_parallel = false;
+			const uint64_t threads = 1;
+		#endif
+
+		// Both conditions are required: parallelism must be permitted and the
+		// matrix must be large enough to be worth a parallel region.
+		if (can_parallel && work > threads * 4ull) {
+			inplace_eye_omp(A, 0);
+		}
+		else{
+			// Safe fallback
+			inplace_eye(A, 0);
+		}
+
+		return A;
+	}
+ // Untested:
+	/// Turn A into an identity matrix in place, choosing between the serial
+	/// and the OpenMP implementation.
+	///
+	/// @param A  Matrix to overwrite.
+	/// @param N  Requested dimension, forwarded unchanged to inplace_eye /
+	///           inplace_eye_omp (0 means "keep A's current size").
+	///
+	/// The OpenMP path is taken only when omp_config::omp_parallel_allowed()
+	/// returns true AND the target matrix has more than four entries per
+	/// available thread.
+	template <typename T>
+	void inplace_eye_omp_auto(utils::Matrix<T>& A, uint64_t N = 0){
+
+		// With N == 0 the size comes from A itself, so the work estimate has
+		// to be based on A.rows() rather than on N.
+		const uint64_t dim = (N != 0) ? N : A.rows();
+		const uint64_t work = dim * dim;
+
+		#ifdef _OPENMP
+			const bool can_parallel = omp_config::omp_parallel_allowed();
+			const uint64_t threads = static_cast<uint64_t>(omp_get_max_threads());
+		#else
+			const bool can_parallel = false;
+			const uint64_t threads = 1;
+		#endif
+
+		// Forward N so that a requested resize is actually performed.
+		if (can_parallel && work > threads * 4ull) {
+			inplace_eye_omp(A, N);
+		}
+		else{
+			// Safe fallback
+			inplace_eye(A, N);
+		}
+	}
+
+
+
+} // namespace numerics
@@ -0,0 +1,9 @@
+#pragma once
+
+
+//#include "./numerics/interpolation1d/interpolation1d_base.h"
+#include "./numerics/interpolation1d/interpolation1d_barycentric.h"
+#include "./numerics/interpolation1d/interpolation1d_cubic_spline.h"
+#include "./numerics/interpolation1d/interpolation1d_linear.h"
+#include "./numerics/interpolation1d/interpolation1d_polynomial.h"
+#include "./numerics/interpolation1d/interpolation1d_rational.h"
@@ -1,6 +1,6 @@
 #pragma once

-#include "./numerics/interpolation1d_base.h"
+#include "./numerics/interpolation1d/interpolation1d_base.h"

 #include "./utils/vector.h"
 #include "./numerics/min.h"
@@ -43,11 +43,9 @@ namespace numerics{
 		T interp(T x){
 			int64_t jlo;
 			if (cor){
-				std::cout << "Uses hunt()" << std::endl;
 				jlo = hunt(x);
 			}
 			else{
-				std::cout << "Uses locate()" << std::endl;
 				jlo = locate(x);
 			}
 			return rawinterp(jlo,x);
@@ -1,6 +1,6 @@
 #pragma once

-#include "./numerics/interpolation1d_base.h"
+#include "./numerics/interpolation1d/interpolation1d_base.h"

 //#include "./numerics/abs.h"
 #include "./utils/vector.h"
@@ -1,6 +1,6 @@
 #pragma once

-#include "./numerics/interpolation1d_base.h"
+#include "./numerics/interpolation1d/interpolation1d_base.h"


 namespace numerics{
@@ -1,6 +1,6 @@
 #pragma once

-#include "./numerics/interpolation1d_base.h"
+#include "./numerics/interpolation1d/interpolation1d_base.h"

 #include "./numerics/abs.h"
 #include "./utils/vector.h"
@@ -1,6 +1,6 @@
 #pragma once

-#include "./numerics/interpolation1d_base.h"
+#include "./numerics/interpolation1d/interpolation1d_base.h"

 #include "./utils/vector.h"
 #include "./numerics/abs.h"
@@ -5,6 +5,8 @@
 #include "./utils/vector.h"
 #include "./utils/matrix.h"

+#include "./numerics/initializers/eye.h"
+
 #include <omp.h>

 namespace numerics{
@@ -13,7 +15,7 @@ namespace numerics{
   	void inverse_gj(utils::Matrix<T>& A){
 		//utils::Matrix<T> B(A.rows(),A.cols(), T{0});
 		utils::Matrix<T> B;
-		B.eye(A.rows());
+		B = eye_omp_auto<T>(A.rows());


 		uint64_t icol{0}, irow{0}, rows{A.rows()}, cols{A.cols()};
@@ -3,8 +3,6 @@
 #include "./decomp/lu.h"


-
-
 namespace numerics{
 	
 	template <typename T>
@@ -0,0 +1,85 @@
+#pragma once
+
+#include "./core/omp_config.h"
+
+#include "./utils/matrix.h"
+#include "./numerics/abs.h"
+
+
+namespace numerics{
+	
+	// -------------- Serial ----------------
+	/// Element-wise comparison of two matrices (serial, stops at the first
+	/// mismatch).
+	///
+	/// @param A,B  Matrices to compare.
+	/// @param tol  Absolute tolerance, used only when T is a floating-point type.
+	/// @return false when the shapes differ or any pair of entries differs.
+	///         Floating-point entries match when |A(i,j) - B(i,j)| <= tol;
+	///         all other types are compared with operator!=.
+	template <typename T>
+	bool matequal(const utils::Matrix<T>& A, const utils::Matrix<T>& B, double tol = 1e-9) {
+
+		if (A.rows() != B.rows() || A.cols() != B.cols()) {
+			return false;
+		}
+
+		const bool decimal = std::is_floating_point<T>::value;
+		const uint64_t rows=A.rows(), cols=A.cols();
+
+		for (uint64_t i = 0; i < rows; ++i){
+			for (uint64_t j = 0; j < cols; ++j){
+				if (decimal) {
+					// Written as !(d <= tol) instead of (d > tol) so that a NaN
+					// difference counts as a mismatch, matching matequal_omp
+					// (assumes numerics::abs propagates NaN -- TODO confirm).
+					if (!(numerics::abs(A(i,j) - B(i,j)) <= static_cast<T>(tol))){
+						return false;
+					}
+				} else if (A(i,j) != B(i,j)){
+					return false;
+				}
+			}
+		}
+		return true;
+	}
+
+	// -------------- Parallel ----------------
+	/// Element-wise comparison of two matrices using an OpenMP logical-AND
+	/// reduction over all (row, column) pairs.
+	///
+	/// @param A,B  Matrices to compare.
+	/// @param tol  Absolute tolerance, used only when T is a floating-point type.
+	/// @return false when the shapes differ; otherwise true only if every pair
+	///         of entries matches (|a - b| <= tol for floating-point T,
+	///         operator== for other types). Unlike the serial version there is
+	///         no early exit: every entry is visited.
+	template <typename T>
+	bool matequal_omp(const utils::Matrix<T>& A, const utils::Matrix<T>& B, double tol = 1e-9) {
+
+		const uint64_t rows = A.rows();
+		const uint64_t cols = A.cols();
+		if (rows != B.rows() || cols != B.cols()) {
+			return false;
+		}
+
+		const bool decimal = std::is_floating_point<T>::value;
+		bool all_match = true;
+
+		#pragma omp parallel for collapse(2) schedule(static) reduction(&&:all_match)
+		for (uint64_t r = 0; r < rows; ++r){
+			for (uint64_t c = 0; c < cols; ++c){
+				const bool same = decimal
+					? (numerics::abs(A(r,c) - B(r,c)) <= static_cast<T>(tol))
+					: (A(r,c) == B(r,c));
+				all_match = all_match && same;
+			}
+		}
+		return all_match;
+	}
+
+	// -------------- Auto OpenMP ----------------
+	/// Compare two matrices, choosing between matequal and matequal_omp.
+	///
+	/// The OpenMP version is used only when omp_config::omp_parallel_allowed()
+	/// returns true AND there are more than four entries per available thread.
+	/// Otherwise, including builds without _OPENMP, the serial version runs.
+	///
+	/// @param A,B  Matrices to compare.
+	/// @param tol  Absolute tolerance forwarded to the selected implementation.
+	/// @return false on shape mismatch, otherwise the result of the selected
+	///         comparison.
+	template <typename T>
+	bool matequal_auto(const utils::Matrix<T>& A, const utils::Matrix<T>& B, double tol = 1e-9) {
+
+		if (A.rows() != B.rows() || A.cols() != B.cols()) {
+			return false;
+		}
+
+		const uint64_t work = A.rows() * A.cols();	// entries to compare
+
+		#ifdef _OPENMP
+			const bool can_parallel = omp_config::omp_parallel_allowed();
+			const uint64_t threads = static_cast<uint64_t>(omp_get_max_threads());
+		#else
+			const bool can_parallel = false;
+			const uint64_t threads = 1;
+		#endif
+
+		// Both conditions are required: parallelism must be permitted and the
+		// problem must be large enough to be worth a parallel region.
+		if (can_parallel && work > threads * 4ull) {
+			return matequal_omp(A,B,tol);
+		}
+		else{
+			// Safe fallback
+			return matequal(A,B,tol);
+		}
+	}
+} // namespace numerics
@@ -85,21 +85,23 @@ utils::Matrix<T> matmul_auto(const utils::Matrix<T>& A,
    const uint64_t m=A.rows(), p=B.cols();
    const uint64_t work = m * p;

-    bool can_parallel = omp_config::omp_parallel_allowed();
+    

    #ifdef _OPENMP
-      int threads = omp_get_max_threads();
+    bool can_parallel = omp_config::omp_parallel_allowed();
+      uint64_t threads = static_cast<uint64_t>(omp_get_max_threads());
    #else
-      int threads = 1;
+      bool can_parallel = false;
+      uint64_t threads = 1;
    #endif


    // Tiny problems: serial is cheapest.
-    if (!can_parallel || work < static_cast<uint64_t>(threads)*4ull) {
+    if (!can_parallel || work < threads*4ull) {
        return matmul(A,B);
    }
    // Plenty of (i,j) work → collapse(2) is a great default.
-    else if (work >= 8ull * static_cast<uint64_t>(threads)) {
+    else if (work >= 8ull * threads) {
        return matmul_collapse_omp(A,B);
    }
    // Many rows and very few columns → rows-only cheaper overhead.
@@ -38,16 +38,24 @@ namespace numerics{
 	    const uint64_t m = A.rows();
 	    const uint64_t n = A.cols();

-	    utils::Vector<T> y(m, T{0});
+	    utils::Vector<T> y(m, T{0}); // <-- y has length m (rows)

+
+        const T* xptr = x.data();
+        const T* Aptr = A.data();	// row-major: A(i,j) == Aptr[i*n + j]
+
+        // Each row i is an independent dot product: y[i] = dot(A[i,*], x)
 	    #pragma omp parallel for schedule(static)
 	    for (uint64_t i = 0; i < m; ++i) {
-	        T acc = T{0};
+            const T* row = Aptr + i * n;     // contiguous row i
+            T acc = T{0};
+            #pragma omp simd reduction(+:acc)
 	        for (uint64_t j = 0; j < n; ++j) {
-	            acc += A(i, j) * x[j];
+	            acc += row[j] * xptr[j];
 	        }
 	        y[i] = acc;
-	    }
+		}
+
 	    return y;
 	}

@@ -1,6 +1,8 @@
 // "./numerics/numerics.h"
 #pragma once

+#include "./numerics/initializers/eye.h"
+#include "./numerics/matequal.h"
 #include "./numerics/transpose.h"
 #include "./numerics/inverse.h"
 #include "./numerics/matmul.h"
@@ -3,12 +3,13 @@


 #include "./utils/matrix.h"
+#include "./core/omp_config.h"


 namespace numerics{
 	
 	template <typename T>
-	void inplace_transpose(utils::Matrix<T>& A){
+	void inplace_transpose_square(utils::Matrix<T>& A){

 		const uint64_t rows = A.rows();
 		const uint64_t cols = A.cols();
@@ -27,13 +28,54 @@ namespace numerics{
 		}
 	}

+	/// In-place transpose of a square matrix, parallelised over rows with OpenMP.
+	///
+	/// For every row r, each entry to the right of the diagonal is exchanged with
+	/// its mirror below the diagonal. Each (r,c)/(c,r) pair is handled by exactly
+	/// one outer iteration.
+	///
+	/// @throws std::runtime_error if A is not square.
+	template <typename T>
+	void inplace_transpose_square_omp(utils::Matrix<T>& A){
+
+		const uint64_t n = A.rows();
+		if (n != A.cols()){
+			throw std::runtime_error("inplace_transpose only valid for square matrices");
+		}
+
+		#pragma omp parallel for schedule(static)
+		for (uint64_t r = 0; r < n; ++r){
+			for (uint64_t c = r + 1; c < n; ++c){
+				// Manual three-step exchange of the mirrored pair.
+				const T upper = A(r,c);
+				A(r,c) = A(c,r);
+				A(c,r) = upper;
+			}
+		}
+	}
+
+
+
 	template <typename T>
 	utils::Matrix<T> transpose(const utils::Matrix<T>& A){

 		const uint64_t rows = A.rows();
 		const uint64_t cols = A.cols();

-		utils::Matrix<T> B(cols, rows, T{});
+		utils::Matrix<T> B(cols, rows, T{0});
+
+		for (uint64_t i = 0; i < rows; ++i){
+			for (uint64_t j = 0; j < cols; ++j){
+				B(j,i) = A(i,j);
+			}
+		}
+		return B;
+	}
+
+	template <typename T>
+	utils::Matrix<T> transpose_omp(const utils::Matrix<T>& A){
+
+		const uint64_t rows = A.rows();
+		const uint64_t cols = A.cols();
+
+		utils::Matrix<T> B(cols, rows, T{0});
+
+		#pragma omp parallel for collapse(2) schedule(static)
 		for (uint64_t i = 0; i < rows; ++i){
 			for (uint64_t j = 0; j < cols; ++j){
 				B(j,i) = A(i,j);
@@ -43,6 +85,69 @@ namespace numerics{
 	}


+    // -------- Auto selectors --------
+    /// In-place transpose of a square matrix, choosing between the serial and
+    /// the OpenMP implementation.
+    ///
+    /// The OpenMP version runs only when omp_config::omp_parallel_allowed()
+    /// returns true and the number of swaps exceeds four per available thread.
+    ///
+    /// @throws std::runtime_error if A is not square.
+    template <typename T>
+    void inplace_transpose_square_auto(utils::Matrix<T>& A) {
+        const uint64_t n = A.rows();
+
+        if (n != A.cols()) {
+            throw std::runtime_error("inplace_transpose_auto: only valid for square matrices");
+        }
+
+        // One swap per entry strictly above the diagonal.
+        const std::uint64_t swaps = static_cast<std::uint64_t>((n * (n - 1)) / 2);
+
+	    #ifdef _OPENMP
+	        const bool can_parallel = omp_config::omp_parallel_allowed();
+	        const uint64_t threads = static_cast<std::uint64_t>(omp_get_max_threads());
+	    #else
+	        const bool can_parallel = false;
+	        const uint64_t threads = 1;
+		#endif
+
+        const bool go_parallel = can_parallel && (swaps > threads * 4ull);
+        if (!go_parallel) {
+            inplace_transpose_square(A);
+            return;
+        }
+        inplace_transpose_square_omp(A);
+    }
+
+    /// Return the transpose of A as a new matrix, choosing between the serial
+    /// transpose() and the OpenMP transpose_omp().
+    ///
+    /// The OpenMP version runs only when omp_config::omp_parallel_allowed()
+    /// returns true AND A has more than four entries per available thread.
+    /// Otherwise, including builds without _OPENMP, the serial version runs.
+    ///
+    /// @param A  Input matrix of any shape; it is not modified.
+    /// @return A matrix with A.cols() rows and A.rows() columns.
+    template <typename T>
+    utils::Matrix<T> transpose_auto(const utils::Matrix<T>& A) {
+
+        // One element copy per entry of A.
+        const uint64_t work = A.rows() * A.cols();
+
+	    #ifdef _OPENMP
+	        const bool can_parallel = omp_config::omp_parallel_allowed();
+	        const uint64_t threads = static_cast<std::uint64_t>(omp_get_max_threads());
+	    #else
+	        const bool can_parallel = false;
+	        const uint64_t threads = 1;
+		#endif
+
+        // Square inputs use the same out-of-place path as every other shape:
+        // transposing a zero-filled scratch matrix in place would discard A's
+        // values and return all zeros.
+        if (can_parallel && work > threads * 4ull) {
+            return transpose_omp(A);
+        }
+        return transpose(A);
+    }
+
+
+
+
+
+
+
+
+
+
 } // namespace numerics

 #endif // _transpose_n_