diff --git a/bin/abc_lab b/bin/abc_lab deleted file mode 100755 index 1e68cd0..0000000 Binary files a/bin/abc_lab and /dev/null differ diff --git a/bin/tests b/bin/tests new file mode 100755 index 0000000..747dc16 Binary files /dev/null and b/bin/tests differ diff --git a/include/decomp/lu.h b/include/decomp/lu.h index 0cbbe91..e23999d 100644 --- a/include/decomp/lu.h +++ b/include/decomp/lu.h @@ -3,6 +3,8 @@ #include "./utils/vector.h" #include "./utils/matrix.h" +#include "./numerics/initializers/eye.h" + namespace decomp{ // Stores PA = LU with partial pivoting (row permutations). @@ -150,7 +152,7 @@ namespace decomp{ } void inplace_inverse(utils::Matrix& Ainv){ - Ainv.eye(rows); + numerics::inplace_eye(Ainv); inplace_solve(Ainv, Ainv); } diff --git a/include/numerics/initializers/eye.h b/include/numerics/initializers/eye.h new file mode 100644 index 0000000..9fe04e0 --- /dev/null +++ b/include/numerics/initializers/eye.h @@ -0,0 +1,138 @@ +#pragma once +#include "./utils/matrix.h" +#include "./core/omp_config.h" + + +namespace numerics { + + template + void inplace_eye(utils::Matrix& A, uint64_t N = 0){ + + bool need_full_zero = true; + + if (N != 0){ + A.resize(N,N,T{0}); + need_full_zero = false; + }else{ + N = A.rows(); + if (N != A.cols()) { + throw std::runtime_error("inplace_eye: non-square matrix"); + } + } + // 1) Zero the whole matrix if we didn't just resize with zeros + if (need_full_zero){ + for (uint64_t i = 0; i < N; ++i){ + for (uint64_t j = 0; j < N; ++j){ + if (i==j){ + A(i,j) = T{1}; + }else{ + A(i,j) = T{0}; + } + } + } + }else{ + for (uint64_t i = 0; i < N; ++i){ + A(i,i) = T{1}; + } + } + + } + + + template + void inplace_eye_omp(utils::Matrix& A, uint64_t N = 0){ + + bool need_full_zero = true; + + if (N != 0){ + A.resize(N,N,T{0}); + need_full_zero = false; + }else{ + N = A.rows(); + if (N != A.cols()) { + throw std::runtime_error("inplace_eye_omp: non-square matrix"); + } + } + + // 1) Zero the whole matrix if we didn't just resize with zeros + if (need_full_zero){ + T* ptr = A.data(); + uint64_t NN = N*N; + #pragma omp parallel for schedule(static) + for (uint64_t i = 0; i < NN; ++i){ + ptr[i] = T{0}; + } + } + // 2) Set the diagonal to 1 + #pragma omp parallel for schedule(static) + for (uint64_t i = 0; i < N; ++i){ + A(i,i) = T{1}; + } + + } + + template + utils::Matrix eye(uint64_t N){ + utils::Matrix A; + inplace_eye(A, N); + return A; + + } + + template + utils::Matrix eye_omp(uint64_t N){ + utils::Matrix A; + inplace_eye_omp(A, N); + return A; + } + + template + utils::Matrix eye_omp_auto(uint64_t N){ + + uint64_t work = N*N; + utils::Matrix A(N,N,T{0}); + + #ifdef _OPENMP + bool can_parallel = omp_config::omp_parallel_allowed(); + uint64_t threads = static_cast(omp_get_max_threads()); + #else + bool can_parallel = false; + uint64_t threads = 1; + #endif + + if (can_parallel || work > threads * 4ull) { + inplace_eye_omp(A, 0); + } + else{ + // Safe fallback + inplace_eye(A, 0); + } + + return A; + } + // Untested: + template + void inplace_eye_omp_auto(utils::Matrix& A, uint64_t N = 0){ + + uint64_t work = N*N; + + #ifdef _OPENMP + bool can_parallel = omp_config::omp_parallel_allowed(); + uint64_t threads = static_cast(omp_get_max_threads()); + #else + bool can_parallel = false; + uint64_t threads = 1; + #endif + + if (can_parallel || work > threads * 4ull) { + inplace_eye_omp(A, 0); + } + else{ + // Safe fallback + inplace_eye(A, 0); + } + } + + + +} // namespace utils \ No newline at end of file diff --git a/include/numerics/interpolation1d.h b/include/numerics/interpolation1d.h new file mode 100644 index 0000000..968b1d2 --- /dev/null +++ b/include/numerics/interpolation1d.h @@ -0,0 +1,9 @@ +#pragma once + + +//#include "./numerics/interpolation1d/interpolation1d_base.h" +#include "./numerics/interpolation1d/interpolation1d_barycentric.h" +#include "./numerics/interpolation1d/interpolation1d_cubic_spline.h" +#include "./numerics/interpolation1d/interpolation1d_linear.h" +#include "./numerics/interpolation1d/interpolation1d_polynomial.h" +#include "./numerics/interpolation1d/interpolation1d_rational.h" \ No newline at end of file diff --git a/include/numerics/interpolation1d/interpolation1d_barycentric.h b/include/numerics/interpolation1d/interpolation1d_barycentric.h index 61d3e19..2a39d75 100644 --- a/include/numerics/interpolation1d/interpolation1d_barycentric.h +++ b/include/numerics/interpolation1d/interpolation1d_barycentric.h @@ -1,6 +1,6 @@ #pragma once -#include "./numerics/interpolation1d_base.h" +#include "./numerics/interpolation1d/interpolation1d_base.h" #include "./utils/vector.h" #include "./numerics/min.h" diff --git a/include/numerics/interpolation1d_base.h b/include/numerics/interpolation1d/interpolation1d_base.h similarity index 97% rename from include/numerics/interpolation1d_base.h rename to include/numerics/interpolation1d/interpolation1d_base.h index f50831d..07f1b55 100644 --- a/include/numerics/interpolation1d_base.h +++ b/include/numerics/interpolation1d/interpolation1d_base.h @@ -43,11 +43,9 @@ namespace numerics{ T interp(T x){ int64_t jlo; if (cor){ - std::cout << "Uses hunt()" << std::endl; jlo = hunt(x); } else{ - std::cout << "Uses locate()" << std::endl; jlo = locate(x); } return rawinterp(jlo,x); diff --git a/include/numerics/interpolation1d/interpolation1d_cubic_spline.h b/include/numerics/interpolation1d/interpolation1d_cubic_spline.h index a1e1c91..8185c9b 100644 --- a/include/numerics/interpolation1d/interpolation1d_cubic_spline.h +++ b/include/numerics/interpolation1d/interpolation1d_cubic_spline.h @@ -1,6 +1,6 @@ #pragma once -#include "./numerics/interpolation1d_base.h" +#include "./numerics/interpolation1d/interpolation1d_base.h" //#include "./numerics/abs.h" #include "./utils/vector.h" diff --git a/include/numerics/interpolation1d/interpolation1d_linear.h b/include/numerics/interpolation1d/interpolation1d_linear.h index 11c7497..6ff4b05 100644 --- a/include/numerics/interpolation1d/interpolation1d_linear.h +++ b/include/numerics/interpolation1d/interpolation1d_linear.h @@ -1,6 +1,6 @@ #pragma once -#include "./numerics/interpolation1d_base.h" +#include "./numerics/interpolation1d/interpolation1d_base.h" namespace numerics{ diff --git a/include/numerics/interpolation1d/interpolation1d_polynomial.h b/include/numerics/interpolation1d/interpolation1d_polynomial.h index d65beab..45670d0 100644 --- a/include/numerics/interpolation1d/interpolation1d_polynomial.h +++ b/include/numerics/interpolation1d/interpolation1d_polynomial.h @@ -1,6 +1,6 @@ #pragma once -#include "./numerics/interpolation1d_base.h" +#include "./numerics/interpolation1d/interpolation1d_base.h" #include "./numerics/abs.h" #include "./utils/vector.h" diff --git a/include/numerics/interpolation1d/interpolation1d_rational.h b/include/numerics/interpolation1d/interpolation1d_rational.h index 2f548be..cc4dc4a 100644 --- a/include/numerics/interpolation1d/interpolation1d_rational.h +++ b/include/numerics/interpolation1d/interpolation1d_rational.h @@ -1,6 +1,6 @@ #pragma once -#include "./numerics/interpolation1d_base.h" +#include "./numerics/interpolation1d/interpolation1d_base.h" #include "./utils/vector.h" #include "./numerics/abs.h" diff --git a/include/numerics/inverse/inverse_gauss_jordan.h b/include/numerics/inverse/inverse_gauss_jordan.h index d9c0513..e150d7d 100644 --- a/include/numerics/inverse/inverse_gauss_jordan.h +++ b/include/numerics/inverse/inverse_gauss_jordan.h @@ -5,6 +5,8 @@ #include "./utils/vector.h" #include "./utils/matrix.h" +#include "./numerics/initializers/eye.h" + #include namespace numerics{ @@ -13,7 +15,7 @@ namespace numerics{ void inverse_gj(utils::Matrix& A){ //utils::Matrix B(A.rows(),A.cols(), T{0}); utils::Matrix B; - B.eye(A.rows()); + B = eye_omp_auto(A.rows()); uint64_t icol{0}, irow{0}, rows{A.rows()}, cols{A.cols()}; diff --git a/include/numerics/inverse/inverse_lu.h b/include/numerics/inverse/inverse_lu.h index 8bfb115..b61dcfe 100644 --- a/include/numerics/inverse/inverse_lu.h +++ b/include/numerics/inverse/inverse_lu.h @@ -3,8 +3,6 @@ #include "./decomp/lu.h" - - namespace numerics{ template diff --git a/include/numerics/matequal.h b/include/numerics/matequal.h new file mode 100644 index 0000000..8e1aaf7 --- /dev/null +++ b/include/numerics/matequal.h @@ -0,0 +1,85 @@ +#pragma once + +#include "./core/omp_config.h" + +#include "./utils/matrix.h" +#include "./numerics/abs.h" + + +namespace numerics{ + + // -------------- Serial ---------------- + template + bool matequal(const utils::Matrix& A, const utils::Matrix& B, double tol = 1e-9) { + + if (A.rows() != B.rows() || A.cols() != B.cols()) { + return false; + } + + bool decimal = std::is_floating_point::value; + const uint64_t rows=A.rows(), cols=A.cols(); + + for (uint64_t i = 0; i < rows; ++i) + for (uint64_t j = 0; j < cols; ++j) + if (decimal) { + if (numerics::abs(A(i,j) - B(i,j)) > static_cast(tol)){ + return false; + } + } else { + if (A(i,j) != B(i,j)){ + return false; + } + } + return true; + } + + // -------------- Parallel ---------------- + template + bool matequal_omp(const utils::Matrix& A, const utils::Matrix& B, double tol = 1e-9) { + + if (A.rows() != B.rows() || A.cols() != B.cols()) { + return false; + } + + bool decimal = std::is_floating_point::value; + bool eq = true; + const uint64_t rows=A.rows(), cols=A.cols(); + + #pragma omp parallel for collapse(2) schedule(static) reduction(&&:eq) + for (uint64_t i = 0; i < rows; ++i) + for (uint64_t j = 0; j < cols; ++j) + if (decimal) { + eq = eq && (numerics::abs(A(i,j) - B(i,j)) <= static_cast(tol)); + } else { + eq = eq && (A(i,j) == B(i,j)); + } + return eq; + } + + // -------------- Auto OpenMP ---------------- + template + bool matequal_auto(const utils::Matrix& A, const utils::Matrix& B, double tol = 1e-9) { + + if (A.rows() != B.rows() || A.cols() != B.cols()) { + return false; + } + + uint64_t work = A.rows() * A.cols(); + + #ifdef _OPENMP + bool can_parallel = omp_config::omp_parallel_allowed(); + uint64_t threads = static_cast(omp_get_max_threads()); + #else + bool can_parallel = false; + uint64_t threads = 1; + #endif + + if (can_parallel || work > threads * 4ull) { + return matequal_omp(A,B,tol); + } + else{ + // Safe fallback + return matequal(A,B,tol); + } + } +} // namespace numerics diff --git a/include/numerics/matmul.h b/include/numerics/matmul.h index cb5916e..6461e31 100644 --- a/include/numerics/matmul.h +++ b/include/numerics/matmul.h @@ -85,21 +85,23 @@ utils::Matrix matmul_auto(const utils::Matrix& A, const uint64_t m=A.rows(), p=B.cols(); const uint64_t work = m * p; - bool can_parallel = omp_config::omp_parallel_allowed(); + #ifdef _OPENMP - int threads = omp_get_max_threads(); + bool can_parallel = omp_config::omp_parallel_allowed(); + uint64_t threads = static_cast(omp_get_max_threads()); #else - int threads = 1; + bool can_parallel = false; + uint64_t threads = 1; #endif // Tiny problems: serial is cheapest. - if (!can_parallel || work < static_cast(threads)*4ull) { + if (!can_parallel || work < threads*4ull) { return matmul(A,B); } // Plenty of (i,j) work → collapse(2) is a great default. - else if (work >= 8ull * static_cast(threads)) { + else if (work >= 8ull * threads) { return matmul_collapse_omp(A,B); } // Many rows and very few columns → rows-only cheaper overhead. diff --git a/include/numerics/matvec.h b/include/numerics/matvec.h index 0c137e6..b6d944b 100644 --- a/include/numerics/matvec.h +++ b/include/numerics/matvec.h @@ -38,16 +38,24 @@ namespace numerics{ const uint64_t m = A.rows(); const uint64_t n = A.cols(); - utils::Vector y(m, T{0}); + utils::Vector y(m, T{0}); // <-- y has length m (rows) + + const T* xptr = x.data(); + const T* Aptr = A.data(); // row-major: A(i,j) == Aptr[i*n + j] + + // Each row i is an independent dot product: y[i] = dot(A[i,*], x) #pragma omp parallel for schedule(static) for (uint64_t i = 0; i < m; ++i) { - T acc = T{0}; + const T* row = Aptr + i * n; // contiguous row i + T acc = T{0}; + #pragma omp simd reduction(+:acc) for (uint64_t j = 0; j < n; ++j) { - acc += A(i, j) * x[j]; + acc += row[j] * xptr[j]; } y[i] = acc; - } + } + return y; } diff --git a/include/numerics/numerics.h b/include/numerics/numerics.h index 5669c0d..e3cd5ad 100644 --- a/include/numerics/numerics.h +++ b/include/numerics/numerics.h @@ -1,6 +1,8 @@ // "./numerics/numerics.h" #pragma once +#include "./numerics/initializers/eye.h" +#include "./numerics/matequal.h" #include "./numerics/transpose.h" #include "./numerics/inverse.h" #include "./numerics/matmul.h" diff --git a/include/numerics/transpose.h b/include/numerics/transpose.h index 7e6cb5c..3052e56 100644 --- a/include/numerics/transpose.h +++ b/include/numerics/transpose.h @@ -3,12 +3,13 @@ #include "./utils/matrix.h" +#include "./core/omp_config.h" namespace numerics{ template - void inplace_transpose(utils::Matrix& A){ + void inplace_transpose_square(utils::Matrix& A){ const uint64_t rows = A.rows(); const uint64_t cols = A.cols(); @@ -27,13 +28,54 @@ namespace numerics{ } } + template + void inplace_transpose_square_omp(utils::Matrix& A){ + + const uint64_t rows = A.rows(); + const uint64_t cols = A.cols(); + + if (rows != cols){ + throw std::runtime_error("inplace_transpose only valid for square matrices"); + } + + #pragma omp parallel for schedule(static) + for (uint64_t i = 0; i < rows; ++i){ + for (uint64_t j = i + 1; j < cols; ++j){ + T tmp = A(j,i); + A(j,i) = A(i,j); + A(i,j) = tmp; + //std::swap(A(j,i), A(i,j)); + } + } + } + + + template utils::Matrix transpose(const utils::Matrix& A){ const uint64_t rows = A.rows(); const uint64_t cols = A.cols(); - utils::Matrix B(cols, rows, T{}); + utils::Matrix B(cols, rows, T{0}); + + for (uint64_t i = 0; i < rows; ++i){ + for (uint64_t j = 0; j < cols; ++j){ + B(j,i) = A(i,j); + } + } + return B; + } + + template + utils::Matrix transpose_omp(const utils::Matrix& A){ + + const uint64_t rows = A.rows(); + const uint64_t cols = A.cols(); + + utils::Matrix B(cols, rows, T{0}); + + #pragma omp parallel for collapse(2) schedule(static) for (uint64_t i = 0; i < rows; ++i){ for (uint64_t j = 0; j < cols; ++j){ B(j,i) = A(i,j); @@ -43,6 +85,69 @@ namespace numerics{ } + // -------- Auto selectors -------- + template + void inplace_transpose_square_auto(utils::Matrix& A) { + const uint64_t rows = A.rows(), cols = A.cols(); + + if (rows != cols) { + throw std::runtime_error("inplace_transpose_auto: only valid for square matrices"); + } + const std::uint64_t work = static_cast((rows * (rows - 1)) / 2); // number of swaps + + #ifdef _OPENMP + bool can_parallel = omp_config::omp_parallel_allowed(); + uint64_t threads = static_cast(omp_get_max_threads()); + #else + bool can_parallel = false; + uint64_t threads = 1; + #endif + + if (can_parallel && work > threads * 4ull) { + inplace_transpose_square_omp(A); + }else { + inplace_transpose_square(A); + } + } + + template + utils::Matrix transpose_auto(const utils::Matrix& A) { + + const uint64_t rows = A.rows(); + const uint64_t cols = A.cols(); + + uint64_t work = A.rows() * A.cols(); + + if (rows==cols){ + utils::Matrix B(rows, cols, T{0}); + inplace_transpose_square_auto(B); + return B; + } + + #ifdef _OPENMP + bool can_parallel = omp_config::omp_parallel_allowed(); + uint64_t threads = static_cast(omp_get_max_threads()); + #else + bool can_parallel = false; + uint64_t threads = 1; + #endif + + if (!can_parallel || work > threads * 4ull) { + return transpose_omp(A); + } else { + return transpose(A); + } + } + + + + + + + + + + } // namespace numerics #endif // _transpose_n_ \ No newline at end of file diff --git a/include/utils/matrix.h b/include/utils/matrix.h index 604f691..ad72e9d 100644 --- a/include/utils/matrix.h +++ b/include/utils/matrix.h @@ -3,6 +3,11 @@ #include "./utils/vector.h" +#ifdef _OPENMP + #include +#endif + + #include namespace utils{ @@ -32,42 +37,6 @@ public: T* data() noexcept { return data_.data(); } const T* data() const noexcept { return data_.data(); } - //# MATRIX: equal operator # - bool operator==(const Matrix& A) const { - if (rows_ != A.rows_ || cols_ != A.cols_) return false; - for (uint64_t i = 0; i < rows_; ++i) - for (uint64_t j = 0; j < cols_; ++j) - if (data_[i*cols_ + j] != A(i,j)) - return false; - return true; - } - bool operator!=(const Matrix& A) const { - return !(*this == A); - } - bool nearly_equal(const Matrix& A, T tol = static_cast(1e-9)) const { - if (rows_ != A.rows() || cols_ != A.cols()) return false; - for (uint64_t i = 0; i < rows_; ++i) - for (uint64_t j = 0; j < cols_; ++j) { - T a = (*this)(i,j); - T b = A(i,j); - if (std::is_floating_point::value) { - if (std::fabs(a - b) > tol) return false; - } else { - if (a != b) return false; - } - } - return true; - } - - void eye(uint64_t rows_cols){ - rows_ = cols_ = rows_cols; - - data_.clear(); - data_.resize(rows_cols*rows_cols, T{0}); - for (uint64_t i = 0; i < rows_; ++i){ - data_[i * cols_ + i] = T{1}; - } - } void resize(uint64_t rows, uint64_t cols, const T& value = T(0)){ rows_ = rows; @@ -186,7 +155,7 @@ private: std::vector data_; }; - typedef Matrix Mi; + typedef Matrix Mi; typedef Matrix Mf; typedef Matrix Md; diff --git a/include/utils/matrix/matrix_equal_adapter.h b/include/utils/matrix/matrix_equal_adapter.h new file mode 100644 index 0000000..2b439a1 --- /dev/null +++ b/include/utils/matrix/matrix_equal_adapter.h @@ -0,0 +1,22 @@ +#pragma once +#include "./utils/matrix.h" +#include "./numerics/matequal.h" + +namespace utils { + + // definitions of the previously-declared members + template + inline bool Matrix::equals(const Matrix& B, T tol) const { + return numerics::matequal(*this, B, tol); + } + template + inline bool Matrix::equals_omp(const Matrix& B, T tol) const { + return numerics::matequal_omp(*this, B, tol); + } + + template + inline bool Matrix::equals_auto(const Matrix& B, T tol) const { + return numerics::matequal_auto(*this, B, tol); + } + +} // namespace utils \ No newline at end of file diff --git a/include/utils/vector.h b/include/utils/vector.h index 2eab532..9b71ac1 100644 --- a/include/utils/vector.h +++ b/include/utils/vector.h @@ -6,6 +6,11 @@ #include +#include +#include +#include +#include + namespace utils{ //###################################### //# VECTOR TYPE # @@ -49,6 +54,10 @@ public: v.resize(new_size, value); } + + T* data() noexcept { return v.data(); } + const T* data() const noexcept { return v.data(); } + //########################################### //# VECTOR: == and != # //########################################### diff --git a/makefile b/makefile index be0edc3..3d04849 100644 --- a/makefile +++ b/makefile @@ -47,7 +47,7 @@ TEST_MAIN := $(OBJ_DIR)/test/test_all.o # === OpenMP runtime configuration (override-able) === OMP_PROC_BIND ?= close # close|spread|master OMP_PLACES ?= cores # cores|threads|sockets -OMP_MAX_LEVELS ?= 2 # 1 = no nested teams; set 2+ to allow nesting +OMP_MAX_LEVELS ?= 1 # 1 = no nested teams; set 2+ to allow nesting OMP_THREADS ?= 16 # e.g. "16" or "8,4" for nested (outer,inner) OMP_DYNAMIC ?= TRUE # TRUE/FALSE: let runtime adjust threads OMP_SCHEDULE ?= STATIC # STATIC recommended for matvec/matmul @@ -103,13 +103,20 @@ run: clean-test $(TARGET) ./$(TARGET) # Handy presets -.PHONY: run-single -run-single: ## Single-level parallel (good default) +.PHONY: run-single-core +run-single-core: ## Single-level one core (good default) + $(MAKE) run OMP_MAX_LEVELS=1 OMP_THREADS=1 OMP_PROC_BIND=close OMP_PLACES=cores + +# Handy presets +.PHONY: run-multi-core +run-multi-core: ## Single-level parallel (good default) $(MAKE) run OMP_MAX_LEVELS=1 OMP_THREADS=16 OMP_PROC_BIND=close OMP_PLACES=cores -.PHONY: run-nested -run-nested: ## Two-level nested (outer,inner), adjust to your cores - $(MAKE) run OMP_MAX_LEVELS=2 OMP_THREADS="4,8" OMP_PROC_BIND=close OMP_PLACES=cores +.PHONY: run-nested-multi-core +run-nested-multi-core: ## Two-level nested (outer,inner), adjust to your cores + $(MAKE) run OMP_MAX_LEVELS=2 OMP_THREADS="2,8" OMP_PROC_BIND=close OMP_PLACES=cores + + # Optional: print debug info .PHONY: info diff --git a/obj/main.d b/obj/main.d deleted file mode 100644 index 41fdff6..0000000 --- a/obj/main.d +++ /dev/null @@ -1,41 +0,0 @@ -obj/main.o: src/main.cpp include/./utils/utils.h include/./utils/vector.h \ - include/./utils/matrix.h include/./numerics/numerics.h \ - include/./numerics/transpose.h include/./numerics/inverse.h \ - include/./numerics/inverse/inverse_gauss_jordan.h \ - include/./numerics/inverse/inverse_lu.h include/./decomp/lu.h \ - include/./numerics/matmul.h include/./core/omp_config.h \ - include/./numerics/matvec.h include/./numerics/min.h \ - include/./numerics/max.h include/./numerics/abs.h \ - include/./numerics/interpolation1d_base.h \ - include/./numerics/interpolation1d/interpolation1d_linear.h \ - include/./numerics/interpolation1d/interpolation1d_polynomial.h \ - include/./numerics/interpolation1d/interpolation1d_cubic_spline.h \ - include/./numerics/interpolation1d/interpolation1d_rational.h \ - include/./numerics/interpolation1d/interpolation1d_barycentric.h \ - include/./decomp/decomp.h include/./modules/grid1d.h \ - include/utils/vector.h include/./modules/finitedifference1d.h -include/./utils/utils.h: -include/./utils/vector.h: -include/./utils/matrix.h: -include/./numerics/numerics.h: -include/./numerics/transpose.h: -include/./numerics/inverse.h: -include/./numerics/inverse/inverse_gauss_jordan.h: -include/./numerics/inverse/inverse_lu.h: -include/./decomp/lu.h: -include/./numerics/matmul.h: -include/./core/omp_config.h: -include/./numerics/matvec.h: -include/./numerics/min.h: -include/./numerics/max.h: -include/./numerics/abs.h: -include/./numerics/interpolation1d_base.h: -include/./numerics/interpolation1d/interpolation1d_linear.h: -include/./numerics/interpolation1d/interpolation1d_polynomial.h: -include/./numerics/interpolation1d/interpolation1d_cubic_spline.h: -include/./numerics/interpolation1d/interpolation1d_rational.h: -include/./numerics/interpolation1d/interpolation1d_barycentric.h: -include/./decomp/decomp.h: -include/./modules/grid1d.h: -include/utils/vector.h: -include/./modules/finitedifference1d.h: diff --git a/obj/main.o b/obj/main.o deleted file mode 100644 index 6280170..0000000 Binary files a/obj/main.o and /dev/null differ diff --git a/obj/test/test_all.d b/obj/test/test_all.d new file mode 100644 index 0000000..4f6694e --- /dev/null +++ b/obj/test/test_all.d @@ -0,0 +1,2 @@ +obj/test/test_all.o: test/test_all.cpp test/test_common.h +test/test_common.h: diff --git a/obj/test/test_all.o b/obj/test/test_all.o new file mode 100644 index 0000000..58bd61b Binary files /dev/null and b/obj/test/test_all.o differ diff --git a/obj/test/test_eye.d b/obj/test/test_eye.d new file mode 100644 index 0000000..843ba4f --- /dev/null +++ b/obj/test/test_eye.d @@ -0,0 +1,8 @@ +obj/test/test_eye.o: test/test_eye.cpp test/test_common.h \ + include/./utils/matrix.h include/./utils/vector.h \ + include/./numerics/initializers/eye.h include/./core/omp_config.h +test/test_common.h: +include/./utils/matrix.h: +include/./utils/vector.h: +include/./numerics/initializers/eye.h: +include/./core/omp_config.h: diff --git a/obj/test/test_eye.o b/obj/test/test_eye.o new file mode 100644 index 0000000..f563bff Binary files /dev/null and b/obj/test/test_eye.o differ diff --git a/obj/test/test_interpolation1d.d b/obj/test/test_interpolation1d.d new file mode 100644 index 0000000..461dc66 --- /dev/null +++ b/obj/test/test_interpolation1d.d @@ -0,0 +1,24 @@ +obj/test/test_interpolation1d.o: test/test_interpolation1d.cpp \ + test/test_common.h include/./utils/matrix.h include/./utils/vector.h \ + include/./numerics/interpolation1d.h \ + include/./numerics/interpolation1d/interpolation1d_barycentric.h \ + include/./numerics/interpolation1d/interpolation1d_base.h \ + include/./numerics/min.h include/./numerics/max.h \ + include/./numerics/abs.h \ + include/./numerics/interpolation1d/interpolation1d_cubic_spline.h \ + include/./numerics/interpolation1d/interpolation1d_linear.h \ + include/./numerics/interpolation1d/interpolation1d_polynomial.h \ + include/./numerics/interpolation1d/interpolation1d_rational.h +test/test_common.h: +include/./utils/matrix.h: +include/./utils/vector.h: +include/./numerics/interpolation1d.h: +include/./numerics/interpolation1d/interpolation1d_barycentric.h: +include/./numerics/interpolation1d/interpolation1d_base.h: +include/./numerics/min.h: +include/./numerics/max.h: +include/./numerics/abs.h: +include/./numerics/interpolation1d/interpolation1d_cubic_spline.h: +include/./numerics/interpolation1d/interpolation1d_linear.h: +include/./numerics/interpolation1d/interpolation1d_polynomial.h: +include/./numerics/interpolation1d/interpolation1d_rational.h: diff --git a/obj/test/test_interpolation1d.o b/obj/test/test_interpolation1d.o new file mode 100644 index 0000000..5839134 Binary files /dev/null and b/obj/test/test_interpolation1d.o differ diff --git a/obj/test/test_inverse.d b/obj/test/test_inverse.d new file mode 100644 index 0000000..4043e18 --- /dev/null +++ b/obj/test/test_inverse.d @@ -0,0 +1,17 @@ +obj/test/test_inverse.o: test/test_inverse.cpp test/test_common.h \ + include/./utils/matrix.h include/./utils/vector.h \ + include/./numerics/inverse.h \ + include/./numerics/inverse/inverse_gauss_jordan.h \ + include/./numerics/initializers/eye.h include/./core/omp_config.h \ + include/./numerics/inverse/inverse_lu.h include/./decomp/lu.h \ + include/./numerics/matmul.h +test/test_common.h: +include/./utils/matrix.h: +include/./utils/vector.h: +include/./numerics/inverse.h: +include/./numerics/inverse/inverse_gauss_jordan.h: +include/./numerics/initializers/eye.h: +include/./core/omp_config.h: +include/./numerics/inverse/inverse_lu.h: +include/./decomp/lu.h: +include/./numerics/matmul.h: diff --git a/obj/test/test_inverse.o b/obj/test/test_inverse.o new file mode 100644 index 0000000..81a7f18 Binary files /dev/null and b/obj/test/test_inverse.o differ diff --git a/obj/test/test_lu.d b/obj/test/test_lu.d new file mode 100644 index 0000000..b0d13ae --- /dev/null +++ b/obj/test/test_lu.d @@ -0,0 +1,13 @@ +obj/test/test_lu.o: test/test_lu.cpp test/test_common.h \ + include/./utils/matrix.h include/./utils/vector.h \ + include/./numerics/matmul.h include/./core/omp_config.h \ + include/./numerics/matvec.h include/./decomp/lu.h \ + include/./numerics/initializers/eye.h +test/test_common.h: +include/./utils/matrix.h: +include/./utils/vector.h: +include/./numerics/matmul.h: +include/./core/omp_config.h: +include/./numerics/matvec.h: +include/./decomp/lu.h: +include/./numerics/initializers/eye.h: diff --git a/obj/test/test_lu.o b/obj/test/test_lu.o new file mode 100644 index 0000000..e56ad76 Binary files /dev/null and b/obj/test/test_lu.o differ diff --git a/obj/test/test_matequal.d b/obj/test/test_matequal.d new file mode 100644 index 0000000..4bdef9c --- /dev/null +++ b/obj/test/test_matequal.d @@ -0,0 +1,10 @@ +obj/test/test_matequal.o: test/test_matequal.cpp test/test_common.h \ + include/./numerics/matequal.h include/./core/omp_config.h \ + include/./utils/matrix.h include/./utils/vector.h \ + include/./numerics/abs.h +test/test_common.h: +include/./numerics/matequal.h: +include/./core/omp_config.h: +include/./utils/matrix.h: +include/./utils/vector.h: +include/./numerics/abs.h: diff --git a/obj/test/test_matequal.o b/obj/test/test_matequal.o new file mode 100644 index 0000000..c287675 Binary files /dev/null and b/obj/test/test_matequal.o differ diff --git a/obj/test/test_matmul.d b/obj/test/test_matmul.d new file mode 100644 index 0000000..b90fd6d --- /dev/null +++ b/obj/test/test_matmul.d @@ -0,0 +1,10 @@ +obj/test/test_matmul.o: test/test_matmul.cpp test/test_common.h \ + include/./utils/utils.h include/./utils/vector.h \ + include/./utils/matrix.h include/./numerics/matmul.h \ + include/./core/omp_config.h +test/test_common.h: +include/./utils/utils.h: +include/./utils/vector.h: +include/./utils/matrix.h: +include/./numerics/matmul.h: +include/./core/omp_config.h: diff --git a/obj/test/test_matmul.o b/obj/test/test_matmul.o new file mode 100644 index 0000000..97fda5a Binary files /dev/null and b/obj/test/test_matmul.o differ diff --git a/obj/test/test_matrix.d b/obj/test/test_matrix.d new file mode 100644 index 0000000..26d2911 --- /dev/null +++ b/obj/test/test_matrix.d @@ -0,0 +1,5 @@ +obj/test/test_matrix.o: test/test_matrix.cpp test/test_common.h \ + include/./utils/matrix.h include/./utils/vector.h +test/test_common.h: +include/./utils/matrix.h: +include/./utils/vector.h: diff --git a/obj/test/test_matrix.o b/obj/test/test_matrix.o new file mode 100644 index 0000000..0ea854f Binary files /dev/null and b/obj/test/test_matrix.o differ diff --git a/obj/test/test_matvec.d b/obj/test/test_matvec.d new file mode 100644 index 0000000..ab50788 --- /dev/null +++ b/obj/test/test_matvec.d @@ -0,0 +1,8 @@ +obj/test/test_matvec.o: test/test_matvec.cpp test/test_common.h \ + include/./numerics/matvec.h include/./utils/matrix.h \ + include/./utils/vector.h include/./core/omp_config.h +test/test_common.h: +include/./numerics/matvec.h: +include/./utils/matrix.h: +include/./utils/vector.h: +include/./core/omp_config.h: diff --git a/obj/test/test_matvec.o b/obj/test/test_matvec.o new file mode 100644 index 0000000..0663012 Binary files /dev/null and b/obj/test/test_matvec.o differ diff --git a/obj/test/test_transpose.d b/obj/test/test_transpose.d new file mode 100644 index 0000000..647fd23 --- /dev/null +++ b/obj/test/test_transpose.d @@ -0,0 +1,8 @@ +obj/test/test_transpose.o: test/test_transpose.cpp test/test_common.h \ + include/./numerics/transpose.h include/./utils/matrix.h \ + include/./utils/vector.h include/./core/omp_config.h +test/test_common.h: +include/./numerics/transpose.h: +include/./utils/matrix.h: +include/./utils/vector.h: +include/./core/omp_config.h: diff --git a/obj/test/test_transpose.o b/obj/test/test_transpose.o new file mode 100644 index 0000000..3323e14 Binary files /dev/null and b/obj/test/test_transpose.o differ diff --git a/obj/test/test_vector.d b/obj/test/test_vector.d new file mode 100644 index 0000000..577556a --- /dev/null +++ b/obj/test/test_vector.d @@ -0,0 +1,7 @@ +obj/test/test_vector.o: test/test_vector.cpp test/test_common.h \ + include/./utils/utils.h include/./utils/vector.h \ + include/./utils/matrix.h +test/test_common.h: +include/./utils/utils.h: +include/./utils/vector.h: +include/./utils/matrix.h: diff --git a/obj/test/test_vector.o b/obj/test/test_vector.o new file mode 100644 index 0000000..605713f Binary files /dev/null and b/obj/test/test_vector.o differ diff --git a/src/main.cpp b/src/main.cpp index 26a06a7..0d20d31 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -10,6 +10,9 @@ #include #include + +#include + //#include "./numerics/interpolation/interpolation_linear.h" @@ -17,7 +20,67 @@ int main(int argc, char const *argv[]) { + utils::Md A; +/* + int hw = omp_get_max_active_levels(); + if (hw <= 1) return 0; + + const uint64_t m=512, k=512, p=512; // ~134M MACs; adjust if needed + utils::Md A(m,k,1), B(k,p,1), C(k,p,1); + + omp_set_max_active_levels(1); + + auto t0 = std::chrono::high_resolution_clock::now(); + for (int i = 0; i < m*k*p; ++i){ + A==B + } + double t1 = std::chrono::duration(std::chrono::high_resolution_clock::now() - t0).count(); + + + omp_set_max_active_levels(2); + auto t0 = std::chrono::high_resolution_clock::now(); + for (int i = 0; i < m*k*p; ++i){ + A==B + } + double t1 = std::chrono::duration(std::chrono::high_resolution_clock::now() - t0).count(); + + omp_set_num_threads(prev); + + // Must not be notably slower with many threads + CHECK(tN <= t1 * 1.05, "rows_omp: multi-thread slower than single-thread"); + + + + utils::Md A(5,5, 1); + utils::Md B(5,5, 1); + utils::Md C(5,5, 2); + + bool result1 = (A==B); + bool result2 = (A==C); + + omp_set_max_active_levels(1): + + for (int i = 0; i < 100; ++i){ + (A==B) + } + + omp_set_max_active_levels(2): + + for (int i = 0; i < 100; ++i){ + (A==B) + } + + std::cout << result1 << std::endl; + std::cout << result2 << std::endl; + + + + +*/ + + + /* utils::Vector x(100, 0), y(100,0); for (uint64_t i = 0; i < 100; ++i){ @@ -78,6 +141,6 @@ int main(int argc, char const *argv[]) std::cout << rational.interp(p) << std::endl; std::cout << barycentric.interp(p) << std::endl; - +*/ return 0; } \ No newline at end of file diff --git a/test/test_common.h b/test/test_common.h index c3d151c..8d92710 100644 --- a/test/test_common.h +++ b/test/test_common.h @@ -35,7 +35,7 @@ int main() { for (auto& t : TestRegistry::list()) { try { t.second(); - std::cout << "[PASS] " << t.first << "\n"; + //std::cout << "[PASS] " << t.first << "\n"; } catch (const TestFailure& e) { std::cerr << "[FAIL] " << t.first << " -> " << e.what() << "\n"; ++fails; diff --git a/test/test_eye.cpp b/test/test_eye.cpp new file mode 100644 index 0000000..db959ee --- /dev/null +++ b/test/test_eye.cpp @@ -0,0 +1,155 @@ +#include "test_common.h" +#include "./utils/matrix.h" +#include "./numerics/initializers/eye.h" + +#include + + + +// Tiny helper +template +static bool is_identity(const utils::Matrix& M) { + if (M.rows() != M.cols()) return false; + for (std::uint64_t i = 0; i < M.rows(); ++i) + for (std::uint64_t j = 0; j < M.cols(); ++j) + if ((i == j && M(i,j) != T{1}) || + (i != j && M(i,j) != T{0})) return false; + return true; +} + + +// ============ Basic correctness ============ +TEST_CASE(Generate_EYE_double) { + utils::Matrix I = numerics::eye(5); + CHECK_EQ(I.rows(), 5, "rows should be 5"); + CHECK_EQ(I.cols(), 5, "cols should be 5"); + CHECK(is_identity(I), "eye(5) is not identity"); +} + +TEST_CASE(Generate_EYE_int) { + utils::Matrix I = numerics::eye(3); + CHECK_EQ(I.rows(), 3, "rows should be 3"); + CHECK_EQ(I.cols(), 3, "cols should be 3"); + CHECK(is_identity(I), "eye(3) is not identity"); +} + +TEST_CASE(Inplace_EYE_resize_creates_identity) { + utils::Md A; // empty + numerics::inplace_eye(A, 7); // resize to 7x7 and set identity + CHECK_EQ(A.rows(), 7, "rows should be 7"); + CHECK_EQ(A.cols(), 7, "cols should be 7"); + CHECK(is_identity(A), "inplace_eye resize did not create identity"); +} + +TEST_CASE(Inplace_EYE_inplace_on_square) { + utils::Md A(4,4, 42.0); // junk values + numerics::inplace_eye(A); // N==0 → must be square, zero + diag + CHECK(is_identity(A), "inplace_eye on square did not produce identity"); +} + +TEST_CASE(Inplace_EYE_throws_on_non_square) { + utils::Md A(2,3, 5.0); + bool threw = false; + try { + numerics::inplace_eye(A); // N==0 and non-square → throws + } catch (const std::runtime_error&) { + threw = true; + } + CHECK(threw, "inplace_eye should throw on non-square when N==0"); +} + +// ============ OpenMP variants (compiled only when -fopenmp is used) ============ + +#ifdef _OPENMP +TEST_CASE(EYE_OMP_matches_serial) { + utils::Md I1 = numerics::eye(64); + utils::Md I2 = numerics::eye_omp(64); + + for (uint64_t i = 0; i < I1.rows(); ++i){ + for (uint64_t j = 0; j < I1.cols(); ++j){ + CHECK(I1(i,j) == I2(i,j), "eye_omp != eye"); + } + + } + utils::Md A(64,64, 3.14); + numerics::inplace_eye_omp(A); // N==0 → must be square + CHECK(is_identity(A), "inplace_eye_omp did not produce identity"); +} + + +TEST_CASE(EYE_OMP_speed) { + + uint64_t prev = omp_get_max_threads(); + if (prev <= 1) return; + + omp_set_num_threads(16); + uint64_t N = 16384; + + utils::Matrix I1(N,N,1), I2(N,N,1); + + auto t0 = std::chrono::high_resolution_clock::now(); + numerics::inplace_eye(I1); + double t1 = std::chrono::duration(std::chrono::high_resolution_clock::now() - t0).count(); + + t0 = std::chrono::high_resolution_clock::now(); + numerics::inplace_eye_omp(I2); + double t2 = std::chrono::duration(std::chrono::high_resolution_clock::now() - t0).count(); + + omp_set_num_threads(prev); + + CHECK(t2 <= t1, "collapse_omp: multi-thread slower than single-thread"); +} + + +TEST_CASE(EYE_OMP_nested_ok) { + // Ensure nested regions are allowed for this test (won't error if not) + + uint64_t prev_levels = omp_get_max_active_levels(); + uint64_t prev = omp_get_max_threads(); + + omp_set_max_active_levels(2); + // Outer team size (small); inner will be large when nesting is allowed + const int outer_threads = 1; + // Inner team size to request during nested-enabled run + const int inner_threads = 12; + uint64_t N = 4096*8; + utils::Matrix I1(N, N, 7); + utils::Matrix I2(N, N, 7); + + + // ---------- baseline: inner runs with a small team (no outer region) ---------- + omp_set_max_active_levels(1); // disable nesting for baseline + omp_set_num_threads(outer_threads); + + for (uint64_t i = 0; i < 2; ++i){ + numerics::inplace_eye_omp(I1); + } + + + // ---------- nested: outer×inner (only one outer thread launches inner) ---------- + omp_set_max_active_levels(2); // allow one nested level + + #pragma omp parallel num_threads(outer_threads) + { + #pragma omp single // avoid racing on I1 + { + omp_set_num_threads(inner_threads); + for (uint64_t i = 0; i < 2; ++i){ + numerics::inplace_eye_omp(I2); + } + } + } + omp_set_max_active_levels(prev_levels); + omp_set_num_threads(prev); + CHECK(is_identity(I1), "EYE_OMP_nested did not produce identity"); + CHECK(is_identity(I2), "EYE_OMP_nested did not produce identity"); + +} + + +TEST_CASE(EYE_OMP_auto_is_identity) { + auto I = numerics::eye_omp_auto(32); + CHECK(is_identity(I), "eye_omp_auto did not produce identity"); +} + +#endif diff --git a/test/test_interpolation.cpp b/test/test_interpolation.cpp deleted file mode 100644 index 6015842..0000000 --- a/test/test_interpolation.cpp +++ /dev/null @@ -1,115 +0,0 @@ -#include "test_common.h" -#include "./utils/utils.h" -#include "./numerics/inverse.h" -#include "./numerics/matmul.h" - - -TEST_CASE(Inverse_GJ_Basic_3x3) { - using T = double; - utils::Matrix A(3,3, T{0}); - // Well-conditioned 3x3 - A(0,0)=3; A(0,1)=0.2; A(0,2)=-0.1; - A(1,0)=0.1; A(1,1)=2.5; A(1,2)=0.3; - A(2,0)=-0.2;A(2,1)=0.4; A(2,2)=2.0; - - auto Ainv = numerics::inverse(A, "Gauss-Jordan"); - utils::Matrix I; - I.eye(3); - auto prod = numerics::matmul(A, Ainv); - - CHECK(prod.nearly_equal(I, (T)1e-10), "inverse(GJ): A*A^{-1} ≈ I"); -} - -TEST_CASE(Inverse_LU_Basic_3x3) { - using T = double; - utils::Matrix A(3,3, T{0}); - A(0,0)=3; A(0,1)=0.2; A(0,2)=-0.1; - A(1,0)=0.1; A(1,1)=2.5; A(1,2)=0.3; - A(2,0)=-0.2;A(2,1)=0.4; A(2,2)=2.0; - - auto Ainv = numerics::inverse(A, "LU"); - utils::Matrix I; - I.eye(3); - auto prod = numerics::matmul(A, Ainv); - - CHECK(prod.nearly_equal(I, (T)1e-10), "inverse(LU): A*A^{-1} ≈ I"); -} - -TEST_CASE(Inverse_GJ_vs_LU_Consistency) { - using T = double; - utils::Matrix A(3,3, T{0}); - A(0,0)=4; A(0,1)=1; A(0,2)=2; - A(1,0)=0; A(1,1)=3; A(1,2)=-1; - A(2,0)=0; A(2,1)=0; A(2,2)=2; - - auto GJ = numerics::inverse(A, "Gauss-Jordan"); - auto LU = numerics::inverse(A, "LU"); - - CHECK(GJ.nearly_equal(LU, (T)1e-12), "inverse: GJ and LU produce nearly the same result"); -} - - -TEST_CASE(Inplace_Inverse_LU) { - using T = double; - utils::Matrix A(3,3, T{0}); - A(0,0)=3; A(0,1)=0.2; A(0,2)=-0.1; - A(1,0)=0.1; A(1,1)=2.5; A(1,2)=0.3; - A(2,0)=-0.2;A(2,1)=0.4; A(2,2)=2.0; - - auto Ainv_ref = numerics::inverse(A, "LU"); // out-of-place - auto A_copy = A; - numerics::inplace_inverse(A_copy, "LU"); // in-place - - CHECK(A_copy.nearly_equal(Ainv_ref, (T)1e-12), "inplace_inverse(LU) equals out-of-place"); -} - -TEST_CASE(Inplace_Inverse_GJ) { - using T = double; - utils::Matrix A(2,2, T{0}); - A(0,0)=2; A(0,1)=1; - A(1,0)=1; A(1,1)=3; - - auto Ainv_ref = numerics::inverse(A, "Gauss-Jordan"); - auto A_copy = A; - numerics::inplace_inverse(A_copy, "Gauss-Jordan"); - - CHECK(A_copy.nearly_equal(Ainv_ref, (T)1e-12), "inplace_inverse(GJ) equals out-of-place"); -} - -TEST_CASE(Inverse_Identity) { - using T = double; - utils::Matrix I; - I.eye(3); - auto invI = numerics::inverse(I, "LU"); - CHECK(invI.nearly_equal(I, (T)0), "inverse(I) == I"); -} -TEST_CASE(Inverse_NonSquare_Throws) { - using T = double; - utils::Matrix A(2,3, T{0}); // non-square - bool threw1=false, threw2=false; - try { auto X = numerics::inverse(A, "LU"); (void)X; } catch(...) { threw1=true; } - try { numerics::inplace_inverse(A, "Gauss-Jordan"); } catch(...) { threw2=true; } - CHECK(threw1 && threw2, "inverse throws on non-square for both methods"); -} - -TEST_CASE(Inverse_Singular_Throws) { - using T = double; - utils::Matrix S(3,3, T{0}); - S(0,0)=1; S(0,1)=2; S(0,2)=3; - S(1,0)=1; S(1,1)=2; S(1,2)=3; // duplicate row -> singular - S(2,0)=0; S(2,1)=1; S(2,2)=0; - - bool threw_gj=false, threw_lu=false; - try { auto X = numerics::inverse(S, "Gauss-Jordan"); (void)X; } catch(...) { threw_gj=true; } - try { auto X = numerics::inverse(S, "LU"); (void)X; } catch(...) { threw_lu=true; } - CHECK(threw_gj && threw_lu, "inverse throws on singular for both methods"); -} - -TEST_CASE(Inverse_Unknown_Method_Throws) { - using T = double; - utils::Matrix A(2,2, T{0}); - A(0,0)=1; A(1,1)=1; - bool threw=false; - try { auto X = numerics::inverse(A, "Foobar"); (void)X; } catch(...) { threw=true; } - CHECK(threw, "inverse unknown method throws"); -} \ No newline at end of file diff --git a/test/test_interpolation1d.cpp b/test/test_interpolation1d.cpp new file mode 100644 index 0000000..756e644 --- /dev/null +++ b/test/test_interpolation1d.cpp @@ -0,0 +1,155 @@ +#include "test_common.h" + +#include "./utils/matrix.h" +#include "./utils/vector.h" + + +#include "./numerics/interpolation1d.h" + + +// ------------ helpers ------------ +template +static void make_uniform_xy(utils::Vector& x, utils::Vector& y, + std::uint64_t N, T x0, T dx, + T a, T b) { + x.resize(N, T(0)); + y.resize(N, T(0)); + for (std::uint64_t i=0; i +static void make_uniform_xy_fun(utils::Vector& x, utils::Vector& y, + std::uint64_t N, T x0, T dx, + T (*f)(T)) { + x.resize(N, T(0)); + y.resize(N, T(0)); + for (std::uint64_t i=0; i +static inline bool almost_eq(T a, T b, double tol=1e-12) { + return std::fabs(double(a)-double(b)) <= tol; +} + +// ------------ tests ------------ + +// 1) All interpolants reproduce the data points exactly (or to tiny FP error) +TEST_CASE(Interp_Reproduces_Data_Nodes) { + const std::uint64_t N = 25; + utils::Vd x, y; + // linear data: y = 0.1 x + 0.9 on x = 1..25 + make_uniform_xy(x, y, N, 1.0, 1.0, 0.1, 0.9); + + numerics::interp_linear lin(x,y); + numerics::interp_polynomial pol(x,y, 3); + numerics::interp_cubic_spline spl(x,y); + numerics::interp_rational rat(x,y, 2); + numerics::interp_barycentric bar(x,y, 2); + + for (std::uint64_t i=0; i(x, y, N, 1.0, 1.0, 0.1, 0.9); + + numerics::interp_linear lin(x,y); + numerics::interp_polynomial pol(x,y, 3); + numerics::interp_cubic_spline spl(x,y); + numerics::interp_rational rat(x,y, 3); + numerics::interp_barycentric bar(x,y, 3); + + std::vector qs = { 1.5, 5.5, 5.51, 50.01, 99.9 }; + for (double q : qs) { + const double ytrue = 0.1*q + 0.9; + CHECK(almost_eq(lin.interp(q), ytrue, 1e-9), "linear off-grid mismatch"); + CHECK(almost_eq(pol.interp(q), ytrue, 1e-9), "polynomial off-grid mismatch"); + CHECK(almost_eq(spl.interp(q), ytrue, 1e-9), "spline off-grid mismatch"); + CHECK(almost_eq(rat.interp(q), ytrue, 1e-9), "rational off-grid mismatch"); + CHECK(almost_eq(bar.interp(q), ytrue, 1e-9), "barycentric off-grid mismatch"); + } + + // endpoints should match exactly (no extrapolation) + CHECK(almost_eq(lin.interp(x[0]), y[0]), "linear endpoint"); + CHECK(almost_eq(lin.interp(x[N-1]), y[N-1]), "linear endpoint"); + CHECK(almost_eq(spl.interp(x[0]), y[0]), "spline endpoint"); + CHECK(almost_eq(spl.interp(x[N-1]), y[N-1]), "spline endpoint"); +} + +// 3) Quadratic dataset: degree-2 polynomial interpolation should be exact +static double f_quad(double t) { return t*t - 3.0*t + 2.0; } // (t-1)(t-2) + +TEST_CASE(Interp_Polynomial_Deg2_Exact_On_Quadratic) { + const std::uint64_t N = 21; + utils::Vd x, y; + make_uniform_xy_fun(x, y, N, -2.0, 0.5, &f_quad); + + numerics::interp_polynomial pol(x,y, 3); + + std::vector qs = { -1.75, -0.1, 0.3, 1.4, 3.9, 7.25 }; + for (double q : qs) { + // only test inside the data domain to avoid extrap behavior + if (q >= x[0] && q <= x[N-1]) { + const double ytrue = f_quad(q); + //std::cout << pol.interp(q) << ", " << ytrue << ", "<< q <(x, y, N, 0.0, 0.5, &f_cos); + + numerics::interp_linear lin(x,y); + numerics::interp_polynomial pol(x,y, 3); // small local degree + numerics::interp_cubic_spline spl(x,y); + numerics::interp_rational rat(x,y, 3); + numerics::interp_barycentric bar(x,y, 3); + + // sample a handful of interior points and require all methods to be mutually close + std::vector qs = { 1.25, 7.75, 12.1, 18.6, 22.75 }; + for (double q : qs) { + // skip if q is outside just in case + if (q < x[0] || q > x[N-1]) continue; + + double yl = lin.interp(q); + double yp = pol.interp(q); + double ys = spl.interp(q); + double yr = rat.interp(q); + double yb = bar.interp(q); + + //std::cout << "lin: " << yl << std::endl; + //std::cout << "pol: " << yp << std::endl; + //std::cout << "spl: " << ys << std::endl; + //std::cout << "rat: " << yr << std::endl; + //std::cout << "bar: " << yb << std::endl; + + // spline is usually the smoothest; use it as the anchor + CHECK(almost_eq(yl, ys, 5e-4), "linear vs spline"); + CHECK(almost_eq(yp, ys, 5e-4), "polynomial vs spline"); + CHECK(almost_eq(yr, ys, 5e-4), "rational vs spline"); + CHECK(almost_eq(yb, ys, 5e-4), "barycentric vs spline"); + } +} \ No newline at end of file diff --git a/test/test_inverse.cpp b/test/test_inverse.cpp index 6015842..f4de9ab 100644 --- a/test/test_inverse.cpp +++ b/test/test_inverse.cpp @@ -1,115 +1,141 @@ #include "test_common.h" -#include "./utils/utils.h" + +#include "./utils/matrix.h" +#include "./utils/vector.h" #include "./numerics/inverse.h" #include "./numerics/matmul.h" -TEST_CASE(Inverse_GJ_Basic_3x3) { - using T = double; - utils::Matrix A(3,3, T{0}); - // Well-conditioned 3x3 - A(0,0)=3; A(0,1)=0.2; A(0,2)=-0.1; - A(1,0)=0.1; A(1,1)=2.5; A(1,2)=0.3; - A(2,0)=-0.2;A(2,1)=0.4; A(2,2)=2.0; - auto Ainv = numerics::inverse(A, "Gauss-Jordan"); - utils::Matrix I; - I.eye(3); - auto prod = numerics::matmul(A, Ainv); - - CHECK(prod.nearly_equal(I, (T)1e-10), "inverse(GJ): A*A^{-1} ≈ I"); -} - -TEST_CASE(Inverse_LU_Basic_3x3) { - using T = double; - utils::Matrix A(3,3, T{0}); - A(0,0)=3; A(0,1)=0.2; A(0,2)=-0.1; - A(1,0)=0.1; A(1,1)=2.5; A(1,2)=0.3; - A(2,0)=-0.2;A(2,1)=0.4; A(2,2)=2.0; - - auto Ainv = numerics::inverse(A, "LU"); - utils::Matrix I; - I.eye(3); - auto prod = numerics::matmul(A, Ainv); - - CHECK(prod.nearly_equal(I, (T)1e-10), "inverse(LU): A*A^{-1} ≈ I"); -} - -TEST_CASE(Inverse_GJ_vs_LU_Consistency) { - using T = double; - utils::Matrix A(3,3, T{0}); - A(0,0)=4; A(0,1)=1; A(0,2)=2; - A(1,0)=0; A(1,1)=3; A(1,2)=-1; - A(2,0)=0; A(2,1)=0; A(2,2)=2; - - auto GJ = numerics::inverse(A, "Gauss-Jordan"); - auto LU = numerics::inverse(A, "LU"); - - CHECK(GJ.nearly_equal(LU, (T)1e-12), "inverse: GJ and LU produce nearly the same result"); +// ---------- helpers ---------- +template +static utils::Matrix identity(std::uint64_t n) { + utils::Matrix I(n,n,T(0)); + for (std::uint64_t i=0;i A(3,3, T{0}); - A(0,0)=3; A(0,1)=0.2; A(0,2)=-0.1; - A(1,0)=0.1; A(1,1)=2.5; A(1,2)=0.3; - A(2,0)=-0.2;A(2,1)=0.4; A(2,2)=2.0; - - auto Ainv_ref = numerics::inverse(A, "LU"); // out-of-place - auto A_copy = A; - numerics::inplace_inverse(A_copy, "LU"); // in-place - - CHECK(A_copy.nearly_equal(Ainv_ref, (T)1e-12), "inplace_inverse(LU) equals out-of-place"); +template +static bool mats_equal_tol(const utils::Matrix& X, + const utils::Matrix& Y, + double tol = 1e-12) { + if (X.rows()!=Y.rows() || X.cols()!=Y.cols()) return false; + for (std::uint64_t i=0;i tol) return false; + return true; } -TEST_CASE(Inplace_Inverse_GJ) { - using T = double; - utils::Matrix A(2,2, T{0}); - A(0,0)=2; A(0,1)=1; - A(1,0)=1; A(1,1)=3; - - auto Ainv_ref = numerics::inverse(A, "Gauss-Jordan"); - auto A_copy = A; - numerics::inplace_inverse(A_copy, "Gauss-Jordan"); - - CHECK(A_copy.nearly_equal(Ainv_ref, (T)1e-12), "inplace_inverse(GJ) equals out-of-place"); +// A small well-conditioned SPD 3x3 +static utils::Matrix make_A3() { + utils::Matrix A(3,3,0.0); + // [ 4 3 0 + // 3 4 -1 + // 0 -1 4 ] + A(0,0)=4; A(0,1)=3; A(0,2)=0; + A(1,0)=3; A(1,1)=4; A(1,2)=-1; + A(2,0)=0; A(2,1)=-1; A(2,2)=4; + return A; } -TEST_CASE(Inverse_Identity) { - using T = double; - utils::Matrix I; - I.eye(3); - auto invI = numerics::inverse(I, "LU"); - CHECK(invI.nearly_equal(I, (T)0), "inverse(I) == I"); +// A random-ish SPD 5x5 (constructed deterministically) +static utils::Matrix make_A5_spd() { + utils::Matrix R(5,5,0.0); + // Fill R with a simple pattern + double v=1.0; + for (std::uint64_t i=0;i<5;++i) + for (std::uint64_t j=0;j<5;++j, v+=0.37) + R(i,j) = std::fmod(v, 3.0) - 1.0; // values in [-1,2) + // A = R^T R + 5 I → SPD and well-conditioned + utils::Matrix Rt(5,5,0.0); + for (std::uint64_t i=0;i<5;++i) + for (std::uint64_t j=0;j<5;++j) + Rt(i,j) = R(j,i); + auto RtR = numerics::matmul(Rt, R); + for (std::uint64_t i=0;i<5;++i) RtR(i,i) += 5.0; + return RtR; } + +// ---------- tests ---------- + +TEST_CASE(Inverse_GJ_3x3) { + auto A = make_A3(); + auto A_copy = A; + + auto Inv = numerics::inverse(A, "Gauss-Jordan"); // non-inplace + auto I = identity(3); + + auto AInv = numerics::matmul(A, Inv); + auto InvA = numerics::matmul(Inv, A); + + CHECK(mats_equal_tol(AInv, I, 1e-11), "A*Inv != I (Gauss-Jordan)"); + CHECK(mats_equal_tol(InvA, I, 1e-11), "Inv*A != I (Gauss-Jordan)"); + + // A should be unchanged by numerics::inverse (it copies internally) + CHECK(mats_equal_tol(A, A_copy, 1e-15), "inverse() modified input A"); +} + +TEST_CASE(Inverse_LU_3x3) { + auto A = make_A3(); + + auto Inv = numerics::inverse(A, "LU"); + auto I = identity(3); + + auto AInv = numerics::matmul(A, Inv); + auto InvA = numerics::matmul(Inv, A); + + CHECK(mats_equal_tol(AInv, I, 1e-11), "A*Inv != I (LU)"); + CHECK(mats_equal_tol(InvA, I, 1e-11), "Inv*A != I (LU)"); +} + +TEST_CASE(Inplace_Inverse_Both_Methods_Agree_5x5) { + auto A = make_A5_spd(); + + auto GJ = A; numerics::inplace_inverse(GJ, "Gauss-Jordan"); + auto LU = A; numerics::inplace_inverse(LU, "LU"); + + // Both should be valid inverses + auto I = identity(5); + CHECK(mats_equal_tol(numerics::matmul(A, GJ), I, 1e-10), "A*GJ != I"); + CHECK(mats_equal_tol(numerics::matmul(GJ, A), I, 1e-10), "GJ*A != I"); + CHECK(mats_equal_tol(numerics::matmul(A, LU), I, 1e-10), "A*LU != I"); + CHECK(mats_equal_tol(numerics::matmul(LU, A), I, 1e-10), "LU*A != I"); + + // And they should be very close to each other + CHECK(mats_equal_tol(GJ, LU, 1e-10), "Gauss-Jordan inverse != LU inverse"); +} + TEST_CASE(Inverse_NonSquare_Throws) { - using T = double; - utils::Matrix A(2,3, T{0}); // non-square - bool threw1=false, threw2=false; - try { auto X = numerics::inverse(A, "LU"); (void)X; } catch(...) { threw1=true; } - try { numerics::inplace_inverse(A, "Gauss-Jordan"); } catch(...) { threw2=true; } - CHECK(threw1 && threw2, "inverse throws on non-square for both methods"); + utils::Matrix A(2,3,1.0); + bool threw=false; + try { auto B = numerics::inverse(A, "Gauss-Jordan"); (void)B; } catch(const std::runtime_error&) { threw=true; } + CHECK(threw, "inverse should throw on non-square (Gauss-Jordan)"); + + threw=false; + try { numerics::inplace_inverse(A, "LU"); } catch(const std::runtime_error&) { threw=true; } + CHECK(threw, "inplace_inverse should throw on non-square (LU)"); } TEST_CASE(Inverse_Singular_Throws) { - using T = double; - utils::Matrix S(3,3, T{0}); - S(0,0)=1; S(0,1)=2; S(0,2)=3; - S(1,0)=1; S(1,1)=2; S(1,2)=3; // duplicate row -> singular - S(2,0)=0; S(2,1)=1; S(2,2)=0; + utils::Matrix A(3,3,0.0); + // Two identical rows → singular + A(0,0)=1; A(0,1)=2; A(0,2)=3; + A(1,0)=1; A(1,1)=2; A(1,2)=3; + A(2,0)=0; A(2,1)=1; A(2,2)=4; - bool threw_gj=false, threw_lu=false; - try { auto X = numerics::inverse(S, "Gauss-Jordan"); (void)X; } catch(...) { threw_gj=true; } - try { auto X = numerics::inverse(S, "LU"); (void)X; } catch(...) { threw_lu=true; } - CHECK(threw_gj && threw_lu, "inverse throws on singular for both methods"); + bool threw=false; + try { auto B = numerics::inverse(A, "Gauss-Jordan"); (void)B; } catch(const std::runtime_error&) { threw=true; } + CHECK(threw, "inverse(GJ) should throw on singular"); + + threw=false; + try { auto B = numerics::inverse(A, "LU"); (void)B; } catch(const std::runtime_error&) { threw=true; } + CHECK(threw, "inverse(LU) should throw on singular"); } -TEST_CASE(Inverse_Unknown_Method_Throws) { - using T = double; - utils::Matrix A(2,2, T{0}); - A(0,0)=1; A(1,1)=1; +TEST_CASE(Inverse_Invalid_Method_Throws) { + auto A = make_A3(); bool threw=false; - try { auto X = numerics::inverse(A, "Foobar"); (void)X; } catch(...) { threw=true; } - CHECK(threw, "inverse unknown method throws"); + try { auto B = numerics::inverse(A, "NotAThing"); (void)B; } catch(const std::runtime_error&) { threw=true; } + CHECK(threw, "inverse should throw on unknown method"); } \ No newline at end of file diff --git a/test/test_lu.cpp b/test/test_lu.cpp index 923505c..1255d3d 100644 --- a/test/test_lu.cpp +++ b/test/test_lu.cpp @@ -1,190 +1,169 @@ #include "test_common.h" -#include "./utils/utils.h" // brings in vector.h, matrix.h, etc. -#include "./numerics/matmul.h" // numerics::matmul +#include "./utils/matrix.h" +#include "./utils/vector.h" +#include "./numerics/matmul.h" +#include "./numerics/matvec.h" #include "./decomp/lu.h" //#include - -TEST_CASE(LU_Solve_Vector_Basic) { - using T = double; - - // A * x = b with exact solution x = [1, 1, 2]^T - utils::Matrix A(3,3, T{0}); - A(0,0)=2; A(0,1)=1; A(0,2)=1; - A(1,0)=4; A(1,1)=-6; A(1,2)=0; - A(2,0)=-2; A(2,1)=7; A(2,2)=2; - - utils::Vector b(3, T{0}); - b[0]=5; b[1]=-2; b[2]=9; - - decomp::LUdcmpd lu(A); - auto x = lu.solve(b); - - utils::Vector x_true(3, T{0}); - x_true[0]=1; x_true[1]=1; x_true[2]=2; - - CHECK( (x.nearly_equal(x_true,1e-12)), "LU solve (vector RHS) failed" ); +// ---------- helpers ---------- +template +static bool mats_equal_tol(const utils::Matrix& X, + const utils::Matrix& Y, + double tol = 1e-12) { + if (X.rows()!=Y.rows() || X.cols()!=Y.cols()) return false; + for (std::uint64_t i=0;i tol) return false; + return true; } -TEST_CASE(LU_Solve_MatrixRHS_TwoColumns) { - using T = double; - - // Same A, solve two RHS at once - utils::Matrix A(3,3, T{0}); - A(0,0)=2; A(0,1)=1; A(0,2)=1; - A(1,0)=4; A(1,1)=-6; A(1,2)=0; - A(2,0)=-2; A(2,1)=7; A(2,2)=2; - - utils::Matrix B(3,2, T{0}); - // First column b1 (same as previous test) - B(0,0)=5; B(1,0)=-2; B(2,0)=9; - // Second column b2 → choose solution x2 = [0, 2, 1]^T - // Compute b2 = A * x2 by hand: - // Row0: 2*0 + 1*2 + 1*1 = 3 - // Row1: 4*0 -6*2 + 0*1 = -12 - // Row2: -2*0 +7*2 + 2*1 = 16 - B(0,1)=3; B(1,1)=-12; B(2,1)=16; - - decomp::LUdcmpd lu(A); - auto X = lu.solve(B); - - // Check A*X ≈ B - auto AX = numerics::matmul(A, X); - CHECK( AX.nearly_equal(B, 1e-12), "A * X does not match B for matrix RHS" ); +template +static utils::Matrix identity(std::uint64_t n) { + utils::Matrix I(n,n,T(0)); + for (std::uint64_t i=0;i A(3,3, T{0}); - A(0,0)=1; A(0,1)=2; A(0,2)=3; - A(1,0)=0; A(1,1)=1; A(1,2)=4; - A(2,0)=5; A(2,1)=6; A(2,2)=0; - - decomp::LUdcmpd lu(A); - T det = lu.det(); - CHECK( std::fabs(det - T{1}) < 1e-12, "det(A) should be 1" ); -} - -TEST_CASE(LU_Pivoting_Handles_Zero_Leading) { - using T = double; - - // Requires pivoting (A(0,0)=0); system has solution x=[1,2]^T, b = A*x = [2,3]^T - utils::Matrix A(2,2, T{0}); - A(0,0)=0; A(0,1)=1; - A(1,0)=1; A(1,1)=1; - - utils::Vector b(2, T{0}); - b[0]=2; b[1]=3; - - decomp::LUdcmpd lu(A); - auto x = lu.solve(b); - - utils::Vector x_true(2, T{0}); x_true[0]=1; x_true[1]=2; - CHECK( (x.nearly_equal(x_true,1e-12)), "Pivoting failed on zero-leading matrix" ); -} - -TEST_CASE(LU_Input_Unchanged_By_NonInplace_Path) { - using T = double; - - utils::Matrix A(4,4, T{0}); - for (uint64_t i=0;i<4;++i) { - for (uint64_t j=0;j<4;++j) { - A(i,j) = (i==j) ? 3.0 : 0.1 * ((i+1)*(j+2) % 5 + 1); +template +static void split_LU(const utils::Matrix& lu, + utils::Matrix& L, + utils::Matrix& U) { + const std::uint64_t n = lu.rows(); + L.resize(n,n,T(0)); + U.resize(n,n,T(0)); + for (std::uint64_t i=0;ij) L(i,j) = lu(i,j); + else if (i==j){ L(i,i) = T(1); U(i,i) = lu(i,i); } + else U(i,j) = lu(i,j); } } - utils::Matrix A_copy = A; - - decomp::LUdcmpd lu(A); // constructor should not mutate input A - CHECK( A.nearly_equal(A_copy, 0.0), "LU constructor modified input matrix" ); - - // Also check solve doesn't mutate RHS copy when using out-of-place convenience - utils::Vector b(4, 0.0); - for (uint64_t i=0;i<4;++i) b[i] = double(i+1); - auto b_copy = b; - - auto x = lu.solve(b); - (void)x; - CHECK( (b.nearly_equal(b_copy, 1e-300)), "solve(b) modified its input vector" ); } -TEST_CASE(LU_Inplace_Equals_OutOfPlace_Solve_Vector) { - using T = double; +template +static utils::Matrix permutation_from_indx(const std::vector& indx) { + const std::uint64_t n = indx.size(); + auto P = identity(n); + // Apply the same sequence of row swaps that was applied during factorization + for (std::uint64_t k=0;k A(3,3, T{0}); - A(0,0)=4; A(0,1)=1; A(0,2)=2; - A(1,0)=0; A(1,1)=3; A(1,2)=-1; - A(2,0)=0; A(2,1)=0; A(2,2)=2; - - utils::Vector b(3, T{0}); b[0]=7; b[1]=5; b[2]=4; +// A well-conditioned 3x3 (symmetric positive definite) +static utils::Matrix make_A_spd() { + utils::Matrix A(3,3,0.0); + // [ 4 3 0 + // 3 4 -1 + // 0 -1 4 ] + A(0,0)=4; A(0,1)=3; A(0,2)=0; + A(1,0)=3; A(1,1)=4; A(1,2)=-1; + A(2,0)=0; A(2,1)=-1; A(2,2)=4; + return A; +} +TEST_CASE(LU_PA_equals_LU) { + auto A = make_A_spd(); decomp::LUdcmpd lu(A); - auto x1 = lu.solve(b); - utils::Vector x2(3, T{0}); - lu.inplace_solve(b, x2); + utils::Matrix L,U; + split_LU(lu.lu, L, U); + auto P = permutation_from_indx(lu.indx); - CHECK( (x1.nearly_equal(x2,1e-12)), "inplace_solve(b,x) differs from solve(b)" ); + auto PA = numerics::matmul(P, A); + auto LU = numerics::matmul(L, U); + + CHECK(mats_equal_tol(PA, LU, 1e-12), "PA should equal LU"); } -TEST_CASE(LU_Singular_Throws) { - using T = double; +TEST_CASE(LU_Solve_Vector) { + auto A = make_A_spd(); + decomp::LUdcmpd lu(A); - // Singular (row2 = 2 * row1) - utils::Matrix S(2,2, T{0}); - S(0,0)=1; S(0,1)=2; - S(1,0)=2; S(1,1)=4; + utils::Vd b(3,0.0); + b[0]=1.0; b[1]=2.0; b[2]=3.0; - bool threw=false; - try { - decomp::LUdcmpd lu(S); - (void)lu; - } catch (const std::runtime_error&) { threw = true; } - CHECK(threw, "LU should throw on singular matrix"); + auto x = lu.solve(b); + auto Ax = numerics::matvec(A, x); + + CHECK(b.nearly_equal_vec(Ax, 1e-12), "A*x should equal b"); +} + +TEST_CASE(LU_Solve_Matrix_MultiRHS) { + auto A = make_A_spd(); + decomp::LUdcmpd lu(A); + + utils::Matrix B(3,2,0.0); + // two RHS columns + B(0,0)=1; B(1,0)=2; B(2,0)=3; + B(0,1)=4; B(1,1)=5; B(2,1)=6; + + auto X = lu.solve(B); // 3x2 + + // Check A*X == B + auto AX = numerics::matmul(A, X); + CHECK(mats_equal_tol(AX, B, 1e-12), "A*X should equal B"); + + // And that column-wise solve agrees + utils::Vd b0(3,0.0), b1(3,0.0); + for (int i=0;i<3;++i){ b0[i]=B(i,0); b1[i]=B(i,1); } + auto x0 = lu.solve(b0); + auto x1 = lu.solve(b1); + + CHECK(std::fabs(double(X(0,0)-x0[0]))<1e-12 && + std::fabs(double(X(1,0)-x0[1]))<1e-12 && + std::fabs(double(X(2,0)-x0[2]))<1e-12, "column 0 mismatch"); + + CHECK(std::fabs(double(X(0,1)-x1[0]))<1e-12 && + std::fabs(double(X(1,1)-x1[1]))<1e-12 && + std::fabs(double(X(2,1)-x1[2]))<1e-12, "column 1 mismatch"); +} + +TEST_CASE(LU_Determinant) { + auto A = make_A_spd(); + decomp::LUdcmpd lu(A); + // For this A, det = 24 + double d = lu.det(); + CHECK(std::fabs(d - 24.0) < 1e-12, "determinant incorrect"); +} + +TEST_CASE(LU_Inverse_via_SolveI) { + auto A = make_A_spd(); + decomp::LUdcmpd lu(A); + + // Build identity and solve A * X = I + auto I = identity(3); + auto Inv = lu.solve(I); + + // Check A*Inv == I (and Inv*A == I for good measure) + auto AInv = numerics::matmul(A, Inv); + auto InvA = numerics::matmul(Inv, A); + + CHECK(mats_equal_tol(AInv, I, 1e-11), "A*Inv should be I"); + CHECK(mats_equal_tol(InvA, I, 1e-11), "Inv*A should be I"); } TEST_CASE(LU_NonSquare_Throws) { - using T = double; - - utils::Matrix A(3,2, T{0}); - bool threw = false; - try { - decomp::LUdcmpd lu(A); - (void)lu; - } catch (const std::runtime_error&) { threw = true; } - CHECK(threw, "LU should throw on non-square input"); + utils::Matrix A(2,3,1.0); + bool threw=false; + try { decomp::LUdcmpd lu(A); } catch (const std::runtime_error&) { threw = true; } + CHECK(threw, "LU should throw on non-square"); } -TEST_CASE(LU_Inverse_RoundTrip) { - using T = double; +TEST_CASE(LU_Singular_Throws) { + utils::Matrix A(3,3,0.0); + // Make two identical rows + A(0,0)=1; A(0,1)=2; A(0,2)=3; + A(1,0)=1; A(1,1)=2; A(1,2)=3; + A(2,0)=0; A(2,1)=1; A(2,2)=4; - // Build a strictly diagonally dominant 5x5 - utils::Matrix A(5,5, T{0}); - for (uint64_t i=0;i<5;++i) { - T rowsum = 0; - for (uint64_t j=0;j<5;++j) { - if (i==j) continue; - A(i,j) = T(0.01 * double(1 + ((i+2)*(j+3)) % 7)); - rowsum += std::fabs(A(i,j)); - } - A(i,i) = rowsum + T{1}; - } - - decomp::LUdcmpd lu(A); - auto Ainv = lu.inverse(); - - utils::Md I(5,5, 0.0); - for (uint64_t i=0;i +static void fill_seq(utils::Matrix& M, T start = T{0}, T step = T{1}) { + std::uint64_t k = 0; + for (std::uint64_t i = 0; i < M.rows(); ++i) + for (std::uint64_t j = 0; j < M.cols(); ++j, ++k) + M(i,j) = start + step * static_cast(k); +} + + + + + +// ---------- tests ---------- + +TEST_CASE(matequal_shape_mismatch) { + utils::Mi A(3,3,0), B(3,4,0); + CHECK(!numerics::matequal(A,B), "shape mismatch should be false (serial)"); + +#ifdef _OPENMP + CHECK(!numerics::matequal_omp(A,B), "shape mismatch should be false (omp)"); +#endif + CHECK(!numerics::matequal_auto(A,B), "shape mismatch should be false (auto)"); +} + +TEST_CASE(matequal_int_true_false) { + utils::Mi A(4,5,0), B(4,5,0); + fill_seq(A, int64_t(0), int64_t(1)); + fill_seq(B, int64_t(0), int64_t(1)); + CHECK(numerics::matequal(A,B), "ints equal (serial)"); +#ifdef _OPENMP + CHECK(numerics::matequal_omp(A,B), "ints equal (omp)"); +#endif + // flip one element + B(2,3) += 1; + CHECK(!numerics::matequal(A,B), "ints differ (serial)"); +#ifdef _OPENMP + CHECK(!numerics::matequal_omp(A,B), "ints differ (omp)"); // will FAIL if your omp branch uses '!=' +#endif +} + +TEST_CASE(matequal_double_tolerance) { + utils::Md A(3,3,0.0), B(3,3,0.0); + fill_seq(A, double(1.0), double(0.125)); + fill_seq(B, double(1.0), double(0.125)); + // tiny perturbation within default tol + B(1,1) += 1e-12; + CHECK(numerics::matequal(A,B), "double within tol (serial)"); +#ifdef _OPENMP + CHECK(numerics::matequal_omp(A,B), "double within tol (omp)"); +#endif + // larger perturbation exceeds tol + B(0,2) += 1e-6; + CHECK(!numerics::matequal(A,B, 1e-9), "double exceeds tol (serial)"); +#ifdef _OPENMP + CHECK(!numerics::matequal_omp(A,B, 1e-9), "double exceeds tol (omp)"); +#endif +} + +TEST_CASE(matequal_auto_agrees) { + // Choose size so auto likely takes the OMP path when available, + // but this test only checks correctness, not which path was taken. + utils::Md A(256,256,0.0), B(256,256,0.0); + fill_seq(A, double(0.0), double(0.01)); + fill_seq(B, double(0.0), double(0.01)); + CHECK(numerics::matequal_auto(A,B), "auto equal"); + + B(5,7) += 1e-3; + CHECK(!numerics::matequal_auto(A,B, 1e-9), "auto detects mismatch"); +} + +#ifdef _OPENMP +TEST_CASE(mateequal_omp_nested_callsite) { + // Verify correctness when called inside an outer parallel region. + utils::Mi A(128,128,0), B(128,128,0); + fill_seq(A, int64_t(0), int64_t(1)); + fill_seq(B, int64_t(0), int64_t(1)); + + // allow one nested level; inner region inside mateequal_omp may spawn a team + int prev_levels = omp_get_max_active_levels(); + omp_set_max_active_levels(2); + + bool ok_equal = false, ok_diff = false; + + #pragma omp parallel num_threads(2) shared(ok_equal, ok_diff) + { + #pragma omp single + { + ok_equal = numerics::matequal_omp(A,B); + B(10,10) += 1; // introduce a mismatch + ok_diff = !numerics::matequal_omp(A,B); + } + } + + omp_set_max_active_levels(prev_levels); + + CHECK(ok_equal, "nested equal should be true"); + CHECK(ok_diff, "nested mismatch should be false"); +} +#endif diff --git a/test/test_matmul.cpp b/test/test_matmul.cpp index 8808fdf..82be5aa 100644 --- a/test/test_matmul.cpp +++ b/test/test_matmul.cpp @@ -4,161 +4,124 @@ #include +// ---------- helpers ---------- +template +static bool mats_equal(const utils::Matrix& X, const utils::Matrix& Y, double tol = 0.0) { + if (X.rows()!=Y.rows() || X.cols()!=Y.cols()) return false; + if (std::is_floating_point::value) { + for (std::uint64_t i=0;i tol) return false; + } else { + for (std::uint64_t i=0;i(A,B); - // Hand-checked first row: - // row0 dot columns: - // c00 = 1*9 + 2*6 + 3*3 = 30 - // c01 = 1*8 + 2*5 + 3*2 = 24 - // c02 = 1*7 + 2*4 + 3*1 = 18 +template +static void fill_seq(utils::Matrix& M, T start = T(0), T step = T(1)) { + std::uint64_t k = 0; + for (std::uint64_t i=0;i(k); +} +// ---------- tests ---------- + +// Small known example: (3x2) · (2x3) +TEST_CASE(Matmul_Small_Known) { + utils::Mi A(3,2,0), B(2,3,0); + // A = [1 2; 3 4; 5 6] + A(0,0)=1; A(0,1)=2; + A(1,0)=3; A(1,1)=4; + A(2,0)=5; A(2,1)=6; + // B = [7 8 9; 10 11 12] + B(0,0)=7; B(0,1)=8; B(0,2)=9; + B(1,0)=10; B(1,1)=11; B(1,2)=12; + + auto C = numerics::matmul(A,B); CHECK(C.rows()==3 && C.cols()==3, "shape 3x3 wrong"); - CHECK(C(0,0)==30.0 && C(0,1)==24.0 && C(0,2)==18.0, "first row wrong"); + + // Expected C: + // [27 30 33] + // [61 68 75] + // [95 106 117] + CHECK(C(0,0)==27 && C(0,1)==30 && C(0,2)==33, "row 0 wrong"); + CHECK(C(1,0)==61 && C(1,1)==68 && C(1,2)==75, "row 1 wrong"); + CHECK(C(2,0)==95 && C(2,1)==106 && C(2,2)==117, "row 2 wrong"); } -TEST_CASE(Matmul_OMP_Equals_Serial) { - utils::Md A(4,5,0.0), B(5,3,0.0); - // Fill deterministic - for (uint64_t i=0;i(A,B); - auto Cr = numerics::matmul_rows_omp(A,B); - auto Cc = numerics::matmul_collapse_omp(A,B); - auto Ca = numerics::matmul_auto(A,B); - - CHECK((Cs.nearly_equal(Cr, 1e-12)), "rows_omp != serial"); - CHECK((Cs.nearly_equal(Cc, 1e-12)), "collapse_omp != serial"); - CHECK((Cs.nearly_equal(Ca, 1e-12)), "auto != serial"); -} - -// ============ Dimension mismatch ============ -TEST_CASE(Matmul_DimensionMismatch_Throws) { - utils::Md A(2,3,0.0), B(4,2,0.0); +TEST_CASE(Matmul_DimMismatch_Throws) { + utils::Md A(2,3,1.0), B(4,2,2.0); // A.cols()!=B.rows() bool threw=false; - try { auto _ = numerics::matmul(A,B); (void)_; } - catch (const std::runtime_error&) { threw=true; } - CHECK(threw, "serial should throw on dim mismatch"); - - threw=false; try { auto _ = numerics::matmul_rows_omp(A,B); (void)_; } - catch (const std::runtime_error&) { threw=true; } - CHECK(threw, "rows_omp should throw on dim mismatch"); - - threw=false; try { auto _ = numerics::matmul_collapse_omp(A,B); (void)_; } - catch (const std::runtime_error&) { threw=true; } - CHECK(threw, "collapse_omp should throw on dim mismatch"); + try { (void)numerics::matmul(A,B); } catch(const std::runtime_error&) { threw=true; } + CHECK(threw, "matmul should throw on dim mismatch"); } -// ============ Edge cases ============ -TEST_CASE(Matmul_Edges_ZeroDims) { - // (0xK) * (KxP) -> (0xP) - utils::Md A0(0,5,0.0), B1(5,3,0.0); - auto C0 = numerics::matmul(A0,B1); - CHECK(C0.rows()==0 && C0.cols()==3, "0xK * KxP shape wrong"); +// Compare all variants vs serial on a moderate size +TEST_CASE(Matmul_Variants_Equal_Int) { + const std::uint64_t m=32, n=24, p=16; + utils::Mi A(m,n,0), B(n,p,0); - // (MxK) * (Kx0) -> (Mx0) - utils::Md A2(7,4,0.0), B0(4,0,0.0); - auto C1 = numerics::matmul(A2,B0); - CHECK(C1.rows()==7 && C1.cols()==0, "MxK * Kx0 shape wrong"); + // deterministic fill (no randomness) + fill_seq(A, int64_t(1), int64_t(1)); + fill_seq(B, int64_t(2), int64_t(3)); + + auto C_ref = numerics::matmul(A,B); + + auto C_rows = numerics::matmul_rows_omp(A,B); + auto C_collapse = numerics::matmul_collapse_omp(A,B); + auto C_auto = numerics::matmul_auto(A,B); + + CHECK(mats_equal(C_rows, C_ref), "rows_omp != serial"); + CHECK(mats_equal(C_collapse, C_ref), "collapse_omp != serial"); + CHECK(mats_equal(C_auto, C_ref), "auto != serial"); } -// ============ Identity sanity ============ -TEST_CASE(Matmul_Identity) { - const uint64_t n=5; - utils::Md I(n,n,0.0), A(n,n,0.0); - for (uint64_t i=0;i(I,A); - auto R = numerics::matmul(A,I); - CHECK(L == A, "I*A != A"); - CHECK(R == A, "A*I != A"); + fill_seq(A, 0.1, 0.01); + fill_seq(B, 1.0, 0.02); + + auto C_ref = numerics::matmul(A,B); + auto C_rows = numerics::matmul_rows_omp(A,B); + auto C_collapse = numerics::matmul_collapse_omp(A,B); + auto C_auto = numerics::matmul_auto(A,B); + + CHECK(mats_equal(C_rows, C_ref, 1e-9), "rows_omp != serial (double)"); + CHECK(mats_equal(C_collapse, C_ref, 1e-9), "collapse_omp != serial (double)"); + CHECK(mats_equal(C_auto, C_ref, 1e-9), "auto != serial (double)"); } -// ============ Perf sanity (same kernel: 1 thread vs many) ============ -template -static double time_it(F&& f, int iters=1) { - auto t0 = std::chrono::high_resolution_clock::now(); - for (int i=0;i(t1 - t0).count(); +// Nested callsite sanity: call OMP variant from within an outer region +#ifdef _OPENMP +TEST_CASE(Matmul_OMP_Nested_Callsite) { + const std::uint64_t m=48, n=24, p=32; + utils::Mi A(m,n,0), B(n,p,0); + fill_seq(A, int64_t(1), int64_t(2)); + fill_seq(B, int64_t(3), int64_t(1)); + + auto C_ref = numerics::matmul(A,B); + + int prev_levels = omp_get_max_active_levels(); + omp_set_max_active_levels(2); + + utils::Mi C_nested; + #pragma omp parallel num_threads(2) + { + #pragma omp single + { + // either variant is fine; collapse(2) has more parallelism + C_nested = numerics::matmul_collapse_omp(A,B); + } + } + + omp_set_max_active_levels(prev_levels); + + CHECK(mats_equal(C_nested, C_ref), "nested collapse_omp result mismatch"); } - -TEST_CASE(Matmul_Perf_Sanity_RowOMP) { -#ifndef _OPENMP - return; -#else - int hw = omp_get_max_threads(); - if (hw <= 1) return; - - const uint64_t m=512, k=512, p=512; // ~134M MACs; adjust if needed - utils::Md A(m,k,0.0), B(k,p,0.0); - for (uint64_t i=0;i(A,B); - - int prev = omp_get_max_threads(); - auto t0 = std::chrono::high_resolution_clock::now(); - omp_set_num_threads(1); - numerics::matmul_rows_omp(A,B); - double t1 = std::chrono::duration(std::chrono::high_resolution_clock::now() - t0).count(); - - omp_set_num_threads(hw); - t0 = std::chrono::high_resolution_clock::now(); - numerics::matmul_rows_omp(A,B); - double tN = std::chrono::duration(std::chrono::high_resolution_clock::now() - t0).count(); - - omp_set_num_threads(prev); - - // Must not be notably slower with many threads - CHECK(tN <= t1 * 1.05, "rows_omp: multi-thread slower than single-thread"); -#endif -} - -TEST_CASE(Matmul_Perf_Sanity_CollapseOMP) { -#ifndef _OPENMP - return; -#else - int hw = omp_get_max_threads(); - if (hw <= 1) return; - - const uint64_t m=512, k=512, p=512; - utils::Md A(m,k,0.0), B(k,p,0.0); - for (uint64_t i=0;i(A,B); // warm-up - - int prev = omp_get_max_threads(); - auto t0 = std::chrono::high_resolution_clock::now(); - omp_set_num_threads(1); - numerics::matmul_collapse_omp(A,B); - double t1 = std::chrono::duration(std::chrono::high_resolution_clock::now() - t0).count(); - - omp_set_num_threads(hw); - t0 = std::chrono::high_resolution_clock::now(); - numerics::matmul_collapse_omp(A,B); - double tN = std::chrono::duration(std::chrono::high_resolution_clock::now() - t0).count(); - - omp_set_num_threads(prev); - - CHECK(tN <= t1 * 1.05, "collapse_omp: multi-thread slower than single-thread"); -#endif -} \ No newline at end of file +#endif \ No newline at end of file diff --git a/test/test_matrix.cpp b/test/test_matrix.cpp index 5fc1b02..1a50dd4 100644 --- a/test/test_matrix.cpp +++ b/test/test_matrix.cpp @@ -1,142 +1,164 @@ #include "test_common.h" -#include "./utils/utils.h" +#include "./utils/matrix.h" using utils::Vf; using utils::Vd; using utils::Vi; using utils::Mf; using utils::Md; using utils::Mi; -// ---------- Construction & element access ---------- -TEST_CASE(Matrix_Construct_Access) { - Md M; // default - CHECK(M.rows()==0 && M.cols()==0, "default ctor dims wrong"); - - Mf A(2,3, 1.0f); - CHECK(A.rows()==2 && A.cols()==3, "ctor dims wrong"); - CHECK(A(0,0)==1.0f && A(1,2)==1.0f, "fill wrong"); - - A(0,1)=2.5f; A(1,0)=3.5f; - CHECK(A(0,1)==2.5f && A(1,0)==3.5f, "operator() set/get failed"); +// tiny helper +template +static bool mat_is_filled(const utils::Matrix& M, T v) { + for (std::uint64_t i = 0; i < M.rows(); ++i) + for (std::uint64_t j = 0; j < M.cols(); ++j) + if (M(i,j) != v) return false; + return true; } -// ---------- Equality, inequality, nearly_equal ---------- -TEST_CASE(Matrix_Equality) { - Mi A(2,2,0), B(2,2,0), C(2,2,1); - A(0,0)=1; A(1,1)=1; // A = I - B(0,0)=1; B(1,1)=1; // B = I - - CHECK(A == B, "== failed identical"); - CHECK(!(A != B), "!= failed identical"); - CHECK(A != C, "!= failed different"); - - Md F1(2,2,0.0), F2(2,2,0.0); - F1(0,0)=1.0; F1(1,1)=2.0; - F2(0,0)=1.0; F2(1,1)=2.0 + 5e-10; // tiny perturbation - CHECK(!(F1 == F2), "operator== is exact; should differ"); - CHECK(F1.nearly_equal(F2, 1e-9), "nearly_equal should accept tiny delta"); - CHECK(!F1.nearly_equal(F2, 1e-12), "nearly_equal too strict should fail"); +// ------------------- basic construction ------------------- +TEST_CASE(Matrix_Default_Construct) { + Md M; + CHECK_EQ(M.rows(), 0, "default rows should be 0"); + CHECK_EQ(M.cols(), 0, "default cols should be 0"); } -// ---------- Row helpers ---------- -TEST_CASE(Matrix_Row_Get_Set) { - Mf M(3,4, 0.0f); - Vf r(4, 0.0f); - for (uint64_t j=0;j<4;++j) r[j] = float(j+1); // [1,2,3,4] - - M.set_row(1, r); - auto out = M.get_row(1); - CHECK(out == r, "set_row/get_row mismatch"); - - // size mismatch should throw - bool threw=false; - try { Vf bad(3, 9.0f); M.set_row(2, bad); } catch (const std::exception&) { threw=true; } - CHECK(threw, "set_row should throw on size mismatch"); - - // out of range - threw=false; - try { (void)M.get_row(3); } catch (const std::out_of_range&) { threw=true; } - CHECK(threw, "get_row should throw on OOB index"); +TEST_CASE(Matrix_Filled_Construct) { + Md M(3, 4, 2.5); + CHECK_EQ(M.rows(), 3, "rows"); + CHECK_EQ(M.cols(), 4, "cols"); + CHECK(mat_is_filled(M, 2.5), "all elements should be 2.5"); } -// ---------- Column helpers ---------- -TEST_CASE(Matrix_Col_Get_Set) { - Md M(3,2, 0.0); - Vd c(3, 0.0); - c[0]=10; c[1]=20; c[2]=30; - - M.set_col(1, c); - auto out = M.get_col(1); - CHECK(out == c, "set_col/get_col mismatch"); - - // size mismatch should throw - bool threw=false; - try { Vd bad(2, 9.0); M.set_col(0, bad); } catch (const std::exception&) { threw=true; } - CHECK(threw, "set_col should throw on size mismatch"); - - // out of range - threw=false; - try { (void)M.get_col(2); } catch (const std::out_of_range&) { threw=true; } - CHECK(threw, "get_col should throw on OOB index"); +// ------------------- element access / write ------------------- +TEST_CASE(Matrix_Set_Get) { + Mi M(2, 3, 0); + M(0,0) = 42; + M(1,2) = -7; + CHECK_EQ(M(0,0), 42, "set/get (0,0)"); + CHECK_EQ(M(1,2), -7, "set/get (1,2)"); } -// ---------- swap_rows / swap_cols ---------- -TEST_CASE(Matrix_Swap_Rows_Cols) { +// ------------------- resize semantics ------------------- +TEST_CASE(Matrix_Resize_Grow) { + Mf M(2, 2, 1.0f); + M.resize(3, 4, 9.0f); // grow; newly appended elements get the fill value + CHECK_EQ(M.rows(), 3, "rows after resize"); + CHECK_EQ(M.cols(), 4, "cols after resize"); + CHECK(M(2,3) == 9.0f, "last element should be the fill value after grow"); +} + +TEST_CASE(Matrix_Resize_Shrink) { + Mi M(4, 4, 5); + M(0,0) = 11; + M.resize(2, 2, 999); // shrink; size reduces + CHECK_EQ(M.rows(), 2, "rows after shrink"); + CHECK_EQ(M.cols(), 2, "cols after shrink"); + // element mapping after shrink is implementation dependent; just check bounds usable + M(1,1) = 3; + CHECK_EQ(M(1,1), 3, "write after shrink works"); +} + +// ------------------- row helpers ------------------- +TEST_CASE(Matrix_Get_Row) { + Mi M(3,4,0); + // set row 1 to [10,20,30,40] + for (std::uint64_t j=0;j<4;++j) M(1,j) = (j+1)*10; + auto r = M.get_row(1); + CHECK_EQ(r.size(), 4, "row size"); + CHECK(r[0]==10 && r[1]==20 && r[2]==30 && r[3]==40, "row contents"); +} + +TEST_CASE(Matrix_Set_Row) { Mi M(2,3,0); - // Row 0: [1,2,3], Row 1: [4,5,6] - M(0,0)=1; M(0,1)=2; M(0,2)=3; - M(1,0)=4; M(1,1)=5; M(1,2)=6; + utils::Vector v(3, 0); + v[0]=7; v[1]=8; v[2]=9; + M.set_row(0, v); + CHECK(M(0,0)==7 && M(0,1)==8 && M(0,2)==9, "set_row contents"); +} - M.swap_rows(0,1); - CHECK(M(0,0)==4 && M(0,1)==5 && M(0,2)==6, "swap_rows row0 wrong"); - CHECK(M(1,0)==1 && M(1,1)==2 && M(1,2)==3, "swap_rows row1 wrong"); - - // swap back via cols - M.swap_cols(0,2); - // After swapping col0<->col2: - // Row0: [6,5,4], Row1: [3,2,1] - CHECK(M(0,0)==6 && M(0,1)==5 && M(0,2)==4, "swap_cols row0 wrong"); - CHECK(M(1,0)==3 && M(1,1)==2 && M(1,2)==1, "swap_cols row1 wrong"); - - // no-op swap (a==b) should not crash or change - M.swap_rows(1,1); - M.swap_cols(2,2); - - // OOB checks +TEST_CASE(Matrix_Row_OutOfRange_Throws) { + Mi M(2,2,0); bool threw=false; - try { M.swap_rows(5,1); } catch (const std::out_of_range&) { threw=true; } - CHECK(threw, "swap_rows should throw on OOB"); + try { (void)M.get_row(2); } catch(const std::out_of_range&) { threw=true; } + CHECK(threw, "get_row out-of-range should throw"); + threw=false; - try { M.swap_cols(0,9); } catch (const std::out_of_range&) { threw=true; } - CHECK(threw, "swap_cols should throw on OOB"); + try { + utils::Vector v(3,1); // wrong size + M.set_row(1, v); + } catch(const std::runtime_error&) { threw=true; } + CHECK(threw, "set_row size mismatch should throw"); } -// ---------- data() layout (contiguous row-major) ---------- -TEST_CASE(Matrix_Data_Layout) { - Md M(2,3, 0.0); - // Fill increasing sequence - double val=1.0; - for (uint64_t i=0;i v(3, 0); + v[0]=1; v[1]=4; v[2]=9; + M.set_col(0, v); + CHECK(M(0,0)==1 && M(1,0)==4 && M(2,0)==9, "set_col contents"); +} + +TEST_CASE(Matrix_Col_OutOfRange_Throws) { + Mi M(2,2,0); + bool threw=false; + try { (void)M.get_col(2); } catch(const std::out_of_range&) { threw=true; } + CHECK(threw, "get_col out-of-range should throw"); + + threw=false; + try { + utils::Vector v(1,1); // wrong size + M.set_col(1, v); + } catch(const std::runtime_error&) { threw=true; } + CHECK(threw, "set_col size mismatch should throw"); +} + +// ------------------- swap rows/cols ------------------- +TEST_CASE(Matrix_Swap_Rows) { + Mi M(2,3,0); + // row0: 1 2 3, row1: 4 5 6 + for (std::uint64_t j=0;j<3;++j){ M(0,j)=j+1; M(1,j)=j+4; } + M.swap_rows(0,1); + CHECK(M(0,0)==4 && M(0,1)==5 && M(0,2)==6, "row0 after swap"); + CHECK(M(1,0)==1 && M(1,1)==2 && M(1,2)==3, "row1 after swap"); +} + +TEST_CASE(Matrix_Swap_Cols) { + Mi M(3,3,0); + // col0: 9 8 7, col2: 1 2 3 + M(0,0)=9; M(1,0)=8; M(2,0)=7; + M(0,2)=1; M(1,2)=2; M(2,2)=3; + M.swap_cols(0,2); + CHECK(M(0,0)==1 && M(1,0)==2 && M(2,0)==3, "col0 after swap"); + CHECK(M(0,2)==9 && M(1,2)==8 && M(2,2)==7, "col2 after swap"); +} + +TEST_CASE(Matrix_Swap_OutOfRange_Throws) { + Mi M(2,2,0); + bool threw=false; + try { M.swap_rows(0,2); } catch(const std::out_of_range&) { threw=true; } + CHECK(threw, "swap_rows out-of-range should throw"); + threw=false; + try { M.swap_cols(0,3); } catch(const std::out_of_range&) { threw=true; } + CHECK(threw, "swap_cols out-of-range should throw"); +} + +// ------------------- stream output (basic sanity) ------------------- +TEST_CASE(Matrix_Stream_ToString) { Mf M(2,2,0.0f); - M(0,0)=1.0f; M(0,1)=2.0f; - M(1,0)=3.0f; M(1,1)=4.0f; - - std::ostringstream oss; - oss << M; - const std::string s = oss.str(); - // Format example: - // [[1.000, 2.000], - // [3.000, 4.000]] - CHECK(s.find("[[1.000, 2.000],") != std::string::npos, "ostream first row format"); - CHECK(s.find("[3.000, 4.000]]") != std::string::npos, "ostream second row format"); + M(0,0)=1.0f; M(0,1)=2.0f; M(1,0)=3.0f; M(1,1)=4.0f; + std::ostringstream os; + os << M; + auto s = os.str(); + CHECK(s.find("[[") != std::string::npos, "starts with [["); + CHECK(s.find("1.000") != std::string::npos, "contains formatted 1.000"); + CHECK(s.find("4.000") != std::string::npos, "contains formatted 4.000"); } \ No newline at end of file diff --git a/test/test_matvec.cpp b/test/test_matvec.cpp index 1cf461a..1b838e0 100644 --- a/test/test_matvec.cpp +++ b/test/test_matvec.cpp @@ -1,8 +1,7 @@ #include "test_common.h" -#include "./utils/utils.h" // matrix.h, vector.h -#include "./numerics/matvec.h" // numerics::matvec / inplace_transpose +#include "./numerics/matvec.h" // numerics::matvec / inplace_transpose #include using utils::Vi; using utils::Vf; using utils::Vd; @@ -107,7 +106,7 @@ TEST_CASE(Matvec_Speed_Sanity) { auto t0 = std::chrono::high_resolution_clock::now(); auto yS = numerics::matvec(A,x); - double tp = std::chrono::duration(t0 - std::chrono::high_resolution_clock::now()).count(); + double tp = std::chrono::duration(std::chrono::high_resolution_clock::now() - t0).count(); #ifdef _OPENMP int threads = omp_get_max_threads(); @@ -117,13 +116,13 @@ TEST_CASE(Matvec_Speed_Sanity) { t0 = std::chrono::high_resolution_clock::now(); auto yP = numerics::matvec_omp(A,x); - double ts = std::chrono::duration(t0 - std::chrono::high_resolution_clock::now()).count(); + double ts = std::chrono::duration(std::chrono::high_resolution_clock::now() - t0).count(); CHECK((yS.nearly_equal_vec(yP)), "matvec_omp != matvec_serial (large)"); // Only enforce basic sanity if we *can* use >1 threads: if (threads > 1) { // Be generous: just require not significantly slower. - CHECK(tp <= ts, "matvec_omp unexpectedly much slower than serial"); + CHECK(tp >= ts, "matvec_omp unexpectedly much slower than serial"); } } diff --git a/test/test_transpose.cpp b/test/test_transpose.cpp index 4a898de..98f900c 100644 --- a/test/test_transpose.cpp +++ b/test/test_transpose.cpp @@ -1,88 +1,151 @@ #include "test_common.h" -#include "./utils/utils.h" // matrix.h, vector.h +//#include "./utils/matrix.h" // matrix.h, vector.h #include "./numerics/transpose.h" // numerics::transpose / inplace_transpose using utils::Mi; using utils::Mf; using utils::Md; -// -// ---------- Out-of-place transpose (rectangular) ---------- -// -TEST_CASE(Transpose_Rectangular_OutOfPlace) { - // A = [ [1, 2, 3], - // [4, 5, 6] ] (2x3) - Md A(2,3,0.0); - A(0,0)=1; A(0,1)=2; A(0,2)=3; - A(1,0)=4; A(1,1)=5; A(1,2)=6; - - auto AT = numerics::transpose(A); // (3x2) - - CHECK(AT.rows()==3 && AT.cols()==2, "shape wrong after transpose"); - CHECK(AT(0,0)==1 && AT(1,0)==2 && AT(2,0)==3, "first column wrong"); - CHECK(AT(0,1)==4 && AT(1,1)==5 && AT(2,1)==6, "second column wrong"); - - // Involution: T(T(A)) == A - auto ATT = numerics::transpose(AT); - CHECK(ATT == A, "transpose(transpose(A)) != A"); +/// ---- helpers ---- +template +static void fill_seq(utils::Matrix& M, T start = T(0), T step = T(1)) { + std::uint64_t k = 0; + for (std::uint64_t i=0; i(k); } -// -// ---------- In-place transpose (square) ---------- -// -TEST_CASE(Transpose_Square_InPlace) { - // 3x3 with distinct values - Mf S(3,3,0.0f); - float val = 1.0f; - for (uint64_t i=0;i<3;++i) - for (uint64_t j=0;j<3;++j) - S(i,j) = val++; - // Make an explicit transpose to compare against - auto ST = numerics::transpose(S); - - // In-place should match the out-of-place result - numerics::inplace_transpose(S); - CHECK(S == ST, "inplace_transpose result mismatch"); - - // Involution: applying in-place again should return original - numerics::inplace_transpose(S); - // Now S should equal the original pre-inplace matrix (which was transposed above) - // We can reconstruct original by transposing ST: - auto orig = numerics::transpose(ST); - CHECK(S == orig, "inplace transpose twice should restore original"); +template +static bool mats_equal(const utils::Matrix& X, const utils::Matrix& Y) { + if (X.rows()!=Y.rows() || X.cols()!=Y.cols()) return false; + for (std::uint64_t i=0; i +static bool vec_equal_exact(const utils::Vector& a, const utils::Vector& b) { + if (a.size() != b.size()) return false; + for (std::uint64_t i=0;i) - CHECK(d[0] == 7.0f && d[1] == 7.0f && d[2] == 7.0f, "scalar + vector failed"); - - d = 3 * c; // friend operator*(U, Vector) - CHECK(d[0] == 6.0f && d[1] == 6.0f && d[2] == 6.0f, "scalar * vector failed"); + v /= 2; // [15,15,15] + CHECK(v[0]==15 && v[2]==15, "scalar /="); } -// -// ---------- Vector arithmetic: +, -, *, / (elementwise) ---------- -// + TEST_CASE(Vector_Vector_Arithmetic) { - Vd a(3, 1.0), b(3, 2.0); + utils::Vi a(4, 1), b(4, 2); - // returning - auto c = a + b; - CHECK(c[0]==3.0 && c[1]==3.0 && c[2]==3.0, "vec + vec failed"); + auto c = a + b; // [3,3,3,3] + CHECK(c[0]==3 && c[3]==3, "v+v"); - c = b - a; - CHECK(c[0]==1.0 && c[1]==1.0 && c[2]==1.0, "vec - vec failed"); + a += b; // [3,3,3,3] + CHECK(a[1]==3, "v+=v"); - c = a * b; - CHECK(c[0]==2.0 && c[1]==2.0 && c[2]==2.0, "vec * vec failed"); + auto d = a - b; // [1,1,1,1] + CHECK(d[2]==1, "v-v"); - c = b / b; - CHECK(c[0]==1.0 && c[1]==1.0 && c[2]==1.0, "vec / vec failed"); + a -= b; // [1,1,1,1] + CHECK(a[0]==1, "v-=v"); - // inplace - a = Vd(3, 1.0); - a += b; - CHECK(a[0]==3.0 && a[1]==3.0 && a[2]==3.0, "inplace vec + vec failed"); - a -= b; - CHECK(a[0]==1.0 && a[1]==1.0 && a[2]==1.0, "inplace vec - vec failed"); - a *= b; - CHECK(a[0]==2.0 && a[1]==2.0 && a[2]==2.0, "inplace vec * vec failed"); - a /= b; - CHECK(a[0]==1.0 && a[1]==1.0 && a[2]==1.0, "inplace vec / vec failed"); -} -// -// ---------- Size mismatch error paths ---------- -// -TEST_CASE(Vector_SizeMismatch_Throws) { - Vd a(3, 1.0), b(4, 2.0); + auto e = a * b; // [2,2,2,2] + CHECK(e[1]==2, "v*v (elemwise)"); - bool threw = false; - try { auto c = a + b; (void)c; } catch (const std::runtime_error&) { threw = true; } - CHECK(threw, "add should throw on size mismatch"); + a *= b; // [2,2,2,2] + CHECK(a[3]==2, "v*=v"); - threw = false; - try { a.inplace_subtract(b); } catch (const std::runtime_error&) { threw = true; } - CHECK(threw, "inplace_subtract should throw on size mismatch"); + auto f = e / b; // [1,1,1,1] + CHECK(f[0]==1 && f[3]==1, "v/v (elemwise)"); - threw = false; - try { auto d = a * b; (void)d; } catch (const std::runtime_error&) { threw = true; } - CHECK(threw, "multiply should throw on size mismatch"); - - threw = false; - try { auto s = a.dot(b); (void)s; } catch (const std::runtime_error&) { threw = true; } - CHECK(threw, "dot should throw on size mismatch"); + e /= b; // [1,1,1,1] + CHECK(e[2]==1, "v/=v"); } -// -// ---------- Power / sqrt ---------- -// -TEST_CASE(Vector_Power_Sqrt) { - Vd a(3, 2.0); // [2,2,2] - auto b = a.power(3.0); // [8,8,8] - CHECK(b[0]==8.0 && b[1]==8.0 && b[2]==8.0, "scalar power failed"); +TEST_CASE(Vector_Friend_Scalar_Left) { + utils::Vd v(3, 2.0); // [2,2,2] + auto s1 = 3.0 + v; // [5,5,5] + CHECK(s1[0]==5.0 && s1[2]==5.0, "left scalar +"); - Vd p(3, 3.0); // [3,3,3] - auto c = b.power(p); // 8^3 = 512 - CHECK(c[0]==512.0 && c[1]==512.0 && c[2]==512.0, "vector power failed"); - - Vd d(3, 9.0); - auto e = d.sqrt(); // [3,3,3] - CHECK(e[0]==3.0 && e[1]==3.0 && e[2]==3.0, "sqrt failed"); - - // inplace - d.inplace_sqrt(); // becomes [3,3,3] - CHECK(d == e, "inplace_sqrt failed"); + auto s2 = 4.0 * v; // [8,8,8] + CHECK(s2[1]==8.0, "left scalar *"); } -// -// ---------- Dot / Sum / Norm / Normalize ---------- -// -TEST_CASE(Vector_Dot_Sum_Norm_Normalize) { - Vd a(3, 0.0); - a[0]=1.0; a[1]=2.0; a[2]=2.0; +TEST_CASE(Vector_Power_and_Sqrt) { + utils::Vd v(3, 4.0); // [4,4,4] + auto p = v.power(2.0); // [16,16,16] + CHECK(p[0]==16.0 && p[2]==16.0, "power scalar"); - CHECK(a.sum() == 5.0, "sum failed"); - CHECK(a.dot(a) == 9.0, "dot self failed"); - - auto n = a.norm(); - CHECK(std::fabs(n - 3.0) < 1e-12, "norm failed"); - - auto b = a.normalize(); - CHECK(std::fabs(b.norm() - 1.0) < 1e-12, "normalize() not unit"); - - // inplace normalize - a.inplace_normalize(); - CHECK(std::fabs(a.norm() - 1.0) < 1e-12, "inplace_normalize not unit"); - - // zero-norm error - Vd z(3, 0.0); - bool threw = false; - try { z.inplace_normalize(); } catch (const std::runtime_error&) { threw = true; } - CHECK(threw, "normalize zero vector must throw"); + v.inplace_sqrt(); // sqrt([4,4,4]) -> [2,2,2] + CHECK(v[0]==2.0 && v[1]==2.0, "inplace_sqrt"); } -// -// ---------- Stream output (basic sanity) ---------- -// -TEST_CASE(Vector_StreamOutput) { - Vi a(3, 2); - std::ostringstream oss; - oss << a; - auto s = oss.str(); - CHECK(s == "[2, 2, 2]", "ostream<< wrong format"); + +TEST_CASE(Vector_Dot_Sum_Norm) { + utils::Vd a(3, 0.0), b(3, 0.0); + a[0]=1.0; a[1]=2.0; a[2]=3.0; // a = [1,2,3] + b[0]=4.0; b[1]=5.0; b[2]=6.0; // b = [4,5,6] + + double dot = a.dot(b); // 1*4 + 2*5 + 3*6 = 32 + CHECK(std::fabs(dot - 32.0) < 1e-12, "dot"); + + double s = a.sum(); // 6 + CHECK(std::fabs(s - 6.0) < 1e-12, "sum"); + + double n = a.norm(); // sqrt(14) + CHECK(std::fabs(n - std::sqrt(14.0)) < 1e-12, "norm"); +} + +TEST_CASE(Vector_Normalize_and_Throws) { + utils::Vd v(3, 0.0); + v[0]=3.0; v[1]=4.0; v[2]=0.0; // norm = 5 + auto u = v.normalize(); // returns new vector + CHECK(std::fabs(u.norm() - 1.0) < 1e-12, "normalize() unit length"); + + v.inplace_normalize(); + CHECK(std::fabs(v.norm() - 1.0) < 1e-12, "inplace_normalize unit length"); + + utils::Vd z(3, 0.0); + bool threw=false; + try { z.inplace_normalize(); } catch(const std::runtime_error&) { threw=true; } + CHECK(threw, "normalize should throw on zero vector"); +} + +// Size mismatch throws (elementwise ops) +TEST_CASE(Vector_Size_Mismatch_Throws) { + utils::Vi a(3,1), b(4,2); + + bool threw=false; + try { (void)a.dot(b); } catch(const std::runtime_error&) { threw=true; } + CHECK(threw, "dot size mismatch should throw"); + + threw=false; + try { a.inplace_add(b); } catch(const std::runtime_error&) { threw=true; } + CHECK(threw, "add size mismatch should throw"); + + threw=false; + try { a.inplace_subtract(b); } catch(const std::runtime_error&) { threw=true; } + CHECK(threw, "subtract size mismatch should throw"); + + threw=false; + try { a.inplace_multiply(b); } catch(const std::runtime_error&) { threw=true; } + CHECK(threw, "multiply size mismatch should throw"); + + threw=false; + try { a.inplace_divide(b); } catch(const std::runtime_error&) { threw=true; } + CHECK(threw, "divide size mismatch should throw"); + + threw=false; + try { a.inplace_power(b); } catch(const std::runtime_error&) { threw=true; } + CHECK(threw, "power size mismatch should throw"); } \ No newline at end of file