diff --git a/assets/logo/logo_1600.png b/assets/logo/logo_1600.png new file mode 100644 index 0000000..3f17bda Binary files /dev/null and b/assets/logo/logo_1600.png differ diff --git a/makefile b/makefile new file mode 100644 index 0000000..3a3e3a6 --- /dev/null +++ b/makefile @@ -0,0 +1,169 @@ + +# Compiler and flags +CC := g++ +CXXFLAGS := -std=c++14 -Wall -Iinclude -O3 -march=native -fopenmp +LDFLAGS := -fopenmp +# Compiles .h files when there's been a change +DEPFLAGS := -MMD -MP + +CXX ?= g++ + + + + +# Directories +SRC_DIR := src +INC_DIR := include +OBJ_DIR := obj +BIN_DIR := bin +TEST_BIN := $(BIN_DIR)/tests + + + + + +# +# === Executable Name === +TARGET := $(BIN_DIR)/abc_lab + +# === Find all .cpp files recursively in src/ === +SRCS := $(shell find $(SRC_DIR) -name '*.cpp') + +# === Convert src/foo/bar.cpp → obj/foo/bar.o === +OBJS := $(patsubst $(SRC_DIR)/%.cpp,$(OBJ_DIR)/%.o,$(SRCS)) + + +# === Test sources === +TEST_SRCS := $(shell find test -name 'test_*.cpp') +TEST_OBJS := $(patsubst test/%.cpp, $(OBJ_DIR)/test/%.o, $(TEST_SRCS)) + +# Compiles .h files when there's been a change +DEPS := $(OBJS:.o=.d) $(TEST_OBJS:.o=.d) $(TEST_MAIN:.o=.d) +-include $(DEPS) + +# The single file that defines TEST_MAIN / main() +TEST_MAIN := $(OBJ_DIR)/test/test_all.o + + +# === OpenMP runtime configuration (override-able) === +OMP_PROC_BIND ?= close # close|spread|master +OMP_PLACES ?= cores # cores|threads|sockets +OMP_MAX_LEVELS ?= 1 # 1 = no nested teams; set 2+ to allow nesting +OMP_THREADS ?= 16 # e.g. "16" or "8,4" for nested (outer,inner) +OMP_DYNAMIC ?= TRUE # TRUE/FALSE: let runtime adjust threads +OMP_SCHEDULE ?= STATIC # STATIC recommended for matvec/matmul +OMP_DISPLAY_ENV ?= FALSE # TRUE to print runtime config at startup + +# Export OMP defaults so child makes or tools see them (not strictly required) +export OMP_PROC_BIND +export OMP_PLACES +export OMP_MAX_LEVELS +export OMP_THREADS +export OMP_DYNAMIC +export OMP_SCHEDULE +export OMP_DISPLAY_ENV + +# What belongs to "run" vs "test" +RUN_DEPS := $(OBJS:.o=.d) +TEST_DEPS := $(TEST_OBJS:.o=.d) $(TEST_MAIN:.o=.d) + +RUN_ARTIFACTS := $(TARGET) $(OBJS) $(RUN_DEPS) +TEST_ARTIFACTS := $(TEST_BIN) $(TEST_OBJS) $(TEST_MAIN) $(TEST_DEPS) + + +# === Default Target === +all: $(TARGET) + +# === Linking final executable === +$(TARGET): $(OBJS) + @mkdir -p $(dir $@) + $(CXX) $(CXXFLAGS) -o $@ $^ $(LDFLAGS) + +# === Compiling source files to object files === +$(OBJ_DIR)/%.o: $(SRC_DIR)/%.cpp + @mkdir -p $(dir $@) + $(CXX) $(CXXFLAGS) $(DEPFLAGS) -c $< -o $@ + +# === Run with OpenMP env set only for the run === +.PHONY: run +run: clean-test $(TARGET) + @echo ">>> OMP_PROC_BIND=$(OMP_PROC_BIND)" + @echo ">>> OMP_PLACES=$(OMP_PLACES)" + @echo ">>> OMP_MAX_ACTIVE_LEVELS=$(OMP_MAX_LEVELS)" + @echo ">>> OMP_NUM_THREADS=$(OMP_THREADS)" + @echo ">>> OMP_DYNAMIC=$(OMP_DYNAMIC)" + @echo ">>> OMP_SCHEDULE=$(OMP_SCHEDULE)" + @echo ">>> OMP_DISPLAY_ENV=$(OMP_DISPLAY_ENV)" + @OMP_PROC_BIND=$(OMP_PROC_BIND) \ + OMP_PLACES=$(OMP_PLACES) \ + OMP_MAX_ACTIVE_LEVELS=$(OMP_MAX_LEVELS) \ + OMP_NUM_THREADS="$(OMP_THREADS)" \ + OMP_DYNAMIC=$(OMP_DYNAMIC) \ + OMP_SCHEDULE=$(OMP_SCHEDULE) \ + OMP_DISPLAY_ENV=$(OMP_DISPLAY_ENV) \ + ./$(TARGET) + +# Handy presets +.PHONY: run-single-core +run-single-core: ## Single-level one core (good default) + $(MAKE) run OMP_MAX_LEVELS=1 OMP_THREADS=1 OMP_PROC_BIND=close OMP_PLACES=cores + +# Handy presets +.PHONY: run-multi-core +run-multi-core: ## Single-level parallel (good default) + $(MAKE) run OMP_MAX_LEVELS=1 OMP_THREADS=16 OMP_PROC_BIND=close OMP_PLACES=cores + +.PHONY: run-nested-multi-core +run-nested-multi-core: ## Two-level nested (outer,inner), adjust to your cores + $(MAKE) run OMP_MAX_LEVELS=2 OMP_THREADS="2,8" OMP_PROC_BIND=close OMP_PLACES=cores + + + +# Optional: print debug info +.PHONY: info +info: + @echo "Source files: $(SRCS)" + @echo "Object files: $(OBJS)" + @echo "CXXFLAGS: $(CXXFLAGS)" + @echo "LDFLAGS: $(LDFLAGS)" + + +.PHONY: test +test: clean-run $(TEST_BIN) + @echo ">>> OMP_PROC_BIND=$(OMP_PROC_BIND)" + @echo ">>> OMP_PLACES=$(OMP_PLACES)" + @echo ">>> OMP_MAX_ACTIVE_LEVELS=$(OMP_MAX_LEVELS)" + @echo ">>> OMP_NUM_THREADS=$(OMP_THREADS)" + @echo ">>> OMP_DYNAMIC=$(OMP_DYNAMIC)" + @echo ">>> OMP_SCHEDULE=$(OMP_SCHEDULE)" + @echo ">>> OMP_DISPLAY_ENV=$(OMP_DISPLAY_ENV)" + @OMP_PROC_BIND=$(OMP_PROC_BIND) \ + OMP_PLACES=$(OMP_PLACES) \ + OMP_MAX_ACTIVE_LEVELS=$(OMP_MAX_LEVELS) \ + OMP_NUM_THREADS="$(OMP_THREADS)" \ + OMP_DYNAMIC=$(OMP_DYNAMIC) \ + OMP_SCHEDULE=$(OMP_SCHEDULE) \ + OMP_DISPLAY_ENV=$(OMP_DISPLAY_ENV) \ + $(TEST_BIN) + +$(TEST_BIN): $(TEST_OBJS) $(TEST_MAIN) + @mkdir -p $(BIN_DIR) + $(CXX) $(CXXFLAGS) -o $@ $^ $(LDFLAGS) + +$(OBJ_DIR)/test/%.o: test/%.cpp + @mkdir -p $(dir $@) + $(CXX) $(CXXFLAGS) $(DEPFLAGS) -c $< -o $@ + +# Clean up +.PHONY: clean +clean: + rm -rf $(OBJ_DIR) $(BIN_DIR) + +.PHONY: clean-run clean-test +clean-run: + @echo "Cleaning run artifacts..." + @rm -f $(RUN_ARTIFACTS) + +clean-test: + @echo "Cleaning test artifacts..." + @rm -f $(TEST_ARTIFACTS) \ No newline at end of file diff --git a/test/.gitkeep b/test/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/test/test_all.cpp b/test/test_all.cpp new file mode 100644 index 0000000..ed3c41b --- /dev/null +++ b/test/test_all.cpp @@ -0,0 +1,2 @@ +#define TEST_MAIN +#include "test_common.h" \ No newline at end of file diff --git a/test/test_common.h b/test/test_common.h new file mode 100644 index 0000000..8d92710 --- /dev/null +++ b/test/test_common.h @@ -0,0 +1,52 @@ +#ifndef _test_common_n_ +#define _test_common_n_ + +#pragma once +#include +#include +#include +#include + +struct TestFailure : public std::runtime_error { + using std::runtime_error::runtime_error; +}; + + +#define CHECK(cond, msg) do { if (!(cond)) throw TestFailure(msg); } while (0) +#define CHECK_EQ(a,b,msg) do { if (!((a)==(b))) { throw TestFailure(std::string(msg) + " (" #a " != " #b ")"); } } while (0) + +#define TEST_CASE(name) \ + static void name(); \ + struct name##_registrar { name##_registrar(){ TestRegistry::add(#name, &name);} } name##_registrar_instance; \ + static void name() + +struct TestRegistry { + using Fn = void(*)(); + static std::vector>& list() { + static std::vector> v; return v; + } + static void add(const std::string& name, Fn fn) { list().push_back({name, fn}); } +}; + +// Default test runner main() +#ifdef TEST_MAIN +int main() { + int fails = 0; + for (auto& t : TestRegistry::list()) { + try { + t.second(); + //std::cout << "[PASS] " << t.first << "\n"; + } catch (const TestFailure& e) { + std::cerr << "[FAIL] " << t.first << " -> " << e.what() << "\n"; + ++fails; + } catch (const std::exception& e) { + std::cerr << "[ERROR] " << t.first << " -> " << e.what() << "\n"; + ++fails; + } + } + std::cout << (fails ? "Some tests failed ❌\n" : "All tests passed ✅\n"); + return fails ? 1 : 0; +} +#endif + +#endif // _test_common_n_ \ No newline at end of file diff --git a/test/test_eye.cpp b/test/test_eye.cpp new file mode 100644 index 0000000..db959ee --- /dev/null +++ b/test/test_eye.cpp @@ -0,0 +1,155 @@ +#include "test_common.h" +#include "./utils/matrix.h" +#include "./numerics/initializers/eye.h" + +#include + + + +// Tiny helper +template +static bool is_identity(const utils::Matrix& M) { + if (M.rows() != M.cols()) return false; + for (std::uint64_t i = 0; i < M.rows(); ++i) + for (std::uint64_t j = 0; j < M.cols(); ++j) + if ((i == j && M(i,j) != T{1}) || + (i != j && M(i,j) != T{0})) return false; + return true; +} + + +// ============ Basic correctness ============ +TEST_CASE(Generate_EYE_double) { + utils::Matrix I = numerics::eye(5); + CHECK_EQ(I.rows(), 5, "rows should be 5"); + CHECK_EQ(I.cols(), 5, "cols should be 5"); + CHECK(is_identity(I), "eye(5) is not identity"); +} + +TEST_CASE(Generate_EYE_int) { + utils::Matrix I = numerics::eye(3); + CHECK_EQ(I.rows(), 3, "rows should be 3"); + CHECK_EQ(I.cols(), 3, "cols should be 3"); + CHECK(is_identity(I), "eye(3) is not identity"); +} + +TEST_CASE(Inplace_EYE_resize_creates_identity) { + utils::Md A; // empty + numerics::inplace_eye(A, 7); // resize to 7x7 and set identity + CHECK_EQ(A.rows(), 7, "rows should be 7"); + CHECK_EQ(A.cols(), 7, "cols should be 7"); + CHECK(is_identity(A), "inplace_eye resize did not create identity"); +} + +TEST_CASE(Inplace_EYE_inplace_on_square) { + utils::Md A(4,4, 42.0); // junk values + numerics::inplace_eye(A); // N==0 → must be square, zero + diag + CHECK(is_identity(A), "inplace_eye on square did not produce identity"); +} + +TEST_CASE(Inplace_EYE_throws_on_non_square) { + utils::Md A(2,3, 5.0); + bool threw = false; + try { + numerics::inplace_eye(A); // N==0 and non-square → throws + } catch (const std::runtime_error&) { + threw = true; + } + CHECK(threw, "inplace_eye should throw on non-square when N==0"); +} + +// ============ OpenMP variants (compiled only when -fopenmp is used) ============ + +#ifdef _OPENMP +TEST_CASE(EYE_OMP_matches_serial) { + utils::Md I1 = numerics::eye(64); + utils::Md I2 = numerics::eye_omp(64); + + for (uint64_t i = 0; i < I1.rows(); ++i){ + for (uint64_t j = 0; j < I1.cols(); ++j){ + CHECK(I1(i,j) == I2(i,j), "eye_omp != eye"); + } + + } + utils::Md A(64,64, 3.14); + numerics::inplace_eye_omp(A); // N==0 → must be square + CHECK(is_identity(A), "inplace_eye_omp did not produce identity"); +} + + +TEST_CASE(EYE_OMP_speed) { + + uint64_t prev = omp_get_max_threads(); + if (prev <= 1) return; + + omp_set_num_threads(16); + uint64_t N = 16384; + + utils::Matrix I1(N,N,1), I2(N,N,1); + + auto t0 = std::chrono::high_resolution_clock::now(); + numerics::inplace_eye(I1); + double t1 = std::chrono::duration(std::chrono::high_resolution_clock::now() - t0).count(); + + t0 = std::chrono::high_resolution_clock::now(); + numerics::inplace_eye_omp(I2); + double t2 = std::chrono::duration(std::chrono::high_resolution_clock::now() - t0).count(); + + omp_set_num_threads(prev); + + CHECK(t2 <= t1, "collapse_omp: multi-thread slower than single-thread"); +} + + +TEST_CASE(EYE_OMP_nested_ok) { + // Ensure nested regions are allowed for this test (won't error if not) + + uint64_t prev_levels = omp_get_max_active_levels(); + uint64_t prev = omp_get_max_threads(); + + omp_set_max_active_levels(2); + // Outer team size (small); inner will be large when nesting is allowed + const int outer_threads = 1; + // Inner team size to request during nested-enabled run + const int inner_threads = 12; + uint64_t N = 4096*8; + utils::Matrix I1(N, N, 7); + utils::Matrix I2(N, N, 7); + + + // ---------- baseline: inner runs with a small team (no outer region) ---------- + omp_set_max_active_levels(1); // disable nesting for baseline + omp_set_num_threads(outer_threads); + + for (uint64_t i = 0; i < 2; ++i){ + numerics::inplace_eye_omp(I1); + } + + + // ---------- nested: outer×inner (only one outer thread launches inner) ---------- + omp_set_max_active_levels(2); // allow one nested level + + #pragma omp parallel num_threads(outer_threads) + { + #pragma omp single // avoid racing on I1 + { + omp_set_num_threads(inner_threads); + for (uint64_t i = 0; i < 2; ++i){ + numerics::inplace_eye_omp(I2); + } + } + } + omp_set_max_active_levels(prev_levels); + omp_set_num_threads(prev); + CHECK(is_identity(I1), "EYE_OMP_nested did not produce identity"); + CHECK(is_identity(I2), "EYE_OMP_nested did not produce identity"); + +} + + +TEST_CASE(EYE_OMP_auto_is_identity) { + auto I = numerics::eye_omp_auto(32); + CHECK(is_identity(I), "eye_omp_auto did not produce identity"); +} + +#endif diff --git a/test/test_interpolation1d.cpp b/test/test_interpolation1d.cpp new file mode 100644 index 0000000..756e644 --- /dev/null +++ b/test/test_interpolation1d.cpp @@ -0,0 +1,155 @@ +#include "test_common.h" + +#include "./utils/matrix.h" +#include "./utils/vector.h" + + +#include "./numerics/interpolation1d.h" + + +// ------------ helpers ------------ +template +static void make_uniform_xy(utils::Vector& x, utils::Vector& y, + std::uint64_t N, T x0, T dx, + T a, T b) { + x.resize(N, T(0)); + y.resize(N, T(0)); + for (std::uint64_t i=0; i +static void make_uniform_xy_fun(utils::Vector& x, utils::Vector& y, + std::uint64_t N, T x0, T dx, + T (*f)(T)) { + x.resize(N, T(0)); + y.resize(N, T(0)); + for (std::uint64_t i=0; i +static inline bool almost_eq(T a, T b, double tol=1e-12) { + return std::fabs(double(a)-double(b)) <= tol; +} + +// ------------ tests ------------ + +// 1) All interpolants reproduce the data points exactly (or to tiny FP error) +TEST_CASE(Interp_Reproduces_Data_Nodes) { + const std::uint64_t N = 25; + utils::Vd x, y; + // linear data: y = 0.1 x + 0.9 on x = 1..25 + make_uniform_xy(x, y, N, 1.0, 1.0, 0.1, 0.9); + + numerics::interp_linear lin(x,y); + numerics::interp_polynomial pol(x,y, 3); + numerics::interp_cubic_spline spl(x,y); + numerics::interp_rational rat(x,y, 2); + numerics::interp_barycentric bar(x,y, 2); + + for (std::uint64_t i=0; i(x, y, N, 1.0, 1.0, 0.1, 0.9); + + numerics::interp_linear lin(x,y); + numerics::interp_polynomial pol(x,y, 3); + numerics::interp_cubic_spline spl(x,y); + numerics::interp_rational rat(x,y, 3); + numerics::interp_barycentric bar(x,y, 3); + + std::vector qs = { 1.5, 5.5, 5.51, 50.01, 99.9 }; + for (double q : qs) { + const double ytrue = 0.1*q + 0.9; + CHECK(almost_eq(lin.interp(q), ytrue, 1e-9), "linear off-grid mismatch"); + CHECK(almost_eq(pol.interp(q), ytrue, 1e-9), "polynomial off-grid mismatch"); + CHECK(almost_eq(spl.interp(q), ytrue, 1e-9), "spline off-grid mismatch"); + CHECK(almost_eq(rat.interp(q), ytrue, 1e-9), "rational off-grid mismatch"); + CHECK(almost_eq(bar.interp(q), ytrue, 1e-9), "barycentric off-grid mismatch"); + } + + // endpoints should match exactly (no extrapolation) + CHECK(almost_eq(lin.interp(x[0]), y[0]), "linear endpoint"); + CHECK(almost_eq(lin.interp(x[N-1]), y[N-1]), "linear endpoint"); + CHECK(almost_eq(spl.interp(x[0]), y[0]), "spline endpoint"); + CHECK(almost_eq(spl.interp(x[N-1]), y[N-1]), "spline endpoint"); +} + +// 3) Quadratic dataset: degree-2 polynomial interpolation should be exact +static double f_quad(double t) { return t*t - 3.0*t + 2.0; } // (t-1)(t-2) + +TEST_CASE(Interp_Polynomial_Deg2_Exact_On_Quadratic) { + const std::uint64_t N = 21; + utils::Vd x, y; + make_uniform_xy_fun(x, y, N, -2.0, 0.5, &f_quad); + + numerics::interp_polynomial pol(x,y, 3); + + std::vector qs = { -1.75, -0.1, 0.3, 1.4, 3.9, 7.25 }; + for (double q : qs) { + // only test inside the data domain to avoid extrap behavior + if (q >= x[0] && q <= x[N-1]) { + const double ytrue = f_quad(q); + //std::cout << pol.interp(q) << ", " << ytrue << ", "<< q <(x, y, N, 0.0, 0.5, &f_cos); + + numerics::interp_linear lin(x,y); + numerics::interp_polynomial pol(x,y, 3); // small local degree + numerics::interp_cubic_spline spl(x,y); + numerics::interp_rational rat(x,y, 3); + numerics::interp_barycentric bar(x,y, 3); + + // sample a handful of interior points and require all methods to be mutually close + std::vector qs = { 1.25, 7.75, 12.1, 18.6, 22.75 }; + for (double q : qs) { + // skip if q is outside just in case + if (q < x[0] || q > x[N-1]) continue; + + double yl = lin.interp(q); + double yp = pol.interp(q); + double ys = spl.interp(q); + double yr = rat.interp(q); + double yb = bar.interp(q); + + //std::cout << "lin: " << yl << std::endl; + //std::cout << "pol: " << yp << std::endl; + //std::cout << "spl: " << ys << std::endl; + //std::cout << "rat: " << yr << std::endl; + //std::cout << "bar: " << yb << std::endl; + + // spline is usually the smoothest; use it as the anchor + CHECK(almost_eq(yl, ys, 5e-4), "linear vs spline"); + CHECK(almost_eq(yp, ys, 5e-4), "polynomial vs spline"); + CHECK(almost_eq(yr, ys, 5e-4), "rational vs spline"); + CHECK(almost_eq(yb, ys, 5e-4), "barycentric vs spline"); + } +} \ No newline at end of file diff --git a/test/test_inverse.cpp b/test/test_inverse.cpp new file mode 100644 index 0000000..f4de9ab --- /dev/null +++ b/test/test_inverse.cpp @@ -0,0 +1,141 @@ +#include "test_common.h" + +#include "./utils/matrix.h" +#include "./utils/vector.h" +#include "./numerics/inverse.h" +#include "./numerics/matmul.h" + + + +// ---------- helpers ---------- +template +static utils::Matrix identity(std::uint64_t n) { + utils::Matrix I(n,n,T(0)); + for (std::uint64_t i=0;i +static bool mats_equal_tol(const utils::Matrix& X, + const utils::Matrix& Y, + double tol = 1e-12) { + if (X.rows()!=Y.rows() || X.cols()!=Y.cols()) return false; + for (std::uint64_t i=0;i tol) return false; + return true; +} + +// A small well-conditioned SPD 3x3 +static utils::Matrix make_A3() { + utils::Matrix A(3,3,0.0); + // [ 4 3 0 + // 3 4 -1 + // 0 -1 4 ] + A(0,0)=4; A(0,1)=3; A(0,2)=0; + A(1,0)=3; A(1,1)=4; A(1,2)=-1; + A(2,0)=0; A(2,1)=-1; A(2,2)=4; + return A; +} + +// A random-ish SPD 5x5 (constructed deterministically) +static utils::Matrix make_A5_spd() { + utils::Matrix R(5,5,0.0); + // Fill R with a simple pattern + double v=1.0; + for (std::uint64_t i=0;i<5;++i) + for (std::uint64_t j=0;j<5;++j, v+=0.37) + R(i,j) = std::fmod(v, 3.0) - 1.0; // values in [-1,2) + // A = R^T R + 5 I → SPD and well-conditioned + utils::Matrix Rt(5,5,0.0); + for (std::uint64_t i=0;i<5;++i) + for (std::uint64_t j=0;j<5;++j) + Rt(i,j) = R(j,i); + auto RtR = numerics::matmul(Rt, R); + for (std::uint64_t i=0;i<5;++i) RtR(i,i) += 5.0; + return RtR; +} + +// ---------- tests ---------- + +TEST_CASE(Inverse_GJ_3x3) { + auto A = make_A3(); + auto A_copy = A; + + auto Inv = numerics::inverse(A, "Gauss-Jordan"); // non-inplace + auto I = identity(3); + + auto AInv = numerics::matmul(A, Inv); + auto InvA = numerics::matmul(Inv, A); + + CHECK(mats_equal_tol(AInv, I, 1e-11), "A*Inv != I (Gauss-Jordan)"); + CHECK(mats_equal_tol(InvA, I, 1e-11), "Inv*A != I (Gauss-Jordan)"); + + // A should be unchanged by numerics::inverse (it copies internally) + CHECK(mats_equal_tol(A, A_copy, 1e-15), "inverse() modified input A"); +} + +TEST_CASE(Inverse_LU_3x3) { + auto A = make_A3(); + + auto Inv = numerics::inverse(A, "LU"); + auto I = identity(3); + + auto AInv = numerics::matmul(A, Inv); + auto InvA = numerics::matmul(Inv, A); + + CHECK(mats_equal_tol(AInv, I, 1e-11), "A*Inv != I (LU)"); + CHECK(mats_equal_tol(InvA, I, 1e-11), "Inv*A != I (LU)"); +} + +TEST_CASE(Inplace_Inverse_Both_Methods_Agree_5x5) { + auto A = make_A5_spd(); + + auto GJ = A; numerics::inplace_inverse(GJ, "Gauss-Jordan"); + auto LU = A; numerics::inplace_inverse(LU, "LU"); + + // Both should be valid inverses + auto I = identity(5); + CHECK(mats_equal_tol(numerics::matmul(A, GJ), I, 1e-10), "A*GJ != I"); + CHECK(mats_equal_tol(numerics::matmul(GJ, A), I, 1e-10), "GJ*A != I"); + CHECK(mats_equal_tol(numerics::matmul(A, LU), I, 1e-10), "A*LU != I"); + CHECK(mats_equal_tol(numerics::matmul(LU, A), I, 1e-10), "LU*A != I"); + + // And they should be very close to each other + CHECK(mats_equal_tol(GJ, LU, 1e-10), "Gauss-Jordan inverse != LU inverse"); +} + +TEST_CASE(Inverse_NonSquare_Throws) { + utils::Matrix A(2,3,1.0); + bool threw=false; + try { auto B = numerics::inverse(A, "Gauss-Jordan"); (void)B; } catch(const std::runtime_error&) { threw=true; } + CHECK(threw, "inverse should throw on non-square (Gauss-Jordan)"); + + threw=false; + try { numerics::inplace_inverse(A, "LU"); } catch(const std::runtime_error&) { threw=true; } + CHECK(threw, "inplace_inverse should throw on non-square (LU)"); +} + +TEST_CASE(Inverse_Singular_Throws) { + utils::Matrix A(3,3,0.0); + // Two identical rows → singular + A(0,0)=1; A(0,1)=2; A(0,2)=3; + A(1,0)=1; A(1,1)=2; A(1,2)=3; + A(2,0)=0; A(2,1)=1; A(2,2)=4; + + bool threw=false; + try { auto B = numerics::inverse(A, "Gauss-Jordan"); (void)B; } catch(const std::runtime_error&) { threw=true; } + CHECK(threw, "inverse(GJ) should throw on singular"); + + threw=false; + try { auto B = numerics::inverse(A, "LU"); (void)B; } catch(const std::runtime_error&) { threw=true; } + CHECK(threw, "inverse(LU) should throw on singular"); +} + +TEST_CASE(Inverse_Invalid_Method_Throws) { + auto A = make_A3(); + bool threw=false; + try { auto B = numerics::inverse(A, "NotAThing"); (void)B; } catch(const std::runtime_error&) { threw=true; } + CHECK(threw, "inverse should throw on unknown method"); +} \ No newline at end of file diff --git a/test/test_lu.cpp b/test/test_lu.cpp new file mode 100644 index 0000000..1255d3d --- /dev/null +++ b/test/test_lu.cpp @@ -0,0 +1,169 @@ +#include "test_common.h" + +#include "./utils/matrix.h" +#include "./utils/vector.h" +#include "./numerics/matmul.h" +#include "./numerics/matvec.h" + +#include "./decomp/lu.h" + +//#include + +// ---------- helpers ---------- +template +static bool mats_equal_tol(const utils::Matrix& X, + const utils::Matrix& Y, + double tol = 1e-12) { + if (X.rows()!=Y.rows() || X.cols()!=Y.cols()) return false; + for (std::uint64_t i=0;i tol) return false; + return true; +} + +template +static utils::Matrix identity(std::uint64_t n) { + utils::Matrix I(n,n,T(0)); + for (std::uint64_t i=0;i +static void split_LU(const utils::Matrix& lu, + utils::Matrix& L, + utils::Matrix& U) { + const std::uint64_t n = lu.rows(); + L.resize(n,n,T(0)); + U.resize(n,n,T(0)); + for (std::uint64_t i=0;ij) L(i,j) = lu(i,j); + else if (i==j){ L(i,i) = T(1); U(i,i) = lu(i,i); } + else U(i,j) = lu(i,j); + } + } +} + +template +static utils::Matrix permutation_from_indx(const std::vector& indx) { + const std::uint64_t n = indx.size(); + auto P = identity(n); + // Apply the same sequence of row swaps that was applied during factorization + for (std::uint64_t k=0;k make_A_spd() { + utils::Matrix A(3,3,0.0); + // [ 4 3 0 + // 3 4 -1 + // 0 -1 4 ] + A(0,0)=4; A(0,1)=3; A(0,2)=0; + A(1,0)=3; A(1,1)=4; A(1,2)=-1; + A(2,0)=0; A(2,1)=-1; A(2,2)=4; + return A; +} + +TEST_CASE(LU_PA_equals_LU) { + auto A = make_A_spd(); + decomp::LUdcmpd lu(A); + + utils::Matrix L,U; + split_LU(lu.lu, L, U); + auto P = permutation_from_indx(lu.indx); + + auto PA = numerics::matmul(P, A); + auto LU = numerics::matmul(L, U); + + CHECK(mats_equal_tol(PA, LU, 1e-12), "PA should equal LU"); +} + +TEST_CASE(LU_Solve_Vector) { + auto A = make_A_spd(); + decomp::LUdcmpd lu(A); + + utils::Vd b(3,0.0); + b[0]=1.0; b[1]=2.0; b[2]=3.0; + + auto x = lu.solve(b); + auto Ax = numerics::matvec(A, x); + + CHECK(b.nearly_equal_vec(Ax, 1e-12), "A*x should equal b"); +} + +TEST_CASE(LU_Solve_Matrix_MultiRHS) { + auto A = make_A_spd(); + decomp::LUdcmpd lu(A); + + utils::Matrix B(3,2,0.0); + // two RHS columns + B(0,0)=1; B(1,0)=2; B(2,0)=3; + B(0,1)=4; B(1,1)=5; B(2,1)=6; + + auto X = lu.solve(B); // 3x2 + + // Check A*X == B + auto AX = numerics::matmul(A, X); + CHECK(mats_equal_tol(AX, B, 1e-12), "A*X should equal B"); + + // And that column-wise solve agrees + utils::Vd b0(3,0.0), b1(3,0.0); + for (int i=0;i<3;++i){ b0[i]=B(i,0); b1[i]=B(i,1); } + auto x0 = lu.solve(b0); + auto x1 = lu.solve(b1); + + CHECK(std::fabs(double(X(0,0)-x0[0]))<1e-12 && + std::fabs(double(X(1,0)-x0[1]))<1e-12 && + std::fabs(double(X(2,0)-x0[2]))<1e-12, "column 0 mismatch"); + + CHECK(std::fabs(double(X(0,1)-x1[0]))<1e-12 && + std::fabs(double(X(1,1)-x1[1]))<1e-12 && + std::fabs(double(X(2,1)-x1[2]))<1e-12, "column 1 mismatch"); +} + +TEST_CASE(LU_Determinant) { + auto A = make_A_spd(); + decomp::LUdcmpd lu(A); + // For this A, det = 24 + double d = lu.det(); + CHECK(std::fabs(d - 24.0) < 1e-12, "determinant incorrect"); +} + +TEST_CASE(LU_Inverse_via_SolveI) { + auto A = make_A_spd(); + decomp::LUdcmpd lu(A); + + // Build identity and solve A * X = I + auto I = identity(3); + auto Inv = lu.solve(I); + + // Check A*Inv == I (and Inv*A == I for good measure) + auto AInv = numerics::matmul(A, Inv); + auto InvA = numerics::matmul(Inv, A); + + CHECK(mats_equal_tol(AInv, I, 1e-11), "A*Inv should be I"); + CHECK(mats_equal_tol(InvA, I, 1e-11), "Inv*A should be I"); +} + +TEST_CASE(LU_NonSquare_Throws) { + utils::Matrix A(2,3,1.0); + bool threw=false; + try { decomp::LUdcmpd lu(A); } catch (const std::runtime_error&) { threw = true; } + CHECK(threw, "LU should throw on non-square"); +} + +TEST_CASE(LU_Singular_Throws) { + utils::Matrix A(3,3,0.0); + // Make two identical rows + A(0,0)=1; A(0,1)=2; A(0,2)=3; + A(1,0)=1; A(1,1)=2; A(1,2)=3; + A(2,0)=0; A(2,1)=1; A(2,2)=4; + + bool threw=false; + try { decomp::LUdcmpd lu(A); } catch (const std::runtime_error&) { threw = true; } + CHECK(threw, "LU should throw on singular matrix"); +} \ No newline at end of file diff --git a/test/test_matequal.cpp b/test/test_matequal.cpp new file mode 100644 index 0000000..c0b564d --- /dev/null +++ b/test/test_matequal.cpp @@ -0,0 +1,108 @@ + +#include "test_common.h" +#include "./numerics/matequal.h" + +using utils::Vf; using utils::Vd; using utils::Vi; +using utils::Mf; using utils::Md; using utils::Mi; + + +// ---------- helpers ---------- +template +static void fill_seq(utils::Matrix& M, T start = T{0}, T step = T{1}) { + std::uint64_t k = 0; + for (std::uint64_t i = 0; i < M.rows(); ++i) + for (std::uint64_t j = 0; j < M.cols(); ++j, ++k) + M(i,j) = start + step * static_cast(k); +} + + + + + +// ---------- tests ---------- + +TEST_CASE(matequal_shape_mismatch) { + utils::Mi A(3,3,0), B(3,4,0); + CHECK(!numerics::matequal(A,B), "shape mismatch should be false (serial)"); + +#ifdef _OPENMP + CHECK(!numerics::matequal_omp(A,B), "shape mismatch should be false (omp)"); +#endif + CHECK(!numerics::matequal_auto(A,B), "shape mismatch should be false (auto)"); +} + +TEST_CASE(matequal_int_true_false) { + utils::Mi A(4,5,0), B(4,5,0); + fill_seq(A, int64_t(0), int64_t(1)); + fill_seq(B, int64_t(0), int64_t(1)); + CHECK(numerics::matequal(A,B), "ints equal (serial)"); +#ifdef _OPENMP + CHECK(numerics::matequal_omp(A,B), "ints equal (omp)"); +#endif + // flip one element + B(2,3) += 1; + CHECK(!numerics::matequal(A,B), "ints differ (serial)"); +#ifdef _OPENMP + CHECK(!numerics::matequal_omp(A,B), "ints differ (omp)"); // will FAIL if your omp branch uses '!=' +#endif +} + +TEST_CASE(matequal_double_tolerance) { + utils::Md A(3,3,0.0), B(3,3,0.0); + fill_seq(A, double(1.0), double(0.125)); + fill_seq(B, double(1.0), double(0.125)); + // tiny perturbation within default tol + B(1,1) += 1e-12; + CHECK(numerics::matequal(A,B), "double within tol (serial)"); +#ifdef _OPENMP + CHECK(numerics::matequal_omp(A,B), "double within tol (omp)"); +#endif + // larger perturbation exceeds tol + B(0,2) += 1e-6; + CHECK(!numerics::matequal(A,B, 1e-9), "double exceeds tol (serial)"); +#ifdef _OPENMP + CHECK(!numerics::matequal_omp(A,B, 1e-9), "double exceeds tol (omp)"); +#endif +} + +TEST_CASE(matequal_auto_agrees) { + // Choose size so auto likely takes the OMP path when available, + // but this test only checks correctness, not which path was taken. + utils::Md A(256,256,0.0), B(256,256,0.0); + fill_seq(A, double(0.0), double(0.01)); + fill_seq(B, double(0.0), double(0.01)); + CHECK(numerics::matequal_auto(A,B), "auto equal"); + + B(5,7) += 1e-3; + CHECK(!numerics::matequal_auto(A,B, 1e-9), "auto detects mismatch"); +} + +#ifdef _OPENMP +TEST_CASE(mateequal_omp_nested_callsite) { + // Verify correctness when called inside an outer parallel region. + utils::Mi A(128,128,0), B(128,128,0); + fill_seq(A, int64_t(0), int64_t(1)); + fill_seq(B, int64_t(0), int64_t(1)); + + // allow one nested level; inner region inside mateequal_omp may spawn a team + int prev_levels = omp_get_max_active_levels(); + omp_set_max_active_levels(2); + + bool ok_equal = false, ok_diff = false; + + #pragma omp parallel num_threads(2) shared(ok_equal, ok_diff) + { + #pragma omp single + { + ok_equal = numerics::matequal_omp(A,B); + B(10,10) += 1; // introduce a mismatch + ok_diff = !numerics::matequal_omp(A,B); + } + } + + omp_set_max_active_levels(prev_levels); + + CHECK(ok_equal, "nested equal should be true"); + CHECK(ok_diff, "nested mismatch should be false"); +} +#endif diff --git a/test/test_matmul.cpp b/test/test_matmul.cpp new file mode 100644 index 0000000..82be5aa --- /dev/null +++ b/test/test_matmul.cpp @@ -0,0 +1,127 @@ +#include "test_common.h" +#include "./utils/utils.h" +#include "./numerics/matmul.h" + +#include + +// ---------- helpers ---------- +template +static bool mats_equal(const utils::Matrix& X, const utils::Matrix& Y, double tol = 0.0) { + if (X.rows()!=Y.rows() || X.cols()!=Y.cols()) return false; + if (std::is_floating_point::value) { + for (std::uint64_t i=0;i tol) return false; + } else { + for (std::uint64_t i=0;i +static void fill_seq(utils::Matrix& M, T start = T(0), T step = T(1)) { + std::uint64_t k = 0; + for (std::uint64_t i=0;i(k); +} +// ---------- tests ---------- + +// Small known example: (3x2) · (2x3) +TEST_CASE(Matmul_Small_Known) { + utils::Mi A(3,2,0), B(2,3,0); + // A = [1 2; 3 4; 5 6] + A(0,0)=1; A(0,1)=2; + A(1,0)=3; A(1,1)=4; + A(2,0)=5; A(2,1)=6; + // B = [7 8 9; 10 11 12] + B(0,0)=7; B(0,1)=8; B(0,2)=9; + B(1,0)=10; B(1,1)=11; B(1,2)=12; + + auto C = numerics::matmul(A,B); + CHECK(C.rows()==3 && C.cols()==3, "shape 3x3 wrong"); + + // Expected C: + // [27 30 33] + // [61 68 75] + // [95 106 117] + CHECK(C(0,0)==27 && C(0,1)==30 && C(0,2)==33, "row 0 wrong"); + CHECK(C(1,0)==61 && C(1,1)==68 && C(1,2)==75, "row 1 wrong"); + CHECK(C(2,0)==95 && C(2,1)==106 && C(2,2)==117, "row 2 wrong"); +} + +TEST_CASE(Matmul_DimMismatch_Throws) { + utils::Md A(2,3,1.0), B(4,2,2.0); // A.cols()!=B.rows() + bool threw=false; + try { (void)numerics::matmul(A,B); } catch(const std::runtime_error&) { threw=true; } + CHECK(threw, "matmul should throw on dim mismatch"); +} + +// Compare all variants vs serial on a moderate size +TEST_CASE(Matmul_Variants_Equal_Int) { + const std::uint64_t m=32, n=24, p=16; + utils::Mi A(m,n,0), B(n,p,0); + + // deterministic fill (no randomness) + fill_seq(A, int64_t(1), int64_t(1)); + fill_seq(B, int64_t(2), int64_t(3)); + + auto C_ref = numerics::matmul(A,B); + + auto C_rows = numerics::matmul_rows_omp(A,B); + auto C_collapse = numerics::matmul_collapse_omp(A,B); + auto C_auto = numerics::matmul_auto(A,B); + + CHECK(mats_equal(C_rows, C_ref), "rows_omp != serial"); + CHECK(mats_equal(C_collapse, C_ref), "collapse_omp != serial"); + CHECK(mats_equal(C_auto, C_ref), "auto != serial"); +} + +TEST_CASE(Matmul_Variants_Equal_Double) { + const std::uint64_t m=33, n=17, p=19; + utils::Md A(m,n,0.0), B(n,p,0.0); + + fill_seq(A, 0.1, 0.01); + fill_seq(B, 1.0, 0.02); + + auto C_ref = numerics::matmul(A,B); + auto C_rows = numerics::matmul_rows_omp(A,B); + auto C_collapse = numerics::matmul_collapse_omp(A,B); + auto C_auto = numerics::matmul_auto(A,B); + + CHECK(mats_equal(C_rows, C_ref, 1e-9), "rows_omp != serial (double)"); + CHECK(mats_equal(C_collapse, C_ref, 1e-9), "collapse_omp != serial (double)"); + CHECK(mats_equal(C_auto, C_ref, 1e-9), "auto != serial (double)"); +} + +// Nested callsite sanity: call OMP variant from within an outer region +#ifdef _OPENMP +TEST_CASE(Matmul_OMP_Nested_Callsite) { + const std::uint64_t m=48, n=24, p=32; + utils::Mi A(m,n,0), B(n,p,0); + fill_seq(A, int64_t(1), int64_t(2)); + fill_seq(B, int64_t(3), int64_t(1)); + + auto C_ref = numerics::matmul(A,B); + + int prev_levels = omp_get_max_active_levels(); + omp_set_max_active_levels(2); + + utils::Mi C_nested; + #pragma omp parallel num_threads(2) + { + #pragma omp single + { + // either variant is fine; collapse(2) has more parallelism + C_nested = numerics::matmul_collapse_omp(A,B); + } + } + + omp_set_max_active_levels(prev_levels); + + CHECK(mats_equal(C_nested, C_ref), "nested collapse_omp result mismatch"); +} +#endif \ No newline at end of file diff --git a/test/test_matrix.cpp b/test/test_matrix.cpp new file mode 100644 index 0000000..1a50dd4 --- /dev/null +++ b/test/test_matrix.cpp @@ -0,0 +1,164 @@ + +#include "test_common.h" +#include "./utils/matrix.h" + +using utils::Vf; using utils::Vd; using utils::Vi; +using utils::Mf; using utils::Md; using utils::Mi; + + +// tiny helper +template +static bool mat_is_filled(const utils::Matrix& M, T v) { + for (std::uint64_t i = 0; i < M.rows(); ++i) + for (std::uint64_t j = 0; j < M.cols(); ++j) + if (M(i,j) != v) return false; + return true; +} + +// ------------------- basic construction ------------------- +TEST_CASE(Matrix_Default_Construct) { + Md M; + CHECK_EQ(M.rows(), 0, "default rows should be 0"); + CHECK_EQ(M.cols(), 0, "default cols should be 0"); +} + +TEST_CASE(Matrix_Filled_Construct) { + Md M(3, 4, 2.5); + CHECK_EQ(M.rows(), 3, "rows"); + CHECK_EQ(M.cols(), 4, "cols"); + CHECK(mat_is_filled(M, 2.5), "all elements should be 2.5"); +} + +// ------------------- element access / write ------------------- +TEST_CASE(Matrix_Set_Get) { + Mi M(2, 3, 0); + M(0,0) = 42; + M(1,2) = -7; + CHECK_EQ(M(0,0), 42, "set/get (0,0)"); + CHECK_EQ(M(1,2), -7, "set/get (1,2)"); +} + +// ------------------- resize semantics ------------------- +TEST_CASE(Matrix_Resize_Grow) { + Mf M(2, 2, 1.0f); + M.resize(3, 4, 9.0f); // grow; newly appended elements get the fill value + CHECK_EQ(M.rows(), 3, "rows after resize"); + CHECK_EQ(M.cols(), 4, "cols after resize"); + CHECK(M(2,3) == 9.0f, "last element should be the fill value after grow"); +} + +TEST_CASE(Matrix_Resize_Shrink) { + Mi M(4, 4, 5); + M(0,0) = 11; + M.resize(2, 2, 999); // shrink; size reduces + CHECK_EQ(M.rows(), 2, "rows after shrink"); + CHECK_EQ(M.cols(), 2, "cols after shrink"); + // element mapping after shrink is implementation dependent; just check bounds usable + M(1,1) = 3; + CHECK_EQ(M(1,1), 3, "write after shrink works"); +} + +// ------------------- row helpers ------------------- +TEST_CASE(Matrix_Get_Row) { + Mi M(3,4,0); + // set row 1 to [10,20,30,40] + for (std::uint64_t j=0;j<4;++j) M(1,j) = (j+1)*10; + auto r = M.get_row(1); + CHECK_EQ(r.size(), 4, "row size"); + CHECK(r[0]==10 && r[1]==20 && r[2]==30 && r[3]==40, "row contents"); +} + +TEST_CASE(Matrix_Set_Row) { + Mi M(2,3,0); + utils::Vector v(3, 0); + v[0]=7; v[1]=8; v[2]=9; + M.set_row(0, v); + CHECK(M(0,0)==7 && M(0,1)==8 && M(0,2)==9, "set_row contents"); +} + +TEST_CASE(Matrix_Row_OutOfRange_Throws) { + Mi M(2,2,0); + bool threw=false; + try { (void)M.get_row(2); } catch(const std::out_of_range&) { threw=true; } + CHECK(threw, "get_row out-of-range should throw"); + + threw=false; + try { + utils::Vector v(3,1); // wrong size + M.set_row(1, v); + } catch(const std::runtime_error&) { threw=true; } + CHECK(threw, "set_row size mismatch should throw"); +} + +// ------------------- col helpers ------------------- +TEST_CASE(Matrix_Get_Col) { + Mi M(3,2,0); + M(0,1)=5; M(1,1)=6; M(2,1)=7; + auto c = M.get_col(1); + CHECK_EQ(c.size(), 3, "col size"); + CHECK(c[0]==5 && c[1]==6 && c[2]==7, "col contents"); +} + +TEST_CASE(Matrix_Set_Col) { + Mi M(3,2,0); + utils::Vector v(3, 0); + v[0]=1; v[1]=4; v[2]=9; + M.set_col(0, v); + CHECK(M(0,0)==1 && M(1,0)==4 && M(2,0)==9, "set_col contents"); +} + +TEST_CASE(Matrix_Col_OutOfRange_Throws) { + Mi M(2,2,0); + bool threw=false; + try { (void)M.get_col(2); } catch(const std::out_of_range&) { threw=true; } + CHECK(threw, "get_col out-of-range should throw"); + + threw=false; + try { + utils::Vector v(1,1); // wrong size + M.set_col(1, v); + } catch(const std::runtime_error&) { threw=true; } + CHECK(threw, "set_col size mismatch should throw"); +} + +// ------------------- swap rows/cols ------------------- +TEST_CASE(Matrix_Swap_Rows) { + Mi M(2,3,0); + // row0: 1 2 3, row1: 4 5 6 + for (std::uint64_t j=0;j<3;++j){ M(0,j)=j+1; M(1,j)=j+4; } + M.swap_rows(0,1); + CHECK(M(0,0)==4 && M(0,1)==5 && M(0,2)==6, "row0 after swap"); + CHECK(M(1,0)==1 && M(1,1)==2 && M(1,2)==3, "row1 after swap"); +} + +TEST_CASE(Matrix_Swap_Cols) { + Mi M(3,3,0); + // col0: 9 8 7, col2: 1 2 3 + M(0,0)=9; M(1,0)=8; M(2,0)=7; + M(0,2)=1; M(1,2)=2; M(2,2)=3; + M.swap_cols(0,2); + CHECK(M(0,0)==1 && M(1,0)==2 && M(2,0)==3, "col0 after swap"); + CHECK(M(0,2)==9 && M(1,2)==8 && M(2,2)==7, "col2 after swap"); +} + +TEST_CASE(Matrix_Swap_OutOfRange_Throws) { + Mi M(2,2,0); + bool threw=false; + try { M.swap_rows(0,2); } catch(const std::out_of_range&) { threw=true; } + CHECK(threw, "swap_rows out-of-range should throw"); + threw=false; + try { M.swap_cols(0,3); } catch(const std::out_of_range&) { threw=true; } + CHECK(threw, "swap_cols out-of-range should throw"); +} + +// ------------------- stream output (basic sanity) ------------------- +TEST_CASE(Matrix_Stream_ToString) { + Mf M(2,2,0.0f); + M(0,0)=1.0f; M(0,1)=2.0f; M(1,0)=3.0f; M(1,1)=4.0f; + std::ostringstream os; + os << M; + auto s = os.str(); + CHECK(s.find("[[") != std::string::npos, "starts with [["); + CHECK(s.find("1.000") != std::string::npos, "contains formatted 1.000"); + CHECK(s.find("4.000") != std::string::npos, "contains formatted 4.000"); +} \ No newline at end of file diff --git a/test/test_matvec.cpp b/test/test_matvec.cpp new file mode 100644 index 0000000..1b838e0 --- /dev/null +++ b/test/test_matvec.cpp @@ -0,0 +1,236 @@ + +#include "test_common.h" + +#include "./numerics/matvec.h" // numerics::matvec / inplace_transpose +#include + +using utils::Vi; using utils::Vf; using utils::Vd; +using utils::Mi; using utils::Mf; using utils::Md; + + + +// ------------------------------------------------------------ +// matvec: y = A * x +// ------------------------------------------------------------ +TEST_CASE(Matvec_Serial_Simple) { + // A = [[1,2,3], + // [4,5,6]] + Md A(2,3,0.0); + A(0,0)=1; A(0,1)=2; A(0,2)=3; + A(1,0)=4; A(1,1)=5; A(1,2)=6; + Vd x(3,0.0); x[0]=7; x[1]=8; x[2]=9; + + auto y = numerics::matvec(A,x); // [ 1*7+2*8+3*9 , 4*7+5*8+6*9 ] = [50, 122] + CHECK(y.size()==2, "matvec size wrong"); + CHECK(y[0]==50.0 && y[1]==122.0, "matvec values wrong"); +} + +TEST_CASE(Matvec_OMP_Equals_Serial) { + Md A(3,3,0.0); + // A = I * 2 + for (uint64_t i=0;i<3;++i) A(i,i)=2.0; + Vd x(3,0.0); x[0]=1; x[1]=2; x[2]=3; + + auto ys = numerics::matvec(A,x); + auto yp = numerics::matvec_omp(A,x); + + CHECK((ys.nearly_equal_vec(yp)), "matvec_omp != matvec"); +} + +TEST_CASE(Matvec_Auto_Equals_Serial) { + Md A(2,2,0.0); A(0,0)=2; A(0,1)=1; A(1,0)=0.5; A(1,1)=3; + Vd x(2,0.0); x[0]=4; x[1]=5; + + auto ys = numerics::matvec(A,x); + auto ya = numerics::matvec_auto(A,x); + + CHECK((ys.nearly_equal_vec(ya)), "matvec_auto != serial"); +} + +TEST_CASE(Matvec_DimensionMismatch_Throws) { + Md A(2,3,0.0); + Vd x(4,0.0); + bool threw=false; + try { auto _ = numerics::matvec(A,x); (void)_; } + catch (const std::runtime_error&) { threw=true; } + CHECK(threw, "matvec must throw on dimension mismatch"); +} + +TEST_CASE(Matvec_Zero_Edges) { + Md A(0,3,0.0); // 0x3 + Vd x(3,1.0); + auto y = numerics::matvec(A,x); + CHECK(y.size()==0, "0xN * x should return size 0 vector"); + + Md B(2,0,0.0); // 2x0 + Vd z(0,0.0); + auto y2 = numerics::matvec(B,z); + CHECK(y2.size()==2 && y2[0]==0.0 && y2[1]==0.0, "N×0 * 0 should return zeros of size N"); +} + +TEST_CASE(Matvec_Float_Tolerance) { + Mf A(2,2,0.0f); A(0,0)=1.0f; A(0,1)=2.0f; A(1,0)=3.0f; A(1,1)=4.0f; + Vf x(2,0.0f); x[0]=0.1f; x[1]=0.2f; + + auto y1 = numerics::matvec(A,x); + auto y2 = numerics::matvec_omp(A,x); + + CHECK((y1.nearly_equal_vec(y2,1e-6f)), "matvec float omp mismatch"); +} +// +// ---------- Auto inside an outer parallel region (no accidental nested teams) ---------- +// We just check correctness; performance is environment-dependent. +// +TEST_CASE(Matvec_Auto_Inside_Outer_Parallel_Correctness) { + const uint64_t m=64, n=64; + Md A(m,n,1.0); Vd x(n,2.0); + //fill_deterministic(A); fill_deterministic(x); + Vd ref = numerics::matvec(A,x); + + // Call auto inside an outer team + #ifdef _OPENMP + #pragma omp parallel for schedule(static) + #endif + for (int rep=0; rep<32; ++rep) { + auto y = numerics::matvec_auto(A,x); + // Each thread checks its own result equals reference + if (!(y.nearly_equal_vec(ref))) { + throw TestFailure("matvec_auto wrong under outer parallel region"); + } + } +} +TEST_CASE(Matvec_Speed_Sanity) { + const uint64_t m=4096, n=4096; // ~16M MACs; adjust if needed + Md A(m,n,1.0); Vd x(n,2.0); + //fill_deterministic(A); fill_deterministic(x); + + auto t0 = std::chrono::high_resolution_clock::now(); + auto yS = numerics::matvec(A,x); + double tp = std::chrono::duration(std::chrono::high_resolution_clock::now() - t0).count(); + + #ifdef _OPENMP + int threads = omp_get_max_threads(); + #else + int threads = 1; + #endif + + t0 = std::chrono::high_resolution_clock::now(); + auto yP = numerics::matvec_omp(A,x); + double ts = std::chrono::duration(std::chrono::high_resolution_clock::now() - t0).count(); + + CHECK((yS.nearly_equal_vec(yP)), "matvec_omp != matvec_serial (large)"); + // Only enforce basic sanity if we *can* use >1 threads: + if (threads > 1) { + // Be generous: just require not significantly slower. + CHECK(tp >= ts, "matvec_omp unexpectedly much slower than serial"); + } +} + +// ------------------------------------------------------------ +// vecmat: y = x * A +// ------------------------------------------------------------ +TEST_CASE(Vecmat_Serial_Simple) { + // A = [[1,2], + // [3,4], + // [5,6]] (3x2) + Md A(3,2,0.0); + A(0,0)=1; A(0,1)=2; + A(1,0)=3; A(1,1)=4; + A(2,0)=5; A(2,1)=6; + + Vd x(3,0.0); x[0]=7; x[1]=8; x[2]=9; + + auto y = numerics::vecmat(x,A); // 1*7+3*8+5*9= 76 ; 2*7+4*8+6*9=100 + CHECK(y.size()==2, "vecmat size wrong"); + CHECK(y[0]==76.0 && y[1]==100.0, "vecmat values wrong"); +} + +TEST_CASE(Vecmat_OMP_Equals_Serial) { + Md A(2,2,0.0); A(0,0)=2; A(0,1)=1; A(1,0)=5; A(1,1)=-1; + Vd x(2,0.0); x[0]=0.5; x[1]=1.5; + + auto ys = numerics::vecmat(x,A); + auto yp = numerics::vecmat_omp(x,A); + + CHECK((ys.nearly_equal_vec(yp)), "vecmat_omp != vecmat"); +} + +TEST_CASE(Vecmat_Auto_Equals_Serial) { + Md A(2,3,0.0); + A(0,0)=1; A(0,1)=2; A(0,2)=3; + A(1,0)=4; A(1,1)=5; A(1,2)=6; + Vd x(2,0.0); x[0]=1; x[1]=2; + + auto ys = numerics::vecmat(x,A); + auto ya = numerics::vecmat_auto(x,A); + + CHECK((ys.nearly_equal_vec(ya)), "vecmat_auto != serial"); +} + +TEST_CASE(Vecmat_DimensionMismatch_Throws) { + Md A(2,2,0.0); + Vd x(3,0.0); + bool threw=false; + try { auto _ = numerics::vecmat(x,A); (void)_; } + catch (const std::runtime_error&) { threw=true; } + CHECK(threw, "vecmat must throw on dimension mismatch"); +} + +TEST_CASE(Vecmat_Zero_Edges) { + Md A(0,3,0.0); + Vd x(0,0.0); + auto y = numerics::vecmat(x,A); // 0×N times N×M → 0×M + CHECK(y.size()==3 && y[0]==0.0 && y[1]==0.0 && y[2]==0.0, "0-length x times A wrong"); + + Md B(3,0,0.0); + Vd z(3,1.0); + auto y2 = numerics::vecmat(z,B); // 1x3 * 3x0 → 1x0 + CHECK(y2.size()==0, "vecmat with N×0 result size wrong"); +} + +// +// ---------- Auto inside an outer parallel region (no accidental nested teams) ---------- +// We just check correctness; performance is environment-dependent. +// +TEST_CASE(Vecmat_Auto_Inside_Outer_Parallel_Correctness) { + const uint64_t m=64, n=64; + Md A(m,n,1.0); Vd x(m,2.0); + //fill_deterministic(A); fill_deterministic(x); + Vd ref = numerics::vecmat(x,A); + + #ifdef _OPENMP + #pragma omp parallel for schedule(static) + #endif + for (int rep=0; rep<32; ++rep) { + auto y = numerics::vecmat_auto(x,A); + if (!(y.nearly_equal_vec(ref))) { + throw TestFailure("vecmat_auto wrong under outer parallel region"); + } + } +} + + +TEST_CASE(Vecmat_Speed_Sanity) { + const uint64_t m=4096, n=4096; + Md A(m,n,1.0); Vd x(m,2.0); + //fill_deterministic(A); fill_deterministic(x); + + auto t0 = std::chrono::high_resolution_clock::now(); + auto yS = numerics::vecmat(x,A); + double ts = std::chrono::duration(std::chrono::high_resolution_clock::now() - t0).count(); + + #ifdef _OPENMP + int threads = omp_get_max_threads(); + #else + int threads = 1; + #endif + + t0 = std::chrono::high_resolution_clock::now(); + auto yP = numerics::vecmat_omp(x,A); + double tp = std::chrono::duration(std::chrono::high_resolution_clock::now() - t0).count(); + + CHECK((yS.nearly_equal_vec(yP)), "vecmat_omp != vecmat_serial (large)"); + if (threads > 1) { + CHECK(tp <= ts, "vecmat_omp unexpectedly much slower than serial"); + } +} diff --git a/test/test_transpose.cpp b/test/test_transpose.cpp new file mode 100644 index 0000000..98f900c --- /dev/null +++ b/test/test_transpose.cpp @@ -0,0 +1,151 @@ + +#include "test_common.h" +//#include "./utils/matrix.h" // matrix.h, vector.h +#include "./numerics/transpose.h" // numerics::transpose / inplace_transpose + +using utils::Mi; using utils::Mf; using utils::Md; + +/// ---- helpers ---- +template +static void fill_seq(utils::Matrix& M, T start = T(0), T step = T(1)) { + std::uint64_t k = 0; + for (std::uint64_t i=0; i(k); +} + + +template +static bool mats_equal(const utils::Matrix& X, const utils::Matrix& Y) { + if (X.rows()!=Y.rows() || X.cols()!=Y.cols()) return false; + for (std::uint64_t i=0; i +static bool vec_equal_exact(const utils::Vector& a, const utils::Vector& b) { + if (a.size() != b.size()) return false; + for (std::uint64_t i=0;i [2,2,2] + CHECK(v[0]==2.0 && v[1]==2.0, "inplace_sqrt"); +} + +TEST_CASE(Vector_Dot_Sum_Norm) { + utils::Vd a(3, 0.0), b(3, 0.0); + a[0]=1.0; a[1]=2.0; a[2]=3.0; // a = [1,2,3] + b[0]=4.0; b[1]=5.0; b[2]=6.0; // b = [4,5,6] + + double dot = a.dot(b); // 1*4 + 2*5 + 3*6 = 32 + CHECK(std::fabs(dot - 32.0) < 1e-12, "dot"); + + double s = a.sum(); // 6 + CHECK(std::fabs(s - 6.0) < 1e-12, "sum"); + + double n = a.norm(); // sqrt(14) + CHECK(std::fabs(n - std::sqrt(14.0)) < 1e-12, "norm"); +} + +TEST_CASE(Vector_Normalize_and_Throws) { + utils::Vd v(3, 0.0); + v[0]=3.0; v[1]=4.0; v[2]=0.0; // norm = 5 + auto u = v.normalize(); // returns new vector + CHECK(std::fabs(u.norm() - 1.0) < 1e-12, "normalize() unit length"); + + v.inplace_normalize(); + CHECK(std::fabs(v.norm() - 1.0) < 1e-12, "inplace_normalize unit length"); + + utils::Vd z(3, 0.0); + bool threw=false; + try { z.inplace_normalize(); } catch(const std::runtime_error&) { threw=true; } + CHECK(threw, "normalize should throw on zero vector"); +} + +// Size mismatch throws (elementwise ops) +TEST_CASE(Vector_Size_Mismatch_Throws) { + utils::Vi a(3,1), b(4,2); + + bool threw=false; + try { (void)a.dot(b); } catch(const std::runtime_error&) { threw=true; } + CHECK(threw, "dot size mismatch should throw"); + + threw=false; + try { a.inplace_add(b); } catch(const std::runtime_error&) { threw=true; } + CHECK(threw, "add size mismatch should throw"); + + threw=false; + try { a.inplace_subtract(b); } catch(const std::runtime_error&) { threw=true; } + CHECK(threw, "subtract size mismatch should throw"); + + threw=false; + try { a.inplace_multiply(b); } catch(const std::runtime_error&) { threw=true; } + CHECK(threw, "multiply size mismatch should throw"); + + threw=false; + try { a.inplace_divide(b); } catch(const std::runtime_error&) { threw=true; } + CHECK(threw, "divide size mismatch should throw"); + + threw=false; + try { a.inplace_power(b); } catch(const std::runtime_error&) { threw=true; } + CHECK(threw, "power size mismatch should throw"); +} \ No newline at end of file