#include "test_common.h" #include "./numerics/matvec.h" // numerics::matvec / inplace_transpose #include using utils::Vi; using utils::Vf; using utils::Vd; using utils::Mi; using utils::Mf; using utils::Md; // ------------------------------------------------------------ // matvec: y = A * x // ------------------------------------------------------------ TEST_CASE(Matvec_Serial_Simple) { // A = [[1,2,3], // [4,5,6]] Md A(2,3,0.0); A(0,0)=1; A(0,1)=2; A(0,2)=3; A(1,0)=4; A(1,1)=5; A(1,2)=6; Vd x(3,0.0); x[0]=7; x[1]=8; x[2]=9; auto y = numerics::matvec(A,x); // [ 1*7+2*8+3*9 , 4*7+5*8+6*9 ] = [50, 122] CHECK(y.size()==2, "matvec size wrong"); CHECK(y[0]==50.0 && y[1]==122.0, "matvec values wrong"); } TEST_CASE(Matvec_OMP_Equals_Serial) { Md A(3,3,0.0); // A = I * 2 for (uint64_t i=0;i<3;++i) A(i,i)=2.0; Vd x(3,0.0); x[0]=1; x[1]=2; x[2]=3; auto ys = numerics::matvec(A,x); auto yp = numerics::matvec_omp(A,x); CHECK((ys.nearly_equal_vec(yp)), "matvec_omp != matvec"); } TEST_CASE(Matvec_Auto_Equals_Serial) { Md A(2,2,0.0); A(0,0)=2; A(0,1)=1; A(1,0)=0.5; A(1,1)=3; Vd x(2,0.0); x[0]=4; x[1]=5; auto ys = numerics::matvec(A,x); auto ya = numerics::matvec_auto(A,x); CHECK((ys.nearly_equal_vec(ya)), "matvec_auto != serial"); } TEST_CASE(Matvec_DimensionMismatch_Throws) { Md A(2,3,0.0); Vd x(4,0.0); bool threw=false; try { auto _ = numerics::matvec(A,x); (void)_; } catch (const std::runtime_error&) { threw=true; } CHECK(threw, "matvec must throw on dimension mismatch"); } TEST_CASE(Matvec_Zero_Edges) { Md A(0,3,0.0); // 0x3 Vd x(3,1.0); auto y = numerics::matvec(A,x); CHECK(y.size()==0, "0xN * x should return size 0 vector"); Md B(2,0,0.0); // 2x0 Vd z(0,0.0); auto y2 = numerics::matvec(B,z); CHECK(y2.size()==2 && y2[0]==0.0 && y2[1]==0.0, "N×0 * 0 should return zeros of size N"); } TEST_CASE(Matvec_Float_Tolerance) { Mf A(2,2,0.0f); A(0,0)=1.0f; A(0,1)=2.0f; A(1,0)=3.0f; A(1,1)=4.0f; Vf x(2,0.0f); x[0]=0.1f; x[1]=0.2f; auto y1 = numerics::matvec(A,x); auto y2 = numerics::matvec_omp(A,x); CHECK((y1.nearly_equal_vec(y2,1e-6f)), "matvec float omp mismatch"); } // // ---------- Auto inside an outer parallel region (no accidental nested teams) ---------- // We just check correctness; performance is environment-dependent. // TEST_CASE(Matvec_Auto_Inside_Outer_Parallel_Correctness) { const uint64_t m=64, n=64; Md A(m,n,1.0); Vd x(n,2.0); //fill_deterministic(A); fill_deterministic(x); Vd ref = numerics::matvec(A,x); // Call auto inside an outer team #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif for (int rep=0; rep<32; ++rep) { auto y = numerics::matvec_auto(A,x); // Each thread checks its own result equals reference if (!(y.nearly_equal_vec(ref))) { throw TestFailure("matvec_auto wrong under outer parallel region"); } } } TEST_CASE(Matvec_Speed_Sanity) { const uint64_t m=4096, n=4096; // ~16M MACs; adjust if needed Md A(m,n,1.0); Vd x(n,2.0); //fill_deterministic(A); fill_deterministic(x); auto t0 = std::chrono::high_resolution_clock::now(); auto yS = numerics::matvec(A,x); double tp = std::chrono::duration(std::chrono::high_resolution_clock::now() - t0).count(); #ifdef _OPENMP int threads = omp_get_max_threads(); #else int threads = 1; #endif t0 = std::chrono::high_resolution_clock::now(); auto yP = numerics::matvec_omp(A,x); double ts = std::chrono::duration(std::chrono::high_resolution_clock::now() - t0).count(); CHECK((yS.nearly_equal_vec(yP)), "matvec_omp != matvec_serial (large)"); // Only enforce basic sanity if we *can* use >1 threads: if (threads > 1) { // Be generous: just require not significantly slower. CHECK(tp >= ts, "matvec_omp unexpectedly much slower than serial"); } } // ------------------------------------------------------------ // vecmat: y = x * A // ------------------------------------------------------------ TEST_CASE(Vecmat_Serial_Simple) { // A = [[1,2], // [3,4], // [5,6]] (3x2) Md A(3,2,0.0); A(0,0)=1; A(0,1)=2; A(1,0)=3; A(1,1)=4; A(2,0)=5; A(2,1)=6; Vd x(3,0.0); x[0]=7; x[1]=8; x[2]=9; auto y = numerics::vecmat(x,A); // 1*7+3*8+5*9= 76 ; 2*7+4*8+6*9=100 CHECK(y.size()==2, "vecmat size wrong"); CHECK(y[0]==76.0 && y[1]==100.0, "vecmat values wrong"); } TEST_CASE(Vecmat_OMP_Equals_Serial) { Md A(2,2,0.0); A(0,0)=2; A(0,1)=1; A(1,0)=5; A(1,1)=-1; Vd x(2,0.0); x[0]=0.5; x[1]=1.5; auto ys = numerics::vecmat(x,A); auto yp = numerics::vecmat_omp(x,A); CHECK((ys.nearly_equal_vec(yp)), "vecmat_omp != vecmat"); } TEST_CASE(Vecmat_Auto_Equals_Serial) { Md A(2,3,0.0); A(0,0)=1; A(0,1)=2; A(0,2)=3; A(1,0)=4; A(1,1)=5; A(1,2)=6; Vd x(2,0.0); x[0]=1; x[1]=2; auto ys = numerics::vecmat(x,A); auto ya = numerics::vecmat_auto(x,A); CHECK((ys.nearly_equal_vec(ya)), "vecmat_auto != serial"); } TEST_CASE(Vecmat_DimensionMismatch_Throws) { Md A(2,2,0.0); Vd x(3,0.0); bool threw=false; try { auto _ = numerics::vecmat(x,A); (void)_; } catch (const std::runtime_error&) { threw=true; } CHECK(threw, "vecmat must throw on dimension mismatch"); } TEST_CASE(Vecmat_Zero_Edges) { Md A(0,3,0.0); Vd x(0,0.0); auto y = numerics::vecmat(x,A); // 0×N times N×M → 0×M CHECK(y.size()==3 && y[0]==0.0 && y[1]==0.0 && y[2]==0.0, "0-length x times A wrong"); Md B(3,0,0.0); Vd z(3,1.0); auto y2 = numerics::vecmat(z,B); // 1x3 * 3x0 → 1x0 CHECK(y2.size()==0, "vecmat with N×0 result size wrong"); } // // ---------- Auto inside an outer parallel region (no accidental nested teams) ---------- // We just check correctness; performance is environment-dependent. // TEST_CASE(Vecmat_Auto_Inside_Outer_Parallel_Correctness) { const uint64_t m=64, n=64; Md A(m,n,1.0); Vd x(m,2.0); //fill_deterministic(A); fill_deterministic(x); Vd ref = numerics::vecmat(x,A); #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif for (int rep=0; rep<32; ++rep) { auto y = numerics::vecmat_auto(x,A); if (!(y.nearly_equal_vec(ref))) { throw TestFailure("vecmat_auto wrong under outer parallel region"); } } } TEST_CASE(Vecmat_Speed_Sanity) { const uint64_t m=4096, n=4096; Md A(m,n,1.0); Vd x(m,2.0); //fill_deterministic(A); fill_deterministic(x); auto t0 = std::chrono::high_resolution_clock::now(); auto yS = numerics::vecmat(x,A); double ts = std::chrono::duration(std::chrono::high_resolution_clock::now() - t0).count(); #ifdef _OPENMP int threads = omp_get_max_threads(); #else int threads = 1; #endif t0 = std::chrono::high_resolution_clock::now(); auto yP = numerics::vecmat_omp(x,A); double tp = std::chrono::duration(std::chrono::high_resolution_clock::now() - t0).count(); CHECK((yS.nearly_equal_vec(yP)), "vecmat_omp != vecmat_serial (large)"); if (threads > 1) { CHECK(tp <= ts, "vecmat_omp unexpectedly much slower than serial"); } }