Files
Flux-openbuild/test/test_matvec.cpp
2025-10-06 20:21:40 +00:00

237 lines
7.4 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#include "test_common.h"
#include "./numerics/matvec.h" // numerics::matvec / inplace_transpose
#include <chrono>
using utils::Vi; using utils::Vf; using utils::Vd;
using utils::Mi; using utils::Mf; using utils::Md;
// ------------------------------------------------------------
// matvec: y = A * x
// ------------------------------------------------------------
TEST_CASE(Matvec_Serial_Simple) {
// A = [[1,2,3],
// [4,5,6]]
Md A(2,3,0.0);
A(0,0)=1; A(0,1)=2; A(0,2)=3;
A(1,0)=4; A(1,1)=5; A(1,2)=6;
Vd x(3,0.0); x[0]=7; x[1]=8; x[2]=9;
auto y = numerics::matvec<double>(A,x); // [ 1*7+2*8+3*9 , 4*7+5*8+6*9 ] = [50, 122]
CHECK(y.size()==2, "matvec size wrong");
CHECK(y[0]==50.0 && y[1]==122.0, "matvec values wrong");
}
TEST_CASE(Matvec_OMP_Equals_Serial) {
Md A(3,3,0.0);
// A = I * 2
for (uint64_t i=0;i<3;++i) A(i,i)=2.0;
Vd x(3,0.0); x[0]=1; x[1]=2; x[2]=3;
auto ys = numerics::matvec<double>(A,x);
auto yp = numerics::matvec_omp<double>(A,x);
CHECK((ys.nearly_equal_vec(yp)), "matvec_omp != matvec");
}
TEST_CASE(Matvec_Auto_Equals_Serial) {
Md A(2,2,0.0); A(0,0)=2; A(0,1)=1; A(1,0)=0.5; A(1,1)=3;
Vd x(2,0.0); x[0]=4; x[1]=5;
auto ys = numerics::matvec<double>(A,x);
auto ya = numerics::matvec_auto<double>(A,x);
CHECK((ys.nearly_equal_vec(ya)), "matvec_auto != serial");
}
TEST_CASE(Matvec_DimensionMismatch_Throws) {
Md A(2,3,0.0);
Vd x(4,0.0);
bool threw=false;
try { auto _ = numerics::matvec<double>(A,x); (void)_; }
catch (const std::runtime_error&) { threw=true; }
CHECK(threw, "matvec must throw on dimension mismatch");
}
TEST_CASE(Matvec_Zero_Edges) {
Md A(0,3,0.0); // 0x3
Vd x(3,1.0);
auto y = numerics::matvec<double>(A,x);
CHECK(y.size()==0, "0xN * x should return size 0 vector");
Md B(2,0,0.0); // 2x0
Vd z(0,0.0);
auto y2 = numerics::matvec<double>(B,z);
CHECK(y2.size()==2 && y2[0]==0.0 && y2[1]==0.0, "N×0 * 0 should return zeros of size N");
}
TEST_CASE(Matvec_Float_Tolerance) {
Mf A(2,2,0.0f); A(0,0)=1.0f; A(0,1)=2.0f; A(1,0)=3.0f; A(1,1)=4.0f;
Vf x(2,0.0f); x[0]=0.1f; x[1]=0.2f;
auto y1 = numerics::matvec<float>(A,x);
auto y2 = numerics::matvec_omp<float>(A,x);
CHECK((y1.nearly_equal_vec(y2,1e-6f)), "matvec float omp mismatch");
}
//
// ---------- Auto inside an outer parallel region (no accidental nested teams) ----------
// We just check correctness; performance is environment-dependent.
//
TEST_CASE(Matvec_Auto_Inside_Outer_Parallel_Correctness) {
const uint64_t m=64, n=64;
Md A(m,n,1.0); Vd x(n,2.0);
//fill_deterministic(A); fill_deterministic(x);
Vd ref = numerics::matvec<double>(A,x);
// Call auto inside an outer team
#ifdef _OPENMP
#pragma omp parallel for schedule(static)
#endif
for (int rep=0; rep<32; ++rep) {
auto y = numerics::matvec_auto<double>(A,x);
// Each thread checks its own result equals reference
if (!(y.nearly_equal_vec(ref))) {
throw TestFailure("matvec_auto wrong under outer parallel region");
}
}
}
TEST_CASE(Matvec_Speed_Sanity) {
const uint64_t m=4096, n=4096; // ~16M MACs; adjust if needed
Md A(m,n,1.0); Vd x(n,2.0);
//fill_deterministic(A); fill_deterministic(x);
auto t0 = std::chrono::high_resolution_clock::now();
auto yS = numerics::matvec(A,x);
double tp = std::chrono::duration<double>(std::chrono::high_resolution_clock::now() - t0).count();
#ifdef _OPENMP
int threads = omp_get_max_threads();
#else
int threads = 1;
#endif
t0 = std::chrono::high_resolution_clock::now();
auto yP = numerics::matvec_omp(A,x);
double ts = std::chrono::duration<double>(std::chrono::high_resolution_clock::now() - t0).count();
CHECK((yS.nearly_equal_vec(yP)), "matvec_omp != matvec_serial (large)");
// Only enforce basic sanity if we *can* use >1 threads:
if (threads > 1) {
// Be generous: just require not significantly slower.
CHECK(tp >= ts, "matvec_omp unexpectedly much slower than serial");
}
}
// ------------------------------------------------------------
// vecmat: y = x * A
// ------------------------------------------------------------
TEST_CASE(Vecmat_Serial_Simple) {
// A = [[1,2],
// [3,4],
// [5,6]] (3x2)
Md A(3,2,0.0);
A(0,0)=1; A(0,1)=2;
A(1,0)=3; A(1,1)=4;
A(2,0)=5; A(2,1)=6;
Vd x(3,0.0); x[0]=7; x[1]=8; x[2]=9;
auto y = numerics::vecmat<double>(x,A); // 1*7+3*8+5*9= 76 ; 2*7+4*8+6*9=100
CHECK(y.size()==2, "vecmat size wrong");
CHECK(y[0]==76.0 && y[1]==100.0, "vecmat values wrong");
}
TEST_CASE(Vecmat_OMP_Equals_Serial) {
Md A(2,2,0.0); A(0,0)=2; A(0,1)=1; A(1,0)=5; A(1,1)=-1;
Vd x(2,0.0); x[0]=0.5; x[1]=1.5;
auto ys = numerics::vecmat<double>(x,A);
auto yp = numerics::vecmat_omp<double>(x,A);
CHECK((ys.nearly_equal_vec(yp)), "vecmat_omp != vecmat");
}
TEST_CASE(Vecmat_Auto_Equals_Serial) {
Md A(2,3,0.0);
A(0,0)=1; A(0,1)=2; A(0,2)=3;
A(1,0)=4; A(1,1)=5; A(1,2)=6;
Vd x(2,0.0); x[0]=1; x[1]=2;
auto ys = numerics::vecmat<double>(x,A);
auto ya = numerics::vecmat_auto<double>(x,A);
CHECK((ys.nearly_equal_vec(ya)), "vecmat_auto != serial");
}
TEST_CASE(Vecmat_DimensionMismatch_Throws) {
Md A(2,2,0.0);
Vd x(3,0.0);
bool threw=false;
try { auto _ = numerics::vecmat<double>(x,A); (void)_; }
catch (const std::runtime_error&) { threw=true; }
CHECK(threw, "vecmat must throw on dimension mismatch");
}
TEST_CASE(Vecmat_Zero_Edges) {
Md A(0,3,0.0);
Vd x(0,0.0);
auto y = numerics::vecmat<double>(x,A); // 0×N times N×M → 0×M
CHECK(y.size()==3 && y[0]==0.0 && y[1]==0.0 && y[2]==0.0, "0-length x times A wrong");
Md B(3,0,0.0);
Vd z(3,1.0);
auto y2 = numerics::vecmat<double>(z,B); // 1x3 * 3x0 → 1x0
CHECK(y2.size()==0, "vecmat with N×0 result size wrong");
}
//
// ---------- Auto inside an outer parallel region (no accidental nested teams) ----------
// We just check correctness; performance is environment-dependent.
//
TEST_CASE(Vecmat_Auto_Inside_Outer_Parallel_Correctness) {
const uint64_t m=64, n=64;
Md A(m,n,1.0); Vd x(m,2.0);
//fill_deterministic(A); fill_deterministic(x);
Vd ref = numerics::vecmat<double>(x,A);
#ifdef _OPENMP
#pragma omp parallel for schedule(static)
#endif
for (int rep=0; rep<32; ++rep) {
auto y = numerics::vecmat_auto<double>(x,A);
if (!(y.nearly_equal_vec(ref))) {
throw TestFailure("vecmat_auto wrong under outer parallel region");
}
}
}
TEST_CASE(Vecmat_Speed_Sanity) {
const uint64_t m=4096, n=4096;
Md A(m,n,1.0); Vd x(m,2.0);
//fill_deterministic(A); fill_deterministic(x);
auto t0 = std::chrono::high_resolution_clock::now();
auto yS = numerics::vecmat<double>(x,A);
double ts = std::chrono::duration<double>(std::chrono::high_resolution_clock::now() - t0).count();
#ifdef _OPENMP
int threads = omp_get_max_threads();
#else
int threads = 1;
#endif
t0 = std::chrono::high_resolution_clock::now();
auto yP = numerics::vecmat_omp<double>(x,A);
double tp = std::chrono::duration<double>(std::chrono::high_resolution_clock::now() - t0).count();
CHECK((yS.nearly_equal_vec(yP)), "vecmat_omp != vecmat_serial (large)");
if (threads > 1) {
CHECK(tp <= ts, "vecmat_omp unexpectedly much slower than serial");
}
}