Sync public subset from Flux
This commit is contained in:
236
test/test_matvec.cpp
Normal file
236
test/test_matvec.cpp
Normal file
@@ -0,0 +1,236 @@
|
||||
|
||||
#include "test_common.h"
|
||||
|
||||
#include "./numerics/matvec.h" // numerics::matvec / inplace_transpose
|
||||
#include <chrono>
|
||||
|
||||
using utils::Vi; using utils::Vf; using utils::Vd;
|
||||
using utils::Mi; using utils::Mf; using utils::Md;
|
||||
|
||||
|
||||
|
||||
// ------------------------------------------------------------
|
||||
// matvec: y = A * x
|
||||
// ------------------------------------------------------------
|
||||
TEST_CASE(Matvec_Serial_Simple) {
|
||||
// A = [[1,2,3],
|
||||
// [4,5,6]]
|
||||
Md A(2,3,0.0);
|
||||
A(0,0)=1; A(0,1)=2; A(0,2)=3;
|
||||
A(1,0)=4; A(1,1)=5; A(1,2)=6;
|
||||
Vd x(3,0.0); x[0]=7; x[1]=8; x[2]=9;
|
||||
|
||||
auto y = numerics::matvec<double>(A,x); // [ 1*7+2*8+3*9 , 4*7+5*8+6*9 ] = [50, 122]
|
||||
CHECK(y.size()==2, "matvec size wrong");
|
||||
CHECK(y[0]==50.0 && y[1]==122.0, "matvec values wrong");
|
||||
}
|
||||
|
||||
TEST_CASE(Matvec_OMP_Equals_Serial) {
|
||||
Md A(3,3,0.0);
|
||||
// A = I * 2
|
||||
for (uint64_t i=0;i<3;++i) A(i,i)=2.0;
|
||||
Vd x(3,0.0); x[0]=1; x[1]=2; x[2]=3;
|
||||
|
||||
auto ys = numerics::matvec<double>(A,x);
|
||||
auto yp = numerics::matvec_omp<double>(A,x);
|
||||
|
||||
CHECK((ys.nearly_equal_vec(yp)), "matvec_omp != matvec");
|
||||
}
|
||||
|
||||
TEST_CASE(Matvec_Auto_Equals_Serial) {
|
||||
Md A(2,2,0.0); A(0,0)=2; A(0,1)=1; A(1,0)=0.5; A(1,1)=3;
|
||||
Vd x(2,0.0); x[0]=4; x[1]=5;
|
||||
|
||||
auto ys = numerics::matvec<double>(A,x);
|
||||
auto ya = numerics::matvec_auto<double>(A,x);
|
||||
|
||||
CHECK((ys.nearly_equal_vec(ya)), "matvec_auto != serial");
|
||||
}
|
||||
|
||||
TEST_CASE(Matvec_DimensionMismatch_Throws) {
|
||||
Md A(2,3,0.0);
|
||||
Vd x(4,0.0);
|
||||
bool threw=false;
|
||||
try { auto _ = numerics::matvec<double>(A,x); (void)_; }
|
||||
catch (const std::runtime_error&) { threw=true; }
|
||||
CHECK(threw, "matvec must throw on dimension mismatch");
|
||||
}
|
||||
|
||||
TEST_CASE(Matvec_Zero_Edges) {
|
||||
Md A(0,3,0.0); // 0x3
|
||||
Vd x(3,1.0);
|
||||
auto y = numerics::matvec<double>(A,x);
|
||||
CHECK(y.size()==0, "0xN * x should return size 0 vector");
|
||||
|
||||
Md B(2,0,0.0); // 2x0
|
||||
Vd z(0,0.0);
|
||||
auto y2 = numerics::matvec<double>(B,z);
|
||||
CHECK(y2.size()==2 && y2[0]==0.0 && y2[1]==0.0, "N×0 * 0 should return zeros of size N");
|
||||
}
|
||||
|
||||
TEST_CASE(Matvec_Float_Tolerance) {
|
||||
Mf A(2,2,0.0f); A(0,0)=1.0f; A(0,1)=2.0f; A(1,0)=3.0f; A(1,1)=4.0f;
|
||||
Vf x(2,0.0f); x[0]=0.1f; x[1]=0.2f;
|
||||
|
||||
auto y1 = numerics::matvec<float>(A,x);
|
||||
auto y2 = numerics::matvec_omp<float>(A,x);
|
||||
|
||||
CHECK((y1.nearly_equal_vec(y2,1e-6f)), "matvec float omp mismatch");
|
||||
}
|
||||
//
|
||||
// ---------- Auto inside an outer parallel region (no accidental nested teams) ----------
|
||||
// We just check correctness; performance is environment-dependent.
|
||||
//
|
||||
TEST_CASE(Matvec_Auto_Inside_Outer_Parallel_Correctness) {
|
||||
const uint64_t m=64, n=64;
|
||||
Md A(m,n,1.0); Vd x(n,2.0);
|
||||
//fill_deterministic(A); fill_deterministic(x);
|
||||
Vd ref = numerics::matvec<double>(A,x);
|
||||
|
||||
// Call auto inside an outer team
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for schedule(static)
|
||||
#endif
|
||||
for (int rep=0; rep<32; ++rep) {
|
||||
auto y = numerics::matvec_auto<double>(A,x);
|
||||
// Each thread checks its own result equals reference
|
||||
if (!(y.nearly_equal_vec(ref))) {
|
||||
throw TestFailure("matvec_auto wrong under outer parallel region");
|
||||
}
|
||||
}
|
||||
}
|
||||
TEST_CASE(Matvec_Speed_Sanity) {
|
||||
const uint64_t m=4096, n=4096; // ~16M MACs; adjust if needed
|
||||
Md A(m,n,1.0); Vd x(n,2.0);
|
||||
//fill_deterministic(A); fill_deterministic(x);
|
||||
|
||||
auto t0 = std::chrono::high_resolution_clock::now();
|
||||
auto yS = numerics::matvec(A,x);
|
||||
double tp = std::chrono::duration<double>(std::chrono::high_resolution_clock::now() - t0).count();
|
||||
|
||||
#ifdef _OPENMP
|
||||
int threads = omp_get_max_threads();
|
||||
#else
|
||||
int threads = 1;
|
||||
#endif
|
||||
|
||||
t0 = std::chrono::high_resolution_clock::now();
|
||||
auto yP = numerics::matvec_omp(A,x);
|
||||
double ts = std::chrono::duration<double>(std::chrono::high_resolution_clock::now() - t0).count();
|
||||
|
||||
CHECK((yS.nearly_equal_vec(yP)), "matvec_omp != matvec_serial (large)");
|
||||
// Only enforce basic sanity if we *can* use >1 threads:
|
||||
if (threads > 1) {
|
||||
// Be generous: just require not significantly slower.
|
||||
CHECK(tp >= ts, "matvec_omp unexpectedly much slower than serial");
|
||||
}
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------
|
||||
// vecmat: y = x * A
|
||||
// ------------------------------------------------------------
|
||||
TEST_CASE(Vecmat_Serial_Simple) {
|
||||
// A = [[1,2],
|
||||
// [3,4],
|
||||
// [5,6]] (3x2)
|
||||
Md A(3,2,0.0);
|
||||
A(0,0)=1; A(0,1)=2;
|
||||
A(1,0)=3; A(1,1)=4;
|
||||
A(2,0)=5; A(2,1)=6;
|
||||
|
||||
Vd x(3,0.0); x[0]=7; x[1]=8; x[2]=9;
|
||||
|
||||
auto y = numerics::vecmat<double>(x,A); // 1*7+3*8+5*9= 76 ; 2*7+4*8+6*9=100
|
||||
CHECK(y.size()==2, "vecmat size wrong");
|
||||
CHECK(y[0]==76.0 && y[1]==100.0, "vecmat values wrong");
|
||||
}
|
||||
|
||||
TEST_CASE(Vecmat_OMP_Equals_Serial) {
|
||||
Md A(2,2,0.0); A(0,0)=2; A(0,1)=1; A(1,0)=5; A(1,1)=-1;
|
||||
Vd x(2,0.0); x[0]=0.5; x[1]=1.5;
|
||||
|
||||
auto ys = numerics::vecmat<double>(x,A);
|
||||
auto yp = numerics::vecmat_omp<double>(x,A);
|
||||
|
||||
CHECK((ys.nearly_equal_vec(yp)), "vecmat_omp != vecmat");
|
||||
}
|
||||
|
||||
TEST_CASE(Vecmat_Auto_Equals_Serial) {
|
||||
Md A(2,3,0.0);
|
||||
A(0,0)=1; A(0,1)=2; A(0,2)=3;
|
||||
A(1,0)=4; A(1,1)=5; A(1,2)=6;
|
||||
Vd x(2,0.0); x[0]=1; x[1]=2;
|
||||
|
||||
auto ys = numerics::vecmat<double>(x,A);
|
||||
auto ya = numerics::vecmat_auto<double>(x,A);
|
||||
|
||||
CHECK((ys.nearly_equal_vec(ya)), "vecmat_auto != serial");
|
||||
}
|
||||
|
||||
TEST_CASE(Vecmat_DimensionMismatch_Throws) {
|
||||
Md A(2,2,0.0);
|
||||
Vd x(3,0.0);
|
||||
bool threw=false;
|
||||
try { auto _ = numerics::vecmat<double>(x,A); (void)_; }
|
||||
catch (const std::runtime_error&) { threw=true; }
|
||||
CHECK(threw, "vecmat must throw on dimension mismatch");
|
||||
}
|
||||
|
||||
TEST_CASE(Vecmat_Zero_Edges) {
|
||||
Md A(0,3,0.0);
|
||||
Vd x(0,0.0);
|
||||
auto y = numerics::vecmat<double>(x,A); // 0×N times N×M → 0×M
|
||||
CHECK(y.size()==3 && y[0]==0.0 && y[1]==0.0 && y[2]==0.0, "0-length x times A wrong");
|
||||
|
||||
Md B(3,0,0.0);
|
||||
Vd z(3,1.0);
|
||||
auto y2 = numerics::vecmat<double>(z,B); // 1x3 * 3x0 → 1x0
|
||||
CHECK(y2.size()==0, "vecmat with N×0 result size wrong");
|
||||
}
|
||||
|
||||
//
|
||||
// ---------- Auto inside an outer parallel region (no accidental nested teams) ----------
|
||||
// We just check correctness; performance is environment-dependent.
|
||||
//
|
||||
TEST_CASE(Vecmat_Auto_Inside_Outer_Parallel_Correctness) {
|
||||
const uint64_t m=64, n=64;
|
||||
Md A(m,n,1.0); Vd x(m,2.0);
|
||||
//fill_deterministic(A); fill_deterministic(x);
|
||||
Vd ref = numerics::vecmat<double>(x,A);
|
||||
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for schedule(static)
|
||||
#endif
|
||||
for (int rep=0; rep<32; ++rep) {
|
||||
auto y = numerics::vecmat_auto<double>(x,A);
|
||||
if (!(y.nearly_equal_vec(ref))) {
|
||||
throw TestFailure("vecmat_auto wrong under outer parallel region");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
TEST_CASE(Vecmat_Speed_Sanity) {
|
||||
const uint64_t m=4096, n=4096;
|
||||
Md A(m,n,1.0); Vd x(m,2.0);
|
||||
//fill_deterministic(A); fill_deterministic(x);
|
||||
|
||||
auto t0 = std::chrono::high_resolution_clock::now();
|
||||
auto yS = numerics::vecmat<double>(x,A);
|
||||
double ts = std::chrono::duration<double>(std::chrono::high_resolution_clock::now() - t0).count();
|
||||
|
||||
#ifdef _OPENMP
|
||||
int threads = omp_get_max_threads();
|
||||
#else
|
||||
int threads = 1;
|
||||
#endif
|
||||
|
||||
t0 = std::chrono::high_resolution_clock::now();
|
||||
auto yP = numerics::vecmat_omp<double>(x,A);
|
||||
double tp = std::chrono::duration<double>(std::chrono::high_resolution_clock::now() - t0).count();
|
||||
|
||||
CHECK((yS.nearly_equal_vec(yP)), "vecmat_omp != vecmat_serial (large)");
|
||||
if (threads > 1) {
|
||||
CHECK(tp <= ts, "vecmat_omp unexpectedly much slower than serial");
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user