Ready for fvm steady case
This commit is contained in:
@@ -85,21 +85,23 @@ utils::Matrix<T> matmul_auto(const utils::Matrix<T>& A,
|
||||
const uint64_t m=A.rows(), p=B.cols();
|
||||
const uint64_t work = m * p;
|
||||
|
||||
bool can_parallel = omp_config::omp_parallel_allowed();
|
||||
|
||||
|
||||
#ifdef _OPENMP
|
||||
int threads = omp_get_max_threads();
|
||||
bool can_parallel = omp_config::omp_parallel_allowed();
|
||||
uint64_t threads = static_cast<uint64_t>(omp_get_max_threads());
|
||||
#else
|
||||
int threads = 1;
|
||||
bool can_parallel = false;
|
||||
uint64_t threads = 1;
|
||||
#endif
|
||||
|
||||
|
||||
// Tiny problems: serial is cheapest.
|
||||
if (!can_parallel || work < static_cast<uint64_t>(threads)*4ull) {
|
||||
if (!can_parallel || work < threads*4ull) {
|
||||
return matmul(A,B);
|
||||
}
|
||||
// Plenty of (i,j) work → collapse(2) is a great default.
|
||||
else if (work >= 8ull * static_cast<uint64_t>(threads)) {
|
||||
else if (work >= 8ull * threads) {
|
||||
return matmul_collapse_omp(A,B);
|
||||
}
|
||||
// Many rows and very few columns → parallelizing over rows only has lower overhead.
|
||||
|
||||
Reference in New Issue
Block a user