Ready for fvm steady case

This commit is contained in:
2025-09-21 20:57:02 +02:00
parent 3a53b6ebf7
commit 513f071748
59 changed files with 1813 additions and 983 deletions
+3 -1
View File
@@ -3,6 +3,8 @@
#include "./utils/vector.h"
#include "./utils/matrix.h"
#include "./numerics/initializers/eye.h"
namespace decomp{
// Stores PA = LU with partial pivoting (row permutations).
@@ -150,7 +152,7 @@ namespace decomp{
}
/// Overwrites Ainv with the solution of the factorised system against the
/// identity matrix, i.e. the inverse of the decomposed matrix.
///
/// @param Ainv Output matrix. It is reset to the rows x rows identity (for
///             non-zero rows) and then overwritten by inplace_solve.
void inplace_inverse(utils::Matrix<T>& Ainv){
    // Pass the dimension explicitly. With the default N = 0, inplace_eye keeps
    // whatever shape Ainv already has (and throws if it is not square), so an
    // empty or wrongly sized Ainv would never become rows x rows.
    numerics::inplace_eye<T>(Ainv, rows);
    // The identity acts as both the right-hand side and the destination.
    inplace_solve(Ainv, Ainv);
}
+138
View File
@@ -0,0 +1,138 @@
#pragma once
#include "./utils/matrix.h"
#include "./core/omp_config.h"
namespace numerics {
/// Turns A into an identity matrix (serial version).
///
/// @param A Matrix to overwrite.
/// @param N Target dimension. When non-zero, A is resized to N x N filled with
///          zeros. When zero, A keeps its current shape, which must be square.
/// @throws std::runtime_error if N == 0 and A is not square.
template <typename T>
void inplace_eye(utils::Matrix<T>& A, uint64_t N = 0){
    if (N != 0){
        // resize() already filled every entry with zero; only the diagonal is left.
        A.resize(N, N, T{0});
        for (uint64_t d = 0; d < N; ++d){
            A(d,d) = T{1};
        }
        return;
    }

    // No size requested: reuse the current (square) shape.
    N = A.rows();
    if (N != A.cols()) {
        throw std::runtime_error("inplace_eye: non-square matrix");
    }

    // Single pass that writes both the zeros and the ones.
    for (uint64_t r = 0; r < N; ++r){
        for (uint64_t c = 0; c < N; ++c){
            A(r,c) = (r == c) ? T{1} : T{0};
        }
    }
}
/// Turns A into an identity matrix using OpenMP worksharing loops.
///
/// @param A Matrix to overwrite.
/// @param N Target dimension. When non-zero, A is resized to N x N filled with
///          zeros. When zero, A keeps its current shape, which must be square.
/// @throws std::runtime_error if N == 0 and A is not square.
template <typename T>
void inplace_eye_omp(utils::Matrix<T>& A, uint64_t N = 0){
    const bool resize_requested = (N != 0);

    if (resize_requested){
        // The resize supplies the zero fill, so no separate clearing pass is needed.
        A.resize(N, N, T{0});
    }else{
        N = A.rows();
        if (N != A.cols()) {
            throw std::runtime_error("inplace_eye_omp: non-square matrix");
        }

        // Clear the existing contents through the flat storage pointer.
        T* flat = A.data();
        uint64_t count = N*N;
        #pragma omp parallel for schedule(static)
        for (uint64_t k = 0; k < count; ++k){
            flat[k] = T{0};
        }
    }

    // Write the ones on the diagonal.
    #pragma omp parallel for schedule(static)
    for (uint64_t d = 0; d < N; ++d){
        A(d,d) = T{1};
    }
}
/// Builds an identity matrix with the serial initializer.
///
/// @param N Requested dimension, forwarded to inplace_eye on a
///          default-constructed matrix.
/// @return The initialised matrix, by value.
template <typename T>
utils::Matrix<T> eye(uint64_t N){
    utils::Matrix<T> identity;
    inplace_eye(identity, N);
    return identity;
}
/// Builds an identity matrix with the OpenMP initializer.
///
/// @param N Requested dimension, forwarded to inplace_eye_omp on a
///          default-constructed matrix.
/// @return The initialised matrix, by value.
template <typename T>
utils::Matrix<T> eye_omp(uint64_t N){
    utils::Matrix<T> identity;
    inplace_eye_omp(identity, N);
    return identity;
}
/// Builds an N x N identity matrix, choosing between the serial and the OpenMP
/// initializer.
///
/// The OpenMP variant is used only when omp_config::omp_parallel_allowed()
/// reports true AND the element count exceeds four per available thread.
/// Every other case, including builds without OpenMP, takes the serial path.
///
/// @param N Matrix dimension.
/// @return The N x N identity matrix.
template <typename T>
utils::Matrix<T> eye_omp_auto(uint64_t N){
    const uint64_t work = N*N;
    utils::Matrix<T> A(N,N,T{0});

    #ifdef _OPENMP
    const bool can_parallel = omp_config::omp_parallel_allowed();
    const uint64_t threads = static_cast<uint64_t>(omp_get_max_threads());
    #else
    const bool can_parallel = false;
    const uint64_t threads = 1;
    #endif

    // Both conditions must hold. The previous `||` went parallel whenever
    // either was true, i.e. also when parallelism was disallowed or the
    // matrix was tiny.
    // NOTE(review): the helpers below clear A again even though it was
    // constructed zero-filled; harmless but redundant.
    if (can_parallel && work > threads * 4ull) {
        inplace_eye_omp(A, 0);
    }
    else{
        // Safe fallback
        inplace_eye(A, 0);
    }
    return A;
}
// Untested:
/// Turns A into an identity matrix, choosing between the serial and the
/// OpenMP initializer.
///
/// @param A Matrix to overwrite.
/// @param N Target dimension. When non-zero, A is resized to N x N. When zero,
///          A keeps its current shape, which must be square.
/// @throws std::runtime_error (from the selected helper) if N == 0 and A is
///         not square.
template <typename T>
void inplace_eye_omp_auto(utils::Matrix<T>& A, uint64_t N = 0){
    // Size the work estimate from the dimension that will actually be used.
    // With the default N = 0 the previous `N*N` was always zero.
    const uint64_t dim = (N != 0) ? N : A.rows();
    const uint64_t work = dim * dim;

    #ifdef _OPENMP
    const bool can_parallel = omp_config::omp_parallel_allowed();
    const uint64_t threads = static_cast<uint64_t>(omp_get_max_threads());
    #else
    const bool can_parallel = false;
    const uint64_t threads = 1;
    #endif

    // Go parallel only when it is allowed AND there is enough work.
    // N is forwarded so that a requested resize is honoured; it used to be
    // dropped by always passing 0.
    if (can_parallel && work > threads * 4ull) {
        inplace_eye_omp(A, N);
    }
    else{
        // Safe fallback
        inplace_eye(A, N);
    }
}
} // namespace numerics
+9
View File
@@ -0,0 +1,9 @@
#pragma once
//#include "./numerics/interpolation1d/interpolation1d_base.h"
#include "./numerics/interpolation1d/interpolation1d_barycentric.h"
#include "./numerics/interpolation1d/interpolation1d_cubic_spline.h"
#include "./numerics/interpolation1d/interpolation1d_linear.h"
#include "./numerics/interpolation1d/interpolation1d_polynomial.h"
#include "./numerics/interpolation1d/interpolation1d_rational.h"
@@ -1,6 +1,6 @@
#pragma once
#include "./numerics/interpolation1d_base.h"
#include "./numerics/interpolation1d/interpolation1d_base.h"
#include "./utils/vector.h"
#include "./numerics/min.h"
@@ -43,11 +43,9 @@ namespace numerics{
T interp(T x){
int64_t jlo;
if (cor){
std::cout << "Uses hunt()" << std::endl;
jlo = hunt(x);
}
else{
std::cout << "Uses locate()" << std::endl;
jlo = locate(x);
}
return rawinterp(jlo,x);
@@ -1,6 +1,6 @@
#pragma once
#include "./numerics/interpolation1d_base.h"
#include "./numerics/interpolation1d/interpolation1d_base.h"
//#include "./numerics/abs.h"
#include "./utils/vector.h"
@@ -1,6 +1,6 @@
#pragma once
#include "./numerics/interpolation1d_base.h"
#include "./numerics/interpolation1d/interpolation1d_base.h"
namespace numerics{
@@ -1,6 +1,6 @@
#pragma once
#include "./numerics/interpolation1d_base.h"
#include "./numerics/interpolation1d/interpolation1d_base.h"
#include "./numerics/abs.h"
#include "./utils/vector.h"
@@ -1,6 +1,6 @@
#pragma once
#include "./numerics/interpolation1d_base.h"
#include "./numerics/interpolation1d/interpolation1d_base.h"
#include "./utils/vector.h"
#include "./numerics/abs.h"
@@ -5,6 +5,8 @@
#include "./utils/vector.h"
#include "./utils/matrix.h"
#include "./numerics/initializers/eye.h"
#include <omp.h>
namespace numerics{
@@ -13,7 +15,7 @@ namespace numerics{
void inverse_gj(utils::Matrix<T>& A){
//utils::Matrix<T> B(A.rows(),A.cols(), T{0});
utils::Matrix<T> B;
B.eye(A.rows());
B = eye_omp_auto<T>(A.rows());
uint64_t icol{0}, irow{0}, rows{A.rows()}, cols{A.cols()};
-2
View File
@@ -3,8 +3,6 @@
#include "./decomp/lu.h"
namespace numerics{
template <typename T>
+85
View File
@@ -0,0 +1,85 @@
#pragma once
#include "./core/omp_config.h"
#include "./utils/matrix.h"
#include "./numerics/abs.h"
namespace numerics{
// -------------- Serial ----------------
/// Serial element-wise comparison of two matrices.
///
/// Floating-point element types are compared with an absolute tolerance;
/// all other element types are compared with `!=`.
///
/// @param A   Left operand.
/// @param B   Right operand.
/// @param tol Absolute tolerance, cast to T; only used for floating-point T.
/// @return false if the shapes differ or any element pair mismatches,
///         true otherwise. Stops at the first mismatch.
template <typename T>
bool matequal(const utils::Matrix<T>& A, const utils::Matrix<T>& B, double tol = 1e-9) {
    const uint64_t rows = A.rows(), cols = A.cols();
    if (rows != B.rows() || cols != B.cols()) {
        return false;
    }

    const bool decimal = std::is_floating_point<T>::value;

    for (uint64_t i = 0; i < rows; ++i) {
        for (uint64_t j = 0; j < cols; ++j) {
            if (decimal) {
                // Written as !(diff <= tol) rather than (diff > tol) so that a
                // NaN difference counts as a mismatch. This matches
                // matequal_omp, which tests `<= tol`; the old `> tol` form
                // reported NaN entries as equal. Assumes numerics::abs
                // propagates NaN.
                if (!(numerics::abs(A(i,j) - B(i,j)) <= static_cast<T>(tol))) {
                    return false;
                }
            } else if (A(i,j) != B(i,j)) {
                return false;
            }
        }
    }
    return true;
}
// -------------- Parallel ----------------
/// OpenMP element-wise comparison of two matrices.
///
/// Floating-point element types are compared with an absolute tolerance;
/// all other element types are compared with `==`. The per-element results
/// are combined with a logical-AND reduction, so the loop has no early exit.
///
/// @param A   Left operand.
/// @param B   Right operand.
/// @param tol Absolute tolerance, cast to T; only used for floating-point T.
/// @return false if the shapes differ or any element pair mismatches.
template <typename T>
bool matequal_omp(const utils::Matrix<T>& A, const utils::Matrix<T>& B, double tol = 1e-9) {
    const bool same_shape = (A.rows() == B.rows()) && (A.cols() == B.cols());
    if (!same_shape) {
        return false;
    }

    const uint64_t rows = A.rows(), cols = A.cols();
    const bool decimal = std::is_floating_point<T>::value;
    bool eq = true;

    #pragma omp parallel for collapse(2) schedule(static) reduction(&&:eq)
    for (uint64_t r = 0; r < rows; ++r) {
        for (uint64_t c = 0; c < cols; ++c) {
            // Once this thread's partial result is false, skip the comparison.
            if (!eq) {
                continue;
            }
            if (decimal) {
                eq = (numerics::abs(A(r,c) - B(r,c)) <= static_cast<T>(tol));
            } else {
                eq = (A(r,c) == B(r,c));
            }
        }
    }
    return eq;
}
// -------------- Auto OpenMP ----------------
/// Compares two matrices, choosing between the serial and the OpenMP
/// implementation.
///
/// The OpenMP variant is used only when omp_config::omp_parallel_allowed()
/// reports true AND the element count exceeds four per available thread.
/// Every other case, including builds without OpenMP, uses matequal().
///
/// @param A   Left operand.
/// @param B   Right operand.
/// @param tol Absolute tolerance forwarded to the selected implementation.
/// @return false if the shapes differ or the selected comparison finds a
///         mismatch.
template <typename T>
bool matequal_auto(const utils::Matrix<T>& A, const utils::Matrix<T>& B, double tol = 1e-9) {
    if (A.rows() != B.rows() || A.cols() != B.cols()) {
        return false;
    }

    const uint64_t work = A.rows() * A.cols();

    #ifdef _OPENMP
    const bool can_parallel = omp_config::omp_parallel_allowed();
    const uint64_t threads = static_cast<uint64_t>(omp_get_max_threads());
    #else
    const bool can_parallel = false;
    const uint64_t threads = 1;
    #endif

    // Both conditions must hold. The previous `||` selected the parallel path
    // even when parallelism was disallowed or the matrices were tiny.
    if (can_parallel && work > threads * 4ull) {
        return matequal_omp(A,B,tol);
    }
    else{
        // Safe fallback
        return matequal(A,B,tol);
    }
}
} // namespace numerics
+7 -5
View File
@@ -85,21 +85,23 @@ utils::Matrix<T> matmul_auto(const utils::Matrix<T>& A,
const uint64_t m=A.rows(), p=B.cols();
const uint64_t work = m * p;
bool can_parallel = omp_config::omp_parallel_allowed();
#ifdef _OPENMP
int threads = omp_get_max_threads();
bool can_parallel = omp_config::omp_parallel_allowed();
uint64_t threads = static_cast<uint64_t>(omp_get_max_threads());
#else
int threads = 1;
bool can_parallel = false;
uint64_t threads = 1;
#endif
// Tiny problems: serial is cheapest.
if (!can_parallel || work < static_cast<uint64_t>(threads)*4ull) {
if (!can_parallel || work < threads*4ull) {
return matmul(A,B);
}
// Plenty of (i,j) work → collapse(2) is a great default.
else if (work >= 8ull * static_cast<uint64_t>(threads)) {
else if (work >= 8ull * threads) {
return matmul_collapse_omp(A,B);
}
// Many rows and very few columns → rows-only cheaper overhead.
+12 -4
View File
@@ -38,16 +38,24 @@ namespace numerics{
const uint64_t m = A.rows();
const uint64_t n = A.cols();
utils::Vector<T> y(m, T{0});
utils::Vector<T> y(m, T{0}); // <-- y has length m (rows)
const T* xptr = x.data();
const T* Aptr = A.data(); // row-major: A(i,j) == Aptr[i*n + j]
// Each row i is an independent dot product: y[i] = dot(A[i,*], x)
#pragma omp parallel for schedule(static)
for (uint64_t i = 0; i < m; ++i) {
T acc = T{0};
const T* row = Aptr + i * n; // contiguous row i
T acc = T{0};
#pragma omp simd reduction(+:acc)
for (uint64_t j = 0; j < n; ++j) {
acc += A(i, j) * x[j];
acc += row[j] * xptr[j];
}
y[i] = acc;
}
}
return y;
}
+2
View File
@@ -1,6 +1,8 @@
// "./numerics/numerics.h"
#pragma once
#include "./numerics/initializers/eye.h"
#include "./numerics/matequal.h"
#include "./numerics/transpose.h"
#include "./numerics/inverse.h"
#include "./numerics/matmul.h"
+107 -2
View File
@@ -3,12 +3,13 @@
#include "./utils/matrix.h"
#include "./core/omp_config.h"
namespace numerics{
template <typename T>
void inplace_transpose(utils::Matrix<T>& A){
void inplace_transpose_square(utils::Matrix<T>& A){
const uint64_t rows = A.rows();
const uint64_t cols = A.cols();
@@ -27,13 +28,54 @@ namespace numerics{
}
}
/// Transposes a square matrix in place, with the outer row loop distributed
/// over OpenMP threads.
///
/// Each (r, c) pair above the diagonal is swapped with its mirror (c, r).
/// Every pair is visited by exactly one outer iteration.
///
/// @param A Square matrix to transpose.
/// @throws std::runtime_error if A is not square.
template <typename T>
void inplace_transpose_square_omp(utils::Matrix<T>& A){
    const uint64_t n_rows = A.rows();
    const uint64_t n_cols = A.cols();

    if (n_rows != n_cols){
        throw std::runtime_error("inplace_transpose only valid for square matrices");
    }

    #pragma omp parallel for schedule(static)
    for (uint64_t r = 0; r < n_rows; ++r){
        for (uint64_t c = r + 1; c < n_cols; ++c){
            // Manual three-step swap of the mirrored entries.
            T upper = A(r,c);
            A(r,c) = A(c,r);
            A(c,r) = upper;
        }
    }
}
/// Returns the transpose of A as a new matrix (serial version).
///
/// @param A Input matrix of shape rows x cols; not modified.
/// @return A cols x rows matrix B with B(j,i) == A(i,j).
template <typename T>
utils::Matrix<T> transpose(const utils::Matrix<T>& A){
    const uint64_t rows = A.rows();
    const uint64_t cols = A.cols();

    // B was declared twice (once with T{}, once with T{0}); only the T{0}
    // declaration is kept.
    utils::Matrix<T> B(cols, rows, T{0});

    for (uint64_t i = 0; i < rows; ++i){
        for (uint64_t j = 0; j < cols; ++j){
            B(j,i) = A(i,j);
        }
    }
    return B;
}
template <typename T>
utils::Matrix<T> transpose_omp(const utils::Matrix<T>& A){
const uint64_t rows = A.rows();
const uint64_t cols = A.cols();
utils::Matrix<T> B(cols, rows, T{0});
#pragma omp parallel for collapse(2) schedule(static)
for (uint64_t i = 0; i < rows; ++i){
for (uint64_t j = 0; j < cols; ++j){
B(j,i) = A(i,j);
@@ -43,6 +85,69 @@ namespace numerics{
}
// -------- Auto selectors --------
/// Transposes a square matrix in place, choosing between the serial and the
/// OpenMP implementation.
///
/// The OpenMP variant is used only when omp_config::omp_parallel_allowed()
/// reports true and the number of element swaps exceeds four per available
/// thread.
///
/// @param A Square matrix to transpose.
/// @throws std::runtime_error if A is not square.
template <typename T>
void inplace_transpose_square_auto(utils::Matrix<T>& A) {
    const uint64_t n = A.rows();
    const uint64_t n_cols = A.cols();
    if (n != n_cols) {
        throw std::runtime_error("inplace_transpose_auto: only valid for square matrices");
    }

    // Number of off-diagonal pairs that have to be exchanged.
    const std::uint64_t swaps = static_cast<std::uint64_t>((n * (n - 1)) / 2);

    #ifdef _OPENMP
    bool parallel_ok = omp_config::omp_parallel_allowed();
    uint64_t max_threads = static_cast<std::uint64_t>(omp_get_max_threads());
    #else
    bool parallel_ok = false;
    uint64_t max_threads = 1;
    #endif

    const bool go_parallel = parallel_ok && (swaps > max_threads * 4ull);
    if (!go_parallel) {
        inplace_transpose_square(A);
        return;
    }
    inplace_transpose_square_omp(A);
}
/// Returns the transpose of A, choosing an implementation automatically.
///
/// Square inputs are copied and transposed in place through
/// inplace_transpose_square_auto. Rectangular inputs use transpose_omp only
/// when omp_config::omp_parallel_allowed() reports true AND the element count
/// exceeds four per available thread; otherwise the serial transpose() is used.
///
/// @param A Input matrix; not modified.
/// @return A new matrix holding the transpose of A.
template <typename T>
utils::Matrix<T> transpose_auto(const utils::Matrix<T>& A) {
    const uint64_t rows = A.rows();
    const uint64_t cols = A.cols();

    if (rows == cols){
        // Start from a copy of A. The previous code transposed a freshly
        // zero-filled matrix and therefore returned all zeros.
        utils::Matrix<T> B = A;
        inplace_transpose_square_auto(B);
        return B;
    }

    const uint64_t work = rows * cols;

    #ifdef _OPENMP
    const bool can_parallel = omp_config::omp_parallel_allowed();
    const uint64_t threads = static_cast<uint64_t>(omp_get_max_threads());
    #else
    const bool can_parallel = false;
    const uint64_t threads = 1;
    #endif

    // Parallel only when allowed AND worthwhile. The previous
    // `!can_parallel || ...` chose the OpenMP path precisely when
    // parallelism was not allowed.
    if (can_parallel && work > threads * 4ull) {
        return transpose_omp(A);
    } else {
        return transpose(A);
    }
}
} // namespace numerics
#endif // _transpose_n_
+6 -37
View File
@@ -3,6 +3,11 @@
#include "./utils/vector.h"
#ifdef _OPENMP
#include <omp.h>
#endif
#include <iomanip>
namespace utils{
@@ -32,42 +37,6 @@ public:
// Mutable raw pointer to the matrix storage, as returned by data_.data().
T* data() noexcept { return data_.data(); }
// Read-only overload of the raw storage pointer for const matrices.
const T* data() const noexcept { return data_.data(); }
//# MATRIX: equal operator #
// Exact equality: same shape and every element pair compares equal under `!=`.
// Returns false at the first mismatch.
bool operator==(const Matrix<T>& A) const {
    const bool same_shape = (rows_ == A.rows_) && (cols_ == A.cols_);
    if (!same_shape) {
        return false;
    }
    for (uint64_t r = 0; r < rows_; ++r) {
        // Offset of row r in the flat storage.
        const uint64_t row_start = r * cols_;
        for (uint64_t c = 0; c < cols_; ++c) {
            if (data_[row_start + c] != A(r,c)) {
                return false;
            }
        }
    }
    return true;
}
// Negation of operator==.
bool operator!=(const Matrix<T>& A) const {
    const bool same = (*this == A);
    return !same;
}
bool nearly_equal(const Matrix<T>& A, T tol = static_cast<T>(1e-9)) const {
if (rows_ != A.rows() || cols_ != A.cols()) return false;
for (uint64_t i = 0; i < rows_; ++i)
for (uint64_t j = 0; j < cols_; ++j) {
T a = (*this)(i,j);
T b = A(i,j);
if (std::is_floating_point<T>::value) {
if (std::fabs(a - b) > tol) return false;
} else {
if (a != b) return false;
}
}
return true;
}
// Resets this matrix to the rows_cols x rows_cols identity: all previous
// contents are discarded, every entry is zeroed, and the diagonal is set to one.
void eye(uint64_t rows_cols){
    rows_ = rows_cols;
    cols_ = rows_cols;
    // assign() replaces the whole contents with zeros in one step.
    data_.assign(rows_cols*rows_cols, T{0});
    for (uint64_t d = 0; d < rows_; ++d){
        data_[d * cols_ + d] = T{1};
    }
}
void resize(uint64_t rows, uint64_t cols, const T& value = T(0)){
rows_ = rows;
@@ -186,7 +155,7 @@ private:
std::vector<T> data_;
};
typedef Matrix<int> Mi;
typedef Matrix<int64_t> Mi;
typedef Matrix<float> Mf;
typedef Matrix<double> Md;
@@ -0,0 +1,22 @@
#pragma once
#include "./utils/matrix.h"
#include "./numerics/matequal.h"
namespace utils {
// definitions of the previously-declared members
// Serial tolerance-based comparison; forwards to numerics::matequal.
// tol is passed on as matequal's tolerance argument.
template <typename T>
inline bool Matrix<T>::equals(const Matrix<T>& B, T tol) const {
    const Matrix<T>& self = *this;
    return numerics::matequal(self, B, tol);
}
// OpenMP tolerance-based comparison; forwards to numerics::matequal_omp.
// tol is passed on as matequal_omp's tolerance argument.
template <typename T>
inline bool Matrix<T>::equals_omp(const Matrix<T>& B, T tol) const {
    const Matrix<T>& self = *this;
    return numerics::matequal_omp(self, B, tol);
}
// Comparison with automatic serial/OpenMP selection; forwards to
// numerics::matequal_auto. tol is passed on as its tolerance argument.
template <typename T>
inline bool Matrix<T>::equals_auto(const Matrix<T>& B, T tol) const {
    const Matrix<T>& self = *this;
    return numerics::matequal_auto(self, B, tol);
}
} // namespace utils
+9
View File
@@ -6,6 +6,11 @@
#include <random>
#include <cstdint>
#include <type_traits>
#include <stdexcept>
#include <cmath>
namespace utils{
//######################################
//# VECTOR TYPE #
@@ -49,6 +54,10 @@ public:
v.resize(new_size, value);
}
// Mutable raw pointer to the vector storage, as returned by v.data().
T* data() noexcept { return v.data(); }
// Read-only overload of the raw storage pointer for const vectors.
const T* data() const noexcept { return v.data(); }
//###########################################
//# VECTOR: == and != #
//###########################################