Finishing up and starting lu decomp

This commit is contained in:
2025-09-13 21:44:20 +02:00
parent 320436ce98
commit 88087ea6a6
24 changed files with 1502 additions and 699 deletions
+17 -78
View File
@@ -5,100 +5,39 @@
#include "./utils/vector.h"
#include "./utils/matrix.h"
#include "./numerics/inverse/inverse_gauss_jordan.h"
#include "./numerics/inverse/inverse_lu.h"
#include <omp.h>
namespace numerics{
// Inverts the matrix A in place using the algorithm selected by `method`.
//
// Parameters:
//   A      - matrix to invert; it is modified in place.
//   method - name of the algorithm; only "Gauss-Jordan" is accepted here.
//
// Throws std::runtime_error when A.rows() != A.cols(), when the inline
// elimination below meets a zero pivot, or when `method` is not "Gauss-Jordan".
//
// NOTE(review): this span looks like a diff rendering in which removed and
// added lines are interleaved. The inline elimination below reads the same as
// the body of inverse_gj(), which is also called further down, and the braces
// do not balance as shown - confirm against the real file before editing.
template <typename T>
void inplace_inverse(utils::Matrix<T>& A, std::string method = "Gauss-Jordan"){
// Only square matrices are accepted.
if (A.rows() != A.cols()) {
throw std::runtime_error("inplace_inverse: non-square matrix");
}
if (method == "Gauss-Jordan"){
// B starts as an all-zero matrix; it is swapped and scaled alongside A below
// but is never read back into A or returned.
utils::Matrix<T> B(A.rows(),A.cols(), T{0});
uint64_t icol{0}, irow{0}, rows{A.rows()}, cols{A.cols()};
double big, dum, pivinv, temp;
// indxr[i] / indxc[i] record the row and column of the i-th pivot;
// ipiv[c] is incremented once column c has been used as a pivot column.
utils::Vi indxc(rows,0), indxr(rows,0), ipiv(rows,0);
//for (uint64_t j = 0; j < N; ++j){ ipiv[j] = 0;}
for (uint64_t i = 0; i < rows; i++){
// Pivot search: largest |A(j,k)| over rows j and columns k not yet used.
big = 0.0;
for (uint64_t j = 0; j < rows; j++){
if (ipiv[j] != 1){
for (uint64_t k = 0; k < rows; k++){
if (ipiv[k] == 0){
// NOTE(review): `abs` is unqualified; which overload is selected depends on
// the headers in scope - confirm it is a floating-point overload.
if (abs(A(j,k)) >= big){
big = abs(A(j,k));
irow = j;
icol = k;
}
}
}
}
}
ipiv[icol]++;
// Move the pivot onto the diagonal by exchanging rows irow and icol.
if (irow != icol){
for (uint64_t l = 0; l < rows; l++){ // SWAP
temp = A(irow,l);
A(irow,l) = A(icol,l);
A(icol,l) = temp;
}
for (uint64_t l = 0; l < cols; l++){ // SWAP temp matrix
temp = B(irow,l);
B(irow,l) = B(icol,l);
B(icol,l) = temp;
}
}
indxr[i] = irow;
indxc[i] = icol;
// A zero pivot is reported as a singular matrix.
if (A(icol,icol) == 0.0){
throw std::runtime_error("utill:inplace_inverse('Gauss-Jordan' - Singular Matrix");
}
// Scale the pivot row; the diagonal entry is set to 1 first so that after
// scaling it holds 1/pivot.
pivinv= 1.0/A(icol,icol);
A(icol,icol)=1.0;
for (uint64_t l = 0; l < rows; l++){
A(icol,l) *= pivinv;
}
for (uint64_t l = 0; l < cols; l++){
B(icol,l) *= pivinv;
}
// Eliminate the pivot column from every other row.
for (uint64_t ll = 0; ll < rows; ll++){
if (ll != icol){
dum = A(ll,icol);
A(ll,icol) = 0;
for (uint64_t l = 0; l < rows; l++){
A(ll,l) -= A(icol,l)*dum;
}
for (uint64_t l = 0; l < rows; l++){
B(ll,l) -= B(icol,l)*dum;
}
}
}
}
//m = temp_m;
// Walk the recorded pivots in reverse and exchange columns indxr[l] and
// indxc[l] wherever they differ.
for (int64_t l = rows-1; l >= 0; l--){
if (indxr[l] != indxc[l]){
for (uint64_t k = 0; k < rows; k++){
temp = A(k,indxr[l]);
A(k,indxr[l]) = A(k,indxc[l]);
A(k,indxc[l]) = temp;
}
}
}
}
// Delegates the inversion to inverse_gj().
inverse_gj(A);
}
else{
// Any other method name is rejected.
throw std::runtime_error("numerics::inplace_inverse(" + method + ") - Not implemented yet \r \nImplemented: 'Gauss-Jordan',");
}
}
}
/// @brief Return the inverse of A without modifying A.
///
/// A is copied and the copy is inverted with inplace_inverse(); the input is
/// only read, so it is taken by const reference (const matrices and
/// temporaries are accepted as well as mutable lvalues).
///
/// @param A      matrix to invert.
/// @param method algorithm name forwarded unchanged to inplace_inverse().
/// @return the inverted copy of A.
/// @throws whatever inplace_inverse() throws for the given matrix and method.
template <typename T>
utils::Matrix<T> inverse(const utils::Matrix<T>& A, std::string method = "Gauss-Jordan"){
    utils::Matrix<T> B = A;
    inplace_inverse(B, method);
    return B;
}
@@ -0,0 +1,94 @@
#ifndef _inverse_gj_n_
#define _inverse_gj_n_
#include "./utils/vector.h"
#include "./utils/matrix.h"
#include <cmath>
#include <cstdint>
#include <stdexcept>
#include <omp.h>
namespace numerics{
/// @brief Invert a square matrix in place by Gauss-Jordan elimination with
///        full pivoting.
///
/// For each of the n elimination steps the entry of largest magnitude among
/// the rows and columns not yet used as pivots is selected, moved onto the
/// diagonal by a row exchange, used to normalise its row and then eliminated
/// from every other row. The row exchanges are undone at the end by the
/// matching column exchanges in reverse order.
///
/// The pivot magnitude, the reciprocal pivot and the elimination factor are
/// carried in double; row/column exchanges move elements as T.
///
/// @tparam T element type of the matrix.
/// @param  A matrix to invert; overwritten with the result.
/// @throws std::runtime_error if A is not square, or if a selected pivot is
///         exactly zero (singular matrix).
template <typename T>
void inverse_gj(utils::Matrix<T>& A){
    const uint64_t n = A.rows();
    // Every loop below indexes A as n x n, so a non-square input is rejected.
    if (A.cols() != n) {
        throw std::runtime_error("inverse_gj: non-square matrix");
    }

    uint64_t icol{0}, irow{0};
    // indxr[i] / indxc[i]: row and column of the i-th pivot.
    // ipiv[c]: incremented once column c has served as a pivot column.
    utils::Vi indxc(n,0), indxr(n,0), ipiv(n,0);

    for (uint64_t i = 0; i < n; i++){
        // Pivot search over rows and columns that have not been used yet.
        double big = 0.0;
        for (uint64_t j = 0; j < n; j++){
            if (ipiv[j] != 1){
                for (uint64_t k = 0; k < n; k++){
                    if (ipiv[k] == 0){
                        // std::abs (not the unqualified C abs) so that
                        // floating-point magnitudes are never truncated to int.
                        const double mag = std::abs(A(j,k));
                        if (mag >= big){
                            big = mag;
                            irow = j;
                            icol = k;
                        }
                    }
                }
            }
        }
        ipiv[icol]++;

        // Bring the pivot onto the diagonal by exchanging rows irow and icol.
        if (irow != icol){
            for (uint64_t l = 0; l < n; l++){
                const T held = A(irow,l);
                A(irow,l) = A(icol,l);
                A(icol,l) = held;
            }
        }
        indxr[i] = irow;
        indxc[i] = icol;

        if (A(icol,icol) == 0.0){
            throw std::runtime_error("utill:inplace_inverse('Gauss-Jordan' - Singular Matrix");
        }

        // Normalise the pivot row. The diagonal is set to 1 first so that it
        // ends up holding 1/pivot after the scaling.
        const double pivinv = 1.0/A(icol,icol);
        A(icol,icol) = 1.0;
        for (uint64_t l = 0; l < n; l++){
            A(icol,l) *= pivinv;
        }

        // Eliminate the pivot column from all other rows.
        for (uint64_t ll = 0; ll < n; ll++){
            if (ll != icol){
                const double dum = A(ll,icol);
                A(ll,icol) = 0;
                for (uint64_t l = 0; l < n; l++){
                    A(ll,l) -= A(icol,l)*dum;
                }
            }
        }
    }

    // Undo the row exchanges: swap columns indxr[l] and indxc[l] in the
    // reverse of the order in which the pivots were chosen.
    for (int64_t l = static_cast<int64_t>(n) - 1; l >= 0; l--){
        if (indxr[l] != indxc[l]){
            for (uint64_t k = 0; k < n; k++){
                const T held = A(k,indxr[l]);
                A(k,indxr[l]) = A(k,indxc[l]);
                A(k,indxc[l]) = held;
            }
        }
    }
}
} // namespace numerics
#endif // _inverse_gj_n_
+83 -4
View File
@@ -3,10 +3,12 @@
#include "./utils/matrix.h"
#include "./core/omp_config.h"
namespace numerics{
// ---------------- Serial baseline ----------------
template <typename T>
utils::Matrix<T> matmul(const utils::Matrix<T>& A, const utils::Matrix<T>& B){
@@ -19,10 +21,8 @@ namespace numerics{
const uint64_t p = B.cols();
T tmp;
utils::Matrix<T> C(m, n, T{0});
utils::Matrix<T> C(m, p, T{0});
//#pragma omp parallel for collapse(2) schedule(static)
#pragma omp parallel for
for (uint64_t i = 0; i < m; ++i){
for (uint64_t j = 0; j < n; ++j){
tmp = A(i,j);
@@ -34,6 +34,85 @@ namespace numerics{
return C;
}
// ---------------- Rows-only OpenMP ----------------
/// @brief Dense product A * B with the outer (row) loop shared between
///        OpenMP threads.
///
/// Every output entry is accumulated in a local scalar, with the inner index
/// running in ascending order, and is stored into the result exactly once.
///
/// @param A left operand.
/// @param B right operand; B.rows() must equal A.cols().
/// @return matrix with A.rows() rows and B.cols() columns.
/// @throws std::runtime_error when A.cols() != B.rows().
template <typename T>
utils::Matrix<T> matmul_rows_omp(const utils::Matrix<T>& A,
                                 const utils::Matrix<T>& B) {
    if (A.cols() != B.rows()) {
        throw std::runtime_error("matmul_rows_omp: dim mismatch");
    }

    const uint64_t out_rows = A.rows();
    const uint64_t inner    = A.cols();
    const uint64_t out_cols = B.cols();

    utils::Matrix<T> product(out_rows, out_cols, T{0});

    #pragma omp parallel for schedule(static)
    for (uint64_t r = 0; r < out_rows; ++r) {
        for (uint64_t c = 0; c < out_cols; ++c) {
            T sum = T{0};
            for (uint64_t t = 0; t < inner; ++t) {
                sum += A(r, t) * B(t, c);
            }
            product(r, c) = sum;
        }
    }
    return product;
}
// -------------- Collapse(2) OpenMP ----------------
/// @brief Dense product A * B with the two outer loops (row and column of the
///        result) collapsed into one OpenMP iteration space.
///
/// Every output entry is accumulated in a local scalar, with the inner index
/// running in ascending order, and is stored into the result exactly once.
///
/// @param A left operand.
/// @param B right operand; B.rows() must equal A.cols().
/// @return matrix with A.rows() rows and B.cols() columns.
/// @throws std::runtime_error when A.cols() != B.rows().
template <typename T>
utils::Matrix<T> matmul_collapse_omp(const utils::Matrix<T>& A,
                                     const utils::Matrix<T>& B) {
    if (A.cols() != B.rows()) {
        throw std::runtime_error("matmul_collapse_omp: dim mismatch");
    }

    const uint64_t row_count = A.rows();
    const uint64_t depth     = A.cols();
    const uint64_t col_count = B.cols();

    utils::Matrix<T> result(row_count, col_count, T{0});

    #pragma omp parallel for collapse(2) schedule(static)
    for (uint64_t row = 0; row < row_count; ++row) {
        for (uint64_t col = 0; col < col_count; ++col) {
            T dot = T{0};
            for (uint64_t d = 0; d < depth; ++d) {
                dot += A(row, d) * B(d, col);
            }
            result(row, col) = dot;
        }
    }
    return result;
}
// -------------------- Auto selector ---------------------
/// @brief Compute A * B, picking a kernel from the size of the result.
///
/// Selection, in order (work = A.rows() * B.cols(), threads = OpenMP maximum
/// thread count, or 1 when built without OpenMP):
///   1. parallelism not allowed, or work < 4 * threads  -> matmul()
///   2. A.rows() >= threads and B.cols() <= 4           -> matmul_rows_omp()
///   3. work >= 8 * threads                             -> matmul_collapse_omp()
///   4. otherwise                                       -> matmul()
///
/// The rows-only test is made before the collapse test; if it came second,
/// any tall-and-narrow result with work >= 8 * threads would already have
/// been claimed by the collapse branch and the rows-only kernel would be
/// used only in the narrow band 4 * threads <= work < 8 * threads.
///
/// @param A left operand.
/// @param B right operand.
/// @return the product as returned by the selected kernel.
template <typename T>
utils::Matrix<T> matmul_auto(const utils::Matrix<T>& A,
                             const utils::Matrix<T>& B) {
    const uint64_t m = A.rows(), p = B.cols();
    const uint64_t work = m * p;

    const bool can_parallel = omp_config::omp_parallel_allowed();
#ifdef _OPENMP
    const int threads = omp_get_max_threads();
#else
    const int threads = 1;
#endif
    const uint64_t team = static_cast<uint64_t>(threads);

    // Tiny problems: serial is cheapest.
    if (!can_parallel || work < team * 4ull) {
        return matmul(A, B);
    }
    // Many rows and very few columns: rows-only has the cheaper overhead.
    if (m >= team && p <= 4) {
        return matmul_rows_omp(A, B);
    }
    // Plenty of (i,j) work: collapse(2) is the default parallel kernel.
    if (work >= 8ull * team) {
        return matmul_collapse_omp(A, B);
    }
    // Safe fallback.
    return matmul(A, B);
}
+97 -3
View File
@@ -3,11 +3,13 @@
#include "./utils/matrix.h"
#include "./core/omp_config.h"
namespace numerics{
// y = A * x, where A is (m×n) and x is length n and y is length m
// =================================================
// y = A * x (MatrixVector product)
// =================================================
template <typename T>
utils::Vector<T> matvec(const utils::Matrix<T>& A, const utils::Vector<T>& x) {
if (A.cols() != x.size()) {
@@ -18,6 +20,27 @@ namespace numerics{
const uint64_t n = A.cols();
utils::Vector<T> y(m, T{0});
for (uint64_t i = 0; i < m; ++i) {
for (uint64_t j = 0; j < n; ++j) {
y[i] += A(i, j) * x[j];
}
}
return y;
}
// -------------- Row-parallel OpenMP ----------------
template <typename T>
utils::Vector<T> matvec_omp(const utils::Matrix<T>& A, const utils::Vector<T>& x) {
if (A.cols() != x.size()) {
throw std::runtime_error("matvec: dimension mismatch");
}
const uint64_t m = A.rows();
const uint64_t n = A.cols();
utils::Vector<T> y(m, T{0});
#pragma omp parallel for schedule(static)
for (uint64_t i = 0; i < m; ++i) {
T acc = T{0};
for (uint64_t j = 0; j < n; ++j) {
@@ -28,7 +51,34 @@ namespace numerics{
return y;
}
// y = x * A, where x is length m and A is (m×n) -> y is length n
// -------------- Auto OpenMP ----------------
/// @brief Compute y = A * x, choosing between the serial and the OpenMP kernel.
///
/// matvec_omp() is used only when omp_config::omp_parallel_allowed() returns
/// true AND the matrix holds more than 4 * threads entries (threads = OpenMP
/// maximum thread count, or 1 when built without OpenMP). In every other case
/// the serial matvec() is used, so a disabled-parallelism setting is always
/// honoured and tiny problems do not pay the thread start-up cost.
///
/// @param A matrix operand.
/// @param x vector operand.
/// @return the product as returned by the selected kernel.
template <typename T>
utils::Vector<T> matvec_auto(const utils::Matrix<T>& A,
                             const utils::Vector<T>& x) {
    const uint64_t work = A.rows() * A.cols();

    const bool can_parallel = omp_config::omp_parallel_allowed();
#ifdef _OPENMP
    const int threads = omp_get_max_threads();
#else
    const int threads = 1;
#endif

    // Both conditions are required; with `||` the parallel kernel would run
    // even when parallelism is disallowed or the problem is tiny.
    if (can_parallel && work > static_cast<uint64_t>(threads) * 4ull) {
        return matvec_omp(A, x);
    }
    // Safe fallback.
    return matvec(A, x);
}
// =================================================
// y = x * A (VectorMatrix product)
// =================================================
template <typename T>
utils::Vector<T> vecmat(const utils::Vector<T>& x, const utils::Matrix<T>& A) {
if (x.size() != A.rows()) {
@@ -38,6 +88,26 @@ namespace numerics{
const uint64_t n = A.cols();
utils::Vector<T> y(n, T{0});
for (uint64_t j = 0; j < n; ++j) {
for (uint64_t i = 0; i < m; ++i) {
y[j] += x[i] * A(i, j);
}
}
return y;
}
// -------------- Column-parallel OpenMP ----------------
template <typename T>
utils::Vector<T> vecmat_omp(const utils::Vector<T>& x, const utils::Matrix<T>& A) {
if (x.size() != A.rows()) {
throw std::runtime_error("vecmat: dimension mismatch");
}
const uint64_t m = A.rows();
const uint64_t n = A.cols();
utils::Vector<T> y(n, T{0});
#pragma omp parallel for schedule(static)
for (uint64_t j = 0; j < n; ++j) {
T acc = T{0};
for (uint64_t i = 0; i < m; ++i) {
@@ -48,6 +118,30 @@ namespace numerics{
return y;
}
// -------------- Auto OpenMP ----------------
/// @brief Compute y = x * A, choosing between the serial and the OpenMP kernel.
///
/// vecmat_omp() is used only when omp_config::omp_parallel_allowed() returns
/// true AND the matrix holds more than 4 * threads entries (threads = OpenMP
/// maximum thread count, or 1 when built without OpenMP). In every other case
/// the serial vecmat() is used, so a disabled-parallelism setting is always
/// honoured and tiny problems do not pay the thread start-up cost.
///
/// @param x vector operand.
/// @param A matrix operand.
/// @return the product as returned by the selected kernel.
template <typename T>
utils::Vector<T> vecmat_auto(const utils::Vector<T>& x,
                             const utils::Matrix<T>& A) {
    const uint64_t work = A.rows() * A.cols();

    const bool can_parallel = omp_config::omp_parallel_allowed();
#ifdef _OPENMP
    const int threads = omp_get_max_threads();
#else
    const int threads = 1;
#endif

    // Both conditions are required; with `||` the parallel kernel would run
    // even when parallelism is disallowed or the problem is tiny.
    if (can_parallel && work > static_cast<uint64_t>(threads) * 4ull) {
        return vecmat_omp(x, A);
    }
    // Safe fallback.
    return vecmat(x, A);
}
} // namespace numerics
-22
View File
@@ -43,28 +43,6 @@ namespace numerics{
}
} // namespace numerics
#endif // _transpose_n_