diff --git a/bin/abc_lab b/bin/abc_lab index 92b6239..05cd2c8 100755 Binary files a/bin/abc_lab and b/bin/abc_lab differ diff --git a/include/core/omp_config.h b/include/core/omp_config.h new file mode 100644 index 0000000..b452cdf --- /dev/null +++ b/include/core/omp_config.h @@ -0,0 +1,34 @@ + + +#pragma once +#include + + +// Configure OpenMP behavior at runtime. +inline void omp_configure(int max_active_levels, + bool dynamic_threads, + const std::vector& threads_per_level = {}, + bool bind_close = true) +{ + // 1) Allow nested parallel regions (levels of teams) + // Example: outer #pragma omp parallel ... { inner #pragma omp parallel ... } + omp_set_max_active_levels(max_active_levels); // 1 = only top-level; 2+ enables nesting + + // 2) Let the runtime shrink/grow thread counts if it thinks it should + // (helps avoid oversubscription when you accidentally ask for too many threads) + omp_set_dynamic(dynamic_threads ? 1 : 0); + + // 3) Thread binding (keep threads near their cores) is controlled via env vars, + // so here we just *recommend* a good default (see below). You *can* setenv() + // in code, but it’s cleaner to do it outside the program. + (void)bind_close; // documented below in env var section + + // 4) Top-level default thread count (inner levels are usually set per region) + if (!threads_per_level.empty()) { + omp_set_num_threads(threads_per_level[0]); // e.g. 16 for the outermost team + // Inner levels: + // - Use num_threads(threads_per_level[L]) on the inner #pragma omp parallel + // - or set OMP_NUM_THREADS="outer,inner,inner2" as an environment variable + } +} + diff --git a/include/numerics/inverse.h b/include/numerics/inverse.h new file mode 100644 index 0000000..031c904 --- /dev/null +++ b/include/numerics/inverse.h @@ -0,0 +1,113 @@ +#ifndef _inverse_n_ +#define _inverse_n_ + + +#include "./utils/vector.h" +#include "./utils/matrix.h" + + +namespace numerics{ + + template + void inplace_inverse(utils::Matrix& A, std::string method = "Gauss-Jordan"){ + if (method == "Gauss-Jordan"){ + + utils::Matrix B(A.rows(),A.cols(), T{0}); + + + uint64_t icol{0}, irow{0}, rows{A.rows()}, cols{A.cols()}; + double big, dum, pivinv, temp; + utils::Vi indxc(rows,0), indxr(rows,0), ipiv(rows,0); + + //for (uint64_t j = 0; j < N; ++j){ ipiv[j] = 0;} + for (uint64_t i = 0; i < rows; i++){ + big = 0.0; + for (uint64_t j = 0; j < rows; j++){ + if (ipiv[j] != 1){ + for (uint64_t k = 0; k < rows; k++){ + if (ipiv[k] == 0){ + if (abs(A(j,k)) >= big){ + big = abs(A(j,k)); + irow = j; + icol = k; + } + } + } + } + } + ipiv[icol]++; + if (irow != icol){ + for (uint64_t l = 0; l < rows; l++){ // SWAP + temp = A(irow,l); + A(irow,l) = A(icol,l); + A(icol,l) = temp; + } + for (uint64_t l = 0; l < cols; l++){ // SWAP temp matrix + temp = B(irow,l); + B(irow,l) = B(icol,l); + B(icol,l) = temp; + } + } + + indxr[i] = irow; + indxc[i] = icol; + if (A(icol,icol) == 0.0){ + throw std::runtime_error("utill:inplace_inverse('Gauss-Jordan' - Singular Matrix"); + } + pivinv= 1.0/A(icol,icol); + A(icol,icol)=1.0; + + for (uint64_t l = 0; l < rows; l++){ + A(icol,l) *= pivinv; + } + for (uint64_t l = 0; l < cols; l++){ + B(icol,l) *= pivinv; + } + for (uint64_t ll = 0; ll < rows; ll++){ + if (ll != icol){ + dum = A(ll,icol); + A(ll,icol) = 0; + for (uint64_t l = 0; l < rows; l++){ + A(ll,l) -= A(icol,l)*dum; + } + for (uint64_t l = 0; l < rows; l++){ + B(ll,l) -= B(icol,l)*dum; + } + } + } + + } + //m = temp_m; + for (int64_t l = rows-1; l >= 0; l--){ + if (indxr[l] != indxc[l]){ + for (uint64_t k = 0; k < rows; k++){ + temp = A(k,indxr[l]); + A(k,indxr[l]) = A(k,indxc[l]); + A(k,indxc[l]) = temp; + } + } + } + } + else{ + throw std::runtime_error("numerics::inplace_inverse(" + method + ") - Not implemented yet \r \nImplemented: 'Gauss-Jordan',"); + } + } + + + + template + utils::Matrix inverse(utils::Matrix& A, std::string method = "Gauss-Jordan"){ + utils::Matrix B = A; + inplace_inverse(B, method); + return B; + } + + + + + + + +} // namespace numerics + +#endif // _inverse_n_ \ No newline at end of file diff --git a/include/numerics/matmul.h b/include/numerics/matmul.h new file mode 100644 index 0000000..f7026e3 --- /dev/null +++ b/include/numerics/matmul.h @@ -0,0 +1,42 @@ +#ifndef _matmul_n_ +#define _matmul_n_ + + +#include "./utils/matrix.h" + + +namespace numerics{ + + template + utils::Matrix matmul(const utils::Matrix& A, const utils::Matrix& B){ + + if(A.cols() != B.rows()){ + throw std::runtime_error("matmul: dimension mismatch"); + } + + const uint64_t m = A.rows(); + const uint64_t n = A.cols(); // also B.rows() + const uint64_t p = B.cols(); + T tmp; + + utils::Matrix C(m, n, T{0}); + + //#pragma omp parallel for collapse(2) schedule(static) + #pragma omp parallel for + for (uint64_t i = 0; i < m; ++i){ + for (uint64_t j = 0; j < n; ++j){ + tmp = A(i,j); + for (uint64_t k = 0; k < p; ++k){ + C(i,k) += tmp * B(j,k); + } + } + } + return C; + } + + + + +} // namespace numerics + +#endif // _matmul_n_ \ No newline at end of file diff --git a/include/numerics/matvec.h b/include/numerics/matvec.h new file mode 100644 index 0000000..0f9d50c --- /dev/null +++ b/include/numerics/matvec.h @@ -0,0 +1,54 @@ +#ifndef _matvec_n_ +#define _matvec_n_ + + +#include "./utils/matrix.h" + + +namespace numerics{ + + // y = A * x, where A is (m×n) and x is length n and y is length m + template + utils::Vector matvec(const utils::Matrix& A, const utils::Vector& x) { + if (A.cols() != x.size()) { + throw std::runtime_error("matvec: dimension mismatch"); + } + + const uint64_t m = A.rows(); + const uint64_t n = A.cols(); + + utils::Vector y(m, T{0}); + for (uint64_t i = 0; i < m; ++i) { + T acc = T{0}; + for (uint64_t j = 0; j < n; ++j) { + acc += A(i, j) * x[j]; + } + y[i] = acc; + } + return y; + } + + // y = x * A, where x is length m and A is (m×n) -> y is length n + template + utils::Vector vecmat(const utils::Vector& x, const utils::Matrix& A) { + if (x.size() != A.rows()) { + throw std::runtime_error("vecmat: dimension mismatch"); + } + const uint64_t m = A.rows(); + const uint64_t n = A.cols(); + + utils::Vector y(n, T{0}); + for (uint64_t j = 0; j < n; ++j) { + T acc = T{0}; + for (uint64_t i = 0; i < m; ++i) { + acc += x[i] * A(i, j); + } + y[j] = acc; + } + return y; + } + + +} // namespace numerics + +#endif // _matvec_n_ \ No newline at end of file diff --git a/include/numerics/numerics.h b/include/numerics/numerics.h new file mode 100644 index 0000000..34c22af --- /dev/null +++ b/include/numerics/numerics.h @@ -0,0 +1,7 @@ +// "./numerics/numerics.h" +#pragma once + +#include "./numerics/transpose.h" +#include "./numerics/inverse.h" +#include "./numerics/matmul.h" +#include "./numerics/matvec.h" \ No newline at end of file diff --git a/include/numerics/transpose.h b/include/numerics/transpose.h new file mode 100644 index 0000000..efdca2e --- /dev/null +++ b/include/numerics/transpose.h @@ -0,0 +1,70 @@ +#ifndef _transpose_n_ +#define _transpose_n_ + + +#include "./utils/matrix.h" + + +namespace numerics{ + + template + void inplace_transpose(utils::Matrix& A){ + + const uint64_t rows = A.rows(); + const uint64_t cols = A.cols(); + + if (rows != cols){ + throw std::runtime_error("inplace_transpose only valid for square matrices"); + } + + for (uint64_t i = 0; i < rows; ++i){ + for (uint64_t j = i + 1; j < cols; ++j){ + T tmp = A(j,i); + A(j,i) = A(i,j); + A(i,j) = tmp; + //std::swap(A(j,i), A(i,j)); + } + } + } + + template + utils::Matrix transpose(const utils::Matrix& A){ + + const uint64_t rows = A.rows(); + const uint64_t cols = A.cols(); + + utils::Matrix B(cols, rows, T{}); + for (uint64_t i = 0; i < rows; ++i){ + for (uint64_t j = 0; j < cols; ++j){ + B(j,i) = A(i,j); + } + } + return B; + } + + + + + + + + + + + + + +} // namespace numerics + + + + + + + + + + + + +#endif // _transpose_n_ \ No newline at end of file diff --git a/include/utils/Grid1D.h b/include/utils/Grid1D.h deleted file mode 100644 index bd91615..0000000 --- a/include/utils/Grid1D.h +++ /dev/null @@ -1,39 +0,0 @@ -#ifndef _grid1d_n_ -#define _grid1d_n_ - -#include "./utils/matrix.h" - - -namespace utils{ - //####################################### - //# Grid1D TYPE # - //####################################### - template - struct Grid1D{ - utils::Vector grid; - utils::Vector vertices; - utils::Vector vertices_norm; - - void create_vertices_norm(){ - vertices_norm.fill(vertices.size()*2, 0); - uint64_t k = 0; - for (uint64_t i = 0; i < grid.size(); i++){ - for (uint64_t j = 1; j <= 2; j++){ - vertices_norm[k] = grid[i] - vertices[i+j]; - k++; - } - - //vertices_norm[(i*2)+1] = grid[i] - vertices[(i*2)+1]; - } - vertices_norm.print(); - } -}; - - - typedef Grid1D Gridi; - typedef Grid1D Gridf; - typedef Grid1D Gridd; -} - - -#endif // _grid1d_n_ \ No newline at end of file diff --git a/include/utils/matrix.h b/include/utils/matrix.h index 7fb1abb..34f802c 100644 --- a/include/utils/matrix.h +++ b/include/utils/matrix.h @@ -3,249 +3,178 @@ #include "./utils/vector.h" +#include namespace utils{ //####################################### //# MATRIX TYPE # + //# Backed by utils::Vector # //####################################### template - struct Matrix{ - utils::Vector m; - - T& operator[](uint64_t idx) { return m[idx]; } // Makes it able to do matr[1][1] - const T& operator[](uint64_t idx) const { return m[idx]; } // Makes it able to do matr[1][1] - - using vector_type = typename decltype(std::declval().v)::value_type; + class Matrix{ +public: + Matrix() : rows_(0), cols_(0), data_() {} // Default constructor // Constructor to initialize matrix with rows × cols and a fill value - Matrix(uint64_t rows, uint64_t cols, typename T::value_type value = {}) { - fill(rows, cols, value); - } - - Matrix() = default; // Default constructor + Matrix(uint64_t rows, uint64_t cols, const T& value = T()) + : rows_(rows), cols_(cols), data_(rows * cols, value) {} + //# MATRIX: basic properties # + uint64_t rows() const noexcept {return rows_;} + uint64_t cols() const noexcept {return cols_;} + //# MATRIX: element access (fast; unchecked) # + T& operator()(uint64_t i, uint64_t j) { return data_[i * cols_ + j]; } + const T& operator()(uint64_t i, uint64_t j) const { return data_[i * cols_ + j]; } - void fill(uint64_t rows, uint64_t cols, const vector_type num=0){ - m.clear(); - for (uint64_t i = 0; i < rows; i++){ - T temp_vec; + //# MATRIX: data access # + T* data() noexcept { return data_.data(); } + const T* data() const noexcept { return data_.data(); } - for (uint64_t j = 0; j < cols; j++){ - temp_vec.v.push_back(num); - } - m.push_back(temp_vec); - } + //# MATRIX: equal operator # + bool operator==(const Matrix& A) const { + if (rows_ != A.rows_ || cols_ != A.cols_) return false; + for (uint64_t i = 0; i < rows_; ++i) + for (uint64_t j = 0; j < cols_; ++j) + if (data_[i*cols_ + j] != A(i,j)) + return false; + return true; } - - void fill_RNG(const uint64_t rows, const uint64_t cols, const vector_type min = 0, const vector_type max = 1){ - m.clear(); - - std::mt19937_64 rng{}; - rng.seed( std::random_device{}()); - - for (uint64_t i = 0; i < rows; i++){ - T temp_vec; - for (uint64_t j = 0; j < cols; j++){ - temp_vec.v.push_back(std::uniform_real_distribution<>{min, max}(rng)); - } - m.push_back(temp_vec); - } + bool operator!=(const Matrix& A) const { + return !(*this == A); } - - - inline friend std::ostream& operator << (std::ostream& out, const Matrix& mat){ - out << "["; - for (uint64_t i = 0; i < mat.m.size(); i++){ - out << "["; - for (uint64_t j = 0; j < mat.m[i].v.size(); j++){ - if (j % mat.m[i].v.size() == mat.m[i].v.size() -1 && i == mat.m.size()-1){ - out << mat.m[i].v[j] << "]"; - } - else if ((j % mat.m[i].v.size() == mat.m[i].v.size() -1)){ - out << mat.m[i].v[j] << "]," << std::endl; - } - else{ - out << mat.m[i].v[j] << ", "; - } - } - } - out << "]"; - return out; - } - void print() const{ - std::cout << *this << std::endl; - } - - void inplace_transpose(){ - utils::Vector temp_m = m; - m.clear(); - - uint64_t rows = temp_m.size(); - uint64_t cols = temp_m[0].v.size(); - - for (uint64_t i = 0; i < cols; i++){ - T temp_vec; - for (uint64_t j = 0; j < rows; j++){ - temp_vec.v.push_back(temp_m[j].v[i]); - } - m.push_back(temp_vec); - } - } - Matrix transpose()const{ - Matrix copy = *this; - copy.inplace_transpose(); - return copy; - } - - - void inplace_inverse(std::string method = "Gauss-Jordan"){ - //Matrix temp_m = *this; // Copies the m into temp_m correctly (Before: utils::Vector temp_m = m;) - if (method == "Gauss-Jordan"){ - Matrix temp_m(m.v.size(),m[0].v.size(),0); - - //std::cout << temp_m.m.v[0].size() << std::endl; - //std::cout << m.v.size() << std::endl; - - uint64_t icol,irow,N=m.v.size(),M=temp_m.m.v[0].size(); - double big,dum,pivinv; - Vi indxc(N,0),indxr(N,0),ipiv(N,0); - - //for (uint64_t j = 0; j < N; ++j){ ipiv[j] = 0;} - for (uint64_t i = 0; i < N; i++){ - big=0.0; - for (uint64_t j = 0; j < N; j++){ - if (ipiv[j] != 1){ - for (uint64_t k = 0; k < N; k++){ - if (ipiv[k] == 0){ - if (abs(m[j].v[k]) >= big){ - big = abs(m[j].v[k]); - irow = j; - icol = k; - } - } - } - } - } - ipiv[icol]++; - if (irow != icol){ - for (uint64_t l = 0; l < N; l++){ // SWAP - double temp = m[irow].v[l]; - m[irow].v[l] = m[icol].v[l]; - m[icol].v[l] = temp; - } - for (uint64_t l = 0; l < M; l++){ // SWAP temp matrix - double temp = temp_m.m[irow].v[l]; - temp_m.m[irow].v[l] = temp_m.m[icol].v[l]; - temp_m.m[icol].v[l] = temp; - } - } - - indxr[i] = irow; - indxc[i] = icol; - if (m[icol].v[icol] == 0.0){ - throw std::runtime_error("utill:Matrix.Gauss-Jordan - Singular Matrix"); - } - pivinv= 1.0/m[icol].v[icol]; - m[icol].v[icol]=1.0; - for (uint64_t l = 0; l < N; l++){ - m[icol].v[l] *= pivinv; - } - for (uint64_t l = 0; l < M; l++){ - temp_m.m[icol].v[l] *= pivinv; - } - for (uint64_t ll = 0; ll < N; ll++){ - if (ll != icol){ - dum = m[ll].v[icol]; - m[ll].v[icol] = 0; - for (uint64_t l = 0; l < N; l++){ - m[ll].v[l] -= m[icol].v[l]*dum; - } - for (uint64_t l = 0; l < N; l++){ - temp_m.m[ll].v[l] -= temp_m.m[icol].v[l]*dum; - } - } - } - - } - //m = temp_m; - for (int64_t l = N-1; l >= 0; l--){ - if (indxr[l] != indxc[l]){ - for (uint64_t k = 0; k < N; k++){ - double temp = m[k].v[indxr[l]]; - m[k].v[indxr[l]] = m[k].v[indxc[l]]; - m[k].v[indxc[l]] = temp; - } - } - } - } - else{ - throw std::runtime_error("utill:Matrix." + method + " - Not implemented yet \r \nImplemented: 'Gauss-Jordan',"); - } - } - Matrix inverse(std::string method = "Gauss-Jordan")const{ - Matrix copy = *this; - copy.inplace_inverse(method); - return copy; - } - - utils::Vector vecmult(const utils::Vector& Vec)const{ - - if (m[0].size() != Vec.size()){ - throw std::runtime_error("utill:Matrix.vecmult - Dimentions does not fit"); - } - - // Create a temporary result vector - utils::Vector copy(Vec.size(), 0); - - - for (uint64_t i = 0; i < m.size(); ++i) { - for (uint64_t j = 0; j < m[0].size(); ++j) { - copy[i] += m[i][j] * Vec[j]; + bool nearly_equal(const Matrix& A, T tol = static_cast(1e-9)) const { + if (rows_ != A.rows_ || cols_ != A.cols_) return false; + for (uint64_t i = 0; i < rows_; ++i) + for (uint64_t j = 0; j < cols_; ++j) { + T a = (*this)(i,j); + T b = A(i,j); + if (std::is_floating_point::value) { + if (std::fabs(a - b) > tol) return false; + } else { + if (a != b) return false; + } } + return true; + } + + + + //# MATRIX: row helpers (copy out) # + // Read whole row as an owning Vector + // utils::Vf v = M.get_row(2); + Vector get_row(const uint64_t row) const { + if (row >= rows_) { + throw std::out_of_range("Matrix::get_row -> row index"); + } + utils::Vector result(cols_, T{}); + for (uint64_t i = 0; i < cols_; ++i){ + result[i] = data_[row * cols_ + i]; + } + return result; + } + //# MATRIX: row helpers (copy in) # + // Assign a whole Vector to a row + // M.set_row(2) = v; + void set_row(const uint64_t row, const Vector& vector){ + if (row >= rows_) { + throw std::out_of_range("Matrix::set_row -> row index"); + } + if (vector.size() != cols_){ + throw std::runtime_error("Matrix::set_row -> size mismatch"); } - return copy; - } + for (uint64_t i = 0; i < cols_; ++i){ + data_[row * cols_ + i] = vector[i]; + } + } - void inplace_matmult(const Matrix& Mat){ + //# MATRIX: col helpers (copy out) # + // Read whole col as an owning Vector + // utils::Vf v = M.get_col(2); + Vector get_col(const uint64_t col) const { + if (col >= cols_) { + throw std::out_of_range("Matrix::get_col -> col index"); + } + utils::Vector result(rows_, T{}); + for (uint64_t i = 0; i < rows_; ++i){ + result[i] = data_[i * cols_ + col]; + } + return result; + } - if (m.v[0].size() != Mat.m.v.size()){ - throw std::runtime_error("utill:Matrix.matmult - Dimentions does not fit"); - } + //# MATRIX: col helpers (copy in) # + // Assign a whole Vector to a col + // M.set_col(2) = v; + void set_col(const uint64_t col, const Vector& vector){ + if (col >= cols_) { + throw std::out_of_range("Matrix::set_col -> col index"); + } + if (vector.size() != rows_){ + throw std::runtime_error("Matrix::set_col -> size mismatch"); + } + for (uint64_t i = 0; i < rows_; ++i){ + data_[i * cols_ + col] = vector[i]; + } + } - // Dimensions of the result - uint64_t rows = m.v.size(); // rows in *this - uint64_t cols = Mat.m[0].v.size(); // columns in Mat - uint64_t inner = m.v[0].size(); // shared dimension + void swap_rows(uint64_t a, uint64_t b){ + if (a >= rows_ || b >= rows_) { + throw std::out_of_range("Matrix::swap_rows -> row index"); + } + if (a == b){ + return; + } + for (uint64_t i = 0; i < cols_; ++i){ + T tmp = data_[a * cols_ + i]; + data_[a * cols_ + i] = data_[b * cols_ + i]; + data_[b * cols_ + i] = tmp; + } + } + void swap_cols(uint64_t a, uint64_t b){ + if (a >= cols_ || b >= cols_) { + throw std::out_of_range("Matrix::swap_cols -> col index"); + } + if (a == b){ + return; + } + for (uint64_t i = 0; i < rows_; ++i){ + T tmp = data_[i * cols_ + a]; + data_[i * cols_ + a] = data_[i * cols_ + b]; + data_[i * cols_ + b] = tmp; + } + } - // Create a temporary result matrix - Matrix temp_m(rows, cols, 0); + inline friend std::ostream& operator<<(std::ostream& out, const Matrix& M) { + out << "["; + for (uint64_t i = 0; i < M.rows_; ++i) { + out << "["; + for (uint64_t j = 0; j < M.cols_; ++j) { + out << std::setw(4) << std::setprecision(3) << std::fixed << M(i, j); + if (j + 1 < M.cols_) out << ", "; + } + out << "]"; + if (i + 1 < M.rows_) out << ",\n "; + } + out << "]"; + return out; + } - // Perform matrix multiplication - for (uint64_t i = 0; i < rows; i++){ - for (uint64_t j = 0; j < cols; j++){ - for (uint64_t k = 0; k < inner; k++){ - temp_m.m[i].v[j] += m[i].v[k] * Mat.m[k].v[j]; - } - } - } - *this = temp_m; - } - Matrix matmult(const Matrix& Mat)const{ - Matrix copy = *this; - copy.inplace_matmult(Mat); - return copy; - } + void print() const { + std::cout << *this << std::endl; + } +private: + uint64_t rows_, cols_; + std::vector data_; - }; - typedef Matrix Mi; - typedef Matrix Mf; - typedef Matrix Md; + }; + typedef Matrix Mi; + typedef Matrix Mf; + typedef Matrix Md; } -#endif // _numerics_n_ \ No newline at end of file +#endif // _matrix_n_ \ No newline at end of file diff --git a/include/utils/utils.h b/include/utils/utils.h index bbef9ab..cfd11c5 100644 --- a/include/utils/utils.h +++ b/include/utils/utils.h @@ -3,4 +3,3 @@ #include "./utils/vector.h" #include "./utils/matrix.h" -#include "./utils/Grid1D.h" diff --git a/include/utils/vector.h b/include/utils/vector.h index 0dbb7a9..753c5fd 100644 --- a/include/utils/vector.h +++ b/include/utils/vector.h @@ -44,6 +44,9 @@ public: // vector.size(); uint64_t size() const noexcept { return v.size(); } + void resize(uint64_t new_size, const T& value = T()) { + v.resize(new_size, value); + } //########################################### //# VECTOR: == and != # @@ -303,38 +306,19 @@ public: Vector result = *this; result.inplace_power(a); return result; - } - //################################################ - //# VECTOR: Scalar Square # - //################################################ - template ::value>::type> - void inplace_square(const U a){ - const uint64_t n = v.size(); - for (uint64_t i = 0; i < n; ++i){ - v[i] = static_cast(std::sqrt(v[i], a)); - } - } - template ::value>::type> - Vector square(const U a) const{ - Vector result = *this; - result.inplace_square(a); - return result; } //################################################ //# VECTOR: Vector square # //################################################ - void inplace_square(const Vector& a){ - if (a.size() != v.size()){ - throw std::runtime_error("utill:Vector.inplace_square -> Dimensions does not fit"); - } - uint64_t n = a.size(); + void inplace_sqrt(){ + uint64_t n = v.size(); for (uint64_t i = 0; i < n; ++i){ - v[i] = static_cast(std::sqrt(v[i], a[i])); + v[i] = static_cast(std::sqrt(v[i])); } } - Vector square(const Vector& a) const{ + Vector sqrt() const{ Vector result = *this; - result.inplace_square(a); + result.inplace_sqrt(); return result; } //################################################### @@ -344,7 +328,7 @@ public: if (a.size() != v.size()){ throw std::runtime_error("utill:Vector.dot -> Dimensions does not fit"); } - T result; + T result = T{0}; const uint64_t n = v.size(); for (uint64_t i = 0; i < n; ++i){ result += a[i]*v[i]; @@ -368,6 +352,21 @@ public: T norm() const{ return static_cast(std::sqrt(this->dot(*this))); } + //############################################ + //# VECTOR: Normalize # + //############################################ + void inplace_normalize() { + T norm = this->norm(); + if (norm == T{0}){ + throw std::runtime_error("utils::Vector.normalize -> zero norm"); + } + this->inplace_divide(norm); + } + Vector normalize() const{ + Vector result = *this; + result.inplace_normalize(); + return result; + } //###################################################### //# VECTOR: Support Functions # //###################################################### diff --git a/makefile b/makefile index 6c542a8..052f8f7 100644 --- a/makefile +++ b/makefile @@ -1,7 +1,9 @@ # Compiler and flags CC := g++ -CXXFLAGS := -std=c++14 -Wall -Iinclude +CXXFLAGS := -std=c++14 -Wall -Iinclude -O3 -march=native -fopenmp +LDFLAGS := -fopenmp + # Directories SRC_DIR := src @@ -20,19 +22,50 @@ SRCS := $(shell find $(SRC_DIR) -name '*.cpp') # === Convert src/foo/bar.cpp → obj/foo/bar.o === OBJS := $(patsubst $(SRC_DIR)/%.cpp,$(OBJ_DIR)/%.o,$(SRCS)) + +# === OpenMP runtime configuration (override-able) === +OMP_PROC_BIND ?= close # close|spread|master +OMP_PLACES ?= cores # cores|threads|sockets +OMP_MAX_LEVELS ?= 1 # 1 = no nested teams; set 2+ to allow nesting +OMP_THREADS ?= 16 # e.g. "16" or "8,4" for nested (outer,inner) +OMP_DYNAMIC ?= true # true/false: let runtime adjust threads +OMP_DISPLAY_ENV ?= FALSE # TRUE to print runtime config at startup + + + # === Default Target === all: $(TARGET) # === Linking final executable === $(TARGET): $(OBJS) @mkdir -p $(dir $@) - $(CXX) $(CXXFLAGS) -o $@ $^ + $(CXX) $(CXXFLAGS) -o $@ $^ $(LDFLAGS) # === Compiling source files to object files === $(OBJ_DIR)/%.o: $(SRC_DIR)/%.cpp @mkdir -p $(dir $@) $(CXX) $(CXXFLAGS) -c $< -o $@ +# === Run with OpenMP env set only for the run === +.PHONY: run +run: $(TARGET) + OMP_PROC_BIND=$(OMP_PROC_BIND) \ + OMP_PLACES=$(OMP_PLACES) \ + OMP_MAX_ACTIVE_LEVELS=$(OMP_MAX_LEVELS) \ + OMP_NUM_THREADS="$(OMP_THREADS)" \ + OMP_DYNAMIC=$(OMP_DYNAMIC) \ + OMP_DISPLAY_ENV=$(OMP_DISPLAY_ENV) \ + ./$(TARGET) + +# Handy presets +.PHONY: run-single +run-single: ## Single-level parallel (good default) + $(MAKE) run OMP_MAX_LEVELS=1 OMP_THREADS=16 OMP_PROC_BIND=close OMP_PLACES=cores + +.PHONY: run-nested +run-nested: ## Two-level nested (outer,inner), adjust to your cores + $(MAKE) run OMP_MAX_LEVELS=2 OMP_THREADS="4,8" OMP_PROC_BIND=close OMP_PLACES=cores + # Clean up .PHONY: clean clean: @@ -43,4 +76,5 @@ clean: info: @echo "Source files: $(SRCS)" @echo "Object files: $(OBJS)" - @echo "CXXFLAGS: $(CXXFLAGS)" \ No newline at end of file + @echo "CXXFLAGS: $(CXXFLAGS)" + @echo "LDFLAGS: $(LDFLAGS)" \ No newline at end of file diff --git a/obj/main.o b/obj/main.o index 291e788..f843db1 100644 Binary files a/obj/main.o and b/obj/main.o differ diff --git a/src/main.cpp b/src/main.cpp index 4abdfb4..baaf79d 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,10 +1,12 @@ #include "./utils/utils.h" - +#include "./numerics/numerics.h" +#include "./core/omp_config.h" #include #include + #define CHECK(cond, msg) \ do { if (!(cond)) throw std::runtime_error(msg); } while (0) @@ -21,9 +23,17 @@ void expect_throw(F&& f, const char* msg_if_no_throw) { int main(int argc, char const *argv[]) -{ +{ - using utils::Vf; + // Single-level, 16 threads, runtime may adjust + omp_configure(/*max_levels=*/1, /*dynamic=*/true, /*threads_per_level=*/{16}); + + using utils::Vi; + using utils::Vf; + using utils::Vd; + using utils::Mi; + using utils::Mf; + using utils::Md; // ---------------- Equality / Inequality ---------------- { @@ -154,6 +164,82 @@ int main(int argc, char const *argv[]) CHECK(a == expect, "a /= 2 should produce [3,3,3]"); } + // ---------- sum ---------- + { + Vf a(3, 2.0f); // [2,2,2] + CHECK(a.sum() == 6.0f, "sum failed"); + } + + // ---------- dot ---------- + { + Vf a(3, 2.0f); // [2,2,2] + Vf b(3, 3.0f); // [3,3,3] + CHECK(a.dot(b) == 18.0f, "dot failed"); // 2*3 * 3 = 18 + Vf c(4, 1.0f); + expect_throw([&]{ (void)a.dot(c); }, "dot should throw on size mismatch"); + } + + // ---------- norm ---------- + { + Vf a(3, 2.0f); // [2,2,2] + float n = a.norm(); + CHECK(std::fabs(n - std::sqrt(12.0f)) < 1e-6f, "norm failed"); + } + + // ---------- normalize ---------- + { + Vf a(3, 3.0f); // [3,3,3], norm = sqrt(27) + Vf b = a.normalize(); + float n = b.norm(); + CHECK(std::fabs(n - 1.0f) < 1e-6f, "normalize failed"); + expect_throw([&]{ Vf z(3, 0.0f); z.inplace_normalize(); }, "normalize should throw on zero norm"); + } + + // ---------- scalar power ---------- + { + Vf a(3, 2.0f); // [2,2,2] + Vf c = a.power(3); // [8,8,8] + CHECK(c == Vf(3, 8.0f), "power(scalar) failed"); + + Vf d = a; d.inplace_power(4); // [16,16,16] + CHECK(d == Vf(3, 16.0f), "inplace_power(scalar) failed"); + } + + // ---------- vector power ---------- + { + Vf base(3, 2.0f); // [2,2,2] + Vf exps; exps.v = {1.0f, 2.0f, 3.0f}; // explicit construction for clarity + Vf out = base.power(exps); // [2^1, 2^2, 2^3] = [2,4,8] + Vf expect; expect.v = {2.0f, 4.0f, 8.0f}; + CHECK(out == expect, "power(vector) failed"); + + expect_throw([&]{ Vf bad(2, 1.0f); (void)base.power(bad); }, + "power(vector) should throw on size mismatch"); + } + + // ---------- square ---------- + { + Vf a; a.v = {4.0f, 9.0f, 16.0f}; + Vf b = a.sqrt(); // [4,9,16] + Vf expect; expect.v = {2.0f, 3.0f, 4.0f}; + CHECK(b == expect, "sqrt failed"); + + a.inplace_sqrt(); // mutate a to [4,9,16] + CHECK(a == expect, "inplace_square failed"); + } + + // ---------- scalar commutative friends (s + v, s * v) ---------- + { + Vf a(3, 2.0f); // [2,2,2] + Vf b = 3.0f + a; // [5,5,5] + Vf c = a + 3.0f; // [5,5,5] + CHECK(b == c, "s+v commutative failed"); + + Vf d = 4.0f * a; // [8,8,8] + Vf e = a * 4.0f; // [8,8,8] + CHECK(d == e, "s*v commutative failed"); + } + // ---------------- Size mismatch throws ---------------- { Vf a(3, 1.0f); @@ -177,12 +263,310 @@ int main(int argc, char const *argv[]) "operator/ should throw (through divide) on size mismatch"); } - Vf b(3, 8.0f); // [1,1,1] - Vf c(3, 2.0f); // [2,2,2] - b.print(); - b.inplace_power(2); - b.print(); - std::cout << b.norm() << std::endl; + { + + auto* a = new utils::Vf(3, 1.0f); // constructor runs + delete a; // <- calls ~Vector() and frees memory + + } + + { + Vf a(2, 1.0f); // a = [1, 1] + Vf b(2, 1.0f); // b = [1, 1] + + a.clear(); // a = [] + CHECK(a.size() == 0, "clear() did not empty vector"); + + a.resize(2, 1.0f); // a = [1, 1] + + CHECK(a == b, "clear/resize lifecycle failed"); + + } + std::cout << "All Vector tests passed ✅\n"; + + + // shape + element access + { + Mf M(3, 4, 0.0f); + CHECK(M.rows()==3 && M.cols()==4, "shape failed"); + + M(1,1) = 5.0f; + CHECK(M(1,1) == 5.0f, "write/read element failed"); + + // ensure independence of other cells + CHECK(M(0,0) == 0.0f && M(2,3) == 0.0f, "unexpected element modified"); + } + + // set/get row (with size checks) + { + Mf M(2, 3, 0.0f); // 2x3 + + Vf r(3, 0.0f); + r[0]=1; r[1]=2; r[2]=3; + M.set_row(1, r); + + Vf g = M.get_row(1); + CHECK(g.size()==3, "get_row size wrong"); + CHECK(g[0]==1 && g[1]==2 && g[2]==3, "get_row values wrong"); + + // size mismatch should throw + bool threw=false; + try { + Vf bad(2, 9.0f); + M.set_row(0, bad); + } catch (const std::exception&) { threw=true; } + CHECK(threw, "set_row should throw on size mismatch"); + } + + // set/get col (with size checks) + { + Mf M(3, 2, 0.0f); // 3x2 + + Vf c(3, 0.0f); + c[0]=4; c[1]=5; c[2]=6; + M.set_col(1, c); + + Vf h = M.get_col(1); + CHECK(h.size()==3, "get_col size wrong"); + CHECK(h[0]==4 && h[1]==5 && h[2]==6, "get_col values wrong"); + + bool threw=false; + try { + Vf bad(2, 7.0f); + M.set_col(0, bad); + } catch (const std::exception&) { threw=true; } + CHECK(threw, "set_col should throw on size mismatch"); + } + + // swap_rows / swap_cols + { + Mf M(3, 3, 0.0f); + // set rows to [1,2,3], [4,5,6], [7,8,9] + for (uint64_t j=0;j<3;++j) M(0,j) = 1.0f + j; + for (uint64_t j=0;j<3;++j) M(1,j) = 4.0f + j; + for (uint64_t j=0;j<3;++j) M(2,j) = 7.0f + j; + + M.swap_rows(0,2); + CHECK(M(0,0)==7 && M(0,1)==8 && M(0,2)==9, "swap_rows top row wrong"); + CHECK(M(2,0)==1 && M(2,1)==2 && M(2,2)==3, "swap_rows bottom row wrong"); + + M.swap_cols(0,2); + // after col swap: first row should be [9,8,7] + CHECK(M(0,0)==9 && M(0,1)==8 && M(0,2)==7, "swap_cols first row wrong"); + // bottom row should be [3,2,1] + CHECK(M(2,0)==3 && M(2,1)==2 && M(2,2)==1, "swap_cols last row wrong"); + } + + // Exact integer comparison / Floating-point exact equality / Floating-point with small perturbation + { + + Mi A(2,2,0); + A(0,0)=1; A(0,1)=2; + A(1,0)=3; A(1,1)=4; + + Mi B(2,2,0); + B(0,0)=1; B(0,1)=2; + B(1,0)=3; B(1,1)=4; + + Mi C(2,2,0); + C(0,0)=9; C(0,1)=9; + C(1,0)=9; C(1,1)=9; + + CHECK(A == B, "Matrix == failed on identical int matrices"); + CHECK(!(A != B), "Matrix != failed on identical int matrices"); + CHECK(A != C, "Matrix != failed on different int matrices"); + + // Floating-point exact equality + + + Md F1(2,2,0.0); + F1(0,0)=1.0; F1(0,1)=2.0; + F1(1,0)=3.0; F1(1,1)=4.0; + + Md F2(2,2,0.0); + F2(0,0)=1.0; F2(0,1)=2.0; + F2(1,0)=3.0; F2(1,1)=4.0; + + CHECK(F1 == F2, "Matrix == failed on identical float matrices"); + + // Floating-point with small perturbation + + Md F3 = F1; + F3(1,1) += 1e-10; // tiny difference + + CHECK(!(F1 == F3), "Matrix == should fail on exact compare with perturbation"); + CHECK(F1.nearly_equal(F3, 1e-9), "Matrix nearly_equal failed with tolerance"); + + // Larger perturbation + F3(1,1) += 1e-3; + CHECK(!F1.nearly_equal(F3, 1e-6), "Matrix nearly_equal should fail when tolerance too small"); + CHECK(F1.nearly_equal(F3, 1e-2), "Matrix nearly_equal should pass with loose tolerance"); + } + + std::cout << "Matrix basic tests passed ✅\n"; + + // --- Test: normal transpose --- + { + Mf M(2, 3, 0.0f); + // Fill: [ [1,2,3], + // [4,5,6] ] + M(0,0)=1; M(0,1)=2; M(0,2)=3; + M(1,0)=4; M(1,1)=5; M(1,2)=6; + + Mf MT = numerics::transpose(M); + + // Should be shape 3x2 + CHECK(MT.rows()==3 && MT.cols()==2, "transpose shape wrong"); + + // Values: [ [1,4], [2,5], [3,6] ] + CHECK(MT(0,0)==1 && MT(0,1)==4, "transpose value (0,*) wrong"); + CHECK(MT(1,0)==2 && MT(1,1)==5, "transpose value (1,*) wrong"); + CHECK(MT(2,0)==3 && MT(2,1)==6, "transpose value (2,*) wrong"); + + //std::cout << "Original M:\n" << M << "\n"; + //std::cout << "Transposed MT:\n" << MT << "\n\n"; + } + + // --- Test: inplace transpose (square only) --- + { + Mf S(3, 3, 0.0f); + // Fill with row-major increasing + float val = 1.0f; + for (uint64_t i=0;i(A, x); + CHECK(y.size()==2, "matvec size wrong"); + CHECK(y[0]==50 && y[1]==122, "matvec values wrong"); + + // dimension mismatch should throw + bool threw = false; + try { + Vd bad(4,1.0); + (void)numerics::matvec(A, bad); + } catch (const std::runtime_error&) { threw = true; } + CHECK(threw, "matvec: expected throw on dim mismatch"); + } + + std::cout << "matvec tests passed ✅\n"; + + // vecmat test + { + // A = [[1,2], + // [3,4]] (2x2) + Md A(2,2,0.0); + A(0,0)=1; A(0,1)=2; + A(1,0)=3; A(1,1)=4; + + // x^T = [5,6] + Vd x(2,0.0); + x[0]=5; x[1]=6; + + // y = x^T * A = [5*1+6*3, 5*2+6*4] = [23, 34] + Vd y = numerics::vecmat(x, A); + CHECK(y.size()==2, "vecmat size wrong"); + CHECK(y[0]==23 && y[1]==34, "vecmat values wrong"); + + // mismatch should throw + bool threw = false; + try { + Md B(3,2,0.0); // 3x2, doesn't match x size 2 + (void)numerics::vecmat(x, B); + } catch (const std::runtime_error&) { threw = true; } + CHECK(threw, "vecmat: expected throw on dim mismatch"); + } + + std::cout << "vecmat tests passed ✅\n"; + + + + // Inverse 'Gauss-Jordan' tests + { + Md A(2,2,0.0); + A(0,0)=4; A(0,1)=7; + A(1,0)=2; A(1,1)=6; + + Md Ai = numerics::inverse(A, "Gauss-Jordan"); + + Md I1 = numerics::matmul(A, Ai); + Md I2 = numerics::matmul(Ai, A); + + Md I(2,2,0.0); + I(0,0)=1; I(1,1)=1; + + CHECK(I1.nearly_equal(I), "A*inv(A) != I"); + CHECK(I2.nearly_equal(I), "inv(A)*A != I"); + } + + std::cout << "Inverse 'Gauss-Jordan' tests passed ✅\n"; + return 0; } \ No newline at end of file