19template <
typename K>
class Jacobi;
29template <
typename K>
class Gauss {
47 static_assert(std::is_floating_point<K>::value,
"");
59 using regT =
typename SimdT::reg;
60 const size_t W = SimdT::width;
64 for (
size_t i = 0; i <
n; ++i) {
67 for (
size_t r = i + 1; r <
n; ++r) {
74 if (maxv <
static_cast<K
>(1e-12))
75 throw std::runtime_error(
"Gauss: matrix is singular or nearly singular.");
82 constexpr auto TILE =
UNROLL;
83#pragma omp parallel for schedule(dynamic, TILE)
84 for (
size_t j = i + 1; j <
n; ++j) {
85 for (
size_t k = 0; k <
n; ++k) {
92 regT fv = SimdT::set1(f);
95 for (; k + 4 *
W <=
n; k += 4 *
W) {
96 for (
int t = 0; t < 4; ++t) {
97 size_t off = k + t *
W;
100 vj = SimdT::sub(vj, SimdT::mul(fv, vi));
101 SimdT::storeu(&
rowj[off], vj);
104 for (; k +
W <=
n; k +=
W) {
107 vj = SimdT::sub(vj, SimdT::mul(fv, vi));
108 SimdT::storeu(&
rowj[k], vj);
113 for (
size_t k = 0; k <
n; ++k)
120 for (
size_t ii =
n; ii-- > 0;) {
121 for (
size_t k = 0; k <
n; ++k)
124 regT acc = SimdT::setzero();
126 for (; j +
W <=
n; j +=
W) {
128 regT xv = SimdT::loadu(&
x[j]);
129 acc = SimdT::fmadd(u, xv, acc);
131 K sum = SimdT::horizontal_add(acc);
133 sum +=
rowi[j] *
x[j];
135 x[ii] = (
B[ii] - sum) /
rowi[ii];
142 const size_t n = A.
rows;
143 const size_t m = A.
cols;
147 for (
size_t r = 0; r <
n; ++r) {
166 K pivot = A(r, lead);
167 for (
size_t j = 0; j < m; ++j)
172 for (
size_t k = 0; k <
n; ++k) {
176 for (
size_t j = 0; j < m; ++j)
177 A(k, j) -= f * A(r, j);
179 (*b)[k] -= f * (*b)[r];
213 int max_iter = 2000) {
214 static_assert(std::is_floating_point<K>::value,
215 "Jacobi solver requires floating-point type.");
216 assert(A.
rows == A.
cols &&
"Matrix A must be square");
217 assert(A.
rows == b.
size() &&
"Matrix/vector size mismatch");
218 const size_t n = A.
rows;
223 using reg =
typename Simd::reg;
224 const size_t simd_width = Simd::width;
226 for (
int iter = 0; iter < max_iter; ++iter) {
227#pragma omp parallel for schedule(dynamic, 4)
228 for (
size_t i = 0; i < n; ++i) {
230 throw std::runtime_error(
"Jacobi: division by near-zero on diagonal, matrix "
231 "likely not diagonally dominant.");
233 reg sum_vec = Simd::setzero();
236 for (; j + simd_width <= n; j += simd_width) {
237 reg a_vec = Simd::loadu(&A(i, j));
238 reg x_vec = Simd::loadu(&x[j]);
240 if (i >= j && i < j + simd_width) {
241 std::array<K, simd_width> mask_arr;
242 Simd::storeu(mask_arr.data(), a_vec);
243 mask_arr[i - j] = K(0);
244 a_vec = Simd::loadu(mask_arr.data());
247 sum_vec = Simd::fma(a_vec, x_vec, sum_vec);
250 K
sigma = Simd::horizontal_add(sum_vec);
254 sigma += A(i, j) * x[j];
257 x_new[i] = (b[i] -
sigma) / A(i, i);
261#pragma omp parallel for reduction(+ : err) schedule(dynamic)
262 for (
size_t i = 0; i < n; ++i) {
263 K diff = x_new[i] - x[i];
266 err = std::sqrt(err);
304 int max_iter = 2000);
std::vector< K, AlignedAllocator< K, ALIGN > > aligned_vector
Type alias for a std::vector with aligned memory allocation.
Definition Allocator.hpp:111
#define UNROLL
Definition SIMD.hpp:28
static void _swap(T &a, T &b)
Definition MathsUtils.hpp:26
static double _abs(double a)
Definition MathsUtils.hpp:32
High-performance aligned matrix class with SIMD support.
Definition Matrix.hpp:27
size_t rows
Definition Matrix.hpp:29
size_t cols
Definition Matrix.hpp:29
void swap_rows(size_t i, size_t j)
Swap two rows of the matrix.
Definition Matrix.hpp:69
Aligned, SIMD-optimized mathematical vector class for scientific computing.
Definition Vector.hpp:26
size_t size() const
Definition Vector.hpp:76
Placeholder for Gauss–Seidel iterative solver.
Definition LinearSolver.hpp:291
static Vector< K > solve(const Matrix< K > &A, const Vector< K > &b, K tol=1e-8, int max_iter=2000)
Solve the system using Gauss–Seidel method.
aligned_vector< K > data
Definition LinearSolver.hpp:293
Direct Gaussian elimination solver with SIMD acceleration.
Definition LinearSolver.hpp:29
assert(n==A_in.cols &&n==b_in.size())
typename SimdT::reg regT
Definition LinearSolver.hpp:59
__attribute__((always_inline, hot, flatten)) static inline Vector< K > solve(const Matrix< K > &A_in
Solve the linear system .
aligned_vector< K > rowj(n)
aligned_vector< K > data
Definition LinearSolver.hpp:33
if(n >=1024) return Jacobi< K > Matrix< K > M
Definition LinearSolver.hpp:54
const size_t W
Definition LinearSolver.hpp:60
Vector< K > B
Definition LinearSolver.hpp:55
return x
Definition LinearSolver.hpp:138
aligned_vector< K > rowi(n)
static void raw_row_echelon(Matrix< K > &A, Vector< K > *b=nullptr, K eps=1e-12)
Definition LinearSolver.hpp:141
size_t block_size
Definition LinearSolver.hpp:32
const Vector< K > & b_in
Definition LinearSolver.hpp:46
const size_t n
Definition LinearSolver.hpp:48
Iterative Jacobi solver with SIMD and OpenMP support.
Definition LinearSolver.hpp:199
aligned_vector< K > data
Definition LinearSolver.hpp:201
static Vector< K > solve(const Matrix< K > &A, const Vector< K > &b, K tol=1e-10, int max_iter=2000)
Solve the system using the Jacobi method.
Definition LinearSolver.hpp:212
Namespace containing linear system solvers.
Definition LinearSolver.hpp:18