Tensorium
Loading...
Searching...
No Matches
LinearSolver.hpp
Go to the documentation of this file.
1#pragma once
4#include "../SIMD/CPU_id.hpp"
5#include "../SIMD/SIMD.hpp"
6#include "Matrix.hpp"
7#include "Vector.hpp"
8
19template <typename K> class Jacobi;
20
29template <typename K> class Gauss {
30 public:
31 size_t rows() const;
32 size_t block_size;
45 __attribute__((always_inline, hot, flatten)) static inline Vector<K>
46 solve(const Matrix<K> &A_in, const Vector<K> &b_in) {
47 static_assert(std::is_floating_point<K>::value, "");
48 const size_t n = A_in.rows;
49
50 assert(n == A_in.cols && n == b_in.size());
51 if (n >= 1024)
52 return Jacobi<K>::solve(A_in, b_in);
53
54 Matrix<K> M = A_in;
57
59 using regT = typename SimdT::reg;
60 const size_t W = SimdT::width;
61
63
64 for (size_t i = 0; i < n; ++i) {
65 size_t piv = i;
66 K maxv = MathsUtils::_abs(M(i, i));
67 for (size_t r = i + 1; r < n; ++r) {
68 K v = MathsUtils::_abs(M(r, i));
69 if (v > maxv) {
70 maxv = v;
71 piv = r;
72 }
73 }
74 if (maxv < static_cast<K>(1e-12))
75 throw std::runtime_error("Gauss: matrix is singular or nearly singular.");
76
77 if (piv != i) {
78 M.swap_rows(i, piv);
79 MathsUtils::_swap(B[i], B[piv]);
80 }
81
82 constexpr auto TILE = UNROLL;
83#pragma omp parallel for schedule(dynamic, TILE)
84 for (size_t j = i + 1; j < n; ++j) {
85 for (size_t k = 0; k < n; ++k) {
86 rowj[k] = M(j, k);
87 rowi[k] = M(i, k);
88 }
89
90 K f = rowj[i] / rowi[i];
91 rowj[i] = K(0);
92 regT fv = SimdT::set1(f);
93
94 size_t k = i + 1;
95 for (; k + 4 * W <= n; k += 4 * W) {
96 for (int t = 0; t < 4; ++t) {
97 size_t off = k + t * W;
98 regT vj = SimdT::loadu(&rowj[off]);
99 regT vi = SimdT::loadu(&rowi[off]);
100 vj = SimdT::sub(vj, SimdT::mul(fv, vi));
101 SimdT::storeu(&rowj[off], vj);
102 }
103 }
104 for (; k + W <= n; k += W) {
105 regT vj = SimdT::loadu(&rowj[k]);
106 regT vi = SimdT::loadu(&rowi[k]);
107 vj = SimdT::sub(vj, SimdT::mul(fv, vi));
108 SimdT::storeu(&rowj[k], vj);
109 }
110 for (; k < n; ++k)
111 rowj[k] -= f * rowi[k];
112
113 for (size_t k = 0; k < n; ++k)
114 M(j, k) = rowj[k];
115
116 B[j] -= f * B[i];
117 }
118 }
119
120 for (size_t ii = n; ii-- > 0;) {
121 for (size_t k = 0; k < n; ++k)
122 rowi[k] = M(ii, k);
123
124 regT acc = SimdT::setzero();
125 size_t j = ii + 1;
126 for (; j + W <= n; j += W) {
127 regT u = SimdT::loadu(&rowi[j]);
128 regT xv = SimdT::loadu(&x[j]);
129 acc = SimdT::fmadd(u, xv, acc);
130 }
131 K sum = SimdT::horizontal_add(acc);
132 for (; j < n; ++j)
133 sum += rowi[j] * x[j];
134
135 x[ii] = (B[ii] - sum) / rowi[ii];
136 }
137
138 return x;
139 }
140
141 static inline void raw_row_echelon(Matrix<K> &A, Vector<K> *b = nullptr, K eps = 1e-12) {
142 const size_t n = A.rows;
143 const size_t m = A.cols;
144 assert(!b || b->size() == n);
145
146 size_t lead = 0;
147 for (size_t r = 0; r < n; ++r) {
148 if (lead >= m)
149 break;
150
151 size_t i = r;
152 while (i < n && MathsUtils::_abs(A(i, lead)) < eps)
153 ++i;
154 if (i == n) {
155 ++lead;
156 --r;
157 continue;
158 }
159
160 if (i != r) {
161 A.swap_rows(i, r);
162 if (b)
163 MathsUtils::_swap((*b)[i], (*b)[r]);
164 }
165
166 K pivot = A(r, lead);
167 for (size_t j = 0; j < m; ++j)
168 A(r, j) /= pivot;
169 if (b)
170 (*b)[r] /= pivot;
171
172 for (size_t k = 0; k < n; ++k) {
173 if (k == r)
174 continue;
175 K f = A(k, lead);
176 for (size_t j = 0; j < m; ++j)
177 A(k, j) -= f * A(r, j);
178 if (b)
179 (*b)[k] -= f * (*b)[r];
180 }
181
182 ++lead;
183 }
184 }
185};
199template <typename K> class Jacobi {
200 public:
212 static inline Vector<K> solve(const Matrix<K> &A, const Vector<K> &b, K tol = 1e-10,
213 int max_iter = 2000) {
214 static_assert(std::is_floating_point<K>::value,
215 "Jacobi solver requires floating-point type.");
216 assert(A.rows == A.cols && "Matrix A must be square");
217 assert(A.rows == b.size() && "Matrix/vector size mismatch");
218 const size_t n = A.rows;
219 Vector<K> x(n, K(0));
220 Vector<K> x_new(n, K(0));
221
223 using reg = typename Simd::reg;
224 const size_t simd_width = Simd::width;
225
226 for (int iter = 0; iter < max_iter; ++iter) {
227#pragma omp parallel for schedule(dynamic, 4)
228 for (size_t i = 0; i < n; ++i) {
229 if (MathsUtils::_abs(A(i, i)) < 1e-10)
230 throw std::runtime_error("Jacobi: division by near-zero on diagonal, matrix "
231 "likely not diagonally dominant.");
232
233 reg sum_vec = Simd::setzero();
234 size_t j = 0;
235
236 for (; j + simd_width <= n; j += simd_width) {
237 reg a_vec = Simd::loadu(&A(i, j));
238 reg x_vec = Simd::loadu(&x[j]);
239
240 if (i >= j && i < j + simd_width) {
241 std::array<K, simd_width> mask_arr;
242 Simd::storeu(mask_arr.data(), a_vec);
243 mask_arr[i - j] = K(0);
244 a_vec = Simd::loadu(mask_arr.data());
245 }
246
247 sum_vec = Simd::fma(a_vec, x_vec, sum_vec);
248 }
249
250 K sigma = Simd::horizontal_add(sum_vec);
251
252 for (; j < n; ++j) {
253 if (j != i)
254 sigma += A(i, j) * x[j];
255 }
256
257 x_new[i] = (b[i] - sigma) / A(i, i);
258 }
259
260 K err = K(0);
261#pragma omp parallel for reduction(+ : err) schedule(dynamic)
262 for (size_t i = 0; i < n; ++i) {
263 K diff = x_new[i] - x[i];
264 err += diff * diff;
265 }
266 err = std::sqrt(err);
267
268 if (err < tol)
269 break;
270
271 x = x_new;
272 }
273
274 return x;
275 }
276};
277
291template <typename K> class GaussSeidel {
292 public:
303 static Vector<K> solve(const Matrix<K> &A, const Vector<K> &b, K tol = 1e-8,
304 int max_iter = 2000);
305};
306
307} // namespace tensorium::solver
std::vector< K, AlignedAllocator< K, ALIGN > > aligned_vector
Type alias for a std::vector with aligned memory allocation.
Definition Allocator.hpp:111
#define UNROLL
Definition SIMD.hpp:28
static void _swap(T &a, T &b)
Definition MathsUtils.hpp:26
static double _abs(double a)
Definition MathsUtils.hpp:32
High-performance aligned matrix class with SIMD support.
Definition Matrix.hpp:27
size_t rows
Definition Matrix.hpp:29
size_t cols
Definition Matrix.hpp:29
void swap_rows(size_t i, size_t j)
Swap two rows of the matrix.
Definition Matrix.hpp:69
Aligned, SIMD-optimized mathematical vector class for scientific computing.
Definition Vector.hpp:26
size_t size() const
Definition Vector.hpp:76
Placeholder for Gauss–Seidel iterative solver.
Definition LinearSolver.hpp:291
static Vector< K > solve(const Matrix< K > &A, const Vector< K > &b, K tol=1e-8, int max_iter=2000)
Solve the system using Gauss–Seidel method.
aligned_vector< K > data
Definition LinearSolver.hpp:293
Direct Gaussian elimination solver with SIMD acceleration.
Definition LinearSolver.hpp:29
assert(n==A_in.cols &&n==b_in.size())
typename SimdT::reg regT
Definition LinearSolver.hpp:59
__attribute__((always_inline, hot, flatten)) static inline Vector< K > solve(const Matrix< K > &A_in
Solve the linear system .
aligned_vector< K > rowj(n)
aligned_vector< K > data
Definition LinearSolver.hpp:33
if(n >=1024) return Jacobi< K > Matrix< K > M
Definition LinearSolver.hpp:54
const size_t W
Definition LinearSolver.hpp:60
Vector< K > B
Definition LinearSolver.hpp:55
return x
Definition LinearSolver.hpp:138
aligned_vector< K > rowi(n)
static void raw_row_echelon(Matrix< K > &A, Vector< K > *b=nullptr, K eps=1e-12)
Definition LinearSolver.hpp:141
size_t block_size
Definition LinearSolver.hpp:32
const Vector< K > & b_in
Definition LinearSolver.hpp:46
const size_t n
Definition LinearSolver.hpp:48
Iterative Jacobi solver with SIMD and OpenMP support.
Definition LinearSolver.hpp:199
aligned_vector< K > data
Definition LinearSolver.hpp:201
static Vector< K > solve(const Matrix< K > &A, const Vector< K > &b, K tol=1e-10, int max_iter=2000)
Solve the system using the Jacobi method.
Definition LinearSolver.hpp:212
Namespace containing linear system solvers.
Definition LinearSolver.hpp:18
Definition SIMD.hpp:177