27template <
typename K,
bool RowMajor = false>
class Matrix {
42 inline size_t index(
size_t i,
size_t j)
const {
43 if constexpr (RowMajor)
49 using reg =
typename Simd::reg;
61 for (
size_t i = 0; i <
rows; ++i) {
63 for (
size_t j = 0; j <
cols; ++j)
64 std::cout <<
operator()(i, j) <<
" ";
71 for (
size_t k = 0; k <
cols; ++k) {
83 assert(
cols == v.
size() &&
"Matrix-Vector size mismatch");
85 for (
auto &x : result)
88 for (
size_t i = 0; i <
rows; ++i) {
89 for (
size_t j = 0; j <
cols; ++j) {
90 result[i] += (*this)(i, j) * v[j];
98 throw std::invalid_argument(
"Matrix sizes do not match");
101 using reg =
typename Simd::reg;
107 _mm_prefetch((
const char *)&m.
data[0], _MM_HINT_T0);
111 reg b0 = Simd::load(&m.
data[i]);
112 a0 = Simd::add(a0, b0);
113 Simd::store(&
data[i], a0);
117 a1 = Simd::add(a1, b1);
127 throw std::invalid_argument(
"Matrix sizes do not match");
129 using reg =
typename Simd::reg;
135 _mm_prefetch((
const char *)&m.
data[0], _MM_HINT_T0);
136 for (; i + 15 < n; i += 16) {
138 reg b0 = Simd::load(&m.
data[i]);
139 a0 = Simd::sub(a0, b0);
140 Simd::store(&
data[i], a0);
144 a1 = Simd::sub(a1, b1);
147 for (; i <
size(); ++i) {
156 using reg =
typename Simd::reg;
158 _mm_prefetch((
const char *)&
data[0], _MM_HINT_T0);
159 reg scalar = Simd::set1(a);
161 for (; i + 15 < n; i += 16) {
163 v0 = Simd::mul(v0, scalar);
164 Simd::store(&
data[i], v0);
167 v1 = Simd::mul(v1, scalar);
178 throw std::invalid_argument(
"Matrix size mismatch for lerp");
181 using reg =
typename Simd::reg;
188 reg one_minus_alpha_vec = Simd::set1(K(1) -
alpha);
191 reg a0 = Simd::load(&A.
data[i]);
192 reg b0 = Simd::load(&B.
data[i]);
193 reg r0 = Simd::fmadd(one_minus_alpha_vec, a0, Simd::mul(alpha_vec, b0));
194 Simd::store(&
data[i], r0);
198 reg r1 = Simd::fmadd(one_minus_alpha_vec, a1, Simd::mul(alpha_vec, b1));
214 throw std::invalid_argument(
"Matrix dimensions do not match for multiplication");
218 const K *A =
data.data();
219 const K *B = mat.
data.data();
220 K *C = result.
data.data();
223 kernel.
matmul(
const_cast<K *
>(A),
const_cast<K *
>(B), C,
224 static_cast<int>(
rows),
225 static_cast<int>(mat.
cols),
226 static_cast<int>(
cols)
238 using reg =
typename Simd::reg;
239 constexpr size_t W = Simd::width;
245 alignas(64) T buffer[W];
247 for (
size_t i = 0; i <
rows; ++i) {
248 reg acc = Simd::zero();
251 for (; j + W <=
cols; j += W) {
252 for (
size_t w = 0; w < W; ++w)
253 buffer[w] = (*
this)(i, j + w);
255 reg A_vec = Simd::load(buffer);
256 reg x_vec = Simd::load(&x[j]);
257 acc = Simd::fmadd(A_vec, x_vec, acc);
260 T sum = Simd::horizontal_add(acc);
262 for (; j <
cols; ++j)
263 sum += (*
this)(i, j) * x[j];
275 for (
size_t i = 0; i <
rows; ++i)
276 for (
size_t j = 0; j <
cols; ++j)
277 result(j, i) = (*this)(i, j);
285 throw std::invalid_argument(
"Matrix is not square");
291 for (
size_t i = 0; i <
rows; ++i) {
305 throw std::invalid_argument(
"Matrix must be square");
311 for (
auto i =
decltype(n)(0); i < n; ++i) {
312 for (
auto j =
decltype(n)(0); j < n; ++j) {
314 Inv(i, j) = (i == j) ? K(1) : K(0);
319 for (
auto i =
decltype(n)(0); i < n; ++i) {
322 for (
auto r = i + 1; r < n; ++r) {
329 if (maxv <
static_cast<K
>(1e-6))
330 throw std::runtime_error(
"Matrix is singular or nearly singular.");
338 auto diag_inv = K(1) / diag;
339 for (
auto j = 0u; j < n; ++j) {
341 Inv(i, j) *= diag_inv;
344#pragma omp parallel for schedule(dynamic, UNROLL)
345 for (
auto j = 0u; j < n; ++j) {
348 for (
auto k = 0u; k < n; ++k) {
349 M(j, k) -= f * M(i, k);
350 Inv(j, k) -= f * Inv(i, k);
367 throw std::invalid_argument(
"Matrix must be square");
369 const size_t n =
rows;
374 for (
size_t i = 0; i < n; ++i)
375 for (
size_t j = 0; j < n; ++j)
380 for (
size_t i = 0; i < n; ++i) {
383 for (
size_t r = i + 1; r < n; ++r) {
390 if (maxv <
static_cast<K
>(1e-12))
395 det_sign = -det_sign;
398 for (
size_t j = i + 1; j < n; ++j) {
399 auto f = M(j, i) / M(i, i);
402 auto f_vec = SimdT::set1(-f);
406 auto mjk = SimdT::load(&M(j, k));
407 auto mik = SimdT::load(&M(i, k));
408 mjk = SimdT::fmadd(f_vec, mik, mjk);
409 SimdT::store(&M(j, k), mjk);
412 M(j, k) -= f * M(i, k);
418 for (
size_t i = 0; i < n; ++i)
428 inline size_t rank(K eps = K(1e-6))
const {
430 const size_t m =
rows;
431 const size_t n =
cols;
434 for (
size_t col = 0; col < n; ++col) {
435 size_t pivot_row = r;
436 for (
size_t i = r; i < m; ++i) {
447 for (
size_t i = r + 1; i < m; ++i) {
448 auto f = M(i, col) / M(r, col);
450 for (
size_t j = col + 1; j < n; ++j)
451 M(i, j) -= f * M(r, j);
std::vector< K, AlignedAllocator< K, ALIGN > > aligned_vector
Type alias for a std::vector with aligned memory allocation.
Definition Allocator.hpp:111
size_t detect_optimal_block_size()
Definition CPU_id.hpp:18
static void _swap(T &a, T &b)
Definition MathsUtils.hpp:26
static double _abs(double a)
Definition MathsUtils.hpp:32
Definition GemmKernel_bigger.hpp:16
void matmul(T *A, T *B, T *C, int M, int N, int K)
Definition GemmKernel_bigger.hpp:828
High-performance aligned matrix class with SIMD support.
Definition Matrix.hpp:27
Matrix< K > transpose() const
Returns the transpose of the matrix (column-major layout)
Definition Matrix.hpp:272
const K & operator()(size_t i, size_t j) const
Definition Matrix.hpp:57
Matrix(size_t r, size_t c)
Construct a matrix of size r × c, initialized with zeros.
Definition Matrix.hpp:36
Matrix _mul_mat(const Matrix< K > &mat) const
Multiply matrix by another matrix using optimized SIMD path.
Definition Matrix.hpp:212
size_t rows
Definition Matrix.hpp:29
size_t rank(K eps=K(1e-6)) const
Compute the numerical rank of the matrix.
Definition Matrix.hpp:428
void sub(const Matrix &m)
In-place matrix subtraction: this -= m.
Definition Matrix.hpp:125
Vector< T > operator*(const Vector< T > &v) const
Multiply matrix by a vector (naïve fallback)
Definition Matrix.hpp:82
void scl(K a)
In-place scalar multiplication: this *= a.
Definition Matrix.hpp:152
void lerp(const Matrix< K > &A, const Matrix< K > &B, K alpha)
Linearly interpolate between two matrices: this = (1 - α) * A + α * B.
Definition Matrix.hpp:176
Vector< T > mul_vec(const Vector< T > &x) const
Multiply matrix by a vector using SIMD.
Definition Matrix.hpp:236
size_t simd_width
Definition Matrix.hpp:50
K det() const
Compute the determinant using Gaussian elimination.
Definition Matrix.hpp:365
bool iscolumn
Definition Matrix.hpp:32
Matrix< K > trace() const
Returns the trace of a square matrix as a 1×1 matrix.
Definition Matrix.hpp:283
void print() const
Print the matrix to stdout.
Definition Matrix.hpp:60
size_t block_size
Definition Matrix.hpp:31
K & operator()(size_t i, size_t j)
Element access (mutable)
Definition Matrix.hpp:55
aligned_vector< K > data
Definition Matrix.hpp:30
size_t index(size_t i, size_t j) const
Definition Matrix.hpp:42
typename Simd::reg reg
Definition Matrix.hpp:49
void add(const Matrix &m)
In-place matrix addition: this += m.
Definition Matrix.hpp:96
size_t cols
Definition Matrix.hpp:29
Matrix< K > inverse() const
Compute the inverse of the matrix using Gauss–Jordan elimination.
Definition Matrix.hpp:303
void swap_rows(size_t i, size_t j)
Swap two rows of the matrix.
Definition Matrix.hpp:69
size_t size() const
Return the total number of elements.
Definition Matrix.hpp:52
Aligned, SIMD-optimized mathematical vector class for scientific computing.
Definition Vector.hpp:26
size_t size() const
Definition Vector.hpp:76
Definition Derivate.hpp:24