46 std::cout <<
"Auto-selected BLOCK_SIZE = " <<
block_size << std::endl;
67 std::cerr <<
"[OOB] operator(): "
68 <<
"i=" << i <<
" rows=" <<
rows <<
" | j=" << j <<
" cols=" <<
cols <<
"\n";
81 assert(i <
rows && j <
cols &&
"Derivate::operator() const: indice hors bornes");
101 using reg =
typename Simd::reg;
106#pragma omp parallel for
107 for (
size_t i = 0; i < input.
rows; ++i) {
111 const size_t simd_end =
117 const K *left_ptr = &input(i, j - 1);
118 const K *right_ptr = &input(i, j + 1);
119 K *out_ptr = &
output(i, j);
121 reg left = Simd::loadu(left_ptr);
122 reg right = Simd::loadu(right_ptr);
123 reg diff = Simd::sub(right, left);
124 reg res = Simd::mul(diff, Simd::set1(
inv_2dx));
125 Simd::storeu(out_ptr, res);
128 for (; j < input.
cols - 1; ++j)
129 output(i, j) = (input(i, j + 1) - input(i, j - 1)) *
inv_2dx;
136#pragma omp parallel for
137 for (
size_t i = 1; i < input.
rows - 1; ++i)
138 for (
size_t j = 0; j < input.
cols; ++j)
139 output(i, j) = (input(i + 1, j) - input(i - 1, j)) *
inv_2dx;
141 for (
size_t j = 0; j < input.
cols; ++j) {
148 std::cerr <<
"[centered_derivative] Invalid axis: must be 0 or 1\n";
162 using reg =
typename Simd::reg;
163 constexpr size_t W = Simd::width;
170#pragma omp parallel for
171 for (
size_t i = 0; i < input.
rows; ++i) {
176 for (; j +
W <= input.
cols - 2; j +=
W) {
177 const K *ptr_m2 = &input(i, j - 2);
178 const K *ptr_m1 = &input(i, j - 1);
179 const K *ptr_p1 = &input(i, j + 1);
180 const K *ptr_p2 = &input(i, j + 2);
182 reg fm2 = Simd::loadu(ptr_m2);
183 reg fm1 = Simd::loadu(ptr_m1);
184 reg fp1 = Simd::loadu(ptr_p1);
185 reg fp2 = Simd::loadu(ptr_p2);
188 reg term2 = Simd::mul(fp2, Simd::set1(1.0f));
191 reg num = Simd::sub(Simd::add(fm2, term1), term2);
192 num = Simd::sub(num, term3);
193 reg res = Simd::mul(num,
inv);
195 Simd::storeu(&
output(i, j), res);
198 for (; j < input.
cols - 2; ++j) {
199 output(i, j) = (input(i, j - 2) - 8 * input(i, j - 1) + 8 * input(i, j + 1) -
208#pragma omp parallel for
209 for (
size_t j = 0; j < input.
cols; ++j) {
214 for (; i +
W <= input.
rows - 2; i +=
W) {
215 const K *ptr_m2 = &input(i - 2, j);
216 const K *ptr_m1 = &input(i - 1, j);
217 const K *ptr_p1 = &input(i + 1, j);
218 const K *ptr_p2 = &input(i + 2, j);
220 reg fm2 = Simd::loadu(ptr_m2);
221 reg fm1 = Simd::loadu(ptr_m1);
222 reg fp1 = Simd::loadu(ptr_p1);
223 reg fp2 = Simd::loadu(ptr_p2);
226 reg term2 = Simd::mul(fp2, Simd::set1(1.0f));
229 reg num = Simd::sub(Simd::add(fm2, term1), term2);
230 num = Simd::sub(num, term3);
231 reg res = Simd::mul(num,
inv);
233 Simd::storeu(&
output(i, j), res);
236 for (; i < input.
rows - 2; ++i) {
237 output(i, j) = (input(i - 2, j) - 8 * input(i - 1, j) + 8 * input(i + 1, j) -
246 std::cerr <<
"[SIMD Order4] Invalid axis: must be 0 or 1\n";
269 data(std::accumulate(dims.begin(), dims.
end(), size_t(1), std::multiplies<size_t>()),
272 std::cout <<
"Auto-selected BLOCK_SIZE = " <<
block_size << std::endl;
275 inline size_t flatten_index(
const std::array<size_t, Rank> &indices)
const {
276 size_t index = 0, stride = 1;
277 for (
int i = Rank - 1; i >= 0; --i) {
278 index += indices[i] * stride;
289 inline K &
operator()(
const std::array<size_t, Rank> &indices) {
298 inline const K &
operator()(
const std::array<size_t, Rank> &indices)
const {
319 using reg =
typename Simd::reg;
329 for (
int i = Rank - 2; i >= 0; --i)
335#pragma omp parallel for schedule(static)
339 for (
size_t offset = 0; offset <
simd_width; ++offset) {
340 if (flat + offset >=
total) {
346 if (coord_axis == 0 || coord_axis >=
dim_axis - 1) {
353 for (
size_t offset = 0; offset <
simd_width && flat + offset <
total; ++offset) {
354 const size_t f = flat + offset;
357 if (coord_axis == 0 || coord_axis >=
dim_axis - 1) {
369 K *out_ptr =
output.data.data() + flat;
371 reg forward = Simd::loadu(ptr_fwd);
372 reg backward = Simd::loadu(ptr_back);
373 reg diff = Simd::sub(forward, backward);
375 Simd::storeu(out_ptr, result);
388 size_t axis, K dx)
const {
390 using reg =
typename Simd::reg;
391 constexpr size_t W = Simd::width;
398 std::array<size_t, Rank>
strides;
400 for (
int i = Rank - 2; i >= 0; --i)
406#pragma omp parallel for schedule(static)
407 for (
size_t flat = 0; flat <
total; flat +=
W) {
410 for (
size_t offset = 0; offset <
W; ++offset) {
411 if (flat + offset >=
total) {
417 if (coord_axis < 2 || coord_axis >=
dim_axis - 2) {
424 for (
size_t offset = 0; offset <
W && flat + offset <
total; ++offset) {
425 size_t f = flat + offset;
427 if (coord_axis < 2 || coord_axis >=
dim_axis - 2) {
446 K *out_ptr =
output.data.data() + flat;
448 reg fm2 = Simd::loadu(ptr_m2);
449 reg fm1 = Simd::loadu(ptr_m1);
450 reg fp1 = Simd::loadu(ptr_p1);
451 reg fp2 = Simd::loadu(ptr_p2);
454 fm2, Simd::sub(Simd::mul(fp1, Simd::set1(8)), Simd::mul(fp2, Simd::set1(1))));
455 num = Simd::sub(num, Simd::mul(fm1, Simd::set1(8)));
456 reg res = Simd::mul(num,
inv);
458 Simd::storeu(out_ptr, res);
473template <
typename Container>
475 const Container &plus_half_h,
476 const Container &minus_half_h,
double h) {
477 assert(plus_h.size() == minus_h.size());
478 assert(plus_h.size() == plus_half_h.size());
479 assert(plus_h.size() == minus_half_h.size());
481 Container out(plus_h);
482#pragma omp parallel for
483 for (
size_t i = 0; i < plus_h.size(); ++i) {
484 auto diff_h = (plus_h[i] - minus_h[i]) / (2.0 * h);
485 auto diff_half = (plus_half_h[i] - minus_half_h[i]) / h;
486 out[i] = (4.0 * diff_half - diff_h) / 3.0;
503 const T &minus_half_h,
double h) {
504 T diff_h = (plus_h - minus_h) / (2.0 * h);
505 T diff_half = (plus_half_h - minus_half_h) / h;
506 return (4.0 * diff_half - diff_h) / 3.0;
std::vector< K, AlignedAllocator< K, ALIGN > > aligned_vector
Type alias for a std::vector with aligned memory allocation.
Definition Allocator.hpp:111
size_t detect_optimal_block_size()
Definition CPU_id.hpp:18
A multi-dimensional aligned tensor for numerical derivatives.
Definition Derivate.hpp:257
__attribute__((always_inline, hot, flatten)) inline void centered_derivative(const DerivateND< K
Compute second-order centered derivative along an axis.
size_t block_size
Definition Derivate.hpp:261
DerivateND(const std::array< size_t, Rank > &dims)
Construct a tensor with a given shape.
Definition Derivate.hpp:267
std::array< size_t, Rank > strides
Definition Derivate.hpp:327
const size_t dim_axis
Definition Derivate.hpp:333
__attribute__((always_inline, hot, flatten)) inline void centered_derivative_order4_rank(const DerivateND< K
Compute fourth-order centered derivative along an axis.
size_t size() const
Total number of elements in the tensor.
Definition Derivate.hpp:306
const K inv_12dx
Definition Derivate.hpp:395
const size_t total
Definition Derivate.hpp:323
const K & operator()(const std::array< size_t, Rank > &indices) const
Const access to a tensor element.
Definition Derivate.hpp:298
const reg inv2dx
Definition Derivate.hpp:325
const size_t stride_axis
Definition Derivate.hpp:332
const size_t simd_width
Definition Derivate.hpp:320
aligned_vector< K > data
Definition Derivate.hpp:260
Rank DerivateND< K, Rank > & output
Definition Derivate.hpp:316
Rank & input
Definition Derivate.hpp:316
Rank DerivateND< K, Rank > size_t axis
Definition Derivate.hpp:316
K & operator()(const std::array< size_t, Rank > &indices)
Mutable access to a tensor element.
Definition Derivate.hpp:289
const K inv_2dx
Definition Derivate.hpp:324
size_t flatten_index(const std::array< size_t, Rank > &indices) const
Definition Derivate.hpp:275
std::array< size_t, Rank > shape
Definition Derivate.hpp:259
const reg inv
Definition Derivate.hpp:396
constexpr size_t W
Definition Derivate.hpp:391
typename Simd::reg reg
Definition Derivate.hpp:319
A 2D aligned matrix for numerical derivatives.
Definition Derivate.hpp:30
typename Simd::reg reg
Definition Derivate.hpp:101
size_t block_size
Definition Derivate.hpp:34
const K inv_12dx
Definition Derivate.hpp:165
const size_t simd_width
Definition Derivate.hpp:102
size_t size() const
Total number of elements in the matrix.
Definition Derivate.hpp:89
constexpr size_t W
Definition Derivate.hpp:163
Derivate< K > size_t axis
Definition Derivate.hpp:99
Derivate(size_t r, size_t c)
Constructor with explicit dimensions.
Definition Derivate.hpp:41
K & operator()(size_t i, size_t j)
Mutable access to an element.
Definition Derivate.hpp:65
size_t rows
Definition Derivate.hpp:32
Derivate< K > size_t K dx
Definition Derivate.hpp:160
Derivate< K > & output
Definition Derivate.hpp:99
size_t cols
Definition Derivate.hpp:32
const K inv_2dx
Definition Derivate.hpp:103
__attribute__((always_inline, hot, flatten)) inline void centered_derivative(const Derivate< K > &input
Compute second-order centered derivative.
const reg eight
Definition Derivate.hpp:167
const K & operator()(size_t i, size_t j) const
Const access to an element.
Definition Derivate.hpp:80
Derivate(const Matrix< K > &m)
Construct from an existing matrix.
Definition Derivate.hpp:53
const reg inv
Definition Derivate.hpp:166
__attribute__((always_inline, hot, flatten)) inline void centered_derivative_order4(const Derivate< K > &input
Compute fourth-order centered derivative.
aligned_vector< K > data
Definition Derivate.hpp:33
High-performance aligned matrix class with SIMD support.
Definition Matrix.hpp:27
Definition Derivate.hpp:24
Container richardson_derivative_container(const Container &plus_h, const Container &minus_h, const Container &plus_half_h, const Container &minus_half_h, double h)
Richardson extrapolation for vectors or containers.
Definition Derivate.hpp:474
T richardson_derivative(const T &plus_h, const T &minus_h, const T &plus_half_h, const T &minus_half_h, double h)
Richardson extrapolation for scalar values.
Definition Derivate.hpp:502
void centered_derivative(const Derivate< K > &input, Derivate< K > &output, size_t axis, K dx)
Definition Functional.hpp:349
void centered_derivative_order4(const Derivate< K > &input, Derivate< K > &output, size_t axis, K dx)
Definition Functional.hpp:355