Tensorium_lib/Spectral_8hpp_source.html

#pragma once


#include "../MathUtils/MathsUtils.hpp"
#include "../SIMD/Allocator.hpp"
#include "../SIMD/CPU_id.hpp"
#include "../SIMD/SIMD.hpp"
#include "Matrix.hpp"
#include "Tensor.hpp"
#include "Vector.hpp"

#include <array>
#include <cassert>
#include <cmath>
#include <complex>
#include <cstddef>
#include <iomanip>
#include <iostream>
#include <numbers>
#include <stdexcept>
#include <utility>
#include <vector>


namespace tensorium {


template <typename T> class SpectralFFT {

  public:

    using Tensor2D = tensorium::Tensor<T, 2>;

    using VectorT = tensorium::Vector<T>;

    using C = std::complex<T>;

    using CVectorT = tensorium::Vector<C>;

    static inline void forward(CVectorT &a) { transform_impl(a, false); }


    static void forward_3D(Tensor<std::complex<T>, 3> &a) {

        const auto   shape = a.shape();

        const size_t NX = shape[0], NY = shape[1], NZ = shape[2];

        using CVector = Vector<std::complex<T>>;


#pragma omp parallel for collapse(2)

        for (size_t i = 0; i < NX; ++i) {

            for (size_t j = 0; j < NY; ++j) {

                CVector sliceZ(NZ);

                for (size_t k = 0; k < NZ; ++k)

                    sliceZ(k) = a(i, j, k);


                forward(sliceZ);


                for (size_t k = 0; k < NZ; ++k)

                    a(i, j, k) = sliceZ(k);

            }

        }


#pragma omp parallel for collapse(2)

        for (size_t i = 0; i < NX; ++i) {

            for (size_t k = 0; k < NZ; ++k) {

                CVector sliceY(NY);

                for (size_t j = 0; j < NY; ++j)

                    sliceY(j) = a(i, j, k);


                forward(sliceY);


                for (size_t j = 0; j < NY; ++j)

                    a(i, j, k) = sliceY(j);

            }

        }


#pragma omp parallel for collapse(2)

        for (size_t j = 0; j < NY; ++j) {

            for (size_t k = 0; k < NZ; ++k) {

                CVector sliceX(NX);

                for (size_t i = 0; i < NX; ++i)

                    sliceX(i) = a(i, j, k);


                forward(sliceX);


                for (size_t i = 0; i < NX; ++i)

                    a(i, j, k) = sliceX(i);

            }

        }

    }


    static inline void backward(CVectorT &a) { transform_impl(a, true); }


    static void backward_3D(Tensor<std::complex<T>, 3> &a) {

        const auto   shape = a.shape();

        const size_t NX = shape[0], NY = shape[1], NZ = shape[2];

        using CVector = Vector<std::complex<T>>;


#pragma omp parallel for collapse(2)

        for (size_t j = 0; j < NY; ++j) {

            for (size_t k = 0; k < NZ; ++k) {

                CVector sliceX(NX);

                for (size_t i = 0; i < NX; ++i)

                    sliceX(i) = a(i, j, k);


                backward(sliceX);


                for (size_t i = 0; i < NX; ++i)

                    a(i, j, k) = sliceX(i);

            }

        }


#pragma omp parallel for collapse(2)

        for (size_t i = 0; i < NX; ++i) {

            for (size_t k = 0; k < NZ; ++k) {

                CVector sliceY(NY);

                for (size_t j = 0; j < NY; ++j)

                    sliceY(j) = a(i, j, k);


                backward(sliceY);


                for (size_t j = 0; j < NY; ++j)

                    a(i, j, k) = sliceY(j);

            }

        }


#pragma omp parallel for collapse(2)

        for (size_t i = 0; i < NX; ++i) {

            for (size_t j = 0; j < NY; ++j) {

                CVector sliceZ(NZ);

                for (size_t k = 0; k < NZ; ++k)

                    sliceZ(k) = a(i, j, k);


                backward(sliceZ);


                for (size_t k = 0; k < NZ; ++k)

                    a(i, j, k) = sliceZ(k);

            }

        }

        const T norm = 1.0 / (NX * NY * NZ);

#pragma omp parallel for collapse(3)

        for (size_t i = 0; i < NX; ++i)

            for (size_t j = 0; j < NY; ++j)

                for (size_t k = 0; k < NZ; ++k)

                    a(i, j, k) *= norm;

    }


  private:


    static void transform_impl(CVectorT &a, bool inverse) {

        const std::size_t N = a.size();

        if (N <= 1 || (N & (N - 1)))

            throw std::invalid_argument("SpectralFFT: size must be power of two");


        bit_reverse(a);


        std::vector<C> twiddles(N / 2);

        const T        sign = inverse ? T(+1) : T(-1);

        constexpr T    pi = T(3.141592653589793238462643383279502884L);

        for (std::size_t k = 0; k < N / 2; ++k)

            twiddles[k] = {std::cos(sign * 2 * pi * k / N), std::sin(sign * 2 * pi * k / N)};


        for (std::size_t len = 2; len <= N; len <<= 1) {

            const std::size_t half = len >> 1;

            const std::size_t step = N / len;


#pragma omp parallel for schedule(static) if (len >= 64)

            for (std::size_t i = 0; i < N; i += len) {

                for (std::size_t j = 0; j < half; ++j) {

                    C t = a[i + j + half] * twiddles[j * step];

                    C u = a[i + j];

                    a[i + j] = u + t;

                    a[i + j + half] = u - t;

                }

            }

        }


        if (inverse) {

            const T invN = T(1) / T(N);

#pragma omp parallel for schedule(static)

            for (std::size_t i = 0; i < N; ++i)

                a[i] *= invN;

        }

    }


    static void bit_reverse(CVectorT &a) {

        const std::size_t N = a.size();

        for (std::size_t i = 1, j = 0; i < N; ++i) {

            std::size_t bit = N >> 1;

            for (; j & bit; bit >>= 1)

                j ^= bit;

            j ^= bit;

            if (i < j)

                std::swap(a[i], a[j]);

        }

    }


};


template <typename T> class SpectalChebyshev {

  public:

    using Tensor2D = tensorium::Tensor<T, 2>;

    using VectorT = tensorium::Vector<T>;


    static void compute(const VectorT &X, T h, Tensor2D &result) {

        const size_t dim = 4;

        result.resize(dim, dim);

        result.fill(T(0));


        for (size_t i = 0; i < dim; ++i) {

            for (size_t j = 0; j < dim; ++j) {

                result(i, j) = std::cos(X(i) * X(j)) * h;

            }

        }

    }


};


} // namespace tensorium

Allocator.hpp

CPU_id.hpp

GreekSymbolminus::pi
@ pi

MathsUtils.hpp

Matrix.hpp

SIMD.hpp

Tensor.hpp

X
static FrontendPluginRegistry::Add< TensoriumPluginAction > X("tensorium-dispatch", "Handle #pragma tensorium directives")
Register the plugin under the name "tensorium-dispatch".

Vector.hpp

tensorium::SpectalChebyshev
Placeholder Chebyshev spectral method class.
Definition Spectral.hpp:213

tensorium::SpectalChebyshev::compute
static void compute(const VectorT &X, T h, Tensor2D &result)
Dummy computation using Chebyshev-like cosine weights.
Definition Spectral.hpp:227

tensorium::SpectralFFT
Fast Fourier Transform (FFT) implementation using Cooley–Tukey algorithm.
Definition Spectral.hpp:27

tensorium::SpectralFFT::forward_3D
static void forward_3D(Tensor< std::complex< T >, 3 > &a)
Definition Spectral.hpp:40

tensorium::SpectralFFT::transform_impl
static void transform_impl(CVectorT &a, bool inverse)
Internal FFT implementation (shared by forward/backward)
Definition Spectral.hpp:155

tensorium::SpectralFFT::backward_3D
static void backward_3D(Tensor< std::complex< T >, 3 > &a)
Definition Spectral.hpp:95

tensorium::SpectralFFT::C
std::complex< T > C
Definition Spectral.hpp:31

tensorium::SpectralFFT::backward
static void backward(CVectorT &a)
Perform inverse FFT (in-place)
Definition Spectral.hpp:93

tensorium::SpectralFFT::bit_reverse
static void bit_reverse(CVectorT &a)
Bit-reversal permutation step.
Definition Spectral.hpp:194

tensorium::SpectralFFT::forward
static void forward(CVectorT &a)
Perform forward FFT (in-place)
Definition Spectral.hpp:38

tensorium::Tensor< T, 2 >

tensorium::Tensor::resize
void resize(const std::array< size_t, Rank > &dims)
Resize 2D tensor.
Definition Tensor.hpp:70

tensorium::Tensor::fill
void fill(K value)
Fill tensor with a constant value.
Definition Tensor.hpp:125

tensorium::Vector< T >

tensorium::Vector::size
size_t size() const
Definition Vector.hpp:76

tensorium
Definition Derivate.hpp:24