Tensorium
Tensor.hpp
Go to the documentation of this file.
1 #pragma once
2
4 #include "../SIMD/CPU_id.hpp"
5 #include "../SIMD/SIMD.hpp"
6 #include <array>
7 #include <cassert>
8 #include <cmath>
9 #include <cstdint>
10 #include <iomanip>
11 #include <iostream>
12 #include <vector>
13
14 namespace tensorium {
25 template <typename K, std::size_t Rank> class Tensor {
26 public:
27 using value_type = K;
29 std::array<size_t, Rank> dimensions;
30 size_t total_size;
32 size_t block_size;
33 std::array<size_t, Rank> strides;
36
41 Tensor(const std::array<size_t, Rank> &dims)
42 : dimensions(dims),
43 total_size(1),
44 block_size(128) {
45 strides[Rank - 1] = 1;
46 for (int64_t i = Rank - 2; i >= 0; --i) {
47 strides[i] = strides[i + 1] * dimensions[i + 1];
48 }
49
50 size_t total = 1;
51 for (size_t i = 0; i < Rank; ++i)
52 total *= dimensions[i];
53 data.resize(total);
54 total_size = total;
55 }
56
57 size_t flatten_index(size_t i, size_t j, size_t k, size_t l) const {
58 std::array<size_t, 4> idx = {i, j, k, l};
59 return flatten_index(idx);
60 }
61
62 std::array<size_t, Rank> shape() const { return dimensions; }
63 void update_strides() {
64 strides[Rank - 1] = 1;
65 for (int64_t i = Rank - 2; i >= 0; --i)
66 strides[i] = strides[i + 1] * dimensions[i + 1];
67 }
70 void resize(const std::array<size_t, Rank> &dims) {
71 dimensions = dims;
72 update_strides();
73
74 size_t total = 1;
75 for (size_t i = 0; i < Rank; ++i)
76 total *= dims[i];
77
78 total_size = total;
79 data.resize(total);
80 }
81
83 void resize(size_t d0, size_t d1) { resize(std::array<size_t, 2>{d0, d1}); }
85
87 void resize(size_t d0, size_t d1, size_t d2) {
88 static_assert(Rank == 3, "Rank mismatch in resize()");
89
90 dimensions = {d0, d1, d2};
91 total_size = d0 * d1 * d2;
92 data.resize(total_size, K(0));
93 }
94 K &operator()(const std::array<size_t, Rank> &indices) {
95 size_t index = flatten_index(indices);
96 assert(index < total_size);
97 return data[index];
98 }
99 const K &operator()(const std::array<size_t, Rank> &indices) const {
100 size_t index = flatten_index(indices);
101 assert(index < total_size);
102 return data[index];
103 }
104 const K &operator()(size_t i, size_t j, size_t k, size_t l) const {
105 std::array<size_t, 4> idx = {i, j, k, l};
106 return data[flatten_index(idx)];
107 }
108 K &operator()(size_t i, size_t j) { return data[i * dimensions[1] + j]; }
109 K &operator()(size_t i, size_t j, size_t k, size_t l) {
110 std::array<size_t, 4> idx = {i, j, k, l};
111 return data[flatten_index(idx)];
112 }
113 const K &operator()(size_t i, size_t j) const { return data[i * dimensions[1] + j]; }
114 K &operator()(size_t i, size_t j, size_t k) {
115 static_assert(Rank == 3, "Rank mismatch in operator()");
116 return data[i * dimensions[1] * dimensions[2] + j * dimensions[2] + k];
117 }
118 const K &operator()(size_t i, size_t j, size_t k) const {
119 static_assert(Rank == 3, "Rank mismatch in operator()");
120 return data[i * dimensions[1] * dimensions[2] + j * dimensions[2] + k];
121 }
122
124
125 void fill(K value) { std::fill(data.begin(), data.end(), value); }
128
129 void print_shape() const {
130 std::cout << "Tensor shape: (";
131 for (size_t i = 0; i < Rank; ++i) {
132 std::cout << dimensions[i];
133 if (i + 1 < Rank)
134 std::cout << ", ";
135 }
136 std::cout << ")\n";
137 }
138
140 void print() const {
141 for (size_t i = 0; i < dimensions[0]; ++i) {
142 for (size_t j = 0; j < dimensions[1]; ++j) {
143 std::cout << std::setw(10) << std::setprecision(4) << std::fixed << (*this)({i, j})
144 << " ";
145 }
146 std::cout << "\n";
147 }
148 }
150
152 __attribute__((always_inline, hot, flatten))
160 inline size_t
161 flatten_index_simd(const size_t *indices, const size_t *strides) const {
163 using reg = typename Simd::reg;
164 constexpr size_t W = Simd::width / sizeof(size_t);
165
166 size_t acc = 0;
167 size_t i = 0;
168
169 for (; i + W - 1 < Rank; i += W) {
170 reg idx = Simd::load(&indices[i]);
171 reg str = Simd::load(&strides[i]);
172 reg prod = Simd::mul(idx, str);
173 acc += detail::reduce_sum(prod);
174 }
175 for (; i < Rank; ++i)
176 acc += indices[i] * strides[i];
177
178 return acc;
179 }
180
181 __attribute__((always_inline, hot, flatten))
188 inline size_t
189 flatten_index(const std::array<size_t, Rank> &indices) const {
190 return flatten_index_simd(indices.data(), strides.data());
191 }
192
193 __attribute__((always_inline, hot, flatten))
205 Tensor<K, Rank - 2>
206 contract_simd(const Tensor<K, Rank> &t, size_t i, size_t j) const {
207 static_assert(Rank >= 2, "Cannot contract tensor of rank < 2");
208 assert(i < Rank && j < Rank && i != j);
209 assert(t.dimensions[i] == t.dimensions[j]);
210
213 using reg = typename SimdValue::reg;
214 constexpr size_t W = SimdValue::width;
215
216 std::array<size_t, Rank - 2> new_dims;
217 size_t d_idx = 0;
218 for (size_t d = 0; d < Rank; ++d) {
219 if (d != i && d != j)
220 new_dims[d_idx++] = t.dimensions[d];
221 }
222 Tensor<K, Rank - 2> result(new_dims);
223
224 std::array<size_t, Rank> indices{};
225 std::array<size_t, Rank - 2> reduced_idx{};
226
227 for (size_t flat = 0; flat < result.data.size(); ++flat) {
228 size_t tmp = flat;
229 d_idx = 0;
230 for (size_t d = 0; d < Rank; ++d) {
231 if (d == i || d == j) {
232 indices[d] = 0;
233 } else {
234 indices[d] = tmp % result.dimensions[d_idx];
235 tmp /= result.dimensions[d_idx++];
236 }
237 }
238
239 const size_t dim = t.dimensions[i];
240 size_t k = 0;
241 reg acc = SimdValue::zero();
242
243 for (; k + W - 1 < dim; k += W) {
244 alignas(64) K vals[W];
245 for (size_t w = 0; w < W; ++w) {
246 indices[i] = k + w;
247 indices[j] = k + w;
248 vals[w] = t(indices);
249 }
254
255 reg vec = SimdValue::load(vals);
256 acc = SimdValue::add(acc, vec);
257 }
258
259 K sum = detail::reduce_sum(acc);
260 for (; k < dim; ++k) {
261 indices[i] = k;
262 indices[j] = k;
263 sum += t(indices);
264 }
265
266 d_idx = 0;
267 for (size_t d = 0; d < Rank; ++d) {
268 if (d != i && d != j)
269 reduced_idx[d_idx++] = indices[d];
270 }
271 result(reduced_idx) = sum;
272 }
273
274 return result;
275 }
276
277 template <size_t I, size_t J> Tensor<K, Rank - 2> contract_tensor() const;
278
287 Tensor<K, 2> transpose_simd() const {
288 const size_t rows = dimensions[0];
289 const size_t cols = dimensions[1];
290
291 Tensor<K, 2> result({cols, rows});
292
294 using reg = typename Simd::reg;
295 constexpr size_t W = Simd::width / sizeof(K);
296
297 for (size_t i = 0; i < rows; ++i) {
298 size_t j = 0;
299 for (; j + W - 1 < cols; j += W) {
300 reg vec = Simd::load(&(*this)({i, j}));
301
302 alignas(64) K temp[W];
303 Simd::store(temp, vec);
304 for (size_t k = 0; k < W; ++k)
305 result({j + k, i}) = temp[k];
306 }
307 for (; j < cols; ++j)
308 result({j, i}) = (*this)({i, j});
309 }
310
311 return result;
312 }
313
329 template <size_t R1, size_t R2>
330 static Tensor<K, R1 + R2> tensor_product(const Tensor<K, R1> &A,
331 const Tensor<K, R2> &B) {
333 using reg = typename Simd::reg;
334 constexpr size_t W = Simd::width / sizeof(K);
335 constexpr size_t R = R1 + R2;
336 constexpr size_t L3_BLOCK = 128;
337 constexpr size_t L1_BLOCK = 128;
338
339 std::array<size_t, R> shape;
340 for (size_t i = 0; i < R1; ++i)
341 shape[i] = A.dimensions[i];
342 for (size_t i = 0; i < R2; ++i)
343 shape[R1 + i] = B.dimensions[i];
344
345 Tensor<K, R> result(shape);
346
347 const size_t max_b_safe = (B.data.size() >= W) ? B.data.size() - (W - 1) : 0;
348 std::fill(result.data.begin(), result.data.end(), K(0));
349 #pragma omp parallel for collapse(2)
350 for (size_t a_outer = 0; a_outer < A.total_size; a_outer += L3_BLOCK) {
351 for (size_t b_outer = 0; b_outer < B.total_size; b_outer += L3_BLOCK) {
352 _mm_prefetch(&A.data[a_outer + L3_BLOCK], _MM_HINT_NTA);
353 _mm_prefetch(&B.data[b_outer + L3_BLOCK], _MM_HINT_NTA);
354 const size_t a_outer_end = std::min(a_outer + L3_BLOCK, A.total_size);
355 const size_t b_outer_end = std::min(b_outer + L3_BLOCK, B.total_size);
356
357 for (size_t a_inner = a_outer; a_inner < a_outer_end; a_inner += L1_BLOCK) {
358 for (size_t b_inner = b_outer; b_inner < b_outer_end; b_inner += L1_BLOCK) {
359 _mm_prefetch(&A.data[a_inner + L1_BLOCK], _MM_HINT_T0);
360 _mm_prefetch(&B.data[b_inner + L1_BLOCK], _MM_HINT_T0);
361 const size_t a_end = std::min(a_inner + L1_BLOCK, a_outer_end);
362 const size_t b_end = std::min(b_inner + L1_BLOCK, b_outer_end);
365 for (size_t a_flat = a_inner; a_flat < a_end; ++a_flat) {
366 std::array<size_t, R1> idx_A;
367 size_t tmp = a_flat;
368 for (ssize_t i = R1 - 1; i >= 0; --i) {
369 idx_A[i] = tmp % A.dimensions[i];
370 tmp /= A.dimensions[i];
371 }
372 K a_scalar = A(idx_A);
373 reg a_vec = Simd::set1(a_scalar);
374
375 if (b_inner < max_b_safe && (b_end - b_inner) % W == 0) {
376 for (size_t b_flat = b_inner; b_flat + W <= b_end; b_flat += W) {
377 reg b_vec = Simd::loadu(&B.data[b_flat]);
378 reg c_vec = Simd::mul(a_vec, b_vec);
379
380 for (size_t w = 0; w < W; ++w) {
381 std::array<size_t, R2> idx_B;
382 std::array<size_t, R> idx_C;
383
384 size_t tmpb = b_flat + w;
385 for (ssize_t i = R2 - 1; i >= 0; --i) {
386 idx_B[i] = tmpb % B.dimensions[i];
387 tmpb /= B.dimensions[i];
388 }
389 #pragma unroll(R1 + R2 - 1)
390 for (size_t i = 0; i < R1; ++i)
391 idx_C[i] = idx_A[i];
392 #pragma unroll(R1 + R2 - 1)
393 for (size_t i = 0; i < R2; ++i)
394 idx_C[R1 + i] = idx_B[i];
395
396 result(idx_C) = Simd::extract(c_vec, w);
397 }
398 }
399 } else {
400 for (size_t b_flat = b_inner; b_flat < b_end; ++b_flat) {
401 std::array<size_t, R2> idx_B;
402 std::array<size_t, R> idx_C;
403
404 size_t tmpb = b_flat;
405 for (ssize_t i = R2 - 1; i >= 0; --i) {
406 idx_B[i] = tmpb % B.dimensions[i];
407 tmpb /= B.dimensions[i];
408 }
409 for (size_t i = 0; i < R1; ++i)
410 idx_C[i] = idx_A[i];
411 for (size_t i = 0; i < R2; ++i)
412 idx_C[R1 + i] = idx_B[i];
413
414 result(idx_C) = a_scalar * B(idx_B);
415 }
416 }
417 }
418 }
419 }
420 }
421 }
422
423 return result;
424 }
426 };
427 } // namespace tensorium
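The listing above is the full class interface. What follows is a minimal usage sketch, not part of the header: the include path "Tensor.hpp", the main() wrapper, and the numeric values are illustrative assumptions, and the example only exercises members shown in the listing (construction, fill, element access, and the row-major flatten_index arithmetic).

#include "Tensor.hpp" // assumed include path
#include <cstddef>
#include <iostream>

int main() {
    using tensorium::Tensor;

    // Rank-2 tensor (3x4 matrix); the constructor computes row-major strides {4, 1}.
    Tensor<double, 2> A({3, 4});
    A.fill(1.0);
    A(1, 2) = 5.0;   // unchecked two-index accessor
    A({2, 3}) = 7.0; // bounds-asserted array accessor
    A.print_shape(); // Tensor shape: (3, 4)
    A.print();       // formatted 2D dump

    // Rank-4 tensor; for dims {4,4,4,4} the strides are {64, 16, 4, 1},
    // so flatten_index(1, 2, 3, 0) = 1*64 + 2*16 + 3*4 + 0 = 108.
    Tensor<double, 4> g({4, 4, 4, 4});
    g.fill(0.0);
    for (std::size_t mu = 0; mu < 4; ++mu)
        g(mu, mu, 0, 0) = 1.0;
    std::cout << g.flatten_index(1, 2, 3, 0) << "\n"; // prints 108
    return 0;
}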
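contract_simd sums a tensor over a pair of equal-length indices; as the signature above shows, it is a const member that also takes the tensor to contract as its first argument. A sketch under the assumption that the SimdValue alias used at line 213 (elided from this listing) resolves on the target:

#include "Tensor.hpp" // assumed include path
#include <cstddef>

int main() {
    using tensorium::Tensor;

    // T(a, b, c, d), nonzero only where the first and third indices coincide.
    Tensor<double, 4> T({4, 4, 4, 4});
    T.fill(0.0);
    for (std::size_t a = 0; a < 4; ++a)
        for (std::size_t d = 0; d < 4; ++d)
            T(a, 0, a, d) = 1.0;

    // Contract index 0 with index 2: R(b, d) = sum_k T(k, b, k, d).
    Tensor<double, 2> R = T.contract_simd(T, 0, 2);
    R.print_shape(); // Tensor shape: (4, 4)
    R.print();       // first row reads 4.0 everywhere, the other rows 0
    return 0;
}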
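tensor_product is a static member (see its entry in the index below), so the sketch reaches it through an arbitrary Tensor instantiation; the result rank is R1 + R2. The include path and values are again assumptions, as is the availability of the SIMD and prefetch intrinsics pulled in by the headers at the top of the file.

#include "Tensor.hpp" // assumed include path
#include <cstddef>

int main() {
    using tensorium::Tensor;

    // Two rank-1 tensors: u = (1, 2, 3), v = (1, 1, 1, 1).
    Tensor<double, 1> u({3});
    Tensor<double, 1> v({4});
    for (std::size_t i = 0; i < 3; ++i) u({i}) = double(i + 1);
    for (std::size_t j = 0; j < 4; ++j) v({j}) = 1.0;

    // Outer product: M(i, j) = u(i) * v(j), a 3x4 rank-2 tensor.
    Tensor<double, 2> M = Tensor<double, 2>::tensor_product(u, v);
    M.print_shape(); // Tensor shape: (3, 4)
    M.print();       // row i holds the constant value i + 1
    return 0;
}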
std::vector< K, AlignedAllocator< K, ALIGN > > aligned_vector
Type alias for a std::vector with aligned memory allocation.
Definition Allocator.hpp:111
Multi-dimensional tensor class with fixed rank and SIMD support.
Definition Tensor.hpp:25
size_t block_size
Definition Tensor.hpp:32
void resize(size_t d0, size_t d1, size_t d2)
Definition Tensor.hpp:87
void resize(const std::array< size_t, Rank > &dims)
Resize the tensor to the given dimensions.
Definition Tensor.hpp:70
K & operator()(const std::array< size_t, Rank > &indices)
Definition Tensor.hpp:94
void update_strides()
Definition Tensor.hpp:63
size_t total_size
Definition Tensor.hpp:30
K value_type
Definition Tensor.hpp:27
void print() const
Print a 2D tensor (Rank == 2)
Definition Tensor.hpp:140
size_t flatten_index(const std::array< size_t, Rank > &indices) const
Convert multi-index to flat index.
size_t flatten_index_simd(const size_t *indices, const size_t *strides) const
Convert a multi-index into a flattened linear index using SIMD.
K & operator()(size_t i, size_t j, size_t k, size_t l)
Definition Tensor.hpp:109
Tensor< K, 2 > transpose_simd() const
Definition Tensor.hpp:287
Tensor()
Default constructor.
Definition Tensor.hpp:35
K & operator()(size_t i, size_t j, size_t k)
Definition Tensor.hpp:114
size_t flatten_index(size_t i, size_t j, size_t k, size_t l) const
Definition Tensor.hpp:57
std::array< size_t, Rank > strides
Definition Tensor.hpp:33
std::array< size_t, Rank > shape() const
Definition Tensor.hpp:62
aligned_vector< K > data
Definition Tensor.hpp:31
const K & operator()(size_t i, size_t j) const
Definition Tensor.hpp:113
Tensor(const std::array< size_t, Rank > &dims)
Construct tensor with given dimensions.
Definition Tensor.hpp:41
std::array< size_t, Rank > dimensions
Dimensions of the tensor (e.g., {4,4,4,4})
Definition Tensor.hpp:29
void resize(size_t d0, size_t d1)
Resize 2D tensor.
Definition Tensor.hpp:83
const K & operator()(size_t i, size_t j, size_t k) const
Definition Tensor.hpp:118
const K & operator()(const std::array< size_t, Rank > &indices) const
Definition Tensor.hpp:99
void fill(K value)
Fill tensor with a constant value.
Definition Tensor.hpp:125
void print_shape() const
Print the shape (dimensions) of the tensor.
Definition Tensor.hpp:129
static Tensor< K, R1+R2 > tensor_product(const Tensor< K, R1 > &A, const Tensor< K, R2 > &B)
Compute the tensor (outer) product of two tensors.
Definition Tensor.hpp:330
K & operator()(size_t i, size_t j)
Definition Tensor.hpp:108
const K & operator()(size_t i, size_t j, size_t k, size_t l) const
Definition Tensor.hpp:104
Tensor< K, Rank - 2 > contract_tensor(const Tensor< K, Rank > &T)
Definition Functional.hpp:327