|
Tensorium
|
This is the complete list of members for tensorium::GemmKernelBigger< T >, including all inherited members.
| __attribute__((aligned(64))) | tensorium::GemmKernelBigger< T > | inline static |
| __attribute__((aligned(64))) | tensorium::GemmKernelBigger< T > | static |
| __attribute__((aligned(64))) | tensorium::GemmKernelBigger< T > | static |
| BlockCols | tensorium::GemmKernelBigger< T > | static |
| BlockDepth | tensorium::GemmKernelBigger< T > | static |
| BlockRows | tensorium::GemmKernelBigger< T > | static |
| build_masks(__m256i *packed_mask_0, __m256i *packed_mask_1, int mr) | tensorium::GemmKernelBigger< T > | inline static |
| fma_loop_00(T *blockA_packed, T *blockB_packed, reg *C_accum_00, reg *C_accum_01, reg *a0_packFloat8, reg *a1_packFloat8, reg *b_packFloat8, int kc) | tensorium::GemmKernelBigger< T > | inline |
| fma_loop_01(T *blockA_packed, T *blockB_packed, reg *C_accum_00, reg *C_accum_01, reg *C_accum_10, reg *C_accum_11, reg *a0_packFloat8, reg *a1_packFloat8, reg *b_packFloat8, int kc) | tensorium::GemmKernelBigger< T > | inline |
| fma_loop_02(T *blockA_packed, T *blockB_packed, reg *C_accum_00, reg *C_accum_01, reg *C_accum_10, reg *C_accum_11, reg *C_accum_20, reg *C_accum_21, reg *a0_packFloat8, reg *a1_packFloat8, reg *b_packFloat8, int kc) | tensorium::GemmKernelBigger< T > | inline |
| fma_loop_03(T *blockA_packed, T *blockB_packed, reg *C_accum_00, reg *C_accum_01, reg *C_accum_10, reg *C_accum_11, reg *C_accum_20, reg *C_accum_21, reg *C_accum_30, reg *C_accum_31, reg *a0_packFloat8, reg *a1_packFloat8, reg *b_packFloat8, int kc) | tensorium::GemmKernelBigger< T > | inline |
| fma_loop_04(T *blockA_packed, T *blockB_packed, reg *C_accum_00, reg *C_accum_01, reg *C_accum_10, reg *C_accum_11, reg *C_accum_20, reg *C_accum_21, reg *C_accum_30, reg *C_accum_31, reg *C_accum_40, reg *C_accum_41, reg *a0_packFloat8, reg *a1_packFloat8, reg *b_packFloat8, int kc) | tensorium::GemmKernelBigger< T > | inline |
| fma_loop_05(T *blockA_packed, T *blockB_packed, reg *C_accum_00, reg *C_accum_01, reg *C_accum_10, reg *C_accum_11, reg *C_accum_20, reg *C_accum_21, reg *C_accum_30, reg *C_accum_31, reg *C_accum_40, reg *C_accum_41, reg *C_accum_50, reg *C_accum_51, reg *a0_packFloat8, reg *a1_packFloat8, reg *b_packFloat8, int kc) | tensorium::GemmKernelBigger< T > | inline |
| kernel_16x6_load_accum(T *blockA_packed, T *blockB_packed, T *C, int mr, int nr, int kc, int M) | tensorium::GemmKernelBigger< T > | inline |
| kernel_16x6_zero_init_accum(T *blockA_packed, T *blockB_packed, T *C, int mr, int nr, int kc, int M) | tensorium::GemmKernelBigger< T > | inline |
| load_accum_00(T *C, reg *C_accum_00, reg *C_accum_01, int M) | tensorium::GemmKernelBigger< T > | inline |
| load_accum_01(T *C, reg *C_accum_00, reg *C_accum_01, reg *C_accum_10, reg *C_accum_11, int M) | tensorium::GemmKernelBigger< T > | inline |
| load_accum_02(T *C, reg *C_accum_00, reg *C_accum_01, reg *C_accum_10, reg *C_accum_11, reg *C_accum_20, reg *C_accum_21, int M) | tensorium::GemmKernelBigger< T > | inline |
| load_accum_03(T *C, reg *C_accum_00, reg *C_accum_01, reg *C_accum_10, reg *C_accum_11, reg *C_accum_20, reg *C_accum_21, reg *C_accum_30, reg *C_accum_31, int M) | tensorium::GemmKernelBigger< T > | inline |
| load_accum_04(T *C, reg *C_accum_00, reg *C_accum_01, reg *C_accum_10, reg *C_accum_11, reg *C_accum_20, reg *C_accum_21, reg *C_accum_30, reg *C_accum_31, reg *C_accum_40, reg *C_accum_41, int M) | tensorium::GemmKernelBigger< T > | inline |
| load_accum_05(T *C, reg *C_accum_00, reg *C_accum_01, reg *C_accum_10, reg *C_accum_11, reg *C_accum_20, reg *C_accum_21, reg *C_accum_30, reg *C_accum_31, reg *C_accum_40, reg *C_accum_41, reg *C_accum_50, reg *C_accum_51, int M) | tensorium::GemmKernelBigger< T > | inline |
| maskload_accum_00(T *C, reg *C_accum_00, reg *C_accum_01, __m256i packed_mask_0, __m256i packed_mask_1, int M) | tensorium::GemmKernelBigger< T > | inline |
| maskload_accum_01(T *C, reg *C_accum_00, reg *C_accum_01, reg *C_accum_10, reg *C_accum_11, __m256i packed_mask_0, __m256i packed_mask_1, int M) | tensorium::GemmKernelBigger< T > | inline |
| maskload_accum_02(T *C, reg *C_accum_00, reg *C_accum_01, reg *C_accum_10, reg *C_accum_11, reg *C_accum_20, reg *C_accum_21, __m256i packed_mask_0, __m256i packed_mask_1, int M) | tensorium::GemmKernelBigger< T > | inline |
| maskload_accum_03(T *C, reg *C_accum_00, reg *C_accum_01, reg *C_accum_10, reg *C_accum_11, reg *C_accum_20, reg *C_accum_21, reg *C_accum_30, reg *C_accum_31, __m256i packed_mask_0, __m256i packed_mask_1, int M) | tensorium::GemmKernelBigger< T > | inline |
| maskload_accum_04(T *C, reg *C_accum_00, reg *C_accum_01, reg *C_accum_10, reg *C_accum_11, reg *C_accum_20, reg *C_accum_21, reg *C_accum_30, reg *C_accum_31, reg *C_accum_40, reg *C_accum_41, __m256i packed_mask_0, __m256i packed_mask_1, int M) | tensorium::GemmKernelBigger< T > | inline |
| maskload_accum_05(T *C, reg *C_accum_00, reg *C_accum_01, reg *C_accum_10, reg *C_accum_11, reg *C_accum_20, reg *C_accum_21, reg *C_accum_30, reg *C_accum_31, reg *C_accum_40, reg *C_accum_41, reg *C_accum_50, reg *C_accum_51, __m256i packed_mask_0, __m256i packed_mask_1, int M) | tensorium::GemmKernelBigger< T > | inline |
| maskstore_accum_00(T *C, reg *C_accum_00, reg *C_accum_01, __m256i packed_mask_0, __m256i packed_mask_1, int M) | tensorium::GemmKernelBigger< T > | inline |
| maskstore_accum_01(T *C, reg *C_accum_00, reg *C_accum_01, reg *C_accum_10, reg *C_accum_11, __m256i packed_mask_0, __m256i packed_mask_1, int M) | tensorium::GemmKernelBigger< T > | inline |
| maskstore_accum_02(T *C, reg *C_accum_00, reg *C_accum_01, reg *C_accum_10, reg *C_accum_11, reg *C_accum_20, reg *C_accum_21, __m256i packed_mask_0, __m256i packed_mask_1, int M) | tensorium::GemmKernelBigger< T > | inline |
| maskstore_accum_03(T *C, reg *C_accum_00, reg *C_accum_01, reg *C_accum_10, reg *C_accum_11, reg *C_accum_20, reg *C_accum_21, reg *C_accum_30, reg *C_accum_31, __m256i packed_mask_0, __m256i packed_mask_1, int M) | tensorium::GemmKernelBigger< T > | inline |
| maskstore_accum_04(T *C, reg *C_accum_00, reg *C_accum_01, reg *C_accum_10, reg *C_accum_11, reg *C_accum_20, reg *C_accum_21, reg *C_accum_30, reg *C_accum_31, reg *C_accum_40, reg *C_accum_41, __m256i packed_mask_0, __m256i packed_mask_1, int M) | tensorium::GemmKernelBigger< T > | inline |
| maskstore_accum_05(T *C, reg *C_accum_00, reg *C_accum_01, reg *C_accum_10, reg *C_accum_11, reg *C_accum_20, reg *C_accum_21, reg *C_accum_30, reg *C_accum_31, reg *C_accum_40, reg *C_accum_41, reg *C_accum_50, reg *C_accum_51, __m256i packed_mask_0, __m256i packed_mask_1, int M) | tensorium::GemmKernelBigger< T > | inline |
| matmul(T *A, T *B, T *C, int M, int N, int K) | tensorium::GemmKernelBigger< T > | inline |
| NThreads | tensorium::GemmKernelBigger< T > | static |
| pack_blockA(T *A, T *blockA_packed, int mc, int kc, int M) | tensorium::GemmKernelBigger< T > | inline |
| pack_blockB(T *B, T *blockB_packed, int nc, int kc, int K) | tensorium::GemmKernelBigger< T > | inline |
| pack_panelA(T *A, T *blockA_packed, int mr, int kc, int M) | tensorium::GemmKernelBigger< T > | inline |
| pack_panelB(T *B, T *blockB_packed, int nr, int kc, int K) | tensorium::GemmKernelBigger< T > | inline |
| reg typedef | tensorium::GemmKernelBigger< T > | |
| Simd typedef | tensorium::GemmKernelBigger< T > | |
| SimdWidth | tensorium::GemmKernelBigger< T > | static |
| store_accum_00(T *C, reg *C_accum_00, reg *C_accum_01, int M) | tensorium::GemmKernelBigger< T > | inline |
| store_accum_01(T *C, reg *C_accum_00, reg *C_accum_01, reg *C_accum_10, reg *C_accum_11, int M) | tensorium::GemmKernelBigger< T > | inline |
| store_accum_02(T *C, reg *C_accum_00, reg *C_accum_01, reg *C_accum_10, reg *C_accum_11, reg *C_accum_20, reg *C_accum_21, int M) | tensorium::GemmKernelBigger< T > | inline |
| store_accum_03(T *C, reg *C_accum_00, reg *C_accum_01, reg *C_accum_10, reg *C_accum_11, reg *C_accum_20, reg *C_accum_21, reg *C_accum_30, reg *C_accum_31, int M) | tensorium::GemmKernelBigger< T > | inline |
| store_accum_04(T *C, reg *C_accum_00, reg *C_accum_01, reg *C_accum_10, reg *C_accum_11, reg *C_accum_20, reg *C_accum_21, reg *C_accum_30, reg *C_accum_31, reg *C_accum_40, reg *C_accum_41, int M) | tensorium::GemmKernelBigger< T > | inline |
| store_accum_05(T *C, reg *C_accum_00, reg *C_accum_01, reg *C_accum_10, reg *C_accum_11, reg *C_accum_20, reg *C_accum_21, reg *C_accum_30, reg *C_accum_31, reg *C_accum_40, reg *C_accum_41, reg *C_accum_50, reg *C_accum_51, int M) | tensorium::GemmKernelBigger< T > | inline |
| TileCols | tensorium::GemmKernelBigger< T > | static |
| TileRows | tensorium::GemmKernelBigger< T > | static |