6#include "arm_simd/helpers/concepts.hpp"
7#include "arm_simd/helpers/multivector.hpp"
8#include "arm_simd/helpers/scalar.hpp"
9#include "arm_simd/helpers/store.hpp"
11#ifdef __ARM_FEATURE_MVE
17#ifdef ARGON_PLATFORM_SIMDE
20#define ace [[gnu::always_inline]] constexpr
22#define ace [[gnu::always_inline]] inline
31template <
size_t str
ide,
typename scalar_type,
typename intrinsic_type>
32ace
void store_interleaved(scalar_type* ptr, simd::MultiVector_t<intrinsic_type, stride> multi_vec) {
33 static_assert(stride > 1 && stride < 5,
"Interleaving Stores can only be performed with a stride of 2, 3, or 4");
34 if constexpr (stride == 2) {
35 simd::store2(ptr, multi_vec);
36#ifndef ARGON_PLATFORM_MVE
37 }
else if constexpr (stride == 3) {
38 simd::store3(ptr, multi_vec);
40 }
else if constexpr (stride == 4) {
41 simd::store4(ptr, multi_vec);
49template <
size_t str
ide,
typename scalar_type,
typename argon_type>
50ace
void store_interleaved(scalar_type* ptr, std::array<argon_type, stride> multi_vec) {
51 using intrinsic_type =
typename argon_type::vector_type;
52 using multivec_type = simd::MultiVector_t<intrinsic_type, stride>;
53 using array_type = std::array<argon_type, stride>;
57 static_assert(std::is_standard_layout_v<array_type>);
58 static_assert(
sizeof(multivec_type) ==
sizeof(array_type),
59 "std::array isn't layout-compatible with this NEON multi-vector.");
61 store_interleaved<stride, scalar_type, intrinsic_type>(ptr, *(multivec_type*)multi_vec.data());
67template <
typename scalar_type,
typename... argon_types>
68ace
void store_interleaved(scalar_type* ptr, argon_types... vecs) {
69 static_assert(
sizeof...(vecs) > 1 &&
sizeof...(vecs) < 5,
70 "Interleaving Stores can only be performed with a stride of 2, 3, or 4");
71 static_assert((std::is_same_v<scalar_type, simd::Scalar_t<typename argon_types::vector_type>> && ...),
72 "All vectors must be of the same scalar type.");
74 store_interleaved<
sizeof...(argon_types)>(
75 ptr, std::array<std::common_type_t<argon_types...>,
sizeof...(vecs)>{std::forward<argon_types>(vecs)...});
81template <
typename scalar_type,
typename argon_type>
82 requires std::is_same_v<scalar_type, simd::Scalar_t<typename argon_type::vector_type>>
83ace
void store(scalar_type* ptr, argon_type vector) {
84 simd::store1(ptr, vector);
90template <
typename scalar_type, simd::is_vector_type
intrinsic_type>
91 requires std::is_same_v<scalar_type, simd::Scalar_t<intrinsic_type>>
92ace
void store(scalar_type* ptr, intrinsic_type vector) {
93 simd::store1(ptr, vector);
96#if defined(__clang__) || (__GNUC__ > 13)
106template <
size_t stride = 1,
typename scalar_type,
typename... intrinsic_types>
107 requires(std::is_same_v<scalar_type, simd::Scalar_t<intrinsic_types>> && ...)
108ace
void store(scalar_type* ptr, intrinsic_types... vectors) {
110 using intrinsic_type =
typename std::tuple_element_t<0, std::tuple<intrinsic_types...>>;
112 constexpr size_t size =
sizeof...(vectors);
113 constexpr std::array<intrinsic_type, size> vec_array = {std::move(vectors)...};
116 static_assert(0 < stride && stride < 5,
"Stores can only be performed with a stride of 1, 2, 3, or 4");
117 static_assert(size >= stride,
"You cannot store less vectors than your stride!");
118 static_assert(size % stride == 0,
"The number of vectors being stored must be a multiple of the stride!");
120 if constexpr (stride == 1) {
121 constexpr size_t tail_size = size % 4;
122 constexpr size_t head_size = size - tail_size;
124 if constexpr (head_size > 0) {
125 for (; i < head_size; i += 4) {
126 using multi_type = simd::MultiVector_t<intrinsic_type, 4>;
127 simd::store1_x4(ptr, *(multi_type*)&vec_array[i]);
128 ptr += (
sizeof(intrinsic_type) /
sizeof(*ptr)) * 4;
131 if constexpr (tail_size == 1) {
132 simd::store1(ptr, &vec_array[i]);
133 }
else if constexpr (tail_size == 2) {
134 using tail_multi_type = simd::MultiVector_t<intrinsic_type, 2>;
135 simd::store1_x2(ptr, *(tail_multi_type*)&vec_array[i]);
136 }
else if constexpr (tail_size == 3) {
137 using tail_multi_type = simd::MultiVector_t<intrinsic_type, 3>;
138 simd::store1_x3(ptr, *(tail_multi_type*)&vec_array[i]);
141#pragma GCC unroll size
142 for (
auto v : vec_array | std::views::chunk(stride)) {
143 if constexpr (stride == 2) {
144 store_interleaved<2>(ptr, v.begin());
145 }
else if constexpr (stride == 3) {
146 store_interleaved<3>(ptr, v.begin());
147 }
else if constexpr (stride == 4) {
148 store_interleaved<4>(ptr, v.begin());
150 ptr +=
sizeof(intrinsic_type) /
sizeof(*ptr);
159template <
size_t stride = 1,
typename scalar_type,
typename... argon_types>
160 requires(std::is_same_v<scalar_type, simd::Scalar_t<typename argon_types::vector_type>> && ...)
161ace
void store(scalar_type* ptr, argon_types... vectors) {
162 store<stride>(ptr, std::forward<typename argon_types::vector_type>(vectors)...);
171template <
int lane,
size_t str
ide,
typename scalar_type,
typename argon_type>
172ace
void store_lane_interleaved(scalar_type* ptr, std::array<argon_type, stride> multi_vec) {
173 using intrinsic_type =
typename argon_type::vector_type;
174 using multivec_type = simd::MultiVector_t<intrinsic_type, stride>;
175 using array_type = std::array<argon_type, 2>;
179 static_assert(std::is_standard_layout_v<array_type>);
180 static_assert(std::is_trivial_v<array_type>);
181 static_assert(
sizeof(multivec_type) ==
sizeof(array_type),
182 "std::array isn't layout-compatible with this NEON multi-vector.");
184 store_lane_interleaved<lane, stride, scalar_type, intrinsic_type>(ptr, *(multivec_type*)multi_vec.data());
192template <
int lane,
size_t str
ide,
typename scalar_type,
typename intrinsic_type>
193ace
void store_lane_interleaved(scalar_type* ptr, simd::MultiVector_t<intrinsic_type, stride> multi_vec) {
194 static_assert(stride > 1 && stride < 5,
"Interleaving Stores can only be performed with a stride of 2, 3, or 4");
195 if constexpr (stride == 2) {
196 simd::store2_lane<lane>(ptr, multi_vec);
197 }
else if constexpr (stride == 3) {
198 simd::store3_lane<lane>(ptr, multi_vec);
199 }
else if constexpr (stride == 4) {
200 simd::store4_lane<lane>(ptr, multi_vec);