Argon 0.1.0
Loading...
Searching...
No Matches
store.hpp
1#pragma once
2#include <array>
3#include <cstddef>
4#include <ranges>
5#include <type_traits>
6#include "arm_simd/helpers/concepts.hpp"
7#include "arm_simd/helpers/multivector.hpp"
8#include "arm_simd/helpers/scalar.hpp"
9#include "arm_simd/helpers/store.hpp"
10
// Pick the backend namespace that `simd::` refers to throughout this header:
// `mve` when the compiler defines __ARM_FEATURE_MVE, `neon` in every other case.
#if defined(__ARM_FEATURE_MVE)
#define simd mve
#else
#define simd neon
#endif
16
// `ace` is the specifier sequence placed in front of every function in this header:
//   - ARGON_PLATFORM_SIMDE defined: expands to nothing.
//   - Clang:                        [[gnu::always_inline]] constexpr
//   - anything else:                [[gnu::always_inline]] inline
//
// `#elif defined(...)` is used instead of `#elifdef`: the latter is a C++23
// directive, and a preprocessor that does not know it skips it as an unknown
// directive and falls through to the `#else` branch without any diagnostic.
#if defined(ARGON_PLATFORM_SIMDE)
#define ace
#elif defined(__clang__)
#define ace [[gnu::always_inline]] constexpr
#else
#define ace [[gnu::always_inline]] inline
#endif
24
25namespace argon {
26
31template <size_t stride, typename scalar_type, typename intrinsic_type>
32ace void store_interleaved(scalar_type* ptr, simd::MultiVector_t<intrinsic_type, stride> multi_vec) {
33 static_assert(stride > 1 && stride < 5, "Interleaving Stores can only be performed with a stride of 2, 3, or 4");
34 if constexpr (stride == 2) {
35 simd::store2(ptr, multi_vec);
36#ifndef ARGON_PLATFORM_MVE
37 } else if constexpr (stride == 3) {
38 simd::store3(ptr, multi_vec);
39#endif
40 } else if constexpr (stride == 4) {
41 simd::store4(ptr, multi_vec);
42 }
43}
44
49template <size_t stride, typename scalar_type, typename argon_type>
50ace void store_interleaved(scalar_type* ptr, std::array<argon_type, stride> multi_vec) {
51 using intrinsic_type = typename argon_type::vector_type;
52 using multivec_type = simd::MultiVector_t<intrinsic_type, stride>;
53 using array_type = std::array<argon_type, stride>;
54
55 // Since we're using a dirty ugly hack of reinterpreting a C array as a std::array,
56 // the validity and POD-ness of std::array needs to be verified
57 static_assert(std::is_standard_layout_v<array_type>);
58 static_assert(sizeof(multivec_type) == sizeof(array_type),
59 "std::array isn't layout-compatible with this NEON multi-vector.");
60
61 store_interleaved<stride, scalar_type, intrinsic_type>(ptr, *(multivec_type*)multi_vec.data());
62}
63
67template <typename scalar_type, typename... argon_types>
68ace void store_interleaved(scalar_type* ptr, argon_types... vecs) {
69 static_assert(sizeof...(vecs) > 1 && sizeof...(vecs) < 5,
70 "Interleaving Stores can only be performed with a stride of 2, 3, or 4");
71 static_assert((std::is_same_v<scalar_type, simd::Scalar_t<typename argon_types::vector_type>> && ...),
72 "All vectors must be of the same scalar type.");
73
74 store_interleaved<sizeof...(argon_types)>(
75 ptr, std::array<std::common_type_t<argon_types...>, sizeof...(vecs)>{std::forward<argon_types>(vecs)...});
76}
77
81template <typename scalar_type, typename argon_type>
82 requires std::is_same_v<scalar_type, simd::Scalar_t<typename argon_type::vector_type>>
83ace void store(scalar_type* ptr, argon_type vector) {
84 simd::store1(ptr, vector);
85}
86
90template <typename scalar_type, simd::is_vector_type intrinsic_type>
91 requires std::is_same_v<scalar_type, simd::Scalar_t<intrinsic_type>>
92ace void store(scalar_type* ptr, intrinsic_type vector) {
93 simd::store1(ptr, vector);
94}
95
96#if defined(__clang__) || (__GNUC__ > 13)
106template <size_t stride = 1, typename scalar_type, typename... intrinsic_types>
107 requires(std::is_same_v<scalar_type, simd::Scalar_t<intrinsic_types>> && ...)
108ace void store(scalar_type* ptr, intrinsic_types... vectors) {
109 // TODO: C++26 change to `typename intrinsic_types...[0]`
110 using intrinsic_type = typename std::tuple_element_t<0, std::tuple<intrinsic_types...>>;
111
112 constexpr size_t size = sizeof...(vectors);
113 constexpr std::array<intrinsic_type, size> vec_array = {std::move(vectors)...};
114
115 // Best case scenerio: we know both length and stride
116 static_assert(0 < stride && stride < 5, "Stores can only be performed with a stride of 1, 2, 3, or 4");
117 static_assert(size >= stride, "You cannot store less vectors than your stride!");
118 static_assert(size % stride == 0, "The number of vectors being stored must be a multiple of the stride!");
119
120 if constexpr (stride == 1) {
121 constexpr size_t tail_size = size % 4;
122 constexpr size_t head_size = size - tail_size;
123 size_t i = 0;
124 if constexpr (head_size > 0) {
125 for (; i < head_size; i += 4) {
126 using multi_type = simd::MultiVector_t<intrinsic_type, 4>;
127 simd::store1_x4(ptr, *(multi_type*)&vec_array[i]);
128 ptr += (sizeof(intrinsic_type) / sizeof(*ptr)) * 4; // increment output pointer
129 }
130 }
131 if constexpr (tail_size == 1) { // 1-element tail
132 simd::store1(ptr, &vec_array[i]);
133 } else if constexpr (tail_size == 2) {
134 using tail_multi_type = simd::MultiVector_t<intrinsic_type, 2>;
135 simd::store1_x2(ptr, *(tail_multi_type*)&vec_array[i]);
136 } else if constexpr (tail_size == 3) {
137 using tail_multi_type = simd::MultiVector_t<intrinsic_type, 3>;
138 simd::store1_x3(ptr, *(tail_multi_type*)&vec_array[i]);
139 }
140 } else {
141#pragma GCC unroll size
142 for (auto v : vec_array | std::views::chunk(stride)) {
143 if constexpr (stride == 2) {
144 store_interleaved<2>(ptr, v.begin());
145 } else if constexpr (stride == 3) {
146 store_interleaved<3>(ptr, v.begin());
147 } else if constexpr (stride == 4) {
148 store_interleaved<4>(ptr, v.begin());
149 }
150 ptr += sizeof(intrinsic_type) / sizeof(*ptr); // increment output pointer
151 }
152 }
153}
154
159template <size_t stride = 1, typename scalar_type, typename... argon_types>
160 requires(std::is_same_v<scalar_type, simd::Scalar_t<typename argon_types::vector_type>> && ...)
161ace void store(scalar_type* ptr, argon_types... vectors) {
162 store<stride>(ptr, std::forward<typename argon_types::vector_type>(vectors)...);
163}
164#endif
165
171template <int lane, size_t stride, typename scalar_type, typename argon_type>
172ace void store_lane_interleaved(scalar_type* ptr, std::array<argon_type, stride> multi_vec) {
173 using intrinsic_type = typename argon_type::vector_type;
174 using multivec_type = simd::MultiVector_t<intrinsic_type, stride>;
175 using array_type = std::array<argon_type, 2>;
176
177 // Since we're using a dirty ugly hack of reinterpreting a C array as a std::array,
178 // the validity and POD-ness of std::array needs to be verified
179 static_assert(std::is_standard_layout_v<array_type>);
180 static_assert(std::is_trivial_v<array_type>);
181 static_assert(sizeof(multivec_type) == sizeof(array_type),
182 "std::array isn't layout-compatible with this NEON multi-vector.");
183
184 store_lane_interleaved<lane, stride, scalar_type, intrinsic_type>(ptr, *(multivec_type*)multi_vec.data());
185}
186
192template <int lane, size_t stride, typename scalar_type, typename intrinsic_type>
193ace void store_lane_interleaved(scalar_type* ptr, simd::MultiVector_t<intrinsic_type, stride> multi_vec) {
194 static_assert(stride > 1 && stride < 5, "Interleaving Stores can only be performed with a stride of 2, 3, or 4");
195 if constexpr (stride == 2) {
196 simd::store2_lane<lane>(ptr, multi_vec);
197 } else if constexpr (stride == 3) {
198 simd::store3_lane<lane>(ptr, multi_vec);
199 } else if constexpr (stride == 4) {
200 simd::store4_lane<lane>(ptr, multi_vec);
201 }
202}
203} // namespace argon
204#undef ace
205#undef simd