Argon 0.1.0
Loading...
Searching...
No Matches
argon_half.hpp
1#pragma once
#include <type_traits>

#include "vector.hpp"
3
4#ifndef ARGON_PLATFORM_MVE
// `ace` is the function-specifier prefix used on every function in this header.
// Both variants force inlining; the clang variant additionally marks the function
// constexpr, while other compilers get a plain `inline`.
#if defined(__clang__)
#define ace [[gnu::always_inline]] constexpr
#else
#define ace [[gnu::always_inline]] inline
#endif
/// Primary template for the 64-bit (half-width) vector wrapper.
/// It is only declared here; the usable definitions are the constrained partial
/// specializations that follow.
template <typename ScalarType>
class ArgonHalf;

18template <typename ScalarType>
19 requires std::same_as<ScalarType, double> || std::same_as<ScalarType, uint64_t> || std::same_as<ScalarType, int64_t>
20class ArgonHalf<ScalarType> : public argon::Vector<neon::Vec64_t<ScalarType>> {
22
23 public:
24 using vector_type = neon::Vec64_t<ScalarType>;
25 using lane_type = const argon::Lane<vector_type>;
26
27 static_assert(neon::is_doubleword_v<vector_type>);
28
29 static constexpr size_t bytes = 8;
30 static constexpr size_t lanes = bytes / sizeof(ScalarType);
31
32 using T::T;
33 ace ArgonHalf(argon::Vector<vector_type> vec) : T{std::move(vec)} {};
34 ace ArgonHalf(const ArgonHalf&) = default;
35 ace ArgonHalf(ArgonHalf&&) = default;
36 ace ArgonHalf& operator=(const ArgonHalf&) = default;
37 ace ArgonHalf& operator=(ArgonHalf&&) = default;
38
41 ace static ArgonHalf<ScalarType> Create(uint64_t a) { return neon::create<vector_type>(a); }
42
45 template <typename NewScalarType>
46 ace ArgonHalf<NewScalarType> As() const {
47 return neon::reinterpret<neon::Vec64_t<NewScalarType>>(this->vec_);
48 }
49
52 ace ArgonHalf<ScalarType> TableLookup(ArgonHalf<ScalarType> idx) { return neon::table_lookup1(this->vec_, idx); }
53
57 ace ArgonHalf<ScalarType> TableExtension(ArgonHalf<ScalarType> b, ArgonHalf<ScalarType> idx) {
58 return neon::table_extension1(this->vec_, b, idx);
59 }
60
63 template <size_t NumTables>
64 ace ArgonHalf<ScalarType> TableExtension(std::array<ArgonHalf<ScalarType>, NumTables> b, ArgonHalf<ScalarType> idx) {
65 return TableExtension<NumTables>((vector_type*)b.data(), idx);
66 }
67
70 template <size_t NumTables>
71 ace ArgonHalf<ScalarType> TableExtension(vector_type* b, ArgonHalf<ScalarType> idx) {
72 static_assert(NumTables > 1 && NumTables < 5, "Table Extension can only be performed with 1, 2, 3, or 4 tables");
73
74 using multivec_type = neon::MultiVector_t<vector_type, NumTables>;
75
76 multivec_type multivector = *(multivec_type*)b;
77
78 if constexpr (NumTables == 2) {
79 return neon::table_extension2(this->vec_, multivector, idx);
80 } else if constexpr (NumTables == 3) {
81 return neon::table_extension3(this->vec_, multivector, idx);
82 } else if constexpr (NumTables == 4) {
83 return neon::table_extension4(this->vec_, multivector, idx);
84 }
85 }
86
89 template <typename U>
90 ace ArgonHalf<U> ConvertTo() {
91 return neon::convert<typename neon::Vec64<U>::type>(this->vec_);
92 }
93
97 template <typename U, int fracbits>
98 requires(std::is_same_v<U, uint32_t> || std::is_same_v<U, int32_t> || std::is_same_v<U, float>)
99 ace ArgonHalf<U> ConvertTo() {
100 if constexpr (std::is_same_v<U, float>) {
101 return neon::convert_n<fracbits>(this->vec_);
102 } else if constexpr (std::is_unsigned_v<U>) {
103 return neon::convert_n_unsigned<fracbits>(this->vec_);
104 } else if constexpr (std::is_signed_v<U>) {
105 return neon::convert_n_signed<fracbits>(this->vec_);
106 }
107 }
108
110 ace Argon<ScalarType> CombineWith(ArgonHalf<ScalarType> high) const { return neon::combine(this->vec_, high); }
111
113 ace ArgonHalf<ScalarType> Reverse() const { return this->Reverse64bit(); }
114};
115
122template <argon::helpers::has_larger ScalarType>
123class ArgonHalf<ScalarType> : public argon::Vector<neon::Vec64_t<ScalarType>> {
124 using T = argon::Vector<neon::Vec64_t<ScalarType>>;
125 using next_larger = typename argon::helpers::NextLarger_t<ScalarType>;
126 using argon_next_larger = Argon<next_larger>;
127
128 public:
129 using vector_type = neon::Vec64_t<ScalarType>;
130 using lane_type = const argon::Lane<vector_type>;
131
132 static_assert(neon::is_doubleword_v<vector_type>);
133
134 static constexpr size_t bytes = 8;
135 static constexpr size_t lanes = bytes / sizeof(ScalarType);
136
137 using T::T;
138
141 ace static ArgonHalf<ScalarType> Create(uint64_t a) { return neon::create<vector_type>(a); }
142
145 template <typename NewScalarType>
146 ace ArgonHalf<NewScalarType> As() const {
147 return neon::reinterpret<neon::Vec64_t<NewScalarType>>(this->vec_);
148 }
149
151 ace argon_next_larger AddLong(ArgonHalf<ScalarType> b) const { return neon::add_long(this->vec_, b); }
152
154 ace argon_next_larger MultiplyLong(ArgonHalf<ScalarType> b) const { return neon::multiply_long(this->vec_, b); }
155
157 ace argon_next_larger MultiplyLong(ScalarType b) const { return neon::multiply_long(this->vec_, b); }
158
160 ace argon_next_larger MultiplyLong(lane_type b) const {
161 return neon::multiply_long_lane(this->vec_, b.vec(), b.lane());
162 }
163
165 ace argon_next_larger MultiplyAddLong(ArgonHalf<ScalarType> b) const {
166 return neon::multiply_add_long(this->vec_, b);
167 }
168
170 ace argon_next_larger MultiplyAddLong(ScalarType b) const { return neon::multiply_add_long(this->vec_, b); }
171
173 ace argon_next_larger MultiplyAddLong(lane_type b) const {
174 return neon::multiply_add_long_lane(this->vec_, b.vec(), b.lane());
175 }
176
178 ace argon_next_larger MultiplyDoubleSaturateLong(ArgonHalf<ScalarType> b) const
179 requires(std::is_same_v<vector_type, int16x4_t> || std::is_same_v<vector_type, int32x2_t>)
180 {
181 return neon::multiply_double_saturate_long(this->vec_, b);
182 }
183
185 ace argon_next_larger MultiplyDoubleSaturateLong(ScalarType b) const
186 requires(std::is_same_v<vector_type, int16x4_t> || std::is_same_v<vector_type, int32x2_t>)
187 {
188 return neon::multiply_double_saturate_long(this->vec_, b);
189 }
190
192 ace argon_next_larger MultiplyDoubleSaturateLong(lane_type b) const
193 requires(std::is_same_v<vector_type, int16x4_t> || std::is_same_v<vector_type, int32x2_t>)
194 {
195 return neon::multiply_double_saturate_long_lane(this->vec_, b.vec(), b.lane());
196 }
197
199 ace argon_next_larger MultiplyDoubleAddSaturateLong(ArgonHalf<ScalarType> b) const
200 requires(std::is_same_v<vector_type, int16x4_t> || std::is_same_v<vector_type, int32x2_t>)
201 {
202 return neon::multiply_double_add_saturate_long(this->vec_, b);
203 }
204
206 ace argon_next_larger MultiplyDoubleAddSaturateLong(ScalarType b) const
207 requires(std::is_same_v<vector_type, int16x4_t> || std::is_same_v<vector_type, int32x2_t>)
208 {
209 return neon::multiply_double_add_saturate_long(this->vec_, b);
210 }
211
213 ace argon_next_larger MultiplyDoubleAddSaturateLong(lane_type b) const
214 requires(std::is_same_v<vector_type, int16x4_t> || std::is_same_v<vector_type, int32x2_t>)
215 {
216 return neon::multiply_double_add_saturate_long_lane(this->vec_, b.vec(), b.lane());
217 }
218
220 ace argon_next_larger SubtractLong(ArgonHalf<ScalarType> b) const { return neon::subtract_long(this->vec_, b); }
221
223 ace argon_next_larger SubtractAbsoluteLong(ArgonHalf<ScalarType> b) const {
224 return neon::subtract_absolute_long(this->vec_, b);
225 }
226
228 ace argon_next_larger PairwiseAddLong() const { return neon::pairwise_add_long(this->vec_); }
229
231 ace argon_next_larger PairwiseAddLong(ArgonHalf<typename argon::helpers::NextSmaller<ScalarType>> b) const {
232 return neon::pairwise_add_long(this->vec_, b);
233 }
234
236 ace argon_next_larger Widen() const { return neon::move_long(this->vec_); }
237
240 template <size_t n>
241 ace argon_next_larger ShiftLeftLong() {
242 return neon::shift_left_long<n>(this->vec_);
243 }
244
247 ace ArgonHalf<ScalarType> TableLookup(ArgonHalf<ScalarType> idx) { return neon::table_lookup1(this->vec_, idx); }
248
252 ace ArgonHalf<ScalarType> TableExtension(ArgonHalf<ScalarType> b, ArgonHalf<ScalarType> idx) {
253 return neon::table_extension1(this->vec_, b, idx);
254 }
255
258 template <size_t NumTables>
259 ace ArgonHalf<ScalarType> TableExtension(std::array<ArgonHalf<ScalarType>, NumTables> b, ArgonHalf<ScalarType> idx) {
260 return TableExtension<NumTables>((vector_type*)b.data(), idx);
261 }
262
265 template <size_t NumTables>
266 ace ArgonHalf<ScalarType> TableExtension(vector_type* b, ArgonHalf<ScalarType> idx) {
267 static_assert(NumTables > 1 && NumTables < 5, "Table Extension can only be performed with 1, 2, 3, or 4 tables");
268
269 using multivec_type = neon::MultiVector_t<vector_type, NumTables>;
270
271 multivec_type multivector = *(multivec_type*)b;
272
273 if constexpr (NumTables == 2) {
274 return neon::table_extension2(this->vec_, multivector, idx);
275 } else if constexpr (NumTables == 3) {
276 return neon::table_extension3(this->vec_, multivector, idx);
277 } else if constexpr (NumTables == 4) {
278 return neon::table_extension4(this->vec_, multivector, idx);
279 }
280 }
281
284 template <typename U>
285 ace ArgonHalf<U> ConvertTo() {
286 return neon::convert<typename neon::Vec64<U>::type>(this->vec_);
287 }
288
292 template <typename U, int fracbits>
293 requires(std::is_same_v<U, uint32_t> || std::is_same_v<U, int32_t> || std::is_same_v<U, float>)
294 ace ArgonHalf<U> ConvertTo() {
295 if constexpr (std::is_same_v<U, float>) {
296 return neon::convert_n<fracbits>(this->vec_);
297 } else if constexpr (std::is_unsigned_v<U>) {
298 return neon::convert_n_unsigned<fracbits>(this->vec_);
299 } else if constexpr (std::is_signed_v<U>) {
300 return neon::convert_n_signed<fracbits>(this->vec_);
301 }
302 }
303
305 ace Argon<ScalarType> CombineWith(ArgonHalf<ScalarType> high) const { return neon::combine(this->vec_, high); }
306
308 ace ArgonHalf<ScalarType> Reverse() const { return this->Reverse64bit(); }
309};
310
311template <class... ArgTypes>
312 requires(sizeof...(ArgTypes) > 1)
313ArgonHalf(ArgTypes...) -> ArgonHalf<std::tuple_element_t<0, std::tuple<ArgTypes...>>>;
314
315template <typename V>
316 requires std::is_scalar_v<V>
317ace ArgonHalf<V> operator+(const V a, const ArgonHalf<V> b) {
318 return b.Add(a);
319}
320
321template <typename V>
322 requires std::is_scalar_v<V>
323ace ArgonHalf<V> operator-(const V a, const ArgonHalf<V> b) {
324 return ArgonHalf<V>{a}.Subtract(b);
325}
326
327template <typename V>
328 requires std::is_scalar_v<V>
329ace ArgonHalf<V> operator*(const V a, const ArgonHalf<V> b) {
330 return b.Multiply(a);
331}
332
333template <typename V>
334 requires std::is_scalar_v<V>
335ace ArgonHalf<V> operator/(const V a, const ArgonHalf<V> b) {
336 return ArgonHalf<V>{a}.Divide(b);
337}
338
339namespace std {
340template <typename T>
341struct tuple_size<ArgonHalf<T>> {
342 static constexpr size_t value = ArgonHalf<T>::lanes;
343};
344template <size_t Index, typename T>
345struct tuple_element<Index, ArgonHalf<T>> {
346 static_assert(Index < ArgonHalf<T>::lanes);
348};
349} // namespace std
350#endif
351#undef ace
ace ArgonHalf< ScalarType > TableLookup(ArgonHalf< ScalarType > idx)
Look up each index lane in idx from this vector acting as a one-register table.
Definition argon_half.hpp:52
ace argon_next_larger MultiplyDoubleSaturateLong(ArgonHalf< ScalarType > b) const
Multiply, double, and saturate long: saturate(2 * this * b) widened (vector × vector).
Definition argon_half.hpp:178
ace argon_next_larger MultiplyLong(ScalarType b) const
Multiply this by a scalar, widening each product (vector × scalar).
Definition argon_half.hpp:157
ace argon_next_larger MultiplyAddLong(ScalarType b) const
Multiply-accumulate long (vector × scalar).
Definition argon_half.hpp:170
ace argon_next_larger Widen() const
Widen each lane to the next-larger element type (sign-extending signed lanes, zero-extending unsigned lanes).
Definition argon_half.hpp:236
ace argon_next_larger MultiplyLong(ArgonHalf< ScalarType > b) const
Multiply this and b, widening each product to the next-larger element type (vector × vector).
Definition argon_half.hpp:154
static ace ArgonHalf< ScalarType > Create(uint64_t a)
Create a new ArgonHalf from a raw 64-bit value.
Definition argon_half.hpp:41
ace argon_next_larger MultiplyDoubleSaturateLong(lane_type b) const
Multiply, double, and saturate long (vector × lane).
Definition argon_half.hpp:192
ace argon_next_larger MultiplyDoubleAddSaturateLong(lane_type b) const
Multiply, double, add, and saturate long (vector × lane).
Definition argon_half.hpp:213
ace ArgonHalf< ScalarType > TableExtension(ArgonHalf< ScalarType > b, ArgonHalf< ScalarType > idx)
Extend a previous table-lookup result using this vector as an additional table (1 extension register)...
Definition argon_half.hpp:57
ace argon_next_larger SubtractAbsoluteLong(ArgonHalf< ScalarType > b) const
Absolute difference and widen: |this - b| widened to the next-larger element type.
Definition argon_half.hpp:223
ace argon_next_larger SubtractLong(ArgonHalf< ScalarType > b) const
Subtract b from this, widening each result to the next-larger element type.
Definition argon_half.hpp:220
ace argon_next_larger PairwiseAddLong(ArgonHalf< typename argon::helpers::NextSmaller< ScalarType > > b) const
Pairwise add this and the next-smaller-type vector b, widening into argon_next_larger.
Definition argon_half.hpp:231
ace argon_next_larger AddLong(ArgonHalf< ScalarType > b) const
Add this and b, widening each lane to the next-larger element type.
Definition argon_half.hpp:151
ace ArgonHalf< ScalarType > TableExtension(std::array< ArgonHalf< ScalarType >, NumTables > b, ArgonHalf< ScalarType > idx)
Multi-register table extension lookup (2–4 registers) supplied as std::array.
Definition argon_half.hpp:64
ace argon_next_larger MultiplyAddLong(ArgonHalf< ScalarType > b) const
Multiply this and b, widen each product, and prepare for long accumulation (vector × vector).
Definition argon_half.hpp:165
ace argon_next_larger MultiplyDoubleAddSaturateLong(ScalarType b) const
Multiply, double, add, and saturate long (vector × scalar).
Definition argon_half.hpp:206
ace ArgonHalf< ScalarType > TableExtension(vector_type *b, ArgonHalf< ScalarType > idx)
Multi-register table extension lookup (2–4 registers) from a raw pointer.
Definition argon_half.hpp:71
ace ArgonHalf< NewScalarType > As() const
Reinterpret this ArgonHalf as a vector of a different lane type.
Definition argon_half.hpp:46
ace argon_next_larger MultiplyDoubleSaturateLong(ScalarType b) const
Multiply, double, and saturate long (vector × scalar).
Definition argon_half.hpp:185
ace argon_next_larger PairwiseAddLong() const
Pairwise add adjacent lanes and widen: produces lanes/2 wider-type results.
Definition argon_half.hpp:228
ace ArgonHalf< U > ConvertTo()
Convert each lane to a different element type.
Definition argon_half.hpp:90
ace Argon< ScalarType > CombineWith(ArgonHalf< ScalarType > high) const
Combine this (low) half with high to form a 128-bit Argon<ScalarType> vector.
Definition argon_half.hpp:110
ace argon_next_larger ShiftLeftLong()
Shift each lane left by n bits, widening the result to the next-larger element type.
Definition argon_half.hpp:241
ace argon_next_larger MultiplyLong(lane_type b) const
Multiply this by a lane value, widening each product (vector × lane).
Definition argon_half.hpp:160
ace ArgonHalf< U > ConvertTo()
Convert each lane to a different type using a fixed-point fractional bit count.
Definition argon_half.hpp:99
ace argon_next_larger MultiplyDoubleAddSaturateLong(ArgonHalf< ScalarType > b) const
Multiply, double, add, and saturate long: saturate(acc + 2 * this * b) widened (vector × vector).
Definition argon_half.hpp:199
ace ArgonHalf< ScalarType > Reverse() const
Reverse the order of all elements within this 64-bit vector.
Definition argon_half.hpp:113
ace argon_next_larger MultiplyAddLong(lane_type b) const
Multiply-accumulate long (vector × lane).
Definition argon_half.hpp:173
Definition argon_half.hpp:11
A 128-bit SIMD vector wrapping a scalar type, providing arithmetic, logical, and data-movement operat...
Definition argon_full.hpp:29
Represents a single lane of a SIMD vector with a runtime-determined index.
Definition lane.hpp:116
ace int lane()
On ARM32, return the local lane index within the 64-bit half-register returned by vec().
Definition lane.hpp:160
ace neon::Vec64_t< scalar_type > vec()
On ARM32, return the 64-bit half-register that contains this lane.
Definition lane.hpp:148
Represents a SIMD vector with various operations.
Definition vector.hpp:50
constexpr neon::Vec64_t< ScalarType > vec() const
Definition vector.hpp:275
NextLarger< T >::type NextLarger_t
Helper alias to get the next larger type for a given type.
Definition helpers.hpp:75
Lane deconstruction feature.
Definition argon_full.hpp:399
Helper template to determine the next smaller type for a given type.
Definition helpers.hpp:79