4#ifndef ARGON_PLATFORM_MVE
6#define ace [[gnu::always_inline]] constexpr
8#define ace [[gnu::always_inline]] inline
10template <
typename ScalarType>
18template <
typename ScalarType>
19 requires std::same_as<ScalarType, double> || std::same_as<ScalarType, uint64_t> || std::same_as<ScalarType, int64_t>
20class ArgonHalf<ScalarType> :
public argon::Vector<neon::Vec64_t<ScalarType>> {
24 using vector_type = neon::Vec64_t<ScalarType>;
27 static_assert(neon::is_doubleword_v<vector_type>);
29 static constexpr size_t bytes = 8;
30 static constexpr size_t lanes = bytes /
sizeof(ScalarType);
/// Copy constructor, move constructor, copy assignment and move assignment.
/// All four are explicitly defaulted, so the compiler-generated member-wise
/// versions are used; the class adds no custom copy or move logic.
34 ace ArgonHalf(
const ArgonHalf&) =
default;
35 ace ArgonHalf(ArgonHalf&&) =
default;
36 ace ArgonHalf& operator=(
const ArgonHalf&) =
default;
37 ace ArgonHalf& operator=(ArgonHalf&&) =
default;
/// Create a new ArgonHalf from a raw 64-bit value.
///
/// @param a 64-bit value handed unchanged to neon::create<vector_type>.
/// @return The result of neon::create<vector_type>(a), converted to ArgonHalf<ScalarType>.
/// NOTE(review): presumably `a` is the bit pattern of the whole 64-bit register — confirm against neon::create.
41 ace
static ArgonHalf<ScalarType>
Create(uint64_t a) {
return neon::create<vector_type>(a); }
45 template <
typename NewScalarType>
46 ace ArgonHalf<NewScalarType>
As()
const {
47 return neon::reinterpret<neon::Vec64_t<NewScalarType>>(this->vec_);
/// Look up each index lane in idx from this vector acting as a one-register table.
///
/// @param idx Index vector passed as the second argument of neon::table_lookup1.
/// @return The result of neon::table_lookup1(this->vec_, idx) as ArgonHalf<ScalarType>.
/// NOTE(review): this member only reads this->vec_ but is not const-qualified,
/// unlike As() and Reverse() in the same class — confirm whether that is intentional.
52 ace ArgonHalf<ScalarType>
TableLookup(ArgonHalf<ScalarType> idx) {
return neon::table_lookup1(this->vec_, idx); }
57 ace ArgonHalf<ScalarType>
TableExtension(ArgonHalf<ScalarType> b, ArgonHalf<ScalarType> idx) {
58 return neon::table_extension1(this->vec_, b, idx);
63 template <
size_t NumTables>
64 ace ArgonHalf<ScalarType>
TableExtension(std::array<ArgonHalf<ScalarType>, NumTables> b, ArgonHalf<ScalarType> idx) {
70 template <
size_t NumTables>
71 ace ArgonHalf<ScalarType>
TableExtension(vector_type* b, ArgonHalf<ScalarType> idx) {
72 static_assert(NumTables > 1 && NumTables < 5,
"Table Extension can only be performed with 1, 2, 3, or 4 tables");
74 using multivec_type = neon::MultiVector_t<vector_type, NumTables>;
76 multivec_type multivector = *(multivec_type*)b;
78 if constexpr (NumTables == 2) {
79 return neon::table_extension2(this->vec_, multivector, idx);
80 }
else if constexpr (NumTables == 3) {
81 return neon::table_extension3(this->vec_, multivector, idx);
82 }
else if constexpr (NumTables == 4) {
83 return neon::table_extension4(this->vec_, multivector, idx);
91 return neon::convert<typename neon::Vec64<U>::type>(this->vec_);
97 template <
typename U,
int fracbits>
98 requires(std::is_same_v<U, uint32_t> || std::is_same_v<U, int32_t> || std::is_same_v<U, float>)
100 if constexpr (std::is_same_v<U, float>) {
101 return neon::convert_n<fracbits>(this->vec_);
102 }
else if constexpr (std::is_unsigned_v<U>) {
103 return neon::convert_n_unsigned<fracbits>(this->vec_);
104 }
else if constexpr (std::is_signed_v<U>) {
105 return neon::convert_n_signed<fracbits>(this->vec_);
/// Reverse the order of all elements within this 64-bit vector.
///
/// Forwards to Reverse64bit() and returns its result as ArgonHalf<ScalarType>.
/// This specialization is constrained to double, uint64_t and int64_t, and
/// `lanes` is computed as 8 / sizeof(ScalarType).
113 ace ArgonHalf<ScalarType>
Reverse()
const {
return this->Reverse64bit(); }
122template <argon::helpers::has_larger ScalarType>
124 using T = argon::Vector<neon::Vec64_t<ScalarType>>;
126 using argon_next_larger = Argon<next_larger>;
129 using vector_type = neon::Vec64_t<ScalarType>;
130 using lane_type =
const argon::Lane<vector_type>;
132 static_assert(neon::is_doubleword_v<vector_type>);
134 static constexpr size_t bytes = 8;
135 static constexpr size_t lanes = bytes /
sizeof(ScalarType);
/// Create a new ArgonHalf from a raw 64-bit value.
///
/// @param a 64-bit value handed unchanged to neon::create<vector_type>.
/// @return The result of neon::create<vector_type>(a), converted to ArgonHalf<ScalarType>.
/// NOTE(review): presumably `a` is the bit pattern of the whole 64-bit register — confirm against neon::create.
141 ace
static ArgonHalf<ScalarType>
Create(uint64_t a) {
return neon::create<vector_type>(a); }
145 template <
typename NewScalarType>
146 ace ArgonHalf<NewScalarType>
As()
const {
147 return neon::reinterpret<neon::Vec64_t<NewScalarType>>(this->vec_);
/// Add this and b, widening each lane to the next-larger element type.
///
/// @param b Second operand, passed to neon::add_long after this->vec_.
/// @return The result of neon::add_long(this->vec_, b) as argon_next_larger
///         (alias of Argon<next_larger> in this class).
151 ace argon_next_larger
AddLong(ArgonHalf<ScalarType> b)
const {
return neon::add_long(this->vec_, b); }
/// Multiply this and b, widening each product to the next-larger element type
/// (vector by vector overload).
///
/// @param b Second operand, passed to neon::multiply_long after this->vec_.
/// @return The result of neon::multiply_long(this->vec_, b) as argon_next_larger.
154 ace argon_next_larger
MultiplyLong(ArgonHalf<ScalarType> b)
const {
return neon::multiply_long(this->vec_, b); }
/// Multiply this by a scalar, widening each product (vector by scalar overload).
///
/// @param b Scalar operand, passed to neon::multiply_long after this->vec_.
/// @return The result of neon::multiply_long(this->vec_, b) as argon_next_larger.
157 ace argon_next_larger
MultiplyLong(ScalarType b)
const {
return neon::multiply_long(this->vec_, b); }
161 return neon::multiply_long_lane(this->vec_, b.
vec(), b.
lane());
166 return neon::multiply_add_long(this->vec_, b);
/// Multiply-accumulate long (vector by scalar overload).
///
/// @param b Scalar operand, passed to neon::multiply_add_long after this->vec_.
/// @return The result of neon::multiply_add_long(this->vec_, b) as argon_next_larger.
/// NOTE(review): only two operands are passed here, so no separate accumulator
/// is visible at this call site — confirm which operand neon::multiply_add_long
/// accumulates into.
170 ace argon_next_larger
MultiplyAddLong(ScalarType b)
const {
return neon::multiply_add_long(this->vec_, b); }
174 return neon::multiply_add_long_lane(this->vec_, b.
vec(), b.
lane());
179 requires(std::is_same_v<vector_type, int16x4_t> || std::is_same_v<vector_type, int32x2_t>)
181 return neon::multiply_double_saturate_long(this->vec_, b);
186 requires(std::is_same_v<vector_type, int16x4_t> || std::is_same_v<vector_type, int32x2_t>)
188 return neon::multiply_double_saturate_long(this->vec_, b);
193 requires(std::is_same_v<vector_type, int16x4_t> || std::is_same_v<vector_type, int32x2_t>)
195 return neon::multiply_double_saturate_long_lane(this->vec_, b.vec(), b.lane());
200 requires(std::is_same_v<vector_type, int16x4_t> || std::is_same_v<vector_type, int32x2_t>)
202 return neon::multiply_double_add_saturate_long(this->vec_, b);
207 requires(std::is_same_v<vector_type, int16x4_t> || std::is_same_v<vector_type, int32x2_t>)
209 return neon::multiply_double_add_saturate_long(this->vec_, b);
214 requires(std::is_same_v<vector_type, int16x4_t> || std::is_same_v<vector_type, int32x2_t>)
216 return neon::multiply_double_add_saturate_long_lane(this->vec_, b.vec(), b.lane());
/// Subtract b from this, widening each result to the next-larger element type.
///
/// @param b Subtrahend, passed to neon::subtract_long after this->vec_.
/// @return The result of neon::subtract_long(this->vec_, b) as argon_next_larger.
220 ace argon_next_larger
SubtractLong(ArgonHalf<ScalarType> b)
const {
return neon::subtract_long(this->vec_, b); }
224 return neon::subtract_absolute_long(this->vec_, b);
/// Pairwise add adjacent lanes and widen the sums to the next-larger element type.
///
/// @return The result of neon::pairwise_add_long(this->vec_) as argon_next_larger.
/// NOTE(review): argon_next_larger is Argon<next_larger>, which the generated docs
/// describe as a 128-bit vector; a pairwise-add-long of a 64-bit input would normally
/// produce a 64-bit result — confirm the declared return type.
228 ace argon_next_larger
PairwiseAddLong()
const {
return neon::pairwise_add_long(this->vec_); }
232 return neon::pairwise_add_long(this->vec_, b);
/// Widen each lane to the next-larger element type.
///
/// @return The result of neon::move_long(this->vec_) as argon_next_larger.
/// NOTE(review): whether lanes are zero- or sign-extended is decided inside
/// neon::move_long; presumably it follows the signedness of ScalarType — confirm.
236 ace argon_next_larger
Widen()
const {
return neon::move_long(this->vec_); }
242 return neon::shift_left_long<n>(this->vec_);
/// Look up each index lane in idx from this vector acting as a one-register table.
///
/// @param idx Index vector passed as the second argument of neon::table_lookup1.
/// @return The result of neon::table_lookup1(this->vec_, idx) as ArgonHalf<ScalarType>.
/// NOTE(review): this member only reads this->vec_ but is not const-qualified,
/// unlike the widening operations in the same class — confirm whether that is intentional.
247 ace ArgonHalf<ScalarType>
TableLookup(ArgonHalf<ScalarType> idx) {
return neon::table_lookup1(this->vec_, idx); }
252 ace ArgonHalf<ScalarType>
TableExtension(ArgonHalf<ScalarType> b, ArgonHalf<ScalarType> idx) {
253 return neon::table_extension1(this->vec_, b, idx);
258 template <
size_t NumTables>
259 ace ArgonHalf<ScalarType>
TableExtension(std::array<ArgonHalf<ScalarType>, NumTables> b, ArgonHalf<ScalarType> idx) {
265 template <
size_t NumTables>
266 ace ArgonHalf<ScalarType>
TableExtension(vector_type* b, ArgonHalf<ScalarType> idx) {
267 static_assert(NumTables > 1 && NumTables < 5,
"Table Extension can only be performed with 1, 2, 3, or 4 tables");
269 using multivec_type = neon::MultiVector_t<vector_type, NumTables>;
271 multivec_type multivector = *(multivec_type*)b;
273 if constexpr (NumTables == 2) {
274 return neon::table_extension2(this->vec_, multivector, idx);
275 }
else if constexpr (NumTables == 3) {
276 return neon::table_extension3(this->vec_, multivector, idx);
277 }
else if constexpr (NumTables == 4) {
278 return neon::table_extension4(this->vec_, multivector, idx);
284 template <
typename U>
286 return neon::convert<typename neon::Vec64<U>::type>(this->vec_);
292 template <
typename U,
int fracbits>
293 requires(std::is_same_v<U, uint32_t> || std::is_same_v<U, int32_t> || std::is_same_v<U, float>)
295 if constexpr (std::is_same_v<U, float>) {
296 return neon::convert_n<fracbits>(this->vec_);
297 }
else if constexpr (std::is_unsigned_v<U>) {
298 return neon::convert_n_unsigned<fracbits>(this->vec_);
299 }
else if constexpr (std::is_signed_v<U>) {
300 return neon::convert_n_signed<fracbits>(this->vec_);
/// Reverse the order of all elements within this 64-bit vector.
///
/// Forwards to Reverse64bit() and returns its result as ArgonHalf<ScalarType>.
308 ace ArgonHalf<ScalarType>
Reverse()
const {
return this->Reverse64bit(); }
311template <
class... ArgTypes>
312 requires(
sizeof...(ArgTypes) > 1)
316 requires std::is_scalar_v<V>
322 requires std::is_scalar_v<V>
328 requires std::is_scalar_v<V>
330 return b.Multiply(a);
334 requires std::is_scalar_v<V>
344template <
size_t Index,
typename T>
346 static_assert(Index < ArgonHalf<T>::lanes);
ace ArgonHalf< ScalarType > TableLookup(ArgonHalf< ScalarType > idx)
Look up each index lane in idx from this vector acting as a one-register table.
Definition argon_half.hpp:52
ace argon_next_larger MultiplyDoubleSaturateLong(ArgonHalf< ScalarType > b) const
Multiply, double, and saturate long: saturate(2 * this * b) widened (vector × vector).
Definition argon_half.hpp:178
ace argon_next_larger MultiplyLong(ScalarType b) const
Multiply this by a scalar, widening each product (vector × scalar).
Definition argon_half.hpp:157
ace argon_next_larger MultiplyAddLong(ScalarType b) const
Multiply-accumulate long (vector × scalar).
Definition argon_half.hpp:170
ace argon_next_larger Widen() const
Widen each lane to the next-larger element type, extending according to the signedness of ScalarType.
Definition argon_half.hpp:236
ace argon_next_larger MultiplyLong(ArgonHalf< ScalarType > b) const
Multiply this and b, widening each product to the next-larger element type (vector × vector).
Definition argon_half.hpp:154
static ace ArgonHalf< ScalarType > Create(uint64_t a)
Create a new ArgonHalf from a raw 64-bit value.
Definition argon_half.hpp:41
ace argon_next_larger MultiplyDoubleSaturateLong(lane_type b) const
Multiply, double, and saturate long (vector × lane).
Definition argon_half.hpp:192
ace argon_next_larger MultiplyDoubleAddSaturateLong(lane_type b) const
Multiply, double, add, and saturate long (vector × lane).
Definition argon_half.hpp:213
ace ArgonHalf< ScalarType > TableExtension(ArgonHalf< ScalarType > b, ArgonHalf< ScalarType > idx)
Extend a previous table-lookup result using this vector as an additional table (1 extension register)...
Definition argon_half.hpp:57
ace argon_next_larger SubtractAbsoluteLong(ArgonHalf< ScalarType > b) const
Absolute difference and widen: |this - b| widened to the next-larger element type.
Definition argon_half.hpp:223
ace argon_next_larger SubtractLong(ArgonHalf< ScalarType > b) const
Subtract b from this, widening each result to the next-larger element type.
Definition argon_half.hpp:220
ace argon_next_larger PairwiseAddLong(ArgonHalf< typename argon::helpers::NextSmaller< ScalarType > > b) const
Pairwise add this and the next-smaller-type vector b, widening into argon_next_larger.
Definition argon_half.hpp:231
ace argon_next_larger AddLong(ArgonHalf< ScalarType > b) const
Add this and b, widening each lane to the next-larger element type.
Definition argon_half.hpp:151
ace ArgonHalf< ScalarType > TableExtension(std::array< ArgonHalf< ScalarType >, NumTables > b, ArgonHalf< ScalarType > idx)
Multi-register table extension lookup (2–4 registers) supplied as std::array.
Definition argon_half.hpp:64
ace argon_next_larger MultiplyAddLong(ArgonHalf< ScalarType > b) const
Multiply this and b, widen each product, and prepare for long accumulation (vector × vector).
Definition argon_half.hpp:165
ace argon_next_larger MultiplyDoubleAddSaturateLong(ScalarType b) const
Multiply, double, add, and saturate long (vector × scalar).
Definition argon_half.hpp:206
ace ArgonHalf< ScalarType > TableExtension(vector_type *b, ArgonHalf< ScalarType > idx)
Multi-register table extension lookup (2–4 registers) from a raw pointer.
Definition argon_half.hpp:71
ace ArgonHalf< NewScalarType > As() const
Reinterpret this ArgonHalf as a vector of a different element type.
Definition argon_half.hpp:46
ace argon_next_larger MultiplyDoubleSaturateLong(ScalarType b) const
Multiply, double, and saturate long (vector × scalar).
Definition argon_half.hpp:185
ace argon_next_larger PairwiseAddLong() const
Pairwise add adjacent lanes and widen: produces lanes/2 wider-type results.
Definition argon_half.hpp:228
ace ArgonHalf< U > ConvertTo()
Convert each lane to a different element type.
Definition argon_half.hpp:90
ace Argon< ScalarType > CombineWith(ArgonHalf< ScalarType > high) const
Combine this (low) half with high to form a 128-bit Argon<ScalarType> vector.
Definition argon_half.hpp:110
ace argon_next_larger ShiftLeftLong()
Shift each lane left by n bits, widening the result to the next-larger element type.
Definition argon_half.hpp:241
ace argon_next_larger MultiplyLong(lane_type b) const
Multiply this by a lane value, widening each product (vector × lane).
Definition argon_half.hpp:160
ace ArgonHalf< U > ConvertTo()
Convert each lane to a different type using a fixed-point fractional bit count.
Definition argon_half.hpp:99
ace argon_next_larger MultiplyDoubleAddSaturateLong(ArgonHalf< ScalarType > b) const
Multiply, double, add, and saturate long: saturate(acc + 2 * this * b) widened (vector × vector).
Definition argon_half.hpp:199
ace ArgonHalf< ScalarType > Reverse() const
Reverse the order of all elements within this 64-bit vector.
Definition argon_half.hpp:113
ace argon_next_larger MultiplyAddLong(lane_type b) const
Multiply-accumulate long (vector × lane).
Definition argon_half.hpp:173
Definition argon_half.hpp:11
A 128-bit SIMD vector wrapping a scalar type, providing arithmetic, logical, and data-movement operations.
Definition argon_full.hpp:29
Represents a single lane of a SIMD vector with a runtime-determined index.
Definition lane.hpp:116
ace int lane()
On ARM32, return the local lane index within the 64-bit half-register returned by vec().
Definition lane.hpp:160
ace neon::Vec64_t< scalar_type > vec()
On ARM32, return the 64-bit half-register that contains this lane.
Definition lane.hpp:148
Represents a SIMD vector with various operations.
Definition vector.hpp:50
constexpr neon::Vec64_t< ScalarType > vec() const
Definition vector.hpp:275
NextLarger< T >::type NextLarger_t
Helper alias to get the next larger type for a given type.
Definition helpers.hpp:75
Lane deconstruction feature.
Definition argon_full.hpp:399
Helper template to determine the next smaller type for a given type.
Definition helpers.hpp:79