10#include "arm_simd/helpers.hpp"
11#include "arm_simd/helpers/multivector.hpp"
12#include "arm_simd/helpers/scalar.hpp"
13#include "arm_simd/helpers/vec64.hpp"
16#include "helpers/bool.hpp"
20#ifdef __ARM_FEATURE_MVE
26#ifdef ARGON_PLATFORM_SIMDE
29#define ace [[gnu::always_inline]] constexpr
31#define ace [[gnu::always_inline]] inline
// Compile-time predicate: true when T is exactly the same type as at least one
// of the types in Ts (std::is_same is applied to each, combined with
// std::disjunction_v). An empty Ts pack yields false.
42template <
typename T,
typename... Ts>
43inline constexpr bool is_one_of = std::disjunction_v<std::is_same<T, Ts>...>;
49template <
typename VectorType>
52 template <
size_t LaneIndex>
62 static constexpr size_t lanes = (simd::is_quadword_v<VectorType> ? 16 : 8) /
sizeof(
scalar_type);
// Constructs a Vector that wraps the given raw SIMD vector value.
// The argument is taken by value and stored in the vec_ member.
// `ace` expands to [[gnu::always_inline]] plus constexpr or inline, depending
// on the platform macro selected near the top of the file.
74 ace
Vector(VectorType vector) : vec_{
std::move(vector)} {};
81#ifndef ARGON_PLATFORM_MVE
91 template <
size_t LaneIndex>
// Constructs a Vector by brace-initializing vec_ directly from the arguments.
// The requires-clause restricts this overload to two or more arguments, so it
// does not compete with the single-argument constructors.
// NOTE(review): args are taken by value, so with deduced ArgTypes the
// std::forward below behaves like std::move on each argument.
95 template <
typename... ArgTypes>
96 requires(
sizeof...(ArgTypes) > 1)
97 ace
Vector(ArgTypes... args) : vec_{
std::forward<ArgTypes>(args)...} {}
111#ifdef ARGON_PLATFORM_MVE
112 return simd::duplicate(scalar);
114 return simd::duplicate<VectorType>(scalar);
122 template <simd::is_vector_type IntrinsicType>
124#ifdef ARGON_PLATFORM_MVE
125 return simd::duplicate(lane.
Get());
127 return simd::duplicate_lane<vector_type>(lane.
vec(), lane.
lane());
135 template <
size_t LaneIndex>
137#ifdef ARGON_PLATFORM_MVE
138 return simd::duplicate(lane.
Get());
140 if constexpr (simd::is_quadword_v<VectorType>) {
142 return simd::duplicate_lane_quad<LaneIndex>(lane.
vec());
146 constexpr size_t local_lane = LaneIndex >= (
lanes / 2) ? LaneIndex - (
lanes / 2) : LaneIndex;
147 return simd::duplicate_lane_quad<local_lane>(lane.
vec());
150 return simd::duplicate_lane<LaneIndex>(lane.
vec());
163#if __cpp_if_consteval >= 202106L
164 return IotaHelper(start, std::make_index_sequence<lanes>{});
166 return Argon{start}.
Add(VectorType{0, 1, 2, 3});
174 template <
typename FuncType>
175 requires std::convertible_to<FuncType, std::function<
scalar_type()>>
178 utility::constexpr_for<0, lanes, 1>([&](
size_t i) {
189 template <
typename FuncType>
193 utility::constexpr_for<0, lanes, 1>([&]<
size_t i>() {
258#if ARGON_USE_COMPILER_EXTENSIONS
267#if ARGON_USE_COMPILER_EXTENSIONS
// Returns a copy of the underlying SIMD vector held in vec_.
275 [[gnu::always_inline]]
constexpr VectorType
vec()
const {
return vec_; }
// Implicit conversion to the raw SIMD vector type; returns a copy of vec_.
// Not marked explicit, so a Vector can be passed wherever a VectorType is
// expected.
278 [[gnu::always_inline]]
constexpr operator VectorType()
const {
return vec_; }
283 std::array<scalar_type, lanes> out;
284 simd::store1(out.data(), vec_);
293#ifdef ARGON_PLATFORM_MVE
296 return {vec_,
static_cast<int>(i)};
300#ifdef ARGON_PLATFORM_MVE
303 return {vec_,
static_cast<int>(i)};
311#ifdef ARGON_PLATFORM_MVE
319#ifdef ARGON_PLATFORM_MVE
329 template <
size_t LaneIndex>
331#ifdef ARGON_PLATFORM_MVE
332 return vec_[LaneIndex];
338 template <
size_t LaneIndex>
340#ifdef ARGON_PLATFORM_MVE
341 return vec_[LaneIndex];
358 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
361 return simd::negate(vec_);
367 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
368 return vec_ + b.vec_;
370 return simd::add(vec_, b);
389 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
390 return vec_ - b.vec_;
392 return simd::subtract(vec_, b);
410#ifdef ARGON_PLATFORM_MVE
411 return mve::add(vec_, mve::subtract_absolute(b, c));
413 return neon::subtract_absolute_add(vec_, b, c);
419 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
420 return vec_ * b.vec_;
422 return simd::multiply(vec_, b);
428 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
431 return simd::multiply(vec_, b);
435#ifndef ARGON_PLATFORM_MVE
440 template <
size_t LaneIndex>
442 return neon::multiply_lane(vec_, b.
vec(), b.
lane());
449 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
450 return vec_ + b.vec_ * c.vec_;
452 return simd::multiply_add(vec_, b, c);
459 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
460 return vec_ + b.vec_ * c;
462 return simd::multiply_add(vec_, b, c);
470#ifndef ARGON_PLATFORM_MVE
474 return simd::multiply_add_lane(vec_, b.vec(), c.
vec(), c.
lane());
483 template <
size_t LaneIndex>
485 return simd::multiply_add_lane(vec_, b.vec(), c.
vec(), c.
lane());
489 template <
size_t LaneIndex>
498#if ARGON_USE_COMPILER_EXTENSIONS
499 return vec_ - b.vec_ * c.vec_;
501 return simd::multiply_subtract(vec_, b, c);
508#if ARGON_USE_COMPILER_EXTENSIONS
509 return vec_ - b.vec_ * c;
511 return simd::multiply_subtract(vec_, b, c);
519#ifndef ARGON_PLATFORM_MVE
523 return simd::multiply_subtract_lane(vec_, b.vec(), c.
vec(), c.
lane());
535#ifndef ARGON_PLATFORM_MVE
539 return simd::multiply_double_saturate_high_lane(vec_, l.
vec(), l.
lane());
546 return simd::multiply_double_round_saturate_high(vec_, v);
552 return simd::multiply_double_round_saturate_high(vec_, s);
555#ifndef ARGON_PLATFORM_MVE
559 return simd::multiply_double_round_saturate_high_lane(vec_, l.
vec(), l.
lane());
571#ifdef ARGON_PLATFORM_MVE
572 if constexpr (std::is_same_v<scalar_type, uint32_t>) {
573 std::numeric_limits<uint32_t>::max() / vec_;
578 return simd::reciprocal_estimate(vec_);
587#ifdef ARGON_PLATFORM_MVE
588 if constexpr (std::is_same_v<scalar_type, uint32_t>) {
589 return std::numeric_limits<uint32_t>::max() / (vec_ * vec_);
591 return 1.f / (vec_ * vec_);
594 return simd::reciprocal_sqrt_estimate(vec_);
602 requires std::floating_point<scalar_type>
604#ifdef ARGON_PLATFORM_MVE
605 return 2.f - vec_ * b.vec_;
607 return simd::reciprocal_step(vec_, b.vec_);
615 requires std::floating_point<scalar_type>
617#ifdef ARGON_PLATFORM_MVE
618 return (3.f - vec_ * b.vec_) * 0.5f;
620 return simd::reciprocal_sqrt_step(vec_, b.vec_);
631 for (
int i = 0; i < n_iters; ++i) {
644 for (
int i = 0; i < n_iters; ++i) {
652 template <
typename arg_type>
653 requires(is_one_of<arg_type, argon_type, scalar_type, lane_type> || std::is_convertible_v<arg_type, argon_type> ||
654 std::is_convertible_v<arg_type, scalar_type>)
656 return Add(b.MultiplyFixedQMax(c));
661 template <
typename arg_type>
662 requires(is_one_of<arg_type, argon_type, scalar_type, lane_type> || std::is_convertible_v<arg_type, argon_type> ||
663 std::is_convertible_v<arg_type, scalar_type>)
665 return Add(b.MultiplyRoundFixedQMax(c));
671 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
672 return vec_ / b.vec_;
674 return simd::divide(vec_, b);
680 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
681 return vec_ / b.vec_;
691 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
692 return vec_ % b.vec_;
693 }
else if constexpr (std::floating_point<scalar_type>) {
703 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
706 return this->map([b](
scalar_type lane1) {
return std::fmod(lane1, b); });
713 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
714 return vec_ > b.vec_ ? vec_ : b.vec_;
716 return simd::max(vec_, b);
722 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
723 return vec_ < b.vec_ ? vec_ : b.vec_;
725 return simd::min(vec_, b);
752 requires std::is_integral_v<scalar_type>
754 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
755 return vec_ << b.vec_;
757 return simd::shift_left(vec_, b.vec_);
764 requires std::is_integral_v<scalar_type>
766 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
770 return simd::shift_left(vec_, b.vec_);
778 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
781 return simd::shift_left<n>(vec_);
787 requires(std::is_integral_v<scalar_type>)
789 return simd::shift_left_saturate(vec_, b);
801 return simd::shift_left_saturate<n>(vec_);
811 return simd::shift_left_insert<n>(vec_, b);
817 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
820 return simd::shift_right<n>(vec_);
827 return simd::shift_right_round<n>(vec_);
835#ifdef ARGON_PLATFORM_MVE
836 return vec_ + (b >> n);
838 return simd::shift_right_accumulate<n>(vec_, b);
847#ifdef ARGON_PLATFORM_MVE
848 return vec_ + mve::shift_right_round<n>(b);
850 return simd::shift_right_accumulate_round<n>(vec_, b);
859 return simd::shift_right_insert<n>(vec_, b);
864#ifdef ARGON_PLATFORM_MVE
865 return mve::load1(ptr);
867 return neon::load1<VectorType>(ptr);
873#ifdef ARGON_PLATFORM_MVE
876 utility::constexpr_for<0, lanes, 1>([val, &
vec]<
int i>() {
vec[i] = val; });
878 return simd::load1_duplicate<VectorType>(ptr);
890#ifdef ARGON_PLATFORM_MVE
893 "Unsupported size for gather load");
896 return mve::load_byte_gather_offset(base, offset_vector);
898 return mve::load_halfword_gather_offset(base, offset_vector);
900 return mve::load_word_gather_offset(base, offset_vector);
902 return mve::load_doubleword_gather_offset(base, offset_vector);
906 utility::constexpr_for<0, lanes, 1>([&]<
int i>() {
907 auto lane_val = neon::get_lane<i>(offset_vector);
922#ifdef ARGON_PLATFORM_MVE
925 "Unsupported size for gather load");
928 return mve::load_byte_gather_offset(base, offset_vector);
930 return mve::load_halfword_gather_offset(base, offset_vector *
sizeof(
scalar_type));
932 return mve::load_word_gather_offset(base, offset_vector *
sizeof(
scalar_type));
934 return mve::load_doubleword_gather_offset(base, offset_vector *
sizeof(
scalar_type));
938 utility::constexpr_for<0, lanes, 1>([&]<
int i>() {
939 auto lane_val = neon::get_lane<i>(offset_vector);
940 destination = destination.template
LoadToLane<i>(base + lane_val);
949 template <
size_t lane>
961 template <
size_t str
ide>
963#ifdef ARGON_PLATFORM_MVE
964 static_assert(stride == 2 || stride == 4,
965 "De-interleaving Loads can only be performed with a stride of 2, 3, or 4");
966 if constexpr (stride == 2) {
968 }
else if constexpr (stride == 4) {
972 static_assert(stride > 1 && stride < 5,
"De-interleaving Loads can only be performed with a stride of 2, 3, or 4");
973 using multivec_type = simd::MultiVector_t<VectorType, stride>;
974 if constexpr (stride == 2) {
976 }
else if constexpr (stride == 3) {
978 }
else if constexpr (stride == 4) {
989 template <
size_t str
ide>
991#ifdef ARGON_PLATFORM_MVE
992 static_assert(stride == 2 || stride == 4,
993 "De-interleaving LoadCopy can only be performed with a stride of 2, 3, or 4");
994 if constexpr (stride == 2) {
995 return {mve::duplicate(*ptr++), mve::duplicate(*ptr++)};
996 }
else if constexpr (stride == 4) {
997 return {mve::duplicate(*ptr++), mve::duplicate(*ptr++), mve::duplicate(*ptr++), mve::duplicate(*ptr)};
1000 static_assert(stride > 1 && stride < 5,
1001 "De-interleaving LoadCopy can only be performed with a stride of 2, 3, or 4");
1002 using multivec_type = simd::MultiVector<VectorType, stride>::type;
1003 if constexpr (stride == 2) {
1004 return argon::to_array(simd::load2_duplicate<multivec_type>(ptr).val);
1005 }
else if constexpr (stride == 3) {
1006 return argon::to_array(simd::load3_duplicate<multivec_type>(ptr).val);
1007 }
else if constexpr (stride == 4) {
1008 return argon::to_array(simd::load4_duplicate<multivec_type>(ptr).val);
1019 template <
size_t LaneIndex,
size_t Str
ide>
1022 static_assert(Stride > 1 && Stride < 5,
"De-interleaving Loads can only be performed with a stride of 2, 3, or 4");
1023#ifdef ARGON_PLATFORM_MVE
1025 utility::constexpr_for<0, Stride, 1>([&]<
int i>() {
1026 out.val[i][LaneIndex] = ptr[i];
1030 if constexpr (Stride == 2) {
1031 if constexpr (simd::is_quadword_v<VectorType>) {
1032 return argon::to_array(simd::load2_lane_quad<LaneIndex>(ptr, multi).val);
1036 }
else if constexpr (Stride == 3) {
1037 if constexpr (simd::is_quadword_v<VectorType>) {
1038 return argon::to_array(simd::load3_lane_quad<LaneIndex>(ptr, multi).val);
1042 }
else if constexpr (Stride == 4) {
1043 if constexpr (simd::is_quadword_v<VectorType>) {
1044 return argon::to_array(simd::load4_lane_quad<LaneIndex>(ptr, multi).val);
1053 template <
size_t lane,
size_t str
ide>
1056 using multivec_type = simd::MultiVector_t<VectorType, stride>;
1070 template <
size_t str
ide>
1074 static_assert(stride > 1 && stride < 5,
"De-interleaving Loads can only be performed with a stride of 2, 3, or 4");
1075 std::array<argon_type, stride> multi{};
1076 utility::constexpr_for<0, lanes, 1>([&]<
int i>() {
1077 auto lane_val = simd::get_lane<i>(offset_vector);
1092 static_assert(n > 1 && n < 5,
"LoadMulti can only be performed with a size of 2, 3, or 4");
1093#ifdef ARGON_PLATFORM_MVE
1094 std::array<argon_type, n> multi{};
1095 utility::constexpr_for<0, n, 1>([&]<
int i>() {
1101#if defined(__clang__) || (__GNUC__ > 13)
1102 using multi_type = simd::MultiVector_t<VectorType, n>;
1103 if constexpr (n == 2) {
1105 }
else if constexpr (n == 3) {
1107 }
else if constexpr (n == 4) {
1111 if constexpr (n == 2) {
1112 auto a = simd::load1(ptr);
1113 auto b = simd::load1(ptr +
lanes);
1115 }
else if constexpr (n == 3) {
1116 auto a = simd::load1(ptr);
1117 auto b = simd::load1(ptr +
lanes);
1118 auto c = simd::load1(ptr + 2 *
lanes);
1120 }
else if constexpr (n == 4) {
1121 auto a = simd::load1(ptr);
1122 auto b = simd::load1(ptr +
lanes);
1123 auto c = simd::load1(ptr + 2 *
lanes);
1124 auto d = simd::load1(ptr + 3 *
lanes);
1125 return {a, b, c, d};
1138 template <
int LaneIndex>
1140#ifdef ARGON_PLATFORM_MVE
1141 *ptr = vec_[LaneIndex];
1143 simd::store1_lane<LaneIndex>(ptr, vec_);
1147#ifndef ARGON_PLATFORM_MVE
1170 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
1173 return simd::bitwise_not(vec_);
1179 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
1180 return vec_ & b.vec_;
1182 return simd::bitwise_and(vec_, b);
1188 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
1189 return vec_ | b.vec_;
1191 return simd::bitwise_or(vec_, b);
1197 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
1198 return vec_ ^ b.vec_;
1200 return simd::bitwise_xor(vec_, b);
1207 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
1208 return vec_ | ~b.vec_;
1210 return simd::bitwise_or_not(vec_, b);
1217 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
1218 return vec_ & ~b.vec_;
1220 return simd::bitwise_clear(vec_, b);
1227#ifndef ARGON_PLATFORM_MVE
1230 template <
typename ArgType>
1231 requires std::is_unsigned_v<scalar_type>
1233 return simd::bitwise_select(vec_, true_value, false_value);
1237 template <
typename ArgType>
1238 requires std::is_unsigned_v<scalar_type>
1240 return simd::bitwise_select(true_value, false_value);
1256 return simd::count_leading_sign_bits(vec_);
1266#ifdef ARGON_PLATFORM_MVE
1267 auto new_vec = vec_;
1268 utility::constexpr_for<0, lanes, 1>([&]<
int i>() {
1269 new_vec[i] = std::popcount(vec_[i]);
1273 return neon::count_active_bits(vec_);
1285#ifdef ARGON_PLATFORM_MVE
1286 auto new_vec = vec_;
1287 utility::constexpr_for<0, lanes, 1>([&]<
int i>() {
1289 new_vec[i] = b.vec_[i];
1294 return simd::extract<n>(vec_, b);
// Returns the result of simd::reverse_64bit applied to this vector.
// NOTE(review): presumably reverses element order within each 64-bit group,
// as the NEON vrev64 family does - confirm against simd::reverse_64bit.
1298 ace
argon_type Reverse64bit()
const {
return simd::reverse_64bit(vec_); }
// Returns the result of simd::reverse_32bit applied to this vector.
// NOTE(review): presumably reverses element order within each 32-bit group,
// as the NEON vrev32 family does - confirm against simd::reverse_32bit.
1299 ace
argon_type Reverse32bit()
const {
return simd::reverse_32bit(vec_); }
// Returns the result of simd::reverse_16bit applied to this vector.
// NOTE(review): presumably reverses element order within each 16-bit group,
// as the NEON vrev16 family does - confirm against simd::reverse_16bit.
1300 ace
argon_type Reverse16bit()
const {
return simd::reverse_16bit(vec_); }
1306#ifdef ARGON_PLATFORM_MVE
1307 std::array<argon_type, 2> new_vec;
1308 utility::constexpr_for<0, lanes, 1>([&]<
int i>() {
1310 new_vec[0][i] = vec_[i / 2];
1311 new_vec[1][i] = vec_[(i +
lanes) / 2];
1313 new_vec[0][i] = b.vec_[i / 2];
1314 new_vec[1][i] = b.vec_[(i +
lanes) / 2];
1327#ifdef ARGON_PLATFORM_MVE
1328 std::array<argon_type, 2> new_vec;
1329 utility::constexpr_for<0, lanes, 1>([&]<
int i>() {
1330 if ((i * 2) <
lanes) {
1331 new_vec[0][i] = vec_[i * 2];
1332 new_vec[1][i] = vec_[i * 2 + 1];
1334 new_vec[0][i] = b.vec_[i * 2];
1335 new_vec[1][i] = b.vec_[i * 2 + 1];
1350#ifdef ARGON_PLATFORM_MVE
1351 std::array<argon_type, 2> new_vec;
1352 utility::constexpr_for<0, lanes, 1>([&]<
int i>() {
1354 new_vec[0][i] = vec_[i];
1355 new_vec[1][i] = vec_[i + 1];
1357 new_vec[0][i] = b.vec_[i + 1];
1358 new_vec[1][i] = b.vec_[i];
1370 template <
typename FuncType>
1374 utility::constexpr_for<0, lanes, 1>([&]<
int i>() {
1375 out[i] = body(vec_[i]);
1380 template <
typename FuncType>
1382 ace
argon_type map_with_index(FuncType body)
const {
1384 utility::constexpr_for<0, lanes, 1>([&]<
int i>() {
1385 out[i] = body(vec_[i], i);
1390 template <
typename FuncType>
1394 utility::constexpr_for<0, lanes, 1>([&]<
int i>() {
1395 out[i] = body(vec_[i], other.vec_[i]);
1400 template <
typename FuncType>
1401 requires std::convertible_to<FuncType, std::function<void(
scalar_type&)>>
1403 VectorType out = vec_;
1404 utility::constexpr_for<0, lanes, 1>([&]<
int i>() {
1410 template <
typename FuncType>
1411 requires std::convertible_to<FuncType, std::function<void(
scalar_type&,
int)>>
1412 ace
argon_type each_lane_with_index(FuncType body) {
1413 VectorType out = vec_;
1414 utility::constexpr_for<0, lanes, 1>([&]<
int i>() {
1420 template <
typename FuncType>
1421 requires std::convertible_to<FuncType, std::function<void()>>
1422 ace
void if_lane(FuncType true_branch) {
1423 utility::constexpr_for<0, lanes, 1>([&]<
int i>() {
1430 template <
typename FuncType>
1431 requires std::convertible_to<FuncType, std::function<void()>>
1432 ace
void if_else_lane(FuncType true_branch, FuncType false_branch) {
1433 utility::constexpr_for<0, lanes, 1>([&]<
int i>() {
1442 template <
typename FuncType>
1443 requires std::convertible_to<FuncType, std::function<void(
int)>>
1444 ace
void if_lane_with_index(FuncType true_branch) {
1445 utility::constexpr_for<0, lanes, 1>([&]<
int i>() {
1452 template <
typename FuncType1,
typename FuncType2>
1453 requires std::convertible_to<FuncType1, std::function<void(
int)>> &&
1454 std::convertible_to<FuncType2, std::function<void(
int)>>
1455 ace
void if_else_lane_with_index(FuncType1 true_branch, FuncType2 false_branch) {
1456 utility::constexpr_for<0, lanes, 1>([&]<
int i>() {
1466 utility::constexpr_for<0, lanes, 1>([&]<
int i>() {
1475#ifdef ARGON_PLATFORM_MVE
1476 return mve::max_reduce_max(vec_, vec_) != 0;
1479 utility::constexpr_for<0, lanes, 1>([&]<
int i>() {
1480 if (nonzero[i] == 0) {
1488 template <std::
size_t Index>
1489 std::tuple_element_t<Index, argon_type> get() {
1490#ifdef ARGON_PLATFORM_MVE
1498 template <std::size_t... Ints>
1500 return VectorType{
static_cast<scalar_type>(start + Ints)...};
1512template <
typename T>
1513struct tuple_size<argon::Vector<T>> {
1517template <
size_t Index,
typename T>
1518struct tuple_element<Index, argon::Vector<T>> {
1519 static_assert(Index < argon::Vector<T>::lanes);
Provides utility templates and concepts for type traits and compile-time iteration.
A 128-bit SIMD vector wrapping a scalar type, providing arithmetic, logical, and data-movement operat...
Definition argon_full.hpp:29
Represents a single lane of a SIMD vector with the lane index known at compile time.
Definition lane.hpp:46
ace int lane()
On ARM32, return the local lane index within the 64-bit half-register returned by vec().
Definition lane.hpp:94
ace neon::Vec64_t< scalar_type > vec()
On ARM32, return the 64-bit half-register that contains this lane.
Definition lane.hpp:82
ace scalar_type Get() const
Get the scalar value of this lane.
Definition lane.hpp:73
Represents a single lane of a SIMD vector with a runtime-determined index.
Definition lane.hpp:116
ace int lane()
On ARM32, return the local lane index within the 64-bit half-register returned by vec().
Definition lane.hpp:160
ace neon::Vec64_t< scalar_type > vec()
On ARM32, return the 64-bit half-register that contains this lane.
Definition lane.hpp:148
ace scalar_type Get() const
Get the scalar value of this lane.
Definition lane.hpp:137
ace argon_type PairwiseMin(argon_type b) const
Select the minimum of each pair of lanes in the two vectors.
Definition vector.hpp:1163
Lane< simd::Vec128_t< ScalarType > > lane_type
Definition vector.hpp:54
ace argon_type MultiplySubtract(argon_type b, lane_type c) const
Multiply a vector by a lane value and subtract from a third vector.
Definition vector.hpp:522
ace Vector(VectorType vector)
Constructs a Vector from a SIMD vector type.
Definition vector.hpp:74
simd::Vec128_t< ScalarType > vector_type
Definition vector.hpp:56
ace argon_type Modulo(argon_type b) const
Get the modulo of two vectors.
Definition vector.hpp:690
ace argon_bool_type operator<(argon_type b) const
Compare two vectors, checking if this vector is less than the other.
Definition vector.hpp:221
ace argon_type ShiftLeftRound(argon_type b) const
Shift the elements of the vector to the left by a specified number of bits, rounding the result.
Definition vector.hpp:793
std::array< argon_type, 2 > TransposeWith(argon_type b) const
Perform a 2x2 matrix transpose on two vectors, returning two vectors of pairs.
Definition vector.hpp:1349
constexpr Vector(const Vector &other)=default
Copy constructor for the Vector class.
static ace std::array< argon_type, stride > LoadInterleaved(const scalar_type *ptr)
Load multiple vectors from a pointer, de-interleaving.
Definition vector.hpp:962
static ace argon_type Iota(scalar_type start)
Constructs a Vector from an incrementing sequence.
Definition vector.hpp:161
static ace argon_type FromScalar(scalar_type scalar)
Definition vector.hpp:110
ace argon_type Multiply(scalar_type b) const
Multiply a vector by a scalar value.
Definition vector.hpp:427
ace argon_type ReciprocalEstimateRefine(int n_iters=1) const
Compute a refined reciprocal estimate using Newton-Raphson iterations.
Definition vector.hpp:627
ace argon_type MultiplySubtract(argon_type b, argon_type c) const
Multiply two vectors and subtract from a third vector.
Definition vector.hpp:497
ace argon_type Extract(argon_type b) const
Extract n elements from the lower end of the operand, and the remaining elements from the top end of ...
Definition vector.hpp:1284
constexpr VectorType vec() const
Get the underlying SIMD vector.
Definition vector.hpp:275
ace argon_type ReciprocalEstimate() const
1 / value, using an estimate for speed
Definition vector.hpp:568
ace argon_type Multiply(argon_type b) const
Multiply two vectors.
Definition vector.hpp:418
ace argon_bool_type LessThanOrEqual(argon_type b) const
Compare the lanes of two vectors, setting the result lane's bits to ON if a is less than or equal to ...
Definition vector.hpp:739
ace argon_type operator--() const
Decrement the vector by 1 and return the result.
Definition vector.hpp:236
ace argon_type ShiftLeft(const int i) const
Shift the elements of the vector to the left by a specified number of bits.
Definition vector.hpp:354
static ace argon_type Load(const scalar_type *ptr)
Load a vector from a pointer.
Definition vector.hpp:863
ace argon_type operator+(argon_type b) const
Add a vector and return the result.
Definition vector.hpp:203
ace argon_type ShiftLeft(helpers::ArgonFor_t< simd::make_signed_t< Bool_t< VectorType > > > b) const
Shift the elements of the vector to the left by a specified number of bits.
Definition vector.hpp:751
ace argon_type Multiply(const_lane_type< LaneIndex > b) const
Multiply a vector by a lane value.
Definition vector.hpp:441
ace argon_type PairwiseAdd(argon_type b) const
Pairwise ops.
Definition vector.hpp:1153
ace argon_type MultiplyAdd(scalar_type b, argon_type c) const
Multiply a vector by a scalar value and add a third vector.
Definition vector.hpp:468
ace argon_bool_type operator>=(argon_type b) const
Compare two vectors, checking if this vector is greater than or equal to the other.
Definition vector.hpp:230
ace argon_type MultiplyAdd(argon_type b, lane_type c) const
Multiply a vector by a lane value and add a third vector.
Definition vector.hpp:473
ace argon_bool_type operator==(argon_type b) const
Compare two vectors for equality.
Definition vector.hpp:215
ace argon_type MultiplyAdd(argon_type b, argon_type c) const
Multiply two vectors and add a third vector.
Definition vector.hpp:448
ace argon_type BitwiseSelect(ArgType true_value, ArgType false_value) const
Bitwise select between two vectors, using the current vector as a mask.
Definition vector.hpp:1232
ace argon_type ShiftLeft(std::make_signed_t< simd::Scalar_t< Bool_t< VectorType > > > n) const
Shift the elements of the vector to the left by a specified number of bits.
Definition vector.hpp:763
ace argon_type BitwiseOrNot(argon_type b) const
Bitwise OR of the vector with the NOT of another vector.
Definition vector.hpp:1206
ace helpers::ArgonFor_t< simd::make_signed_t< Bool_t< VectorType > > > CountLeadingSignBits() const
Count the number of consecutive bits following the sign bit that are set to the same value as the sig...
Definition vector.hpp:1253
static ace argon_type FromLane(argon::ConstLane< LaneIndex, VectorType > lane)
Constructs a Vector from a ConstLane object.
Definition vector.hpp:136
ace argon_type AddSaturate(argon_type b) const
Adds two vectors, saturating the result.
Definition vector.hpp:385
std::array< argon_type, 2 > UnzipWith(argon_type b)
Unzip two vectors, returning two vectors of pairs.
Definition vector.hpp:1326
ace argon_type operator++() const
Increment the vector by 1 and return the result.
Definition vector.hpp:233
ace argon_type MultiplyAddFixedQMax(argon_type b, arg_type c) const
Multiply-add three fixed-point vectors, returning a fixed-point sum.
Definition vector.hpp:655
ace argon_type operator^(argon_type b) const
Bitwise XOR two vectors and return the result.
Definition vector.hpp:245
ace argon_type ReciprocalSqrtStep(argon_type b) const
Newton-Raphson step for reciprocal-sqrt refinement: (3 - a * b) / 2.
Definition vector.hpp:614
ace argon_bool_type Equal(argon_type b) const
Compare the lanes of two vectors, setting the result lane's bits to ON if they are equal.
Definition vector.hpp:731
ace void StoreTo(scalar_type *ptr) const
Store the vector to a pointer.
Definition vector.hpp:1133
ConstLane< LaneIndex, simd::Vec128_t< ScalarType > > const_lane_type
Definition vector.hpp:53
ace argon_type MultiplyRoundFixedQMax(lane_type l) const
Multiply a fixed-point vector by a lane value, returning a fixed-point product.
Definition vector.hpp:558
ace argon_type CountActiveBits() const
Count the number of bits that are set to one in the vector.
Definition vector.hpp:1265
ace argon_type MultiplyFixedQMax(scalar_type s) const
Multiply a QMax fixed-point vector by a scalar value, returning a fixed-point product.
Definition vector.hpp:533
ace predicate_type CompareTestNonzero(argon_type b) const
Ands the current vector with the given vector, then checks if nonzero.
Definition vector.hpp:1245
static ace std::array< argon_type, stride > LoadToLaneInterleaved(std::array< argon_type, stride > multi, const scalar_type *ptr)
Load a value from a pointer into a vector at the lane index lane, de-interleaving.
Definition vector.hpp:1054
ace argon_type PairwiseMax(argon_type b) const
Select the maximum of each pair of lanes in the two vectors.
Definition vector.hpp:1158
ace argon_type operator/(argon_type b) const
Divide a vector and return the result.
Definition vector.hpp:212
ace argon_type BitwiseNot() const
Bitwise ops.
Definition vector.hpp:1169
ace argon_type Max(argon_type b) const
Compare the lanes of two vectors, copying the larger of each lane to the result.
Definition vector.hpp:712
static ace argon_type FromLane(argon::Lane< IntrinsicType > lane)
Definition vector.hpp:123
constexpr Vector & operator=(Vector &&other)=default
Move assignment operator for the Vector class.
ace argon_type ShiftLeftInsert(argon_type b) const
Shift the elements of the vector to the left by a specified number of bits, and then OR the result wi...
Definition vector.hpp:810
ace const lane_type GetLane(const size_t i) const
Get a single lane of the vector by index.
Definition vector.hpp:292
ace argon_type BitwiseAnd(argon_type b) const
Bitwise AND of the vector with another vector.
Definition vector.hpp:1178
ace argon_type MultiplyAdd(argon_type b, const_lane_type< LaneIndex > c) const
Multiply a vector by a lane value and add a third vector.
Definition vector.hpp:484
ace argon_type Modulo(scalar_type b) const
Get the modulo of a vector and a scalar value.
Definition vector.hpp:702
ace argon_type Popcount() const
Count the number of bits that are set to one in the vector.
Definition vector.hpp:1278
ace argon_bool_type LessThan(argon_type b) const
Compare the lanes of two vectors, setting the result lane's bits to ON if a is less than b.
Definition vector.hpp:747
ace argon_type ShiftRight(const int i) const
Shift the elements of the vector to the right by a specified number of bits.
Definition vector.hpp:351
ace argon_type ShiftLeftRoundSaturate(argon_type b) const
Shift the elements of the vector to the left by a specified number of bits, rounding and saturating t...
Definition vector.hpp:796
ace Vector(scalar_type scalar)
Constructs a Vector from a scalar value.
Definition vector.hpp:79
ace argon_type ShiftRight() const
Shift the elements of the vector to the right by a specified number of bits.
Definition vector.hpp:816
ace argon_type Negate() const
Negate each lane of the vector and return the result.
Definition vector.hpp:357
ace void StoreLaneTo(scalar_type *ptr)
Store a lane of the vector to a pointer.
Definition vector.hpp:1139
ace argon_type Multiply(lane_type b) const
Multiply a vector by a lane value.
Definition vector.hpp:437
ace argon_type MultiplySubtract(argon_type b, scalar_type c) const
Multiply a vector by a scalar value and subtract from a third vector.
Definition vector.hpp:507
ace argon_type MultiplyAdd(const_lane_type< LaneIndex > b, argon_type c) const
Multiply a vector by a lane value and add a third vector.
Definition vector.hpp:490
ace argon_type MultiplyRoundFixedQMax(scalar_type s) const
Multiply a fixed-point vector by a scalar value, returning a fixed-point product.
Definition vector.hpp:551
ace argon_bool_type GreaterThanOrEqual(argon_type b) const
Compare the lanes of two vectors, setting the result lane's bits to ON if a is greater than or equal ...
Definition vector.hpp:735
ace argon_bool_type operator>(argon_type b) const
Compare two vectors, checking if this vector is greater than the other.
Definition vector.hpp:224
ace argon_type operator<<(const int i) const
Shift the elements of the vector to the left by a specified number of bits.
Definition vector.hpp:266
ace argon_type AddHalve(argon_type b) const
Adds two vectors, halving the result.
Definition vector.hpp:376
static ace std::array< argon_type, stride > LoadGatherOffsetIndexInterleaved(const scalar_type *base_ptr, helpers::ArgonFor_t< simd::make_unsigned_t< Bool_t< VectorType > > > offset_vector)
Perform a Load-Gather of interleaved elements.
Definition vector.hpp:1071
ace argon_type Divide(argon_type b) const
Divide two vectors.
Definition vector.hpp:679
ace argon_type ShiftRightRound() const
Shift the elements of the vector to the right by a specified number of bits, rounding the result.
Definition vector.hpp:826
Bool_t< simd::Vec128_t< ScalarType > > predicate_type
Definition vector.hpp:58
static ace std::array< argon_type, stride > LoadCopyInterleaved(const scalar_type *ptr)
Load multiple vectors from a pointer, duplicating the value across all lanes.
Definition vector.hpp:990
static ace int size()
Get the number of elements.
Definition vector.hpp:1368
static ace argon_type LoadGatherOffsetBytes(const scalar_type *base, helpers::ArgonFor_t< simd::make_unsigned_t< Bool_t< VectorType > > > offset_vector)
Using a base pointer and a vector of byte offsets, create a new vector.
Definition vector.hpp:887
ace argon_bool_type GreaterThan(argon_type b) const
Compare the lanes of two vectors, setting the result lane's bits to ON if a is greater than b.
Definition vector.hpp:743
ace argon_type ReciprocalStep(argon_type b) const
Newton-Raphson step for reciprocal refinement: 2 - a * b.
Definition vector.hpp:601
ace std::array< argon_type, 2 > ZipWith(argon_type b) const
Zip two vectors together, returning two vectors of pairs.
Definition vector.hpp:1305
ace argon_type operator-(argon_type b) const
Subtract a vector and return the result.
Definition vector.hpp:206
ace argon_type MultiplyRoundAddFixedQMax(argon_type b, arg_type c) const
Multiply-round-add three fixed-point vectors, returning a fixed-point sum.
Definition vector.hpp:664
helpers::ArgonFor_t< simd::Vec128_t< ScalarType > > argon_type
Definition vector.hpp:57
static ace argon_type LoadScalar(const scalar_type *ptr)
Constructs a Vector from a scalar pointer.
Definition vector.hpp:104
ace argon_type ShiftRightAccumulateRound(argon_type b) const
Shift the elements of the b vector to the right by a specified number of bits, and then add the resul...
Definition vector.hpp:846
ace argon_type operator*(argon_type b) const
Multiply a vector and return the result.
Definition vector.hpp:209
ace predicate_type TestNonzero() const
ANDs the current vector with the given vector, then checks if the result is nonzero.
Definition vector.hpp:1248
static ace argon_type LoadCopy(const scalar_type *ptr)
Load a vector from a pointer, duplicating the value across all lanes.
Definition vector.hpp:872
ace argon_type LoadToLane(const scalar_type *ptr)
Load a lane from a pointer.
Definition vector.hpp:950
ace argon_type SubtractAbs(argon_type b) const
Subtract two vectors, taking the absolute value of the result.
Definition vector.hpp:405
ace argon_type ShiftLeft() const
Shift the elements of the vector to the left by a specified number of bits.
Definition vector.hpp:777
ace const_lane_type< lanes - 1 > LastLane()
Get the last lane of the vector.
Definition vector.hpp:348
ace argon_type ShiftLeftSaturate(helpers::ArgonFor_t< simd::make_signed_t< Bool_t< VectorType > > > b) const
Shift the elements of the vector to the left by a specified number of bits, saturating the result.
Definition vector.hpp:786
ace argon_type MultiplyFixedQMax(lane_type l) const
Multiply a QMax fixed-point vector by a lane value, returning a fixed-point product.
Definition vector.hpp:538
ace argon_type MultiplySubtract(scalar_type b, argon_type c) const
Multiply a vector by a scalar value and subtract from a third vector.
Definition vector.hpp:517
ace argon_bool_type operator<=(argon_type b) const
Compare two vectors, checking if this vector is less than or equal to the other.
Definition vector.hpp:227
ace argon_type ShiftLeftSaturate() const
Shift the elements of the vector to the left by a specified number of bits, saturating the result.
Definition vector.hpp:800
ace argon_type operator&(argon_type b) const
Bitwise AND two vectors and return the result.
Definition vector.hpp:239
ace argon_type MultiplyRoundFixedQMax(argon_type v) const
Multiply two fixed-point vectors, returning a fixed-point product.
Definition vector.hpp:545
ace argon_type SubtractSaturate(argon_type b) const
Subtract two vectors, saturating the result.
Definition vector.hpp:401
static ace std::array< argon_type, Stride > LoadToLaneInterleaved(simd::MultiVector_t< VectorType, Stride > multi, const scalar_type *ptr)
Load a value from a pointer into a vector at the lane index lane, de-interleaving.
Definition vector.hpp:1020
ace argon_type ReciprocalSqrtEstimateRefine(int n_iters=1) const
Compute a refined reciprocal-sqrt estimate using Newton-Raphson iterations.
Definition vector.hpp:640
static ace std::array< argon_type, n > LoadMulti(const scalar_type *ptr)
Load n vectors from a single contiguous set of memory.
Definition vector.hpp:1091
static ace argon_type LoadGatherOffsetIndex(const scalar_type *base, helpers::ArgonFor_t< simd::make_unsigned_t< Bool_t< VectorType > > > offset_vector)
Using a base pointer and a vector of offset indices, create a new vector.
Definition vector.hpp:919
helpers::ArgonFor_t< predicate_type > argon_bool_type
Definition vector.hpp:59
ace argon_type operator|(argon_type b) const
Bitwise OR two vectors and return the result.
Definition vector.hpp:242
ace argon_type MultiplyFixedQMax(argon_type v) const
Multiply two QMax fixed-point vectors, returning a fixed-point product.
Definition vector.hpp:529
ace argon_type MultiplyAdd(argon_type b, scalar_type c) const
Multiply a vector by a scalar value and add a third vector.
Definition vector.hpp:458
simd::Scalar_t< simd::Vec128_t< ScalarType > > scalar_type
Definition vector.hpp:55
ace const const_lane_type< LaneIndex > GetLane() const
Get a single lane of the vector by index.
Definition vector.hpp:330
ace argon_type Select(ArgType true_value, ArgType false_value) const
Bitwise select between two vectors, using the current vector as a mask.
Definition vector.hpp:1239
ace argon_type operator~() const
Bitwise NOT the vector and return the result.
Definition vector.hpp:248
ace Vector(argon::ConstLane< LaneIndex, VectorType > lane)
Constructs a Vector from a ConstLane object.
Definition vector.hpp:92
ace std::array< scalar_type, lanes > to_array()
Convert the vector to an array of scalar values.
Definition vector.hpp:282
ace argon_type Absolute() const
Get the absolute value of the vector.
Definition vector.hpp:564
static constexpr size_t lanes
Definition vector.hpp:62
ace lane_type operator[](const size_t i)
Access a lane of the vector by index.
Definition vector.hpp:254
ace argon_type BitwiseClear(argon_type b) const
Bitwise AND of the vector with the NOT of another vector.
Definition vector.hpp:1225
ace argon_type BitwiseAndNot(argon_type b) const
Bitwise AND of the vector with the NOT of another vector.
Definition vector.hpp:1216
ace argon_type ShiftRightAccumulate(argon_type b) const
Shift the elements of the b vector to the right by a specified number of bits, and then add the resul...
Definition vector.hpp:834
ace argon_type operator>>(const int i) const
Shift the elements of the vector to the right by a specified number of bits.
Definition vector.hpp:257
ace argon_type Add(argon_type b) const
Add two vectors.
Definition vector.hpp:366
ace argon_type BitwiseOr(argon_type b) const
Bitwise OR of the vector with another vector.
Definition vector.hpp:1187
ace argon_type ReciprocalSqrtEstimate() const
1 / sqrt(value), using an estimate for speed
Definition vector.hpp:584
constexpr Vector()=default
The default constructor for the Vector class.
ace const lane_type GetLane(const int i) const
Get a single lane of the vector by index.
Definition vector.hpp:310
constexpr Vector & operator=(const Vector &other)=default
Copy assignment operator for the Vector class.
static ace argon_type GenerateWithIndex(FuncType body)
Constructs a Vector from a function that generates values with an index.
Definition vector.hpp:191
ace argon_bool_type operator!=(argon_type b) const
Compare two vectors for inequality.
Definition vector.hpp:218
ace argon_type CountLeadingZeroBits() const
Count the number of consecutive top bits that are set to zero.
Definition vector.hpp:1261
ace argon_type Min(argon_type b) const
Compare the lanes of two vectors, copying the smaller of each lane to the result.
Definition vector.hpp:721
ace argon_type operator-() const
Negate the SIMD vector and return the result.
Definition vector.hpp:200
ace argon_type SubtractHalve(argon_type b) const
Subtract two vectors, halving the result.
Definition vector.hpp:398
ace argon_type MultiplyAdd(lane_type b, argon_type c) const
Multiply a vector by a lane value and add a third vector.
Definition vector.hpp:479
ace argon_type ShiftRightInsert(argon_type b) const
Shift the elements of the vector to the right by a specified number of bits, ORing the result with th...
Definition vector.hpp:858
ace argon_type SubtractAbsAdd(argon_type b, argon_type c) const
Subtract two vectors, taking the absolute value of the result and adding a third vector.
Definition vector.hpp:409
constexpr Vector(Vector &&other)=default
Move constructor for the Vector class.
ace argon_type Subtract(argon_type b) const
Subtract two vectors.
Definition vector.hpp:388
ace argon_type BitwiseXor(argon_type b) const
Bitwise XOR of the vector with another vector.
Definition vector.hpp:1196
ace Vector(argon::Lane< VectorType > lane)
Constructs a Vector from a Lane object.
Definition vector.hpp:85
static ace argon_type Generate(FuncType body)
Constructs a Vector from a function that generates values.
Definition vector.hpp:176
ace argon_type AddHalveRound(argon_type b) const
Adds two vectors, halving and rounding the result.
Definition vector.hpp:380
ace Lane< const VectorType > operator[](const size_t i) const
Access a lane of the vector by index.
Definition vector.hpp:251
Header file for SIMD features and platform detection.
typename ArgonFor< std::remove_cv_t< T > >::type ArgonFor_t
Helper alias to get the Argon type for a given vector type.
Definition argon_for.hpp:45
Lane deconstruction feature.
Definition argon_full.hpp:399
Helper functions to convert C-style arrays to std::array of Argon types, mimicking std::to_array.
constexpr std::array< helpers::ArgonFor_t< T >, N > to_array(T(&a)[N])
Convert a C-style array of vector types to a std::array of Argon types.
Definition to_array.hpp:29