10#include "arm_simd/helpers.hpp"
11#include "arm_simd/helpers/multivector.hpp"
12#include "arm_simd/helpers/scalar.hpp"
13#include "arm_simd/helpers/vec64.hpp"
16#include "helpers/bool.hpp"
20#ifdef __ARM_FEATURE_MVE
26#ifdef ARGON_PLATFORM_SIMDE
29#define ace [[gnu::always_inline]] constexpr
31#define ace [[gnu::always_inline]] inline
42template <
typename T,
typename... Ts>
43inline constexpr bool is_one_of = std::disjunction_v<std::is_same<T, Ts>...>;
49template <
typename VectorType>
52 template <
size_t LaneIndex>
62 static constexpr size_t lanes = (simd::is_quadword_v<VectorType> ? 16 : 8) /
sizeof(
scalar_type);
74 ace
Vector(VectorType vector) : vec_{
std::move(vector)} {};
81#ifndef ARGON_PLATFORM_MVE
91 template <
size_t LaneIndex>
95 template <
typename... ArgTypes>
96 requires(
sizeof...(ArgTypes) > 1)
97 ace
Vector(ArgTypes... args) : vec_{
std::forward<ArgTypes>(args)...} {}
111#ifdef ARGON_PLATFORM_MVE
112 return simd::duplicate(scalar);
114 return simd::duplicate<VectorType>(scalar);
122 template <simd::is_vector_type IntrinsicType>
124#ifdef ARGON_PLATFORM_MVE
125 return simd::duplicate(lane.Get());
127 return simd::duplicate_lane<vector_type>(lane.vec(), lane.lane());
135 template <
size_t LaneIndex>
137#ifdef ARGON_PLATFORM_MVE
138 return simd::duplicate(lane.Get());
140 if constexpr (simd::is_quadword_v<VectorType>) {
141 return simd::duplicate_lane_quad<LaneIndex>(lane.vec());
143 return simd::duplicate_lane<LaneIndex>(lane.vec());
156#if __cpp_if_consteval >= 202106L
157 return IotaHelper(start, std::make_index_sequence<lanes>{});
159 return Argon{start}.
Add(VectorType{0, 1, 2, 3});
167 template <
typename FuncType>
168 requires std::convertible_to<FuncType, std::function<
scalar_type()>>
171 utility::constexpr_for<0, lanes, 1>([&](
size_t i) {
182 template <
typename FuncType>
186 utility::constexpr_for<0, lanes, 1>([&]<
size_t i>() {
251#if ARGON_USE_COMPILER_EXTENSIONS
260#if ARGON_USE_COMPILER_EXTENSIONS
268 [[gnu::always_inline]]
constexpr VectorType
vec()
const {
return vec_; }
271 [[gnu::always_inline]]
constexpr operator VectorType()
const {
return vec_; }
276 std::array<scalar_type, lanes> out;
277 simd::store1(out.data(), vec_);
286#ifdef ARGON_PLATFORM_MVE
289 return {vec_,
static_cast<int>(i)};
293#ifdef ARGON_PLATFORM_MVE
296 return {vec_,
static_cast<int>(i)};
304#ifdef ARGON_PLATFORM_MVE
312#ifdef ARGON_PLATFORM_MVE
322 template <
size_t LaneIndex>
324#ifdef ARGON_PLATFORM_MVE
325 return vec_[LaneIndex];
331 template <
size_t LaneIndex>
333#ifdef ARGON_PLATFORM_MVE
334 return vec_[LaneIndex];
351 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
354 return simd::negate(vec_);
360 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
361 return vec_ + b.vec_;
363 return simd::add(vec_, b);
382 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
383 return vec_ - b.vec_;
385 return simd::subtract(vec_, b);
403#ifdef ARGON_PLATFORM_MVE
404 return mve::add(vec_, mve::subtract_absolute(b, c));
406 return neon::subtract_absolute_add(vec_, b, c);
412 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
413 return vec_ * b.vec_;
415 return simd::multiply(vec_, b);
421 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
424 return simd::multiply(vec_, b);
428#ifndef ARGON_PLATFORM_MVE
433 template <
size_t LaneIndex>
435 return neon::multiply_lane(vec_, b.vec(), b.lane());
442 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
443 return vec_ + b.vec_ * c.vec_;
445 return simd::multiply_add(vec_, b, c);
452 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
453 return vec_ + b.vec_ * c;
455 return simd::multiply_add(vec_, b, c);
463#ifndef ARGON_PLATFORM_MVE
467 return simd::multiply_add_lane(vec_, b.vec(), c.vec(), c.lane());
476 template <
size_t LaneIndex>
478 return simd::multiply_add_lane(vec_, b.vec(), c.vec(), c.lane());
482 template <
size_t LaneIndex>
491#if ARGON_USE_COMPILER_EXTENSIONS
492 return vec_ - b.vec_ * c.vec_;
494 return simd::multiply_subtract(vec_, b, c);
501#if ARGON_USE_COMPILER_EXTENSIONS
502 return vec_ - b.vec_ * c;
504 return simd::multiply_subtract(vec_, b, c);
512#ifndef ARGON_PLATFORM_MVE
516 return simd::multiply_subtract_lane(vec_, b.vec(), c.vec(), c.lane());
528#ifndef ARGON_PLATFORM_MVE
532 return simd::multiply_double_saturate_high_lane(vec_, l.vec(), l.lane());
539 return simd::multiply_double_round_saturate_high(vec_, v);
545 return simd::multiply_double_round_saturate_high(vec_, s);
548#ifndef ARGON_PLATFORM_MVE
552 return simd::multiply_double_round_saturate_high_lane(vec_, l.vec(), l.lane());
564#ifdef ARGON_PLATFORM_MVE
565 if constexpr (std::is_same_v<scalar_type, uint32_t>) {
566 std::numeric_limits<uint32_t>::max() / vec_;
571 return simd::reciprocal_estimate(vec_);
577 template <
typename arg_type>
578 requires(is_one_of<arg_type, argon_type, scalar_type, lane_type> || std::is_convertible_v<arg_type, argon_type> ||
579 std::is_convertible_v<arg_type, scalar_type>)
581 return Add(b.MultiplyFixedQMax(c));
586 template <
typename arg_type>
587 requires(is_one_of<arg_type, argon_type, scalar_type, lane_type> || std::is_convertible_v<arg_type, argon_type> ||
588 std::is_convertible_v<arg_type, scalar_type>)
590 return Add(b.MultiplyRoundFixedQMax(c));
596 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
597 return vec_ / b.vec_;
599 return simd::divide(vec_, b);
605 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
606 return vec_ / b.vec_;
616 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
617 return vec_ % b.vec_;
618 }
else if constexpr (std::floating_point<scalar_type>) {
628 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
631 return this->map([b](
scalar_type lane1) {
return std::fmod(lane1, b); });
638 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
639 return vec_ > b.vec_ ? vec_ : b.vec_;
641 return simd::max(vec_, b);
647 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
648 return vec_ < b.vec_ ? vec_ : b.vec_;
650 return simd::min(vec_, b);
677 requires std::is_integral_v<scalar_type>
679 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
680 return vec_ << b.vec_;
682 return simd::shift_left(vec_, b.vec_);
689 requires std::is_integral_v<scalar_type>
691 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
695 return simd::shift_left(vec_, b.vec_);
703 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
706 return simd::shift_left<n>(vec_);
712 requires(std::is_integral_v<scalar_type>)
714 return simd::shift_left_saturate(vec_, b);
726 return simd::shift_left_saturate<n>(vec_);
736 return simd::shift_left_insert<n>(vec_, b);
742 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
745 return simd::shift_right<n>(vec_);
752 return simd::shift_right_round<n>(vec_);
760#ifdef ARGON_PLATFORM_MVE
761 return vec_ + (b >> n);
763 return simd::shift_right_accumulate<n>(vec_, b);
772#ifdef ARGON_PLATFORM_MVE
773 return vec_ + mve::shift_right_round<n>(b);
775 return simd::shift_right_accumulate_round<n>(vec_, b);
784 return simd::shift_right_insert<n>(vec_, b);
789#ifdef ARGON_PLATFORM_MVE
790 return mve::load1(ptr);
792 return neon::load1<VectorType>(ptr);
798#ifdef ARGON_PLATFORM_MVE
801 utility::constexpr_for<0, lanes, 1>([val, &
vec]<
int i>() {
vec[i] = val; });
803 return simd::load1_duplicate<VectorType>(ptr);
815#ifdef ARGON_PLATFORM_MVE
818 "Unsupported size for gather load");
821 return mve::load_byte_gather_offset(base, offset_vector);
823 return mve::load_halfword_gather_offset(base, offset_vector);
825 return mve::load_word_gather_offset(base, offset_vector);
827 return mve::load_doubleword_gather_offset(base, offset_vector);
831 utility::constexpr_for<0, lanes, 1>([&]<
int i>() {
832 auto lane_val = neon::get_lane<i>(offset_vector);
847#ifdef ARGON_PLATFORM_MVE
850 "Unsupported size for gather load");
853 return mve::load_byte_gather_offset(base, offset_vector);
855 return mve::load_halfword_gather_offset(base, offset_vector *
sizeof(
scalar_type));
857 return mve::load_word_gather_offset(base, offset_vector *
sizeof(
scalar_type));
859 return mve::load_doubleword_gather_offset(base, offset_vector *
sizeof(
scalar_type));
863 utility::constexpr_for<0, lanes, 1>([&]<
int i>() {
864 auto lane_val = neon::get_lane<i>(offset_vector);
865 destination = destination.template
LoadToLane<i>(base + lane_val);
874 template <
size_t lane>
886 template <
size_t str
ide>
888#ifdef ARGON_PLATFORM_MVE
889 static_assert(stride == 2 || stride == 4,
890 "De-interleaving Loads can only be performed with a stride of 2, 3, or 4");
891 if constexpr (stride == 2) {
893 }
else if constexpr (stride == 4) {
897 static_assert(stride > 1 && stride < 5,
"De-interleaving Loads can only be performed with a stride of 2, 3, or 4");
898 using multivec_type = simd::MultiVector_t<VectorType, stride>;
899 if constexpr (stride == 2) {
901 }
else if constexpr (stride == 3) {
903 }
else if constexpr (stride == 4) {
914 template <
size_t str
ide>
916#ifdef ARGON_PLATFORM_MVE
917 static_assert(stride == 2 || stride == 4,
918 "De-interleaving LoadCopy can only be performed with a stride of 2, 3, or 4");
919 if constexpr (stride == 2) {
920 return {mve::duplicate(*ptr++), mve::duplicate(*ptr++)};
921 }
else if constexpr (stride == 4) {
922 return {mve::duplicate(*ptr++), mve::duplicate(*ptr++), mve::duplicate(*ptr++), mve::duplicate(*ptr)};
925 static_assert(stride > 1 && stride < 5,
926 "De-interleaving LoadCopy can only be performed with a stride of 2, 3, or 4");
927 using multivec_type = simd::MultiVector<VectorType, stride>::type;
928 if constexpr (stride == 2) {
930 }
else if constexpr (stride == 3) {
932 }
else if constexpr (stride == 4) {
944 template <
size_t LaneIndex,
size_t Str
ide>
947 static_assert(Stride > 1 && Stride < 5,
"De-interleaving Loads can only be performed with a stride of 2, 3, or 4");
948#ifdef ARGON_PLATFORM_MVE
950 utility::constexpr_for<0, Stride, 1>([&]<
int i>() {
951 out.val[i][LaneIndex] = ptr[i];
955 if constexpr (Stride == 2) {
956 if constexpr (simd::is_quadword_v<VectorType>) {
957 return argon::to_array(simd::load2_lane_quad<LaneIndex>(ptr, multi).val);
961 }
else if constexpr (Stride == 3) {
962 if constexpr (simd::is_quadword_v<VectorType>) {
963 return argon::to_array(simd::load3_lane_quad<LaneIndex>(ptr, multi).val);
967 }
else if constexpr (Stride == 4) {
968 if constexpr (simd::is_quadword_v<VectorType>) {
969 return argon::to_array(simd::load4_lane_quad<LaneIndex>(ptr, multi).val);
978 template <
size_t lane,
size_t str
ide>
981 using multivec_type = simd::MultiVector_t<VectorType, stride>;
995 template <
size_t str
ide>
999 static_assert(stride > 1 && stride < 5,
"De-interleaving Loads can only be performed with a stride of 2, 3, or 4");
1000 std::array<argon_type, stride> multi{};
1001 utility::constexpr_for<0, lanes, 1>([&]<
int i>() {
1002 auto lane_val = simd::get_lane<i>(offset_vector);
1017 static_assert(n > 1 && n < 5,
"LoadMulti can only be performed with a size of 2, 3, or 4");
1018#ifdef ARGON_PLATFORM_MVE
1019 std::array<argon_type, n> multi{};
1020 utility::constexpr_for<0, n, 1>([&]<
int i>() {
1026#if defined(__clang__) || (__GNUC__ > 13)
1027 using multi_type = simd::MultiVector_t<VectorType, n>;
1028 if constexpr (n == 2) {
1030 }
else if constexpr (n == 3) {
1032 }
else if constexpr (n == 4) {
1036 if constexpr (n == 2) {
1037 auto a = simd::load1(ptr);
1038 auto b = simd::load1(ptr +
lanes);
1040 }
else if constexpr (n == 3) {
1041 auto a = simd::load1(ptr);
1042 auto b = simd::load1(ptr +
lanes);
1043 auto c = simd::load1(ptr + 2 *
lanes);
1045 }
else if constexpr (n == 4) {
1046 auto a = simd::load1(ptr);
1047 auto b = simd::load1(ptr +
lanes);
1048 auto c = simd::load1(ptr + 2 *
lanes);
1049 auto d = simd::load1(ptr + 3 *
lanes);
1050 return {a, b, c, d};
1063 template <
int LaneIndex>
1065#ifdef ARGON_PLATFORM_MVE
1066 *ptr = vec_[LaneIndex];
1068 simd::store1_lane<LaneIndex>(ptr, vec_);
1072#ifndef ARGON_PLATFORM_MVE
1095 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
1098 return simd::bitwise_not(vec_);
1104 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
1105 return vec_ & b.vec_;
1107 return simd::bitwise_and(vec_, b);
1113 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
1114 return vec_ | b.vec_;
1116 return simd::bitwise_or(vec_, b);
1122 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
1123 return vec_ ^ b.vec_;
1125 return simd::bitwise_xor(vec_, b);
1132 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
1133 return vec_ | ~b.vec_;
1135 return simd::bitwise_or_not(vec_, b);
1142 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
1143 return vec_ & ~b.vec_;
1145 return simd::bitwise_clear(vec_, b);
1152#ifndef ARGON_PLATFORM_MVE
1155 template <
typename ArgType>
1156 requires std::is_unsigned_v<scalar_type>
1158 return simd::bitwise_select(vec_, true_value, false_value);
1162 template <
typename ArgType>
1163 requires std::is_unsigned_v<scalar_type>
1165 return simd::bitwise_select(true_value, false_value);
1181 return simd::count_leading_sign_bits(vec_);
1191#ifdef ARGON_PLATFORM_MVE
1192 auto new_vec = vec_;
1193 utility::constexpr_for<0, lanes, 1>([&]<
int i>() {
1194 new_vec[i] = std::popcount(vec_[i]);
1198 return neon::count_active_bits(vec_);
1210#ifdef ARGON_PLATFORM_MVE
1211 auto new_vec = vec_;
1212 utility::constexpr_for<0, lanes, 1>([&]<
int i>() {
1214 new_vec[i] = b.vec_[i];
1219 return simd::extract<n>(vec_, b);
1223 ace
argon_type Reverse64bit()
const {
return simd::reverse_64bit(vec_); }
1224 ace
argon_type Reverse32bit()
const {
return simd::reverse_32bit(vec_); }
1225 ace
argon_type Reverse16bit()
const {
return simd::reverse_16bit(vec_); }
1231#ifdef ARGON_PLATFORM_MVE
1232 std::array<argon_type, 2> new_vec;
1233 utility::constexpr_for<0, lanes, 1>([&]<
int i>() {
1235 new_vec[0][i] = vec_[i / 2];
1236 new_vec[1][i] = vec_[(i +
lanes) / 2];
1238 new_vec[0][i] = b.vec_[i / 2];
1239 new_vec[1][i] = b.vec_[(i +
lanes) / 2];
1252#ifdef ARGON_PLATFORM_MVE
1253 std::array<argon_type, 2> new_vec;
1254 utility::constexpr_for<0, lanes, 1>([&]<
int i>() {
1255 if ((i * 2) <
lanes) {
1256 new_vec[0][i] = vec_[i * 2];
1257 new_vec[1][i] = vec_[i * 2 + 1];
1259 new_vec[0][i] = b.vec_[i * 2];
1260 new_vec[1][i] = b.vec_[i * 2 + 1];
1275#ifdef ARGON_PLATFORM_MVE
1276 std::array<argon_type, 2> new_vec;
1277 utility::constexpr_for<0, lanes, 1>([&]<
int i>() {
1279 new_vec[0][i] = vec_[i];
1280 new_vec[1][i] = vec_[i + 1];
1282 new_vec[0][i] = b.vec_[i + 1];
1283 new_vec[1][i] = b.vec_[i];
1295 template <
typename FuncType>
1299 utility::constexpr_for<0, lanes, 1>([&]<
int i>() {
1300 out[i] = body(vec_[i]);
1305 template <
typename FuncType>
1307 ace
argon_type map_with_index(FuncType body)
const {
1309 utility::constexpr_for<0, lanes, 1>([&]<
int i>() {
1310 out[i] = body(vec_[i], i);
1315 template <
typename FuncType>
1319 utility::constexpr_for<0, lanes, 1>([&]<
int i>() {
1320 out[i] = body(vec_[i], other.vec_[i]);
1325 template <
typename FuncType>
1326 requires std::convertible_to<FuncType, std::function<void(
scalar_type&)>>
1328 VectorType out = vec_;
1329 utility::constexpr_for<0, lanes, 1>([&]<
int i>() {
1335 template <
typename FuncType>
1336 requires std::convertible_to<FuncType, std::function<void(
scalar_type&,
int)>>
1337 ace
argon_type each_lane_with_index(FuncType body) {
1338 VectorType out = vec_;
1339 utility::constexpr_for<0, lanes, 1>([&]<
int i>() {
1345 template <
typename FuncType>
1346 requires std::convertible_to<FuncType, std::function<void()>>
1347 ace
void if_lane(FuncType true_branch) {
1348 utility::constexpr_for<0, lanes, 1>([&]<
int i>() {
1355 template <
typename FuncType>
1356 requires std::convertible_to<FuncType, std::function<void()>>
1357 ace
void if_else_lane(FuncType true_branch, FuncType false_branch) {
1358 utility::constexpr_for<0, lanes, 1>([&]<
int i>() {
1367 template <
typename FuncType>
1368 requires std::convertible_to<FuncType, std::function<void(
int)>>
1369 ace
void if_lane_with_index(FuncType true_branch) {
1370 utility::constexpr_for<0, lanes, 1>([&]<
int i>() {
1377 template <
typename FuncType1,
typename FuncType2>
1378 requires std::convertible_to<FuncType1, std::function<void(
int)>> &&
1379 std::convertible_to<FuncType2, std::function<void(
int)>>
1380 ace
void if_else_lane_with_index(FuncType1 true_branch, FuncType2 false_branch) {
1381 utility::constexpr_for<0, lanes, 1>([&]<
int i>() {
1391 utility::constexpr_for<0, lanes, 1>([&]<
int i>() {
1400#ifdef ARGON_PLATFORM_MVE
1401 return mve::max_reduce_max(vec_, vec_) != 0;
1404 utility::constexpr_for<0, lanes, 1>([&]<
int i>() {
1405 if (nonzero[i] == 0) {
1413 template <std::
size_t Index>
1414 std::tuple_element_t<Index, argon_type> get() {
1415#ifdef ARGON_PLATFORM_MVE
1423 template <std::size_t... Ints>
1425 return VectorType{
static_cast<scalar_type>(start + Ints)...};
1437template <
typename T>
1438struct tuple_size<argon::Vector<T>> {
1442template <
size_t Index,
typename T>
1443struct tuple_element<Index, argon::Vector<T>> {
1444 static_assert(Index < argon::Vector<T>::lanes);
Provides utility templates and concepts for type traits and compile-time iteration.
Definition argon_full.hpp:24
Represents a single lane of a SIMD vector, where the lane's index is known at compile time.
Definition lane.hpp:44
Represents a single lane of a SIMD vector.
Definition lane.hpp:102
ace argon_type PairwiseMin(argon_type b) const
Select the minimum of each pair of lanes in the two vectors.
Definition vector.hpp:1088
Lane< simd::Vec128_t< ScalarType > > lane_type
Definition vector.hpp:54
ace argon_type MultiplySubtract(argon_type b, lane_type c) const
Multiply a vector by a lane value and subtract from a third vector.
Definition vector.hpp:515
ace Vector(VectorType vector)
Constructs a Vector from a SIMD vector type.
Definition vector.hpp:74
simd::Vec128_t< ScalarType > vector_type
Definition vector.hpp:56
ace argon_type Modulo(argon_type b) const
Get the modulo of two vectors.
Definition vector.hpp:615
ace argon_bool_type operator<(argon_type b) const
Compare two vectors, checking if this vector is less than the other.
Definition vector.hpp:214
ace argon_type ShiftLeftRound(argon_type b) const
Shift the elements of the vector to the left by a specified number of bits, rounding the result.
Definition vector.hpp:718
static ace argon_type GenerateWithIndex(FuncType body)
Constructs a Vector from a function that generates values with an index.
Definition vector.hpp:184
std::array< argon_type, 2 > TransposeWith(argon_type b) const
Perform a 2x2 matrix transpose on two vectors, returning two vectors of pairs.
Definition vector.hpp:1274
constexpr Vector(const Vector &other)=default
Copy constructor for the Vector class.
static ace std::array< argon_type, stride > LoadInterleaved(const scalar_type *ptr)
Load multiple vectors from a pointer, de-interleaving.
Definition vector.hpp:887
static ace argon_type Iota(scalar_type start)
Constructs a Vector from an incrementing sequence.
Definition vector.hpp:154
static ace argon_type FromScalar(scalar_type scalar)
Definition vector.hpp:110
ace argon_type Multiply(scalar_type b) const
Multiply a vector by a scalar value.
Definition vector.hpp:420
ace argon_type MultiplySubtract(argon_type b, argon_type c) const
Multiply two vectors and subtract from a third vector.
Definition vector.hpp:490
ace argon_type Extract(argon_type b) const
Extract n elements from the lower end of the operand, and the remaining elements from the top end of ...
Definition vector.hpp:1209
constexpr VectorType vec() const
Get the underlying SIMD vector.
Definition vector.hpp:268
ace argon_type ReciprocalEstimate() const
1 / value, using an estimate for speed
Definition vector.hpp:561
ace argon_type Multiply(argon_type b) const
Multiply two vectors.
Definition vector.hpp:411
ace argon_bool_type LessThanOrEqual(argon_type b) const
Compare the lanes of two vectors, setting the result lane's bits to ON if a is less than or equal to ...
Definition vector.hpp:664
ace argon_type operator--() const
Decrement the vector by 1 and return the result.
Definition vector.hpp:229
ace argon_type ShiftLeft(const int i) const
Shift the elements of the vector to the left by a specified number of bits.
Definition vector.hpp:347
static ace argon_type Load(const scalar_type *ptr)
Load a vector from a pointer.
Definition vector.hpp:788
ace argon_type operator+(argon_type b) const
Add a vector and return the result.
Definition vector.hpp:196
ace argon_type ShiftLeft(helpers::ArgonFor_t< simd::make_signed_t< Bool_t< VectorType > > > b) const
Shift the elements of the vector to the left by a specified number of bits.
Definition vector.hpp:676
ace argon_type Multiply(const_lane_type< LaneIndex > b) const
Multiply a vector by a lane value.
Definition vector.hpp:434
ace argon_type PairwiseAdd(argon_type b) const
Pairwise ops.
Definition vector.hpp:1078
ace argon_type MultiplyAdd(scalar_type b, argon_type c) const
Multiply a vector by a scalar value and add a third vector.
Definition vector.hpp:461
ace argon_bool_type operator>=(argon_type b) const
Compare two vectors, checking if this vector is greater than or equal to the other.
Definition vector.hpp:223
ace argon_type MultiplyAdd(argon_type b, lane_type c) const
Multiply a vector by a lane value and add a third vector.
Definition vector.hpp:466
ace argon_bool_type operator==(argon_type b) const
Compare two vectors for equality.
Definition vector.hpp:208
ace argon_type MultiplyAdd(argon_type b, argon_type c) const
Multiply two vectors and add a third vector.
Definition vector.hpp:441
ace argon_type MultiplyRoundAddFixedQMax(argon_type b, arg_type c) const
Multiply-round-add three fixed-point vectors, returning a fixed-point sum.
Definition vector.hpp:589
ace argon_type ShiftLeft(std::make_signed_t< simd::Scalar_t< Bool_t< VectorType > > > n) const
Shift the elements of the vector to the left by a specified number of bits.
Definition vector.hpp:688
ace argon_type BitwiseOrNot(argon_type b) const
Bitwise OR of the vector with the NOT of another vector.
Definition vector.hpp:1131
ace helpers::ArgonFor_t< simd::make_signed_t< Bool_t< VectorType > > > CountLeadingSignBits() const
Count the number of consecutive bits following the sign bit that are set to the same value as the sig...
Definition vector.hpp:1178
static ace argon_type FromLane(argon::ConstLane< LaneIndex, VectorType > lane)
Constructs a Vector from a ConstLane object.
Definition vector.hpp:136
ace argon_type AddSaturate(argon_type b) const
Adds two vectors, saturating the result.
Definition vector.hpp:378
std::array< argon_type, 2 > UnzipWith(argon_type b)
Unzip two vectors, returning two vectors of pairs.
Definition vector.hpp:1251
ace argon_type operator++() const
Increment the vector by 1 and return the result.
Definition vector.hpp:226
ace argon_type operator^(argon_type b) const
Bitwise XOR two vectors and return the result.
Definition vector.hpp:238
ace argon_bool_type Equal(argon_type b) const
Compare the lanes of two vectors, setting the result lane's bits to ON if they are equal.
Definition vector.hpp:656
ace void StoreTo(scalar_type *ptr) const
Store the vector to a pointer.
Definition vector.hpp:1058
ConstLane< LaneIndex, simd::Vec128_t< ScalarType > > const_lane_type
Definition vector.hpp:53
ace argon_type MultiplyRoundFixedQMax(lane_type l) const
Multiply a fixed-point vector by a lane value, returning a fixed-point product.
Definition vector.hpp:551
ace argon_type CountActiveBits() const
Count the number of bits that are set to one in the vector.
Definition vector.hpp:1190
ace argon_type MultiplyFixedQMax(scalar_type s) const
Multiply a QMax fixed-point vector by a scalar value, returning a fixed-point product.
Definition vector.hpp:526
ace predicate_type CompareTestNonzero(argon_type b) const
Ands the current vector with the given vector, then checks if nonzero.
Definition vector.hpp:1170
static ace std::array< argon_type, stride > LoadToLaneInterleaved(std::array< argon_type, stride > multi, const scalar_type *ptr)
Load a value from a pointer into a vector at the lane index lane, de-interleaving.
Definition vector.hpp:979
ace argon_type PairwiseMax(argon_type b) const
Select the maximum of each pair of lanes in the two vectors.
Definition vector.hpp:1083
ace argon_type operator/(argon_type b) const
Divide a vector and return the result.
Definition vector.hpp:205
ace argon_type BitwiseNot() const
Bitwise ops.
Definition vector.hpp:1094
ace argon_type Max(argon_type b) const
Compare the lanes of two vectors, copying the larger of each lane to the result.
Definition vector.hpp:637
static ace argon_type FromLane(argon::Lane< IntrinsicType > lane)
Definition vector.hpp:123
constexpr Vector & operator=(Vector &&other)=default
Move assignment operator for the Vector class.
ace argon_type ShiftLeftInsert(argon_type b) const
Shift the elements of the vector to the left by a specified number of bits, and then OR the result wi...
Definition vector.hpp:735
ace const lane_type GetLane(const size_t i) const
Get a single lane of the vector by index.
Definition vector.hpp:285
ace argon_type BitwiseAnd(argon_type b) const
Bitwise AND of the vector with another vector.
Definition vector.hpp:1103
ace argon_type MultiplyAdd(argon_type b, const_lane_type< LaneIndex > c) const
Multiply a vector by a lane value and add a third vector.
Definition vector.hpp:477
ace argon_type Modulo(scalar_type b) const
Get the modulo of a vector and a scalar value.
Definition vector.hpp:627
ace argon_type Popcount() const
Count the number of bits that are set to one in the vector.
Definition vector.hpp:1203
ace argon_bool_type LessThan(argon_type b) const
Compare the lanes of two vectors, setting the result lane's bits to ON if a is less than b.
Definition vector.hpp:672
ace argon_type ShiftRight(const int i) const
Shift the elements of the vector to the right by a specified number of bits.
Definition vector.hpp:344
ace argon_type ShiftLeftRoundSaturate(argon_type b) const
Shift the elements of the vector to the left by a specified number of bits, rounding and saturating t...
Definition vector.hpp:721
ace Vector(scalar_type scalar)
Constructs a Vector from a scalar value.
Definition vector.hpp:79
ace argon_type ShiftRight() const
Shift the elements of the vector to the right by a specified number of bits.
Definition vector.hpp:741
ace argon_type Negate() const
Negate each lane of the vector (arithmetic negation) and return the result.
Definition vector.hpp:350
ace void StoreLaneTo(scalar_type *ptr)
Store a lane of the vector to a pointer.
Definition vector.hpp:1064
ace argon_type Multiply(lane_type b) const
Multiply a vector by a lane value.
Definition vector.hpp:430
ace argon_type MultiplySubtract(argon_type b, scalar_type c) const
Multiply a vector by a scalar value and subtract from a third vector.
Definition vector.hpp:500
ace argon_type MultiplyAdd(const_lane_type< LaneIndex > b, argon_type c) const
Multiply a vector by a lane value and add a third vector.
Definition vector.hpp:483
ace argon_type MultiplyRoundFixedQMax(scalar_type s) const
Multiply a fixed-point vector by a scalar value, returning a fixed-point product.
Definition vector.hpp:544
ace argon_bool_type GreaterThanOrEqual(argon_type b) const
Compare the lanes of two vectors, setting the result lane's bits to ON if a is greater than or equal ...
Definition vector.hpp:660
ace argon_bool_type operator>(argon_type b) const
Compare two vectors, checking if this vector is greater than the other.
Definition vector.hpp:217
ace argon_type operator<<(const int i) const
Shift the elements of the vector to the left by a specified number of bits.
Definition vector.hpp:259
ace argon_type AddHalve(argon_type b) const
Adds two vectors, halving the result.
Definition vector.hpp:369
static ace std::array< argon_type, stride > LoadGatherOffsetIndexInterleaved(const scalar_type *base_ptr, helpers::ArgonFor_t< simd::make_unsigned_t< Bool_t< VectorType > > > offset_vector)
Perform a Load-Gather of interleaved elements.
Definition vector.hpp:996
ace argon_type Divide(argon_type b) const
Divide two vectors.
Definition vector.hpp:604
ace argon_type ShiftRightRound() const
Shift the elements of the vector to the right by a specified number of bits, rounding the result.
Definition vector.hpp:751
Bool_t< simd::Vec128_t< ScalarType > > predicate_type
Definition vector.hpp:58
static ace std::array< argon_type, stride > LoadCopyInterleaved(const scalar_type *ptr)
Load multiple vectors from a pointer, duplicating the value across all lanes.
Definition vector.hpp:915
static ace int size()
Get the number of elements.
Definition vector.hpp:1293
static ace argon_type LoadGatherOffsetBytes(const scalar_type *base, helpers::ArgonFor_t< simd::make_unsigned_t< Bool_t< VectorType > > > offset_vector)
Using a base pointer and a vector of byte offsets, create a new vector.
Definition vector.hpp:812
ace argon_bool_type GreaterThan(argon_type b) const
Compare the lanes of two vectors, setting the result lane's bits to ON if a is greater than b.
Definition vector.hpp:668
ace std::array< argon_type, 2 > ZipWith(argon_type b) const
Zip two vectors together, returning two vectors of pairs.
Definition vector.hpp:1230
ace argon_type operator-(argon_type b) const
Subtract a vector and return the result.
Definition vector.hpp:199
helpers::ArgonFor_t< simd::Vec128_t< ScalarType > > argon_type
Definition vector.hpp:57
static ace argon_type LoadScalar(const scalar_type *ptr)
Constructs a Vector from a scalar pointer.
Definition vector.hpp:104
ace argon_type ShiftRightAccumulateRound(argon_type b) const
Shift the elements of the b vector to the right by a specified number of bits, and then add the resul...
Definition vector.hpp:771
ace argon_type operator*(argon_type b) const
Multiply a vector and return the result.
Definition vector.hpp:202
ace predicate_type TestNonzero() const
Checks whether the lanes of the current vector are nonzero.
Definition vector.hpp:1173
static ace argon_type LoadCopy(const scalar_type *ptr)
Load a vector from a pointer, duplicating the value across all lanes.
Definition vector.hpp:797
ace argon_type LoadToLane(const scalar_type *ptr)
Load a lane from a pointer.
Definition vector.hpp:875
ace argon_type SubtractAbs(argon_type b) const
Subtract two vectors, taking the absolute value of the result.
Definition vector.hpp:398
ace argon_type ShiftLeft() const
Shift the elements of the vector to the left by a specified number of bits.
Definition vector.hpp:702
ace const_lane_type< lanes - 1 > LastLane()
Get the last lane of the vector.
Definition vector.hpp:341
ace argon_type ShiftLeftSaturate(helpers::ArgonFor_t< simd::make_signed_t< Bool_t< VectorType > > > b) const
Shift the elements of the vector to the left by a specified number of bits, saturating the result.
Definition vector.hpp:711
ace argon_type MultiplyFixedQMax(lane_type l) const
Multiply a QMax fixed-point vector by a lane value, returning a fixed-point product.
Definition vector.hpp:531
ace argon_type MultiplySubtract(scalar_type b, argon_type c) const
Multiply a vector by a scalar value and subtract from a third vector.
Definition vector.hpp:510
ace argon_bool_type operator<=(argon_type b) const
Compare two vectors, checking if this vector is less than or equal to the other.
Definition vector.hpp:220
ace argon_type ShiftLeftSaturate() const
Shift the elements of the vector to the left by a specified number of bits, saturating the result.
Definition vector.hpp:725
ace argon_type BitwiseSelect(ArgType true_value, ArgType false_value) const
Bitwise select between two vectors, using the current vector as a mask.
Definition vector.hpp:1157
ace argon_type operator&(argon_type b) const
Bitwise AND two vectors and return the result.
Definition vector.hpp:232
ace argon_type MultiplyRoundFixedQMax(argon_type v) const
Multiply two fixed-point vectors, returning a fixed-point product.
Definition vector.hpp:538
ace argon_type SubtractSaturate(argon_type b) const
Subtract two vectors, saturating the result.
Definition vector.hpp:394
static ace std::array< argon_type, Stride > LoadToLaneInterleaved(simd::MultiVector_t< VectorType, Stride > multi, const scalar_type *ptr)
Load a value from a pointer into a vector at the lane index lane, de-interleaving.
Definition vector.hpp:945
static ace std::array< argon_type, n > LoadMulti(const scalar_type *ptr)
Load n vectors from a single contiguous set of memory.
Definition vector.hpp:1016
static ace argon_type LoadGatherOffsetIndex(const scalar_type *base, helpers::ArgonFor_t< simd::make_unsigned_t< Bool_t< VectorType > > > offset_vector)
Using a base pointer and a vector of offset indices, create a new vector.
Definition vector.hpp:844
helpers::ArgonFor_t< predicate_type > argon_bool_type
Definition vector.hpp:59
ace argon_type operator|(argon_type b) const
Bitwise OR two vectors and return the result.
Definition vector.hpp:235
ace argon_type MultiplyFixedQMax(argon_type v) const
Multiply two QMax fixed-point vectors, returning a fixed-point product.
Definition vector.hpp:522
ace argon_type MultiplyAdd(argon_type b, scalar_type c) const
Multiply a vector by a scalar value and add a third vector.
Definition vector.hpp:451
simd::Scalar_t< simd::Vec128_t< ScalarType > > scalar_type
Definition vector.hpp:55
ace const const_lane_type< LaneIndex > GetLane() const
Get a single lane of the vector by index.
Definition vector.hpp:323
ace argon_type operator~() const
Bitwise NOT the vector and return the result.
Definition vector.hpp:241
ace Vector(argon::ConstLane< LaneIndex, VectorType > lane)
Constructs a Vector from a ConstLane object.
Definition vector.hpp:92
ace argon_type Select(ArgType true_value, ArgType false_value) const
Bitwise select between two vectors, using the current vector as a mask.
Definition vector.hpp:1164
ace std::array< scalar_type, lanes > to_array()
Convert the vector to an array of scalar values.
Definition vector.hpp:275
ace argon_type Absolute() const
Get the absolute value of the vector.
Definition vector.hpp:557
static constexpr size_t lanes
Definition vector.hpp:62
ace lane_type operator[](const size_t i)
Access a lane of the vector by index.
Definition vector.hpp:247
ace argon_type BitwiseClear(argon_type b) const
Bitwise AND of the vector with the NOT of another vector.
Definition vector.hpp:1150
ace argon_type BitwiseAndNot(argon_type b) const
Bitwise AND of the vector with the NOT of another vector.
Definition vector.hpp:1141
ace argon_type ShiftRightAccumulate(argon_type b) const
Shift the elements of the b vector to the right by a specified number of bits, and then add the result to the current vector.
Definition vector.hpp:759
ace argon_type operator>>(const int i) const
Shift the elements of the vector to the right by a specified number of bits.
Definition vector.hpp:250
ace argon_type Add(argon_type b) const
Add two vectors.
Definition vector.hpp:359
ace argon_type BitwiseOr(argon_type b) const
Bitwise OR of the vector with another vector.
Definition vector.hpp:1112
constexpr Vector()=default
The default constructor for the Vector class.
ace argon_type MultiplyAddFixedQMax(argon_type b, arg_type c) const
Multiply-add three fixed-point vectors, returning a fixed-point sum.
Definition vector.hpp:580
ace const lane_type GetLane(const int i) const
Get a single lane of the vector by index.
Definition vector.hpp:303
constexpr Vector & operator=(const Vector &other)=default
Copy assignment operator for the Vector class.
ace argon_bool_type operator!=(argon_type b) const
Compare two vectors for inequality.
Definition vector.hpp:211
static ace argon_type Generate(FuncType body)
Constructs a Vector from a function that generates values.
Definition vector.hpp:169
ace argon_type CountLeadingZeroBits() const
Count the number of consecutive top bits that are set to zero.
Definition vector.hpp:1186
ace argon_type Min(argon_type b) const
Compare the lanes of two vectors, copying the smaller of each lane to the result.
Definition vector.hpp:646
ace argon_type operator-() const
Negate the SIMD vector and return the result.
Definition vector.hpp:193
ace argon_type SubtractHalve(argon_type b) const
Subtract two vectors, halving the result.
Definition vector.hpp:391
ace argon_type MultiplyAdd(lane_type b, argon_type c) const
Multiply a vector by a lane value and add a third vector.
Definition vector.hpp:472
ace argon_type ShiftRightInsert(argon_type b) const
Shift the elements of the vector to the right by a specified number of bits, ORing the result with th...
Definition vector.hpp:783
ace argon_type SubtractAbsAdd(argon_type b, argon_type c) const
Subtract two vectors, taking the absolute value of the result and adding a third vector.
Definition vector.hpp:402
constexpr Vector(Vector &&other)=default
Move constructor for the Vector class.
ace argon_type Subtract(argon_type b) const
Subtract two vectors.
Definition vector.hpp:381
ace argon_type BitwiseXor(argon_type b) const
Bitwise XOR of the vector with another vector.
Definition vector.hpp:1121
ace Vector(argon::Lane< VectorType > lane)
Constructs a Vector from a Lane object.
Definition vector.hpp:85
ace argon_type AddHalveRound(argon_type b) const
Adds two vectors, halving and rounding the result.
Definition vector.hpp:373
ace Lane< const VectorType > operator[](const size_t i) const
Access a lane of the vector by index.
Definition vector.hpp:244
Header file for SIMD features and platform detection.
typename ArgonFor< std::remove_cv_t< T > >::type ArgonFor_t
Helper alias to get the Argon type for a given vector type.
Definition argon_for.hpp:45
Lane deconstruction feature.
Definition argon_full.hpp:302
Helper functions to convert C-style arrays to std::array of Argon types, mimicking std::to_array.
constexpr std::array< helpers::ArgonFor_t< T >, N > to_array(T(&a)[N])
Convert a C-style array of vector types to a std::array of Argon types.
Definition to_array.hpp:29