Argon 0.1.0
Loading...
Searching...
No Matches
vector.hpp
1#pragma once
2#include <array>
3#include <bit>
4#include <cmath>
5#include <functional>
6#include <tuple>
7#include <type_traits>
8#include <utility>
9#include "arm_simd.hpp"
10#include "arm_simd/helpers.hpp"
11#include "arm_simd/helpers/multivector.hpp"
12#include "arm_simd/helpers/scalar.hpp"
13#include "arm_simd/helpers/vec64.hpp"
14#include "features.h"
15#include "helpers.hpp"
16#include "helpers/bool.hpp"
17#include "helpers/to_array.hpp"
18#include "lane.hpp"
19
20#ifdef __ARM_FEATURE_MVE
21#define simd mve
22#else
23#define simd neon
24#endif
25
26#ifdef ARGON_PLATFORM_SIMDE
27#define ace
28#elifdef __clang__
29#define ace [[gnu::always_inline]] constexpr
30#else
31#define ace [[gnu::always_inline]] inline
32#endif
33
34namespace argon {
/// @brief Satisfied by any type `T` for which `std::is_arithmetic_v<T>` is true.
35template <typename T>
36concept arithmetic = std::is_arithmetic_v<T>;
37
/// @brief True when `T` is exactly the same type as at least one of `Ts...`.
/// @details Evaluates to false when `Ts` is empty.
template <typename T, typename... Ts>
inline constexpr bool is_one_of = (std::is_same_v<T, Ts> || ...);
44
49template <typename VectorType>
50class Vector {
51 public:
52 template <size_t LaneIndex>
55 using scalar_type = simd::Scalar_t<VectorType>;
56 using vector_type = VectorType;
58 using predicate_type = Bool_t<VectorType>;
60
62 static constexpr size_t lanes = (simd::is_quadword_v<VectorType> ? 16 : 8) / sizeof(scalar_type);
63
65 constexpr Vector() = default;
66
67 constexpr Vector(Vector&& other) = default;
68 constexpr Vector(const Vector& other) = default;
69 constexpr Vector& operator=(Vector&& other) = default;
70 constexpr Vector& operator=(const Vector& other) = default;
71
74 ace Vector(VectorType vector) : vec_{std::move(vector)} {};
75
79 ace Vector(scalar_type scalar) : vec_(FromScalar(scalar)) {};
80
81#ifndef ARGON_PLATFORM_MVE
85 ace Vector(argon::Lane<VectorType> lane) : vec_(FromLane(lane)) {};
86
91 template <size_t LaneIndex>
93#endif
94
95 template <typename... ArgTypes>
96 requires(sizeof...(ArgTypes) > 1)
97 ace Vector(ArgTypes... args) : vec_{std::forward<ArgTypes>(args)...} {}
98
104 ace static argon_type LoadScalar(const scalar_type* ptr) { return LoadCopy(ptr); }
105
/// @brief Create a vector by handing `scalar` to `simd::duplicate`.
/// @param scalar Value forwarded to the duplicate call.
/// @return Result of the duplicate call, converted to `argon_type`.
110 ace static argon_type FromScalar(scalar_type scalar) {
111#ifdef ARGON_PLATFORM_MVE
// The MVE build calls duplicate without naming a vector type.
112 return simd::duplicate(scalar);
113#else
// Other builds name the destination vector type explicitly.
114 return simd::duplicate<VectorType>(scalar);
115#endif
116 }
117
122 template <simd::is_vector_type IntrinsicType>
124#ifdef ARGON_PLATFORM_MVE
125 return simd::duplicate(lane.Get());
126#else
127 return simd::duplicate_lane<vector_type>(lane.vec(), lane.lane());
128#endif
129 }
130
135 template <size_t LaneIndex>
137#ifdef ARGON_PLATFORM_MVE
138 return simd::duplicate(lane.Get());
139#else
140 if constexpr (simd::is_quadword_v<VectorType>) {
141#if __ARM_ARCH >= 8
142 return simd::duplicate_lane_quad<LaneIndex>(lane.vec());
143#else
144 // On A32, vec() returns the 64-bit half-register (low or high).
145 // The template arg must be the lane index within that half-vector.
146 constexpr size_t local_lane = LaneIndex >= (lanes / 2) ? LaneIndex - (lanes / 2) : LaneIndex;
147 return simd::duplicate_lane_quad<local_lane>(lane.vec());
148#endif
149 } else {
150 return simd::duplicate_lane<LaneIndex>(lane.vec());
151 }
152#endif
153 }
154
161 ace static argon_type Iota(scalar_type start) {
162 // TODO: Remove this once MSVC 19.44 is released.
163#if __cpp_if_consteval >= 202106L
164 return IotaHelper(start, std::make_index_sequence<lanes>{});
165#else
166 return Argon{start}.Add(VectorType{0, 1, 2, 3});
167#endif
168 }
169
174 template <typename FuncType>
175 requires std::convertible_to<FuncType, std::function<scalar_type()>>
176 ace static argon_type Generate(FuncType body) {
177 VectorType out;
178 utility::constexpr_for<0, lanes, 1>([&](size_t i) { //
179 out[i] = body();
180 });
181 return out;
182 }
183
189 template <typename FuncType>
190 requires std::convertible_to<FuncType, std::function<scalar_type(scalar_type)>>
191 ace static argon_type GenerateWithIndex(FuncType body) {
192 VectorType out;
193 utility::constexpr_for<0, lanes, 1>([&]<size_t i>() { //
194 out[i] = body(i);
195 });
196 return out;
197 }
198
200 ace argon_type operator-() const { return Negate(); }
201
203 ace argon_type operator+(argon_type b) const { return Add(b); }
204
206 ace argon_type operator-(argon_type b) const { return Subtract(b); }
207
209 ace argon_type operator*(argon_type b) const { return Multiply(b); }
210
212 ace argon_type operator/(argon_type b) const { return Divide(b); }
213
215 ace argon_bool_type operator==(argon_type b) const { return Equal(b); }
216
218 ace argon_bool_type operator!=(argon_type b) const { return ~Equal(b); }
219
221 ace argon_bool_type operator<(argon_type b) const { return LessThan(b); }
222
225
228
231
233 ace argon_type operator++() const { return Add(1); }
234
236 ace argon_type operator--() const { return Subtract(1); }
237
239 ace argon_type operator&(argon_type b) const { return BitwiseAnd(b); }
240
242 ace argon_type operator|(argon_type b) const { return BitwiseOr(b); }
243
245 ace argon_type operator^(argon_type b) const { return BitwiseXor(b); }
246
248 ace argon_type operator~() const { return BitwiseNot(); }
249
251 ace Lane<const VectorType> operator[](const size_t i) const { return GetLane(i); }
252
254 ace lane_type operator[](const size_t i) { return GetLane(i); }
255
/// @brief Shift every lane right by `i`.
/// @param i Shift amount applied to the whole vector.
/// @return The shifted vector.
257 ace argon_type operator>>(const int i) const {
// With compiler extensions enabled the built-in vector operator is used;
// otherwise the call is routed through ShiftRight(i).
258#if ARGON_USE_COMPILER_EXTENSIONS
259 return vec_ >> i;
260#else
261 return ShiftRight(i);
262#endif
263 }
264
/// @brief Shift every lane left by `i`.
/// @param i Shift amount applied to the whole vector.
/// @return The shifted vector.
266 ace argon_type operator<<(const int i) const {
// With compiler extensions enabled the built-in vector operator is used;
// otherwise the call is routed through ShiftLeft(i).
267#if ARGON_USE_COMPILER_EXTENSIONS
268 return vec_ << i;
269#else
270 return ShiftLeft(i);
271#endif
272 }
273
275 [[gnu::always_inline]] constexpr VectorType vec() const { return vec_; }
276
278 [[gnu::always_inline]] constexpr operator VectorType() const { return vec_; }
279
282 ace std::array<scalar_type, lanes> to_array() {
283 std::array<scalar_type, lanes> out;
284 simd::store1(out.data(), vec_);
285 return out;
286 }
287
292 ace const lane_type GetLane(const size_t i) const {
293#ifdef ARGON_PLATFORM_MVE
294 return vec_[i];
295#else
296 return {vec_, static_cast<int>(i)};
297#endif
298 }
299 ace lane_type GetLane(const size_t i) {
300#ifdef ARGON_PLATFORM_MVE
301 return vec_[i];
302#else
303 return {vec_, static_cast<int>(i)};
304#endif
305 }
306
310 ace const lane_type GetLane(const int i) const {
311#ifdef ARGON_PLATFORM_MVE
312 return vec_[i];
313#else
314 return {vec_, i};
315#endif
316 }
317
318 ace lane_type GetLane(const int i) {
319#ifdef ARGON_PLATFORM_MVE
320 return vec_[i];
321#else
322 return {vec_, i};
323#endif
324 }
325
329 template <size_t LaneIndex>
331#ifdef ARGON_PLATFORM_MVE
332 return vec_[LaneIndex];
333#else
334 return vec_;
335#endif
336 }
337
338 template <size_t LaneIndex>
340#ifdef ARGON_PLATFORM_MVE
341 return vec_[LaneIndex];
342#else
343 return vec_;
344#endif
345 }
346
348 ace const_lane_type<lanes - 1> LastLane() { return vec_; }
349
351 ace argon_type ShiftRight(const int i) const { return simd::shift_right(vec_, i); }
352
354 ace argon_type ShiftLeft(const int i) const { return simd::shift_left(vec_, i); }
355
/// @brief Negate every lane.
/// @return `-vec_` when compiler extensions are enabled, otherwise `simd::negate(vec_)`.
357 ace argon_type Negate() const {
358 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
359 return -vec_;
360 } else {
361 return simd::negate(vec_);
362 }
363 }
364
/// @brief Add `b` to this vector lane by lane.
/// @param b Right-hand operand.
/// @return `vec_ + b.vec_` when compiler extensions are enabled, otherwise `simd::add(vec_, b)`.
366 ace argon_type Add(argon_type b) const {
367 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
368 return vec_ + b.vec_;
369 } else {
370 return simd::add(vec_, b);
371 }
372 }
373
376 ace argon_type AddHalve(argon_type b) const { return simd::add_halve(vec_, b); }
377
380 ace argon_type AddHalveRound(argon_type b) const { return simd::add_halve_round(vec_, b); }
381
385 ace argon_type AddSaturate(argon_type b) const { return simd::add_saturate(vec_, b); }
386
389 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
390 return vec_ - b.vec_;
391 } else {
392 return simd::subtract(vec_, b);
393 }
394 }
395
398 ace argon_type SubtractHalve(argon_type b) const { return simd::subtract_halve(vec_, b); }
399
401 ace argon_type SubtractSaturate(argon_type b) const { return simd::subtract_saturate(vec_, b); }
402
405 ace argon_type SubtractAbs(argon_type b) const { return simd::subtract_absolute(vec_, b); }
406
410#ifdef ARGON_PLATFORM_MVE
411 return mve::add(vec_, mve::subtract_absolute(b, c));
412#else
413 return neon::subtract_absolute_add(vec_, b, c);
414#endif
415 }
416
419 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
420 return vec_ * b.vec_;
421 } else {
422 return simd::multiply(vec_, b);
423 }
424 }
425
428 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
429 return vec_ * b;
430 } else {
431 return simd::multiply(vec_, b);
432 }
433 }
434
435#ifndef ARGON_PLATFORM_MVE
437 ace argon_type Multiply(lane_type b) const { return neon::multiply_lane(vec_, b.vec(), b.lane()); }
438
440 template <size_t LaneIndex>
442 return neon::multiply_lane(vec_, b.vec(), b.lane());
443 }
444#endif
445
449 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
450 return vec_ + b.vec_ * c.vec_;
451 } else {
452 return simd::multiply_add(vec_, b, c);
453 }
454 }
455
459 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
460 return vec_ + b.vec_ * c;
461 } else {
462 return simd::multiply_add(vec_, b, c);
463 }
464 }
465
468 ace argon_type MultiplyAdd(scalar_type b, argon_type c) const { return MultiplyAdd(c, b); }
469
470#ifndef ARGON_PLATFORM_MVE
474 return simd::multiply_add_lane(vec_, b.vec(), c.vec(), c.lane());
475 }
476
479 ace argon_type MultiplyAdd(lane_type b, argon_type c) const { return MultiplyAdd(c, b); }
480
483 template <size_t LaneIndex>
485 return simd::multiply_add_lane(vec_, b.vec(), c.vec(), c.lane());
486 }
487
489 template <size_t LaneIndex>
491 return MultiplyAdd(c, b);
492 }
493#endif
494
498#if ARGON_USE_COMPILER_EXTENSIONS
499 return vec_ - b.vec_ * c.vec_;
500#else
501 return simd::multiply_subtract(vec_, b, c);
502#endif
503 }
504
508#if ARGON_USE_COMPILER_EXTENSIONS
509 return vec_ - b.vec_ * c;
510#else
511 return simd::multiply_subtract(vec_, b, c);
512#endif
513 }
514
518
519#ifndef ARGON_PLATFORM_MVE
523 return simd::multiply_subtract_lane(vec_, b.vec(), c.vec(), c.lane());
524 }
525#endif
526
529 ace argon_type MultiplyFixedQMax(argon_type v) const { return simd::multiply_double_saturate_high(vec_, v); }
530
533 ace argon_type MultiplyFixedQMax(scalar_type s) const { return simd::multiply_double_saturate_high(vec_, s); }
534
535#ifndef ARGON_PLATFORM_MVE
539 return simd::multiply_double_saturate_high_lane(vec_, l.vec(), l.lane());
540 }
541#endif
542
546 return simd::multiply_double_round_saturate_high(vec_, v);
547 }
548
552 return simd::multiply_double_round_saturate_high(vec_, s);
553 }
554
555#ifndef ARGON_PLATFORM_MVE
559 return simd::multiply_double_round_saturate_high_lane(vec_, l.vec(), l.lane());
560 }
561#endif
562
564 ace argon_type Absolute() const { return simd::abs(vec_); }
565
569 requires std::floating_point<scalar_type> || std::is_same_v<scalar_type, uint32_t>
570 {
571#ifdef ARGON_PLATFORM_MVE
572 if constexpr (std::is_same_v<scalar_type, uint32_t>) {
573 std::numeric_limits<uint32_t>::max() / vec_;
574 } else {
575 return 1.f / vec_;
576 }
577#else
578 return simd::reciprocal_estimate(vec_);
579#endif
580 }
581
585 requires std::floating_point<scalar_type> || std::is_same_v<scalar_type, uint32_t>
586 {
587#ifdef ARGON_PLATFORM_MVE
588 if constexpr (std::is_same_v<scalar_type, uint32_t>) {
589 return std::numeric_limits<uint32_t>::max() / (vec_ * vec_);
590 } else {
591 return 1.f / (vec_ * vec_);
592 }
593#else
594 return simd::reciprocal_sqrt_estimate(vec_);
595#endif
596 }
597
602 requires std::floating_point<scalar_type>
603 {
604#ifdef ARGON_PLATFORM_MVE
605 return 2.f - vec_ * b.vec_;
606#else
607 return simd::reciprocal_step(vec_, b.vec_);
608#endif
609 }
610
615 requires std::floating_point<scalar_type>
616 {
617#ifdef ARGON_PLATFORM_MVE
618 return (3.f - vec_ * b.vec_) * 0.5f;
619#else
620 return simd::reciprocal_sqrt_step(vec_, b.vec_);
621#endif
622 }
623
627 ace argon_type ReciprocalEstimateRefine(int n_iters = 1) const
628 requires std::floating_point<scalar_type>
629 {
631 for (int i = 0; i < n_iters; ++i) {
632 est = est * ReciprocalStep(est);
633 }
634 return est;
635 }
636
640 ace argon_type ReciprocalSqrtEstimateRefine(int n_iters = 1) const
641 requires std::floating_point<scalar_type>
642 {
644 for (int i = 0; i < n_iters; ++i) {
645 est = est * (*this * est).ReciprocalSqrtStep(est);
646 }
647 return est;
648 }
649
/// @brief Add the result of `b.MultiplyFixedQMax(c)` to this vector.
/// @tparam arg_type Type of the multiplier: `argon_type`, `scalar_type`, `lane_type`,
///         or anything convertible to `argon_type` or `scalar_type`.
/// @param b Vector on which `MultiplyFixedQMax` is invoked.
/// @param c Multiplier forwarded to `MultiplyFixedQMax`.
/// @return `Add(b.MultiplyFixedQMax(c))`.
652 template <typename arg_type>
653 requires(is_one_of<arg_type, argon_type, scalar_type, lane_type> || std::is_convertible_v<arg_type, argon_type> ||
654 std::is_convertible_v<arg_type, scalar_type>)
655 ace argon_type MultiplyAddFixedQMax(argon_type b, arg_type c) const {
656 return Add(b.MultiplyFixedQMax(c));
657 }
658
661 template <typename arg_type>
662 requires(is_one_of<arg_type, argon_type, scalar_type, lane_type> || std::is_convertible_v<arg_type, argon_type> ||
663 std::is_convertible_v<arg_type, scalar_type>)
665 return Add(b.MultiplyRoundFixedQMax(c));
666 }
667
668#ifdef __aarch64__
/// @brief Divide this vector by `b` lane by lane (variant compiled when `__aarch64__` is defined).
/// @param b Divisor vector.
/// @return `vec_ / b.vec_` when compiler extensions are enabled, otherwise `simd::divide(vec_, b)`.
670 ace argon_type Divide(argon_type b) const {
671 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
672 return vec_ / b.vec_;
673 } else {
674 return simd::divide(vec_, b);
675 }
676 }
677#else
680 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
681 return vec_ / b.vec_;
682 } else {
683 return this->map2(b, [](scalar_type lane1, scalar_type lane2) { return lane1 / lane2; });
684 }
685 }
686#endif
687
691 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
692 return vec_ % b.vec_;
693 } else if constexpr (std::floating_point<scalar_type>) {
694 return this->map2(b, [](scalar_type lane1, scalar_type lane2) { return std::fmod(lane1, lane2); });
695 } else {
696 return this->map2(b, [](scalar_type lane1, scalar_type lane2) { return lane1 % lane2; });
697 }
698 }
699
703 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
704 return vec_ % b;
705 } else {
706 return this->map([b](scalar_type lane1) { return std::fmod(lane1, b); });
707 }
708 }
709
/// @brief Lane-wise maximum of this vector and `b`.
/// @param b Vector to compare against.
/// @return A vector selecting, per lane, the larger of the two inputs.
712 ace argon_type Max(argon_type b) const {
713 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
// Vector comparison feeding a vector conditional: picks vec_ where vec_ > b, else b.
714 return vec_ > b.vec_ ? vec_ : b.vec_;
715 } else {
716 return simd::max(vec_, b);
717 }
718 }
719
/// @brief Lane-wise minimum of this vector and `b`.
/// @param b Vector to compare against.
/// @return A vector selecting, per lane, the smaller of the two inputs.
721 ace argon_type Min(argon_type b) const {
722 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
// Vector comparison feeding a vector conditional: picks vec_ where vec_ < b, else b.
723 return vec_ < b.vec_ ? vec_ : b.vec_;
724 } else {
725 return simd::min(vec_, b);
726 }
727 }
728
731 ace argon_bool_type Equal(argon_type b) const { return simd::equal(vec_, b); }
732
735 ace argon_bool_type GreaterThanOrEqual(argon_type b) const { return simd::greater_than_or_equal(vec_, b); }
736
739 ace argon_bool_type LessThanOrEqual(argon_type b) const { return simd::less_than_or_equal(vec_, b); }
740
743 ace argon_bool_type GreaterThan(argon_type b) const { return simd::greater_than(vec_, b); }
744
747 ace argon_bool_type LessThan(argon_type b) const { return simd::less_than(vec_, b); }
748
751 ace argon_type ShiftLeft(helpers::ArgonFor_t<simd::make_signed_t<Bool_t<VectorType>>> b) const
752 requires std::is_integral_v<scalar_type>
753 {
754 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
755 return vec_ << b.vec_;
756 } else {
757 return simd::shift_left(vec_, b.vec_);
758 }
759 }
760
763 ace argon_type ShiftLeft(std::make_signed_t<simd::Scalar_t<Bool_t<VectorType>>> n) const
764 requires std::is_integral_v<scalar_type>
765 {
766 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
767 return vec_ << n;
768 } else {
770 return simd::shift_left(vec_, b.vec_);
771 }
772 }
773
776 template <int n>
777 ace argon_type ShiftLeft() const {
778 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
779 return vec_ << n;
780 } else {
781 return simd::shift_left<n>(vec_);
782 }
783 }
784
786 ace argon_type ShiftLeftSaturate(helpers::ArgonFor_t<simd::make_signed_t<Bool_t<VectorType>>> b) const
787 requires(std::is_integral_v<scalar_type>)
788 {
789 return simd::shift_left_saturate(vec_, b);
790 }
791
793 ace argon_type ShiftLeftRound(argon_type b) const { return simd::shift_left_round(vec_, b); }
794
796 ace argon_type ShiftLeftRoundSaturate(argon_type b) const { return simd::shift_left_round_saturate(vec_, b); }
797
799 template <int n>
801 return simd::shift_left_saturate<n>(vec_);
802 }
803
809 template <int n>
811 return simd::shift_left_insert<n>(vec_, b);
812 }
813
815 template <int n>
816 ace argon_type ShiftRight() const {
817 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
818 return vec_ >> n;
819 } else {
820 return simd::shift_right<n>(vec_);
821 }
822 }
823
825 template <int n>
827 return simd::shift_right_round<n>(vec_);
828 }
829
833 template <int n>
835#ifdef ARGON_PLATFORM_MVE
836 return vec_ + (b >> n);
837#else
838 return simd::shift_right_accumulate<n>(vec_, b);
839#endif
840 }
841
845 template <int n>
847#ifdef ARGON_PLATFORM_MVE
848 return vec_ + mve::shift_right_round<n>(b);
849#else
850 return simd::shift_right_accumulate_round<n>(vec_, b);
851#endif
852 }
853
857 template <int n>
859 return simd::shift_right_insert<n>(vec_, b);
860 }
861
/// @brief Load a vector from memory.
/// @param ptr Pointer handed to the platform `load1` routine.
/// @return The loaded vector.
863 ace static argon_type Load(const scalar_type* ptr) {
864#ifdef ARGON_PLATFORM_MVE
865 return mve::load1(ptr);
866#else
// The NEON call names the destination vector type explicitly.
867 return neon::load1<VectorType>(ptr);
868#endif
869 }
870
872 ace static argon_type LoadCopy(const scalar_type* ptr) {
873#ifdef ARGON_PLATFORM_MVE
874 scalar_type val = *ptr;
875 VectorType vec;
876 utility::constexpr_for<0, lanes, 1>([val, &vec]<int i>() { vec[i] = val; });
877#else
878 return simd::load1_duplicate<VectorType>(ptr);
879#endif
880 }
881
888 const scalar_type* base,
889 helpers::ArgonFor_t<simd::make_unsigned_t<Bool_t<VectorType>>> offset_vector) {
890#ifdef ARGON_PLATFORM_MVE
891 static_assert(
892 sizeof(scalar_type) == 1 || sizeof(scalar_type) == 2 || sizeof(scalar_type) == 4 || sizeof(scalar_type) == 8,
893 "Unsupported size for gather load");
894
895 if constexpr (sizeof(scalar_type) == 1) {
896 return mve::load_byte_gather_offset(base, offset_vector);
897 } else if constexpr (sizeof(scalar_type) == 2) {
898 return mve::load_halfword_gather_offset(base, offset_vector);
899 } else if constexpr (sizeof(scalar_type) == 4) {
900 return mve::load_word_gather_offset(base, offset_vector);
901 } else if constexpr (sizeof(scalar_type) == 8) {
902 return mve::load_doubleword_gather_offset(base, offset_vector);
903 }
904#else
905 argon_type destination;
906 utility::constexpr_for<0, lanes, 1>([&]<int i>() { //<
907 auto lane_val = neon::get_lane<i>(offset_vector);
908 destination = destination.template LoadToLane<i>(base + (lane_val * sizeof(scalar_type)));
909 });
910 return destination;
911#endif
912 }
913
920 const scalar_type* base,
921 helpers::ArgonFor_t<simd::make_unsigned_t<Bool_t<VectorType>>> offset_vector) {
922#ifdef ARGON_PLATFORM_MVE
923 static_assert(
924 sizeof(scalar_type) == 1 || sizeof(scalar_type) == 2 || sizeof(scalar_type) == 4 || sizeof(scalar_type) == 8,
925 "Unsupported size for gather load");
926
927 if constexpr (sizeof(scalar_type) == 1) {
928 return mve::load_byte_gather_offset(base, offset_vector);
929 } else if constexpr (sizeof(scalar_type) == 2) {
930 return mve::load_halfword_gather_offset(base, offset_vector * sizeof(scalar_type));
931 } else if constexpr (sizeof(scalar_type) == 4) {
932 return mve::load_word_gather_offset(base, offset_vector * sizeof(scalar_type));
933 } else if constexpr (sizeof(scalar_type) == 8) {
934 return mve::load_doubleword_gather_offset(base, offset_vector * sizeof(scalar_type));
935 }
936#else
937 argon_type destination;
938 utility::constexpr_for<0, lanes, 1>([&]<int i>() { //<
939 auto lane_val = neon::get_lane<i>(offset_vector);
940 destination = destination.template LoadToLane<i>(base + lane_val);
941 });
942 return destination;
943#endif
944 }
945
949 template <size_t lane>
951 argon_type new_argon = *this;
952 return new_argon.template GetLane<lane>().Load(ptr);
953 }
954
961 template <size_t stride>
962 ace static std::array<argon_type, stride> LoadInterleaved(const scalar_type* ptr) {
963#ifdef ARGON_PLATFORM_MVE
964 static_assert(stride == 2 || stride == 4,
965 "De-interleaving Loads can only be performed with a stride of 2, 3, or 4");
966 if constexpr (stride == 2) {
967 return argon::to_array(mve::load2(ptr).val);
968 } else if constexpr (stride == 4) {
969 return argon::to_array(mve::load4(ptr).val);
970 }
971#else
972 static_assert(stride > 1 && stride < 5, "De-interleaving Loads can only be performed with a stride of 2, 3, or 4");
973 using multivec_type = simd::MultiVector_t<VectorType, stride>;
974 if constexpr (stride == 2) {
975 return argon::to_array(neon::load2<multivec_type>(ptr).val);
976 } else if constexpr (stride == 3) {
977 return argon::to_array(neon::load3<multivec_type>(ptr).val);
978 } else if constexpr (stride == 4) {
979 return argon::to_array(neon::load4<multivec_type>(ptr).val);
980 }
981#endif
982 }
983
989 template <size_t stride>
990 ace static std::array<argon_type, stride> LoadCopyInterleaved(const scalar_type* ptr) {
991#ifdef ARGON_PLATFORM_MVE
992 static_assert(stride == 2 || stride == 4,
993 "De-interleaving LoadCopy can only be performed with a stride of 2, 3, or 4");
994 if constexpr (stride == 2) {
995 return {mve::duplicate(*ptr++), mve::duplicate(*ptr++)};
996 } else if constexpr (stride == 4) {
997 return {mve::duplicate(*ptr++), mve::duplicate(*ptr++), mve::duplicate(*ptr++), mve::duplicate(*ptr)};
998 }
999#else
1000 static_assert(stride > 1 && stride < 5,
1001 "De-interleaving LoadCopy can only be performed with a stride of 2, 3, or 4");
1002 using multivec_type = simd::MultiVector<VectorType, stride>::type;
1003 if constexpr (stride == 2) {
1004 return argon::to_array(simd::load2_duplicate<multivec_type>(ptr).val);
1005 } else if constexpr (stride == 3) {
1006 return argon::to_array(simd::load3_duplicate<multivec_type>(ptr).val);
1007 } else if constexpr (stride == 4) {
1008 return argon::to_array(simd::load4_duplicate<multivec_type>(ptr).val);
1009 }
1010#endif
1011 }
1012
1019 template <size_t LaneIndex, size_t Stride>
1020 ace static std::array<argon_type, Stride> LoadToLaneInterleaved(simd::MultiVector_t<VectorType, Stride> multi,
1021 const scalar_type* ptr) {
1022 static_assert(Stride > 1 && Stride < 5, "De-interleaving Loads can only be performed with a stride of 2, 3, or 4");
1023#ifdef ARGON_PLATFORM_MVE
1024 auto out = multi;
1025 utility::constexpr_for<0, Stride, 1>([&]<int i>() { //<
1026 out.val[i][LaneIndex] = ptr[i];
1027 });
1028 return argon::to_array(out.val);
1029#else
1030 if constexpr (Stride == 2) {
1031 if constexpr (simd::is_quadword_v<VectorType>) {
1032 return argon::to_array(simd::load2_lane_quad<LaneIndex>(ptr, multi).val);
1033 } else {
1034 return argon::to_array(simd::load2_lane<LaneIndex>(ptr, multi).val);
1035 }
1036 } else if constexpr (Stride == 3) {
1037 if constexpr (simd::is_quadword_v<VectorType>) {
1038 return argon::to_array(simd::load3_lane_quad<LaneIndex>(ptr, multi).val);
1039 } else {
1040 return argon::to_array(simd::load3_lane<LaneIndex>(ptr, multi).val);
1041 }
1042 } else if constexpr (Stride == 4) {
1043 if constexpr (simd::is_quadword_v<VectorType>) {
1044 return argon::to_array(simd::load4_lane_quad<LaneIndex>(ptr, multi).val);
1045 } else {
1046 return argon::to_array(simd::load4_lane<LaneIndex>(ptr, multi).val);
1047 }
1048 }
1049#endif
1050 }
1051
/// @brief Overload of LoadToLaneInterleaved that accepts the vectors as a `std::array`.
/// @tparam lane Lane index forwarded to the multi-vector overload.
/// @tparam stride Number of vectors in `multi`.
/// @param multi Vectors whose selected lane is to be replaced.
/// @param ptr Pointer forwarded to the multi-vector overload.
/// @return Result of the multi-vector overload.
1053 template <size_t lane, size_t stride>
1054 ace static std::array<argon_type, stride> LoadToLaneInterleaved(std::array<argon_type, stride> multi,
1055 const scalar_type* ptr) {
1056 using multivec_type = simd::MultiVector_t<VectorType, stride>;
// NOTE(review): the array storage is reinterpreted as the platform multi-vector type via a
// C-style pointer cast; this assumes both have identical layout — confirm, or consider std::bit_cast.
1057 return LoadToLaneInterleaved<lane, stride>(*(multivec_type*)multi.data(), ptr);
1058 }
1059
/// @brief Gather de-interleaved data lane by lane using per-lane element offsets.
/// @tparam stride Number of output vectors; must be 2, 3, or 4.
/// @param base_ptr Base address the offsets are applied to.
/// @param offset_vector One offset per lane; each is multiplied by `stride` before being added to `base_ptr`.
/// @return Array of `stride` vectors, starting value-initialized and filled one lane at a time.
1070 template <size_t stride>
1071 ace static std::array<argon_type, stride> LoadGatherOffsetIndexInterleaved(
1072 const scalar_type* base_ptr,
1073 helpers::ArgonFor_t<simd::make_unsigned_t<Bool_t<VectorType>>> offset_vector) {
1074 static_assert(stride > 1 && stride < 5, "De-interleaving Loads can only be performed with a stride of 2, 3, or 4");
1075 std::array<argon_type, stride> multi{};
1076 utility::constexpr_for<0, lanes, 1>([&]<int i>() { //<
// Read lane i's offset, then load into lane i of every output vector via LoadToLaneInterleaved.
1077 auto lane_val = simd::get_lane<i>(offset_vector);
1078 multi = LoadToLaneInterleaved<i, stride>(multi, base_ptr + (lane_val * stride));
1079 });
1080 return multi;
1081 }
1082
1090 template <size_t n>
1091 ace static std::array<argon_type, n> LoadMulti(const scalar_type* ptr) {
1092 static_assert(n > 1 && n < 5, "LoadMulti can only be performed with a size of 2, 3, or 4");
1093#ifdef ARGON_PLATFORM_MVE
1094 std::array<argon_type, n> multi{};
1095 utility::constexpr_for<0, n, 1>([&]<int i>() { //<
1096 multi[i] = *ptr;
1097 ptr += lanes;
1098 });
1099 return multi;
1100#else
1101#if defined(__clang__) || (__GNUC__ > 13)
1102 using multi_type = simd::MultiVector_t<VectorType, n>;
1103 if constexpr (n == 2) {
1104 return argon::to_array(simd::load1_x2<multi_type>(ptr).val);
1105 } else if constexpr (n == 3) {
1106 return argon::to_array(simd::load1_x3<multi_type>(ptr).val);
1107 } else if constexpr (n == 4) {
1108 return argon::to_array(simd::load1_x4<multi_type>(ptr).val);
1109 }
1110#else
1111 if constexpr (n == 2) {
1112 auto a = simd::load1(ptr);
1113 auto b = simd::load1(ptr + lanes);
1114 return {a, b};
1115 } else if constexpr (n == 3) {
1116 auto a = simd::load1(ptr);
1117 auto b = simd::load1(ptr + lanes);
1118 auto c = simd::load1(ptr + 2 * lanes);
1119 return {a, b, c};
1120 } else if constexpr (n == 4) {
1121 auto a = simd::load1(ptr);
1122 auto b = simd::load1(ptr + lanes);
1123 auto c = simd::load1(ptr + 2 * lanes);
1124 auto d = simd::load1(ptr + 3 * lanes);
1125 return {a, b, c, d};
1126 }
1127#endif
1128#endif
1129 }
1130
1133 ace void StoreTo(scalar_type* ptr) const { simd::store1(ptr, vec_); }
1134
1138 template <int LaneIndex>
1139 ace void StoreLaneTo(scalar_type* ptr) {
1140#ifdef ARGON_PLATFORM_MVE
1141 *ptr = vec_[LaneIndex];
1142#else
1143 simd::store1_lane<LaneIndex>(ptr, vec_);
1144#endif
1145 }
1146
1147#ifndef ARGON_PLATFORM_MVE
1149
1153 ace argon_type PairwiseAdd(argon_type b) const { return simd::pairwise_add(vec_, b); }
1154
1158 ace argon_type PairwiseMax(argon_type b) const { return simd::pairwise_max(vec_, b); }
1159
1163 ace argon_type PairwiseMin(argon_type b) const { return simd::pairwise_min(vec_, b); }
1164#endif
1165
1167
1170 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
1171 return ~vec_;
1172 } else {
1173 return simd::bitwise_not(vec_);
1174 }
1175 }
1176
1179 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
1180 return vec_ & b.vec_;
1181 } else {
1182 return simd::bitwise_and(vec_, b);
1183 }
1184 }
1185
1188 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
1189 return vec_ | b.vec_;
1190 } else {
1191 return simd::bitwise_or(vec_, b);
1192 }
1193 }
1194
1197 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
1198 return vec_ ^ b.vec_;
1199 } else {
1200 return simd::bitwise_xor(vec_, b);
1201 }
1202 }
1203
1207 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
1208 return vec_ | ~b.vec_;
1209 } else {
1210 return simd::bitwise_or_not(vec_, b);
1211 }
1212 }
1213
1217 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
1218 return vec_ & ~b.vec_;
1219 } else {
1220 return simd::bitwise_clear(vec_, b);
1221 }
1222 }
1223
1226
1227#ifndef ARGON_PLATFORM_MVE
/// @brief Use this vector as the selection mask for `simd::bitwise_select`.
/// @param true_value Second argument passed to `simd::bitwise_select`.
/// @param false_value Third argument passed to `simd::bitwise_select`.
/// @return Result of `simd::bitwise_select(vec_, true_value, false_value)`.
/// @note Only available when `scalar_type` is unsigned.
1230 template <typename ArgType>
1231 requires std::is_unsigned_v<scalar_type>
1232 ace argon_type BitwiseSelect(ArgType true_value, ArgType false_value) const {
1233 return simd::bitwise_select(vec_, true_value, false_value);
1234 }
1235
1237 template <typename ArgType>
1238 requires std::is_unsigned_v<scalar_type>
1239 ace argon_type Select(ArgType true_value, ArgType false_value) const {
1240 return simd::bitwise_select(true_value, false_value);
1241 }
1242
1245 ace predicate_type CompareTestNonzero(argon_type b) const { return simd::compare_test_nonzero(vec_, b); }
1246
1248 ace predicate_type TestNonzero() const { return simd::compare_test_nonzero(vec_, argon_type{1}); }
1249#endif
1250
1254 requires(std::is_integral_v<scalar_type>)
1255 {
1256 return simd::count_leading_sign_bits(vec_);
1257 }
1258
1261 ace argon_type CountLeadingZeroBits() const { return simd::count_leading_zero_bits(vec_); }
1262
1266#ifdef ARGON_PLATFORM_MVE
1267 auto new_vec = vec_;
1268 utility::constexpr_for<0, lanes, 1>([&]<int i>() { //<
1269 new_vec[i] = std::popcount(vec_[i]);
1270 });
1271 return new_vec;
1272#else
1273 return neon::count_active_bits(vec_);
1274#endif
1275 }
1276
1278 ace argon_type Popcount() const { return CountActiveBits(); }
1279
1283 template <int n>
1285#ifdef ARGON_PLATFORM_MVE
1286 auto new_vec = vec_;
1287 utility::constexpr_for<0, lanes, 1>([&]<int i>() { //<
1288 if (i < n) {
1289 new_vec[i] = b.vec_[i];
1290 }
1291 });
1292 return new_vec;
1293#else
1294 return simd::extract<n>(vec_, b);
1295#endif
1296 }
1297
1298 ace argon_type Reverse64bit() const { return simd::reverse_64bit(vec_); }
1299 ace argon_type Reverse32bit() const { return simd::reverse_32bit(vec_); }
1300 ace argon_type Reverse16bit() const { return simd::reverse_16bit(vec_); }
1301
/// @brief Interleave the lanes of this vector with the lanes of `b`.
/// @param b Second input vector.
/// @return Two vectors: on the MVE path, even output lanes come from this vector and odd
///         output lanes from `b`; the first result draws from the lower half of the inputs
///         and the second from the upper half. Other builds return `neon::zip(vec_, b.vec()).val`.
1305 ace std::array<argon_type, 2> ZipWith(argon_type b) const {
1306#ifdef ARGON_PLATFORM_MVE
1307 std::array<argon_type, 2> new_vec;
1308 utility::constexpr_for<0, lanes, 1>([&]<int i>() { //<
1309 if (i % 2 == 0) {
// Even output lane: source index i/2 for the first result, (i + lanes)/2 for the second.
1310 new_vec[0][i] = vec_[i / 2];
1311 new_vec[1][i] = vec_[(i + lanes) / 2];
1312 } else {
// Odd output lane: same source indices, taken from b.
1313 new_vec[0][i] = b.vec_[i / 2];
1314 new_vec[1][i] = b.vec_[(i + lanes) / 2];
1315 }
1316 });
1317 return new_vec;
1318#else
1319 return argon::to_array(neon::zip(vec_, b.vec()).val);
1320#endif
1321 }
1322
1326 std::array<argon_type, 2> UnzipWith(argon_type b) {
1327#ifdef ARGON_PLATFORM_MVE
1328 std::array<argon_type, 2> new_vec;
1329 utility::constexpr_for<0, lanes, 1>([&]<int i>() { //<
1330 if ((i * 2) < lanes) {
1331 new_vec[0][i] = vec_[i * 2];
1332 new_vec[1][i] = vec_[i * 2 + 1];
1333 } else {
1334 new_vec[0][i] = b.vec_[i * 2];
1335 new_vec[1][i] = b.vec_[i * 2 + 1];
1336 }
1337 });
1338 return new_vec;
1339#else
1340 return argon::to_array(neon::unzip(vec_, b.vec()).val);
1341#endif
1342 }
1343
1346 // {b0, b1, b2, b3}}
1348 // {a1, b1, a3, b3}}
1349 std::array<argon_type, 2> TransposeWith(argon_type b) const {
1350#ifdef ARGON_PLATFORM_MVE
1351 std::array<argon_type, 2> new_vec;
1352 utility::constexpr_for<0, lanes, 1>([&]<int i>() { //<
1353 if (i % 2 == 1) {
1354 new_vec[0][i] = vec_[i];
1355 new_vec[1][i] = vec_[i + 1];
1356 } else {
1357 new_vec[0][i] = b.vec_[i + 1];
1358 new_vec[1][i] = b.vec_[i];
1359 }
1360 });
1361 return new_vec;
1362#else
1363 return argon::to_array(simd::transpose(vec_, b.vec()).val);
1364#endif
1365 }
1366
1368 ace static int size() { return lanes; }
1369
1370 template <typename FuncType>
1371 requires std::convertible_to<FuncType, std::function<scalar_type(scalar_type)>>
1372 ace argon_type map(FuncType body) const {
1373 VectorType out;
1374 utility::constexpr_for<0, lanes, 1>([&]<int i>() { //<
1375 out[i] = body(vec_[i]);
1376 });
1377 return out;
1378 }
1379
1380 template <typename FuncType>
1381 requires std::convertible_to<FuncType, std::function<scalar_type(scalar_type, int)>>
1382 ace argon_type map_with_index(FuncType body) const {
1383 VectorType out;
1384 utility::constexpr_for<0, lanes, 1>([&]<int i>() { //<
1385 out[i] = body(vec_[i], i);
1386 });
1387 return out;
1388 }
1389
1390 template <typename FuncType>
1391 requires std::convertible_to<FuncType, std::function<scalar_type(scalar_type, scalar_type)>>
1392 ace argon_type map2(argon_type other, FuncType body) const {
1393 VectorType out;
1394 utility::constexpr_for<0, lanes, 1>([&]<int i>() { //<
1395 out[i] = body(vec_[i], other.vec_[i]);
1396 });
1397 return out;
1398 }
1399
1400 template <typename FuncType>
1401 requires std::convertible_to<FuncType, std::function<void(scalar_type&)>>
1402 ace argon_type each_lane(FuncType body) {
1403 VectorType out = vec_;
1404 utility::constexpr_for<0, lanes, 1>([&]<int i>() { //<
1405 body(out[i]);
1406 });
1407 return out;
1408 }
1409
1410 template <typename FuncType>
1411 requires std::convertible_to<FuncType, std::function<void(scalar_type&, int)>>
1412 ace argon_type each_lane_with_index(FuncType body) {
1413 VectorType out = vec_;
1414 utility::constexpr_for<0, lanes, 1>([&]<int i>() { //<
1415 body(out[i], i);
1416 });
1417 return out;
1418 }
1419
1420 template <typename FuncType>
1421 requires std::convertible_to<FuncType, std::function<void()>>
1422 ace void if_lane(FuncType true_branch) {
1423 utility::constexpr_for<0, lanes, 1>([&]<int i>() { //<
1424 if (vec_[i] != 0) {
1425 true_branch();
1426 }
1427 });
1428 }
1429
1430 template <typename FuncType>
1431 requires std::convertible_to<FuncType, std::function<void()>>
1432 ace void if_else_lane(FuncType true_branch, FuncType false_branch) {
1433 utility::constexpr_for<0, lanes, 1>([&]<int i>() { //<
1434 if (vec_[i] != 0) {
1435 true_branch();
1436 } else {
1437 false_branch();
1438 }
1439 });
1440 }
1441
1442 template <typename FuncType>
1443 requires std::convertible_to<FuncType, std::function<void(int)>>
1444 ace void if_lane_with_index(FuncType true_branch) {
1445 utility::constexpr_for<0, lanes, 1>([&]<int i>() { //<
1446 if (vec_[i] != 0) {
1447 true_branch(i);
1448 }
1449 });
1450 }
1451
1452 template <typename FuncType1, typename FuncType2>
1453 requires std::convertible_to<FuncType1, std::function<void(int)>> &&
1454 std::convertible_to<FuncType2, std::function<void(int)>>
1455 ace void if_else_lane_with_index(FuncType1 true_branch, FuncType2 false_branch) {
1456 utility::constexpr_for<0, lanes, 1>([&]<int i>() { //<
1457 if (vec_[i] != 0) {
1458 true_branch(i);
1459 } else {
1460 false_branch(i);
1461 }
1462 });
1463 }
1464
1465 ace bool any() {
1466 utility::constexpr_for<0, lanes, 1>([&]<int i>() { //<
1467 if (vec_[i]) {
1468 return true;
1469 }
1470 });
1471 return false;
1472 }
1473
1474 ace bool all() {
1475#ifdef ARGON_PLATFORM_MVE
1476 return mve::max_reduce_max(vec_, vec_) != 0;
1477#else
1478 auto nonzero = TestNonzero();
1479 utility::constexpr_for<0, lanes, 1>([&]<int i>() { //<
1480 if (nonzero[i] == 0) {
1481 return false;
1482 }
1483 });
1484 return true;
1485#endif
1486 }
1487
1488 template <std::size_t Index>
1489 std::tuple_element_t<Index, argon_type> get() {
1490#ifdef ARGON_PLATFORM_MVE
1491 return vec_[Index];
1492#else
1493 return GetLane<Index>();
1494#endif
1495 }
1496
1497 protected:
1498 template <std::size_t... Ints>
1499 ace static argon_type IotaHelper(scalar_type start, std::index_sequence<Ints...>) {
1500 return VectorType{static_cast<scalar_type>(start + Ints)...};
1501 }
1502
1503 VectorType vec_;
1504};
1505
1506} // namespace argon
1507
1511namespace std {
1512template <typename T>
1513struct tuple_size<argon::Vector<T>> {
1514 static constexpr size_t value = argon::Vector<T>::lanes;
1515};
1516
// Tuple-protocol element lookup for argon::Vector; an Index at or beyond the
// lane count is rejected at compile time by the static_assert.
// NOTE(review): this listing jumps from line 1519 to 1521, so the member that
// names the element type is not visible here — confirm against the real header.
1517template <size_t Index, typename T>
1518struct tuple_element<Index, argon::Vector<T>> {
1519 static_assert(Index < argon::Vector<T>::lanes);
1521};
1522} // namespace std
1523
1524#undef ace
1525#undef simd
Provides utility templates and concepts for type traits and compile-time iteration.
A 128-bit SIMD vector wrapping a scalar type, providing arithmetic, logical, and data-movement operat...
Definition argon_full.hpp:29
Represents a single lane of a SIMD vector with the lane index known at compile time.
Definition lane.hpp:46
ace int lane()
On ARM32, return the local lane index within the 64-bit half-register returned by vec().
Definition lane.hpp:94
ace neon::Vec64_t< scalar_type > vec()
On ARM32, return the 64-bit half-register that contains this lane.
Definition lane.hpp:82
ace scalar_type Get() const
Get the scalar value of this lane.
Definition lane.hpp:73
Represents a single lane of a SIMD vector with a runtime-determined index.
Definition lane.hpp:116
ace int lane()
On ARM32, return the local lane index within the 64-bit half-register returned by vec().
Definition lane.hpp:160
ace neon::Vec64_t< scalar_type > vec()
On ARM32, return the 64-bit half-register that contains this lane.
Definition lane.hpp:148
ace scalar_type Get() const
Get the scalar value of this lane.
Definition lane.hpp:137
ace argon_type PairwiseMin(argon_type b) const
Select the minimum of each pair of lanes in the two vectors.
Definition vector.hpp:1163
Lane< simd::Vec128_t< ScalarType > > lane_type
Definition vector.hpp:54
ace argon_type MultiplySubtract(argon_type b, lane_type c) const
Multiply a vector by a lane value and subtract from a third vector.
Definition vector.hpp:522
ace Vector(VectorType vector)
Constructs a Vector from a SIMD vector type.
Definition vector.hpp:74
simd::Vec128_t< ScalarType > vector_type
Definition vector.hpp:56
ace argon_type Modulo(argon_type b) const
Get the modulo of two vectors.
Definition vector.hpp:690
ace argon_bool_type operator<(argon_type b) const
Compare two vectors, checking if this vector is less than the other.
Definition vector.hpp:221
ace argon_type ShiftLeftRound(argon_type b) const
Shift the elements of the vector to the left by a specified number of bits, rounding the result.
Definition vector.hpp:793
std::array< argon_type, 2 > TransposeWith(argon_type b) const
Perform a 2x2 matrix transpose on two vectors, returning two vectors of pairs.
Definition vector.hpp:1349
constexpr Vector(const Vector &other)=default
Copy constructor for the Vector class.
static ace std::array< argon_type, stride > LoadInterleaved(const scalar_type *ptr)
Load multiple vectors from a pointer, de-interleaving.
Definition vector.hpp:962
static ace argon_type Iota(scalar_type start)
Constructs a Vector from an incrementing sequence.
Definition vector.hpp:161
static ace argon_type FromScalar(scalar_type scalar)
Definition vector.hpp:110
ace argon_type Multiply(scalar_type b) const
Multiply a vector by a scalar value.
Definition vector.hpp:427
ace argon_type ReciprocalEstimateRefine(int n_iters=1) const
Compute a refined reciprocal estimate using Newton-Raphson iterations.
Definition vector.hpp:627
ace argon_type MultiplySubtract(argon_type b, argon_type c) const
Multiply two vectors and subtract from a third vector.
Definition vector.hpp:497
ace argon_type Extract(argon_type b) const
Extract n elements from the lower end of the operand, and the remaining elements from the top end of ...
Definition vector.hpp:1284
constexpr VectorType vec() const
Get the underlying SIMD vector.
Definition vector.hpp:275
ace argon_type ReciprocalEstimate() const
1 / value, using an estimate for speed
Definition vector.hpp:568
ace argon_type Multiply(argon_type b) const
Multiply two vectors.
Definition vector.hpp:418
ace argon_bool_type LessThanOrEqual(argon_type b) const
Compare the lanes of two vectors, setting the result lane's bits to ON if a is less than or equal to ...
Definition vector.hpp:739
ace argon_type operator--() const
Decrement the vector by 1 and return the result.
Definition vector.hpp:236
ace argon_type ShiftLeft(const int i) const
Shift the elements of the vector to the left by a specified number of bits.
Definition vector.hpp:354
static ace argon_type Load(const scalar_type *ptr)
Load a vector from a pointer.
Definition vector.hpp:863
ace argon_type operator+(argon_type b) const
Add a vector and return the result.
Definition vector.hpp:203
ace argon_type ShiftLeft(helpers::ArgonFor_t< simd::make_signed_t< Bool_t< VectorType > > > b) const
Shift the elements of the vector to the left by a specified number of bits.
Definition vector.hpp:751
ace argon_type Multiply(const_lane_type< LaneIndex > b) const
Multiply a vector by a lane value.
Definition vector.hpp:441
ace argon_type PairwiseAdd(argon_type b) const
Pairwise ops.
Definition vector.hpp:1153
ace argon_type MultiplyAdd(scalar_type b, argon_type c) const
Multiply a vector by a scalar value and add a third vector.
Definition vector.hpp:468
ace argon_bool_type operator>=(argon_type b) const
Compare two vectors, checking if this vector is greater than or equal to the other.
Definition vector.hpp:230
ace argon_type MultiplyAdd(argon_type b, lane_type c) const
Multiply a vector by a lane value and add a third vector.
Definition vector.hpp:473
ace argon_bool_type operator==(argon_type b) const
Compare two vectors for equality.
Definition vector.hpp:215
ace argon_type MultiplyAdd(argon_type b, argon_type c) const
Multiply two vectors and add a third vector.
Definition vector.hpp:448
ace argon_type BitwiseSelect(ArgType true_value, ArgType false_value) const
Bitwise select between two vectors, using the current vector as a mask.
Definition vector.hpp:1232
ace argon_type ShiftLeft(std::make_signed_t< simd::Scalar_t< Bool_t< VectorType > > > n) const
Shift the elements of the vector to the left by a specified number of bits.
Definition vector.hpp:763
ace argon_type BitwiseOrNot(argon_type b) const
Bitwise OR of the vector with the NOT of another vector.
Definition vector.hpp:1206
ace helpers::ArgonFor_t< simd::make_signed_t< Bool_t< VectorType > > > CountLeadingSignBits() const
Count the number of consecutive bits following the sign bit that are set to the same value as the sig...
Definition vector.hpp:1253
static ace argon_type FromLane(argon::ConstLane< LaneIndex, VectorType > lane)
Constructs a Vector from a ConstLane object.
Definition vector.hpp:136
ace argon_type AddSaturate(argon_type b) const
Adds two vectors, saturating the result.
Definition vector.hpp:385
std::array< argon_type, 2 > UnzipWith(argon_type b)
Unzip two vectors, returning two vectors of pairs.
Definition vector.hpp:1326
ace argon_type operator++() const
Increment the vector by 1 and return the result.
Definition vector.hpp:233
ace argon_type MultiplyAddFixedQMax(argon_type b, arg_type c) const
Multiply-add three fixed-point vectors, returning a fixed-point sum.
Definition vector.hpp:655
ace argon_type operator^(argon_type b) const
Bitwise XOR two vectors and return the result.
Definition vector.hpp:245
ace argon_type ReciprocalSqrtStep(argon_type b) const
Newton-Raphson step for reciprocal-sqrt refinement: (3 - a * b) / 2.
Definition vector.hpp:614
ace argon_bool_type Equal(argon_type b) const
Compare the lanes of two vectors, setting the result lane's bits to ON if they are equal.
Definition vector.hpp:731
ace void StoreTo(scalar_type *ptr) const
Store the vector to a pointer.
Definition vector.hpp:1133
ConstLane< LaneIndex, simd::Vec128_t< ScalarType > > const_lane_type
Definition vector.hpp:53
ace argon_type MultiplyRoundFixedQMax(lane_type l) const
Multiply a fixed-point vector by a lane value, returning a fixed-point product.
Definition vector.hpp:558
ace argon_type CountActiveBits() const
Count the number of bits that are set to one in the vector.
Definition vector.hpp:1265
ace argon_type MultiplyFixedQMax(scalar_type s) const
Multiply a QMax fixed-point vector by a scalar value, returning a fixed-point product.
Definition vector.hpp:533
ace predicate_type CompareTestNonzero(argon_type b) const
Ands the current vector with the given vector, then checks if nonzero.
Definition vector.hpp:1245
static ace std::array< argon_type, stride > LoadToLaneInterleaved(std::array< argon_type, stride > multi, const scalar_type *ptr)
Load a value from a pointer into a vector at the lane index lane, de-interleaving.
Definition vector.hpp:1054
ace argon_type PairwiseMax(argon_type b) const
Select the maximum of each pair of lanes in the two vectors.
Definition vector.hpp:1158
ace argon_type operator/(argon_type b) const
Divide a vector and return the result.
Definition vector.hpp:212
ace argon_type BitwiseNot() const
Bitwise ops.
Definition vector.hpp:1169
ace argon_type Max(argon_type b) const
Compare the lanes of two vectors, copying the larger of each lane to the result.
Definition vector.hpp:712
static ace argon_type FromLane(argon::Lane< IntrinsicType > lane)
Definition vector.hpp:123
constexpr Vector & operator=(Vector &&other)=default
Move assignment operator for the Vector class.
ace argon_type ShiftLeftInsert(argon_type b) const
Shift the elements of the vector to the left by a specified number of bits, and then OR the result wi...
Definition vector.hpp:810
ace const lane_type GetLane(const size_t i) const
Get a single lane of the vector by index.
Definition vector.hpp:292
ace argon_type BitwiseAnd(argon_type b) const
Bitwise AND of the vector with another vector.
Definition vector.hpp:1178
ace argon_type MultiplyAdd(argon_type b, const_lane_type< LaneIndex > c) const
Multiply a vector by a lane value and add a third vector.
Definition vector.hpp:484
ace argon_type Modulo(scalar_type b) const
Get the modulo of a vector and a scalar value.
Definition vector.hpp:702
ace argon_type Popcount() const
Count the number of bits that are set to one in the vector.
Definition vector.hpp:1278
ace argon_bool_type LessThan(argon_type b) const
Compare the lanes of two vectors, setting the result lane's bits to ON if a is less than b.
Definition vector.hpp:747
ace argon_type ShiftRight(const int i) const
Shift the elements of the vector to the right by a specified number of bits.
Definition vector.hpp:351
ace argon_type ShiftLeftRoundSaturate(argon_type b) const
Shift the elements of the vector to the left by a specified number of bits, rounding and saturating t...
Definition vector.hpp:796
ace Vector(scalar_type scalar)
Constructs a Vector from a scalar value.
Definition vector.hpp:79
ace argon_type ShiftRight() const
Shift the elements of the vector to the right by a specified number of bits.
Definition vector.hpp:816
ace argon_type Negate() const
Bitwise negate the vector and return the result.
Definition vector.hpp:357
ace void StoreLaneTo(scalar_type *ptr)
Store a lane of the vector to a pointer.
Definition vector.hpp:1139
ace argon_type Multiply(lane_type b) const
Multiply a vector by a lane value.
Definition vector.hpp:437
ace argon_type MultiplySubtract(argon_type b, scalar_type c) const
Multiply a vector by a scalar value and subtract from a third vector.
Definition vector.hpp:507
ace argon_type MultiplyAdd(const_lane_type< LaneIndex > b, argon_type c) const
Multiply a vector by a lane value and add a third vector.
Definition vector.hpp:490
ace argon_type MultiplyRoundFixedQMax(scalar_type s) const
Multiply a fixed-point vector by a scalar value, returning a fixed-point product.
Definition vector.hpp:551
ace argon_bool_type GreaterThanOrEqual(argon_type b) const
Compare the lanes of two vectors, setting the result lane's bits to ON if a is greater than or equal ...
Definition vector.hpp:735
ace argon_bool_type operator>(argon_type b) const
Compare two vectors, checking if this vector is greater than the other.
Definition vector.hpp:224
ace argon_type operator<<(const int i) const
Shift the elements of the vector to the left by a specified number of bits.
Definition vector.hpp:266
ace argon_type AddHalve(argon_type b) const
Adds two vectors, halving the result.
Definition vector.hpp:376
static ace std::array< argon_type, stride > LoadGatherOffsetIndexInterleaved(const scalar_type *base_ptr, helpers::ArgonFor_t< simd::make_unsigned_t< Bool_t< VectorType > > > offset_vector)
Perform a Load-Gather of interleaved elements.
Definition vector.hpp:1071
ace argon_type Divide(argon_type b) const
Divide two vectors.
Definition vector.hpp:679
ace argon_type ShiftRightRound() const
Shift the elements of the vector to the right by a specified number of bits, rounding the result.
Definition vector.hpp:826
Bool_t< simd::Vec128_t< ScalarType > > predicate_type
Definition vector.hpp:58
static ace std::array< argon_type, stride > LoadCopyInterleaved(const scalar_type *ptr)
Load multiple vectors from a pointer, duplicating the value across all lanes.
Definition vector.hpp:990
static ace int size()
Get the number of elements.
Definition vector.hpp:1368
static ace argon_type LoadGatherOffsetBytes(const scalar_type *base, helpers::ArgonFor_t< simd::make_unsigned_t< Bool_t< VectorType > > > offset_vector)
Using a base address and a vector of offset bytes and a base pointer, create a new vector.
Definition vector.hpp:887
ace argon_bool_type GreaterThan(argon_type b) const
Compare the lanes of two vectors, setting the result lane's bits to ON if a is greater than b.
Definition vector.hpp:743
ace argon_type ReciprocalStep(argon_type b) const
Newton-Raphson step for reciprocal refinement: 2 - a * b.
Definition vector.hpp:601
ace std::array< argon_type, 2 > ZipWith(argon_type b) const
Zip two vectors together, returning two vectors of pairs.
Definition vector.hpp:1305
ace argon_type operator-(argon_type b) const
Subtract a vector and return the result.
Definition vector.hpp:206
ace argon_type MultiplyRoundAddFixedQMax(argon_type b, arg_type c) const
Multiply-round-add three fixed-point vectors, returning a fixed-point sum.
Definition vector.hpp:664
helpers::ArgonFor_t< simd::Vec128_t< ScalarType > > argon_type
Definition vector.hpp:57
static ace argon_type LoadScalar(const scalar_type *ptr)
Constructs a Vector from a scalar pointer.
Definition vector.hpp:104
ace argon_type ShiftRightAccumulateRound(argon_type b) const
Shift the elements of the b vector to the right by a specified number of bits, and then add the resul...
Definition vector.hpp:846
ace argon_type operator*(argon_type b) const
Multiply a vector and return the result.
Definition vector.hpp:209
ace predicate_type TestNonzero() const
Tests each lane of the current vector for a nonzero value.
Definition vector.hpp:1248
static ace argon_type LoadCopy(const scalar_type *ptr)
Load a vector from a pointer, duplicating the value across all lanes.
Definition vector.hpp:872
ace argon_type LoadToLane(const scalar_type *ptr)
Load a lane from a pointer.
Definition vector.hpp:950
ace argon_type SubtractAbs(argon_type b) const
Subtract two vectors, taking the absolute value of the result.
Definition vector.hpp:405
ace argon_type ShiftLeft() const
Shift the elements of the vector to the left by a specified number of bits.
Definition vector.hpp:777
ace const_lane_type< lanes - 1 > LastLane()
Get the last lane of the vector.
Definition vector.hpp:348
ace argon_type ShiftLeftSaturate(helpers::ArgonFor_t< simd::make_signed_t< Bool_t< VectorType > > > b) const
Shift the elements of the vector to the left by a specified number of bits, saturating the result.
Definition vector.hpp:786
ace argon_type MultiplyFixedQMax(lane_type l) const
Multiply a QMax fixed-point vector by a lane value, returning a fixed-point product.
Definition vector.hpp:538
ace argon_type MultiplySubtract(scalar_type b, argon_type c) const
Multiply a vector by a scalar value and subtract from a third vector.
Definition vector.hpp:517
ace argon_bool_type operator<=(argon_type b) const
Compare two vectors, checking if this vector is less than or equal to the other.
Definition vector.hpp:227
ace argon_type ShiftLeftSaturate() const
Shift the elements of the vector to the left by a specified number of bits, saturating the result.
Definition vector.hpp:800
ace argon_type operator&(argon_type b) const
Bitwise AND two vectors and return the result.
Definition vector.hpp:239
ace argon_type MultiplyRoundFixedQMax(argon_type v) const
Multiply two fixed-point vectors, returning a fixed-point product.
Definition vector.hpp:545
ace argon_type SubtractSaturate(argon_type b) const
Subtract two vectors, saturating the result.
Definition vector.hpp:401
static ace std::array< argon_type, Stride > LoadToLaneInterleaved(simd::MultiVector_t< VectorType, Stride > multi, const scalar_type *ptr)
Load a value from a pointer into a vector at the lane index lane, de-interleaving.
Definition vector.hpp:1020
ace argon_type ReciprocalSqrtEstimateRefine(int n_iters=1) const
Compute a refined reciprocal-sqrt estimate using Newton-Raphson iterations.
Definition vector.hpp:640
static ace std::array< argon_type, n > LoadMulti(const scalar_type *ptr)
Load n vectors from a single contiguous set of memory.
Definition vector.hpp:1091
static ace argon_type LoadGatherOffsetIndex(const scalar_type *base, helpers::ArgonFor_t< simd::make_unsigned_t< Bool_t< VectorType > > > offset_vector)
Using a base address and a vector of offset indices and a base pointer, create a new vector.
Definition vector.hpp:919
helpers::ArgonFor_t< predicate_type > argon_bool_type
Definition vector.hpp:59
ace argon_type operator|(argon_type b) const
Bitwise OR two vectors and return the result.
Definition vector.hpp:242
ace argon_type MultiplyFixedQMax(argon_type v) const
Multiply two QMax fixed-point vectors, returning a fixed-point product.
Definition vector.hpp:529
ace argon_type MultiplyAdd(argon_type b, scalar_type c) const
Multiply a vector by a scalar value and add a third vector.
Definition vector.hpp:458
simd::Scalar_t< simd::Vec128_t< ScalarType > > scalar_type
Definition vector.hpp:55
ace const const_lane_type< LaneIndex > GetLane() const
Get a single lane of the vector by index.
Definition vector.hpp:330
ace argon_type Select(ArgType true_value, ArgType false_value) const
Bitwise select between two vectors, using the current vector as a mask.
Definition vector.hpp:1239
ace argon_type operator~() const
Bitwise NOT the vector and return the result.
Definition vector.hpp:248
ace Vector(argon::ConstLane< LaneIndex, VectorType > lane)
Constructs a Vector from a ConstLane object.
Definition vector.hpp:92
ace std::array< scalar_type, lanes > to_array()
Convert the vector to an array of scalar values.
Definition vector.hpp:282
ace argon_type Absolute() const
Get the absolute value of the vector.
Definition vector.hpp:564
static constexpr size_t lanes
Definition vector.hpp:62
ace lane_type operator[](const size_t i)
Access a lane of the vector by index.
Definition vector.hpp:254
ace argon_type BitwiseClear(argon_type b) const
Bitwise AND of the vector with the NOT of another vector.
Definition vector.hpp:1225
ace argon_type BitwiseAndNot(argon_type b) const
Bitwise AND of the vector with the NOT of another vector.
Definition vector.hpp:1216
ace argon_type ShiftRightAccumulate(argon_type b) const
Shift the elements of the b vector to the right by a specified number of bits, and then add the resul...
Definition vector.hpp:834
ace argon_type operator>>(const int i) const
Shift the elements of the vector to the right by a specified number of bits.
Definition vector.hpp:257
ace argon_type Add(argon_type b) const
Add two vectors.
Definition vector.hpp:366
ace argon_type BitwiseOr(argon_type b) const
Bitwise OR of the vector with another vector.
Definition vector.hpp:1187
ace argon_type ReciprocalSqrtEstimate() const
1 / sqrt(value), using an estimate for speed
Definition vector.hpp:584
constexpr Vector()=default
The default constructor for the Vector class.
ace const lane_type GetLane(const int i) const
Get a single lane of the vector by index.
Definition vector.hpp:310
constexpr Vector & operator=(const Vector &other)=default
Copy assignment operator for the Vector class.
static ace argon_type GenerateWithIndex(FuncType body)
Constructs a Vector from a function that generates values with an index.
Definition vector.hpp:191
ace argon_bool_type operator!=(argon_type b) const
Compare two vectors for inequality.
Definition vector.hpp:218
ace argon_type CountLeadingZeroBits() const
Count the number of consecutive top bits that are set to zero.
Definition vector.hpp:1261
ace argon_type Min(argon_type b) const
Compare the lanes of two vectors, copying the smaller of each lane to the result.
Definition vector.hpp:721
ace argon_type operator-() const
Negate the SIMD vector and return the result.
Definition vector.hpp:200
ace argon_type SubtractHalve(argon_type b) const
Subtract two vectors, halving the result.
Definition vector.hpp:398
ace argon_type MultiplyAdd(lane_type b, argon_type c) const
Multiply a vector by a lane value and add a third vector.
Definition vector.hpp:479
ace argon_type ShiftRightInsert(argon_type b) const
Shift the elements of the vector to the right by a specified number of bits, ORing the result with th...
Definition vector.hpp:858
ace argon_type SubtractAbsAdd(argon_type b, argon_type c) const
Subtract two vectors, taking the absolute value of the result and adding a third vector.
Definition vector.hpp:409
constexpr Vector(Vector &&other)=default
Move constructor for the Vector class.
ace argon_type Subtract(argon_type b) const
Subtract two vectors.
Definition vector.hpp:388
ace argon_type BitwiseXor(argon_type b) const
Bitwise XOR of the vector with another vector.
Definition vector.hpp:1196
ace Vector(argon::Lane< VectorType > lane)
Constructs a Vector from a Lane object.
Definition vector.hpp:85
static ace argon_type Generate(FuncType body)
Constructs a Vector from a function that generates values.
Definition vector.hpp:176
ace argon_type AddHalveRound(argon_type b) const
Adds two vectors, halving and rounding the result.
Definition vector.hpp:380
ace Lane< const VectorType > operator[](const size_t i) const
Access a lane of the vector by index.
Definition vector.hpp:251
Definition vector.hpp:36
Header file for SIMD features and platform detection.
typename ArgonFor< std::remove_cv_t< T > >::type ArgonFor_t
Helper alias to get the Argon type for a given vector type.
Definition argon_for.hpp:45
Lane deconstruction feature.
Definition argon_full.hpp:399
Helper functions to convert C-style arrays to std::array of Argon types, mimicking std::to_array.
constexpr std::array< helpers::ArgonFor_t< T >, N > to_array(T(&a)[N])
Convert a C-style array of vector types to a std::array of Argon types.
Definition to_array.hpp:29