Argon 0.1.0
Loading...
Searching...
No Matches
vector.hpp
1#pragma once
2#include <array>
3#include <bit>
4#include <cmath>
5#include <functional>
6#include <tuple>
7#include <type_traits>
8#include <utility>
9#include "arm_simd.hpp"
10#include "arm_simd/helpers.hpp"
11#include "arm_simd/helpers/multivector.hpp"
12#include "arm_simd/helpers/scalar.hpp"
13#include "arm_simd/helpers/vec64.hpp"
14#include "features.h"
15#include "helpers.hpp"
16#include "helpers/bool.hpp"
17#include "helpers/to_array.hpp"
18#include "lane.hpp"
19
20#ifdef __ARM_FEATURE_MVE
21#define simd mve
22#else
23#define simd neon
24#endif
25
26#ifdef ARGON_PLATFORM_SIMDE
27#define ace
28#elifdef __clang__
29#define ace [[gnu::always_inline]] constexpr
30#else
31#define ace [[gnu::always_inline]] inline
32#endif
33
34namespace argon {
35template <typename T>
36concept arithmetic = std::is_arithmetic_v<T>;
37
/// @brief True when `T` is exactly the same type as at least one of `Ts...`.
///
/// The comparison is `std::is_same`, so cv-qualifiers and references matter.
/// Evaluates to `false` when `Ts` is empty.
template <typename T, typename... Ts>
inline constexpr bool is_one_of = (std::is_same_v<T, Ts> || ...);
44
49template <typename VectorType>
50class Vector {
51 public:
52 template <size_t LaneIndex>
55 using scalar_type = simd::Scalar_t<VectorType>;
56 using vector_type = VectorType;
58 using predicate_type = Bool_t<VectorType>;
60
62 static constexpr size_t lanes = (simd::is_quadword_v<VectorType> ? 16 : 8) / sizeof(scalar_type);
63
65 constexpr Vector() = default;
66
67 constexpr Vector(Vector&& other) = default;
68 constexpr Vector(const Vector& other) = default;
69 constexpr Vector& operator=(Vector&& other) = default;
70 constexpr Vector& operator=(const Vector& other) = default;
71
74 ace Vector(VectorType vector) : vec_{std::move(vector)} {};
75
79 ace Vector(scalar_type scalar) : vec_(FromScalar(scalar)) {};
80
81#ifndef ARGON_PLATFORM_MVE
85 ace Vector(argon::Lane<VectorType> lane) : vec_(FromLane(lane)) {};
86
91 template <size_t LaneIndex>
93#endif
94
95 template <typename... ArgTypes>
96 requires(sizeof...(ArgTypes) > 1)
97 ace Vector(ArgTypes... args) : vec_{std::forward<ArgTypes>(args)...} {}
98
104 ace static argon_type LoadScalar(const scalar_type* ptr) { return LoadCopy(ptr); }
105
110 ace static argon_type FromScalar(scalar_type scalar) {
111#ifdef ARGON_PLATFORM_MVE
112 return simd::duplicate(scalar);
113#else
114 return simd::duplicate<VectorType>(scalar);
115#endif
116 }
117
122 template <simd::is_vector_type IntrinsicType>
124#ifdef ARGON_PLATFORM_MVE
125 return simd::duplicate(lane.Get());
126#else
127 return simd::duplicate_lane<vector_type>(lane.vec(), lane.lane());
128#endif
129 }
130
135 template <size_t LaneIndex>
137#ifdef ARGON_PLATFORM_MVE
138 return simd::duplicate(lane.Get());
139#else
140 if constexpr (simd::is_quadword_v<VectorType>) {
141 return simd::duplicate_lane_quad<LaneIndex>(lane.vec());
142 } else {
143 return simd::duplicate_lane<LaneIndex>(lane.vec());
144 }
145#endif
146 }
147
154 ace static argon_type Iota(scalar_type start) {
155 // TODO: Remove this once MSVC 19.44 is released.
156#if __cpp_if_consteval >= 202106L
157 return IotaHelper(start, std::make_index_sequence<lanes>{});
158#else
159 return Argon{start}.Add(VectorType{0, 1, 2, 3});
160#endif
161 }
162
167 template <typename FuncType>
168 requires std::convertible_to<FuncType, std::function<scalar_type()>>
169 ace static argon_type Generate(FuncType body) {
170 VectorType out;
171 utility::constexpr_for<0, lanes, 1>([&](size_t i) { //
172 out[i] = body();
173 });
174 return out;
175 }
176
182 template <typename FuncType>
183 requires std::convertible_to<FuncType, std::function<scalar_type(scalar_type)>>
184 ace static argon_type GenerateWithIndex(FuncType body) {
185 VectorType out;
186 utility::constexpr_for<0, lanes, 1>([&]<size_t i>() { //
187 out[i] = body(i);
188 });
189 return out;
190 }
191
193 ace argon_type operator-() const { return Negate(); }
194
196 ace argon_type operator+(argon_type b) const { return Add(b); }
197
199 ace argon_type operator-(argon_type b) const { return Subtract(b); }
200
202 ace argon_type operator*(argon_type b) const { return Multiply(b); }
203
205 ace argon_type operator/(argon_type b) const { return Divide(b); }
206
208 ace argon_bool_type operator==(argon_type b) const { return Equal(b); }
209
211 ace argon_bool_type operator!=(argon_type b) const { return ~Equal(b); }
212
214 ace argon_bool_type operator<(argon_type b) const { return LessThan(b); }
215
218
221
224
226 ace argon_type operator++() const { return Add(1); }
227
229 ace argon_type operator--() const { return Subtract(1); }
230
232 ace argon_type operator&(argon_type b) const { return BitwiseAnd(b); }
233
235 ace argon_type operator|(argon_type b) const { return BitwiseOr(b); }
236
238 ace argon_type operator^(argon_type b) const { return BitwiseXor(b); }
239
241 ace argon_type operator~() const { return BitwiseNot(); }
242
244 ace Lane<const VectorType> operator[](const size_t i) const { return GetLane(i); }
245
247 ace lane_type operator[](const size_t i) { return GetLane(i); }
248
250 ace argon_type operator>>(const int i) const {
251#if ARGON_USE_COMPILER_EXTENSIONS
252 return vec_ >> i;
253#else
254 return ShiftRight(i);
255#endif
256 }
257
259 ace argon_type operator<<(const int i) const {
260#if ARGON_USE_COMPILER_EXTENSIONS
261 return vec_ << i;
262#else
263 return ShiftLeft(i);
264#endif
265 }
266
268 [[gnu::always_inline]] constexpr VectorType vec() const { return vec_; }
269
271 [[gnu::always_inline]] constexpr operator VectorType() const { return vec_; }
272
275 ace std::array<scalar_type, lanes> to_array() {
276 std::array<scalar_type, lanes> out;
277 simd::store1(out.data(), vec_);
278 return out;
279 }
280
285 ace const lane_type GetLane(const size_t i) const {
286#ifdef ARGON_PLATFORM_MVE
287 return vec_[i];
288#else
289 return {vec_, static_cast<int>(i)};
290#endif
291 }
292 ace lane_type GetLane(const size_t i) {
293#ifdef ARGON_PLATFORM_MVE
294 return vec_[i];
295#else
296 return {vec_, static_cast<int>(i)};
297#endif
298 }
299
303 ace const lane_type GetLane(const int i) const {
304#ifdef ARGON_PLATFORM_MVE
305 return vec_[i];
306#else
307 return {vec_, i};
308#endif
309 }
310
311 ace lane_type GetLane(const int i) {
312#ifdef ARGON_PLATFORM_MVE
313 return vec_[i];
314#else
315 return {vec_, i};
316#endif
317 }
318
322 template <size_t LaneIndex>
324#ifdef ARGON_PLATFORM_MVE
325 return vec_[LaneIndex];
326#else
327 return vec_;
328#endif
329 }
330
331 template <size_t LaneIndex>
333#ifdef ARGON_PLATFORM_MVE
334 return vec_[LaneIndex];
335#else
336 return vec_;
337#endif
338 }
339
341 ace const_lane_type<lanes - 1> LastLane() { return vec_; }
342
344 ace argon_type ShiftRight(const int i) const { return simd::shift_right(vec_, i); }
345
347 ace argon_type ShiftLeft(const int i) const { return simd::shift_left(vec_, i); }
348
350 ace argon_type Negate() const {
351 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
352 return -vec_;
353 } else {
354 return simd::negate(vec_);
355 }
356 }
357
359 ace argon_type Add(argon_type b) const {
360 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
361 return vec_ + b.vec_;
362 } else {
363 return simd::add(vec_, b);
364 }
365 }
366
369 ace argon_type AddHalve(argon_type b) const { return simd::add_halve(vec_, b); }
370
373 ace argon_type AddHalveRound(argon_type b) const { return simd::add_halve_round(vec_, b); }
374
378 ace argon_type AddSaturate(argon_type b) const { return simd::add_saturate(vec_, b); }
379
382 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
383 return vec_ - b.vec_;
384 } else {
385 return simd::subtract(vec_, b);
386 }
387 }
388
391 ace argon_type SubtractHalve(argon_type b) const { return simd::subtract_halve(vec_, b); }
392
394 ace argon_type SubtractSaturate(argon_type b) const { return simd::subtract_saturate(vec_, b); }
395
398 ace argon_type SubtractAbs(argon_type b) const { return simd::subtract_absolute(vec_, b); }
399
403#ifdef ARGON_PLATFORM_MVE
404 return mve::add(vec_, mve::subtract_absolute(b, c));
405#else
406 return neon::subtract_absolute_add(vec_, b, c);
407#endif
408 }
409
412 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
413 return vec_ * b.vec_;
414 } else {
415 return simd::multiply(vec_, b);
416 }
417 }
418
421 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
422 return vec_ * b;
423 } else {
424 return simd::multiply(vec_, b);
425 }
426 }
427
428#ifndef ARGON_PLATFORM_MVE
430 ace argon_type Multiply(lane_type b) const { return neon::multiply_lane(vec_, b.vec(), b.lane()); }
431
433 template <size_t LaneIndex>
435 return neon::multiply_lane(vec_, b.vec(), b.lane());
436 }
437#endif
438
442 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
443 return vec_ + b.vec_ * c.vec_;
444 } else {
445 return simd::multiply_add(vec_, b, c);
446 }
447 }
448
452 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
453 return vec_ + b.vec_ * c;
454 } else {
455 return simd::multiply_add(vec_, b, c);
456 }
457 }
458
461 ace argon_type MultiplyAdd(scalar_type b, argon_type c) const { return MultiplyAdd(c, b); }
462
463#ifndef ARGON_PLATFORM_MVE
467 return simd::multiply_add_lane(vec_, b.vec(), c.vec(), c.lane());
468 }
469
472 ace argon_type MultiplyAdd(lane_type b, argon_type c) const { return MultiplyAdd(c, b); }
473
476 template <size_t LaneIndex>
478 return simd::multiply_add_lane(vec_, b.vec(), c.vec(), c.lane());
479 }
480
482 template <size_t LaneIndex>
484 return MultiplyAdd(c, b);
485 }
486#endif
487
491#if ARGON_USE_COMPILER_EXTENSIONS
492 return vec_ - b.vec_ * c.vec_;
493#else
494 return simd::multiply_subtract(vec_, b, c);
495#endif
496 }
497
501#if ARGON_USE_COMPILER_EXTENSIONS
502 return vec_ - b.vec_ * c;
503#else
504 return simd::multiply_subtract(vec_, b, c);
505#endif
506 }
507
511
512#ifndef ARGON_PLATFORM_MVE
516 return simd::multiply_subtract_lane(vec_, b.vec(), c.vec(), c.lane());
517 }
518#endif
519
522 ace argon_type MultiplyFixedQMax(argon_type v) const { return simd::multiply_double_saturate_high(vec_, v); }
523
526 ace argon_type MultiplyFixedQMax(scalar_type s) const { return simd::multiply_double_saturate_high(vec_, s); }
527
528#ifndef ARGON_PLATFORM_MVE
532 return simd::multiply_double_saturate_high_lane(vec_, l.vec(), l.lane());
533 }
534#endif
535
539 return simd::multiply_double_round_saturate_high(vec_, v);
540 }
541
545 return simd::multiply_double_round_saturate_high(vec_, s);
546 }
547
548#ifndef ARGON_PLATFORM_MVE
552 return simd::multiply_double_round_saturate_high_lane(vec_, l.vec(), l.lane());
553 }
554#endif
555
557 ace argon_type Absolute() const { return simd::abs(vec_); }
558
562 requires std::floating_point<scalar_type> || std::is_same_v<scalar_type, uint32_t>
563 {
564#ifdef ARGON_PLATFORM_MVE
565 if constexpr (std::is_same_v<scalar_type, uint32_t>) {
566 std::numeric_limits<uint32_t>::max() / vec_;
567 } else {
568 return 1.f / vec_;
569 }
570#else
571 return simd::reciprocal_estimate(vec_);
572#endif
573 }
574
577 template <typename arg_type>
578 requires(is_one_of<arg_type, argon_type, scalar_type, lane_type> || std::is_convertible_v<arg_type, argon_type> ||
579 std::is_convertible_v<arg_type, scalar_type>)
580 ace argon_type MultiplyAddFixedQMax(argon_type b, arg_type c) const {
581 return Add(b.MultiplyFixedQMax(c));
582 }
583
586 template <typename arg_type>
587 requires(is_one_of<arg_type, argon_type, scalar_type, lane_type> || std::is_convertible_v<arg_type, argon_type> ||
588 std::is_convertible_v<arg_type, scalar_type>)
590 return Add(b.MultiplyRoundFixedQMax(c));
591 }
592
593#ifdef __aarch64__
595 ace argon_type Divide(argon_type b) const {
596 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
597 return vec_ / b.vec_;
598 } else {
599 return simd::divide(vec_, b);
600 }
601 }
602#else
605 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
606 return vec_ / b.vec_;
607 } else {
608 return this->map2(b, [](scalar_type lane1, scalar_type lane2) { return lane1 / lane2; });
609 }
610 }
611#endif
612
616 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
617 return vec_ % b.vec_;
618 } else if constexpr (std::floating_point<scalar_type>) {
619 return this->map2(b, [](scalar_type lane1, scalar_type lane2) { return std::fmod(lane1, lane2); });
620 } else {
621 return this->map2(b, [](scalar_type lane1, scalar_type lane2) { return lane1 % lane2; });
622 }
623 }
624
628 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
629 return vec_ % b;
630 } else {
631 return this->map([b](scalar_type lane1) { return std::fmod(lane1, b); });
632 }
633 }
634
637 ace argon_type Max(argon_type b) const {
638 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
639 return vec_ > b.vec_ ? vec_ : b.vec_;
640 } else {
641 return simd::max(vec_, b);
642 }
643 }
644
646 ace argon_type Min(argon_type b) const {
647 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
648 return vec_ < b.vec_ ? vec_ : b.vec_;
649 } else {
650 return simd::min(vec_, b);
651 }
652 }
653
656 ace argon_bool_type Equal(argon_type b) const { return simd::equal(vec_, b); }
657
660 ace argon_bool_type GreaterThanOrEqual(argon_type b) const { return simd::greater_than_or_equal(vec_, b); }
661
664 ace argon_bool_type LessThanOrEqual(argon_type b) const { return simd::less_than_or_equal(vec_, b); }
665
668 ace argon_bool_type GreaterThan(argon_type b) const { return simd::greater_than(vec_, b); }
669
672 ace argon_bool_type LessThan(argon_type b) const { return simd::less_than(vec_, b); }
673
676 ace argon_type ShiftLeft(helpers::ArgonFor_t<simd::make_signed_t<Bool_t<VectorType>>> b) const
677 requires std::is_integral_v<scalar_type>
678 {
679 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
680 return vec_ << b.vec_;
681 } else {
682 return simd::shift_left(vec_, b.vec_);
683 }
684 }
685
688 ace argon_type ShiftLeft(std::make_signed_t<simd::Scalar_t<Bool_t<VectorType>>> n) const
689 requires std::is_integral_v<scalar_type>
690 {
691 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
692 return vec_ << n;
693 } else {
695 return simd::shift_left(vec_, b.vec_);
696 }
697 }
698
701 template <int n>
702 ace argon_type ShiftLeft() const {
703 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
704 return vec_ << n;
705 } else {
706 return simd::shift_left<n>(vec_);
707 }
708 }
709
711 ace argon_type ShiftLeftSaturate(helpers::ArgonFor_t<simd::make_signed_t<Bool_t<VectorType>>> b) const
712 requires(std::is_integral_v<scalar_type>)
713 {
714 return simd::shift_left_saturate(vec_, b);
715 }
716
718 ace argon_type ShiftLeftRound(argon_type b) const { return simd::shift_left_round(vec_, b); }
719
721 ace argon_type ShiftLeftRoundSaturate(argon_type b) const { return simd::shift_left_round_saturate(vec_, b); }
722
724 template <int n>
726 return simd::shift_left_saturate<n>(vec_);
727 }
728
734 template <int n>
736 return simd::shift_left_insert<n>(vec_, b);
737 }
738
740 template <int n>
741 ace argon_type ShiftRight() const {
742 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
743 return vec_ >> n;
744 } else {
745 return simd::shift_right<n>(vec_);
746 }
747 }
748
750 template <int n>
752 return simd::shift_right_round<n>(vec_);
753 }
754
758 template <int n>
760#ifdef ARGON_PLATFORM_MVE
761 return vec_ + (b >> n);
762#else
763 return simd::shift_right_accumulate<n>(vec_, b);
764#endif
765 }
766
770 template <int n>
772#ifdef ARGON_PLATFORM_MVE
773 return vec_ + mve::shift_right_round<n>(b);
774#else
775 return simd::shift_right_accumulate_round<n>(vec_, b);
776#endif
777 }
778
782 template <int n>
784 return simd::shift_right_insert<n>(vec_, b);
785 }
786
788 ace static argon_type Load(const scalar_type* ptr) {
789#ifdef ARGON_PLATFORM_MVE
790 return mve::load1(ptr);
791#else
792 return neon::load1<VectorType>(ptr);
793#endif
794 }
795
797 ace static argon_type LoadCopy(const scalar_type* ptr) {
798#ifdef ARGON_PLATFORM_MVE
799 scalar_type val = *ptr;
800 VectorType vec;
801 utility::constexpr_for<0, lanes, 1>([val, &vec]<int i>() { vec[i] = val; });
802#else
803 return simd::load1_duplicate<VectorType>(ptr);
804#endif
805 }
806
813 const scalar_type* base,
814 helpers::ArgonFor_t<simd::make_unsigned_t<Bool_t<VectorType>>> offset_vector) {
815#ifdef ARGON_PLATFORM_MVE
816 static_assert(
817 sizeof(scalar_type) == 1 || sizeof(scalar_type) == 2 || sizeof(scalar_type) == 4 || sizeof(scalar_type) == 8,
818 "Unsupported size for gather load");
819
820 if constexpr (sizeof(scalar_type) == 1) {
821 return mve::load_byte_gather_offset(base, offset_vector);
822 } else if constexpr (sizeof(scalar_type) == 2) {
823 return mve::load_halfword_gather_offset(base, offset_vector);
824 } else if constexpr (sizeof(scalar_type) == 4) {
825 return mve::load_word_gather_offset(base, offset_vector);
826 } else if constexpr (sizeof(scalar_type) == 8) {
827 return mve::load_doubleword_gather_offset(base, offset_vector);
828 }
829#else
830 argon_type destination;
831 utility::constexpr_for<0, lanes, 1>([&]<int i>() { //<
832 auto lane_val = neon::get_lane<i>(offset_vector);
833 destination = destination.template LoadToLane<i>(base + (lane_val * sizeof(scalar_type)));
834 });
835 return destination;
836#endif
837 }
838
845 const scalar_type* base,
846 helpers::ArgonFor_t<simd::make_unsigned_t<Bool_t<VectorType>>> offset_vector) {
847#ifdef ARGON_PLATFORM_MVE
848 static_assert(
849 sizeof(scalar_type) == 1 || sizeof(scalar_type) == 2 || sizeof(scalar_type) == 4 || sizeof(scalar_type) == 8,
850 "Unsupported size for gather load");
851
852 if constexpr (sizeof(scalar_type) == 1) {
853 return mve::load_byte_gather_offset(base, offset_vector);
854 } else if constexpr (sizeof(scalar_type) == 2) {
855 return mve::load_halfword_gather_offset(base, offset_vector * sizeof(scalar_type));
856 } else if constexpr (sizeof(scalar_type) == 4) {
857 return mve::load_word_gather_offset(base, offset_vector * sizeof(scalar_type));
858 } else if constexpr (sizeof(scalar_type) == 8) {
859 return mve::load_doubleword_gather_offset(base, offset_vector * sizeof(scalar_type));
860 }
861#else
862 argon_type destination;
863 utility::constexpr_for<0, lanes, 1>([&]<int i>() { //<
864 auto lane_val = neon::get_lane<i>(offset_vector);
865 destination = destination.template LoadToLane<i>(base + lane_val);
866 });
867 return destination;
868#endif
869 }
870
874 template <size_t lane>
876 argon_type new_argon = *this;
877 return new_argon.template GetLane<lane>().Load(ptr);
878 }
879
886 template <size_t stride>
887 ace static std::array<argon_type, stride> LoadInterleaved(const scalar_type* ptr) {
888#ifdef ARGON_PLATFORM_MVE
889 static_assert(stride == 2 || stride == 4,
890 "De-interleaving Loads can only be performed with a stride of 2, 3, or 4");
891 if constexpr (stride == 2) {
892 return argon::to_array(mve::load2(ptr).val);
893 } else if constexpr (stride == 4) {
894 return argon::to_array(mve::load4(ptr).val);
895 }
896#else
897 static_assert(stride > 1 && stride < 5, "De-interleaving Loads can only be performed with a stride of 2, 3, or 4");
898 using multivec_type = simd::MultiVector_t<VectorType, stride>;
899 if constexpr (stride == 2) {
900 return argon::to_array(neon::load2<multivec_type>(ptr).val);
901 } else if constexpr (stride == 3) {
902 return argon::to_array(neon::load3<multivec_type>(ptr).val);
903 } else if constexpr (stride == 4) {
904 return argon::to_array(neon::load4<multivec_type>(ptr).val);
905 }
906#endif
907 }
908
914 template <size_t stride>
915 ace static std::array<argon_type, stride> LoadCopyInterleaved(const scalar_type* ptr) {
916#ifdef ARGON_PLATFORM_MVE
917 static_assert(stride == 2 || stride == 4,
918 "De-interleaving LoadCopy can only be performed with a stride of 2, 3, or 4");
919 if constexpr (stride == 2) {
920 return {mve::duplicate(*ptr++), mve::duplicate(*ptr++)};
921 } else if constexpr (stride == 4) {
922 return {mve::duplicate(*ptr++), mve::duplicate(*ptr++), mve::duplicate(*ptr++), mve::duplicate(*ptr)};
923 }
924#else
925 static_assert(stride > 1 && stride < 5,
926 "De-interleaving LoadCopy can only be performed with a stride of 2, 3, or 4");
927 using multivec_type = simd::MultiVector<VectorType, stride>::type;
928 if constexpr (stride == 2) {
929 return argon::to_array(simd::load2_duplicate<multivec_type>(ptr).val);
930 } else if constexpr (stride == 3) {
931 return argon::to_array(simd::load3_duplicate<multivec_type>(ptr).val);
932 } else if constexpr (stride == 4) {
933 return argon::to_array(simd::load4_duplicate<multivec_type>(ptr).val);
934 }
935#endif
936 }
937
944 template <size_t LaneIndex, size_t Stride>
945 ace static std::array<argon_type, Stride> LoadToLaneInterleaved(simd::MultiVector_t<VectorType, Stride> multi,
946 const scalar_type* ptr) {
947 static_assert(Stride > 1 && Stride < 5, "De-interleaving Loads can only be performed with a stride of 2, 3, or 4");
948#ifdef ARGON_PLATFORM_MVE
949 auto out = multi;
950 utility::constexpr_for<0, Stride, 1>([&]<int i>() { //<
951 out.val[i][LaneIndex] = ptr[i];
952 });
953 return argon::to_array(out.val);
954#else
955 if constexpr (Stride == 2) {
956 if constexpr (simd::is_quadword_v<VectorType>) {
957 return argon::to_array(simd::load2_lane_quad<LaneIndex>(ptr, multi).val);
958 } else {
959 return argon::to_array(simd::load2_lane<LaneIndex>(ptr, multi).val);
960 }
961 } else if constexpr (Stride == 3) {
962 if constexpr (simd::is_quadword_v<VectorType>) {
963 return argon::to_array(simd::load3_lane_quad<LaneIndex>(ptr, multi).val);
964 } else {
965 return argon::to_array(simd::load3_lane<LaneIndex>(ptr, multi).val);
966 }
967 } else if constexpr (Stride == 4) {
968 if constexpr (simd::is_quadword_v<VectorType>) {
969 return argon::to_array(simd::load4_lane_quad<LaneIndex>(ptr, multi).val);
970 } else {
971 return argon::to_array(simd::load4_lane<LaneIndex>(ptr, multi).val);
972 }
973 }
974#endif
975 }
976
978 template <size_t lane, size_t stride>
979 ace static std::array<argon_type, stride> LoadToLaneInterleaved(std::array<argon_type, stride> multi,
980 const scalar_type* ptr) {
981 using multivec_type = simd::MultiVector_t<VectorType, stride>;
982 return LoadToLaneInterleaved<lane, stride>(*(multivec_type*)multi.data(), ptr);
983 }
984
995 template <size_t stride>
996 ace static std::array<argon_type, stride> LoadGatherOffsetIndexInterleaved(
997 const scalar_type* base_ptr,
998 helpers::ArgonFor_t<simd::make_unsigned_t<Bool_t<VectorType>>> offset_vector) {
999 static_assert(stride > 1 && stride < 5, "De-interleaving Loads can only be performed with a stride of 2, 3, or 4");
1000 std::array<argon_type, stride> multi{};
1001 utility::constexpr_for<0, lanes, 1>([&]<int i>() { //<
1002 auto lane_val = simd::get_lane<i>(offset_vector);
1003 multi = LoadToLaneInterleaved<i, stride>(multi, base_ptr + (lane_val * stride));
1004 });
1005 return multi;
1006 }
1007
1015 template <size_t n>
1016 ace static std::array<argon_type, n> LoadMulti(const scalar_type* ptr) {
1017 static_assert(n > 1 && n < 5, "LoadMulti can only be performed with a size of 2, 3, or 4");
1018#ifdef ARGON_PLATFORM_MVE
1019 std::array<argon_type, n> multi{};
1020 utility::constexpr_for<0, n, 1>([&]<int i>() { //<
1021 multi[i] = *ptr;
1022 ptr += lanes;
1023 });
1024 return multi;
1025#else
1026#if defined(__clang__) || (__GNUC__ > 13)
1027 using multi_type = simd::MultiVector_t<VectorType, n>;
1028 if constexpr (n == 2) {
1029 return argon::to_array(simd::load1_x2<multi_type>(ptr).val);
1030 } else if constexpr (n == 3) {
1031 return argon::to_array(simd::load1_x3<multi_type>(ptr).val);
1032 } else if constexpr (n == 4) {
1033 return argon::to_array(simd::load1_x4<multi_type>(ptr).val);
1034 }
1035#else
1036 if constexpr (n == 2) {
1037 auto a = simd::load1(ptr);
1038 auto b = simd::load1(ptr + lanes);
1039 return {a, b};
1040 } else if constexpr (n == 3) {
1041 auto a = simd::load1(ptr);
1042 auto b = simd::load1(ptr + lanes);
1043 auto c = simd::load1(ptr + 2 * lanes);
1044 return {a, b, c};
1045 } else if constexpr (n == 4) {
1046 auto a = simd::load1(ptr);
1047 auto b = simd::load1(ptr + lanes);
1048 auto c = simd::load1(ptr + 2 * lanes);
1049 auto d = simd::load1(ptr + 3 * lanes);
1050 return {a, b, c, d};
1051 }
1052#endif
1053#endif
1054 }
1055
1058 ace void StoreTo(scalar_type* ptr) const { simd::store1(ptr, vec_); }
1059
1063 template <int LaneIndex>
1064 ace void StoreLaneTo(scalar_type* ptr) {
1065#ifdef ARGON_PLATFORM_MVE
1066 *ptr = vec_[LaneIndex];
1067#else
1068 simd::store1_lane<LaneIndex>(ptr, vec_);
1069#endif
1070 }
1071
1072#ifndef ARGON_PLATFORM_MVE
1074
1078 ace argon_type PairwiseAdd(argon_type b) const { return simd::pairwise_add(vec_, b); }
1079
1083 ace argon_type PairwiseMax(argon_type b) const { return simd::pairwise_max(vec_, b); }
1084
1088 ace argon_type PairwiseMin(argon_type b) const { return simd::pairwise_min(vec_, b); }
1089#endif
1090
1092
1095 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
1096 return ~vec_;
1097 } else {
1098 return simd::bitwise_not(vec_);
1099 }
1100 }
1101
1104 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
1105 return vec_ & b.vec_;
1106 } else {
1107 return simd::bitwise_and(vec_, b);
1108 }
1109 }
1110
1113 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
1114 return vec_ | b.vec_;
1115 } else {
1116 return simd::bitwise_or(vec_, b);
1117 }
1118 }
1119
1122 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
1123 return vec_ ^ b.vec_;
1124 } else {
1125 return simd::bitwise_xor(vec_, b);
1126 }
1127 }
1128
1132 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
1133 return vec_ | ~b.vec_;
1134 } else {
1135 return simd::bitwise_or_not(vec_, b);
1136 }
1137 }
1138
1142 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
1143 return vec_ & ~b.vec_;
1144 } else {
1145 return simd::bitwise_clear(vec_, b);
1146 }
1147 }
1148
1151
1152#ifndef ARGON_PLATFORM_MVE
1155 template <typename ArgType>
1156 requires std::is_unsigned_v<scalar_type>
1157 ace argon_type BitwiseSelect(ArgType true_value, ArgType false_value) const {
1158 return simd::bitwise_select(vec_, true_value, false_value);
1159 }
1160
1162 template <typename ArgType>
1163 requires std::is_unsigned_v<scalar_type>
1164 ace argon_type Select(ArgType true_value, ArgType false_value) const {
1165 return simd::bitwise_select(true_value, false_value);
1166 }
1167
1170 ace predicate_type CompareTestNonzero(argon_type b) const { return simd::compare_test_nonzero(vec_, b); }
1171
1173 ace predicate_type TestNonzero() const { return simd::compare_test_nonzero(vec_, argon_type{1}); }
1174#endif
1175
1179 requires(std::is_integral_v<scalar_type>)
1180 {
1181 return simd::count_leading_sign_bits(vec_);
1182 }
1183
1186 ace argon_type CountLeadingZeroBits() const { return simd::count_leading_zero_bits(vec_); }
1187
1191#ifdef ARGON_PLATFORM_MVE
1192 auto new_vec = vec_;
1193 utility::constexpr_for<0, lanes, 1>([&]<int i>() { //<
1194 new_vec[i] = std::popcount(vec_[i]);
1195 });
1196 return new_vec;
1197#else
1198 return neon::count_active_bits(vec_);
1199#endif
1200 }
1201
1203 ace argon_type Popcount() const { return CountActiveBits(); }
1204
1208 template <int n>
1210#ifdef ARGON_PLATFORM_MVE
1211 auto new_vec = vec_;
1212 utility::constexpr_for<0, lanes, 1>([&]<int i>() { //<
1213 if (i < n) {
1214 new_vec[i] = b.vec_[i];
1215 }
1216 });
1217 return new_vec;
1218#else
1219 return simd::extract<n>(vec_, b);
1220#endif
1221 }
1222
1223 ace argon_type Reverse64bit() const { return simd::reverse_64bit(vec_); }
1224 ace argon_type Reverse32bit() const { return simd::reverse_32bit(vec_); }
1225 ace argon_type Reverse16bit() const { return simd::reverse_16bit(vec_); }
1226
1230 ace std::array<argon_type, 2> ZipWith(argon_type b) const {
1231#ifdef ARGON_PLATFORM_MVE
1232 std::array<argon_type, 2> new_vec;
1233 utility::constexpr_for<0, lanes, 1>([&]<int i>() { //<
1234 if (i % 2 == 0) {
1235 new_vec[0][i] = vec_[i / 2];
1236 new_vec[1][i] = vec_[(i + lanes) / 2];
1237 } else {
1238 new_vec[0][i] = b.vec_[i / 2];
1239 new_vec[1][i] = b.vec_[(i + lanes) / 2];
1240 }
1241 });
1242 return new_vec;
1243#else
1244 return argon::to_array(neon::zip(vec_, b.vec()).val);
1245#endif
1246 }
1247
1251 std::array<argon_type, 2> UnzipWith(argon_type b) {
1252#ifdef ARGON_PLATFORM_MVE
1253 std::array<argon_type, 2> new_vec;
1254 utility::constexpr_for<0, lanes, 1>([&]<int i>() { //<
1255 if ((i * 2) < lanes) {
1256 new_vec[0][i] = vec_[i * 2];
1257 new_vec[1][i] = vec_[i * 2 + 1];
1258 } else {
1259 new_vec[0][i] = b.vec_[i * 2];
1260 new_vec[1][i] = b.vec_[i * 2 + 1];
1261 }
1262 });
1263 return new_vec;
1264#else
1265 return argon::to_array(neon::unzip(vec_, b.vec()).val);
1266#endif
1267 }
1268
1271 // {b0, b1, b2, b3}}
1273 // {a1, b1, a3, b3}}
1274 std::array<argon_type, 2> TransposeWith(argon_type b) const {
1275#ifdef ARGON_PLATFORM_MVE
1276 std::array<argon_type, 2> new_vec;
1277 utility::constexpr_for<0, lanes, 1>([&]<int i>() { //<
1278 if (i % 2 == 1) {
1279 new_vec[0][i] = vec_[i];
1280 new_vec[1][i] = vec_[i + 1];
1281 } else {
1282 new_vec[0][i] = b.vec_[i + 1];
1283 new_vec[1][i] = b.vec_[i];
1284 }
1285 });
1286 return new_vec;
1287#else
1288 return argon::to_array(simd::transpose(vec_, b.vec()).val);
1289#endif
1290 }
1291
1293 ace static int size() { return lanes; }
1294
1295 template <typename FuncType>
1296 requires std::convertible_to<FuncType, std::function<scalar_type(scalar_type)>>
1297 ace argon_type map(FuncType body) const {
1298 VectorType out;
1299 utility::constexpr_for<0, lanes, 1>([&]<int i>() { //<
1300 out[i] = body(vec_[i]);
1301 });
1302 return out;
1303 }
1304
1305 template <typename FuncType>
1306 requires std::convertible_to<FuncType, std::function<scalar_type(scalar_type, int)>>
1307 ace argon_type map_with_index(FuncType body) const {
1308 VectorType out;
1309 utility::constexpr_for<0, lanes, 1>([&]<int i>() { //<
1310 out[i] = body(vec_[i], i);
1311 });
1312 return out;
1313 }
1314
1315 template <typename FuncType>
1316 requires std::convertible_to<FuncType, std::function<scalar_type(scalar_type, scalar_type)>>
1317 ace argon_type map2(argon_type other, FuncType body) const {
1318 VectorType out;
1319 utility::constexpr_for<0, lanes, 1>([&]<int i>() { //<
1320 out[i] = body(vec_[i], other.vec_[i]);
1321 });
1322 return out;
1323 }
1324
1325 template <typename FuncType>
1326 requires std::convertible_to<FuncType, std::function<void(scalar_type&)>>
1327 ace argon_type each_lane(FuncType body) {
1328 VectorType out = vec_;
1329 utility::constexpr_for<0, lanes, 1>([&]<int i>() { //<
1330 body(out[i]);
1331 });
1332 return out;
1333 }
1334
1335 template <typename FuncType>
1336 requires std::convertible_to<FuncType, std::function<void(scalar_type&, int)>>
1337 ace argon_type each_lane_with_index(FuncType body) {
1338 VectorType out = vec_;
1339 utility::constexpr_for<0, lanes, 1>([&]<int i>() { //<
1340 body(out[i], i);
1341 });
1342 return out;
1343 }
1344
1345 template <typename FuncType>
1346 requires std::convertible_to<FuncType, std::function<void()>>
1347 ace void if_lane(FuncType true_branch) {
1348 utility::constexpr_for<0, lanes, 1>([&]<int i>() { //<
1349 if (vec_[i] != 0) {
1350 true_branch();
1351 }
1352 });
1353 }
1354
1355 template <typename FuncType>
1356 requires std::convertible_to<FuncType, std::function<void()>>
1357 ace void if_else_lane(FuncType true_branch, FuncType false_branch) {
1358 utility::constexpr_for<0, lanes, 1>([&]<int i>() { //<
1359 if (vec_[i] != 0) {
1360 true_branch();
1361 } else {
1362 false_branch();
1363 }
1364 });
1365 }
1366
1367 template <typename FuncType>
1368 requires std::convertible_to<FuncType, std::function<void(int)>>
1369 ace void if_lane_with_index(FuncType true_branch) {
1370 utility::constexpr_for<0, lanes, 1>([&]<int i>() { //<
1371 if (vec_[i] != 0) {
1372 true_branch(i);
1373 }
1374 });
1375 }
1376
1377 template <typename FuncType1, typename FuncType2>
1378 requires std::convertible_to<FuncType1, std::function<void(int)>> &&
1379 std::convertible_to<FuncType2, std::function<void(int)>>
1380 ace void if_else_lane_with_index(FuncType1 true_branch, FuncType2 false_branch) {
1381 utility::constexpr_for<0, lanes, 1>([&]<int i>() { //<
1382 if (vec_[i] != 0) {
1383 true_branch(i);
1384 } else {
1385 false_branch(i);
1386 }
1387 });
1388 }
1389
1390 ace bool any() {
1391 utility::constexpr_for<0, lanes, 1>([&]<int i>() { //<
1392 if (vec_[i]) {
1393 return true;
1394 }
1395 });
1396 return false;
1397 }
1398
1399 ace bool all() {
1400#ifdef ARGON_PLATFORM_MVE
1401 return mve::max_reduce_max(vec_, vec_) != 0;
1402#else
1403 auto nonzero = TestNonzero();
1404 utility::constexpr_for<0, lanes, 1>([&]<int i>() { //<
1405 if (nonzero[i] == 0) {
1406 return false;
1407 }
1408 });
1409 return true;
1410#endif
1411 }
1412
1413 template <std::size_t Index>
1414 std::tuple_element_t<Index, argon_type> get() {
1415#ifdef ARGON_PLATFORM_MVE
1416 return vec_[Index];
1417#else
1418 return GetLane<Index>();
1419#endif
1420 }
1421
1422 protected:
1423 template <std::size_t... Ints>
1424 ace static argon_type IotaHelper(scalar_type start, std::index_sequence<Ints...>) {
1425 return VectorType{static_cast<scalar_type>(start + Ints)...};
1426 }
1427
1428 VectorType vec_;
1429};
1430
1431} // namespace argon
1432
1436namespace std {
1437template <typename T>
1438struct tuple_size<argon::Vector<T>> {
1439 static constexpr size_t value = argon::Vector<T>::lanes;
1440};
1441
1442template <size_t Index, typename T>
1443struct tuple_element<Index, argon::Vector<T>> {
1444 static_assert(Index < argon::Vector<T>::lanes);
1446};
1447} // namespace std
1448
1449#undef ace
1450#undef simd
Provides utility templates and concepts for type traits and compile-time iteration.
Definition argon_full.hpp:24
Represents a single lane of a SIMD vector, where the lane's index is known at compile time.
Definition lane.hpp:44
Represents a single lane of a SIMD vector.
Definition lane.hpp:102
ace argon_type PairwiseMin(argon_type b) const
Select the minimum of each pair of lanes in the two vectors.
Definition vector.hpp:1088
Lane< simd::Vec128_t< ScalarType > > lane_type
Definition vector.hpp:54
ace argon_type MultiplySubtract(argon_type b, lane_type c) const
Multiply a vector by a lane value and subtract from a third vector.
Definition vector.hpp:515
ace Vector(VectorType vector)
Constructs a Vector from a SIMD vector type.
Definition vector.hpp:74
simd::Vec128_t< ScalarType > vector_type
Definition vector.hpp:56
ace argon_type Modulo(argon_type b) const
Get the modulo of two vectors.
Definition vector.hpp:615
ace argon_bool_type operator<(argon_type b) const
Compare two vectors, checking if this vector is less than the other.
Definition vector.hpp:214
ace argon_type ShiftLeftRound(argon_type b) const
Shift the elements of the vector to the left by a specified number of bits, rounding the result.
Definition vector.hpp:718
static ace argon_type GenerateWithIndex(FuncType body)
Constructs a Vector from a function that generates values with an index.
Definition vector.hpp:184
std::array< argon_type, 2 > TransposeWith(argon_type b) const
Perform a 2x2 matrix transpose on two vectors, returning two vectors of pairs.
Definition vector.hpp:1274
constexpr Vector(const Vector &other)=default
Copy constructor for the Vector class.
static ace std::array< argon_type, stride > LoadInterleaved(const scalar_type *ptr)
Load multiple vectors from a pointer, de-interleaving.
Definition vector.hpp:887
static ace argon_type Iota(scalar_type start)
Constructs a Vector from an incrementing sequence.
Definition vector.hpp:154
static ace argon_type FromScalar(scalar_type scalar)
Definition vector.hpp:110
ace argon_type Multiply(scalar_type b) const
Multiply a vector by a scalar value.
Definition vector.hpp:420
ace argon_type MultiplySubtract(argon_type b, argon_type c) const
Multiply two vectors and subtract from a third vector.
Definition vector.hpp:490
ace argon_type Extract(argon_type b) const
Extract n elements from the lower end of the operand, and the remaining elements from the top end of ...
Definition vector.hpp:1209
constexpr VectorType vec() const
Get the underlying SIMD vector.
Definition vector.hpp:268
ace argon_type ReciprocalEstimate() const
1 / value, using an estimate for speed
Definition vector.hpp:561
ace argon_type Multiply(argon_type b) const
Multiply two vectors.
Definition vector.hpp:411
ace argon_bool_type LessThanOrEqual(argon_type b) const
Compare the lanes of two vectors, setting the result lane's bits to ON if a is less than or equal to ...
Definition vector.hpp:664
ace argon_type operator--() const
Decrement the vector by 1 and return the result.
Definition vector.hpp:229
ace argon_type ShiftLeft(const int i) const
Shift the elements of the vector to the left by a specified number of bits.
Definition vector.hpp:347
static ace argon_type Load(const scalar_type *ptr)
Load a vector from a pointer.
Definition vector.hpp:788
ace argon_type operator+(argon_type b) const
Add a vector and return the result.
Definition vector.hpp:196
ace argon_type ShiftLeft(helpers::ArgonFor_t< simd::make_signed_t< Bool_t< VectorType > > > b) const
Shift the elements of the vector to the left by a specified number of bits.
Definition vector.hpp:676
ace argon_type Multiply(const_lane_type< LaneIndex > b) const
Multiply a vector by a lane value.
Definition vector.hpp:434
ace argon_type PairwiseAdd(argon_type b) const
Pairwise ops.
Definition vector.hpp:1078
ace argon_type MultiplyAdd(scalar_type b, argon_type c) const
Multiply a vector by a scalar value and add a third vector.
Definition vector.hpp:461
ace argon_bool_type operator>=(argon_type b) const
Compare two vectors, checking if this vector is greater than or equal to the other.
Definition vector.hpp:223
ace argon_type MultiplyAdd(argon_type b, lane_type c) const
Multiply a vector by a lane value and add a third vector.
Definition vector.hpp:466
ace argon_bool_type operator==(argon_type b) const
Compare two vectors for equality.
Definition vector.hpp:208
ace argon_type MultiplyAdd(argon_type b, argon_type c) const
Multiply two vectors and add a third vector.
Definition vector.hpp:441
ace argon_type MultiplyRoundAddFixedQMax(argon_type b, arg_type c) const
Multiply-round-add three fixed-point vectors, returning a fixed-point sum.
Definition vector.hpp:589
ace argon_type ShiftLeft(std::make_signed_t< simd::Scalar_t< Bool_t< VectorType > > > n) const
Shift the elements of the vector to the left by a specified number of bits.
Definition vector.hpp:688
ace argon_type BitwiseOrNot(argon_type b) const
Bitwise OR of the vector with the NOT of another vector.
Definition vector.hpp:1131
ace helpers::ArgonFor_t< simd::make_signed_t< Bool_t< VectorType > > > CountLeadingSignBits() const
Count the number of consecutive bits following the sign bit that are set to the same value as the sig...
Definition vector.hpp:1178
static ace argon_type FromLane(argon::ConstLane< LaneIndex, VectorType > lane)
Constructs a Vector from a ConstLane object.
Definition vector.hpp:136
ace argon_type AddSaturate(argon_type b) const
Adds two vectors, saturating the result.
Definition vector.hpp:378
std::array< argon_type, 2 > UnzipWith(argon_type b)
Unzip two vectors, returning two vectors of pairs.
Definition vector.hpp:1251
ace argon_type operator++() const
Increment the vector by 1 and return the result.
Definition vector.hpp:226
ace argon_type operator^(argon_type b) const
Bitwise XOR two vectors and return the result.
Definition vector.hpp:238
ace argon_bool_type Equal(argon_type b) const
Compare the lanes of two vectors, setting the result lane's bits to ON if they are equal.
Definition vector.hpp:656
ace void StoreTo(scalar_type *ptr) const
Store the vector to a pointer.
Definition vector.hpp:1058
ConstLane< LaneIndex, simd::Vec128_t< ScalarType > > const_lane_type
Definition vector.hpp:53
ace argon_type MultiplyRoundFixedQMax(lane_type l) const
Multiply a fixed-point vector by a lane value, returning a fixed-point product.
Definition vector.hpp:551
ace argon_type CountActiveBits() const
Count the number of bits that are set to one in the vector.
Definition vector.hpp:1190
ace argon_type MultiplyFixedQMax(scalar_type s) const
Multiply a QMax fixed-point vector by a scalar value, returning a fixed-point product.
Definition vector.hpp:526
ace predicate_type CompareTestNonzero(argon_type b) const
Ands the current vector with the given vector, then checks if nonzero.
Definition vector.hpp:1170
static ace std::array< argon_type, stride > LoadToLaneInterleaved(std::array< argon_type, stride > multi, const scalar_type *ptr)
Load a value from a pointer into a vector at the lane index lane, de-interleaving.
Definition vector.hpp:979
ace argon_type PairwiseMax(argon_type b) const
Select the maximum of each pair of lanes in the two vectors.
Definition vector.hpp:1083
ace argon_type operator/(argon_type b) const
Divide a vector and return the result.
Definition vector.hpp:205
ace argon_type BitwiseNot() const
Bitwise ops.
Definition vector.hpp:1094
ace argon_type Max(argon_type b) const
Compare the lanes of two vectors, copying the larger of each lane to the result.
Definition vector.hpp:637
static ace argon_type FromLane(argon::Lane< IntrinsicType > lane)
Definition vector.hpp:123
constexpr Vector & operator=(Vector &&other)=default
Move assignment operator for the Vector class.
ace argon_type ShiftLeftInsert(argon_type b) const
Shift the elements of the vector to the left by a specified number of bits, and then OR the result wi...
Definition vector.hpp:735
ace const lane_type GetLane(const size_t i) const
Get a single lane of the vector by index.
Definition vector.hpp:285
ace argon_type BitwiseAnd(argon_type b) const
Bitwise AND of the vector with another vector.
Definition vector.hpp:1103
ace argon_type MultiplyAdd(argon_type b, const_lane_type< LaneIndex > c) const
Multiply a vector by a lane value and add a third vector.
Definition vector.hpp:477
ace argon_type Modulo(scalar_type b) const
Get the modulo of a vector and a scalar value.
Definition vector.hpp:627
ace argon_type Popcount() const
Count the number of bits that are set to one in the vector.
Definition vector.hpp:1203
ace argon_bool_type LessThan(argon_type b) const
Compare the lanes of two vectors, setting the result lane's bits to ON if a is less than b.
Definition vector.hpp:672
ace argon_type ShiftRight(const int i) const
Shift the elements of the vector to the right by a specified number of bits.
Definition vector.hpp:344
ace argon_type ShiftLeftRoundSaturate(argon_type b) const
Shift the elements of the vector to the left by a specified number of bits, rounding and saturating t...
Definition vector.hpp:721
ace Vector(scalar_type scalar)
Constructs a Vector from a scalar value.
Definition vector.hpp:79
ace argon_type ShiftRight() const
Shift the elements of the vector to the right by a specified number of bits.
Definition vector.hpp:741
ace argon_type Negate() const
Bitwise negate the vector and return the result.
Definition vector.hpp:350
ace void StoreLaneTo(scalar_type *ptr)
Store a lane of the vector to a pointer.
Definition vector.hpp:1064
ace argon_type Multiply(lane_type b) const
Multiply a vector by a lane value.
Definition vector.hpp:430
ace argon_type MultiplySubtract(argon_type b, scalar_type c) const
Multiply a vector by a scalar value and subtract from a third vector.
Definition vector.hpp:500
ace argon_type MultiplyAdd(const_lane_type< LaneIndex > b, argon_type c) const
Multiply a vector by a lane value and add a third vector.
Definition vector.hpp:483
ace argon_type MultiplyRoundFixedQMax(scalar_type s) const
Multiply a fixed-point vector by a scalar value, returning a fixed-point product.
Definition vector.hpp:544
ace argon_bool_type GreaterThanOrEqual(argon_type b) const
Compare the lanes of two vectors, setting the result lane's bits to ON if a is greater than or equal ...
Definition vector.hpp:660
ace argon_bool_type operator>(argon_type b) const
Compare two vectors, checking if this vector is greater than the other.
Definition vector.hpp:217
ace argon_type operator<<(const int i) const
Shift the elements of the vector to the left by a specified number of bits.
Definition vector.hpp:259
ace argon_type AddHalve(argon_type b) const
Adds two vectors, halving the result.
Definition vector.hpp:369
static ace std::array< argon_type, stride > LoadGatherOffsetIndexInterleaved(const scalar_type *base_ptr, helpers::ArgonFor_t< simd::make_unsigned_t< Bool_t< VectorType > > > offset_vector)
Perform a Load-Gather of interleaved elements.
Definition vector.hpp:996
ace argon_type Divide(argon_type b) const
Divide two vectors.
Definition vector.hpp:604
ace argon_type ShiftRightRound() const
Shift the elements of the vector to the right by a specified number of bits, rounding the result.
Definition vector.hpp:751
Bool_t< simd::Vec128_t< ScalarType > > predicate_type
Definition vector.hpp:58
static ace std::array< argon_type, stride > LoadCopyInterleaved(const scalar_type *ptr)
Load multiple vectors from a pointer, duplicating the value across all lanes.
Definition vector.hpp:915
static ace int size()
Get the number of elements.
Definition vector.hpp:1293
static ace argon_type LoadGatherOffsetBytes(const scalar_type *base, helpers::ArgonFor_t< simd::make_unsigned_t< Bool_t< VectorType > > > offset_vector)
Using a base address and a vector of offset bytes and a base pointer, create a new vector.
Definition vector.hpp:812
ace argon_bool_type GreaterThan(argon_type b) const
Compare the lanes of two vectors, setting the result lane's bits to ON if a is greater than b.
Definition vector.hpp:668
ace std::array< argon_type, 2 > ZipWith(argon_type b) const
Zip two vectors together, returning two vectors of pairs.
Definition vector.hpp:1230
ace argon_type operator-(argon_type b) const
Subtract a vector and return the result.
Definition vector.hpp:199
helpers::ArgonFor_t< simd::Vec128_t< ScalarType > > argon_type
Definition vector.hpp:57
static ace argon_type LoadScalar(const scalar_type *ptr)
Constructs a Vector from a scalar pointer.
Definition vector.hpp:104
ace argon_type ShiftRightAccumulateRound(argon_type b) const
Shift the elements of the b vector to the right by a specified number of bits, and then add the resul...
Definition vector.hpp:771
ace argon_type operator*(argon_type b) const
Multiply a vector and return the result.
Definition vector.hpp:202
ace predicate_type TestNonzero() const
Checks if the lanes of the current vector are nonzero.
Definition vector.hpp:1173
static ace argon_type LoadCopy(const scalar_type *ptr)
Load a vector from a pointer, duplicating the value across all lanes.
Definition vector.hpp:797
ace argon_type LoadToLane(const scalar_type *ptr)
Load a lane from a pointer.
Definition vector.hpp:875
ace argon_type SubtractAbs(argon_type b) const
Subtract two vectors, taking the absolute value of the result.
Definition vector.hpp:398
ace argon_type ShiftLeft() const
Shift the elements of the vector to the left by a specified number of bits.
Definition vector.hpp:702
ace const_lane_type< lanes - 1 > LastLane()
Get the last lane of the vector.
Definition vector.hpp:341
ace argon_type ShiftLeftSaturate(helpers::ArgonFor_t< simd::make_signed_t< Bool_t< VectorType > > > b) const
Shift the elements of the vector to the left by a specified number of bits, saturating the result.
Definition vector.hpp:711
ace argon_type MultiplyFixedQMax(lane_type l) const
Multiply a QMax fixed-point vector by a lane value, returning a fixed-point product.
Definition vector.hpp:531
ace argon_type MultiplySubtract(scalar_type b, argon_type c) const
Multiply a vector by a scalar value and subtract from a third vector.
Definition vector.hpp:510
ace argon_bool_type operator<=(argon_type b) const
Compare two vectors, checking if this vector is less than or equal to the other.
Definition vector.hpp:220
ace argon_type ShiftLeftSaturate() const
Shift the elements of the vector to the left by a specified number of bits, saturating the result.
Definition vector.hpp:725
ace argon_type BitwiseSelect(ArgType true_value, ArgType false_value) const
Bitwise select between two vectors, using the current vector as a mask.
Definition vector.hpp:1157
ace argon_type operator&(argon_type b) const
Bitwise AND two vectors and return the result.
Definition vector.hpp:232
ace argon_type MultiplyRoundFixedQMax(argon_type v) const
Multiply two fixed-point vectors, returning a fixed-point product.
Definition vector.hpp:538
ace argon_type SubtractSaturate(argon_type b) const
Subtract two vectors, saturating the result.
Definition vector.hpp:394
static ace std::array< argon_type, Stride > LoadToLaneInterleaved(simd::MultiVector_t< VectorType, Stride > multi, const scalar_type *ptr)
Load a value from a pointer into a vector at the lane index lane, de-interleaving.
Definition vector.hpp:945
static ace std::array< argon_type, n > LoadMulti(const scalar_type *ptr)
Load n vectors from a single contiguous set of memory.
Definition vector.hpp:1016
static ace argon_type LoadGatherOffsetIndex(const scalar_type *base, helpers::ArgonFor_t< simd::make_unsigned_t< Bool_t< VectorType > > > offset_vector)
Using a base address and a vector of offset indices and a base pointer, create a new vector.
Definition vector.hpp:844
helpers::ArgonFor_t< predicate_type > argon_bool_type
Definition vector.hpp:59
ace argon_type operator|(argon_type b) const
Bitwise OR two vectors and return the result.
Definition vector.hpp:235
ace argon_type MultiplyFixedQMax(argon_type v) const
Multiply two QMax fixed-point vectors, returning a fixed-point product.
Definition vector.hpp:522
ace argon_type MultiplyAdd(argon_type b, scalar_type c) const
Multiply a vector by a scalar value and add a third vector.
Definition vector.hpp:451
simd::Scalar_t< simd::Vec128_t< ScalarType > > scalar_type
Definition vector.hpp:55
ace const const_lane_type< LaneIndex > GetLane() const
Get a single lane of the vector by index.
Definition vector.hpp:323
ace argon_type operator~() const
Bitwise NOT the vector and return the result.
Definition vector.hpp:241
ace Vector(argon::ConstLane< LaneIndex, VectorType > lane)
Constructs a Vector from a ConstLane object.
Definition vector.hpp:92
ace argon_type Select(ArgType true_value, ArgType false_value) const
Bitwise select between two vectors, using the current vector as a mask.
Definition vector.hpp:1164
ace std::array< scalar_type, lanes > to_array()
Convert the vector to an array of scalar values.
Definition vector.hpp:275
ace argon_type Absolute() const
Get the absolute value of the vector.
Definition vector.hpp:557
static constexpr size_t lanes
Definition vector.hpp:62
ace lane_type operator[](const size_t i)
Access a lane of the vector by index.
Definition vector.hpp:247
ace argon_type BitwiseClear(argon_type b) const
Bitwise AND of the vector with the NOT of another vector.
Definition vector.hpp:1150
ace argon_type BitwiseAndNot(argon_type b) const
Bitwise AND of the vector with the NOT of another vector.
Definition vector.hpp:1141
ace argon_type ShiftRightAccumulate(argon_type b) const
Shift the elements of the b vector to the right by a specified number of bits, and then add the resul...
Definition vector.hpp:759
ace argon_type operator>>(const int i) const
Shift the elements of the vector to the right by a specified number of bits.
Definition vector.hpp:250
ace argon_type Add(argon_type b) const
Add two vectors.
Definition vector.hpp:359
ace argon_type BitwiseOr(argon_type b) const
Bitwise OR of the vector with another vector.
Definition vector.hpp:1112
constexpr Vector()=default
The default constructor for the Vector class.
ace argon_type MultiplyAddFixedQMax(argon_type b, arg_type c) const
Multiply-add three fixed-point vectors, returning a fixed-point sum.
Definition vector.hpp:580
ace const lane_type GetLane(const int i) const
Get a single lane of the vector by index.
Definition vector.hpp:303
constexpr Vector & operator=(const Vector &other)=default
Copy assignment operator for the Vector class.
ace argon_bool_type operator!=(argon_type b) const
Compare two vectors for inequality.
Definition vector.hpp:211
static ace argon_type Generate(FuncType body)
Constructs a Vector from a function that generates values.
Definition vector.hpp:169
ace argon_type CountLeadingZeroBits() const
Count the number of consecutive top bits that are set to zero.
Definition vector.hpp:1186
ace argon_type Min(argon_type b) const
Compare the lanes of two vectors, copying the smaller of each lane to the result.
Definition vector.hpp:646
ace argon_type operator-() const
Negate the SIMD vector and return the result.
Definition vector.hpp:193
ace argon_type SubtractHalve(argon_type b) const
Subtract two vectors, halving the result.
Definition vector.hpp:391
ace argon_type MultiplyAdd(lane_type b, argon_type c) const
Multiply a vector by a lane value and add a third vector.
Definition vector.hpp:472
ace argon_type ShiftRightInsert(argon_type b) const
Shift the elements of the vector to the right by a specified number of bits, ORing the result with th...
Definition vector.hpp:783
ace argon_type SubtractAbsAdd(argon_type b, argon_type c) const
Subtract two vectors, taking the absolute value of the result and adding a third vector.
Definition vector.hpp:402
constexpr Vector(Vector &&other)=default
Move constructor for the Vector class.
ace argon_type Subtract(argon_type b) const
Subtract two vectors.
Definition vector.hpp:381
ace argon_type BitwiseXor(argon_type b) const
Bitwise XOR of the vector with another vector.
Definition vector.hpp:1121
ace Vector(argon::Lane< VectorType > lane)
Constructs a Vector from a Lane object.
Definition vector.hpp:85
ace argon_type AddHalveRound(argon_type b) const
Adds two vectors, halving and rounding the result.
Definition vector.hpp:373
ace Lane< const VectorType > operator[](const size_t i) const
Access a lane of the vector by index.
Definition vector.hpp:244
Definition vector.hpp:36
Header file for SIMD features and platform detection.
typename ArgonFor< std::remove_cv_t< T > >::type ArgonFor_t
Helper alias to get the Argon type for a given vector type.
Definition argon_for.hpp:45
Lane deconstruction feature.
Definition argon_full.hpp:302
Helper functions to convert C-style arrays to std::array of Argon types, mimicking std::to_array.
constexpr std::array< helpers::ArgonFor_t< T >, N > to_array(T(&a)[N])
Convert a C-style array of vector types to a std::array of Argon types.
Definition to_array.hpp:29