Argon 0.1.0
Loading...
Searching...
No Matches
vector.hpp
1#pragma once
2#include <array>
3#include <bit>
4#include <cmath>
5#include <functional>
6#include <tuple>
7#include <type_traits>
8#include <utility>
9#include "arm_simd.hpp"
10#include "arm_simd/helpers.hpp"
11#include "arm_simd/helpers/multivector.hpp"
12#include "arm_simd/helpers/scalar.hpp"
13#include "arm_simd/helpers/vec64.hpp"
14#include "features.h"
15#include "helpers.hpp"
16#include "helpers/bool.hpp"
17#include "helpers/to_array.hpp"
18#include "lane.hpp"
19
20#ifdef __ARM_FEATURE_MVE
21#define simd mve
22#else
23#define simd neon
24#endif
25
26#ifdef ARGON_PLATFORM_SIMDE
27#define ace [[gnu::always_inline]] inline
28#elifdef __clang__
29#define ace [[gnu::always_inline]] constexpr
30#else
31#define ace [[gnu::always_inline]] inline
32#endif
33
34namespace argon {
// Concept satisfied by every type T for which std::is_arithmetic_v<T> is true.
35template <typename T>
36concept arithmetic = std::is_arithmetic_v<T>;
37
/// True when `T` is exactly the same type as at least one member of `Ts`.
/// An empty `Ts` pack yields false.
template <typename T, typename... Ts>
inline constexpr bool is_one_of = (std::is_same_v<T, Ts> || ...);
44
49template <typename VectorType>
50class Vector {
51 public:
52 template <size_t LaneIndex>
55 using scalar_type = simd::Scalar_t<VectorType>;
56 using vector_type = VectorType;
58 using predicate_type = Bool_t<VectorType>;
60
62 static constexpr size_t lanes = (simd::is_quadword_v<VectorType> ? 16 : 8) / sizeof(scalar_type);
63
65 constexpr Vector() = default;
66
67 constexpr Vector(Vector&& other) = default;
68 constexpr Vector(const Vector& other) = default;
69 constexpr Vector& operator=(Vector&& other) = default;
70 constexpr Vector& operator=(const Vector& other) = default;
71
74 ace Vector(VectorType vector) : vec_{std::move(vector)} {};
75
79 ace Vector(scalar_type scalar) : vec_(FromScalar(scalar)) {};
80
81#ifndef ARGON_PLATFORM_MVE
85 ace Vector(argon::Lane<VectorType> lane) : vec_(FromLane(lane)) {};
86
91 template <size_t LaneIndex>
93#endif
94
95 template <typename... ArgTypes>
96 requires(sizeof...(ArgTypes) > 1)
97 ace Vector(ArgTypes... args) : vec_{std::forward<ArgTypes>(args)...} {}
98
104 ace static argon_type LoadScalar(const scalar_type* ptr) { return LoadCopy(ptr); }
105
110 ace static argon_type FromScalar(scalar_type scalar) {
111#ifdef ARGON_PLATFORM_MVE
112 return simd::duplicate(scalar);
113#else
114 return simd::duplicate<VectorType>(scalar);
115#endif
116 }
117
122 template <simd::is_vector_type IntrinsicType>
124#ifdef ARGON_PLATFORM_MVE
125 return simd::duplicate(lane.Get());
126#else
127 return simd::duplicate_lane<vector_type>(lane.vec(), lane.lane());
128#endif
129 }
130
135 template <size_t LaneIndex>
137#ifdef ARGON_PLATFORM_MVE
138 return simd::duplicate(lane.Get());
139#else
140 if constexpr (simd::is_quadword_v<VectorType>) {
141#if __ARM_ARCH >= 8
142 return simd::duplicate_lane_quad<LaneIndex>(lane.vec());
143#else
144 // On A32, vec() returns the 64-bit half-register (low or high).
145 // The template arg must be the lane index within that half-vector.
146 constexpr size_t local_lane = LaneIndex >= (lanes / 2) ? LaneIndex - (lanes / 2) : LaneIndex;
147 return simd::duplicate_lane_quad<local_lane>(lane.vec());
148#endif
149 } else {
150 return simd::duplicate_lane<LaneIndex>(lane.vec());
151 }
152#endif
153 }
154
// Builds a vector derived from `start`: via IotaHelper over an index sequence of `lanes`
// entries when `if consteval` is available, otherwise by adding a literal offset vector.
// NOTE(review): the fallback branch spells out only four offsets (VectorType{0, 1, 2, 3});
// confirm it is correct for vector types whose lane count is not 4.
161 ace static argon_type Iota(scalar_type start) {
162 // TODO: Remove this once MSVC 19.44 is released.
163#if __cpp_if_consteval >= 202106L
164 return IotaHelper(start, std::make_index_sequence<lanes>{});
165#else
166 return Argon{start}.Add(VectorType{0, 1, 2, 3});
167#endif
168 }
169
174 template <typename FuncType>
175 requires std::convertible_to<FuncType, std::function<scalar_type()>>
176 ace static argon_type Generate(FuncType body) {
177 VectorType out;
178 utility::constexpr_for<0, lanes, 1>([&](size_t i) { //
179 out[i] = body();
180 });
181 return out;
182 }
183
189 template <typename FuncType>
190 requires std::convertible_to<FuncType, std::function<scalar_type(scalar_type)>>
191 ace static argon_type GenerateWithIndex(FuncType body) {
192 VectorType out;
193 utility::constexpr_for<0, lanes, 1>([&]<size_t i>() { //
194 out[i] = body(i);
195 });
196 return out;
197 }
198
200 ace argon_type operator-() const { return Negate(); }
201
203 ace argon_type operator+(argon_type b) const { return Add(b); }
204
206 ace argon_type operator-(argon_type b) const { return Subtract(b); }
207
209 ace argon_type operator*(argon_type b) const { return Multiply(b); }
210
212 ace argon_type operator/(argon_type b) const { return Divide(b); }
213
215 ace argon_bool_type operator==(argon_type b) const { return Equal(b); }
216
218 ace argon_bool_type operator!=(argon_type b) const { return ~Equal(b); }
219
221 ace argon_bool_type operator<(argon_type b) const { return LessThan(b); }
222
225
228
231
233 ace argon_type operator++() const { return Add(1); }
234
236 ace argon_type operator--() const { return Subtract(1); }
237
239 ace argon_type operator&(argon_type b) const { return BitwiseAnd(b); }
240
242 ace argon_type operator|(argon_type b) const { return BitwiseOr(b); }
243
245 ace argon_type operator^(argon_type b) const { return BitwiseXor(b); }
246
248 ace argon_type operator~() const { return BitwiseNot(); }
249
251 ace Lane<const VectorType> operator[](const size_t i) const { return GetLane(i); }
252
254 ace lane_type operator[](const size_t i) { return GetLane(i); }
255
257 ace argon_type operator>>(const int i) const {
258#if ARGON_USE_COMPILER_EXTENSIONS
259 return vec_ >> i;
260#else
261 return ShiftRight(i);
262#endif
263 }
264
266 ace argon_type operator<<(const int i) const {
267#if ARGON_USE_COMPILER_EXTENSIONS
268 return vec_ << i;
269#else
270 return ShiftLeft(i);
271#endif
272 }
273
275 [[gnu::always_inline]] constexpr VectorType vec() const { return vec_; }
276
278 [[gnu::always_inline]] constexpr operator VectorType() const { return vec_; }
279
282 ace std::array<scalar_type, lanes> to_array() {
283 std::array<scalar_type, lanes> out;
284 simd::store1(out.data(), vec_);
285 return out;
286 }
287
292 ace const lane_type GetLane(const size_t i) const {
293#ifdef ARGON_PLATFORM_MVE
294 return vec_[i];
295#else
296 return {vec_, static_cast<int>(i)};
297#endif
298 }
299 ace lane_type GetLane(const size_t i) {
300#ifdef ARGON_PLATFORM_MVE
301 return vec_[i];
302#else
303 return {vec_, static_cast<int>(i)};
304#endif
305 }
306
310 ace const lane_type GetLane(const int i) const {
311#ifdef ARGON_PLATFORM_MVE
312 return vec_[i];
313#else
314 return {vec_, i};
315#endif
316 }
317
318 ace lane_type GetLane(const int i) {
319#ifdef ARGON_PLATFORM_MVE
320 return vec_[i];
321#else
322 return {vec_, i};
323#endif
324 }
325
329 template <size_t LaneIndex>
331#ifdef ARGON_PLATFORM_MVE
332 return vec_[LaneIndex];
333#else
334 return vec_;
335#endif
336 }
337
338 template <size_t LaneIndex>
340#ifdef ARGON_PLATFORM_MVE
341 return vec_[LaneIndex];
342#else
343 return vec_;
344#endif
345 }
346
348 ace const_lane_type<lanes - 1> LastLane() { return vec_; }
349
351 ace argon_type ShiftRight(const int i) const { return simd::shift_right(vec_, i); }
352
354 ace argon_type ShiftLeft(const int i) const { return simd::shift_left(vec_, i); }
355
357 ace argon_type Negate() const {
358 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
359 return -vec_;
360 } else {
361 return simd::negate(vec_);
362 }
363 }
364
366 ace argon_type Add(argon_type b) const {
367 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
368 return vec_ + b.vec_;
369 } else {
370 return simd::add(vec_, b);
371 }
372 }
373
376 ace argon_type AddHalve(argon_type b) const { return simd::add_halve(vec_, b); }
377
380 ace argon_type AddHalveRound(argon_type b) const { return simd::add_halve_round(vec_, b); }
381
385 ace argon_type AddSaturate(argon_type b) const { return simd::add_saturate(vec_, b); }
386
389 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
390 return vec_ - b.vec_;
391 } else {
392 return simd::subtract(vec_, b);
393 }
394 }
395
398 ace argon_type SubtractHalve(argon_type b) const { return simd::subtract_halve(vec_, b); }
399
401 ace argon_type SubtractSaturate(argon_type b) const { return simd::subtract_saturate(vec_, b); }
402
405 ace argon_type SubtractAbs(argon_type b) const { return simd::subtract_absolute(vec_, b); }
406
410#ifdef ARGON_PLATFORM_MVE
411 return mve::add(vec_, mve::subtract_absolute(b, c));
412#else
413 return neon::subtract_absolute_add(vec_, b, c);
414#endif
415 }
416
419 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
420 return vec_ * b.vec_;
421 } else {
422 return simd::multiply(vec_, b);
423 }
424 }
425
428 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
429 return vec_ * b;
430 } else {
431 return simd::multiply(vec_, b);
432 }
433 }
434
435#ifndef ARGON_PLATFORM_MVE
437 ace argon_type Multiply(lane_type b) const { return neon::multiply_lane(vec_, b.vec(), b.lane()); }
438
440 template <size_t LaneIndex>
442 return neon::multiply_lane(vec_, b.vec(), b.lane());
443 }
444#endif
445
449 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
450 return vec_ + b.vec_ * c.vec_;
451 } else {
452 return simd::multiply_add(vec_, b, c);
453 }
454 }
455
459 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
460 return vec_ + b.vec_ * c;
461 } else {
462 return simd::multiply_add(vec_, b, c);
463 }
464 }
465
468 ace argon_type MultiplyAdd(scalar_type b, argon_type c) const { return MultiplyAdd(c, b); }
469
470#ifndef ARGON_PLATFORM_MVE
474 return simd::multiply_add_lane(vec_, b.vec(), c.vec(), c.lane());
475 }
476
479 ace argon_type MultiplyAdd(lane_type b, argon_type c) const { return MultiplyAdd(c, b); }
480
483 template <size_t LaneIndex>
485 return simd::multiply_add_lane(vec_, b.vec(), c.vec(), c.lane());
486 }
487
489 template <size_t LaneIndex>
491 return MultiplyAdd(c, b);
492 }
493#endif
494
498#if ARGON_USE_COMPILER_EXTENSIONS
499 return vec_ - b.vec_ * c.vec_;
500#else
501 return simd::multiply_subtract(vec_, b, c);
502#endif
503 }
504
508#if ARGON_USE_COMPILER_EXTENSIONS
509 return vec_ - b.vec_ * c;
510#else
511 return simd::multiply_subtract(vec_, b, c);
512#endif
513 }
514
518
519#ifndef ARGON_PLATFORM_MVE
523 return simd::multiply_subtract_lane(vec_, b.vec(), c.vec(), c.lane());
524 }
525#endif
526
529 ace argon_type MultiplyFixedQMax(argon_type v) const { return simd::multiply_double_saturate_high(vec_, v); }
530
533 ace argon_type MultiplyFixedQMax(scalar_type s) const { return simd::multiply_double_saturate_high(vec_, s); }
534
535#ifndef ARGON_PLATFORM_MVE
539 return simd::multiply_double_saturate_high_lane(vec_, l.vec(), l.lane());
540 }
541#endif
542
546 return simd::multiply_double_round_saturate_high(vec_, v);
547 }
548
552 return simd::multiply_double_round_saturate_high(vec_, s);
553 }
554
555#ifndef ARGON_PLATFORM_MVE
559 return simd::multiply_double_round_saturate_high_lane(vec_, l.vec(), l.lane());
560 }
561#endif
562
564 ace argon_type Absolute() const { return simd::abs(vec_); }
565
569 requires std::floating_point<scalar_type> || std::is_same_v<scalar_type, uint32_t>
570 {
571#ifdef ARGON_PLATFORM_MVE
572 if constexpr (std::is_same_v<scalar_type, uint32_t>) {
573 std::numeric_limits<uint32_t>::max() / vec_;
574 } else {
575 return 1.f / vec_;
576 }
577#else
578 return simd::reciprocal_estimate(vec_);
579#endif
580 }
581
585 requires std::floating_point<scalar_type> || std::is_same_v<scalar_type, uint32_t>
586 {
587#ifdef ARGON_PLATFORM_MVE
588 if constexpr (std::is_same_v<scalar_type, uint32_t>) {
589 return std::numeric_limits<uint32_t>::max() / (vec_ * vec_);
590 } else {
591 return 1.f / (vec_ * vec_);
592 }
593#else
594 return simd::reciprocal_sqrt_estimate(vec_);
595#endif
596 }
597
602 requires std::floating_point<scalar_type>
603 {
604#ifdef ARGON_PLATFORM_MVE
605 return 2.f - vec_ * b.vec_;
606#else
607 return simd::reciprocal_step(vec_, b.vec_);
608#endif
609 }
610
615 requires std::floating_point<scalar_type>
616 {
617#ifdef ARGON_PLATFORM_MVE
618 return (3.f - vec_ * b.vec_) * 0.5f;
619#else
620 return simd::reciprocal_sqrt_step(vec_, b.vec_);
621#endif
622 }
623
627 ace argon_type ReciprocalEstimateRefine(int n_iters = 1) const
628 requires std::floating_point<scalar_type>
629 {
631 for (int i = 0; i < n_iters; ++i) {
632 est = est * ReciprocalStep(est);
633 }
634 return est;
635 }
636
640 ace argon_type ReciprocalSqrtEstimateRefine(int n_iters = 1) const
641 requires std::floating_point<scalar_type>
642 {
644 for (int i = 0; i < n_iters; ++i) {
645 est = est * (*this * est).ReciprocalSqrtStep(est);
646 }
647 return est;
648 }
649
652 template <typename arg_type>
653 requires(is_one_of<arg_type, argon_type, scalar_type, lane_type> || std::is_convertible_v<arg_type, argon_type> ||
654 std::is_convertible_v<arg_type, scalar_type>)
655 ace argon_type MultiplyAddFixedQMax(argon_type b, arg_type c) const {
656 return Add(b.MultiplyFixedQMax(c));
657 }
658
661 template <typename arg_type>
662 requires(is_one_of<arg_type, argon_type, scalar_type, lane_type> || std::is_convertible_v<arg_type, argon_type> ||
663 std::is_convertible_v<arg_type, scalar_type>)
665 return Add(b.MultiplyRoundFixedQMax(c));
666 }
667
668#ifdef __aarch64__
670 ace argon_type Divide(argon_type b) const {
671 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
672 return vec_ / b.vec_;
673 } else {
674 return simd::divide(vec_, b);
675 }
676 }
677#else
680 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
681 return vec_ / b.vec_;
682 } else {
683 return this->map2(b, [](scalar_type lane1, scalar_type lane2) { return lane1 / lane2; });
684 }
685 }
686#endif
687
691 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
692 return vec_ % b.vec_;
693 } else if constexpr (std::floating_point<scalar_type>) {
694 return this->map2(b, [](scalar_type lane1, scalar_type lane2) { return std::fmod(lane1, lane2); });
695 } else {
696 return this->map2(b, [](scalar_type lane1, scalar_type lane2) { return lane1 % lane2; });
697 }
698 }
699
703 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
704 return vec_ % b;
705 } else {
706 return this->map([b](scalar_type lane1) { return std::fmod(lane1, b); });
707 }
708 }
709
712 ace argon_type Max(argon_type b) const {
713 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
714 return vec_ > b.vec_ ? vec_ : b.vec_;
715 } else {
716 return simd::max(vec_, b);
717 }
718 }
719
721 ace argon_type Min(argon_type b) const {
722 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
723 return vec_ < b.vec_ ? vec_ : b.vec_;
724 } else {
725 return simd::min(vec_, b);
726 }
727 }
728
731 ace argon_bool_type Equal(argon_type b) const { return simd::equal(vec_, b); }
732
735 ace argon_bool_type GreaterThanOrEqual(argon_type b) const { return simd::greater_than_or_equal(vec_, b); }
736
739 ace argon_bool_type LessThanOrEqual(argon_type b) const { return simd::less_than_or_equal(vec_, b); }
740
743 ace argon_bool_type GreaterThan(argon_type b) const { return simd::greater_than(vec_, b); }
744
747 ace argon_bool_type LessThan(argon_type b) const { return simd::less_than(vec_, b); }
748
751 ace argon_type ShiftLeft(helpers::ArgonFor_t<simd::make_signed_t<Bool_t<VectorType>>> b) const
752 requires std::is_integral_v<scalar_type>
753 {
754 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
755 return vec_ << b.vec_;
756 } else {
757 return simd::shift_left(vec_, b.vec_);
758 }
759 }
760
763 ace argon_type ShiftLeft(std::make_signed_t<simd::Scalar_t<Bool_t<VectorType>>> n) const
764 requires std::is_integral_v<scalar_type>
765 {
766 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
767 return vec_ << n;
768 } else {
770 return simd::shift_left(vec_, b.vec_);
771 }
772 }
773
776 template <int n>
777 ace argon_type ShiftLeft() const {
778 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
779 return vec_ << n;
780 } else {
781 return simd::shift_left<n>(vec_);
782 }
783 }
784
786 ace argon_type ShiftLeftSaturate(helpers::ArgonFor_t<simd::make_signed_t<Bool_t<VectorType>>> b) const
787 requires(std::is_integral_v<scalar_type>)
788 {
789 return simd::shift_left_saturate(vec_, b);
790 }
791
793 ace argon_type ShiftLeftRound(argon_type b) const { return simd::shift_left_round(vec_, b); }
794
796 ace argon_type ShiftLeftRoundSaturate(argon_type b) const { return simd::shift_left_round_saturate(vec_, b); }
797
799 template <int n>
801 return simd::shift_left_saturate<n>(vec_);
802 }
803
809 template <int n>
811 return simd::shift_left_insert<n>(vec_, b);
812 }
813
815 template <int n>
816 ace argon_type ShiftRight() const {
817 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
818 return vec_ >> n;
819 } else {
820 return simd::shift_right<n>(vec_);
821 }
822 }
823
825 template <int n>
827 return simd::shift_right_round<n>(vec_);
828 }
829
833 template <int n>
835#ifdef ARGON_PLATFORM_MVE
836 return vec_ + (b >> n);
837#else
838 return simd::shift_right_accumulate<n>(vec_, b);
839#endif
840 }
841
845 template <int n>
847#ifdef ARGON_PLATFORM_MVE
848 return vec_ + mve::shift_right_round<n>(b);
849#else
850 return simd::shift_right_accumulate_round<n>(vec_, b);
851#endif
852 }
853
857 template <int n>
859 return simd::shift_right_insert<n>(vec_, b);
860 }
861
863 ace static argon_type Load(const scalar_type* ptr) {
864#ifdef ARGON_PLATFORM_MVE
865 return mve::load1(ptr);
866#else
867 return neon::load1<VectorType>(ptr);
868#endif
869 }
870
872 ace static argon_type LoadCopy(const scalar_type* ptr) {
873#ifdef ARGON_PLATFORM_MVE
874 scalar_type val = *ptr;
875 VectorType vec;
876 utility::constexpr_for<0, lanes, 1>([val, &vec]<int i>() { vec[i] = val; });
877#else
878 return simd::load1_duplicate<VectorType>(ptr);
879#endif
880 }
881
888 const scalar_type* base,
889 helpers::ArgonFor_t<simd::make_unsigned_t<Bool_t<VectorType>>> offset_vector) {
890#ifdef ARGON_PLATFORM_MVE
891 static_assert(
892 sizeof(scalar_type) == 1 || sizeof(scalar_type) == 2 || sizeof(scalar_type) == 4 || sizeof(scalar_type) == 8,
893 "Unsupported size for gather load");
894
895 if constexpr (sizeof(scalar_type) == 1) {
896 return mve::load_byte_gather_offset(base, offset_vector);
897 } else if constexpr (sizeof(scalar_type) == 2) {
898 return mve::load_halfword_gather_offset(base, offset_vector);
899 } else if constexpr (sizeof(scalar_type) == 4) {
900 return mve::load_word_gather_offset(base, offset_vector);
901 } else if constexpr (sizeof(scalar_type) == 8) {
902 return mve::load_doubleword_gather_offset(base, offset_vector);
903 }
904#else
905 argon_type destination;
906 utility::constexpr_for<0, lanes, 1>([&]<int i>() { //<
907 auto lane_val = neon::get_lane<i>(offset_vector);
908 destination = destination.template LoadToLane<i>(base + (lane_val * sizeof(scalar_type)));
909 });
910 return destination;
911#endif
912 }
913
920 const scalar_type* base,
921 helpers::ArgonFor_t<simd::make_unsigned_t<Bool_t<VectorType>>> offset_vector) {
922#ifdef ARGON_PLATFORM_MVE
923 static_assert(
924 sizeof(scalar_type) == 1 || sizeof(scalar_type) == 2 || sizeof(scalar_type) == 4 || sizeof(scalar_type) == 8,
925 "Unsupported size for gather load");
926
927 if constexpr (sizeof(scalar_type) == 1) {
928 return mve::load_byte_gather_offset(base, offset_vector);
929 } else if constexpr (sizeof(scalar_type) == 2) {
930 return mve::load_halfword_gather_offset(base, offset_vector * sizeof(scalar_type));
931 } else if constexpr (sizeof(scalar_type) == 4) {
932 return mve::load_word_gather_offset(base, offset_vector * sizeof(scalar_type));
933 } else if constexpr (sizeof(scalar_type) == 8) {
934 return mve::load_doubleword_gather_offset(base, offset_vector * sizeof(scalar_type));
935 }
936#else
937 argon_type destination;
938 utility::constexpr_for<0, lanes, 1>([&]<int i>() { //<
939 auto lane_val = neon::get_lane<i>(offset_vector);
940 destination = destination.template LoadToLane<i>(base + lane_val);
941 });
942 return destination;
943#endif
944 }
945
949 template <size_t lane>
951 return ConstLane<lane, VectorType>{vec_}.Load(ptr);
952 }
953
960 template <size_t stride>
961 ace static std::array<argon_type, stride> LoadInterleaved(const scalar_type* ptr) {
962#ifdef ARGON_PLATFORM_MVE
963 static_assert(stride == 2 || stride == 4,
964 "De-interleaving Loads can only be performed with a stride of 2, 3, or 4");
965 if constexpr (stride == 2) {
966 return argon::to_array(mve::load2(ptr).val);
967 } else if constexpr (stride == 4) {
968 return argon::to_array(mve::load4(ptr).val);
969 }
970#else
971 static_assert(stride > 1 && stride < 5, "De-interleaving Loads can only be performed with a stride of 2, 3, or 4");
972 using multivec_type = simd::MultiVector_t<VectorType, stride>;
973 if constexpr (stride == 2) {
974 return argon::to_array(neon::load2<multivec_type>(ptr).val);
975 } else if constexpr (stride == 3) {
976 return argon::to_array(neon::load3<multivec_type>(ptr).val);
977 } else if constexpr (stride == 4) {
978 return argon::to_array(neon::load4<multivec_type>(ptr).val);
979 }
980#endif
981 }
982
988 template <size_t stride>
989 ace static std::array<argon_type, stride> LoadCopyInterleaved(const scalar_type* ptr) {
990#ifdef ARGON_PLATFORM_MVE
991 static_assert(stride == 2 || stride == 4,
992 "De-interleaving LoadCopy can only be performed with a stride of 2, 3, or 4");
993 if constexpr (stride == 2) {
994 return {mve::duplicate(*ptr++), mve::duplicate(*ptr++)};
995 } else if constexpr (stride == 4) {
996 return {mve::duplicate(*ptr++), mve::duplicate(*ptr++), mve::duplicate(*ptr++), mve::duplicate(*ptr)};
997 }
998#else
999 static_assert(stride > 1 && stride < 5,
1000 "De-interleaving LoadCopy can only be performed with a stride of 2, 3, or 4");
1001 using multivec_type = simd::MultiVector<VectorType, stride>::type;
1002 if constexpr (stride == 2) {
1003 return argon::to_array(simd::load2_duplicate<multivec_type>(ptr).val);
1004 } else if constexpr (stride == 3) {
1005 return argon::to_array(simd::load3_duplicate<multivec_type>(ptr).val);
1006 } else if constexpr (stride == 4) {
1007 return argon::to_array(simd::load4_duplicate<multivec_type>(ptr).val);
1008 }
1009#endif
1010 }
1011
1018 template <size_t LaneIndex, size_t Stride>
1019 ace static std::array<argon_type, Stride> LoadToLaneInterleaved(simd::MultiVector_t<VectorType, Stride> multi,
1020 const scalar_type* ptr) {
1021 static_assert(Stride > 1 && Stride < 5, "De-interleaving Loads can only be performed with a stride of 2, 3, or 4");
1022#ifdef ARGON_PLATFORM_MVE
1023 auto out = multi;
1024 utility::constexpr_for<0, Stride, 1>([&]<int i>() { //<
1025 out.val[i][LaneIndex] = ptr[i];
1026 });
1027 return argon::to_array(out.val);
1028#else
1029 if constexpr (Stride == 2) {
1030 if constexpr (simd::is_quadword_v<VectorType>) {
1031 return argon::to_array(simd::load2_lane_quad<LaneIndex>(ptr, multi).val);
1032 } else {
1033 return argon::to_array(simd::load2_lane<LaneIndex>(ptr, multi).val);
1034 }
1035 } else if constexpr (Stride == 3) {
1036 if constexpr (simd::is_quadword_v<VectorType>) {
1037 return argon::to_array(simd::load3_lane_quad<LaneIndex>(ptr, multi).val);
1038 } else {
1039 return argon::to_array(simd::load3_lane<LaneIndex>(ptr, multi).val);
1040 }
1041 } else if constexpr (Stride == 4) {
1042 if constexpr (simd::is_quadword_v<VectorType>) {
1043 return argon::to_array(simd::load4_lane_quad<LaneIndex>(ptr, multi).val);
1044 } else {
1045 return argon::to_array(simd::load4_lane<LaneIndex>(ptr, multi).val);
1046 }
1047 }
1048#endif
1049 }
1050
1052 template <size_t lane, size_t stride>
1053 ace static std::array<argon_type, stride> LoadToLaneInterleaved(std::array<argon_type, stride> multi,
1054 const scalar_type* ptr) {
1055 using multivec_type = simd::MultiVector_t<VectorType, stride>;
1056 return LoadToLaneInterleaved<lane, stride>(*(multivec_type*)multi.data(), ptr);
1057 }
1058
1069 template <size_t stride>
1070 ace static std::array<argon_type, stride> LoadGatherOffsetIndexInterleaved(
1071 const scalar_type* base_ptr,
1072 helpers::ArgonFor_t<simd::make_unsigned_t<Bool_t<VectorType>>> offset_vector) {
1073 static_assert(stride > 1 && stride < 5, "De-interleaving Loads can only be performed with a stride of 2, 3, or 4");
1074 std::array<argon_type, stride> multi{};
1075 utility::constexpr_for<0, lanes, 1>([&]<int i>() { //<
1076 auto lane_val = simd::get_lane<i>(offset_vector);
1077 multi = LoadToLaneInterleaved<i, stride>(multi, base_ptr + (lane_val * stride));
1078 });
1079 return multi;
1080 }
1081
1089 template <size_t n>
1090 ace static std::array<argon_type, n> LoadMulti(const scalar_type* ptr) {
1091 static_assert(n > 1 && n < 5, "LoadMulti can only be performed with a size of 2, 3, or 4");
1092#ifdef ARGON_PLATFORM_MVE
1093 std::array<argon_type, n> multi{};
1094 utility::constexpr_for<0, n, 1>([&]<int i>() { //<
1095 multi[i] = *ptr;
1096 ptr += lanes;
1097 });
1098 return multi;
1099#else
1100#if defined(__clang__) || (__GNUC__ > 13)
1101 using multi_type = simd::MultiVector_t<VectorType, n>;
1102 if constexpr (n == 2) {
1103 return argon::to_array(simd::load1_x2<multi_type>(ptr).val);
1104 } else if constexpr (n == 3) {
1105 return argon::to_array(simd::load1_x3<multi_type>(ptr).val);
1106 } else if constexpr (n == 4) {
1107 return argon::to_array(simd::load1_x4<multi_type>(ptr).val);
1108 }
1109#else
1110 if constexpr (n == 2) {
1111 auto a = simd::load1(ptr);
1112 auto b = simd::load1(ptr + lanes);
1113 return {a, b};
1114 } else if constexpr (n == 3) {
1115 auto a = simd::load1(ptr);
1116 auto b = simd::load1(ptr + lanes);
1117 auto c = simd::load1(ptr + 2 * lanes);
1118 return {a, b, c};
1119 } else if constexpr (n == 4) {
1120 auto a = simd::load1(ptr);
1121 auto b = simd::load1(ptr + lanes);
1122 auto c = simd::load1(ptr + 2 * lanes);
1123 auto d = simd::load1(ptr + 3 * lanes);
1124 return {a, b, c, d};
1125 }
1126#endif
1127#endif
1128 }
1129
1132 ace void StoreTo(scalar_type* ptr) const { simd::store1(ptr, vec_); }
1133
1137 template <int LaneIndex>
1138 ace void StoreLaneTo(scalar_type* ptr) {
1139#ifdef ARGON_PLATFORM_MVE
1140 *ptr = vec_[LaneIndex];
1141#else
1142 simd::store1_lane<LaneIndex>(ptr, vec_);
1143#endif
1144 }
1145
1146#ifndef ARGON_PLATFORM_MVE
1148
1152 ace argon_type PairwiseAdd(argon_type b) const { return simd::pairwise_add(vec_, b); }
1153
1157 ace argon_type PairwiseMax(argon_type b) const { return simd::pairwise_max(vec_, b); }
1158
1162 ace argon_type PairwiseMin(argon_type b) const { return simd::pairwise_min(vec_, b); }
1163#endif
1164
1166
1169 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
1170 return ~vec_;
1171 } else {
1172 return simd::bitwise_not(vec_);
1173 }
1174 }
1175
1178 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
1179 return vec_ & b.vec_;
1180 } else {
1181 return simd::bitwise_and(vec_, b);
1182 }
1183 }
1184
1187 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
1188 return vec_ | b.vec_;
1189 } else {
1190 return simd::bitwise_or(vec_, b);
1191 }
1192 }
1193
1196 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
1197 return vec_ ^ b.vec_;
1198 } else {
1199 return simd::bitwise_xor(vec_, b);
1200 }
1201 }
1202
1206 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
1207 return vec_ | ~b.vec_;
1208 } else {
1209 return simd::bitwise_or_not(vec_, b);
1210 }
1211 }
1212
1216 if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
1217 return vec_ & ~b.vec_;
1218 } else {
1219 return simd::bitwise_clear(vec_, b);
1220 }
1221 }
1222
1225
1226#ifndef ARGON_PLATFORM_MVE
1229 template <typename ArgType>
1230 requires std::is_unsigned_v<scalar_type>
1231 ace argon_type BitwiseSelect(ArgType true_value, ArgType false_value) const {
1232 return simd::bitwise_select(vec_, true_value, false_value);
1233 }
1234
1236 template <typename ArgType>
1237 requires std::is_unsigned_v<scalar_type>
1238 ace argon_type Select(ArgType true_value, ArgType false_value) const {
1239 return simd::bitwise_select(true_value, false_value);
1240 }
1241
1244 ace predicate_type CompareTestNonzero(argon_type b) const { return simd::compare_test_nonzero(vec_, b); }
1245
1247 ace predicate_type TestNonzero() const { return simd::compare_test_nonzero(vec_, argon_type{1}); }
1248#endif
1249
1253 requires(std::is_integral_v<scalar_type>)
1254 {
1255 return simd::count_leading_sign_bits(vec_);
1256 }
1257
1260 ace argon_type CountLeadingZeroBits() const { return simd::count_leading_zero_bits(vec_); }
1261
1265#ifdef ARGON_PLATFORM_MVE
1266 auto new_vec = vec_;
1267 utility::constexpr_for<0, lanes, 1>([&]<int i>() { //<
1268 new_vec[i] = std::popcount(vec_[i]);
1269 });
1270 return new_vec;
1271#else
1272 return neon::count_active_bits(vec_);
1273#endif
1274 }
1275
1277 ace argon_type Popcount() const { return CountActiveBits(); }
1278
1282 template <int n>
1284#ifdef ARGON_PLATFORM_MVE
1285 auto new_vec = vec_;
1286 utility::constexpr_for<0, lanes, 1>([&]<int i>() { //<
1287 if (i < n) {
1288 new_vec[i] = b.vec_[i];
1289 }
1290 });
1291 return new_vec;
1292#else
1293 return simd::extract<n>(vec_, b);
1294#endif
1295 }
1296
1297 ace argon_type Reverse64bit() const { return simd::reverse_64bit(vec_); }
1298 ace argon_type Reverse32bit() const { return simd::reverse_32bit(vec_); }
1299 ace argon_type Reverse16bit() const { return simd::reverse_16bit(vec_); }
1300
1304 ace std::array<argon_type, 2> ZipWith(argon_type b) const {
1305#ifdef ARGON_PLATFORM_MVE
1306 std::array<argon_type, 2> new_vec;
1307 utility::constexpr_for<0, lanes, 1>([&]<int i>() { //<
1308 if (i % 2 == 0) {
1309 new_vec[0][i] = vec_[i / 2];
1310 new_vec[1][i] = vec_[(i + lanes) / 2];
1311 } else {
1312 new_vec[0][i] = b.vec_[i / 2];
1313 new_vec[1][i] = b.vec_[(i + lanes) / 2];
1314 }
1315 });
1316 return new_vec;
1317#else
1318 return argon::to_array(neon::zip(vec_, b.vec()).val);
1319#endif
1320 }
1321
1325 std::array<argon_type, 2> UnzipWith(argon_type b) {
1326#ifdef ARGON_PLATFORM_MVE
1327 std::array<argon_type, 2> new_vec;
1328 utility::constexpr_for<0, lanes, 1>([&]<int i>() { //<
1329 if ((i * 2) < lanes) {
1330 new_vec[0][i] = vec_[i * 2];
1331 new_vec[1][i] = vec_[i * 2 + 1];
1332 } else {
1333 new_vec[0][i] = b.vec_[i * 2];
1334 new_vec[1][i] = b.vec_[i * 2 + 1];
1335 }
1336 });
1337 return new_vec;
1338#else
1339 return argon::to_array(neon::unzip(vec_, b.vec()).val);
1340#endif
1341 }
1342
1345 // {b0, b1, b2, b3}}
1347 // {a1, b1, a3, b3}}
1348 std::array<argon_type, 2> TransposeWith(argon_type b) const {
1349#ifdef ARGON_PLATFORM_MVE
1350 std::array<argon_type, 2> new_vec;
1351 utility::constexpr_for<0, lanes, 1>([&]<int i>() { //<
1352 if (i % 2 == 1) {
1353 new_vec[0][i] = vec_[i];
1354 new_vec[1][i] = vec_[i + 1];
1355 } else {
1356 new_vec[0][i] = b.vec_[i + 1];
1357 new_vec[1][i] = b.vec_[i];
1358 }
1359 });
1360 return new_vec;
1361#else
1362 return argon::to_array(simd::transpose(vec_, b.vec()).val);
1363#endif
1364 }
1365
1367 ace static int size() { return lanes; }
1368
1369 template <typename FuncType>
1370 requires std::convertible_to<FuncType, std::function<scalar_type(scalar_type)>>
1371 ace argon_type map(FuncType body) const {
1372 VectorType out;
1373 utility::constexpr_for<0, lanes, 1>([&]<int i>() { //<
1374 out[i] = body(vec_[i]);
1375 });
1376 return out;
1377 }
1378
1379 template <typename FuncType>
1380 requires std::convertible_to<FuncType, std::function<scalar_type(scalar_type, int)>>
1381 ace argon_type map_with_index(FuncType body) const {
1382 VectorType out;
1383 utility::constexpr_for<0, lanes, 1>([&]<int i>() { //<
1384 out[i] = body(vec_[i], i);
1385 });
1386 return out;
1387 }
1388
1389 template <typename FuncType>
1390 requires std::convertible_to<FuncType, std::function<scalar_type(scalar_type, scalar_type)>>
1391 ace argon_type map2(argon_type other, FuncType body) const {
1392 VectorType out;
1393 utility::constexpr_for<0, lanes, 1>([&]<int i>() { //<
1394 out[i] = body(vec_[i], other.vec_[i]);
1395 });
1396 return out;
1397 }
1398
1399 template <typename FuncType>
1400 requires std::convertible_to<FuncType, std::function<void(scalar_type&)>>
1401 ace argon_type each_lane(FuncType body) {
1402 VectorType out = vec_;
1403 utility::constexpr_for<0, lanes, 1>([&]<int i>() { //<
1404 body(out[i]);
1405 });
1406 return out;
1407 }
1408
1409 template <typename FuncType>
1410 requires std::convertible_to<FuncType, std::function<void(scalar_type&, int)>>
1411 ace argon_type each_lane_with_index(FuncType body) {
1412 VectorType out = vec_;
1413 utility::constexpr_for<0, lanes, 1>([&]<int i>() { //<
1414 body(out[i], i);
1415 });
1416 return out;
1417 }
1418
1419 template <typename FuncType>
1420 requires std::convertible_to<FuncType, std::function<void()>>
1421 ace void if_lane(FuncType true_branch) {
1422 utility::constexpr_for<0, lanes, 1>([&]<int i>() { //<
1423 if (vec_[i] != 0) {
1424 true_branch();
1425 }
1426 });
1427 }
1428
1429 template <typename FuncType>
1430 requires std::convertible_to<FuncType, std::function<void()>>
1431 ace void if_else_lane(FuncType true_branch, FuncType false_branch) {
1432 utility::constexpr_for<0, lanes, 1>([&]<int i>() { //<
1433 if (vec_[i] != 0) {
1434 true_branch();
1435 } else {
1436 false_branch();
1437 }
1438 });
1439 }
1440
1441 template <typename FuncType>
1442 requires std::convertible_to<FuncType, std::function<void(int)>>
1443 ace void if_lane_with_index(FuncType true_branch) {
1444 utility::constexpr_for<0, lanes, 1>([&]<int i>() { //<
1445 if (vec_[i] != 0) {
1446 true_branch(i);
1447 }
1448 });
1449 }
1450
1451 template <typename FuncType1, typename FuncType2>
1452 requires std::convertible_to<FuncType1, std::function<void(int)>> &&
1453 std::convertible_to<FuncType2, std::function<void(int)>>
1454 ace void if_else_lane_with_index(FuncType1 true_branch, FuncType2 false_branch) {
1455 utility::constexpr_for<0, lanes, 1>([&]<int i>() { //<
1456 if (vec_[i] != 0) {
1457 true_branch(i);
1458 } else {
1459 false_branch(i);
1460 }
1461 });
1462 }
1463
1464 ace bool any() {
1465 utility::constexpr_for<0, lanes, 1>([&]<int i>() { //<
1466 if (vec_[i]) {
1467 return true;
1468 }
1469 });
1470 return false;
1471 }
1472
1473 ace bool all() {
1474#ifdef ARGON_PLATFORM_MVE
1475 return mve::max_reduce_max(vec_, vec_) != 0;
1476#else
1477 auto nonzero = TestNonzero();
1478 utility::constexpr_for<0, lanes, 1>([&]<int i>() { //<
1479 if (nonzero[i] == 0) {
1480 return false;
1481 }
1482 });
1483 return true;
1484#endif
1485 }
1486
1487 template <std::size_t Index>
1488 std::tuple_element_t<Index, argon_type> get() {
1489#ifdef ARGON_PLATFORM_MVE
1490 return vec_[Index];
1491#else
1492 return GetLane<Index>();
1493#endif
1494 }
1495
1496 protected:
1497 template <std::size_t... Ints>
1498 ace static argon_type IotaHelper(scalar_type start, std::index_sequence<Ints...>) {
1499 return VectorType{static_cast<scalar_type>(start + Ints)...};
1500 }
1501
1502 VectorType vec_;
1503};
1504
1505} // namespace argon
1506
1510namespace std {
1511template <typename T>
1512struct tuple_size<argon::Vector<T>> {
1513 static constexpr size_t value = argon::Vector<T>::lanes;
1514};
1515
1516template <size_t Index, typename T>
1517struct tuple_element<Index, argon::Vector<T>> {
1518 static_assert(Index < argon::Vector<T>::lanes);
1520};
1521} // namespace std
1522
1523#undef ace
1524#undef simd
Provides utility templates and concepts for type traits and compile-time iteration.
A 128-bit SIMD vector wrapping a scalar type, providing arithmetic, logical, and data-movement operat...
Definition argon_full.hpp:29
Represents a single lane of a SIMD vector with the lane index known at compile time.
Definition lane.hpp:46
ace int lane()
On ARM32, return the local lane index within the 64-bit half-register returned by vec().
Definition lane.hpp:95
ace neon::Vec64_t< scalar_type > vec()
On ARM32, return the 64-bit half-register that contains this lane.
Definition lane.hpp:83
ace scalar_type Get() const
Get the scalar value of this lane.
Definition lane.hpp:74
ace argon_type Load(const scalar_type *ptr)
Load a scalar from ptr into this lane and return the updated vector.
Definition lane.hpp:60
Represents a single lane of a SIMD vector with a runtime-determined index.
Definition lane.hpp:117
ace int lane()
On ARM32, return the local lane index within the 64-bit half-register returned by vec().
Definition lane.hpp:162
ace neon::Vec64_t< scalar_type > vec()
On ARM32, return the 64-bit half-register that contains this lane.
Definition lane.hpp:150
ace scalar_type Get() const
Get the scalar value of this lane.
Definition lane.hpp:139
ace argon_type PairwiseMin(argon_type b) const
Select the minimum of each pair of lanes in the two vectors.
Definition vector.hpp:1162
Lane< simd::Vec128_t< ScalarType > > lane_type
Definition vector.hpp:54
ace argon_type MultiplySubtract(argon_type b, lane_type c) const
Multiply a vector by a lane value and subtract from a third vector.
Definition vector.hpp:522
ace Vector(VectorType vector)
Constructs a Vector from a SIMD vector type.
Definition vector.hpp:74
simd::Vec128_t< ScalarType > vector_type
Definition vector.hpp:56
ace argon_type Modulo(argon_type b) const
Get the modulo of two vectors.
Definition vector.hpp:690
ace argon_bool_type operator<(argon_type b) const
Compare two vectors, checking if this vector is less than the other.
Definition vector.hpp:221
ace argon_type ShiftLeftRound(argon_type b) const
Shift the elements of the vector to the left by a specified number of bits, rounding the result.
Definition vector.hpp:793
std::array< argon_type, 2 > TransposeWith(argon_type b) const
Perform a 2x2 matrix transpose on two vectors, returning two vectors of pairs.
Definition vector.hpp:1348
constexpr Vector(const Vector &other)=default
Copy constructor for the Vector class.
static ace std::array< argon_type, stride > LoadInterleaved(const scalar_type *ptr)
Load multiple vectors from a pointer, de-interleaving.
Definition vector.hpp:961
static ace argon_type Iota(scalar_type start)
Constructs a Vector from an incrementing sequence.
Definition vector.hpp:161
static ace argon_type FromScalar(scalar_type scalar)
Definition vector.hpp:110
ace argon_type Multiply(scalar_type b) const
Multiply a vector by a scalar value.
Definition vector.hpp:427
ace argon_type ReciprocalEstimateRefine(int n_iters=1) const
Compute a refined reciprocal estimate using Newton-Raphson iterations.
Definition vector.hpp:627
ace argon_type MultiplySubtract(argon_type b, argon_type c) const
Multiply two vectors and subtract from a third vector.
Definition vector.hpp:497
ace argon_type Extract(argon_type b) const
Extract n elements from the lower end of the operand, and the remaining elements from the top end of ...
Definition vector.hpp:1283
constexpr VectorType vec() const
Get the underlying SIMD vector.
Definition vector.hpp:275
ace argon_type ReciprocalEstimate() const
1 / value, using an estimate for speed
Definition vector.hpp:568
ace argon_type Multiply(argon_type b) const
Multiply two vectors.
Definition vector.hpp:418
ace argon_bool_type LessThanOrEqual(argon_type b) const
Compare the lanes of two vectors, setting the result lane's bits to ON if a is less than or equal to ...
Definition vector.hpp:739
ace argon_type operator--() const
Decrement the vector by 1 and return the result.
Definition vector.hpp:236
ace argon_type ShiftLeft(const int i) const
Shift the elements of the vector to the left by a specified number of bits.
Definition vector.hpp:354
static ace argon_type Load(const scalar_type *ptr)
Load a vector from a pointer.
Definition vector.hpp:863
ace argon_type operator+(argon_type b) const
Add a vector and return the result.
Definition vector.hpp:203
ace argon_type ShiftLeft(helpers::ArgonFor_t< simd::make_signed_t< Bool_t< VectorType > > > b) const
Shift the elements of the vector to the left by a specified number of bits.
Definition vector.hpp:751
ace argon_type Multiply(const_lane_type< LaneIndex > b) const
Multiply a vector by a lane value.
Definition vector.hpp:441
ace argon_type PairwiseAdd(argon_type b) const
Pairwise ops.
Definition vector.hpp:1152
ace argon_type MultiplyAdd(scalar_type b, argon_type c) const
Multiply a vector by a scalar value and add a third vector.
Definition vector.hpp:468
ace argon_bool_type operator>=(argon_type b) const
Compare two vectors, checking if this vector is greater than or equal to the other.
Definition vector.hpp:230
ace argon_type MultiplyAdd(argon_type b, lane_type c) const
Multiply a vector by a lane value and add a third vector.
Definition vector.hpp:473
ace argon_bool_type operator==(argon_type b) const
Compare two vectors for equality.
Definition vector.hpp:215
ace argon_type MultiplyAdd(argon_type b, argon_type c) const
Multiply two vectors and add a third vector.
Definition vector.hpp:448
ace argon_type BitwiseSelect(ArgType true_value, ArgType false_value) const
Bitwise select between two vectors, using the current vector as a mask.
Definition vector.hpp:1231
ace argon_type ShiftLeft(std::make_signed_t< simd::Scalar_t< Bool_t< VectorType > > > n) const
Shift the elements of the vector to the left by a specified number of bits.
Definition vector.hpp:763
ace argon_type BitwiseOrNot(argon_type b) const
Bitwise OR of the vector with the NOT of another vector.
Definition vector.hpp:1205
ace helpers::ArgonFor_t< simd::make_signed_t< Bool_t< VectorType > > > CountLeadingSignBits() const
Count the number of consecutive bits following the sign bit that are set to the same value as the sig...
Definition vector.hpp:1252
static ace argon_type FromLane(argon::ConstLane< LaneIndex, VectorType > lane)
Constructs a Vector from a ConstLane object.
Definition vector.hpp:136
ace argon_type AddSaturate(argon_type b) const
Adds two vectors, saturating the result.
Definition vector.hpp:385
std::array< argon_type, 2 > UnzipWith(argon_type b)
Unzip two vectors, returning two vectors of pairs.
Definition vector.hpp:1325
ace argon_type operator++() const
Increment the vector by 1 and return the result.
Definition vector.hpp:233
ace argon_type MultiplyAddFixedQMax(argon_type b, arg_type c) const
Multiply-add three fixed-point vectors, returning a fixed-point sum.
Definition vector.hpp:655
ace argon_type operator^(argon_type b) const
Bitwise XOR two vectors and return the result.
Definition vector.hpp:245
ace argon_type ReciprocalSqrtStep(argon_type b) const
Newton-Raphson step for reciprocal-sqrt refinement: (3 - a * b) / 2.
Definition vector.hpp:614
ace argon_bool_type Equal(argon_type b) const
Compare the lanes of two vectors, setting the result lane's bits to ON if they are equal.
Definition vector.hpp:731
ace void StoreTo(scalar_type *ptr) const
Store the vector to a pointer.
Definition vector.hpp:1132
ConstLane< LaneIndex, simd::Vec128_t< ScalarType > > const_lane_type
Definition vector.hpp:53
ace argon_type MultiplyRoundFixedQMax(lane_type l) const
Multiply a fixed-point vector by a lane value, returning a fixed-point product.
Definition vector.hpp:558
ace argon_type CountActiveBits() const
Count the number of bits that are set to one in the vector.
Definition vector.hpp:1264
ace argon_type MultiplyFixedQMax(scalar_type s) const
Multiply a QMax fixed-point vector by a scalar value, returning a fixed-point product.
Definition vector.hpp:533
ace predicate_type CompareTestNonzero(argon_type b) const
Ands the current vector with the given vector, then checks if nonzero.
Definition vector.hpp:1244
static ace std::array< argon_type, stride > LoadToLaneInterleaved(std::array< argon_type, stride > multi, const scalar_type *ptr)
Load a value from a pointer into a vector at the lane index lane, de-interleaving.
Definition vector.hpp:1053
ace argon_type PairwiseMax(argon_type b) const
Select the maximum of each pair of lanes in the two vectors.
Definition vector.hpp:1157
ace argon_type operator/(argon_type b) const
Divide a vector and return the result.
Definition vector.hpp:212
ace argon_type BitwiseNot() const
Bitwise ops.
Definition vector.hpp:1168
ace argon_type Max(argon_type b) const
Compare the lanes of two vectors, copying the larger of each lane to the result.
Definition vector.hpp:712
static ace argon_type FromLane(argon::Lane< IntrinsicType > lane)
Definition vector.hpp:123
constexpr Vector & operator=(Vector &&other)=default
Move assignment operator for the Vector class.
ace argon_type ShiftLeftInsert(argon_type b) const
Shift the elements of the vector to the left by a specified number of bits, and then OR the result wi...
Definition vector.hpp:810
ace const lane_type GetLane(const size_t i) const
Get a single lane of the vector by index.
Definition vector.hpp:292
ace argon_type BitwiseAnd(argon_type b) const
Bitwise AND of the vector with another vector.
Definition vector.hpp:1177
ace argon_type MultiplyAdd(argon_type b, const_lane_type< LaneIndex > c) const
Multiply a vector by a lane value and add a third vector.
Definition vector.hpp:484
ace argon_type Modulo(scalar_type b) const
Get the modulo of a vector and a scalar value.
Definition vector.hpp:702
ace argon_type Popcount() const
Count the number of bits that are set to one in the vector.
Definition vector.hpp:1277
ace argon_bool_type LessThan(argon_type b) const
Compare the lanes of two vectors, setting the result lane's bits to ON if a is less than b.
Definition vector.hpp:747
ace argon_type ShiftRight(const int i) const
Shift the elements of the vector to the right by a specified number of bits.
Definition vector.hpp:351
ace argon_type ShiftLeftRoundSaturate(argon_type b) const
Shift the elements of the vector to the left by a specified number of bits, rounding and saturating t...
Definition vector.hpp:796
ace Vector(scalar_type scalar)
Constructs a Vector from a scalar value.
Definition vector.hpp:79
ace argon_type ShiftRight() const
Shift the elements of the vector to the right by a specified number of bits.
Definition vector.hpp:816
ace argon_type Negate() const
Bitwise negate the vector and return the result.
Definition vector.hpp:357
ace void StoreLaneTo(scalar_type *ptr)
Store a lane of the vector to a pointer.
Definition vector.hpp:1138
ace argon_type Multiply(lane_type b) const
Multiply a vector by a lane value.
Definition vector.hpp:437
ace argon_type MultiplySubtract(argon_type b, scalar_type c) const
Multiply a vector by a scalar value and subtract from a third vector.
Definition vector.hpp:507
ace argon_type MultiplyAdd(const_lane_type< LaneIndex > b, argon_type c) const
Multiply a vector by a lane value and add a third vector.
Definition vector.hpp:490
ace argon_type MultiplyRoundFixedQMax(scalar_type s) const
Multiply a fixed-point vector by a scalar value, returning a fixed-point product.
Definition vector.hpp:551
ace argon_bool_type GreaterThanOrEqual(argon_type b) const
Compare the lanes of two vectors, setting the result lane's bits to ON if a is greater than or equal ...
Definition vector.hpp:735
ace argon_bool_type operator>(argon_type b) const
Compare two vectors, checking if this vector is greater than the other.
Definition vector.hpp:224
ace argon_type operator<<(const int i) const
Shift the elements of the vector to the left by a specified number of bits.
Definition vector.hpp:266
ace argon_type AddHalve(argon_type b) const
Adds two vectors, halving the result.
Definition vector.hpp:376
static ace std::array< argon_type, stride > LoadGatherOffsetIndexInterleaved(const scalar_type *base_ptr, helpers::ArgonFor_t< simd::make_unsigned_t< Bool_t< VectorType > > > offset_vector)
Perform a Load-Gather of interleaved elements.
Definition vector.hpp:1070
ace argon_type Divide(argon_type b) const
Divide two vectors.
Definition vector.hpp:679
ace argon_type ShiftRightRound() const
Shift the elements of the vector to the right by a specified number of bits, rounding the result.
Definition vector.hpp:826
Bool_t< simd::Vec128_t< ScalarType > > predicate_type
Definition vector.hpp:58
static ace std::array< argon_type, stride > LoadCopyInterleaved(const scalar_type *ptr)
Load multiple vectors from a pointer, duplicating the value across all lanes.
Definition vector.hpp:989
static ace int size()
Get the number of elements.
Definition vector.hpp:1367
static ace argon_type LoadGatherOffsetBytes(const scalar_type *base, helpers::ArgonFor_t< simd::make_unsigned_t< Bool_t< VectorType > > > offset_vector)
Using a base address and a vector of offset bytes and a base pointer, create a new vector.
Definition vector.hpp:887
ace argon_bool_type GreaterThan(argon_type b) const
Compare the lanes of two vectors, setting the result lane's bits to ON if a is greater than b.
Definition vector.hpp:743
ace argon_type ReciprocalStep(argon_type b) const
Newton-Raphson step for reciprocal refinement: 2 - a * b.
Definition vector.hpp:601
ace std::array< argon_type, 2 > ZipWith(argon_type b) const
Zip two vectors together, returning two vectors of pairs.
Definition vector.hpp:1304
ace argon_type operator-(argon_type b) const
Subtract a vector and return the result.
Definition vector.hpp:206
ace argon_type MultiplyRoundAddFixedQMax(argon_type b, arg_type c) const
Multiply-round-add three fixed-point vectors, returning a fixed-point sum.
Definition vector.hpp:664
helpers::ArgonFor_t< simd::Vec128_t< ScalarType > > argon_type
Definition vector.hpp:57
static ace argon_type LoadScalar(const scalar_type *ptr)
Constructs a Vector from a scalar pointer.
Definition vector.hpp:104
ace argon_type ShiftRightAccumulateRound(argon_type b) const
Shift the elements of the b vector to the right by a specified number of bits, and then add the resul...
Definition vector.hpp:846
ace argon_type operator*(argon_type b) const
Multiply a vector and return the result.
Definition vector.hpp:209
ace predicate_type TestNonzero() const
Checks whether each lane of the current vector is nonzero.
Definition vector.hpp:1247
static ace argon_type LoadCopy(const scalar_type *ptr)
Load a vector from a pointer, duplicating the value across all lanes.
Definition vector.hpp:872
ace argon_type LoadToLane(const scalar_type *ptr)
Load a lane from a pointer.
Definition vector.hpp:950
ace argon_type SubtractAbs(argon_type b) const
Subtract two vectors, taking the absolute value of the result.
Definition vector.hpp:405
ace argon_type ShiftLeft() const
Shift the elements of the vector to the left by a specified number of bits.
Definition vector.hpp:777
ace const_lane_type< lanes - 1 > LastLane()
Get the last lane of the vector.
Definition vector.hpp:348
ace argon_type ShiftLeftSaturate(helpers::ArgonFor_t< simd::make_signed_t< Bool_t< VectorType > > > b) const
Shift the elements of the vector to the left by a specified number of bits, saturating the result.
Definition vector.hpp:786
ace argon_type MultiplyFixedQMax(lane_type l) const
Multiply a QMax fixed-point vector by a lane value, returning a fixed-point product.
Definition vector.hpp:538
ace argon_type MultiplySubtract(scalar_type b, argon_type c) const
Multiply a vector by a scalar value and subtract from a third vector.
Definition vector.hpp:517
ace argon_bool_type operator<=(argon_type b) const
Compare two vectors, checking if this vector is less than or equal to the other.
Definition vector.hpp:227
ace argon_type ShiftLeftSaturate() const
Shift the elements of the vector to the left by a specified number of bits, saturating the result.
Definition vector.hpp:800
ace argon_type operator&(argon_type b) const
Bitwise AND two vectors and return the result.
Definition vector.hpp:239
ace argon_type MultiplyRoundFixedQMax(argon_type v) const
Multiply two fixed-point vectors, returning a fixed-point product.
Definition vector.hpp:545
ace argon_type SubtractSaturate(argon_type b) const
Subtract two vectors, saturating the result.
Definition vector.hpp:401
static ace std::array< argon_type, Stride > LoadToLaneInterleaved(simd::MultiVector_t< VectorType, Stride > multi, const scalar_type *ptr)
Load a value from a pointer into a vector at the lane index lane, de-interleaving.
Definition vector.hpp:1019
ace argon_type ReciprocalSqrtEstimateRefine(int n_iters=1) const
Compute a refined reciprocal-sqrt estimate using Newton-Raphson iterations.
Definition vector.hpp:640
static ace std::array< argon_type, n > LoadMulti(const scalar_type *ptr)
Load n vectors from a single contiguous set of memory.
Definition vector.hpp:1090
static ace argon_type LoadGatherOffsetIndex(const scalar_type *base, helpers::ArgonFor_t< simd::make_unsigned_t< Bool_t< VectorType > > > offset_vector)
Using a base address and a vector of offset indices and a base pointer, create a new vector.
Definition vector.hpp:919
helpers::ArgonFor_t< predicate_type > argon_bool_type
Definition vector.hpp:59
ace argon_type operator|(argon_type b) const
Bitwise OR two vectors and return the result.
Definition vector.hpp:242
ace argon_type MultiplyFixedQMax(argon_type v) const
Multiply two QMax fixed-point vectors, returning a fixed-point product.
Definition vector.hpp:529
ace argon_type MultiplyAdd(argon_type b, scalar_type c) const
Multiply a vector by a scalar value and add a third vector.
Definition vector.hpp:458
simd::Scalar_t< simd::Vec128_t< ScalarType > > scalar_type
Definition vector.hpp:55
ace const const_lane_type< LaneIndex > GetLane() const
Get a single lane of the vector by index.
Definition vector.hpp:330
ace argon_type Select(ArgType true_value, ArgType false_value) const
Bitwise select between two vectors, using the current vector as a mask.
Definition vector.hpp:1238
ace argon_type operator~() const
Bitwise NOT the vector and return the result.
Definition vector.hpp:248
ace Vector(argon::ConstLane< LaneIndex, VectorType > lane)
Constructs a Vector from a ConstLane object.
Definition vector.hpp:92
ace std::array< scalar_type, lanes > to_array()
Convert the vector to an array of scalar values.
Definition vector.hpp:282
ace argon_type Absolute() const
Get the absolute value of the vector.
Definition vector.hpp:564
static constexpr size_t lanes
Definition vector.hpp:62
ace lane_type operator[](const size_t i)
Access a lane of the vector by index.
Definition vector.hpp:254
ace argon_type BitwiseClear(argon_type b) const
Bitwise AND of the vector with the NOT of another vector.
Definition vector.hpp:1224
ace argon_type BitwiseAndNot(argon_type b) const
Bitwise AND of the vector with the NOT of another vector.
Definition vector.hpp:1215
ace argon_type ShiftRightAccumulate(argon_type b) const
Shift the elements of the b vector to the right by a specified number of bits, and then add the resul...
Definition vector.hpp:834
ace argon_type operator>>(const int i) const
Shift the elements of the vector to the right by a specified number of bits.
Definition vector.hpp:257
ace argon_type Add(argon_type b) const
Add two vectors.
Definition vector.hpp:366
ace argon_type BitwiseOr(argon_type b) const
Bitwise OR of the vector with another vector.
Definition vector.hpp:1186
ace argon_type ReciprocalSqrtEstimate() const
1 / sqrt(value), using an estimate for speed
Definition vector.hpp:584
constexpr Vector()=default
The default constructor for the Vector class.
ace const lane_type GetLane(const int i) const
Get a single lane of the vector by index.
Definition vector.hpp:310
constexpr Vector & operator=(const Vector &other)=default
Copy assignment operator for the Vector class.
static ace argon_type GenerateWithIndex(FuncType body)
Constructs a Vector from a function that generates values with an index.
Definition vector.hpp:191
ace argon_bool_type operator!=(argon_type b) const
Compare two vectors for inequality.
Definition vector.hpp:218
ace argon_type CountLeadingZeroBits() const
Count the number of consecutive top bits that are set to zero.
Definition vector.hpp:1260
ace argon_type Min(argon_type b) const
Compare the lanes of two vectors, copying the smaller of each lane to the result.
Definition vector.hpp:721
ace argon_type operator-() const
Negate the SIMD vector and return the result.
Definition vector.hpp:200
ace argon_type SubtractHalve(argon_type b) const
Subtract two vectors, halving the result.
Definition vector.hpp:398
ace argon_type MultiplyAdd(lane_type b, argon_type c) const
Multiply a vector by a lane value and add a third vector.
Definition vector.hpp:479
ace argon_type ShiftRightInsert(argon_type b) const
Shift the elements of the vector to the right by a specified number of bits, ORing the result with th...
Definition vector.hpp:858
ace argon_type SubtractAbsAdd(argon_type b, argon_type c) const
Subtract two vectors, taking the absolute value of the result and adding a third vector.
Definition vector.hpp:409
constexpr Vector(Vector &&other)=default
Move constructor for the Vector class.
ace argon_type Subtract(argon_type b) const
Subtract two vectors.
Definition vector.hpp:388
ace argon_type BitwiseXor(argon_type b) const
Bitwise XOR of the vector with another vector.
Definition vector.hpp:1195
ace Vector(argon::Lane< VectorType > lane)
Constructs a Vector from a Lane object.
Definition vector.hpp:85
static ace argon_type Generate(FuncType body)
Constructs a Vector from a function that generates values.
Definition vector.hpp:176
ace argon_type AddHalveRound(argon_type b) const
Adds two vectors, halving and rounding the result.
Definition vector.hpp:380
ace Lane< const VectorType > operator[](const size_t i) const
Access a lane of the vector by index.
Definition vector.hpp:251
Definition vector.hpp:36
Header file for SIMD features and platform detection.
typename ArgonFor< std::remove_cv_t< T > >::type ArgonFor_t
Helper alias to get the Argon type for a given vector type.
Definition argon_for.hpp:45
Lane deconstruction feature.
Definition argon_full.hpp:399
Helper functions to convert C-style arrays to std::array of Argon types, mimicking std::to_array.
constexpr std::array< helpers::ArgonFor_t< T >, N > to_array(T(&a)[N])
Convert a C-style array of vector types to a std::array of Argon types.
Definition to_array.hpp:29