// argon/vector.hpp (text extracted from the Doxygen page argon/argon_2vector_8hpp_source.html)

#pragma once

#include <array>

#include <bit>

#include <cmath>

#include <functional>

#include <limits>

#include <tuple>

#include <type_traits>

#include <utility>

#include "arm_simd.hpp"

#include "arm_simd/helpers.hpp"

#include "arm_simd/helpers/multivector.hpp"

#include "arm_simd/helpers/scalar.hpp"

#include "arm_simd/helpers/vec64.hpp"

#include "features.h"

#include "helpers.hpp"

#include "helpers/bool.hpp"

#include "helpers/to_array.hpp"

#include "lane.hpp"


// Chooses what the token `simd` expands to throughout this file:
// `mve` when the compiler defines __ARM_FEATURE_MVE, `neon` otherwise.
// NOTE(review): the rest of the file switches on ARGON_PLATFORM_MVE rather than
// __ARM_FEATURE_MVE — confirm the two macros are always defined together.
#ifdef __ARM_FEATURE_MVE

#define simd mve

#else

#define simd neon

#endif


// `ace` is the specifier prefix placed in front of most member functions below.
//  - ARGON_PLATFORM_SIMDE defined: expands to nothing.
//  - Clang:                        [[gnu::always_inline]] constexpr
//  - anything else:                [[gnu::always_inline]] inline (not constexpr)
// NOTE(review): `#elifdef` is a C++23 preprocessor directive — confirm every supported toolchain accepts it.
#ifdef ARGON_PLATFORM_SIMDE

#define ace

#elifdef __clang__

#define ace [[gnu::always_inline]] constexpr

#else

#define ace [[gnu::always_inline]] inline

#endif


namespace argon {

/// Satisfied by every type T for which std::is_arithmetic_v<T> is true.
template <typename T>

concept arithmetic = std::is_arithmetic_v<T>;


/// True when `T` is exactly the same type as at least one of `Ts...`;
/// false for an empty `Ts` pack.
template <typename T, typename... Ts>
inline constexpr bool is_one_of = (std::is_same_v<T, Ts> || ...);


template <typename VectorType>


class Vector {

 public:

  /// Lane handle whose lane index is a compile-time template argument.
  template <size_t LaneIndex>

  using const_lane_type = ConstLane<LaneIndex, VectorType>;

  /// Lane handle whose lane index is supplied at run time.
  using lane_type = Lane<VectorType>;

  /// Element type of the wrapped vector, as reported by simd::Scalar_t.
  using scalar_type = simd::Scalar_t<VectorType>;

  /// The wrapped intrinsic vector type.
  using vector_type = VectorType;

  /// The type helpers::ArgonFor_t maps VectorType to; used as the parameter/return type of most operations.
  using argon_type = helpers::ArgonFor_t<VectorType>;

  /// Vector type produced by Bool_t for VectorType (used for comparison results).
  using predicate_type = Bool_t<VectorType>;

  /// The ArgonFor_t wrapper around predicate_type.
  using argon_bool_type = helpers::ArgonFor_t<predicate_type>;


  /// Number of lanes: 16 bytes (quadword) or 8 bytes (otherwise) divided by the element size.
  static constexpr size_t lanes = (simd::is_quadword_v<VectorType> ? 16 : 8) / sizeof(scalar_type);


  /// Defaulted default constructor.
  constexpr Vector() = default;


  // Copy/move construction and assignment are all compiler-generated.
  constexpr Vector(Vector&& other) = default;

  constexpr Vector(const Vector& other) = default;

  constexpr Vector& operator=(Vector&& other) = default;

  constexpr Vector& operator=(const Vector& other) = default;


  /// Wraps an existing intrinsic vector (implicit conversion).
  ace Vector(VectorType vector) : vec_{std::move(vector)} {};


  /// Builds the vector from a single scalar via FromScalar (implicit conversion).
  ace Vector(scalar_type scalar) : vec_(FromScalar(scalar)) {};


#ifndef ARGON_PLATFORM_MVE

  /// Builds the vector from a run-time-indexed lane handle via FromLane. Not available on MVE.
  ace Vector(argon::Lane<VectorType> lane) : vec_(FromLane(lane)) {};


  /// Builds the vector from a compile-time-indexed lane handle via FromLane. Not available on MVE.
  template <size_t LaneIndex>

  ace Vector(argon::ConstLane<LaneIndex, VectorType> lane) : vec_(FromLane(lane)) {};

#endif


  /// Brace-initialises the wrapped vector from two or more arguments
  /// (the single-argument forms are handled by the constructors above).
  template <typename... ArgTypes>

    requires(sizeof...(ArgTypes) > 1)

  ace Vector(ArgTypes... args) : vec_{std::forward<ArgTypes>(args)...} {}


  /// Alias for LoadCopy(ptr).
  ace static argon_type LoadScalar(const scalar_type* ptr) { return LoadCopy(ptr); }


  /// Builds a vector from `scalar` using the platform's `duplicate` wrapper.
  /// The MVE wrapper is called without an explicit vector type; the other path names VectorType explicitly.
  ace static argon_type FromScalar(scalar_type scalar) {

#ifdef ARGON_PLATFORM_MVE

    return simd::duplicate(scalar);

#else

    return simd::duplicate<VectorType>(scalar);

#endif

  }


  /// Builds a vector from a run-time-indexed lane handle.
  /// MVE: duplicates the value returned by `lane.Get()`.
  /// Otherwise: forwards the handle's source vector and lane index to `simd::duplicate_lane<vector_type>`.
  template <simd::is_vector_type IntrinsicType>


  ace static argon_type FromLane(argon::Lane<IntrinsicType> lane) {

#ifdef ARGON_PLATFORM_MVE

    return simd::duplicate(lane.Get());

#else

    return simd::duplicate_lane<vector_type>(lane.vec(), lane.lane());

#endif

  }


  /// Builds a vector from a compile-time-indexed lane handle.
  /// MVE: duplicates the value returned by `lane.Get()`.
  /// Otherwise: picks the quadword or non-quadword `duplicate_lane` wrapper based on VectorType.
  template <size_t LaneIndex>


  ace static argon_type FromLane(argon::ConstLane<LaneIndex, VectorType> lane) {

#ifdef ARGON_PLATFORM_MVE

    return simd::duplicate(lane.Get());

#else

    if constexpr (simd::is_quadword_v<VectorType>) {

      return simd::duplicate_lane_quad<LaneIndex>(lane.vec());

    } else {

      return simd::duplicate_lane<LaneIndex>(lane.vec());

    }

#endif

  }


  /// Returns a vector built from `start`; the fallback branch computes start + {0, 1, 2, 3}.
  /// The primary branch delegates to IotaHelper with an index sequence of `lanes` entries
  /// (IotaHelper is defined outside this view — presumably lane i receives start + i).
  /// NOTE(review): the fallback branch hard-codes four lane offsets, so it only matches
  /// vector types with exactly 4 lanes — confirm whether other lane counts can reach it.
  ace static argon_type Iota(scalar_type start) {

    // TODO: Remove this once MSVC 19.44 is released.

#if __cpp_if_consteval >= 202106L

    return IotaHelper(start, std::make_index_sequence<lanes>{});

#else

    return Argon{start}.Add(VectorType{0, 1, 2, 3});

#endif

  }


  /// Builds a vector by calling `body()` once for every lane and storing each result in that lane.
  ///
  /// @param body Callable convertible to `std::function<scalar_type()>`.
  /// @return The generated vector.
  template <typename FuncType>
    requires std::convertible_to<FuncType, std::function<scalar_type()>>
  ace static argon_type Generate(FuncType body) {
    VectorType out;
    // Pass the lane index as a template parameter, the same lambda shape every other
    // constexpr_for call in this class uses (the previous runtime `size_t i` parameter
    // was the lone exception).
    utility::constexpr_for<0, lanes, 1>([&]<size_t i>() {  //
      out[i] = body();
    });
    return out;
  }


  /// Builds a vector by calling `body(i)` for every lane index i in [0, lanes) and storing the result in lane i.
  /// The index is passed as a compile-time constant and converted to the callable's parameter type.
  template <typename FuncType>

    requires std::convertible_to<FuncType, std::function<scalar_type(scalar_type)>>


  ace static argon_type GenerateWithIndex(FuncType body) {

    VectorType out;

    utility::constexpr_for<0, lanes, 1>([&]<size_t i>() {  //

      out[i] = body(i);

    });

    return out;

  }


  // Operator sugar: each operator forwards to the named member function shown in its body.

  /// Unary minus; forwards to Negate().
  ace argon_type operator-() const { return Negate(); }


  ace argon_type operator+(argon_type b) const { return Add(b); }


  ace argon_type operator-(argon_type b) const { return Subtract(b); }


  ace argon_type operator*(argon_type b) const { return Multiply(b); }


  ace argon_type operator/(argon_type b) const { return Divide(b); }


  ace argon_bool_type operator==(argon_type b) const { return Equal(b); }


  /// Bitwise complement of the Equal() result.
  ace argon_bool_type operator!=(argon_type b) const { return ~Equal(b); }


  ace argon_bool_type operator<(argon_type b) const { return LessThan(b); }


  ace argon_bool_type operator>(argon_type b) const { return GreaterThan(b); }


  ace argon_bool_type operator<=(argon_type b) const { return LessThanOrEqual(b); }


  ace argon_bool_type operator>=(argon_type b) const { return GreaterThanOrEqual(b); }


  /// Returns Add(1). The function is const and does NOT modify *this, unlike built-in pre-increment.
  ace argon_type operator++() const { return Add(1); }


  /// Returns Subtract(1). The function is const and does NOT modify *this, unlike built-in pre-decrement.
  ace argon_type operator--() const { return Subtract(1); }


  ace argon_type operator&(argon_type b) const { return BitwiseAnd(b); }


  ace argon_type operator|(argon_type b) const { return BitwiseOr(b); }


  ace argon_type operator^(argon_type b) const { return BitwiseXor(b); }


  ace argon_type operator~() const { return BitwiseNot(); }


  /// Lane access on a const vector; forwards to GetLane(i).
  ace Lane<const VectorType> operator[](const size_t i) const { return GetLane(i); }


  /// Lane access on a mutable vector; forwards to GetLane(i).
  ace lane_type operator[](const size_t i) { return GetLane(i); }


  /// Right shift by `i`: the compiler's vector `>>` when ARGON_USE_COMPILER_EXTENSIONS is non-zero,
  /// otherwise ShiftRight(i).
  ace argon_type operator>>(const int i) const {

#if ARGON_USE_COMPILER_EXTENSIONS

    return vec_ >> i;

#else

    return ShiftRight(i);

#endif

  }


  /// Left shift by `i`: the compiler's vector `<<` when ARGON_USE_COMPILER_EXTENSIONS is non-zero,
  /// otherwise ShiftLeft(i).
  ace argon_type operator<<(const int i) const {

#if ARGON_USE_COMPILER_EXTENSIONS

    return vec_ << i;

#else

    return ShiftLeft(i);

#endif

  }


  /// Returns a copy of the wrapped intrinsic vector.
  [[gnu::always_inline]] constexpr VectorType vec() const { return vec_; }


  /// Implicit conversion to the wrapped intrinsic vector type.
  [[gnu::always_inline]] constexpr operator VectorType() const { return vec_; }


  /// Copies every lane into a `std::array` by storing the vector through `simd::store1`.
  ///
  /// Declared `const` because it only reads the vector; this lets it be called on const objects.
  /// @return An array of `lanes` elements filled by the store.
  ace std::array<scalar_type, lanes> to_array() const {
    std::array<scalar_type, lanes> out;
    simd::store1(out.data(), vec_);
    return out;
  }


  // Run-time-indexed lane access, provided for size_t and int indices, const and non-const.
  // MVE: indexes the vector directly with vec_[i].
  // Otherwise: builds the lane handle from the vector and the index (as int).

  ace const lane_type GetLane(const size_t i) const {

#ifdef ARGON_PLATFORM_MVE

    return vec_[i];

#else

    return {vec_, static_cast<int>(i)};

#endif

  }


  ace lane_type GetLane(const size_t i) {

#ifdef ARGON_PLATFORM_MVE

    return vec_[i];

#else

    return {vec_, static_cast<int>(i)};

#endif

  }


  ace const lane_type GetLane(const int i) const {

#ifdef ARGON_PLATFORM_MVE

    return vec_[i];

#else

    return {vec_, i};

#endif

  }


  ace lane_type GetLane(const int i) {

#ifdef ARGON_PLATFORM_MVE

    return vec_[i];

#else

    return {vec_, i};

#endif

  }


  /// Compile-time-indexed lane access (const object).
  /// MVE: indexes the vector with vec_[LaneIndex]; otherwise the handle is constructed from vec_.
  template <size_t LaneIndex>


  ace const const_lane_type<LaneIndex> GetLane() const {

#ifdef ARGON_PLATFORM_MVE

    return vec_[LaneIndex];

#else

    return vec_;

#endif

  }


  /// Compile-time-indexed lane access (mutable object); same branches as the const overload.
  template <size_t LaneIndex>

  ace const_lane_type<LaneIndex> GetLane() {

#ifdef ARGON_PLATFORM_MVE

    return vec_[LaneIndex];

#else

    return vec_;

#endif

  }


  /// Handle for the highest-indexed lane (index lanes - 1), constructed from vec_.
  ace const_lane_type<lanes - 1> LastLane() { return vec_; }


  /// Right shift by a run-time count; forwards to simd::shift_right.
  ace argon_type ShiftRight(const int i) const { return simd::shift_right(vec_, i); }


  /// Left shift by a run-time count; forwards to simd::shift_left.
  ace argon_type ShiftLeft(const int i) const { return simd::shift_left(vec_, i); }


  /// Negation: the compiler's unary minus on the vector when ARGON_USE_COMPILER_EXTENSIONS is true,
  /// otherwise simd::negate.
  ace argon_type Negate() const {

    if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {

      return -vec_;

    } else {

      return simd::negate(vec_);

    }

  }


  /// this + b: compiler vector `+` when ARGON_USE_COMPILER_EXTENSIONS is true, otherwise simd::add.
  ace argon_type Add(argon_type b) const {

    if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {

      return vec_ + b.vec_;

    } else {

      return simd::add(vec_, b);

    }

  }


  /// Forwards to simd::add_halve(this, b).
  ace argon_type AddHalve(argon_type b) const { return simd::add_halve(vec_, b); }


  /// Forwards to simd::add_halve_round(this, b).
  ace argon_type AddHalveRound(argon_type b) const { return simd::add_halve_round(vec_, b); }


  /// Forwards to simd::add_saturate(this, b).
  ace argon_type AddSaturate(argon_type b) const { return simd::add_saturate(vec_, b); }


  /// this - b: compiler vector `-` when ARGON_USE_COMPILER_EXTENSIONS is true, otherwise simd::subtract.
  ace argon_type Subtract(argon_type b) const {

    if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {

      return vec_ - b.vec_;

    } else {

      return simd::subtract(vec_, b);

    }

  }


  /// Forwards to simd::subtract_halve(this, b).
  ace argon_type SubtractHalve(argon_type b) const { return simd::subtract_halve(vec_, b); }


  /// Forwards to simd::subtract_saturate(this, b).
  ace argon_type SubtractSaturate(argon_type b) const { return simd::subtract_saturate(vec_, b); }


  /// Forwards to simd::subtract_absolute(this, b).
  ace argon_type SubtractAbs(argon_type b) const { return simd::subtract_absolute(vec_, b); }


  /// MVE: this + subtract_absolute(b, c), composed from two wrappers.
  /// Otherwise: the single neon::subtract_absolute_add(this, b, c) wrapper.
  ace argon_type SubtractAbsAdd(argon_type b, argon_type c) const {

#ifdef ARGON_PLATFORM_MVE

    return mve::add(vec_, mve::subtract_absolute(b, c));

#else

    return neon::subtract_absolute_add(vec_, b, c);

#endif

  }


  /// this * b (vector operand): compiler vector `*` when ARGON_USE_COMPILER_EXTENSIONS is true,
  /// otherwise simd::multiply.
  ace argon_type Multiply(argon_type b) const {

    if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {

      return vec_ * b.vec_;

    } else {

      return simd::multiply(vec_, b);

    }

  }


  /// this * b (scalar operand); same two branches as the vector overload.
  ace argon_type Multiply(scalar_type b) const {

    if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {

      return vec_ * b;

    } else {

      return simd::multiply(vec_, b);

    }

  }


#ifndef ARGON_PLATFORM_MVE

  /// Multiply by a run-time-indexed lane of another vector via neon::multiply_lane. Not available on MVE.
  ace argon_type Multiply(lane_type b) const { return neon::multiply_lane(vec_, b.vec(), b.lane()); }


  /// Multiply by a compile-time-indexed lane of another vector via neon::multiply_lane. Not available on MVE.
  template <size_t LaneIndex>


  ace argon_type Multiply(const_lane_type<LaneIndex> b) const {

    return neon::multiply_lane(vec_, b.vec(), b.lane());

  }


#endif


  /// Computes this + b * c (vector, vector): written out with vector operators when
  /// ARGON_USE_COMPILER_EXTENSIONS is true, otherwise simd::multiply_add(this, b, c).
  ace argon_type MultiplyAdd(argon_type b, argon_type c) const {

    if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {

      return vec_ + b.vec_ * c.vec_;

    } else {

      return simd::multiply_add(vec_, b, c);

    }

  }


  /// Computes this + b * c (vector, scalar); same two branches as above.
  ace argon_type MultiplyAdd(argon_type b, scalar_type c) const {

    if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {

      return vec_ + b.vec_ * c;

    } else {

      return simd::multiply_add(vec_, b, c);

    }

  }


  /// (scalar, vector) order; swaps the operands and forwards to the (vector, scalar) overload.
  ace argon_type MultiplyAdd(scalar_type b, argon_type c) const { return MultiplyAdd(c, b); }


#ifndef ARGON_PLATFORM_MVE


  /// (vector, run-time lane) form via simd::multiply_add_lane. Not available on MVE.
  ace argon_type MultiplyAdd(argon_type b, lane_type c) const {

    return simd::multiply_add_lane(vec_, b.vec(), c.vec(), c.lane());

  }


  /// (run-time lane, vector) order; swaps the operands and forwards.
  ace argon_type MultiplyAdd(lane_type b, argon_type c) const { return MultiplyAdd(c, b); }


  /// (vector, compile-time lane) form via simd::multiply_add_lane. Not available on MVE.
  template <size_t LaneIndex>


  ace argon_type MultiplyAdd(argon_type b, const_lane_type<LaneIndex> c) const {

    return simd::multiply_add_lane(vec_, b.vec(), c.vec(), c.lane());

  }


  /// (compile-time lane, vector) order; swaps the operands and forwards.
  template <size_t LaneIndex>


  ace argon_type MultiplyAdd(const_lane_type<LaneIndex> b, argon_type c) const {

    return MultiplyAdd(c, b);

  }


#endif


  /// Computes this - b * c (vector, vector): written out with vector operators when
  /// ARGON_USE_COMPILER_EXTENSIONS is non-zero, otherwise simd::multiply_subtract(this, b, c).
  ace argon_type MultiplySubtract(argon_type b, argon_type c) const {

#if ARGON_USE_COMPILER_EXTENSIONS

    return vec_ - b.vec_ * c.vec_;

#else

    return simd::multiply_subtract(vec_, b, c);

#endif

  }


  /// Computes this - b * c (vector, scalar); same two branches as above.
  ace argon_type MultiplySubtract(argon_type b, scalar_type c) const {

#if ARGON_USE_COMPILER_EXTENSIONS

    return vec_ - b.vec_ * c;

#else

    return simd::multiply_subtract(vec_, b, c);

#endif

  }


  /// (scalar, vector) order; swaps the operands and forwards to the (vector, scalar) overload.
  ace argon_type MultiplySubtract(scalar_type b, argon_type c) const { return MultiplySubtract(c, b); }


#ifndef ARGON_PLATFORM_MVE


  /// (vector, run-time lane) form via simd::multiply_subtract_lane. Not available on MVE.
  ace argon_type MultiplySubtract(argon_type b, lane_type c) const {

    return simd::multiply_subtract_lane(vec_, b.vec(), c.vec(), c.lane());

  }


#endif


  // MultiplyFixedQMax overloads forward to simd::multiply_double_saturate_high[_lane];
  // MultiplyRoundFixedQMax overloads forward to simd::multiply_double_round_saturate_high[_lane].
  // The lane overloads are compiled out on MVE.

  ace argon_type MultiplyFixedQMax(argon_type v) const { return simd::multiply_double_saturate_high(vec_, v); }


  ace argon_type MultiplyFixedQMax(scalar_type s) const { return simd::multiply_double_saturate_high(vec_, s); }


#ifndef ARGON_PLATFORM_MVE


  ace argon_type MultiplyFixedQMax(lane_type l) const {

    return simd::multiply_double_saturate_high_lane(vec_, l.vec(), l.lane());

  }


#endif


  ace argon_type MultiplyRoundFixedQMax(argon_type v) const {

    return simd::multiply_double_round_saturate_high(vec_, v);

  }


  ace argon_type MultiplyRoundFixedQMax(scalar_type s) const {

    return simd::multiply_double_round_saturate_high(vec_, s);

  }


#ifndef ARGON_PLATFORM_MVE


  ace argon_type MultiplyRoundFixedQMax(lane_type l) const {

    return simd::multiply_double_round_saturate_high_lane(vec_, l.vec(), l.lane());

  }


#endif


  /// Forwards to simd::abs on the wrapped vector.
  ace argon_type Absolute() const { return simd::abs(vec_); }


  /// Reciprocal of each lane. Only available for floating-point or `uint32_t` lanes.
  ///
  /// MVE: computed with a vector division — `numeric_limits<uint32_t>::max() / v` for
  /// `uint32_t` lanes and `1.f / v` for floating-point lanes.
  /// Otherwise: forwards to `simd::reciprocal_estimate`.
  ///
  /// @return The per-lane reciprocal (estimate).
  ace argon_type ReciprocalEstimate() const
    requires std::floating_point<scalar_type> || std::is_same_v<scalar_type, uint32_t>
  {
#ifdef ARGON_PLATFORM_MVE
    if constexpr (std::is_same_v<scalar_type, uint32_t>) {
      // The quotient was previously computed and discarded, so this branch fell off the
      // end of a value-returning function.
      return std::numeric_limits<uint32_t>::max() / vec_;
    } else {
      return 1.f / vec_;
    }
#else
    return simd::reciprocal_estimate(vec_);
#endif
  }


  /// Returns this + b.MultiplyFixedQMax(c).
  /// `c` may be a vector, scalar or lane handle, or anything convertible to a vector or scalar.
  template <typename arg_type>

    requires(is_one_of<arg_type, argon_type, scalar_type, lane_type> || std::is_convertible_v<arg_type, argon_type> ||

             std::is_convertible_v<arg_type, scalar_type>)


  ace argon_type MultiplyAddFixedQMax(argon_type b, arg_type c) const {

    return Add(b.MultiplyFixedQMax(c));

  }


  /// Returns this + b.MultiplyRoundFixedQMax(c); same constraints on `c` as MultiplyAddFixedQMax.
  template <typename arg_type>

    requires(is_one_of<arg_type, argon_type, scalar_type, lane_type> || std::is_convertible_v<arg_type, argon_type> ||

             std::is_convertible_v<arg_type, scalar_type>)


  ace argon_type MultiplyRoundAddFixedQMax(argon_type b, arg_type c) const {

    return Add(b.MultiplyRoundFixedQMax(c));

  }


#ifdef __aarch64__

  /// this / b on AArch64: compiler vector `/` when ARGON_USE_COMPILER_EXTENSIONS is true,
  /// otherwise simd::divide.
  ace argon_type Divide(argon_type b) const {

    if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {

      return vec_ / b.vec_;

    } else {

      return simd::divide(vec_, b);

    }

  }

#else


  /// this / b on other targets: compiler vector `/` when ARGON_USE_COMPILER_EXTENSIONS is true,
  /// otherwise a lane-by-lane scalar division through map2.
  ace argon_type Divide(argon_type b) const {

    if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {

      return vec_ / b.vec_;

    } else {

      return this->map2(b, [](scalar_type lane1, scalar_type lane2) { return lane1 / lane2; });

    }

  }


#endif


  /// Lane-wise remainder of this divided by b.
  /// Uses the compiler's vector `%` when ARGON_USE_COMPILER_EXTENSIONS is true; otherwise works
  /// lane by lane through map2 with std::fmod for floating-point lanes and `%` for the rest.
  ace argon_type Modulo(argon_type b) const {

    if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {

      return vec_ % b.vec_;

    } else if constexpr (std::floating_point<scalar_type>) {

      return this->map2(b, [](scalar_type lane1, scalar_type lane2) { return std::fmod(lane1, lane2); });

    } else {

      return this->map2(b, [](scalar_type lane1, scalar_type lane2) { return lane1 % lane2; });

    }

  }


  /// Lane-wise remainder of this vector divided by the scalar `b`.
  ///
  /// Uses the compiler's vector `%` when ARGON_USE_COMPILER_EXTENSIONS is true. Otherwise it
  /// works lane by lane through `map`: `std::fmod` for floating-point lanes and the integer `%`
  /// operator for everything else, mirroring `Modulo(argon_type)`. Integer lanes previously went
  /// through `std::fmod` too, which round-trips through floating point and cannot represent
  /// every 64-bit integer exactly.
  ///
  /// @param b Divisor applied to every lane.
  /// @return The per-lane remainder.
  ace argon_type Modulo(scalar_type b) const {
    if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
      return vec_ % b;
    } else if constexpr (std::floating_point<scalar_type>) {
      return this->map([b](scalar_type lane1) { return std::fmod(lane1, b); });
    } else {
      // The cast undoes integer promotion for lanes narrower than int.
      return this->map([b](scalar_type lane1) { return static_cast<scalar_type>(lane1 % b); });
    }
  }


  /// Lane-wise maximum of this and b: a vector compare-and-select when
  /// ARGON_USE_COMPILER_EXTENSIONS is true, otherwise simd::max.
  ace argon_type Max(argon_type b) const {

    if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {

      return vec_ > b.vec_ ? vec_ : b.vec_;

    } else {

      return simd::max(vec_, b);

    }

  }


  /// Lane-wise minimum of this and b: a vector compare-and-select when
  /// ARGON_USE_COMPILER_EXTENSIONS is true, otherwise simd::min.
  ace argon_type Min(argon_type b) const {

    if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {

      return vec_ < b.vec_ ? vec_ : b.vec_;

    } else {

      return simd::min(vec_, b);

    }

  }


  // Comparisons: each forwards to the matching simd:: comparison wrapper and returns argon_bool_type.

  ace argon_bool_type Equal(argon_type b) const { return simd::equal(vec_, b); }


  ace argon_bool_type GreaterThanOrEqual(argon_type b) const { return simd::greater_than_or_equal(vec_, b); }


  ace argon_bool_type LessThanOrEqual(argon_type b) const { return simd::less_than_or_equal(vec_, b); }


  ace argon_bool_type GreaterThan(argon_type b) const { return simd::greater_than(vec_, b); }


  ace argon_bool_type LessThan(argon_type b) const { return simd::less_than(vec_, b); }


  /// Shifts each lane left by the count held in the corresponding lane of `b`.
  /// Only available for integral lanes.
  ///
  /// @param b Per-lane shift counts, in the signed counterpart of this vector's predicate type.
  /// @return The shifted vector.
  ace argon_type ShiftLeft(helpers::ArgonFor_t<simd::make_signed_t<Bool_t<VectorType>>> b) const
    requires std::is_integral_v<scalar_type>
  {
    // `b` can be a different Vector instantiation than *this (signed counts for an unsigned
    // vector), so read it through the public vec() accessor instead of its vec_ member.
    if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
      return vec_ << b.vec();
    } else {
      return simd::shift_left(vec_, b.vec());
    }
  }


  /// Shifts every lane left by the same signed scalar count `n`.
  /// Only available for integral lanes.
  ///
  /// @param n Shift count, in the signed counterpart of the predicate element type.
  /// @return The shifted vector.
  ace argon_type ShiftLeft(std::make_signed_t<simd::Scalar_t<Bool_t<VectorType>>> n) const
    requires std::is_integral_v<scalar_type>
  {
    if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {
      return vec_ << n;
    } else {
      // Broadcast the count into a signed vector, then reuse the per-lane shift wrapper.
      // The temporary can be a different Vector instantiation than *this, so read it through
      // the public vec() accessor instead of its vec_ member.
      helpers::ArgonFor_t<simd::make_signed_t<VectorType>> b{n};
      return simd::shift_left(vec_, b.vec());
    }
  }


  /// Left shift by the compile-time count `n`: compiler vector `<<` when
  /// ARGON_USE_COMPILER_EXTENSIONS is true, otherwise simd::shift_left<n>.
  template <int n>


  ace argon_type ShiftLeft() const {

    if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {

      return vec_ << n;

    } else {

      return simd::shift_left<n>(vec_);

    }

  }


  /// Forwards to simd::shift_left_saturate(this, b) with per-lane signed counts; integral lanes only.
  ace argon_type ShiftLeftSaturate(helpers::ArgonFor_t<simd::make_signed_t<Bool_t<VectorType>>> b) const

    requires(std::is_integral_v<scalar_type>)

  {

    return simd::shift_left_saturate(vec_, b);

  }


  /// Forwards to simd::shift_left_round(this, b).
  ace argon_type ShiftLeftRound(argon_type b) const { return simd::shift_left_round(vec_, b); }


  /// Forwards to simd::shift_left_round_saturate(this, b).
  ace argon_type ShiftLeftRoundSaturate(argon_type b) const { return simd::shift_left_round_saturate(vec_, b); }


  /// Forwards to simd::shift_left_saturate<n>(this) with a compile-time count.
  template <int n>


  ace argon_type ShiftLeftSaturate() const {

    return simd::shift_left_saturate<n>(vec_);

  }


  /// Forwards to simd::shift_left_insert<n>(this, b).
  template <int n>


  ace argon_type ShiftLeftInsert(argon_type b) const {

    return simd::shift_left_insert<n>(vec_, b);

  }


  /// Right shift by the compile-time count `n`: compiler vector `>>` when
  /// ARGON_USE_COMPILER_EXTENSIONS is true, otherwise simd::shift_right<n>.
  template <int n>


  ace argon_type ShiftRight() const {

    if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {

      return vec_ >> n;

    } else {

      return simd::shift_right<n>(vec_);

    }

  }


  /// Forwards to simd::shift_right_round<n>(this).
  template <int n>


  ace argon_type ShiftRightRound() const {

    return simd::shift_right_round<n>(vec_);

  }


  /// MVE: this + (b >> n), composed from operators.
  /// Otherwise: simd::shift_right_accumulate<n>(this, b).
  template <int n>


  ace argon_type ShiftRightAccumulate(argon_type b) const {

#ifdef ARGON_PLATFORM_MVE

    return vec_ + (b >> n);

#else

    return simd::shift_right_accumulate<n>(vec_, b);

#endif

  }


  /// MVE: this + mve::shift_right_round<n>(b).
  /// Otherwise: simd::shift_right_accumulate_round<n>(this, b).
  template <int n>


  ace argon_type ShiftRightAccumulateRound(argon_type b) const {

#ifdef ARGON_PLATFORM_MVE

    return vec_ + mve::shift_right_round<n>(b);

#else

    return simd::shift_right_accumulate_round<n>(vec_, b);

#endif

  }


  /// Forwards to simd::shift_right_insert<n>(this, b).
  template <int n>


  ace argon_type ShiftRightInsert(argon_type b) const {

    return simd::shift_right_insert<n>(vec_, b);

  }


  /// Loads one vector from `ptr` using the platform's load1 wrapper
  /// (mve::load1 on MVE, neon::load1<VectorType> otherwise).
  ace static argon_type Load(const scalar_type* ptr) {

#ifdef ARGON_PLATFORM_MVE

    return mve::load1(ptr);

#else

    return neon::load1<VectorType>(ptr);

#endif

  }


  /// Reads a single scalar from `ptr` and copies it into every lane.
  ///
  /// MVE: reads `*ptr` once and assigns it to each lane in turn.
  /// Otherwise: forwards to `simd::load1_duplicate<VectorType>`.
  ///
  /// @param ptr Address of the scalar to broadcast.
  /// @return A vector with the loaded value in every lane.
  ace static argon_type LoadCopy(const scalar_type* ptr) {
#ifdef ARGON_PLATFORM_MVE
    scalar_type val = *ptr;
    VectorType vec;
    utility::constexpr_for<0, lanes, 1>([val, &vec]<int i>() { vec[i] = val; });
    // The filled vector was previously never returned on this path.
    return vec;
#else
    return simd::load1_duplicate<VectorType>(ptr);
#endif
  }


  /// Gathers one element per lane from `base`, where each lane of `offset_vector` is an offset
  /// in BYTES from `base`.
  ///
  /// MVE: forwards the raw offsets to the gather-offset wrapper matching the element size.
  /// Otherwise: emulated with one single-lane load per lane.
  ///
  /// @param base          Start address the offsets are relative to.
  /// @param offset_vector Per-lane byte offsets.
  /// @return The gathered vector.
  ace static argon_type LoadGatherOffsetBytes(
      const scalar_type* base,
      helpers::ArgonFor_t<simd::make_unsigned_t<Bool_t<VectorType>>> offset_vector) {
#ifdef ARGON_PLATFORM_MVE
    static_assert(
        sizeof(scalar_type) == 1 || sizeof(scalar_type) == 2 || sizeof(scalar_type) == 4 || sizeof(scalar_type) == 8,
        "Unsupported size for gather load");

    if constexpr (sizeof(scalar_type) == 1) {
      return mve::load_byte_gather_offset(base, offset_vector);
    } else if constexpr (sizeof(scalar_type) == 2) {
      return mve::load_halfword_gather_offset(base, offset_vector);
    } else if constexpr (sizeof(scalar_type) == 4) {
      return mve::load_word_gather_offset(base, offset_vector);
    } else if constexpr (sizeof(scalar_type) == 8) {
      return mve::load_doubleword_gather_offset(base, offset_vector);
    }
#else
    // Apply the offsets on a byte pointer. The previous `base + lane_val * sizeof(scalar_type)`
    // scaled the offset by the element size and then again through element-pointer arithmetic,
    // which disagrees with the MVE branch (raw byte offsets).
    const auto* base_bytes = reinterpret_cast<const unsigned char*>(base);
    argon_type destination;
    utility::constexpr_for<0, lanes, 1>([&]<int i>() {  //<
      auto lane_val = neon::get_lane<i>(offset_vector);
      destination = destination.template LoadToLane<i>(reinterpret_cast<const scalar_type*>(base_bytes + lane_val));
    });
    return destination;
#endif
  }


  /// Gathers one element per lane from `base`, where each lane of `offset_vector` is an element index.
  /// MVE: multiplies the indices by the element size (skipped for 1-byte elements) and forwards
  /// to the gather-offset wrapper matching that size.
  /// Otherwise: emulated with one single-lane load from `base + index` per lane.
  ace static argon_type LoadGatherOffsetIndex(

      const scalar_type* base,

      helpers::ArgonFor_t<simd::make_unsigned_t<Bool_t<VectorType>>> offset_vector) {

#ifdef ARGON_PLATFORM_MVE

    static_assert(

        sizeof(scalar_type) == 1 || sizeof(scalar_type) == 2 || sizeof(scalar_type) == 4 || sizeof(scalar_type) == 8,

        "Unsupported size for gather load");


    if constexpr (sizeof(scalar_type) == 1) {

      return mve::load_byte_gather_offset(base, offset_vector);

    } else if constexpr (sizeof(scalar_type) == 2) {

      return mve::load_halfword_gather_offset(base, offset_vector * sizeof(scalar_type));

    } else if constexpr (sizeof(scalar_type) == 4) {

      return mve::load_word_gather_offset(base, offset_vector * sizeof(scalar_type));

    } else if constexpr (sizeof(scalar_type) == 8) {

      return mve::load_doubleword_gather_offset(base, offset_vector * sizeof(scalar_type));

    }

#else

    // NOTE(review): `destination` starts out default-initialised; every lane is overwritten below.
    argon_type destination;

    utility::constexpr_for<0, lanes, 1>([&]<int i>() {  //<

      auto lane_val = neon::get_lane<i>(offset_vector);

      destination = destination.template LoadToLane<i>(base + lane_val);

    });

    return destination;

#endif

  }


  /// Works on a copy of *this: takes the compile-time lane handle for `lane` on that copy and
  /// returns the result of its Load(ptr). *this itself is not assigned to.
  template <size_t lane>


  ace argon_type LoadToLane(const scalar_type* ptr) {

    argon_type new_argon = *this;

    return new_argon.template GetLane<lane>().Load(ptr);

  }


  /// Loads `stride` vectors from `ptr` with the platform's de-interleaving load wrapper
  /// (load2 / load3 / load4) and returns them as an array.
  ///
  /// @tparam stride Number of vectors to produce: 2, 3 or 4 (only 2 or 4 on MVE).
  /// @param ptr Source address.
  /// @return The `stride` loaded vectors.
  template <size_t stride>
  ace static std::array<argon_type, stride> LoadInterleaved(const scalar_type* ptr) {
#ifdef ARGON_PLATFORM_MVE
    // Only load2 and load4 are used on MVE, so the diagnostic must not advertise stride 3.
    static_assert(stride == 2 || stride == 4,
                  "De-interleaving Loads can only be performed with a stride of 2 or 4 on MVE");
    if constexpr (stride == 2) {
      return argon::to_array(mve::load2(ptr).val);
    } else if constexpr (stride == 4) {
      return argon::to_array(mve::load4(ptr).val);
    }
#else
    static_assert(stride > 1 && stride < 5, "De-interleaving Loads can only be performed with a stride of 2, 3, or 4");
    using multivec_type = simd::MultiVector_t<VectorType, stride>;
    if constexpr (stride == 2) {
      return argon::to_array(neon::load2<multivec_type>(ptr).val);
    } else if constexpr (stride == 3) {
      return argon::to_array(neon::load3<multivec_type>(ptr).val);
    } else if constexpr (stride == 4) {
      return argon::to_array(neon::load4<multivec_type>(ptr).val);
    }
#endif
  }


  /// Reads `stride` consecutive scalars from `ptr` and returns `stride` vectors, the k-th of
  /// which has `ptr[k]` copied into every lane.
  ///
  /// @tparam stride Number of vectors to produce: 2, 3 or 4 (only 2 or 4 on MVE).
  /// @param ptr Address of the first scalar.
  /// @return The `stride` broadcast vectors.
  template <size_t stride>
  ace static std::array<argon_type, stride> LoadCopyInterleaved(const scalar_type* ptr) {
#ifdef ARGON_PLATFORM_MVE
    // Only strides 2 and 4 are implemented on MVE, so the diagnostic must not advertise stride 3.
    static_assert(stride == 2 || stride == 4,
                  "De-interleaving LoadCopy can only be performed with a stride of 2 or 4 on MVE");
    // Index the source explicitly rather than relying on `ptr++` side effects inside the initializer list.
    if constexpr (stride == 2) {
      return {mve::duplicate(ptr[0]), mve::duplicate(ptr[1])};
    } else if constexpr (stride == 4) {
      return {mve::duplicate(ptr[0]), mve::duplicate(ptr[1]), mve::duplicate(ptr[2]), mve::duplicate(ptr[3])};
    }
#else
    static_assert(stride > 1 && stride < 5,
                  "De-interleaving LoadCopy can only be performed with a stride of 2, 3, or 4");
    // `typename` keeps the dependent name valid on compilers that still require it here.
    using multivec_type = typename simd::MultiVector<VectorType, stride>::type;
    if constexpr (stride == 2) {
      return argon::to_array(simd::load2_duplicate<multivec_type>(ptr).val);
    } else if constexpr (stride == 3) {
      return argon::to_array(simd::load3_duplicate<multivec_type>(ptr).val);
    } else if constexpr (stride == 4) {
      return argon::to_array(simd::load4_duplicate<multivec_type>(ptr).val);
    }
#endif
  }


  /// Returns the vectors of `multi` with lane `LaneIndex` of each one replaced by data read from `ptr`.
  /// MVE: lane `LaneIndex` of vector i is assigned `ptr[i]` directly.
  /// Otherwise: forwards to the loadN_lane / loadN_lane_quad wrapper chosen by `Stride` and by
  /// whether VectorType is a quadword type.
  template <size_t LaneIndex, size_t Stride>


  ace static std::array<argon_type, Stride> LoadToLaneInterleaved(simd::MultiVector_t<VectorType, Stride> multi,

                                                                  const scalar_type* ptr) {

    static_assert(Stride > 1 && Stride < 5, "De-interleaving Loads can only be performed with a stride of 2, 3, or 4");

#ifdef ARGON_PLATFORM_MVE

    auto out = multi;

    utility::constexpr_for<0, Stride, 1>([&]<int i>() {  //<

      out.val[i][LaneIndex] = ptr[i];

    });

    return argon::to_array(out.val);

#else

    if constexpr (Stride == 2) {

      if constexpr (simd::is_quadword_v<VectorType>) {

        return argon::to_array(simd::load2_lane_quad<LaneIndex>(ptr, multi).val);

      } else {

        return argon::to_array(simd::load2_lane<LaneIndex>(ptr, multi).val);

      }

    } else if constexpr (Stride == 3) {

      if constexpr (simd::is_quadword_v<VectorType>) {

        return argon::to_array(simd::load3_lane_quad<LaneIndex>(ptr, multi).val);

      } else {

        return argon::to_array(simd::load3_lane<LaneIndex>(ptr, multi).val);

      }

    } else if constexpr (Stride == 4) {

      if constexpr (simd::is_quadword_v<VectorType>) {

        return argon::to_array(simd::load4_lane_quad<LaneIndex>(ptr, multi).val);

      } else {

        return argon::to_array(simd::load4_lane<LaneIndex>(ptr, multi).val);

      }

    }

#endif

  }


  /// Array-taking convenience overload: reinterprets the array's storage as the matching
  /// multi-vector struct and forwards to the overload above.
  /// NOTE(review): the C-style pointer cast assumes std::array<argon_type, stride> and
  /// simd::MultiVector_t<VectorType, stride> share the same object layout — confirm.
  template <size_t lane, size_t stride>


  ace static std::array<argon_type, stride> LoadToLaneInterleaved(std::array<argon_type, stride> multi,

                                                                  const scalar_type* ptr) {

    using multivec_type = simd::MultiVector_t<VectorType, stride>;

    return LoadToLaneInterleaved<lane, stride>(*(multivec_type*)multi.data(), ptr);

  }


  /// Gathers `stride`-element groups: for every lane i, reads the index in lane i of
  /// `offset_vector` and loads the `stride` scalars starting at `base_ptr + index * stride`
  /// into lane i of each of the `stride` result vectors (via LoadToLaneInterleaved).
  template <size_t stride>


  ace static std::array<argon_type, stride> LoadGatherOffsetIndexInterleaved(

      const scalar_type* base_ptr,

      helpers::ArgonFor_t<simd::make_unsigned_t<Bool_t<VectorType>>> offset_vector) {

    static_assert(stride > 1 && stride < 5, "De-interleaving Loads can only be performed with a stride of 2, 3, or 4");

    // Value-initialised accumulator; one lane of every vector is filled per iteration.
    std::array<argon_type, stride> multi{};

    utility::constexpr_for<0, lanes, 1>([&]<int i>() {  //<

      auto lane_val = simd::get_lane<i>(offset_vector);

      multi = LoadToLaneInterleaved<i, stride>(multi, base_ptr + (lane_val * stride));

    });

    return multi;

  }


  /// Loads `n` consecutive (non-interleaved) vectors starting at `ptr`; vector k is read from
  /// `ptr + k * lanes`.
  ///
  /// On non-MVE builds with Clang or GCC newer than 13 this uses the multi-register
  /// `load1_x2` / `load1_x3` / `load1_x4` wrappers. Everywhere else it issues `n` separate
  /// `Load` calls.
  ///
  /// @tparam n Number of vectors to load: 2, 3 or 4.
  /// @param ptr Source address; `n * lanes` scalars are read.
  /// @return The `n` loaded vectors in memory order.
  template <size_t n>
  ace static std::array<argon_type, n> LoadMulti(const scalar_type* ptr) {
    static_assert(n > 1 && n < 5, "LoadMulti can only be performed with a size of 2, 3, or 4");
#if !defined(ARGON_PLATFORM_MVE) && (defined(__clang__) || (__GNUC__ > 13))
    using multi_type = simd::MultiVector_t<VectorType, n>;
    if constexpr (n == 2) {
      return argon::to_array(simd::load1_x2<multi_type>(ptr).val);
    } else if constexpr (n == 3) {
      return argon::to_array(simd::load1_x3<multi_type>(ptr).val);
    } else if constexpr (n == 4) {
      return argon::to_array(simd::load1_x4<multi_type>(ptr).val);
    }
#else
    // One full vector load per output. The MVE path used to assign `*ptr` (a single scalar,
    // which broadcasts) instead of loading a vector, and the older-GCC path called
    // simd::load1 without naming the vector type; both now go through Load().
    std::array<argon_type, n> multi{};
    utility::constexpr_for<0, n, 1>([&]<int i>() {  //<
      multi[i] = Load(ptr + i * lanes);
    });
    return multi;
#endif
  }


  /// Stores the whole vector to `ptr` via simd::store1.
  ace void StoreTo(scalar_type* ptr) const { simd::store1(ptr, vec_); }


  /// Stores the single lane `LaneIndex` to `*ptr`.
  /// MVE: plain assignment from vec_[LaneIndex]; otherwise simd::store1_lane<LaneIndex>.
  template <int LaneIndex>


  ace void StoreLaneTo(scalar_type* ptr) {

#ifdef ARGON_PLATFORM_MVE

    *ptr = vec_[LaneIndex];

#else

    simd::store1_lane<LaneIndex>(ptr, vec_);

#endif

  }


#ifndef ARGON_PLATFORM_MVE


  // Pairwise operations: each forwards to the matching simd::pairwise_* wrapper.
  // They sit inside an #ifndef ARGON_PLATFORM_MVE region, so they are not available on MVE.

  ace argon_type PairwiseAdd(argon_type b) const { return simd::pairwise_add(vec_, b); }


  ace argon_type PairwiseMax(argon_type b) const { return simd::pairwise_max(vec_, b); }


  ace argon_type PairwiseMin(argon_type b) const { return simd::pairwise_min(vec_, b); }

#endif


  // Bitwise operations. Each uses the compiler's vector operator when
  // ARGON_USE_COMPILER_EXTENSIONS is true and the named simd:: wrapper otherwise.

  /// ~this
  ace argon_type BitwiseNot() const {

    if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {

      return ~vec_;

    } else {

      return simd::bitwise_not(vec_);

    }

  }


  /// this & b
  ace argon_type BitwiseAnd(argon_type b) const {

    if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {

      return vec_ & b.vec_;

    } else {

      return simd::bitwise_and(vec_, b);

    }

  }


  /// this | b
  ace argon_type BitwiseOr(argon_type b) const {

    if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {

      return vec_ | b.vec_;

    } else {

      return simd::bitwise_or(vec_, b);

    }

  }


  /// this ^ b
  ace argon_type BitwiseXor(argon_type b) const {

    if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {

      return vec_ ^ b.vec_;

    } else {

      return simd::bitwise_xor(vec_, b);

    }

  }


  /// this | ~b
  ace argon_type BitwiseOrNot(argon_type b) const {

    if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {

      return vec_ | ~b.vec_;

    } else {

      return simd::bitwise_or_not(vec_, b);

    }

  }


  /// this & ~b (the wrapper used on the non-extension path is simd::bitwise_clear)
  ace argon_type BitwiseAndNot(argon_type b) const {

    if constexpr (ARGON_USE_COMPILER_EXTENSIONS) {

      return vec_ & ~b.vec_;

    } else {

      return simd::bitwise_clear(vec_, b);

    }

  }


  /// Alias for BitwiseAndNot(b): returns this & ~b.
  /// (The result was previously computed and dropped because the `return` was missing.)
  ace argon_type BitwiseClear(argon_type b) const { return BitwiseAndNot(b); }


#ifndef ARGON_PLATFORM_MVE

  /// Uses this vector as the mask argument of simd::bitwise_select, choosing between
  /// `true_value` and `false_value`. Only available when scalar_type is unsigned.
  template <typename ArgType>

    requires std::is_unsigned_v<scalar_type>


  ace argon_type BitwiseSelect(ArgType true_value, ArgType false_value) const {

    return simd::bitwise_select(vec_, true_value, false_value);

  }


  /// Same operation as BitwiseSelect: uses this vector as the mask argument of
  /// `simd::bitwise_select`, choosing between `true_value` and `false_value`.
  /// Only available when scalar_type is unsigned.
  ///
  /// @param true_value  Operand passed as the "true" choice.
  /// @param false_value Operand passed as the "false" choice.
  /// @return The result of the select.
  template <typename ArgType>
    requires std::is_unsigned_v<scalar_type>
  ace argon_type Select(ArgType true_value, ArgType false_value) const {
    // The mask (this vector) has to be the first argument, as in BitwiseSelect; it was missing.
    return simd::bitwise_select(vec_, true_value, false_value);
  }


  /// Forwards to simd::compare_test_nonzero(this, b); returns the raw predicate vector type.
  ace predicate_type CompareTestNonzero(argon_type b) const { return simd::compare_test_nonzero(vec_, b); }


  /// Tests each lane for being non-zero by running compare_test_nonzero against an all-ones mask.
  /// The mask used to be `1`, which only looks at the lowest bit of each lane and so reports
  /// even non-zero values (2, 4, ...) as zero.
  ace predicate_type TestNonzero() const {
    // static_cast<scalar_type>(-1) sets every bit for both signed and unsigned integer lanes.
    return simd::compare_test_nonzero(vec_, argon_type{static_cast<scalar_type>(-1)});
  }

#endif


  /// Forwards to simd::count_leading_sign_bits; the result uses the signed counterpart of the
  /// predicate vector type. Integral lanes only.
  ace helpers::ArgonFor_t<simd::make_signed_t<Bool_t<VectorType>>> CountLeadingSignBits() const

    requires(std::is_integral_v<scalar_type>)

  {

    return simd::count_leading_sign_bits(vec_);

  }


  /// Forwards to simd::count_leading_zero_bits.
  ace argon_type CountLeadingZeroBits() const { return simd::count_leading_zero_bits(vec_); }


  /// Per-lane population count.
  /// MVE: applies std::popcount to each lane in turn; otherwise neon::count_active_bits.
  ace argon_type CountActiveBits() const {

#ifdef ARGON_PLATFORM_MVE

    auto new_vec = vec_;

    utility::constexpr_for<0, lanes, 1>([&]<int i>() {  //<

      new_vec[i] = std::popcount(vec_[i]);

    });

    return new_vec;

#else

    return neon::count_active_bits(vec_);

#endif

  }


  /// Alias for CountActiveBits().
  ace argon_type Popcount() const { return CountActiveBits(); }


  /// Extracts `lanes` consecutive elements, starting at lane `n`, from the concatenation of
  /// this vector followed by `b`: result lane i is `this[i + n]` while that index is in range,
  /// and `b[i + n - lanes]` afterwards.
  ///
  /// The MVE emulation previously produced `{b[0..n), this[n..lanes)}`, which is a different
  /// result from the one described above.
  /// NOTE(review): assumes simd::extract<n> on the other branch follows the Arm EXT ordering
  /// described above — confirm against the wrapper.
  ///
  /// @tparam n Index of the first lane of this vector to keep (0 <= n < lanes).
  /// @param b Vector supplying the trailing lanes.
  /// @return The extracted vector.
  template <int n>
  ace argon_type Extract(argon_type b) const {
#ifdef ARGON_PLATFORM_MVE
    auto new_vec = vec_;
    utility::constexpr_for<0, lanes, 1>([&]<int i>() {  //<
      // Position of result lane i within the concatenation {this, b}.
      constexpr size_t src = static_cast<size_t>(i) + static_cast<size_t>(n);
      if constexpr (src < lanes) {
        new_vec[i] = vec_[src];
      } else {
        new_vec[i] = b.vec_[src - lanes];
      }
    });
    return new_vec;
#else
    return simd::extract<n>(vec_, b);
#endif
  }


  // Element-reversal helpers: each forwards to the simd::reverse_* wrapper of the same width.

  ace argon_type Reverse64bit() const { return simd::reverse_64bit(vec_); }

  ace argon_type Reverse32bit() const { return simd::reverse_32bit(vec_); }

  ace argon_type Reverse16bit() const { return simd::reverse_16bit(vec_); }


  /// Interleaves the lanes of this vector (a) and `b`.
  /// In the MVE emulation result[0] receives {a0, b0, a1, b1, ...} from the lower halves and
  /// result[1] continues with the upper halves; the other branch forwards to neon::zip.
  ace std::array<argon_type, 2> ZipWith(argon_type b) const {

#ifdef ARGON_PLATFORM_MVE

    std::array<argon_type, 2> new_vec;

    utility::constexpr_for<0, lanes, 1>([&]<int i>() {  //<

      // Even output lanes come from this vector, odd ones from b.
      if (i % 2 == 0) {

        new_vec[0][i] = vec_[i / 2];

        new_vec[1][i] = vec_[(i + lanes) / 2];

      } else {

        new_vec[0][i] = b.vec_[i / 2];

        new_vec[1][i] = b.vec_[(i + lanes) / 2];

      }

    });

    return new_vec;

#else

    return argon::to_array(neon::zip(vec_, b.vec()).val);

#endif

  }


  /// De-interleaves the concatenation of this vector (a) and `b`:
  ///   result[0] = {a0, a2, ..., b0, b2, ...}   (even-indexed elements)
  ///   result[1] = {a1, a3, ..., b1, b3, ...}   (odd-indexed elements)
  /// NOTE(review): assumes neon::unzip on the other branch follows the Arm UZP ordering shown
  /// above — confirm against the wrapper.
  ///
  /// Declared `const` (it only reads this vector), matching ZipWith and TransposeWith.
  ///
  /// @param b Second input vector.
  /// @return The two de-interleaved vectors.
  std::array<argon_type, 2> UnzipWith(argon_type b) const {
#ifdef ARGON_PLATFORM_MVE
    std::array<argon_type, 2> new_vec;
    utility::constexpr_for<0, lanes, 1>([&]<int i>() {  //<
      if ((i * 2) < lanes) {
        // First half of each result comes from this vector.
        new_vec[0][i] = vec_[i * 2];
        new_vec[1][i] = vec_[i * 2 + 1];
      } else {
        // Second half comes from b; rebase the index, since `i * 2` is already past b's last lane here.
        new_vec[0][i] = b.vec_[i * 2 - lanes];
        new_vec[1][i] = b.vec_[i * 2 - lanes + 1];
      }
    });
    return new_vec;
#else
    return argon::to_array(neon::unzip(vec_, b.vec()).val);
#endif
  }


  /// Transposes this vector (a) with `b`, treating every pair of adjacent lanes as a 2x2 matrix.
  ///
  ///   inputs:  {a0, a1, a2, a3},  {b0, b1, b2, b3}
  ///   result:  {{a0, b0, a2, b2},
  ///             {a1, b1, a3, b3}}
  /// NOTE(review): assumes simd::transpose on the other branch follows the Arm TRN ordering
  /// shown above — confirm against the wrapper.
  ///
  /// @param b Second input vector.
  /// @return The two transposed vectors.
  std::array<argon_type, 2> TransposeWith(argon_type b) const {
#ifdef ARGON_PLATFORM_MVE
    std::array<argon_type, 2> new_vec;
    utility::constexpr_for<0, lanes, 1>([&]<int i>() {  //<
      if (i % 2 == 0) {
        // Even output lanes are taken from this vector: a[i] and its right-hand neighbour.
        new_vec[0][i] = vec_[i];
        new_vec[1][i] = vec_[i + 1];
      } else {
        // Odd output lanes are taken from b: its left-hand neighbour and b[i].
        new_vec[0][i] = b.vec_[i - 1];
        new_vec[1][i] = b.vec_[i];
      }
    });
    return new_vec;
#else
    return argon::to_array(simd::transpose(vec_, b.vec()).val);
#endif
  }


  /// @brief Get the number of elements (lanes) in the vector.
  /// @return `lanes`, converted to `int`.
  ace static int size() {
    return static_cast<int>(lanes);
  }


  /// @brief Build a new vector by applying `body` to every lane of this one.
  /// @tparam FuncType Callable convertible to `scalar_type(scalar_type)`.
  /// @param body Invoked once per lane with that lane's value.
  /// @return A vector whose lane `k` is `body(this[k])`.
  template <typename FuncType>
    requires std::convertible_to<FuncType, std::function<scalar_type(scalar_type)>>
  ace argon_type map(FuncType body) const {
    VectorType mapped;  // every lane is written below before the value is returned
    utility::constexpr_for<0, lanes, 1>([&]<int idx>() { mapped[idx] = body(vec_[idx]); });
    return mapped;
  }


  /// @brief Build a new vector by applying `body` to every lane and its index.
  /// @tparam FuncType Callable convertible to `scalar_type(scalar_type, int)`.
  /// @param body Invoked once per lane with the lane's value and its index.
  /// @return A vector whose lane `k` is `body(this[k], k)`.
  template <typename FuncType>
    requires std::convertible_to<FuncType, std::function<scalar_type(scalar_type, int)>>
  ace argon_type map_with_index(FuncType body) const {
    VectorType mapped;  // every lane is written below before the value is returned
    utility::constexpr_for<0, lanes, 1>([&]<int idx>() { mapped[idx] = body(vec_[idx], idx); });
    return mapped;
  }


  /// @brief Combine this vector with `other` lane by lane using `body`.
  /// @tparam FuncType Callable convertible to `scalar_type(scalar_type, scalar_type)`.
  /// @param other The second operand; its lane `k` is paired with this vector's lane `k`.
  /// @param body Invoked once per lane pair.
  /// @return A vector whose lane `k` is `body(this[k], other[k])`.
  template <typename FuncType>
    requires std::convertible_to<FuncType, std::function<scalar_type(scalar_type, scalar_type)>>
  ace argon_type map2(argon_type other, FuncType body) const {
    VectorType combined;  // every lane is written below before the value is returned
    utility::constexpr_for<0, lanes, 1>(
        [&]<int idx>() { combined[idx] = body(vec_[idx], other.vec_[idx]); });
    return combined;
  }


  /// @brief Run `body` on each lane of a copy of this vector and return the copy.
  ///
  /// `body` receives each lane by reference and may modify it in place. The
  /// modifications land in the returned vector; this vector is left untouched.
  ///
  /// @tparam FuncType Callable convertible to `void(scalar_type&)`.
  /// @param body Invoked once per lane.
  /// @return The copy after `body` has visited every lane.
  template <typename FuncType>
    requires std::convertible_to<FuncType, std::function<void(scalar_type&)>>
  ace argon_type each_lane(FuncType body) {
    VectorType updated = vec_;
    utility::constexpr_for<0, lanes, 1>([&]<int idx>() { body(updated[idx]); });
    return updated;
  }


  /// @brief Run `body` on each lane (and its index) of a copy of this vector
  ///        and return the copy.
  ///
  /// `body` receives each lane by reference and may modify it in place. The
  /// modifications land in the returned vector; this vector is left untouched.
  ///
  /// @tparam FuncType Callable convertible to `void(scalar_type&, int)`.
  /// @param body Invoked once per lane with the lane reference and its index.
  /// @return The copy after `body` has visited every lane.
  template <typename FuncType>
    requires std::convertible_to<FuncType, std::function<void(scalar_type&, int)>>
  ace argon_type each_lane_with_index(FuncType body) {
    VectorType updated = vec_;
    utility::constexpr_for<0, lanes, 1>([&]<int idx>() { body(updated[idx], idx); });
    return updated;
  }


  /// @brief Call `true_branch` once for every lane whose value is nonzero.
  /// @tparam FuncType Callable convertible to `void()`.
  /// @param true_branch Invoked for each nonzero lane, in lane order.
  template <typename FuncType>
    requires std::convertible_to<FuncType, std::function<void()>>
  ace void if_lane(FuncType true_branch) {
    utility::constexpr_for<0, lanes, 1>([&]<int idx>() {
      // Skip lanes that compare equal to zero.
      if (!(vec_[idx] != 0)) {
        return;
      }
      true_branch();
    });
  }


  /// @brief For every lane, call `true_branch` if the lane is nonzero and
  ///        `false_branch` otherwise.
  ///
  /// The two callbacks have independent template parameters so that two
  /// different lambdas (each of which has its own unique closure type) can be
  /// passed; a single shared parameter would make deduction fail for that
  /// common case. Passing two callables of the same type still works.
  ///
  /// @tparam FuncType  Type of `true_branch`, convertible to `void()`.
  /// @tparam FuncType2 Type of `false_branch`, convertible to `void()`;
  ///                   defaults to `FuncType`.
  /// @param true_branch  Invoked for each nonzero lane.
  /// @param false_branch Invoked for each zero lane.
  template <typename FuncType, typename FuncType2 = FuncType>
    requires std::convertible_to<FuncType, std::function<void()>> &&
             std::convertible_to<FuncType2, std::function<void()>>
  ace void if_else_lane(FuncType true_branch, FuncType2 false_branch) {
    utility::constexpr_for<0, lanes, 1>([&]<int i>() {
      if (vec_[i] != 0) {
        true_branch();
      } else {
        false_branch();
      }
    });
  }


  /// @brief Call `true_branch` with the lane index for every nonzero lane.
  /// @tparam FuncType Callable convertible to `void(int)`.
  /// @param true_branch Invoked with the index of each nonzero lane, in lane order.
  template <typename FuncType>
    requires std::convertible_to<FuncType, std::function<void(int)>>
  ace void if_lane_with_index(FuncType true_branch) {
    utility::constexpr_for<0, lanes, 1>([&]<int idx>() {
      // Skip lanes that compare equal to zero.
      if (!(vec_[idx] != 0)) {
        return;
      }
      true_branch(idx);
    });
  }


  /// @brief For every lane, call `true_branch(index)` if the lane is nonzero
  ///        and `false_branch(index)` otherwise.
  /// @tparam FuncType1 Type of `true_branch`, convertible to `void(int)`.
  /// @tparam FuncType2 Type of `false_branch`, convertible to `void(int)`.
  /// @param true_branch  Invoked with the index of each nonzero lane.
  /// @param false_branch Invoked with the index of each zero lane.
  template <typename FuncType1, typename FuncType2>
    requires std::convertible_to<FuncType1, std::function<void(int)>> &&
             std::convertible_to<FuncType2, std::function<void(int)>>
  ace void if_else_lane_with_index(FuncType1 true_branch, FuncType2 false_branch) {
    utility::constexpr_for<0, lanes, 1>([&]<int idx>() {
      const bool lane_is_set = (vec_[idx] != 0);
      if (!lane_is_set) {
        false_branch(idx);
      } else {
        true_branch(idx);
      }
    });
  }


  /// @brief Check whether at least one lane of the vector is nonzero.
  ///
  /// The result is accumulated in a flag captured by the per-lane lambda: a
  /// `return` inside that lambda only leaves the lambda, not this function,
  /// so it cannot be used to short-circuit.
  ///
  /// @return `true` if any lane is nonzero, `false` if every lane is zero.
  ace bool any() const {
    bool found = false;
    utility::constexpr_for<0, lanes, 1>([&]<int i>() {
      if (vec_[i]) {
        found = true;
      }
    });
    return found;
  }


  /// @brief Check whether every lane of the vector is nonzero.
  ///
  /// On the non-MVE path the result is accumulated in a flag captured by the
  /// per-lane lambda: a `return` inside that lambda only leaves the lambda,
  /// not this function.
  ///
  /// @return `true` if all lanes are nonzero, `false` otherwise.
  ace bool all() const {
#ifdef ARGON_PLATFORM_MVE
    // NOTE(review): a max-reduction being nonzero looks like "any lane set"
    // rather than "all lanes set" — confirm the semantics of
    // mve::max_reduce_max before relying on this branch.
    return mve::max_reduce_max(vec_, vec_) != 0;
#else
    auto nonzero = TestNonzero();
    bool every_lane_set = true;
    utility::constexpr_for<0, lanes, 1>([&]<int i>() {
      // A zero predicate lane means the corresponding input lane was zero.
      if (nonzero[i] == 0) {
        every_lane_set = false;
      }
    });
    return every_lane_set;
#endif
  }


  /// @brief Tuple-style accessor returning lane `Index` of the vector.
  ///
  /// Outside MVE builds this delegates to `GetLane<Index>()`; on MVE it reads
  /// the lane straight out of the underlying vector.
  ///
  /// @tparam Index Compile-time lane index.
  /// @return The lane as `std::tuple_element_t<Index, argon_type>`.
  template <std::size_t Index>
  std::tuple_element_t<Index, argon_type> get() {
#ifndef ARGON_PLATFORM_MVE
    return GetLane<Index>();
#else
    return vec_[Index];
#endif
  }


 protected:

  /// @brief Build a vector whose lanes are `start + Ints` for each index in the
  ///        supplied index sequence, each sum cast to `scalar_type`.
  /// @tparam Ints Lane offsets, expanded in order into the vector's initializer.
  /// @param start Value added to every offset.
  /// @return The vector `{start + Ints...}`.
  template <std::size_t... Ints>

  ace static argon_type IotaHelper(scalar_type start, std::index_sequence<Ints...>) {

    return VectorType{static_cast<scalar_type>(start + Ints)...};

  }


  /// The underlying SIMD vector value wrapped by this class.
  VectorType vec_;

};


}  // namespace argon


namespace std {

template <typename T>


struct tuple_size<argon::Vector<T>> {

  static constexpr size_t value = argon::Vector<T>::lanes;

};


/// @brief Tuple-protocol element type of an `argon::Vector`: the compile-time
///        indexed lane type for lane `Index`.
///
/// `const_lane_type` is an alias template on `Vector`, so it must be given its
/// `Index` argument, and as a dependent member template it needs the
/// `typename` / `template` disambiguators.
template <size_t Index, typename T>
struct tuple_element<Index, argon::Vector<T>> {
  static_assert(Index < argon::Vector<T>::lanes, "tuple_element index exceeds the vector's lane count");
  using type = typename argon::Vector<T>::template const_lane_type<Index>;
};


}  // namespace std


#undef ace

#undef simd

helpers.hpp
Provides utility templates and concepts for type traits and compile-time iteration.

Argon
Definition argon_full.hpp:24

argon::ConstLane
Represents a single lane of a SIMD vector, where the lane's index is known at compile time.
Definition lane.hpp:44

argon::Lane
Represents a single lane of a SIMD vector.
Definition lane.hpp:102

argon::const_lane_type

argon::Vector::PairwiseMin
ace argon_type PairwiseMin(argon_type b) const
Select the minimum of each pair of lanes in the two vectors.
Definition vector.hpp:1088

argon::Vector< simd::Vec128_t< ScalarType > >::lane_type
Lane< simd::Vec128_t< ScalarType > > lane_type
Definition vector.hpp:54

argon::Vector::MultiplySubtract
ace argon_type MultiplySubtract(argon_type b, lane_type c) const
Multiply a vector by a lane value and subtract from a third vector.
Definition vector.hpp:515

argon::Vector::Vector
ace Vector(VectorType vector)
Constructs a Vector from a SIMD vector type.
Definition vector.hpp:74

argon::Vector< simd::Vec128_t< ScalarType > >::vector_type
simd::Vec128_t< ScalarType > vector_type
Definition vector.hpp:56

argon::Vector::Modulo
ace argon_type Modulo(argon_type b) const
Get the modulo of two vectors.
Definition vector.hpp:615

argon::Vector::operator<
ace argon_bool_type operator<(argon_type b) const
Compare two vectors, checking if this vector is less than the other.
Definition vector.hpp:214

argon::Vector::ShiftLeftRound
ace argon_type ShiftLeftRound(argon_type b) const
Shift the elements of the vector to the left by a specified number of bits, rounding the result.
Definition vector.hpp:718

argon::Vector::GenerateWithIndex
static ace argon_type GenerateWithIndex(FuncType body)
Constructs a Vector from a function that generates values with an index.
Definition vector.hpp:184

argon::Vector::TransposeWith
std::array< argon_type, 2 > TransposeWith(argon_type b) const
Perform a 2x2 matrix transpose on two vectors, returning two vectors of pairs.
Definition vector.hpp:1274

argon::Vector::Vector
constexpr Vector(const Vector &other)=default
Copy constructor for the Vector class.

argon::Vector::LoadInterleaved
static ace std::array< argon_type, stride > LoadInterleaved(const scalar_type *ptr)
Load multiple vectors from a pointer, de-interleaving.
Definition vector.hpp:887

argon::Vector::Iota
static ace argon_type Iota(scalar_type start)
Constructs a Vector from an incrementing sequence.
Definition vector.hpp:154

argon::Vector< simd::Vec128_t< ScalarType > >::FromScalar
static ace argon_type FromScalar(scalar_type scalar)
Definition vector.hpp:110

argon::Vector::Multiply
ace argon_type Multiply(scalar_type b) const
Multiply a vector by a scalar value.
Definition vector.hpp:420

argon::Vector::MultiplySubtract
ace argon_type MultiplySubtract(argon_type b, argon_type c) const
Multiply two vectors and subtract from a third vector.
Definition vector.hpp:490

argon::Vector::Extract
ace argon_type Extract(argon_type b) const
Extract n elements from the lower end of the operand, and the remaining elements from the top end of ...
Definition vector.hpp:1209

argon::Vector::vec
constexpr VectorType vec() const
Get the underlying SIMD vector.
Definition vector.hpp:268

argon::Vector::ReciprocalEstimate
ace argon_type ReciprocalEstimate() const
1 / value, using an estimate for speed
Definition vector.hpp:561

argon::Vector::Multiply
ace argon_type Multiply(argon_type b) const
Multiply two vectors.
Definition vector.hpp:411

argon::Vector::LessThanOrEqual
ace argon_bool_type LessThanOrEqual(argon_type b) const
Compare the lanes of two vectors, setting the result lane's bits to ON if a is less than or equal to ...
Definition vector.hpp:664

argon::Vector::operator--
ace argon_type operator--() const
Decrement the vector by 1 and return the result.
Definition vector.hpp:229

argon::Vector::ShiftLeft
ace argon_type ShiftLeft(const int i) const
Shift the elements of the vector to the left by a specified number of bits.
Definition vector.hpp:347

argon::Vector::Load
static ace argon_type Load(const scalar_type *ptr)
Load a vector from a pointer.
Definition vector.hpp:788

argon::Vector::operator+
ace argon_type operator+(argon_type b) const
Add a vector and return the result.
Definition vector.hpp:196

argon::Vector::ShiftLeft
ace argon_type ShiftLeft(helpers::ArgonFor_t< simd::make_signed_t< Bool_t< VectorType > > > b) const
Shift the elements of the vector to the left by a specified number of bits.
Definition vector.hpp:676

argon::Vector::Multiply
ace argon_type Multiply(const_lane_type< LaneIndex > b) const
Multiply a vector by a lane value.
Definition vector.hpp:434

argon::Vector::PairwiseAdd
ace argon_type PairwiseAdd(argon_type b) const
Pairwise ops.
Definition vector.hpp:1078

argon::Vector::MultiplyAdd
ace argon_type MultiplyAdd(scalar_type b, argon_type c) const
Multiply a vector by a scalar value and add a third vector.
Definition vector.hpp:461

argon::Vector::operator>=
ace argon_bool_type operator>=(argon_type b) const
Compare two vectors, checking if this vector is greater than or equal to the other.
Definition vector.hpp:223

argon::Vector::MultiplyAdd
ace argon_type MultiplyAdd(argon_type b, lane_type c) const
Multiply a vector by a lane value and add a third vector.
Definition vector.hpp:466

argon::Vector::operator==
ace argon_bool_type operator==(argon_type b) const
Compare two vectors for equality.
Definition vector.hpp:208

argon::Vector::MultiplyAdd
ace argon_type MultiplyAdd(argon_type b, argon_type c) const
Multiply two vectors and add a third vector.
Definition vector.hpp:441

argon::Vector::MultiplyRoundAddFixedQMax
ace argon_type MultiplyRoundAddFixedQMax(argon_type b, arg_type c) const
Multiply-round-add three fixed-point vectors, returning a fixed-point sum.
Definition vector.hpp:589

argon::Vector::ShiftLeft
ace argon_type ShiftLeft(std::make_signed_t< simd::Scalar_t< Bool_t< VectorType > > > n) const
Shift the elements of the vector to the left by a specified number of bits.
Definition vector.hpp:688

argon::Vector::BitwiseOrNot
ace argon_type BitwiseOrNot(argon_type b) const
Bitwise OR of the vector with the NOT of another vector.
Definition vector.hpp:1131

argon::Vector::CountLeadingSignBits
ace helpers::ArgonFor_t< simd::make_signed_t< Bool_t< VectorType > > > CountLeadingSignBits() const
Count the number of consecutive bits following the sign bit that are set to the same value as the sig...
Definition vector.hpp:1178

argon::Vector::FromLane
static ace argon_type FromLane(argon::ConstLane< LaneIndex, VectorType > lane)
Constructs a Vector from a ConstLane object.
Definition vector.hpp:136

argon::Vector::AddSaturate
ace argon_type AddSaturate(argon_type b) const
Adds two vectors, saturating the result.
Definition vector.hpp:378

argon::Vector::UnzipWith
std::array< argon_type, 2 > UnzipWith(argon_type b)
Unzip two vectors, returning two vectors of pairs.
Definition vector.hpp:1251

argon::Vector::operator++
ace argon_type operator++() const
Increment the vector by 1 and return the result.
Definition vector.hpp:226

argon::Vector::operator^
ace argon_type operator^(argon_type b) const
Bitwise XOR two vectors and return the result.
Definition vector.hpp:238

argon::Vector::Equal
ace argon_bool_type Equal(argon_type b) const
Compare the lanes of two vectors, setting the result lane's bits to ON if they are equal.
Definition vector.hpp:656

argon::Vector::StoreTo
ace void StoreTo(scalar_type *ptr) const
Store the vector to a pointer.
Definition vector.hpp:1058

argon::Vector< simd::Vec128_t< ScalarType > >::const_lane_type
ConstLane< LaneIndex, simd::Vec128_t< ScalarType > > const_lane_type
Definition vector.hpp:53

argon::Vector::MultiplyRoundFixedQMax
ace argon_type MultiplyRoundFixedQMax(lane_type l) const
Multiply a fixed-point vector by a lane value, returning a fixed-point product.
Definition vector.hpp:551

argon::Vector::CountActiveBits
ace argon_type CountActiveBits() const
Count the number of bits that are set to one in the vector.
Definition vector.hpp:1190

argon::Vector::MultiplyFixedQMax
ace argon_type MultiplyFixedQMax(scalar_type s) const
Multiply a QMax fixed-point vector by a scalar value, returning a fixed-point product.
Definition vector.hpp:526

argon::Vector::CompareTestNonzero
ace predicate_type CompareTestNonzero(argon_type b) const
Ands the current vector with the given vector, then checks if nonzero.
Definition vector.hpp:1170

argon::Vector::LoadToLaneInterleaved
static ace std::array< argon_type, stride > LoadToLaneInterleaved(std::array< argon_type, stride > multi, const scalar_type *ptr)
Load a value from a pointer into a vector at the lane index lane, de-interleaving.
Definition vector.hpp:979

argon::Vector::PairwiseMax
ace argon_type PairwiseMax(argon_type b) const
Select the maximum of each pair of lanes in the two vectors.
Definition vector.hpp:1083

argon::Vector::operator/
ace argon_type operator/(argon_type b) const
Divide a vector and return the result.
Definition vector.hpp:205

argon::Vector::BitwiseNot
ace argon_type BitwiseNot() const
Bitwise ops.
Definition vector.hpp:1094

argon::Vector::Max
ace argon_type Max(argon_type b) const
Compare the lanes of two vectors, copying the larger of each lane to the result.
Definition vector.hpp:637

argon::Vector< simd::Vec128_t< ScalarType > >::FromLane
static ace argon_type FromLane(argon::Lane< IntrinsicType > lane)
Definition vector.hpp:123

argon::Vector::operator=
constexpr Vector & operator=(Vector &&other)=default
Move assignment operator for the Vector class.

argon::Vector::ShiftLeftInsert
ace argon_type ShiftLeftInsert(argon_type b) const
Shift the elements of the vector to the left by a specified number of bits, and then OR the result wi...
Definition vector.hpp:735

argon::Vector::GetLane
ace const lane_type GetLane(const size_t i) const
Get a single lane of the vector by index.
Definition vector.hpp:285

argon::Vector::BitwiseAnd
ace argon_type BitwiseAnd(argon_type b) const
Bitwise AND of the vector with another vector.
Definition vector.hpp:1103

argon::Vector::MultiplyAdd
ace argon_type MultiplyAdd(argon_type b, const_lane_type< LaneIndex > c) const
Multiply a vector by a lane value and add a third vector.
Definition vector.hpp:477

argon::Vector::Modulo
ace argon_type Modulo(scalar_type b) const
Get the modulo of a vector and a scalar value.
Definition vector.hpp:627

argon::Vector::Popcount
ace argon_type Popcount() const
Count the number of bits that are set to one in the vector.
Definition vector.hpp:1203

argon::Vector::LessThan
ace argon_bool_type LessThan(argon_type b) const
Compare the lanes of two vectors, setting the result lane's bits to ON if a is less than b.
Definition vector.hpp:672

argon::Vector::ShiftRight
ace argon_type ShiftRight(const int i) const
Shift the elements of the vector to the right by a specified number of bits.
Definition vector.hpp:344

argon::Vector::ShiftLeftRoundSaturate
ace argon_type ShiftLeftRoundSaturate(argon_type b) const
Shift the elements of the vector to the left by a specified number of bits, rounding and saturating t...
Definition vector.hpp:721

argon::Vector::Vector
ace Vector(scalar_type scalar)
Constructs a Vector from a scalar value.
Definition vector.hpp:79

argon::Vector::ShiftRight
ace argon_type ShiftRight() const
Shift the elements of the vector to the right by a specified number of bits.
Definition vector.hpp:741

argon::Vector::Negate
ace argon_type Negate() const
Bitwise negate the vector and return the result.
Definition vector.hpp:350

argon::Vector::StoreLaneTo
ace void StoreLaneTo(scalar_type *ptr)
Store a lane of the vector to a pointer.
Definition vector.hpp:1064

argon::Vector::Multiply
ace argon_type Multiply(lane_type b) const
Multiply a vector by a lane value.
Definition vector.hpp:430

argon::Vector::MultiplySubtract
ace argon_type MultiplySubtract(argon_type b, scalar_type c) const
Multiply a vector by a scalar value and subtract from a third vector.
Definition vector.hpp:500

argon::Vector::MultiplyAdd
ace argon_type MultiplyAdd(const_lane_type< LaneIndex > b, argon_type c) const
Multiply a vector by a lane value and add a third vector.
Definition vector.hpp:483

argon::Vector::MultiplyRoundFixedQMax
ace argon_type MultiplyRoundFixedQMax(scalar_type s) const
Multiply a fixed-point vector by a scalar value, returning a fixed-point product.
Definition vector.hpp:544

argon::Vector::GreaterThanOrEqual
ace argon_bool_type GreaterThanOrEqual(argon_type b) const
Compare the lanes of two vectors, setting the result lane's bits to ON if a is greater than or equal ...
Definition vector.hpp:660

argon::Vector::operator>
ace argon_bool_type operator>(argon_type b) const
Compare two vectors, checking if this vector is greater than the other.
Definition vector.hpp:217

argon::Vector::operator<<
ace argon_type operator<<(const int i) const
Shift the elements of the vector to the left by a specified number of bits.
Definition vector.hpp:259

argon::Vector::AddHalve
ace argon_type AddHalve(argon_type b) const
Adds two vectors, halving the result.
Definition vector.hpp:369

argon::Vector::LoadGatherOffsetIndexInterleaved
static ace std::array< argon_type, stride > LoadGatherOffsetIndexInterleaved(const scalar_type *base_ptr, helpers::ArgonFor_t< simd::make_unsigned_t< Bool_t< VectorType > > > offset_vector)
Perform a Load-Gather of interleaved elements.
Definition vector.hpp:996

argon::Vector::Divide
ace argon_type Divide(argon_type b) const
Divide two vectors.
Definition vector.hpp:604

argon::Vector::ShiftRightRound
ace argon_type ShiftRightRound() const
Shift the elements of the vector to the right by a specified number of bits, rounding the result.
Definition vector.hpp:751

argon::Vector< simd::Vec128_t< ScalarType > >::predicate_type
Bool_t< simd::Vec128_t< ScalarType > > predicate_type
Definition vector.hpp:58

argon::Vector::LoadCopyInterleaved
static ace std::array< argon_type, stride > LoadCopyInterleaved(const scalar_type *ptr)
Load multiple vectors from a pointer, duplicating the value across all lanes.
Definition vector.hpp:915

argon::Vector::size
static ace int size()
Get the number of elements.
Definition vector.hpp:1293

argon::Vector::LoadGatherOffsetBytes
static ace argon_type LoadGatherOffsetBytes(const scalar_type *base, helpers::ArgonFor_t< simd::make_unsigned_t< Bool_t< VectorType > > > offset_vector)
Using a base address and a vector of offset bytes and a base pointer, create a new vector.
Definition vector.hpp:812

argon::Vector::GreaterThan
ace argon_bool_type GreaterThan(argon_type b) const
Compare the lanes of two vectors, setting the result lane's bits to ON if a is greater than b.
Definition vector.hpp:668

argon::Vector::ZipWith
ace std::array< argon_type, 2 > ZipWith(argon_type b) const
Zip two vectors together, returning two vectors of pairs.
Definition vector.hpp:1230

argon::Vector::operator-
ace argon_type operator-(argon_type b) const
Subtract a vector and return the result.
Definition vector.hpp:199

argon::Vector< simd::Vec128_t< ScalarType > >::argon_type
helpers::ArgonFor_t< simd::Vec128_t< ScalarType > > argon_type
Definition vector.hpp:57

argon::Vector::LoadScalar
static ace argon_type LoadScalar(const scalar_type *ptr)
Constructs a Vector from a scalar pointer.
Definition vector.hpp:104

argon::Vector::ShiftRightAccumulateRound
ace argon_type ShiftRightAccumulateRound(argon_type b) const
Shift the elements of the b vector to the right by a specified number of bits, and then add the resul...
Definition vector.hpp:771

argon::Vector::operator*
ace argon_type operator*(argon_type b) const
Multiply a vector and return the result.
Definition vector.hpp:202

argon::Vector::TestNonzero
ace predicate_type TestNonzero() const
Ands the current vector with the given vector, then checks if nonzero.
Definition vector.hpp:1173

argon::Vector::LoadCopy
static ace argon_type LoadCopy(const scalar_type *ptr)
Load a vector from a pointer, duplicating the value across all lanes.
Definition vector.hpp:797

argon::Vector::LoadToLane
ace argon_type LoadToLane(const scalar_type *ptr)
Load a lane from a pointer.
Definition vector.hpp:875

argon::Vector::SubtractAbs
ace argon_type SubtractAbs(argon_type b) const
Subtract two vectors, taking the absolute value of the result.
Definition vector.hpp:398

argon::Vector::ShiftLeft
ace argon_type ShiftLeft() const
Shift the elements of the vector to the left by a specified number of bits.
Definition vector.hpp:702

argon::Vector::LastLane
ace const_lane_type< lanes - 1 > LastLane()
Get the last lane of the vector.
Definition vector.hpp:341

argon::Vector::ShiftLeftSaturate
ace argon_type ShiftLeftSaturate(helpers::ArgonFor_t< simd::make_signed_t< Bool_t< VectorType > > > b) const
Shift the elements of the vector to the left by a specified number of bits, saturating the result.
Definition vector.hpp:711

argon::Vector::MultiplyFixedQMax
ace argon_type MultiplyFixedQMax(lane_type l) const
Multiply a QMax fixed-point vector by a lane value, returning a fixed-point product.
Definition vector.hpp:531

argon::Vector::MultiplySubtract
ace argon_type MultiplySubtract(scalar_type b, argon_type c) const
Multiply a vector by a scalar value and subtract from a third vector.
Definition vector.hpp:510

argon::Vector::operator<=
ace argon_bool_type operator<=(argon_type b) const
Compare two vectors, checking if this vector is less than or equal to the other.
Definition vector.hpp:220

argon::Vector::ShiftLeftSaturate
ace argon_type ShiftLeftSaturate() const
Shift the elements of the vector to the left by a specified number of bits, saturating the result.
Definition vector.hpp:725

argon::Vector::BitwiseSelect
ace argon_type BitwiseSelect(ArgType true_value, ArgType false_value) const
Bitwise select between two vectors, using the current vector as a mask.
Definition vector.hpp:1157

argon::Vector::operator&
ace argon_type operator&(argon_type b) const
Bitwise AND two vectors and return the result.
Definition vector.hpp:232

argon::Vector::MultiplyRoundFixedQMax
ace argon_type MultiplyRoundFixedQMax(argon_type v) const
Multiply two fixed-point vectors, returning a fixed-point product.
Definition vector.hpp:538

argon::Vector::SubtractSaturate
ace argon_type SubtractSaturate(argon_type b) const
Subtract two vectors, saturating the result.
Definition vector.hpp:394

argon::Vector::LoadToLaneInterleaved
static ace std::array< argon_type, Stride > LoadToLaneInterleaved(simd::MultiVector_t< VectorType, Stride > multi, const scalar_type *ptr)
Load a value from a pointer into a vector at the lane index lane, de-interleaving.
Definition vector.hpp:945

argon::Vector::LoadMulti
static ace std::array< argon_type, n > LoadMulti(const scalar_type *ptr)
Load n vectors from a single contiguous set of memory.
Definition vector.hpp:1016

argon::Vector::LoadGatherOffsetIndex
static ace argon_type LoadGatherOffsetIndex(const scalar_type *base, helpers::ArgonFor_t< simd::make_unsigned_t< Bool_t< VectorType > > > offset_vector)
Using a base address and a vector of offset indices and a base pointer, create a new vector.
Definition vector.hpp:844

argon::Vector< simd::Vec128_t< ScalarType > >::argon_bool_type
helpers::ArgonFor_t< predicate_type > argon_bool_type
Definition vector.hpp:59

argon::Vector::operator|
ace argon_type operator|(argon_type b) const
Bitwise OR two vectors and return the result.
Definition vector.hpp:235

argon::Vector::MultiplyFixedQMax
ace argon_type MultiplyFixedQMax(argon_type v) const
Multiply two QMax fixed-point vectors, returning a fixed-point product.
Definition vector.hpp:522

argon::Vector::MultiplyAdd
ace argon_type MultiplyAdd(argon_type b, scalar_type c) const
Multiply a vector by a scalar value and add a third vector.
Definition vector.hpp:451

argon::Vector< simd::Vec128_t< ScalarType > >::scalar_type
simd::Scalar_t< simd::Vec128_t< ScalarType > > scalar_type
Definition vector.hpp:55

argon::Vector::GetLane
ace const const_lane_type< LaneIndex > GetLane() const
Get a single lane of the vector by index.
Definition vector.hpp:323

argon::Vector::operator~
ace argon_type operator~() const
Bitwise NOT the vector and return the result.
Definition vector.hpp:241

argon::Vector::Vector
ace Vector(argon::ConstLane< LaneIndex, VectorType > lane)
Constructs a Vector from a ConstLane object.
Definition vector.hpp:92

argon::Vector::Select
ace argon_type Select(ArgType true_value, ArgType false_value) const
Bitwise select between two vectors, using the current vector as a mask.
Definition vector.hpp:1164

argon::Vector::to_array
ace std::array< scalar_type, lanes > to_array()
Convert the vector to an array of scalar values.
Definition vector.hpp:275

argon::Vector::Absolute
ace argon_type Absolute() const
Get the absolute value of the vector.
Definition vector.hpp:557

argon::Vector< simd::Vec128_t< ScalarType > >::lanes
static constexpr size_t lanes
Definition vector.hpp:62

argon::Vector::operator[]
ace lane_type operator[](const size_t i)
Access a lane of the vector by index.
Definition vector.hpp:247

argon::Vector::BitwiseClear
ace argon_type BitwiseClear(argon_type b) const
Bitwise AND of the vector with the NOT of another vector.
Definition vector.hpp:1150

argon::Vector::BitwiseAndNot
ace argon_type BitwiseAndNot(argon_type b) const
Bitwise AND of the vector with the NOT of another vector.
Definition vector.hpp:1141

argon::Vector::ShiftRightAccumulate
ace argon_type ShiftRightAccumulate(argon_type b) const
Shift the elements of the b vector to the right by a specified number of bits, and then add the resul...
Definition vector.hpp:759

argon::Vector::operator>>
ace argon_type operator>>(const int i) const
Shift the elements of the vector to the right by a specified number of bits.
Definition vector.hpp:250

argon::Vector::Add
ace argon_type Add(argon_type b) const
Add two vectors.
Definition vector.hpp:359

argon::Vector::BitwiseOr
ace argon_type BitwiseOr(argon_type b) const
Bitwise OR of the vector with another vector.
Definition vector.hpp:1112

argon::Vector::Vector
constexpr Vector()=default
The default constructor for the Vector class.

argon::Vector::MultiplyAddFixedQMax
ace argon_type MultiplyAddFixedQMax(argon_type b, arg_type c) const
Multiply-add three fixed-point vectors, returning a fixed-point sum.
Definition vector.hpp:580

argon::Vector::GetLane
ace const lane_type GetLane(const int i) const
Get a single lane of the vector by index.
Definition vector.hpp:303

argon::Vector::operator=
constexpr Vector & operator=(const Vector &other)=default
Copy assignment operator for the Vector class.

argon::Vector::operator!=
ace argon_bool_type operator!=(argon_type b) const
Compare two vectors for inequality.
Definition vector.hpp:211

argon::Vector::Generate
static ace argon_type Generate(FuncType body)
Constructs a Vector from a function that generates values.
Definition vector.hpp:169

argon::Vector::CountLeadingZeroBits
ace argon_type CountLeadingZeroBits() const
Count the number of consecutive top bits that are set to zero.
Definition vector.hpp:1186

argon::Vector::Min
ace argon_type Min(argon_type b) const
Compare the lanes of two vectors, copying the smaller of each lane to the result.
Definition vector.hpp:646

argon::Vector::operator-
ace argon_type operator-() const
Negate the SIMD vector and return the result.
Definition vector.hpp:193

argon::Vector::SubtractHalve
ace argon_type SubtractHalve(argon_type b) const
Subtract two vectors, halving the result.
Definition vector.hpp:391

argon::Vector::MultiplyAdd
ace argon_type MultiplyAdd(lane_type b, argon_type c) const
Multiply a vector by a lane value and add a third vector.
Definition vector.hpp:472

argon::Vector::ShiftRightInsert
ace argon_type ShiftRightInsert(argon_type b) const
Shift the elements of the vector to the right by a specified number of bits, ORing the result with th...
Definition vector.hpp:783

argon::Vector::SubtractAbsAdd
ace argon_type SubtractAbsAdd(argon_type b, argon_type c) const
Subtract two vectors, taking the absolute value of the result and adding a third vector.
Definition vector.hpp:402

argon::Vector::Vector
constexpr Vector(Vector &&other)=default
Move constructor for the Vector class.

argon::Vector::Subtract
ace argon_type Subtract(argon_type b) const
Subtract two vectors.
Definition vector.hpp:381

argon::Vector::BitwiseXor
ace argon_type BitwiseXor(argon_type b) const
Bitwise XOR of the vector with another vector.
Definition vector.hpp:1121

argon::Vector::Vector
ace Vector(argon::Lane< VectorType > lane)
Constructs a Vector from a Lane object.
Definition vector.hpp:85

argon::Vector::AddHalveRound
ace argon_type AddHalveRound(argon_type b) const
Adds two vectors, halving and rounding the result.
Definition vector.hpp:373

argon::Vector::operator[]
ace Lane< const VectorType > operator[](const size_t i) const
Access a lane of the vector by index.
Definition vector.hpp:244

argon::arithmetic
Definition vector.hpp:36

features.h
Header file for SIMD features and platform detection.

argon::helpers::ArgonFor_t
typename ArgonFor< std::remove_cv_t< T > >::type ArgonFor_t
Helper alias to get the Argon type for a given vector type.
Definition argon_for.hpp:45

std
Lane deconstruction feature.
Definition argon_full.hpp:302

to_array.hpp
Helper functions to convert C-style arrays to std::array of Argon types, mimicking std::to_array.

argon::to_array
constexpr std::array< helpers::ArgonFor_t< T >, N > to_array(T(&a)[N])
Convert a C-style array of vector types to a std::array of Argon types.
Definition to_array.hpp:29