Argon 0.1.0
|
Represents a SIMD vector with various operations. More...
#include <vector.hpp>
Public Types | |
template<size_t LaneIndex> | |
using | const_lane_type = ConstLane<LaneIndex, VectorType> |
The type of a single lane of the SIMD vector. | |
using | lane_type = Lane<VectorType> |
The type of a single lane of the SIMD vector. | |
using | scalar_type = simd::Scalar_t<VectorType> |
The scalar type of the SIMD vector. | |
using | vector_type = VectorType |
The SIMD vector type. | |
using | argon_type = helpers::ArgonFor_t<VectorType> |
The Argon type for the SIMD vector. | |
using | predicate_type = Bool_t<VectorType> |
The type of a boolean SIMD vector. | |
using | argon_bool_type = helpers::ArgonFor_t<predicate_type> |
The Argon type for the boolean vector. |
Public Member Functions | |
constexpr | Vector ()=default |
The default constructor for the Vector class. | |
constexpr | Vector (Vector &&other)=default |
Move constructor for the Vector class. | |
constexpr | Vector (const Vector &other)=default |
Copy constructor for the Vector class. | |
constexpr Vector & | operator= (Vector &&other)=default |
Move assignment operator for the Vector class. | |
constexpr Vector & | operator= (const Vector &other)=default |
Copy assignment operator for the Vector class. | |
ace | Vector (VectorType vector) |
Constructs a Vector from a SIMD vector type. | |
ace | Vector (scalar_type scalar) |
Constructs a Vector from a scalar value. | |
ace | Vector (argon::Lane< VectorType > lane) |
Constructs a Vector from a Lane object. | |
template<size_t LaneIndex> | |
ace | Vector (argon::ConstLane< LaneIndex, VectorType > lane) |
Constructs a Vector from a ConstLane object. | |
template<typename... ArgTypes> requires (sizeof...(ArgTypes) > 1) | |
ace | Vector (ArgTypes... args) |
ace argon_type | operator- () const |
Negate the SIMD vector and return the result. | |
ace argon_type | operator+ (argon_type b) const |
Add a vector and return the result. | |
ace argon_type | operator- (argon_type b) const |
Subtract a vector and return the result. | |
ace argon_type | operator* (argon_type b) const |
Multiply a vector and return the result. | |
ace argon_type | operator/ (argon_type b) const |
Divide a vector and return the result. | |
ace argon_bool_type | operator== (argon_type b) const |
Compare two vectors for equality. | |
ace argon_bool_type | operator!= (argon_type b) const |
Compare two vectors for inequality. | |
ace argon_bool_type | operator< (argon_type b) const |
Compare two vectors, checking if this vector is less than the other. | |
ace argon_bool_type | operator> (argon_type b) const |
Compare two vectors, checking if this vector is greater than the other. | |
ace argon_bool_type | operator<= (argon_type b) const |
Compare two vectors, checking if this vector is less than or equal to the other. | |
ace argon_bool_type | operator>= (argon_type b) const |
Compare two vectors, checking if this vector is greater than or equal to the other. | |
ace argon_type | operator++ () const |
Increment the vector by 1 and return the result. | |
ace argon_type | operator-- () const |
Decrement the vector by 1 and return the result. | |
ace argon_type | operator& (argon_type b) const |
Bitwise AND two vectors and return the result. | |
ace argon_type | operator| (argon_type b) const |
Bitwise OR two vectors and return the result. | |
ace argon_type | operator^ (argon_type b) const |
Bitwise XOR two vectors and return the result. | |
ace argon_type | operator~ () const |
Bitwise NOT the vector and return the result. | |
ace Lane< const VectorType > | operator[] (const size_t i) const |
Access a lane of the vector by index. | |
ace lane_type | operator[] (const size_t i) |
Access a lane of the vector by index. | |
ace argon_type | operator>> (const int i) const |
Shift the elements of the vector to the right by a specified number of bits. | |
ace argon_type | operator<< (const int i) const |
Shift the elements of the vector to the left by a specified number of bits. | |
constexpr VectorType | vec () const |
Get the underlying SIMD vector. | |
constexpr | operator VectorType () const |
Convert the vector to the underlying SIMD vector type. | |
ace std::array< scalar_type, lanes > | to_array () |
Convert the vector to an array of scalar values. | |
ace const lane_type | GetLane (const size_t i) const |
Get a single lane of the vector by index. | |
ace lane_type | GetLane (const size_t i) |
ace const lane_type | GetLane (const int i) const |
Get a single lane of the vector by index. | |
ace lane_type | GetLane (const int i) |
template<size_t LaneIndex> | |
ace const const_lane_type< LaneIndex > | GetLane () const |
Get a single lane of the vector by index. | |
template<size_t LaneIndex> | |
ace const_lane_type< LaneIndex > | GetLane () |
ace const_lane_type< lanes - 1 > | LastLane () |
Get the last lane of the vector. | |
ace argon_type | ShiftRight (const int i) const |
Shift the elements of the vector to the right by a specified number of bits. | |
ace argon_type | ShiftLeft (const int i) const |
Shift the elements of the vector to the left by a specified number of bits. | |
ace argon_type | Negate () const |
Negate the vector and return the result. | |
ace argon_type | Add (argon_type b) const |
Add two vectors. | |
ace argon_type | AddHalve (argon_type b) const |
Adds two vectors, halving the result. | |
ace argon_type | AddHalveRound (argon_type b) const |
Adds two vectors, halving and rounding the result. | |
ace argon_type | AddSaturate (argon_type b) const |
Adds two vectors, saturating the result. | |
ace argon_type | Subtract (argon_type b) const |
Subtract two vectors. | |
ace argon_type | SubtractHalve (argon_type b) const |
Subtract two vectors, halving the result. | |
ace argon_type | SubtractSaturate (argon_type b) const |
Subtract two vectors, saturating the result. | |
ace argon_type | SubtractAbs (argon_type b) const |
Subtract two vectors, taking the absolute value of the result. | |
ace argon_type | SubtractAbsAdd (argon_type b, argon_type c) const |
Subtract two vectors, taking the absolute value of the result and adding a third vector. | |
ace argon_type | Multiply (argon_type b) const |
Multiply two vectors. | |
ace argon_type | Multiply (scalar_type b) const |
Multiply a vector by a scalar value. | |
ace argon_type | Multiply (lane_type b) const |
Multiply a vector by a lane value. | |
template<size_t LaneIndex> | |
ace argon_type | Multiply (const_lane_type< LaneIndex > b) const |
Multiply a vector by a lane value. | |
ace argon_type | MultiplyAdd (argon_type b, argon_type c) const |
Multiply two vectors and add a third vector. | |
ace argon_type | MultiplyAdd (argon_type b, scalar_type c) const |
Multiply a vector by a scalar value and add a third vector. | |
ace argon_type | MultiplyAdd (scalar_type b, argon_type c) const |
Multiply a vector by a scalar value and add a third vector. | |
ace argon_type | MultiplyAdd (argon_type b, lane_type c) const |
Multiply a vector by a lane value and add a third vector. | |
ace argon_type | MultiplyAdd (lane_type b, argon_type c) const |
Multiply a vector by a lane value and add a third vector. | |
template<size_t LaneIndex> | |
ace argon_type | MultiplyAdd (argon_type b, const_lane_type< LaneIndex > c) const |
Multiply a vector by a lane value and add a third vector. | |
template<size_t LaneIndex> | |
ace argon_type | MultiplyAdd (const_lane_type< LaneIndex > b, argon_type c) const |
Multiply a vector by a lane value and add a third vector. | |
ace argon_type | MultiplySubtract (argon_type b, argon_type c) const |
Multiply two vectors and subtract from a third vector. | |
ace argon_type | MultiplySubtract (argon_type b, scalar_type c) const |
Multiply a vector by a scalar value and subtract from a third vector. | |
ace argon_type | MultiplySubtract (scalar_type b, argon_type c) const |
Multiply a vector by a scalar value and subtract from a third vector. | |
ace argon_type | MultiplySubtract (argon_type b, lane_type c) const |
Multiply a vector by a lane value and subtract from a third vector. | |
ace argon_type | MultiplyFixedQMax (argon_type v) const |
Multiply two QMax fixed-point vectors, returning a fixed-point product. | |
ace argon_type | MultiplyFixedQMax (scalar_type s) const |
Multiply a QMax fixed-point vector by a scalar value, returning a fixed-point product. | |
ace argon_type | MultiplyFixedQMax (lane_type l) const |
Multiply a QMax fixed-point vector by a lane value, returning a fixed-point product. | |
ace argon_type | MultiplyRoundFixedQMax (argon_type v) const |
Multiply two fixed-point vectors, returning a fixed-point product. | |
ace argon_type | MultiplyRoundFixedQMax (scalar_type s) const |
Multiply a fixed-point vector by a scalar value, returning a fixed-point product. | |
ace argon_type | MultiplyRoundFixedQMax (lane_type l) const |
Multiply a fixed-point vector by a lane value, returning a fixed-point product. | |
ace argon_type | Absolute () const |
Get the absolute value of the vector. | |
ace argon_type | ReciprocalEstimate () const |
1 / value, using an estimate for speed | |
template<typename arg_type> requires (is_one_of<arg_type, argon_type, scalar_type, lane_type> || std::is_convertible_v<arg_type, argon_type> || std::is_convertible_v<arg_type, scalar_type>) | |
ace argon_type | MultiplyAddFixedQMax (argon_type b, arg_type c) const |
Multiply-add three fixed-point vectors, returning a fixed-point sum. | |
template<typename arg_type> requires (is_one_of<arg_type, argon_type, scalar_type, lane_type> || std::is_convertible_v<arg_type, argon_type> || std::is_convertible_v<arg_type, scalar_type>) | |
ace argon_type | MultiplyRoundAddFixedQMax (argon_type b, arg_type c) const |
Multiply-round-add three fixed-point vectors, returning a fixed-point sum. | |
ace argon_type | Divide (argon_type b) const |
Divide two vectors. | |
ace argon_type | Modulo (argon_type b) const |
Get the modulo of two vectors. | |
ace argon_type | Modulo (scalar_type b) const |
Get the modulo of a vector and a scalar value. | |
ace argon_type | Max (argon_type b) const |
Compare the lanes of two vectors, copying the larger of each lane to the result. | |
ace argon_type | Min (argon_type b) const |
Compare the lanes of two vectors, copying the smaller of each lane to the result. | |
ace argon_bool_type | Equal (argon_type b) const |
Compare the lanes of two vectors, setting the result lane's bits to ON if they are equal. | |
ace argon_bool_type | GreaterThanOrEqual (argon_type b) const |
Compare the lanes of two vectors, setting the result lane's bits to ON if a is greater than or equal to b. | |
ace argon_bool_type | LessThanOrEqual (argon_type b) const |
Compare the lanes of two vectors, setting the result lane's bits to ON if a is less than or equal to b. | |
ace argon_bool_type | GreaterThan (argon_type b) const |
Compare the lanes of two vectors, setting the result lane's bits to ON if a is greater than b. | |
ace argon_bool_type | LessThan (argon_type b) const |
Compare the lanes of two vectors, setting the result lane's bits to ON if a is less than b. | |
ace argon_type | ShiftLeft (helpers::ArgonFor_t< simd::make_signed_t< Bool_t< VectorType > > > b) const |
Shift the elements of the vector to the left by a specified number of bits. | |
ace argon_type | ShiftLeft (std::make_signed_t< simd::Scalar_t< Bool_t< VectorType > > > n) const |
Shift the elements of the vector to the left by a specified number of bits. | |
template<int n> | |
ace argon_type | ShiftLeft () const |
Shift the elements of the vector to the left by a specified number of bits. | |
ace argon_type | ShiftLeftSaturate (helpers::ArgonFor_t< simd::make_signed_t< Bool_t< VectorType > > > b) const |
Shift the elements of the vector to the left by a specified number of bits, saturating the result. | |
ace argon_type | ShiftLeftRound (argon_type b) const |
Shift the elements of the vector to the left by a specified number of bits, rounding the result. | |
ace argon_type | ShiftLeftRoundSaturate (argon_type b) const |
Shift the elements of the vector to the left by a specified number of bits, rounding and saturating the result. | |
template<int n> | |
ace argon_type | ShiftLeftSaturate () const |
Shift the elements of the vector to the left by a specified number of bits, saturating the result. | |
template<int n> | |
ace argon_type | ShiftLeftInsert (argon_type b) const |
Shift the elements of the vector to the left by a specified number of bits, and then OR the result with another vector masked to the number of shift bits. | |
template<int n> | |
ace argon_type | ShiftRight () const |
Shift the elements of the vector to the right by a specified number of bits. | |
template<int n> | |
ace argon_type | ShiftRightRound () const |
Shift the elements of the vector to the right by a specified number of bits, rounding the result. | |
template<int n> | |
ace argon_type | ShiftRightAccumulate (argon_type b) const |
Shift the elements of the b vector to the right by a specified number of bits, and then add the result to this vector. | |
template<int n> | |
ace argon_type | ShiftRightAccumulateRound (argon_type b) const |
Shift the elements of the b vector to the right by a specified number of bits, rounding the result, and then add it to this vector. | |
template<int n> | |
ace argon_type | ShiftRightInsert (argon_type b) const |
Shift the elements of the vector to the right by a specified number of bits, ORing the result with the vector masked to the number of shift bits. | |
template<size_t lane> | |
ace argon_type | LoadToLane (const scalar_type *ptr) |
Load a lane from a pointer. | |
ace void | StoreTo (scalar_type *ptr) const |
Store the vector to a pointer. | |
template<int LaneIndex> | |
ace void | StoreLaneTo (scalar_type *ptr) |
Store a lane of the vector to a pointer. | |
ace argon_type | PairwiseAdd (argon_type b) const |
Pairwise add two vectors, returning the sum of each pair of lanes. | |
ace argon_type | PairwiseMax (argon_type b) const |
Select the maximum of each pair of lanes in the two vectors. | |
ace argon_type | PairwiseMin (argon_type b) const |
Select the minimum of each pair of lanes in the two vectors. | |
ace argon_type | BitwiseNot () const |
Bitwise NOT of the vector. | |
ace argon_type | BitwiseAnd (argon_type b) const |
Bitwise AND of the vector with another vector. | |
ace argon_type | BitwiseOr (argon_type b) const |
Bitwise OR of the vector with another vector. | |
ace argon_type | BitwiseXor (argon_type b) const |
Bitwise XOR of the vector with another vector. | |
ace argon_type | BitwiseOrNot (argon_type b) const |
Bitwise OR of the vector with the NOT of another vector. | |
ace argon_type | BitwiseAndNot (argon_type b) const |
Bitwise AND of the vector with the NOT of another vector. | |
ace argon_type | BitwiseClear (argon_type b) const |
Bitwise AND of the vector with the NOT of another vector. | |
template<typename ArgType> requires std::is_unsigned_v<scalar_type> | |
ace argon_type | BitwiseSelect (ArgType true_value, ArgType false_value) const |
Bitwise select between two vectors, using the current vector as a mask. | |
template<typename ArgType> requires std::is_unsigned_v<scalar_type> | |
ace argon_type | Select (ArgType true_value, ArgType false_value) const |
Bitwise select between two vectors, using the current vector as a mask. | |
ace predicate_type | CompareTestNonzero (argon_type b) const |
Ands the current vector with the given vector, then checks if nonzero. | |
ace predicate_type | TestNonzero () const |
Checks whether each lane of the vector is nonzero. | |
ace helpers::ArgonFor_t< simd::make_signed_t< Bool_t< VectorType > > > | CountLeadingSignBits () const |
Count the number of consecutive bits following the sign bit that are set to the same value as the sign bit. | |
ace argon_type | CountLeadingZeroBits () const |
Count the number of consecutive top bits that are set to zero. | |
ace argon_type | CountActiveBits () const |
Count the number of bits that are set to one in the vector. | |
ace argon_type | Popcount () const |
Count the number of bits that are set to one in the vector. | |
template<int n> | |
ace argon_type | Extract (argon_type b) const |
Extract n elements from the lower end of the operand, and the remaining elements from the top end of this vector, combining them into the result vector. | |
ace argon_type | Reverse64bit () const |
ace argon_type | Reverse32bit () const |
ace argon_type | Reverse16bit () const |
ace std::array< argon_type, 2 > | ZipWith (argon_type b) const |
Zip two vectors together, returning two vectors of pairs. | |
std::array< argon_type, 2 > | UnzipWith (argon_type b) |
Unzip two vectors, returning two vectors of pairs. | |
std::array< argon_type, 2 > | TransposeWith (argon_type b) const |
Perform a 2x2 matrix transpose on two vectors, returning two vectors of pairs. | |
template<typename FuncType> requires std::convertible_to<FuncType, std::function<scalar_type(scalar_type)>> | |
ace argon_type | map (FuncType body) const |
template<typename FuncType> requires std::convertible_to<FuncType, std::function<scalar_type(scalar_type, int)>> | |
ace argon_type | map_with_index (FuncType body) const |
template<typename FuncType> requires std::convertible_to<FuncType, std::function<scalar_type(scalar_type, scalar_type)>> | |
ace argon_type | map2 (argon_type other, FuncType body) const |
template<typename FuncType> requires std::convertible_to<FuncType, std::function<void(scalar_type&)>> | |
ace argon_type | each_lane (FuncType body) |
template<typename FuncType> requires std::convertible_to<FuncType, std::function<void(scalar_type&, int)>> | |
ace argon_type | each_lane_with_index (FuncType body) |
template<typename FuncType> requires std::convertible_to<FuncType, std::function<void()>> | |
ace void | if_lane (FuncType true_branch) |
template<typename FuncType> requires std::convertible_to<FuncType, std::function<void()>> | |
ace void | if_else_lane (FuncType true_branch, FuncType false_branch) |
template<typename FuncType> requires std::convertible_to<FuncType, std::function<void(int)>> | |
ace void | if_lane_with_index (FuncType true_branch) |
template<typename FuncType1, typename FuncType2> requires std::convertible_to<FuncType1, std::function<void(int)>> && std::convertible_to<FuncType2, std::function<void(int)>> | |
ace void | if_else_lane_with_index (FuncType1 true_branch, FuncType2 false_branch) |
ace bool | any () |
ace bool | all () |
template<std::size_t Index> | |
std::tuple_element_t< Index, argon_type > | get () |
Static Public Member Functions | |
static ace argon_type | LoadScalar (const scalar_type *ptr) |
Constructs a Vector from a scalar pointer. | |
static ace argon_type | FromScalar (scalar_type scalar) |
Constructs a Vector from a scalar value. | |
template<simd::is_vector_type IntrinsicType> | |
static ace argon_type | FromLane (argon::Lane< IntrinsicType > lane) |
Constructs a Vector from a Lane object. | |
template<size_t LaneIndex> | |
static ace argon_type | FromLane (argon::ConstLane< LaneIndex, VectorType > lane) |
Constructs a Vector from a ConstLane object. | |
static ace argon_type | Iota (scalar_type start) |
Constructs a Vector from an incrementing sequence. | |
template<typename FuncType> requires std::convertible_to<FuncType, std::function<scalar_type()>> | |
static ace argon_type | Generate (FuncType body) |
Constructs a Vector from a function that generates values. | |
template<typename FuncType> requires std::convertible_to<FuncType, std::function<scalar_type(scalar_type)>> | |
static ace argon_type | GenerateWithIndex (FuncType body) |
Constructs a Vector from a function that generates values with an index. | |
static ace argon_type | Load (const scalar_type *ptr) |
Load a vector from a pointer. | |
static ace argon_type | LoadCopy (const scalar_type *ptr) |
Load a vector from a pointer, duplicating the value across all lanes. | |
static ace argon_type | LoadGatherOffsetBytes (const scalar_type *base, helpers::ArgonFor_t< simd::make_unsigned_t< Bool_t< VectorType > > > offset_vector) |
Using a base address and a vector of byte offsets, create a new vector. | |
static ace argon_type | LoadGatherOffsetIndex (const scalar_type *base, helpers::ArgonFor_t< simd::make_unsigned_t< Bool_t< VectorType > > > offset_vector) |
Using a base address and a vector of offset indices and a base pointer, create a new vector. | |
template<size_t stride> | |
static ace std::array< argon_type, stride > | LoadInterleaved (const scalar_type *ptr) |
Load multiple vectors from a pointer, de-interleaving. | |
template<size_t stride> | |
static ace std::array< argon_type, stride > | LoadCopyInterleaved (const scalar_type *ptr) |
Load multiple vectors from a pointer, duplicating the value across all lanes. | |
template<size_t LaneIndex, size_t Stride> | |
static ace std::array< argon_type, Stride > | LoadToLaneInterleaved (simd::MultiVector_t< VectorType, Stride > multi, const scalar_type *ptr) |
Load a value from a pointer into a vector at the lane index lane, de-interleaving. | |
template<size_t lane, size_t stride> | |
static ace std::array< argon_type, stride > | LoadToLaneInterleaved (std::array< argon_type, stride > multi, const scalar_type *ptr) |
Load a value from a pointer into a vector at the lane index lane, de-interleaving. | |
template<size_t stride> | |
static ace std::array< argon_type, stride > | LoadGatherOffsetIndexInterleaved (const scalar_type *base_ptr, helpers::ArgonFor_t< simd::make_unsigned_t< Bool_t< VectorType > > > offset_vector) |
Perform a Load-Gather of interleaved elements. | |
template<size_t n> | |
static ace std::array< argon_type, n > | LoadMulti (const scalar_type *ptr) |
Load n vectors from a single contiguous set of memory. | |
static ace int | size () |
Get the number of elements. |
Static Public Attributes | |
static constexpr size_t | lanes = (simd::is_quadword_v<VectorType> ? 16 : 8) / sizeof(scalar_type) |
The number of lanes in the SIMD vector. |
Static Protected Member Functions | |
template<std::size_t... Ints> | |
static ace argon_type | IotaHelper (scalar_type start, std::index_sequence< Ints... >) |
Protected Attributes | |
VectorType | vec_ |
Represents a SIMD vector with various operations.
VectorType | The type of the SIMD vector. (e.g. int32x4_t, float32x4_t) |
This class provides a wrapper around SIMD vector types, allowing for object-oriented operations on the vector.
|
inline |
Constructs a Vector from a SIMD vector type.
vector | The SIMD vector to construct from. |
|
inline |
Constructs a Vector from a scalar value.
scalar | The scalar value to construct from. |
This constructor duplicates the scalar value across all lanes of the SIMD vector.
|
inline |
|
inline |
|
inline |
Adds two vectors, halving the result.
Equivalent to (a + b) / 2.
|
inline |
Adds two vectors, halving and rounding the result.
Equivalent to round((a + b) / 2).
|
inline |
Adds two vectors, saturating the result.
Equivalent to a + b, but saturates to the maximum value if the result exceeds the maximum representable value.
|
inline |
Bitwise AND of the vector with the NOT of another vector.
Equivalent to a & ~b.
|
inline |
Bitwise AND of the vector with the NOT of another vector.
Equivalent to a & ~b.
|
inline |
Bitwise ops.
Bitwise NOT of the vector
|
inline |
Bitwise OR of the vector with the NOT of another vector.
Equivalent to a | ~b.
|
inline |
Bitwise select between two vectors, using the current vector as a mask.
Equivalent to (mask & b) | (~mask & c).
|
inline |
Ands the current vector with the given vector, then checks if nonzero.
If so, fills the lane with all ones
Equivalent to (a & b) != 0 ? 0xFFFFFFFF : 0x00000000
|
inline |
Count the number of bits that are set to one in the vector.
Equivalent to std::popcount(a).
|
inline |
Count the number of consecutive bits following the sign bit that are set to the same value as the sign bit.
Equivalent to std::countl_one(a).
|
inline |
Count the number of consecutive top bits that are set to zero.
Equivalent to std::countl_zero(a)
|
inline |
Compare the lanes of two vectors, setting the result lane's bits to ON if they are equal.
Equivalent to a == b ? 0xFFFFFFFF : 0x00000000
|
inline |
Extract n elements from the lower end of the operand, and the remaining elements from the top end of this vector, combining them into the result vector.
For example: {a0, a1, a2, a3} and {b0, b1, b2, b3} with n = 1 will result in {a1, a2, a3, b0}
|
inlinestatic |
|
inlinestatic |
|
inlinestatic |
Constructs a Vector from a scalar value.
scalar | The scalar value to construct from. |
This constructor duplicates the scalar value across all lanes of the SIMD vector.
|
inlinestatic |
Constructs a Vector from a function that generates values.
body | The function that generates the values. |
This constructor creates a SIMD vector with lanes containing values generated by the function.
|
inlinestatic |
Constructs a Vector from a function that generates values with an index.
body | The function that generates the values. |
This constructor creates a SIMD vector with lanes containing values generated by the function using the index.
|
inline |
Get a single lane of the vector by index.
LaneIndex | The index of the lane to get. |
|
inline |
Get a single lane of the vector by index.
i | The index of the lane to get. |
|
inline |
Get a single lane of the vector by index.
i | The index of the lane to get. |
|
inline |
Compare the lanes of two vectors, setting the result lane's bits to ON if a is greater than b.
Equivalent to a > b ? 0xFFFFFFFF : 0x00000000
|
inline |
Compare the lanes of two vectors, setting the result lane's bits to ON if a is greater than or equal to b.
Equivalent to a >= b ? 0xFFFFFFFF : 0x00000000
|
inlinestatic |
Constructs a Vector from an incrementing sequence.
start | The starting value of the sequence. |
step | The step size of the sequence. |
This constructor creates a SIMD vector with lanes containing values from start to start + (lanes - 1) * step.
|
inline |
Compare the lanes of two vectors, setting the result lane's bits to ON if a is less than b.
Equivalent to a < b ? 0xFFFFFFFF : 0x00000000
|
inline |
Compare the lanes of two vectors, setting the result lane's bits to ON if a is less than or equal to b.
Equivalent to a <= b ? 0xFFFFFFFF : 0x00000000
|
inlinestatic |
Load multiple vectors from a pointer, duplicating the value across all lanes.
stride | The interleave stride |
ptr | The pointer to load from |
|
inlinestatic |
Using a base address and a vector of offset bytes and a base pointer, create a new vector.
base | The address to index from |
offset_vector | A vector of byte offsets |
|
inlinestatic |
Using a base address and a vector of offset indices and a base pointer, create a new vector.
base | The address to index from |
offset_vector | A vector of offset indices |
|
inlinestatic |
Perform a Load-Gather of interleaved elements.
stride | the distance between similar elements |
base_ptr | the address to use as a base for the gather operation |
offset_vector | a vector of offset values that are added to base_ptr to get the address to load |
|
inlinestatic |
Load multiple vectors from a pointer, de-interleaving.
stride | The interleave stride |
ptr | The pointer to load from |
|
inlinestatic |
Load n vectors from a single contiguous set of memory.
n | The number of vectors to load |
ptr | The pointer to the location in memory to load from |
|
inlinestatic |
Constructs a Vector from a scalar pointer.
ptr | The pointer to the scalar value to construct from. |
This constructor loads the scalar value from the pointer and duplicates it across all lanes of the SIMD vector.
|
inline |
Load a lane from a pointer.
ptr | The pointer to load from |
|
inlinestatic |
Load a value from a pointer into a vector at the lane index lane, de-interleaving.
lane | The lane to load |
stride | The interleave stride |
multi | The multi-vector to load into |
ptr | The pointer to load from |
|
inlinestatic |
Load a value from a pointer into a vector at the lane index lane, de-interleaving.
lane | The lane to load |
stride | The interleave stride |
multi | The multi-vector to load into |
ptr | The pointer to load from |
|
inline |
Compare the lanes of two vectors, copying the larger of each lane to the result.
Equivalent to a > b ? a : b
|
inline |
Compare the lanes of two vectors, copying the smaller of each lane to the result.
Equivalent to a < b ? a : b
|
inline |
Get the modulo of two vectors.
Equivalent to a % b.
|
inline |
Get the modulo of a vector and a scalar value.
Equivalent to a % b.
|
inline |
Multiply two vectors and add a third vector.
Equivalent to a + (b * c).
|
inline |
Multiply a vector by a lane value and add a third vector.
Equivalent to a + (b * c).
|
inline |
Multiply a vector by a lane value and add a third vector.
Equivalent to a + (b * c).
|
inline |
Multiply a vector by a scalar value and add a third vector.
Equivalent to a + (b * c).
|
inline |
Multiply a vector by a lane value and add a third vector.
Equivalent to a + (b * c).
|
inline |
Multiply a vector by a lane value and add a third vector.
Equivalent to a + (b * c).
|
inline |
Multiply a vector by a scalar value and add a third vector.
Equivalent to a + (b * c).
|
inline |
Multiply-add three fixed-point vectors, returning a fixed-point sum.
This is equivalent to a + ((b * c) >> 31)
|
inline |
Multiply two QMax fixed-point vectors, returning a fixed-point product.
This is equivalent to ((uint64_t)a * b) >> 31
|
inline |
Multiply a QMax fixed-point vector by a lane value, returning a fixed-point product.
This is equivalent to ((uint64_t)a * b) >> 31
|
inline |
Multiply a QMax fixed-point vector by a scalar value, returning a fixed-point product.
This is equivalent to ((uint64_t)a * b) >> 31
|
inline |
Multiply-round-add three fixed-point vectors, returning a fixed-point sum.
This is equivalent to a + (rnd(b * c) >> 31)
|
inline |
Multiply two fixed-point vectors, returning a fixed-point product.
This is equivalent to round(a * b) >> 31
|
inline |
Multiply a fixed-point vector by a lane value, returning a fixed-point product.
This is equivalent to round(a * b) >> 31
|
inline |
Multiply a fixed-point vector by a scalar value, returning a fixed-point product.
This is equivalent to round(a * b) >> 31
|
inline |
Multiply two vectors and subtract from a third vector.
Equivalent to a - (b * c).
|
inline |
Multiply a vector by a lane value and subtract from a third vector.
Equivalent to a - (b * c).
|
inline |
Multiply a vector by a scalar value and subtract from a third vector.
Equivalent to a - (b * c).
|
inline |
Multiply a vector by a scalar value and subtract from a third vector.
Equivalent to a - (b * c).
|
inline |
Pairwise ops.
Pairwise add two vectors, returning the sum of each pair of lanes.
Given a pair of vectors {a0, a1, a2, a3} and {b0, b1, b2, b3}, the result is {a0 + a1, a2 + a3, b0 + b1, b2 + b3}
|
inline |
Select the maximum of each pair of lanes in the two vectors.
Given a pair of vectors {a0, a1, a2, a3} and {b0, b1, b2, b3}, the result is {max(a0, a1), max(a2, a3), max(b0, b1), max(b2, b3)}
|
inline |
Select the minimum of each pair of lanes in the two vectors.
Given a pair of vectors {a0, a1, a2, a3} and {b0, b1, b2, b3}, the result is {min(a0, a1), min(a2, a3), min(b0, b1), min(b2, b3)}
|
inline |
Count the number of bits that are set to one in the vector.
Equivalent to std::popcount(a).
|
inline |
1 / value, using an estimate for speed
|
inline |
Bitwise select between two vectors, using the current vector as a mask.
Equivalent to (mask & b) | (~mask & c).
|
inline |
Shift the elements of the vector to the left by a specified number of bits.
Equivalent to a << b.
|
inline |
Shift the elements of the vector to the left by a specified number of bits.
Equivalent to a << b.
|
inline |
Shift the elements of the vector to the left by a specified number of bits.
Equivalent to a << b.
|
inline |
Shift the elements of the vector to the left by a specified number of bits, and then OR the result with another vector masked to the number of shift bits.
Equivalent to (a << b) | (c & ((1 << b) - 1)).
|
inline |
Shift the elements of the b vector to the right by a specified number of bits, and then add the result to this vector.
Equivalent to a + (b >> n).
|
inline |
Shift the elements of the b vector to the right by a specified number of bits, rounding the result, and then add it to this vector.
Equivalent to a + ((b + (1 << (n - 1))) >> n).
|
inline |
Shift the elements of the vector to the right by a specified number of bits, ORing the result with the vector masked to the number of shift bits.
Equivalent to (a >> b) | (c & ~((1 << b) - 1)).
|
inline |
Store a lane of the vector to a pointer.
ptr | The pointer to store to |
lane | The lane to store |
|
inline |
Store the vector to a pointer.
ptr | The pointer to store to |
|
inline |
Subtract two vectors, taking the absolute value of the result.
Equivalent to |a - b|.
|
inline |
Subtract two vectors, taking the absolute value of the result and adding a third vector.
Equivalent to a + |b - c|
|
inline |
Subtract two vectors, halving the result.
Equivalent to (a - b) / 2.
|
inline |
Ands the current vector with the given vector, then checks if nonzero.
If so, fills the lane with all ones
Equivalent to (a & b) != 0 ? 0xFFFFFFFF : 0x00000000
|
inline |
Convert the vector to an array of scalar values.
|
inline |
Perform a 2x2 matrix transpose on two vectors, returning two vectors of pairs.
Given a pair of vectors {a0, a1, a2, a3} and {b0, b1, b2, b3}, the result is {{a0, b0, a2, b2}, {a1, b1, a3, b3}}
|
inline |
Unzip two vectors, returning two vectors of pairs.
Given a pair of vectors {a0, b0, a1, b1} and {a2, b2, a3, b3}, the result is {{a0, a1, a2, a3}, {b0, b1, b2, b3}}
|
inline |
Zip two vectors together, returning two vectors of pairs.
Given a pair of vectors {a0, a1, a2, a3} and {b0, b1, b2, b3}, the result is {{a0, b0, a1, b1}, {a2, b2, a3, b3}}