5#elifdef __ARM_FEATURE_MVE
9#define SIMDE_ENABLE_NATIVE_ALIASES
26template <
typename T> nce T reinterpret(int8x16_t a);
27template <
typename T> nce T reinterpret(int16x8_t a);
28template <
typename T> nce T reinterpret(int32x4_t a);
29template <
typename T> nce T reinterpret(uint8x16_t a);
30template <
typename T> nce T reinterpret(uint16x8_t a);
31template <
typename T> nce T reinterpret(uint32x4_t a);
32template <
typename T> nce T reinterpret(uint64x2_t a);
33template <
typename T> nce T reinterpret(int64x2_t a);
34[[gnu::always_inline]] nce uint8x16_t add(uint8x16_t a, uint8x16_t b) {
return vaddq_u8(a, b); }
35[[gnu::always_inline]] nce uint8x16_t add_halve(uint8x16_t a, uint8x16_t b) {
return vhaddq_u8(a, b); }
36[[gnu::always_inline]] nce uint8x16_t add_halve_round(uint8x16_t a, uint8x16_t b) {
return vrhaddq_u8(a, b); }
37[[gnu::always_inline]] nce uint8x16_t add_saturate(uint8x16_t a, uint8x16_t b) {
return vqaddq_u8(a, b); }
38[[gnu::always_inline]] nce uint8x16_t multiply(uint8x16_t a, uint8x16_t b) {
return vmulq_u8(a, b); }
39[[gnu::always_inline]] nce uint8x16_t subtract(uint8x16_t a, uint8x16_t b) {
return vsubq_u8(a, b); }
40[[gnu::always_inline]] nce uint8x16_t subtract_halve(uint8x16_t a, uint8x16_t b) {
return vhsubq_u8(a, b); }
41[[gnu::always_inline]] nce uint8x16_t subtract_saturate(uint8x16_t a, uint8x16_t b) {
return vqsubq_u8(a, b); }
42[[gnu::always_inline]] nce uint8x16_t subtract_absolute(uint8x16_t a, uint8x16_t b) {
return vabdq_u8(a, b); }
43[[gnu::always_inline]] nce uint8x16_t max(uint8x16_t a, uint8x16_t b) {
return vmaxq_u8(a, b); }
44[[gnu::always_inline]] nce uint8x16_t min(uint8x16_t a, uint8x16_t b) {
return vminq_u8(a, b); }
45[[gnu::always_inline]] nce uint8x16_t shift_left(uint8x16_t a, int8x16_t b) {
return vshlq_u8(a, b); }
46[[gnu::always_inline]] nce uint8x16_t shift_left_saturate(uint8x16_t a, int8x16_t b) {
return vqshlq_u8(a, b); }
47template <
int n>[[gnu::always_inline]] nce uint8x16_t shift_left_saturate(uint8x16_t a) {
return vqshlq_n_u8(a, n); }
48template <
int n>[[gnu::always_inline]] nce uint8x16_t shift_left_insert(uint8x16_t a, uint8x16_t b) {
return vsliq_n_u8(a, b, n); }
49[[gnu::always_inline]] nce uint8x16_t shift_left_round(uint8x16_t a, int8x16_t b) {
return vrshlq_u8(a, b); }
50[[gnu::always_inline]] nce uint8x16_t shift_left_round_saturate(uint8x16_t a, int8x16_t b) {
return vqrshlq_u8(a, b); }
51template <
int n>[[gnu::always_inline]] nce uint8x16_t shift_right(uint8x16_t a) {
return vshrq_n_u8(a, n); }
52template <
int n>[[gnu::always_inline]] nce uint8x16_t shift_right_round(uint8x16_t a) {
return vrshrq_n_u8(a, n); }
53template <
int n>[[gnu::always_inline]] nce uint8x16_t shift_right_insert(uint8x16_t a, uint8x16_t b) {
return vsriq_n_u8(a, b, n); }
54template <> [[gnu::always_inline]] nce int8x16_t reinterpret(uint8x16_t a) {
return vreinterpretq_s8_u8(a); }
55template <> [[gnu::always_inline]] nce int16x8_t reinterpret(uint8x16_t a) {
return vreinterpretq_s16_u8(a); }
56template <> [[gnu::always_inline]] nce int32x4_t reinterpret(uint8x16_t a) {
return vreinterpretq_s32_u8(a); }
57template <> [[gnu::always_inline]] nce uint16x8_t reinterpret(uint8x16_t a) {
return vreinterpretq_u16_u8(a); }
58template <> [[gnu::always_inline]] nce uint32x4_t reinterpret(uint8x16_t a) {
return vreinterpretq_u32_u8(a); }
59template <> [[gnu::always_inline]] nce uint64x2_t reinterpret(uint8x16_t a) {
return vreinterpretq_u64_u8(a); }
60template <> [[gnu::always_inline]] nce int64x2_t reinterpret(uint8x16_t a) {
return vreinterpretq_s64_u8(a); }
61template <> [[gnu::always_inline]] nce uint8x16_t reinterpret(uint8x16_t a) {
return a; }
62[[gnu::always_inline]] nce uint8x16_t bitwise_not(uint8x16_t a) {
return vmvnq_u8(a); }
63[[gnu::always_inline]] nce uint8x16_t bitwise_and(uint8x16_t a, uint8x16_t b) {
return vandq_u8(a, b); }
64[[gnu::always_inline]] nce uint8x16_t bitwise_or(uint8x16_t a, uint8x16_t b) {
return vorrq_u8(a, b); }
65[[gnu::always_inline]] nce uint8x16_t bitwise_xor(uint8x16_t a, uint8x16_t b) {
return veorq_u8(a, b); }
66[[gnu::always_inline]] nce uint8x16_t bitwise_or_not(uint8x16_t a, uint8x16_t b) {
return vornq_u8(a, b); }
67[[gnu::always_inline]] nce uint8x16_t count_leading_zero_bits(uint8x16_t a) {
return vclzq_u8(a); }
68[[gnu::always_inline]] nce uint8x16_t bitwise_clear(uint8x16_t a, uint8x16_t b) {
return vbicq_u8(a, b); }
69template <
int lane>[[gnu::always_inline]] nce uint8_t get_lane(uint8x16_t v) {
return vgetq_lane_u8(v, lane); }
70[[gnu::always_inline]] nce uint8x16_t reverse_64bit(uint8x16_t a) {
return vrev64q_u8(a); }
71[[gnu::always_inline]] nce uint8x16_t reverse_32bit(uint8x16_t a) {
return vrev32q_u8(a); }
72[[gnu::always_inline]] nce uint8x16_t reverse_16bit(uint8x16_t a) {
return vrev16q_u8(a); }
73[[gnu::always_inline]] nce int8x16_t add(int8x16_t a, int8x16_t b) {
return vaddq_s8(a, b); }
74[[gnu::always_inline]] nce int8x16_t add_halve(int8x16_t a, int8x16_t b) {
return vhaddq_s8(a, b); }
75[[gnu::always_inline]] nce int8x16_t add_halve_round(int8x16_t a, int8x16_t b) {
return vrhaddq_s8(a, b); }
76[[gnu::always_inline]] nce int8x16_t add_saturate(int8x16_t a, int8x16_t b) {
return vqaddq_s8(a, b); }
77[[gnu::always_inline]] nce int8x16_t multiply(int8x16_t a, int8x16_t b) {
return vmulq_s8(a, b); }
78[[gnu::always_inline]] nce int8x16_t subtract(int8x16_t a, int8x16_t b) {
return vsubq_s8(a, b); }
79[[gnu::always_inline]] nce int8x16_t subtract_halve(int8x16_t a, int8x16_t b) {
return vhsubq_s8(a, b); }
80[[gnu::always_inline]] nce int8x16_t subtract_saturate(int8x16_t a, int8x16_t b) {
return vqsubq_s8(a, b); }
81[[gnu::always_inline]] nce int8x16_t subtract_absolute(int8x16_t a, int8x16_t b) {
return vabdq_s8(a, b); }
82[[gnu::always_inline]] nce int8x16_t abs(int8x16_t a) {
return vabsq_s8(a); }
83[[gnu::always_inline]] nce int8x16_t abs_saturate(int8x16_t a) {
return vqabsq_s8(a); }
84[[gnu::always_inline]] nce int8x16_t max(int8x16_t a, int8x16_t b) {
return vmaxq_s8(a, b); }
85[[gnu::always_inline]] nce int8x16_t min(int8x16_t a, int8x16_t b) {
return vminq_s8(a, b); }
86[[gnu::always_inline]] nce int8x16_t shift_left(int8x16_t a, int8x16_t b) {
return vshlq_s8(a, b); }
87[[gnu::always_inline]] nce int8x16_t shift_left_saturate(int8x16_t a, int8x16_t b) {
return vqshlq_s8(a, b); }
88template <
int n>[[gnu::always_inline]] nce int8x16_t shift_left_saturate(int8x16_t a) {
return vqshlq_n_s8(a, n); }
89[[gnu::always_inline]] nce int8x16_t shift_left_round(int8x16_t a, int8x16_t b) {
return vrshlq_s8(a, b); }
90[[gnu::always_inline]] nce int8x16_t shift_left_round_saturate(int8x16_t a, int8x16_t b) {
return vqrshlq_s8(a, b); }
91template <
int n>[[gnu::always_inline]] nce int8x16_t shift_left_insert(int8x16_t a, int8x16_t b) {
return vsliq_n_s8(a, b, n); }
92template <
int n>[[gnu::always_inline]] nce int8x16_t shift_right(int8x16_t a) {
return vshrq_n_s8(a, n); }
93template <
int n>[[gnu::always_inline]] nce int8x16_t shift_right_round(int8x16_t a) {
return vrshrq_n_s8(a, n); }
94template <
int n>[[gnu::always_inline]] nce int8x16_t shift_right_insert(int8x16_t a, int8x16_t b) {
return vsriq_n_s8(a, b, n); }
95template <> [[gnu::always_inline]] nce int16x8_t reinterpret(int8x16_t a) {
return vreinterpretq_s16_s8(a); }
96template <> [[gnu::always_inline]] nce int32x4_t reinterpret(int8x16_t a) {
return vreinterpretq_s32_s8(a); }
97template <> [[gnu::always_inline]] nce uint8x16_t reinterpret(int8x16_t a) {
return vreinterpretq_u8_s8(a); }
98template <> [[gnu::always_inline]] nce uint16x8_t reinterpret(int8x16_t a) {
return vreinterpretq_u16_s8(a); }
99template <> [[gnu::always_inline]] nce uint32x4_t reinterpret(int8x16_t a) {
return vreinterpretq_u32_s8(a); }
100template <> [[gnu::always_inline]] nce uint64x2_t reinterpret(int8x16_t a) {
return vreinterpretq_u64_s8(a); }
101template <> [[gnu::always_inline]] nce int64x2_t reinterpret(int8x16_t a) {
return vreinterpretq_s64_s8(a); }
102template <> [[gnu::always_inline]] nce int8x16_t reinterpret(int8x16_t a) {
return a; }
103[[gnu::always_inline]] nce int8x16_t negate(int8x16_t a) {
return vnegq_s8(a); }
104[[gnu::always_inline]] nce int8x16_t negate_saturate(int8x16_t a) {
return vqnegq_s8(a); }
105[[gnu::always_inline]] nce int8x16_t bitwise_not(int8x16_t a) {
return vmvnq_s8(a); }
106[[gnu::always_inline]] nce int8x16_t bitwise_and(int8x16_t a, int8x16_t b) {
return vandq_s8(a, b); }
107[[gnu::always_inline]] nce int8x16_t bitwise_or(int8x16_t a, int8x16_t b) {
return vorrq_s8(a, b); }
108[[gnu::always_inline]] nce int8x16_t bitwise_xor(int8x16_t a, int8x16_t b) {
return veorq_s8(a, b); }
109[[gnu::always_inline]] nce int8x16_t bitwise_or_not(int8x16_t a, int8x16_t b) {
return vornq_s8(a, b); }
110[[gnu::always_inline]] nce int8x16_t count_leading_sign_bits(int8x16_t a) {
return vclsq_s8(a); }
111[[gnu::always_inline]] nce int8x16_t count_leading_zero_bits(int8x16_t a) {
return vclzq_s8(a); }
112[[gnu::always_inline]] nce int8x16_t bitwise_clear(int8x16_t a, int8x16_t b) {
return vbicq_s8(a, b); }
113template <
int lane>[[gnu::always_inline]] nce int8_t get_lane(int8x16_t v) {
return vgetq_lane_s8(v, lane); }
114[[gnu::always_inline]] nce int8x16_t reverse_64bit(int8x16_t a) {
return vrev64q_s8(a); }
115[[gnu::always_inline]] nce int8x16_t reverse_32bit(int8x16_t a) {
return vrev32q_s8(a); }
116[[gnu::always_inline]] nce int8x16_t reverse_16bit(int8x16_t a) {
return vrev16q_s8(a); }
117[[gnu::always_inline]] nce uint16x8_t add(uint16x8_t a, uint16x8_t b) {
return vaddq_u16(a, b); }
118[[gnu::always_inline]] nce uint16x8_t add_halve(uint16x8_t a, uint16x8_t b) {
return vhaddq_u16(a, b); }
119[[gnu::always_inline]] nce uint16x8_t add_halve_round(uint16x8_t a, uint16x8_t b) {
return vrhaddq_u16(a, b); }
120[[gnu::always_inline]] nce uint16x8_t add_saturate(uint16x8_t a, uint16x8_t b) {
return vqaddq_u16(a, b); }
121[[gnu::always_inline]] nce uint16x8_t multiply(uint16x8_t a, uint16x8_t b) {
return vmulq_u16(a, b); }
122[[gnu::always_inline]] nce uint16x8_t subtract(uint16x8_t a, uint16x8_t b) {
return vsubq_u16(a, b); }
123[[gnu::always_inline]] nce uint16x8_t subtract_halve(uint16x8_t a, uint16x8_t b) {
return vhsubq_u16(a, b); }
124[[gnu::always_inline]] nce uint16x8_t subtract_saturate(uint16x8_t a, uint16x8_t b) {
return vqsubq_u16(a, b); }
125[[gnu::always_inline]] nce uint16x8_t subtract_absolute(uint16x8_t a, uint16x8_t b) {
return vabdq_u16(a, b); }
126[[gnu::always_inline]] nce uint16x8_t max(uint16x8_t a, uint16x8_t b) {
return vmaxq_u16(a, b); }
127[[gnu::always_inline]] nce uint16x8_t min(uint16x8_t a, uint16x8_t b) {
return vminq_u16(a, b); }
128[[gnu::always_inline]] nce uint16x8_t shift_left_saturate(uint16x8_t a, int16x8_t b) {
return vqshlq_u16(a, b); }
129template <
int n>[[gnu::always_inline]] nce uint16x8_t shift_left_saturate(uint16x8_t a) {
return vqshlq_n_u16(a, n); }
130template <
int n>[[gnu::always_inline]] nce uint16x8_t shift_left_insert(uint16x8_t a, uint16x8_t b) {
return vsliq_n_u16(a, b, n); }
131[[gnu::always_inline]] nce uint16x8_t shift_left_round(uint16x8_t a, int16x8_t b) {
return vrshlq_u16(a, b); }
132[[gnu::always_inline]] nce uint16x8_t shift_left_round_saturate(uint16x8_t a, int16x8_t b) {
return vqrshlq_u16(a, b); }
133template <
int n>[[gnu::always_inline]] nce uint16x8_t shift_right(uint16x8_t a) {
return vshrq_n_u16(a, n); }
134template <
int n>[[gnu::always_inline]] nce uint16x8_t shift_right_round(uint16x8_t a) {
return vrshrq_n_u16(a, n); }
135template <
int n>[[gnu::always_inline]] nce uint16x8_t shift_right_insert(uint16x8_t a, uint16x8_t b) {
return vsriq_n_u16(a, b, n); }
136template <> [[gnu::always_inline]] nce int8x16_t reinterpret(uint16x8_t a) {
return vreinterpretq_s8_u16(a); }
137template <> [[gnu::always_inline]] nce int16x8_t reinterpret(uint16x8_t a) {
return vreinterpretq_s16_u16(a); }
138template <> [[gnu::always_inline]] nce int32x4_t reinterpret(uint16x8_t a) {
return vreinterpretq_s32_u16(a); }
139template <> [[gnu::always_inline]] nce uint8x16_t reinterpret(uint16x8_t a) {
return vreinterpretq_u8_u16(a); }
140template <> [[gnu::always_inline]] nce uint32x4_t reinterpret(uint16x8_t a) {
return vreinterpretq_u32_u16(a); }
141template <> [[gnu::always_inline]] nce uint64x2_t reinterpret(uint16x8_t a) {
return vreinterpretq_u64_u16(a); }
142template <> [[gnu::always_inline]] nce int64x2_t reinterpret(uint16x8_t a) {
return vreinterpretq_s64_u16(a); }
143template <> [[gnu::always_inline]] nce uint16x8_t reinterpret(uint16x8_t a) {
return a; }
144[[gnu::always_inline]] nce uint16x8_t shift_left(uint16x8_t a, int16x8_t b) {
return vshlq_u16(a, b); }
145[[gnu::always_inline]] nce uint16x8_t bitwise_not(uint16x8_t a) {
return vmvnq_u16(a); }
146[[gnu::always_inline]] nce uint16x8_t bitwise_and(uint16x8_t a, uint16x8_t b) {
return vandq_u16(a, b); }
147[[gnu::always_inline]] nce uint16x8_t bitwise_or(uint16x8_t a, uint16x8_t b) {
return vorrq_u16(a, b); }
148[[gnu::always_inline]] nce uint16x8_t bitwise_xor(uint16x8_t a, uint16x8_t b) {
return veorq_u16(a, b); }
149[[gnu::always_inline]] nce uint16x8_t bitwise_or_not(uint16x8_t a, uint16x8_t b) {
return vornq_u16(a, b); }
150[[gnu::always_inline]] nce uint16x8_t count_leading_zero_bits(uint16x8_t a) {
return vclzq_u16(a); }
151[[gnu::always_inline]] nce uint16x8_t bitwise_clear(uint16x8_t a, uint16x8_t b) {
return vbicq_u16(a, b); }
152template <
int lane>[[gnu::always_inline]] nce uint16_t get_lane(uint16x8_t v) {
return vgetq_lane_u16(v, lane); }
153[[gnu::always_inline]] nce uint16x8_t reverse_64bit(uint16x8_t a) {
return vrev64q_u16(a); }
154[[gnu::always_inline]] nce uint16x8_t reverse_32bit(uint16x8_t a) {
return vrev32q_u16(a); }
155[[gnu::always_inline]] nce uint16x8_t multiply(uint16x8_t a, uint16_t b) {
return vmulq_n_u16(a, b); }
156[[gnu::always_inline]] nce int16x8_t add(int16x8_t a, int16x8_t b) {
return vaddq_s16(a, b); }
157[[gnu::always_inline]] nce int16x8_t add_halve(int16x8_t a, int16x8_t b) {
return vhaddq_s16(a, b); }
158[[gnu::always_inline]] nce int16x8_t add_halve_round(int16x8_t a, int16x8_t b) {
return vrhaddq_s16(a, b); }
159[[gnu::always_inline]] nce int16x8_t add_saturate(int16x8_t a, int16x8_t b) {
return vqaddq_s16(a, b); }
160[[gnu::always_inline]] nce int16x8_t multiply(int16x8_t a, int16x8_t b) {
return vmulq_s16(a, b); }
161[[gnu::always_inline]] nce int16x8_t multiply_double_saturate_high(int16x8_t a, int16x8_t b) {
return vqdmulhq_s16(a, b); }
162[[gnu::always_inline]] nce int16x8_t multiply_double_round_saturate_high(int16x8_t a, int16x8_t b) {
return vqrdmulhq_s16(a, b); }
163[[gnu::always_inline]] nce int16x8_t subtract(int16x8_t a, int16x8_t b) {
return vsubq_s16(a, b); }
164[[gnu::always_inline]] nce int16x8_t subtract_halve(int16x8_t a, int16x8_t b) {
return vhsubq_s16(a, b); }
165[[gnu::always_inline]] nce int16x8_t subtract_saturate(int16x8_t a, int16x8_t b) {
return vqsubq_s16(a, b); }
166[[gnu::always_inline]] nce int16x8_t subtract_absolute(int16x8_t a, int16x8_t b) {
return vabdq_s16(a, b); }
167[[gnu::always_inline]] nce int16x8_t multiply_double_saturate_high(int16x8_t a, int16_t b) {
return vqdmulhq_n_s16(a, b); }
168[[gnu::always_inline]] nce int16x8_t multiply_double_round_saturate_high(int16x8_t a, int16_t b) {
return vqrdmulhq_n_s16(a, b); }
169[[gnu::always_inline]] nce int16x8_t abs(int16x8_t a) {
return vabsq_s16(a); }
170[[gnu::always_inline]] nce int16x8_t abs_saturate(int16x8_t a) {
return vqabsq_s16(a); }
171[[gnu::always_inline]] nce int16x8_t max(int16x8_t a, int16x8_t b) {
return vmaxq_s16(a, b); }
172[[gnu::always_inline]] nce int16x8_t min(int16x8_t a, int16x8_t b) {
return vminq_s16(a, b); }
173[[gnu::always_inline]] nce int16x8_t shift_left(int16x8_t a, int16x8_t b) {
return vshlq_s16(a, b); }
174[[gnu::always_inline]] nce int16x8_t shift_left_saturate(int16x8_t a, int16x8_t b) {
return vqshlq_s16(a, b); }
175template <
int n>[[gnu::always_inline]] nce int16x8_t shift_left_saturate(int16x8_t a) {
return vqshlq_n_s16(a, n); }
176[[gnu::always_inline]] nce int16x8_t shift_left_round(int16x8_t a, int16x8_t b) {
return vrshlq_s16(a, b); }
177[[gnu::always_inline]] nce int16x8_t shift_left_round_saturate(int16x8_t a, int16x8_t b) {
return vqrshlq_s16(a, b); }
178template <
int n>[[gnu::always_inline]] nce int16x8_t shift_left_insert(int16x8_t a, int16x8_t b) {
return vsliq_n_s16(a, b, n); }
179template <
int n>[[gnu::always_inline]] nce int16x8_t shift_right(int16x8_t a) {
return vshrq_n_s16(a, n); }
180template <
int n>[[gnu::always_inline]] nce int16x8_t shift_right_round(int16x8_t a) {
return vrshrq_n_s16(a, n); }
181template <
int n>[[gnu::always_inline]] nce int16x8_t shift_right_insert(int16x8_t a, int16x8_t b) {
return vsriq_n_s16(a, b, n); }
182template <> [[gnu::always_inline]] nce int8x16_t reinterpret(int16x8_t a) {
return vreinterpretq_s8_s16(a); }
183template <> [[gnu::always_inline]] nce int32x4_t reinterpret(int16x8_t a) {
return vreinterpretq_s32_s16(a); }
184template <> [[gnu::always_inline]] nce uint8x16_t reinterpret(int16x8_t a) {
return vreinterpretq_u8_s16(a); }
185template <> [[gnu::always_inline]] nce uint16x8_t reinterpret(int16x8_t a) {
return vreinterpretq_u16_s16(a); }
186template <> [[gnu::always_inline]] nce uint32x4_t reinterpret(int16x8_t a) {
return vreinterpretq_u32_s16(a); }
187template <> [[gnu::always_inline]] nce uint64x2_t reinterpret(int16x8_t a) {
return vreinterpretq_u64_s16(a); }
188template <> [[gnu::always_inline]] nce int64x2_t reinterpret(int16x8_t a) {
return vreinterpretq_s64_s16(a); }
189template <> [[gnu::always_inline]] nce int16x8_t reinterpret(int16x8_t a) {
return a; }
190[[gnu::always_inline]] nce int16x8_t negate(int16x8_t a) {
return vnegq_s16(a); }
191[[gnu::always_inline]] nce int16x8_t negate_saturate(int16x8_t a) {
return vqnegq_s16(a); }
192[[gnu::always_inline]] nce int16x8_t bitwise_not(int16x8_t a) {
return vmvnq_s16(a); }
193[[gnu::always_inline]] nce int16x8_t bitwise_and(int16x8_t a, int16x8_t b) {
return vandq_s16(a, b); }
194[[gnu::always_inline]] nce int16x8_t bitwise_or(int16x8_t a, int16x8_t b) {
return vorrq_s16(a, b); }
195[[gnu::always_inline]] nce int16x8_t bitwise_xor(int16x8_t a, int16x8_t b) {
return veorq_s16(a, b); }
196[[gnu::always_inline]] nce int16x8_t bitwise_or_not(int16x8_t a, int16x8_t b) {
return vornq_s16(a, b); }
197[[gnu::always_inline]] nce int16x8_t count_leading_sign_bits(int16x8_t a) {
return vclsq_s16(a); }
198[[gnu::always_inline]] nce int16x8_t count_leading_zero_bits(int16x8_t a) {
return vclzq_s16(a); }
199[[gnu::always_inline]] nce int16x8_t bitwise_clear(int16x8_t a, int16x8_t b) {
return vbicq_s16(a, b); }
200template <
int lane>[[gnu::always_inline]] nce int16_t get_lane(int16x8_t v) {
return vgetq_lane_s16(v, lane); }
201[[gnu::always_inline]] nce int16x8_t reverse_64bit(int16x8_t a) {
return vrev64q_s16(a); }
202[[gnu::always_inline]] nce int16x8_t reverse_32bit(int16x8_t a) {
return vrev32q_s16(a); }
203[[gnu::always_inline]] nce int16x8_t multiply(int16x8_t a, int16_t b) {
return vmulq_n_s16(a, b); }
204[[gnu::always_inline]] nce int32x4_t add(int32x4_t a, int32x4_t b) {
return vaddq_s32(a, b); }
205[[gnu::always_inline]] nce int32x4_t add_halve(int32x4_t a, int32x4_t b) {
return vhaddq_s32(a, b); }
206[[gnu::always_inline]] nce int32x4_t add_halve_round(int32x4_t a, int32x4_t b) {
return vrhaddq_s32(a, b); }
207[[gnu::always_inline]] nce int32x4_t add_saturate(int32x4_t a, int32x4_t b) {
return vqaddq_s32(a, b); }
208[[gnu::always_inline]] nce int32x4_t multiply(int32x4_t a, int32x4_t b) {
return vmulq_s32(a, b); }
209[[gnu::always_inline]] nce int32x4_t multiply_double_saturate_high(int32x4_t a, int32x4_t b) {
return vqdmulhq_s32(a, b); }
210[[gnu::always_inline]] nce int32x4_t multiply_double_round_saturate_high(int32x4_t a, int32x4_t b) {
return vqrdmulhq_s32(a, b); }
211[[gnu::always_inline]] nce int32x4_t subtract(int32x4_t a, int32x4_t b) {
return vsubq_s32(a, b); }
212[[gnu::always_inline]] nce int32x4_t subtract_halve(int32x4_t a, int32x4_t b) {
return vhsubq_s32(a, b); }
213[[gnu::always_inline]] nce int32x4_t subtract_saturate(int32x4_t a, int32x4_t b) {
return vqsubq_s32(a, b); }
214[[gnu::always_inline]] nce int32x4_t subtract_absolute(int32x4_t a, int32x4_t b) {
return vabdq_s32(a, b); }
215[[gnu::always_inline]] nce int32x4_t multiply_double_saturate_high(int32x4_t a, int32_t b) {
return vqdmulhq_n_s32(a, b); }
216[[gnu::always_inline]] nce int32x4_t multiply_double_round_saturate_high(int32x4_t a, int32_t b) {
return vqrdmulhq_n_s32(a, b); }
217[[gnu::always_inline]] nce int32x4_t abs(int32x4_t a) {
return vabsq_s32(a); }
218[[gnu::always_inline]] nce int32x4_t abs_saturate(int32x4_t a) {
return vqabsq_s32(a); }
219[[gnu::always_inline]] nce int32x4_t max(int32x4_t a, int32x4_t b) {
return vmaxq_s32(a, b); }
220[[gnu::always_inline]] nce int32x4_t min(int32x4_t a, int32x4_t b) {
return vminq_s32(a, b); }
221[[gnu::always_inline]] nce int32x4_t shift_left(int32x4_t a, int32x4_t b) {
return vshlq_s32(a, b); }
222[[gnu::always_inline]] nce int32x4_t shift_left_saturate(int32x4_t a, int32x4_t b) {
return vqshlq_s32(a, b); }
223template <
int n>[[gnu::always_inline]] nce int32x4_t shift_left_saturate(int32x4_t a) {
return vqshlq_n_s32(a, n); }
224[[gnu::always_inline]] nce int32x4_t shift_left_round(int32x4_t a, int32x4_t b) {
return vrshlq_s32(a, b); }
225[[gnu::always_inline]] nce int32x4_t shift_left_round_saturate(int32x4_t a, int32x4_t b) {
return vqrshlq_s32(a, b); }
226template <
int n>[[gnu::always_inline]] nce int32x4_t shift_left_insert(int32x4_t a, int32x4_t b) {
return vsliq_n_s32(a, b, n); }
227template <
int n>[[gnu::always_inline]] nce int32x4_t shift_right(int32x4_t a) {
return vshrq_n_s32(a, n); }
228template <
int n>[[gnu::always_inline]] nce int32x4_t shift_right_round(int32x4_t a) {
return vrshrq_n_s32(a, n); }
229template <
int n>[[gnu::always_inline]] nce int32x4_t shift_right_insert(int32x4_t a, int32x4_t b) {
return vsriq_n_s32(a, b, n); }
230template <> [[gnu::always_inline]] nce int8x16_t reinterpret(int32x4_t a) {
return vreinterpretq_s8_s32(a); }
231template <> [[gnu::always_inline]] nce int16x8_t reinterpret(int32x4_t a) {
return vreinterpretq_s16_s32(a); }
232template <> [[gnu::always_inline]] nce uint8x16_t reinterpret(int32x4_t a) {
return vreinterpretq_u8_s32(a); }
233template <> [[gnu::always_inline]] nce uint16x8_t reinterpret(int32x4_t a) {
return vreinterpretq_u16_s32(a); }
234template <> [[gnu::always_inline]] nce uint32x4_t reinterpret(int32x4_t a) {
return vreinterpretq_u32_s32(a); }
235template <> [[gnu::always_inline]] nce uint64x2_t reinterpret(int32x4_t a) {
return vreinterpretq_u64_s32(a); }
236template <> [[gnu::always_inline]] nce int64x2_t reinterpret(int32x4_t a) {
return vreinterpretq_s64_s32(a); }
237template <> [[gnu::always_inline]] nce int32x4_t reinterpret(int32x4_t a) {
return a; }
238[[gnu::always_inline]] nce int32x4_t negate(int32x4_t a) {
return vnegq_s32(a); }
239[[gnu::always_inline]] nce int32x4_t negate_saturate(int32x4_t a) {
return vqnegq_s32(a); }
240[[gnu::always_inline]] nce int32x4_t bitwise_not(int32x4_t a) {
return vmvnq_s32(a); }
241[[gnu::always_inline]] nce int32x4_t bitwise_and(int32x4_t a, int32x4_t b) {
return vandq_s32(a, b); }
242[[gnu::always_inline]] nce int32x4_t bitwise_or(int32x4_t a, int32x4_t b) {
return vorrq_s32(a, b); }
243[[gnu::always_inline]] nce int32x4_t bitwise_xor(int32x4_t a, int32x4_t b) {
return veorq_s32(a, b); }
244[[gnu::always_inline]] nce int32x4_t bitwise_or_not(int32x4_t a, int32x4_t b) {
return vornq_s32(a, b); }
245[[gnu::always_inline]] nce int32x4_t count_leading_sign_bits(int32x4_t a) {
return vclsq_s32(a); }
246[[gnu::always_inline]] nce int32x4_t count_leading_zero_bits(int32x4_t a) {
return vclzq_s32(a); }
247[[gnu::always_inline]] nce int32x4_t bitwise_clear(int32x4_t a, int32x4_t b) {
return vbicq_s32(a, b); }
248template <
int lane>[[gnu::always_inline]] nce int32_t get_lane(int32x4_t v) {
return vgetq_lane_s32(v, lane); }
249[[gnu::always_inline]] nce int32x4_t reverse_64bit(int32x4_t a) {
return vrev64q_s32(a); }
250[[gnu::always_inline]] nce int32x4_t multiply(int32x4_t a, int32_t b) {
return vmulq_n_s32(a, b); }
251template <> [[gnu::always_inline]] nce int8x16_t reinterpret(uint64x2_t a) {
return vreinterpretq_s8_u64(a); }
252template <> [[gnu::always_inline]] nce int16x8_t reinterpret(uint64x2_t a) {
return vreinterpretq_s16_u64(a); }
253template <> [[gnu::always_inline]] nce int32x4_t reinterpret(uint64x2_t a) {
return vreinterpretq_s32_u64(a); }
254template <> [[gnu::always_inline]] nce uint8x16_t reinterpret(uint64x2_t a) {
return vreinterpretq_u8_u64(a); }
255template <> [[gnu::always_inline]] nce uint16x8_t reinterpret(uint64x2_t a) {
return vreinterpretq_u16_u64(a); }
256template <> [[gnu::always_inline]] nce uint32x4_t reinterpret(uint64x2_t a) {
return vreinterpretq_u32_u64(a); }
257template <> [[gnu::always_inline]] nce int64x2_t reinterpret(uint64x2_t a) {
return vreinterpretq_s64_u64(a); }
258template <> [[gnu::always_inline]] nce uint64x2_t reinterpret(uint64x2_t a) {
return a; }
259template <
int lane>[[gnu::always_inline]] nce uint64_t get_lane(uint64x2_t v) {
return vgetq_lane_u64(v, lane); }
260[[gnu::always_inline]] nce uint32x4_t shift_left(uint32x4_t a, int32x4_t b) {
return vshlq_u32(a, b); }
261[[gnu::always_inline]] nce uint32x4_t add(uint32x4_t a, uint32x4_t b) {
return vaddq_u32(a, b); }
262[[gnu::always_inline]] nce uint32x4_t add_halve(uint32x4_t a, uint32x4_t b) {
return vhaddq_u32(a, b); }
263[[gnu::always_inline]] nce uint32x4_t add_halve_round(uint32x4_t a, uint32x4_t b) {
return vrhaddq_u32(a, b); }
264[[gnu::always_inline]] nce uint32x4_t add_saturate(uint32x4_t a, uint32x4_t b) {
return vqaddq_u32(a, b); }
265[[gnu::always_inline]] nce uint32x4_t multiply(uint32x4_t a, uint32x4_t b) {
return vmulq_u32(a, b); }
266[[gnu::always_inline]] nce uint32x4_t subtract(uint32x4_t a, uint32x4_t b) {
return vsubq_u32(a, b); }
267[[gnu::always_inline]] nce uint32x4_t subtract_halve(uint32x4_t a, uint32x4_t b) {
return vhsubq_u32(a, b); }
268[[gnu::always_inline]] nce uint32x4_t subtract_saturate(uint32x4_t a, uint32x4_t b) {
return vqsubq_u32(a, b); }
269[[gnu::always_inline]] nce uint32x4_t subtract_absolute(uint32x4_t a, uint32x4_t b) {
return vabdq_u32(a, b); }
270[[gnu::always_inline]] nce uint32x4_t max(uint32x4_t a, uint32x4_t b) {
return vmaxq_u32(a, b); }
271[[gnu::always_inline]] nce uint32x4_t min(uint32x4_t a, uint32x4_t b) {
return vminq_u32(a, b); }
272[[gnu::always_inline]] nce uint32x4_t shift_left_saturate(uint32x4_t a, int32x4_t b) {
return vqshlq_u32(a, b); }
273template <
int n>[[gnu::always_inline]] nce uint32x4_t shift_left_saturate(uint32x4_t a) {
return vqshlq_n_u32(a, n); }
274[[gnu::always_inline]] nce uint32x4_t shift_left_round(uint32x4_t a, int32x4_t b) {
return vrshlq_u32(a, b); }
275[[gnu::always_inline]] nce uint32x4_t shift_left_round_saturate(uint32x4_t a, int32x4_t b) {
return vqrshlq_u32(a, b); }
276template <
int n>[[gnu::always_inline]] nce uint32x4_t shift_left_insert(uint32x4_t a, uint32x4_t b) {
return vsliq_n_u32(a, b, n); }
277template <
int n>[[gnu::always_inline]] nce uint32x4_t shift_right(uint32x4_t a) {
return vshrq_n_u32(a, n); }
278template <
int n>[[gnu::always_inline]] nce uint32x4_t shift_right_round(uint32x4_t a) {
return vrshrq_n_u32(a, n); }
279template <
int n>[[gnu::always_inline]] nce uint32x4_t shift_right_insert(uint32x4_t a, uint32x4_t b) {
return vsriq_n_u32(a, b, n); }
280template <> [[gnu::always_inline]] nce int8x16_t reinterpret(uint32x4_t a) {
return vreinterpretq_s8_u32(a); }
281template <> [[gnu::always_inline]] nce int16x8_t reinterpret(uint32x4_t a) {
return vreinterpretq_s16_u32(a); }
282template <> [[gnu::always_inline]] nce int32x4_t reinterpret(uint32x4_t a) {
return vreinterpretq_s32_u32(a); }
283template <> [[gnu::always_inline]] nce uint8x16_t reinterpret(uint32x4_t a) {
return vreinterpretq_u8_u32(a); }
284template <> [[gnu::always_inline]] nce uint16x8_t reinterpret(uint32x4_t a) {
return vreinterpretq_u16_u32(a); }
285template <> [[gnu::always_inline]] nce uint64x2_t reinterpret(uint32x4_t a) {
return vreinterpretq_u64_u32(a); }
286template <> [[gnu::always_inline]] nce int64x2_t reinterpret(uint32x4_t a) {
return vreinterpretq_s64_u32(a); }
287template <> [[gnu::always_inline]] nce uint32x4_t reinterpret(uint32x4_t a) {
return a; }
288[[gnu::always_inline]] nce uint32x4_t bitwise_not(uint32x4_t a) {
return vmvnq_u32(a); }
289[[gnu::always_inline]] nce uint32x4_t bitwise_and(uint32x4_t a, uint32x4_t b) {
return vandq_u32(a, b); }
290[[gnu::always_inline]] nce uint32x4_t bitwise_or(uint32x4_t a, uint32x4_t b) {
return vorrq_u32(a, b); }
291[[gnu::always_inline]] nce uint32x4_t bitwise_xor(uint32x4_t a, uint32x4_t b) {
return veorq_u32(a, b); }
292[[gnu::always_inline]] nce uint32x4_t bitwise_or_not(uint32x4_t a, uint32x4_t b) {
return vornq_u32(a, b); }
293[[gnu::always_inline]] nce uint32x4_t count_leading_zero_bits(uint32x4_t a) {
return vclzq_u32(a); }
294[[gnu::always_inline]] nce uint32x4_t bitwise_clear(uint32x4_t a, uint32x4_t b) {
return vbicq_u32(a, b); }
295template <
int lane>[[gnu::always_inline]] nce uint32_t get_lane(uint32x4_t v) {
return vgetq_lane_u32(v, lane); }
296[[gnu::always_inline]] nce uint32x4_t reverse_64bit(uint32x4_t a) {
return vrev64q_u32(a); }
297[[gnu::always_inline]] nce uint32x4_t multiply(uint32x4_t a, uint32_t b) {
return vmulq_n_u32(a, b); }
298template <> [[gnu::always_inline]] nce int8x16_t reinterpret(int64x2_t a) {
return vreinterpretq_s8_s64(a); }
299template <> [[gnu::always_inline]] nce int16x8_t reinterpret(int64x2_t a) {
return vreinterpretq_s16_s64(a); }
300template <> [[gnu::always_inline]] nce int32x4_t reinterpret(int64x2_t a) {
return vreinterpretq_s32_s64(a); }
301template <> [[gnu::always_inline]] nce uint8x16_t reinterpret(int64x2_t a) {
return vreinterpretq_u8_s64(a); }
302template <> [[gnu::always_inline]] nce uint16x8_t reinterpret(int64x2_t a) {
return vreinterpretq_u16_s64(a); }
303template <> [[gnu::always_inline]] nce uint32x4_t reinterpret(int64x2_t a) {
return vreinterpretq_u32_s64(a); }
304template <> [[gnu::always_inline]] nce uint64x2_t reinterpret(int64x2_t a) {
return vreinterpretq_u64_s64(a); }
305template <> [[gnu::always_inline]] nce int64x2_t reinterpret(int64x2_t a) {
return a; }
306template <
int lane>[[gnu::always_inline]] nce int64_t get_lane(int64x2_t v) {
return vgetq_lane_s64(v, lane); }
307template <
int lane>[[gnu::always_inline]] nce uint8x16_t set_lane(uint8_t a, uint8x16_t v) {
return vsetq_lane_u8(a, v, lane); }
308template <
int lane>[[gnu::always_inline]] nce uint16x8_t set_lane(uint16_t a, uint16x8_t v) {
return vsetq_lane_u16(a, v, lane); }
309template <
int lane>[[gnu::always_inline]] nce uint32x4_t set_lane(uint32_t a, uint32x4_t v) {
return vsetq_lane_u32(a, v, lane); }
310template <
int lane>[[gnu::always_inline]] nce uint64x2_t set_lane(uint64_t a, uint64x2_t v) {
return vsetq_lane_u64(a, v, lane); }
311template <
int lane>[[gnu::always_inline]] nce int8x16_t set_lane(int8_t a, int8x16_t v) {
return vsetq_lane_s8(a, v, lane); }
312template <
int lane>[[gnu::always_inline]] nce int16x8_t set_lane(int16_t a, int16x8_t v) {
return vsetq_lane_s16(a, v, lane); }
313template <
int lane>[[gnu::always_inline]] nce int32x4_t set_lane(int32_t a, int32x4_t v) {
return vsetq_lane_s32(a, v, lane); }
314template <
int lane>[[gnu::always_inline]] nce int64x2_t set_lane(int64_t a, int64x2_t v) {
return vsetq_lane_s64(a, v, lane); }
315[[gnu::always_inline]]
inline void store1(int8_t *ptr, int8x16_t val) {
return vst1q_s8(ptr, val); }
316[[gnu::always_inline]]
inline void store1(int16_t *ptr, int16x8_t val) {
return vst1q_s16(ptr, val); }
317[[gnu::always_inline]]
inline void store1(int32_t *ptr, int32x4_t val) {
return vst1q_s32(ptr, val); }
318[[gnu::always_inline]]
inline void store1(uint8_t *ptr, uint8x16_t val) {
return vst1q_u8(ptr, val); }
319[[gnu::always_inline]]
inline void store1(uint16_t *ptr, uint16x8_t val) {
return vst1q_u16(ptr, val); }
320[[gnu::always_inline]]
inline void store1(uint32_t *ptr, uint32x4_t val) {
return vst1q_u32(ptr, val); }