2#include "arm_simd/shared/vfpv3_int.hpp"
4#include "arm_simd/shared/vfpv3_float.hpp"
16template <
typename T> nce T convert(float32x2_t a);
17template <
typename T> nce T convert(int32x2_t a);
18template <
typename T> nce T convert(uint32x2_t a);
19template <
typename T> nce T reinterpret(int8x8_t a);
20template <
typename T> nce T reinterpret(int16x4_t a);
21template <
typename T> nce T reinterpret(int32x2_t a);
22template <
typename T> nce T reinterpret(float32x2_t a);
23template <
typename T> nce T reinterpret(uint8x8_t a);
24template <
typename T> nce T reinterpret(uint16x4_t a);
25template <
typename T> nce T reinterpret(uint32x2_t a);
26template <
typename T> nce T reinterpret(poly8x8_t a);
27template <
typename T> nce T reinterpret(poly16x4_t a);
28template <
typename T> nce T reinterpret(uint64x1_t a);
29template <
typename T> nce T reinterpret(int64x1_t a);
30template <
typename T> nce T reinterpret(poly8x16_t a);
31template <
typename T> nce T reinterpret(poly16x8_t a);
32template <
typename T> nce T create(uint64_t a);
33template <
typename T> nce T duplicate(int8_t value);
34template <
typename T> nce T duplicate(int16_t value);
35template <
typename T> nce T duplicate(int32_t value);
36template <
typename T> nce T duplicate(int64_t value);
37template <
typename T> nce T duplicate(uint8_t value);
38template <
typename T> nce T duplicate(uint16_t value);
39template <
typename T> nce T duplicate(uint32_t value);
40template <
typename T> nce T duplicate(uint64_t value);
41template <
typename T> nce T duplicate(float32_t value);
42template <
typename T> nce T duplicate(poly8_t value);
43template <
typename T> nce T duplicate(poly16_t value);
44template <
typename T> nce T load1(int8_t
const *ptr);
45template <
typename T> nce T load1(int16_t
const *ptr);
46template <
typename T> nce T load1(int32_t
const *ptr);
47template <
typename T> nce T load1(int64_t
const *ptr);
48template <
typename T> nce T load1(uint8_t
const *ptr);
49template <
typename T> nce T load1(uint16_t
const *ptr);
50template <
typename T> nce T load1(uint32_t
const *ptr);
51template <
typename T> nce T load1(uint64_t
const *ptr);
52template <
typename T> nce T load1(float32_t
const *ptr);
53template <
typename T> nce T load1(poly8_t
const *ptr);
54template <
typename T> nce T load1(poly16_t
const *ptr);
55template <
typename T> nce T load1_duplicate(int8_t
const *ptr);
56template <
typename T> nce T load1_duplicate(int16_t
const *ptr);
57template <
typename T> nce T load1_duplicate(int32_t
const *ptr);
58template <
typename T> nce T load1_duplicate(int64_t
const *ptr);
59template <
typename T> nce T load1_duplicate(uint8_t
const *ptr);
60template <
typename T> nce T load1_duplicate(uint16_t
const *ptr);
61template <
typename T> nce T load1_duplicate(uint32_t
const *ptr);
62template <
typename T> nce T load1_duplicate(uint64_t
const *ptr);
63template <
typename T> nce T load1_duplicate(float32_t
const *ptr);
64template <
typename T> nce T load1_duplicate(poly8_t
const *ptr);
65template <
typename T> nce T load1_duplicate(poly16_t
const *ptr);
66template <
typename T> nce T load2(int8_t
const *ptr);
67template <
typename T> nce T load2(int16_t
const *ptr);
68template <
typename T> nce T load2(int32_t
const *ptr);
69template <
typename T> nce T load2(uint8_t
const *ptr);
70template <
typename T> nce T load2(uint16_t
const *ptr);
71template <
typename T> nce T load2(uint32_t
const *ptr);
72template <
typename T> nce T load2(float32_t
const *ptr);
73template <
typename T> nce T load2(poly8_t
const *ptr);
74template <
typename T> nce T load2(poly16_t
const *ptr);
75template <
typename T> nce T load3(int8_t
const *ptr);
76template <
typename T> nce T load3(int16_t
const *ptr);
77template <
typename T> nce T load3(int32_t
const *ptr);
78template <
typename T> nce T load3(uint8_t
const *ptr);
79template <
typename T> nce T load3(uint16_t
const *ptr);
80template <
typename T> nce T load3(uint32_t
const *ptr);
81template <
typename T> nce T load3(float32_t
const *ptr);
82template <
typename T> nce T load3(poly8_t
const *ptr);
83template <
typename T> nce T load3(poly16_t
const *ptr);
84template <
typename T> nce T load4(int8_t
const *ptr);
85template <
typename T> nce T load4(int16_t
const *ptr);
86template <
typename T> nce T load4(int32_t
const *ptr);
87template <
typename T> nce T load4(uint8_t
const *ptr);
88template <
typename T> nce T load4(uint16_t
const *ptr);
89template <
typename T> nce T load4(uint32_t
const *ptr);
90template <
typename T> nce T load4(float32_t
const *ptr);
91template <
typename T> nce T load4(poly8_t
const *ptr);
92template <
typename T> nce T load4(poly16_t
const *ptr);
93template <
typename T> nce T load2_duplicate(int8_t
const *ptr);
94template <
typename T> nce T load2_duplicate(int16_t
const *ptr);
95template <
typename T> nce T load2_duplicate(int32_t
const *ptr);
96template <
typename T> nce T load2_duplicate(uint8_t
const *ptr);
97template <
typename T> nce T load2_duplicate(uint16_t
const *ptr);
98template <
typename T> nce T load2_duplicate(uint32_t
const *ptr);
99template <
typename T> nce T load2_duplicate(float32_t
const *ptr);
100template <
typename T> nce T load2_duplicate(poly8_t
const *ptr);
101template <
typename T> nce T load2_duplicate(poly16_t
const *ptr);
102template <
typename T> nce T load3_duplicate(int8_t
const *ptr);
103template <
typename T> nce T load3_duplicate(int16_t
const *ptr);
104template <
typename T> nce T load3_duplicate(int32_t
const *ptr);
105template <
typename T> nce T load3_duplicate(uint8_t
const *ptr);
106template <
typename T> nce T load3_duplicate(uint16_t
const *ptr);
107template <
typename T> nce T load3_duplicate(uint32_t
const *ptr);
108template <
typename T> nce T load3_duplicate(float32_t
const *ptr);
109template <
typename T> nce T load3_duplicate(poly8_t
const *ptr);
110template <
typename T> nce T load3_duplicate(poly16_t
const *ptr);
111template <
typename T> nce T load4_duplicate(int8_t
const *ptr);
112template <
typename T> nce T load4_duplicate(int16_t
const *ptr);
113template <
typename T> nce T load4_duplicate(int32_t
const *ptr);
114template <
typename T> nce T load4_duplicate(uint8_t
const *ptr);
115template <
typename T> nce T load4_duplicate(uint16_t
const *ptr);
116template <
typename T> nce T load4_duplicate(uint32_t
const *ptr);
117template <
typename T> nce T load4_duplicate(float32_t
const *ptr);
118template <
typename T> nce T load4_duplicate(poly8_t
const *ptr);
119template <
typename T> nce T load4_duplicate(poly16_t
const *ptr);
120#if defined(__clang__) || (__GNUC__ > 13)
121template <
typename T> nce T load1_x2(int8_t
const *ptr);
122template <
typename T> nce T load1_x2(int16_t
const *ptr);
123template <
typename T> nce T load1_x2(int32_t
const *ptr);
124template <
typename T> nce T load1_x2(uint8_t
const *ptr);
125template <
typename T> nce T load1_x2(uint16_t
const *ptr);
126template <
typename T> nce T load1_x2(uint32_t
const *ptr);
127template <
typename T> nce T load1_x2(float32_t
const *ptr);
128template <
typename T> nce T load1_x2(poly8_t
const *ptr);
129template <
typename T> nce T load1_x2(poly16_t
const *ptr);
130template <
typename T> nce T load1_x2(int64_t
const *ptr);
131template <
typename T> nce T load1_x2(uint64_t
const *ptr);
132template <
typename T> nce T load1_x3(int8_t
const *ptr);
133template <
typename T> nce T load1_x3(int16_t
const *ptr);
134template <
typename T> nce T load1_x3(int32_t
const *ptr);
135template <
typename T> nce T load1_x3(uint8_t
const *ptr);
136template <
typename T> nce T load1_x3(uint16_t
const *ptr);
137template <
typename T> nce T load1_x3(uint32_t
const *ptr);
138template <
typename T> nce T load1_x3(float32_t
const *ptr);
139template <
typename T> nce T load1_x3(poly8_t
const *ptr);
140template <
typename T> nce T load1_x3(poly16_t
const *ptr);
141template <
typename T> nce T load1_x3(int64_t
const *ptr);
142template <
typename T> nce T load1_x3(uint64_t
const *ptr);
143template <
typename T> nce T load1_x4(int8_t
const *ptr);
144template <
typename T> nce T load1_x4(int16_t
const *ptr);
145template <
typename T> nce T load1_x4(int32_t
const *ptr);
146template <
typename T> nce T load1_x4(uint8_t
const *ptr);
147template <
typename T> nce T load1_x4(uint16_t
const *ptr);
148template <
typename T> nce T load1_x4(uint32_t
const *ptr);
149template <
typename T> nce T load1_x4(float32_t
const *ptr);
150template <
typename T> nce T load1_x4(poly8_t
const *ptr);
151template <
typename T> nce T load1_x4(poly16_t
const *ptr);
152template <
typename T> nce T load1_x4(int64_t
const *ptr);
153template <
typename T> nce T load1_x4(uint64_t
const *ptr);
155template <
typename T> nce T load2(int64_t
const *ptr);
156template <
typename T> nce T load2(uint64_t
const *ptr);
157template <
typename T> nce T load3(int64_t
const *ptr);
158template <
typename T> nce T load3(uint64_t
const *ptr);
159template <
typename T> nce T load4(int64_t
const *ptr);
160template <
typename T> nce T load4(uint64_t
const *ptr);
161template <
typename T> nce T load2_duplicate(int64_t
const *ptr);
162template <
typename T> nce T load2_duplicate(uint64_t
const *ptr);
163template <
typename T> nce T load3_duplicate(int64_t
const *ptr);
164template <
typename T> nce T load3_duplicate(uint64_t
const *ptr);
165template <
typename T> nce T load4_duplicate(int64_t
const *ptr);
166template <
typename T> nce T load4_duplicate(uint64_t
const *ptr);
168inline void store1(int8_t *ptr, int8x8_t val);
169inline void store1(int8_t *ptr, int8x16_t val);
170inline void store1(int16_t *ptr, int16x4_t val);
171inline void store1(int16_t *ptr, int16x8_t val);
172inline void store1(int32_t *ptr, int32x2_t val);
173inline void store1(int32_t *ptr, int32x4_t val);
174inline void store1(int64_t *ptr, int64x1_t val);
175inline void store1(int64_t *ptr, int64x2_t val);
176inline void store1(uint8_t *ptr, uint8x8_t val);
177inline void store1(uint8_t *ptr, uint8x16_t val);
178inline void store1(uint16_t *ptr, uint16x4_t val);
179inline void store1(uint16_t *ptr, uint16x8_t val);
180inline void store1(uint32_t *ptr, uint32x2_t val);
181inline void store1(uint32_t *ptr, uint32x4_t val);
182inline void store1(uint64_t *ptr, uint64x1_t val);
183inline void store1(uint64_t *ptr, uint64x2_t val);
184inline void store1(float32_t *ptr, float32x2_t val);
185inline void store1(float32_t *ptr, float32x4_t val);
186inline void store1(poly8_t *ptr, poly8x8_t val);
187inline void store1(poly8_t *ptr, poly8x16_t val);
188inline void store1(poly16_t *ptr, poly16x4_t val);
189inline void store1(poly16_t *ptr, poly16x8_t val);
190inline void store2(int8_t *ptr, int8x8x2_t val);
191inline void store2(int16_t *ptr, int16x4x2_t val);
192inline void store2(int16_t *ptr, int16x8x2_t val);
193inline void store2(int32_t *ptr, int32x2x2_t val);
194inline void store2(int32_t *ptr, int32x4x2_t val);
195inline void store2(uint8_t *ptr, uint8x8x2_t val);
196inline void store2(uint16_t *ptr, uint16x4x2_t val);
197inline void store2(uint16_t *ptr, uint16x8x2_t val);
198inline void store2(uint32_t *ptr, uint32x2x2_t val);
199inline void store2(uint32_t *ptr, uint32x4x2_t val);
200inline void store2(float32_t *ptr, float32x2x2_t val);
201inline void store2(float32_t *ptr, float32x4x2_t val);
202inline void store2(poly8_t *ptr, poly8x8x2_t val);
203inline void store2(poly16_t *ptr, poly16x4x2_t val);
204inline void store2(poly16_t *ptr, poly16x8x2_t val);
205inline void store3(int8_t *ptr, int8x8x3_t val);
206inline void store3(int8_t *ptr, int8x16x3_t val);
207inline void store3(int16_t *ptr, int16x4x3_t val);
208inline void store3(int16_t *ptr, int16x8x3_t val);
209inline void store3(int32_t *ptr, int32x2x3_t val);
210inline void store3(int32_t *ptr, int32x4x3_t val);
211inline void store3(uint8_t *ptr, uint8x8x3_t val);
212inline void store3(uint8_t *ptr, uint8x16x3_t val);
213inline void store3(uint16_t *ptr, uint16x4x3_t val);
214inline void store3(uint16_t *ptr, uint16x8x3_t val);
215inline void store3(uint32_t *ptr, uint32x2x3_t val);
216inline void store3(uint32_t *ptr, uint32x4x3_t val);
217inline void store3(float32_t *ptr, float32x2x3_t val);
218inline void store3(float32_t *ptr, float32x4x3_t val);
219inline void store3(poly8_t *ptr, poly8x8x3_t val);
220inline void store3(poly8_t *ptr, poly8x16x3_t val);
221inline void store3(poly16_t *ptr, poly16x4x3_t val);
222inline void store3(poly16_t *ptr, poly16x8x3_t val);
223inline void store4(int8_t *ptr, int8x8x4_t val);
224inline void store4(int16_t *ptr, int16x4x4_t val);
225inline void store4(int16_t *ptr, int16x8x4_t val);
226inline void store4(int32_t *ptr, int32x2x4_t val);
227inline void store4(int32_t *ptr, int32x4x4_t val);
228inline void store4(uint8_t *ptr, uint8x8x4_t val);
229inline void store4(uint16_t *ptr, uint16x4x4_t val);
230inline void store4(uint16_t *ptr, uint16x8x4_t val);
231inline void store4(uint32_t *ptr, uint32x2x4_t val);
232inline void store4(uint32_t *ptr, uint32x4x4_t val);
233inline void store4(float32_t *ptr, float32x2x4_t val);
234inline void store4(float32_t *ptr, float32x4x4_t val);
235inline void store4(poly8_t *ptr, poly8x8x4_t val);
236inline void store4(poly16_t *ptr, poly16x4x4_t val);
237inline void store4(poly16_t *ptr, poly16x8x4_t val);
238[[gnu::always_inline]] nce uint8x8_t add(uint8x8_t a, uint8x8_t b) {
return vadd_u8(a, b); }
239[[gnu::always_inline]] nce uint16x8_t add_long(uint8x8_t a, uint8x8_t b) {
return vaddl_u8(a, b); }
240[[gnu::always_inline]] nce uint8x8_t add_halve(uint8x8_t a, uint8x8_t b) {
return vhadd_u8(a, b); }
241[[gnu::always_inline]] nce uint8x8_t add_halve_round(uint8x8_t a, uint8x8_t b) {
return vrhadd_u8(a, b); }
242[[gnu::always_inline]] nce uint8x8_t add_saturate(uint8x8_t a, uint8x8_t b) {
return vqadd_u8(a, b); }
243[[gnu::always_inline]] nce uint8x8_t multiply(uint8x8_t a, uint8x8_t b) {
return vmul_u8(a, b); }
244[[gnu::always_inline]] nce uint8x8_t multiply_add(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
return vmla_u8(a, b, c); }
245[[gnu::always_inline]] nce uint8x8_t multiply_subtract(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
return vmls_u8(a, b, c); }
246[[gnu::always_inline]] nce uint16x8_t multiply_long(uint8x8_t a, uint8x8_t b) {
return vmull_u8(a, b); }
247[[gnu::always_inline]] nce uint8x8_t subtract(uint8x8_t a, uint8x8_t b) {
return vsub_u8(a, b); }
248[[gnu::always_inline]] nce uint16x8_t subtract_long(uint8x8_t a, uint8x8_t b) {
return vsubl_u8(a, b); }
249[[gnu::always_inline]] nce uint8x8_t subtract_halve(uint8x8_t a, uint8x8_t b) {
return vhsub_u8(a, b); }
250[[gnu::always_inline]] nce uint8x8_t subtract_saturate(uint8x8_t a, uint8x8_t b) {
return vqsub_u8(a, b); }
251[[gnu::always_inline]] nce uint8x8_t subtract_absolute(uint8x8_t a, uint8x8_t b) {
return vabd_u8(a, b); }
252[[gnu::always_inline]] nce uint16x8_t subtract_absolute_long(uint8x8_t a, uint8x8_t b) {
return vabdl_u8(a, b); }
253[[gnu::always_inline]] nce uint8x8_t subtract_absolute_add(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
return vaba_u8(a, b, c); }
254[[gnu::always_inline]] nce uint8x8_t max(uint8x8_t a, uint8x8_t b) {
return vmax_u8(a, b); }
255[[gnu::always_inline]] nce uint8x8_t min(uint8x8_t a, uint8x8_t b) {
return vmin_u8(a, b); }
256[[gnu::always_inline]] nce uint8x8_t pairwise_add(uint8x8_t a, uint8x8_t b) {
return vpadd_u8(a, b); }
257[[gnu::always_inline]] nce uint16x4_t pairwise_add_long(uint8x8_t a) {
return vpaddl_u8(a); }
258[[gnu::always_inline]] nce uint8x8_t pairwise_max(uint8x8_t a, uint8x8_t b) {
return vpmax_u8(a, b); }
259[[gnu::always_inline]] nce uint8x8_t pairwise_min(uint8x8_t a, uint8x8_t b) {
return vpmin_u8(a, b); }
260[[gnu::always_inline]] nce uint8x8_t equal(uint8x8_t a, uint8x8_t b) {
return vceq_u8(a, b); }
261[[gnu::always_inline]] nce uint8x8_t greater_than_or_equal(uint8x8_t a, uint8x8_t b) {
return vcge_u8(a, b); }
262[[gnu::always_inline]] nce uint8x8_t less_than_or_equal(uint8x8_t a, uint8x8_t b) {
return vcle_u8(a, b); }
263[[gnu::always_inline]] nce uint8x8_t greater_than(uint8x8_t a, uint8x8_t b) {
return vcgt_u8(a, b); }
264[[gnu::always_inline]] nce uint8x8_t less_than(uint8x8_t a, uint8x8_t b) {
return vclt_u8(a, b); }
265[[gnu::always_inline]] nce uint8x8_t compare_test_nonzero(uint8x8_t a, uint8x8_t b) {
return vtst_u8(a, b); }
266[[gnu::always_inline]] nce uint8x8_t shift_left(uint8x8_t a, int8x8_t b) {
return vshl_u8(a, b); }
267template <
int n>[[gnu::always_inline]] nce uint8x8_t shift_left(uint8x8_t a) {
return vshl_n_u8(a, n); }
268[[gnu::always_inline]] nce uint8x8_t shift_left_saturate(uint8x8_t a, int8x8_t b) {
return vqshl_u8(a, b); }
269template <
int n>[[gnu::always_inline]] nce uint8x8_t shift_left_saturate(uint8x8_t a) {
return vqshl_n_u8(a, n); }
270[[gnu::always_inline]] nce uint8x8_t shift_left_round(uint8x8_t a, int8x8_t b) {
return vrshl_u8(a, b); }
271[[gnu::always_inline]] nce uint8x8_t shift_left_round_saturate(uint8x8_t a, int8x8_t b) {
return vqrshl_u8(a, b); }
272template <
int n>[[gnu::always_inline]] nce uint16x8_t shift_left_long(uint8x8_t a) {
return vshll_n_u8(a, n); }
273template <
int n>[[gnu::always_inline]] nce uint8x8_t shift_left_insert(uint8x8_t a, uint8x8_t b) {
return vsli_n_u8(a, b, n); }
274template <
int n>[[gnu::always_inline]] nce uint8x8_t shift_right(uint8x8_t a) {
return vshr_n_u8(a, n); }
275template <
int n>[[gnu::always_inline]] nce uint8x8_t shift_right_round(uint8x8_t a) {
return vrshr_n_u8(a, n); }
276template <
int n>[[gnu::always_inline]] nce uint8x8_t shift_right_accumulate(uint8x8_t a, uint8x8_t b) {
return vsra_n_u8(a, b, n); }
277template <
int n>[[gnu::always_inline]] nce uint8x8_t shift_right_accumulate_round(uint8x8_t a, uint8x8_t b) {
return vrsra_n_u8(a, b, n); }
278template <
int n>[[gnu::always_inline]] nce uint8x8_t shift_right_insert(uint8x8_t a, uint8x8_t b) {
return vsri_n_u8(a, b, n); }
279template <> [[gnu::always_inline]] nce int8x8_t reinterpret(uint8x8_t a) {
return vreinterpret_s8_u8(a); }
280template <> [[gnu::always_inline]] nce int16x4_t reinterpret(uint8x8_t a) {
return vreinterpret_s16_u8(a); }
281template <> [[gnu::always_inline]] nce int32x2_t reinterpret(uint8x8_t a) {
return vreinterpret_s32_u8(a); }
282template <> [[gnu::always_inline]] nce float32x2_t reinterpret(uint8x8_t a) {
return vreinterpret_f32_u8(a); }
283template <> [[gnu::always_inline]] nce uint16x4_t reinterpret(uint8x8_t a) {
return vreinterpret_u16_u8(a); }
284template <> [[gnu::always_inline]] nce uint32x2_t reinterpret(uint8x8_t a) {
return vreinterpret_u32_u8(a); }
285template <> [[gnu::always_inline]] nce poly8x8_t reinterpret(uint8x8_t a) {
return vreinterpret_p8_u8(a); }
286template <> [[gnu::always_inline]] nce poly16x4_t reinterpret(uint8x8_t a) {
return vreinterpret_p16_u8(a); }
287template <> [[gnu::always_inline]] nce uint64x1_t reinterpret(uint8x8_t a) {
return vreinterpret_u64_u8(a); }
288template <> [[gnu::always_inline]] nce int64x1_t reinterpret(uint8x8_t a) {
return vreinterpret_s64_u8(a); }
289[[gnu::always_inline]] nce uint16x8_t move_long(uint8x8_t a) {
return vmovl_u8(a); }
290[[gnu::always_inline]] nce uint8x8_t bitwise_not(uint8x8_t a) {
return vmvn_u8(a); }
291[[gnu::always_inline]] nce uint8x8_t bitwise_and(uint8x8_t a, uint8x8_t b) {
return vand_u8(a, b); }
292[[gnu::always_inline]] nce uint8x8_t bitwise_or(uint8x8_t a, uint8x8_t b) {
return vorr_u8(a, b); }
293[[gnu::always_inline]] nce uint8x8_t bitwise_xor(uint8x8_t a, uint8x8_t b) {
return veor_u8(a, b); }
294[[gnu::always_inline]] nce uint8x8_t bitwise_or_not(uint8x8_t a, uint8x8_t b) {
return vorn_u8(a, b); }
296[[gnu::always_inline]] nce int8x8_t count_leading_sign_bits(uint8x8_t a) {
return vcls_u8(a); }
298[[gnu::always_inline]] nce uint8x8_t count_leading_zero_bits(uint8x8_t a) {
return vclz_u8(a); }
299[[gnu::always_inline]] nce uint8x8_t count_active_bits(uint8x8_t a) {
return vcnt_u8(a); }
300[[gnu::always_inline]] nce uint8x8_t bitwise_clear(uint8x8_t a, uint8x8_t b) {
return vbic_u8(a, b); }
301[[gnu::always_inline]] nce uint8x8_t bitwise_select(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
return vbsl_u8(a, b, c); }
302[[gnu::always_inline]] nce int8x8_t bitwise_select(uint8x8_t a, int8x8_t b, int8x8_t c) {
return vbsl_s8(a, b, c); }
303[[gnu::always_inline]] nce poly8x8_t bitwise_select(uint8x8_t a, poly8x8_t b, poly8x8_t c) {
return vbsl_p8(a, b, c); }
304template <
int lane>[[gnu::always_inline]] nce uint8x8_t duplicate_lane(uint8x8_t a) {
return vdup_lane_u8(a, lane); }
305template <
int lane>[[gnu::always_inline]] nce uint8x16_t duplicate_lane_quad(uint8x8_t a) {
return vdupq_lane_u8(a, lane); }
306[[gnu::always_inline]] nce uint8x16_t combine(uint8x8_t low, uint8x8_t high) {
return vcombine_u8(low, high); }
307template <
int lane>[[gnu::always_inline]] nce uint8_t get_lane(uint8x8_t v) {
return vget_lane_u8(v, lane); }
308template <
int n>[[gnu::always_inline]] nce uint8x8_t extract(uint8x8_t a, uint8x8_t b) {
return vext_u8(a, b, n); }
309[[gnu::always_inline]] nce uint8x8_t reverse_64bit(uint8x8_t a) {
return vrev64_u8(a); }
310[[gnu::always_inline]] nce uint8x8_t reverse_32bit(uint8x8_t a) {
return vrev32_u8(a); }
311[[gnu::always_inline]] nce uint8x8_t reverse_16bit(uint8x8_t a) {
return vrev16_u8(a); }
312[[gnu::always_inline]] nce uint8x8x2_t zip(uint8x8_t a, uint8x8_t b) {
return vzip_u8(a, b); }
313[[gnu::always_inline]] nce uint8x8x2_t unzip(uint8x8_t a, uint8x8_t b) {
return vuzp_u8(a, b); }
314[[gnu::always_inline]] nce uint8x8x2_t transpose(uint8x8_t a, uint8x8_t b) {
return vtrn_u8(a, b); }
315[[gnu::always_inline]] nce uint8x8_t table_lookup1(uint8x8_t a, uint8x8_t idx) {
return vtbl1_u8(a, idx); }
316[[gnu::always_inline]] nce uint8x8_t table_extension1(uint8x8_t a, uint8x8_t b, uint8x8_t idx) {
return vtbx1_u8(a, b, idx); }
317[[gnu::always_inline]] nce uint8x8_t table_extension2(uint8x8_t a, uint8x8x2_t b, uint8x8_t idx) {
return vtbx2_u8(a, b, idx); }
318[[gnu::always_inline]] nce uint8x8_t table_extension3(uint8x8_t a, uint8x8x3_t b, uint8x8_t idx) {
return vtbx3_u8(a, b, idx); }
319[[gnu::always_inline]] nce uint8x8_t table_extension4(uint8x8_t a, uint8x8x4_t b, uint8x8_t idx) {
return vtbx4_u8(a, b, idx); }
320[[gnu::always_inline]] nce uint8x16_t multiply_add(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
return vmlaq_u8(a, b, c); }
321[[gnu::always_inline]] nce uint8x16_t multiply_subtract(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
return vmlsq_u8(a, b, c); }
322[[gnu::always_inline]] nce uint8x16_t subtract_absolute_add(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
return vabaq_u8(a, b, c); }
323[[gnu::always_inline]] nce uint16x8_t pairwise_add_long(uint8x16_t a) {
return vpaddlq_u8(a); }
324[[gnu::always_inline]] nce uint8x16_t equal(uint8x16_t a, uint8x16_t b) {
return vceqq_u8(a, b); }
325[[gnu::always_inline]] nce uint8x16_t greater_than_or_equal(uint8x16_t a, uint8x16_t b) {
return vcgeq_u8(a, b); }
326[[gnu::always_inline]] nce uint8x16_t less_than_or_equal(uint8x16_t a, uint8x16_t b) {
return vcleq_u8(a, b); }
327[[gnu::always_inline]] nce uint8x16_t greater_than(uint8x16_t a, uint8x16_t b) {
return vcgtq_u8(a, b); }
328[[gnu::always_inline]] nce uint8x16_t less_than(uint8x16_t a, uint8x16_t b) {
return vcltq_u8(a, b); }
329[[gnu::always_inline]] nce uint8x16_t compare_test_nonzero(uint8x16_t a, uint8x16_t b) {
return vtstq_u8(a, b); }
330template <
int n>[[gnu::always_inline]] nce uint8x16_t shift_left(uint8x16_t a) {
return vshlq_n_u8(a, n); }
331template <
int n>[[gnu::always_inline]] nce uint8x16_t shift_right_accumulate(uint8x16_t a, uint8x16_t b) {
return vsraq_n_u8(a, b, n); }
332template <
int n>[[gnu::always_inline]] nce uint8x16_t shift_right_accumulate_round(uint8x16_t a, uint8x16_t b) {
return vrsraq_n_u8(a, b, n); }
333template <> [[gnu::always_inline]] nce poly8x16_t reinterpret(uint8x16_t a) {
return vreinterpretq_p8_u8(a); }
334template <> [[gnu::always_inline]] nce poly16x8_t reinterpret(uint8x16_t a) {
return vreinterpretq_p16_u8(a); }
336[[gnu::always_inline]] nce int8x16_t count_leading_sign_bits(uint8x16_t a) {
return vclsq_u8(a); }
338[[gnu::always_inline]] nce uint8x16_t count_active_bits(uint8x16_t a) {
return vcntq_u8(a); }
339[[gnu::always_inline]] nce uint8x16_t bitwise_select(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
return vbslq_u8(a, b, c); }
340[[gnu::always_inline]] nce uint8x8_t get_high(uint8x16_t a) {
return vget_high_u8(a); }
341[[gnu::always_inline]] nce uint8x8_t get_low(uint8x16_t a) {
return vget_low_u8(a); }
342template <
int n>[[gnu::always_inline]] nce uint8x16_t extract(uint8x16_t a, uint8x16_t b) {
return vextq_u8(a, b, n); }
343[[gnu::always_inline]] nce uint8x16x2_t zip(uint8x16_t a, uint8x16_t b) {
return vzipq_u8(a, b); }
344[[gnu::always_inline]] nce uint8x16x2_t unzip(uint8x16_t a, uint8x16_t b) {
return vuzpq_u8(a, b); }
345[[gnu::always_inline]] nce uint8x16x2_t transpose(uint8x16_t a, uint8x16_t b) {
return vtrnq_u8(a, b); }
346[[gnu::always_inline]] nce int8x16_t bitwise_select(uint8x16_t a, int8x16_t b, int8x16_t c) {
return vbslq_s8(a, b, c); }
347[[gnu::always_inline]] nce poly8x16_t bitwise_select(uint8x16_t a, poly8x16_t b, poly8x16_t c) {
return vbslq_p8(a, b, c); }
348[[gnu::always_inline]] nce int8x8_t add(int8x8_t a, int8x8_t b) {
return vadd_s8(a, b); }
349[[gnu::always_inline]] nce int16x8_t add_long(int8x8_t a, int8x8_t b) {
return vaddl_s8(a, b); }
350[[gnu::always_inline]] nce int8x8_t add_halve(int8x8_t a, int8x8_t b) {
return vhadd_s8(a, b); }
351[[gnu::always_inline]] nce int8x8_t add_halve_round(int8x8_t a, int8x8_t b) {
return vrhadd_s8(a, b); }
352[[gnu::always_inline]] nce int8x8_t add_saturate(int8x8_t a, int8x8_t b) {
return vqadd_s8(a, b); }
353[[gnu::always_inline]] nce int8x8_t multiply(int8x8_t a, int8x8_t b) {
return vmul_s8(a, b); }
354[[gnu::always_inline]] nce int8x8_t multiply_add(int8x8_t a, int8x8_t b, int8x8_t c) {
return vmla_s8(a, b, c); }
355[[gnu::always_inline]] nce int8x8_t multiply_subtract(int8x8_t a, int8x8_t b, int8x8_t c) {
return vmls_s8(a, b, c); }
356[[gnu::always_inline]] nce int16x8_t multiply_long(int8x8_t a, int8x8_t b) {
return vmull_s8(a, b); }
357[[gnu::always_inline]] nce int8x8_t subtract(int8x8_t a, int8x8_t b) {
return vsub_s8(a, b); }
358[[gnu::always_inline]] nce int16x8_t subtract_long(int8x8_t a, int8x8_t b) {
return vsubl_s8(a, b); }
359[[gnu::always_inline]] nce int8x8_t subtract_halve(int8x8_t a, int8x8_t b) {
return vhsub_s8(a, b); }
360[[gnu::always_inline]] nce int8x8_t subtract_saturate(int8x8_t a, int8x8_t b) {
return vqsub_s8(a, b); }
361[[gnu::always_inline]] nce int8x8_t subtract_absolute(int8x8_t a, int8x8_t b) {
return vabd_s8(a, b); }
362[[gnu::always_inline]] nce int16x8_t subtract_absolute_long(int8x8_t a, int8x8_t b) {
return vabdl_s8(a, b); }
363[[gnu::always_inline]] nce int8x8_t subtract_absolute_add(int8x8_t a, int8x8_t b, int8x8_t c) {
return vaba_s8(a, b, c); }
364[[gnu::always_inline]] nce int8x8_t absolute(int8x8_t a) {
return vabs_s8(a); }
365[[gnu::always_inline]] nce int8x8_t absolute_saturate(int8x8_t a) {
return vqabs_s8(a); }
366[[gnu::always_inline]] nce int8x8_t max(int8x8_t a, int8x8_t b) {
return vmax_s8(a, b); }
367[[gnu::always_inline]] nce int8x8_t min(int8x8_t a, int8x8_t b) {
return vmin_s8(a, b); }
368[[gnu::always_inline]] nce int8x8_t pairwise_add(int8x8_t a, int8x8_t b) {
return vpadd_s8(a, b); }
369[[gnu::always_inline]] nce int16x4_t pairwise_add_long(int8x8_t a) {
return vpaddl_s8(a); }
370[[gnu::always_inline]] nce int8x8_t pairwise_max(int8x8_t a, int8x8_t b) {
return vpmax_s8(a, b); }
371[[gnu::always_inline]] nce int8x8_t pairwise_min(int8x8_t a, int8x8_t b) {
return vpmin_s8(a, b); }
372[[gnu::always_inline]] nce uint8x8_t equal(int8x8_t a, int8x8_t b) {
return vceq_s8(a, b); }
373[[gnu::always_inline]] nce uint8x8_t greater_than_or_equal(int8x8_t a, int8x8_t b) {
return vcge_s8(a, b); }
374[[gnu::always_inline]] nce uint8x8_t less_than_or_equal(int8x8_t a, int8x8_t b) {
return vcle_s8(a, b); }
375[[gnu::always_inline]] nce uint8x8_t greater_than(int8x8_t a, int8x8_t b) {
return vcgt_s8(a, b); }
376[[gnu::always_inline]] nce uint8x8_t less_than(int8x8_t a, int8x8_t b) {
return vclt_s8(a, b); }
377[[gnu::always_inline]] nce uint8x8_t compare_test_nonzero(int8x8_t a, int8x8_t b) {
return vtst_s8(a, b); }
378[[gnu::always_inline]] nce int8x8_t shift_left(int8x8_t a, int8x8_t b) {
return vshl_s8(a, b); }
379template <
int n>[[gnu::always_inline]] nce int8x8_t shift_left(int8x8_t a) {
return vshl_n_s8(a, n); }
380[[gnu::always_inline]] nce int8x8_t shift_left_saturate(int8x8_t a, int8x8_t b) {
return vqshl_s8(a, b); }
381template <
int n>[[gnu::always_inline]] nce int8x8_t shift_left_saturate(int8x8_t a) {
return vqshl_n_s8(a, n); }
382template <
int n>[[gnu::always_inline]] nce uint8x8_t shift_left_unsigned_saturate(int8x8_t a) {
return vqshlu_n_s8(a, n); }
383[[gnu::always_inline]] nce int8x8_t shift_left_round(int8x8_t a, int8x8_t b) {
return vrshl_s8(a, b); }
384[[gnu::always_inline]] nce int8x8_t shift_left_round_saturate(int8x8_t a, int8x8_t b) {
return vqrshl_s8(a, b); }
385template <
int n>[[gnu::always_inline]] nce int16x8_t shift_left_long(int8x8_t a) {
return vshll_n_s8(a, n); }
386template <
int n>[[gnu::always_inline]] nce int8x8_t shift_left_insert(int8x8_t a, int8x8_t b) {
return vsli_n_s8(a, b, n); }
387template <
int n>[[gnu::always_inline]] nce int8x8_t shift_right(int8x8_t a) {
return vshr_n_s8(a, n); }
388template <
int n>[[gnu::always_inline]] nce int8x8_t shift_right_round(int8x8_t a) {
return vrshr_n_s8(a, n); }
389template <
int n>[[gnu::always_inline]] nce int8x8_t shift_right_accumulate(int8x8_t a, int8x8_t b) {
return vsra_n_s8(a, b, n); }
390template <
int n>[[gnu::always_inline]] nce int8x8_t shift_right_accumulate_round(int8x8_t a, int8x8_t b) {
return vrsra_n_s8(a, b, n); }
391template <
int n>[[gnu::always_inline]] nce int8x8_t shift_right_insert(int8x8_t a, int8x8_t b) {
return vsri_n_s8(a, b, n); }
392template <> [[gnu::always_inline]] nce int16x4_t reinterpret(int8x8_t a) {
return vreinterpret_s16_s8(a); }
393template <> [[gnu::always_inline]] nce int32x2_t reinterpret(int8x8_t a) {
return vreinterpret_s32_s8(a); }
394template <> [[gnu::always_inline]] nce float32x2_t reinterpret(int8x8_t a) {
return vreinterpret_f32_s8(a); }
395template <> [[gnu::always_inline]] nce uint8x8_t reinterpret(int8x8_t a) {
return vreinterpret_u8_s8(a); }
396template <> [[gnu::always_inline]] nce uint16x4_t reinterpret(int8x8_t a) {
return vreinterpret_u16_s8(a); }
397template <> [[gnu::always_inline]] nce uint32x2_t reinterpret(int8x8_t a) {
return vreinterpret_u32_s8(a); }
398template <> [[gnu::always_inline]] nce poly8x8_t reinterpret(int8x8_t a) {
return vreinterpret_p8_s8(a); }
399template <> [[gnu::always_inline]] nce poly16x4_t reinterpret(int8x8_t a) {
return vreinterpret_p16_s8(a); }
400template <> [[gnu::always_inline]] nce uint64x1_t reinterpret(int8x8_t a) {
return vreinterpret_u64_s8(a); }
401template <> [[gnu::always_inline]] nce int64x1_t reinterpret(int8x8_t a) {
return vreinterpret_s64_s8(a); }
402[[gnu::always_inline]] nce int16x8_t move_long(int8x8_t a) {
return vmovl_s8(a); }
403[[gnu::always_inline]] nce int8x8_t negate(int8x8_t a) {
return vneg_s8(a); }
404[[gnu::always_inline]] nce int8x8_t negate_saturate(int8x8_t a) {
return vqneg_s8(a); }
405[[gnu::always_inline]] nce int8x8_t bitwise_not(int8x8_t a) {
return vmvn_s8(a); }
406[[gnu::always_inline]] nce int8x8_t bitwise_and(int8x8_t a, int8x8_t b) {
return vand_s8(a, b); }
407[[gnu::always_inline]] nce int8x8_t bitwise_or(int8x8_t a, int8x8_t b) {
return vorr_s8(a, b); }
408[[gnu::always_inline]] nce int8x8_t bitwise_xor(int8x8_t a, int8x8_t b) {
return veor_s8(a, b); }
409[[gnu::always_inline]] nce int8x8_t bitwise_or_not(int8x8_t a, int8x8_t b) {
return vorn_s8(a, b); }
410[[gnu::always_inline]] nce int8x8_t count_leading_sign_bits(int8x8_t a) {
return vcls_s8(a); }
411[[gnu::always_inline]] nce int8x8_t count_leading_zero_bits(int8x8_t a) {
return vclz_s8(a); }
412[[gnu::always_inline]] nce int8x8_t count_active_bits(int8x8_t a) {
return vcnt_s8(a); }
413[[gnu::always_inline]] nce int8x8_t bitwise_clear(int8x8_t a, int8x8_t b) {
return vbic_s8(a, b); }
414template <
int lane>[[gnu::always_inline]] nce int8x8_t duplicate_lane(int8x8_t a) {
return vdup_lane_s8(a, lane); }
415template <
int lane>[[gnu::always_inline]] nce int8x16_t duplicate_lane_quad(int8x8_t a) {
return vdupq_lane_s8(a, lane); }
416[[gnu::always_inline]] nce int8x16_t combine(int8x8_t low, int8x8_t high) {
return vcombine_s8(low, high); }
417template <
int lane>[[gnu::always_inline]] nce int8_t get_lane(int8x8_t v) {
return vget_lane_s8(v, lane); }
418template <
int n>[[gnu::always_inline]] nce int8x8_t extract(int8x8_t a, int8x8_t b) {
return vext_s8(a, b, n); }
419[[gnu::always_inline]] nce int8x8_t reverse_64bit(int8x8_t a) {
return vrev64_s8(a); }
420[[gnu::always_inline]] nce int8x8_t reverse_32bit(int8x8_t a) {
return vrev32_s8(a); }
421[[gnu::always_inline]] nce int8x8_t reverse_16bit(int8x8_t a) {
return vrev16_s8(a); }
422[[gnu::always_inline]] nce int8x8x2_t zip(int8x8_t a, int8x8_t b) {
return vzip_s8(a, b); }
423[[gnu::always_inline]] nce int8x8x2_t unzip(int8x8_t a, int8x8_t b) {
return vuzp_s8(a, b); }
424[[gnu::always_inline]] nce int8x8x2_t transpose(int8x8_t a, int8x8_t b) {
return vtrn_s8(a, b); }
425[[gnu::always_inline]] nce int8x8_t table_lookup1(int8x8_t a, int8x8_t idx) {
return vtbl1_s8(a, idx); }
426[[gnu::always_inline]] nce int8x8_t table_extension1(int8x8_t a, int8x8_t b, int8x8_t idx) {
return vtbx1_s8(a, b, idx); }
427[[gnu::always_inline]] nce int8x8_t table_extension2(int8x8_t a, int8x8x2_t b, int8x8_t idx) {
return vtbx2_s8(a, b, idx); }
428[[gnu::always_inline]] nce int8x8_t table_extension3(int8x8_t a, int8x8x3_t b, int8x8_t idx) {
return vtbx3_s8(a, b, idx); }
429[[gnu::always_inline]] nce int8x8_t table_extension4(int8x8_t a, int8x8x4_t b, int8x8_t idx) {
return vtbx4_s8(a, b, idx); }
430[[gnu::always_inline]] nce int8x16_t multiply_add(int8x16_t a, int8x16_t b, int8x16_t c) {
return vmlaq_s8(a, b, c); }
431[[gnu::always_inline]] nce int8x16_t multiply_subtract(int8x16_t a, int8x16_t b, int8x16_t c) {
return vmlsq_s8(a, b, c); }
432[[gnu::always_inline]] nce int8x16_t subtract_absolute_add(int8x16_t a, int8x16_t b, int8x16_t c) {
return vabaq_s8(a, b, c); }
433[[gnu::always_inline]] nce int16x8_t pairwise_add_long(int8x16_t a) {
return vpaddlq_s8(a); }
434[[gnu::always_inline]] nce uint8x16_t equal(int8x16_t a, int8x16_t b) {
return vceqq_s8(a, b); }
435[[gnu::always_inline]] nce uint8x16_t greater_than_or_equal(int8x16_t a, int8x16_t b) {
return vcgeq_s8(a, b); }
436[[gnu::always_inline]] nce uint8x16_t less_than_or_equal(int8x16_t a, int8x16_t b) {
return vcleq_s8(a, b); }
437[[gnu::always_inline]] nce uint8x16_t greater_than(int8x16_t a, int8x16_t b) {
return vcgtq_s8(a, b); }
438[[gnu::always_inline]] nce uint8x16_t less_than(int8x16_t a, int8x16_t b) {
return vcltq_s8(a, b); }
439[[gnu::always_inline]] nce uint8x16_t compare_test_nonzero(int8x16_t a, int8x16_t b) {
return vtstq_s8(a, b); }
440template <
int n>[[gnu::always_inline]] nce int8x16_t shift_left(int8x16_t a) {
return vshlq_n_s8(a, n); }
441template <
int n>[[gnu::always_inline]] nce uint8x16_t shift_left_unsigned_saturate(int8x16_t a) {
return vqshluq_n_s8(a, n); }
442template <
int n>[[gnu::always_inline]] nce int8x16_t shift_right_accumulate(int8x16_t a, int8x16_t b) {
return vsraq_n_s8(a, b, n); }
443template <
int n>[[gnu::always_inline]] nce int8x16_t shift_right_accumulate_round(int8x16_t a, int8x16_t b) {
return vrsraq_n_s8(a, b, n); }
444template <> [[gnu::always_inline]] nce poly8x16_t reinterpret(int8x16_t a) {
return vreinterpretq_p8_s8(a); }
445template <> [[gnu::always_inline]] nce poly16x8_t reinterpret(int8x16_t a) {
return vreinterpretq_p16_s8(a); }
446[[gnu::always_inline]] nce int8x16_t count_active_bits(int8x16_t a) {
return vcntq_s8(a); }
447[[gnu::always_inline]] nce int8x8_t get_high(int8x16_t a) {
return vget_high_s8(a); }
448[[gnu::always_inline]] nce int8x8_t get_low(int8x16_t a) {
return vget_low_s8(a); }
449template <
int n>[[gnu::always_inline]] nce int8x16_t extract(int8x16_t a, int8x16_t b) {
return vextq_s8(a, b, n); }
450[[gnu::always_inline]] nce int8x16x2_t zip(int8x16_t a, int8x16_t b) {
return vzipq_s8(a, b); }
451[[gnu::always_inline]] nce int8x16x2_t unzip(int8x16_t a, int8x16_t b) {
return vuzpq_s8(a, b); }
452[[gnu::always_inline]] nce int8x16x2_t transpose(int8x16_t a, int8x16_t b) {
return vtrnq_s8(a, b); }
453[[gnu::always_inline]] nce uint16x4_t add(uint16x4_t a, uint16x4_t b) {
return vadd_u16(a, b); }
454[[gnu::always_inline]] nce uint32x4_t add_long(uint16x4_t a, uint16x4_t b) {
return vaddl_u16(a, b); }
455[[gnu::always_inline]] nce uint16x4_t add_halve(uint16x4_t a, uint16x4_t b) {
return vhadd_u16(a, b); }
456[[gnu::always_inline]] nce uint16x4_t add_halve_round(uint16x4_t a, uint16x4_t b) {
return vrhadd_u16(a, b); }
457[[gnu::always_inline]] nce uint16x4_t add_saturate(uint16x4_t a, uint16x4_t b) {
return vqadd_u16(a, b); }
458[[gnu::always_inline]] nce uint16x4_t multiply(uint16x4_t a, uint16x4_t b) {
return vmul_u16(a, b); }
459[[gnu::always_inline]] nce uint16x4_t multiply_add(uint16x4_t a, uint16x4_t b, uint16x4_t c) {
return vmla_u16(a, b, c); }
460[[gnu::always_inline]] nce uint16x4_t multiply_subtract(uint16x4_t a, uint16x4_t b, uint16x4_t c) {
return vmls_u16(a, b, c); }
461[[gnu::always_inline]] nce uint32x4_t multiply_long(uint16x4_t a, uint16x4_t b) {
return vmull_u16(a, b); }
462[[gnu::always_inline]] nce uint16x4_t subtract(uint16x4_t a, uint16x4_t b) {
return vsub_u16(a, b); }
463[[gnu::always_inline]] nce uint32x4_t subtract_long(uint16x4_t a, uint16x4_t b) {
return vsubl_u16(a, b); }
464[[gnu::always_inline]] nce uint16x4_t subtract_halve(uint16x4_t a, uint16x4_t b) {
return vhsub_u16(a, b); }
465[[gnu::always_inline]] nce uint16x4_t subtract_saturate(uint16x4_t a, uint16x4_t b) {
return vqsub_u16(a, b); }
466[[gnu::always_inline]] nce uint16x4_t subtract_absolute(uint16x4_t a, uint16x4_t b) {
return vabd_u16(a, b); }
467[[gnu::always_inline]] nce uint32x4_t subtract_absolute_long(uint16x4_t a, uint16x4_t b) {
return vabdl_u16(a, b); }
468[[gnu::always_inline]] nce uint16x4_t subtract_absolute_add(uint16x4_t a, uint16x4_t b, uint16x4_t c) {
return vaba_u16(a, b, c); }
469[[gnu::always_inline]] nce uint16x4_t max(uint16x4_t a, uint16x4_t b) {
return vmax_u16(a, b); }
470[[gnu::always_inline]] nce uint16x4_t min(uint16x4_t a, uint16x4_t b) {
return vmin_u16(a, b); }
471[[gnu::always_inline]] nce uint16x4_t pairwise_add(uint16x4_t a, uint16x4_t b) {
return vpadd_u16(a, b); }
472[[gnu::always_inline]] nce uint32x2_t pairwise_add_long(uint16x4_t a) {
return vpaddl_u16(a); }
473[[gnu::always_inline]] nce uint16x4_t pairwise_add_accumulate_long(uint16x4_t a, uint8x8_t b) {
return vpadal_u8(a, b); }
474[[gnu::always_inline]] nce uint16x4_t pairwise_max(uint16x4_t a, uint16x4_t b) {
return vpmax_u16(a, b); }
475[[gnu::always_inline]] nce uint16x4_t pairwise_min(uint16x4_t a, uint16x4_t b) {
return vpmin_u16(a, b); }
476[[gnu::always_inline]] nce uint16x4_t equal(uint16x4_t a, uint16x4_t b) {
return vceq_u16(a, b); }
477[[gnu::always_inline]] nce uint16x4_t greater_than_or_equal(uint16x4_t a, uint16x4_t b) {
return vcge_u16(a, b); }
478[[gnu::always_inline]] nce uint16x4_t less_than_or_equal(uint16x4_t a, uint16x4_t b) {
return vcle_u16(a, b); }
479[[gnu::always_inline]] nce uint16x4_t greater_than(uint16x4_t a, uint16x4_t b) {
return vcgt_u16(a, b); }
480[[gnu::always_inline]] nce uint16x4_t less_than(uint16x4_t a, uint16x4_t b) {
return vclt_u16(a, b); }
481[[gnu::always_inline]] nce uint16x4_t compare_test_nonzero(uint16x4_t a, uint16x4_t b) {
return vtst_u16(a, b); }
482[[gnu::always_inline]] nce uint16x4_t shift_left(uint16x4_t a, int16x4_t b) {
return vshl_u16(a, b); }
483template <
int n>[[gnu::always_inline]] nce uint16x4_t shift_left(uint16x4_t a) {
return vshl_n_u16(a, n); }
484[[gnu::always_inline]] nce uint16x4_t shift_left_saturate(uint16x4_t a, int16x4_t b) {
return vqshl_u16(a, b); }
485template <
int n>[[gnu::always_inline]] nce uint16x4_t shift_left_saturate(uint16x4_t a) {
return vqshl_n_u16(a, n); }
486template <
int n>[[gnu::always_inline]] nce uint32x4_t shift_left_long(uint16x4_t a) {
return vshll_n_u16(a, n); }
487template <
int n>[[gnu::always_inline]] nce uint16x4_t shift_left_insert(uint16x4_t a, uint16x4_t b) {
return vsli_n_u16(a, b, n); }
488[[gnu::always_inline]] nce uint16x4_t shift_left_round(uint16x4_t a, int16x4_t b) {
return vrshl_u16(a, b); }
489[[gnu::always_inline]] nce uint16x4_t shift_left_round_saturate(uint16x4_t a, int16x4_t b) {
return vqrshl_u16(a, b); }
490template <
int n>[[gnu::always_inline]] nce uint16x4_t shift_right(uint16x4_t a) {
return vshr_n_u16(a, n); }
491template <
int n>[[gnu::always_inline]] nce uint16x4_t shift_right_round(uint16x4_t a) {
return vrshr_n_u16(a, n); }
492template <
int n>[[gnu::always_inline]] nce uint16x4_t shift_right_accumulate(uint16x4_t a, uint16x4_t b) {
return vsra_n_u16(a, b, n); }
493template <
int n>[[gnu::always_inline]] nce uint16x4_t shift_right_accumulate_round(uint16x4_t a, uint16x4_t b) {
return vrsra_n_u16(a, b, n); }
494template <
int n>[[gnu::always_inline]] nce uint16x4_t shift_right_insert(uint16x4_t a, uint16x4_t b) {
return vsri_n_u16(a, b, n); }
495template <> [[gnu::always_inline]] nce int8x8_t reinterpret(uint16x4_t a) {
return vreinterpret_s8_u16(a); }
496template <> [[gnu::always_inline]] nce int16x4_t reinterpret(uint16x4_t a) {
return vreinterpret_s16_u16(a); }
497template <> [[gnu::always_inline]] nce int32x2_t reinterpret(uint16x4_t a) {
return vreinterpret_s32_u16(a); }
498template <> [[gnu::always_inline]] nce float32x2_t reinterpret(uint16x4_t a) {
return vreinterpret_f32_u16(a); }
499template <> [[gnu::always_inline]] nce uint8x8_t reinterpret(uint16x4_t a) {
return vreinterpret_u8_u16(a); }
500template <> [[gnu::always_inline]] nce uint32x2_t reinterpret(uint16x4_t a) {
return vreinterpret_u32_u16(a); }
501template <> [[gnu::always_inline]] nce poly8x8_t reinterpret(uint16x4_t a) {
return vreinterpret_p8_u16(a); }
502template <> [[gnu::always_inline]] nce poly16x4_t reinterpret(uint16x4_t a) {
return vreinterpret_p16_u16(a); }
503template <> [[gnu::always_inline]] nce uint64x1_t reinterpret(uint16x4_t a) {
return vreinterpret_u64_u16(a); }
504template <> [[gnu::always_inline]] nce int64x1_t reinterpret(uint16x4_t a) {
return vreinterpret_s64_u16(a); }
505[[gnu::always_inline]] nce uint32x4_t move_long(uint16x4_t a) {
return vmovl_u16(a); }
506template <
int lane> [[gnu::always_inline]] nce uint16x4_t multiply_add_lane(uint16x4_t a, uint16x4_t b, uint16x4_t v) {
return vmla_lane_u16(a, b, v, lane); }
507template <
int lane> [[gnu::always_inline]] nce uint16x4_t multiply_subtract_lane(uint16x4_t a, uint16x4_t b, uint16x4_t v) {
return vmls_lane_u16(a, b, v, lane); }
508[[gnu::always_inline]] nce uint16x4_t multiply_add(uint16x4_t a, uint16x4_t b, uint16_t c) {
return vmla_n_u16(a, b, c); }
509template <
int lane> [[gnu::always_inline]] nce uint16x4_t multiply_lane(uint16x4_t a, uint16x4_t v) {
return vmul_lane_u16(a, v, lane); }
510template <
int lane> [[gnu::always_inline]] nce uint32x4_t multiply_long_lane(uint16x4_t a, uint16x4_t v) {
return vmull_lane_u16(a, v, lane); }
511[[gnu::always_inline]] nce uint16x4_t multiply_subtract(uint16x4_t a, uint16x4_t b, uint16_t c) {
return vmls_n_u16(a, b, c); }
512[[gnu::always_inline]] nce uint16x4_t bitwise_not(uint16x4_t a) {
return vmvn_u16(a); }
513[[gnu::always_inline]] nce uint16x4_t bitwise_and(uint16x4_t a, uint16x4_t b) {
return vand_u16(a, b); }
514[[gnu::always_inline]] nce uint16x4_t bitwise_or(uint16x4_t a, uint16x4_t b) {
return vorr_u16(a, b); }
515[[gnu::always_inline]] nce uint16x4_t bitwise_xor(uint16x4_t a, uint16x4_t b) {
return veor_u16(a, b); }
516[[gnu::always_inline]] nce uint16x4_t bitwise_or_not(uint16x4_t a, uint16x4_t b) {
return vorn_u16(a, b); }
518[[gnu::always_inline]] nce int16x4_t count_leading_sign_bits(uint16x4_t a) {
return vcls_u16(a); }
520[[gnu::always_inline]] nce uint16x4_t count_leading_zero_bits(uint16x4_t a) {
return vclz_u16(a); }
521[[gnu::always_inline]] nce uint16x4_t bitwise_clear(uint16x4_t a, uint16x4_t b) {
return vbic_u16(a, b); }
522[[gnu::always_inline]] nce uint16x4_t bitwise_select(uint16x4_t a, uint16x4_t b, uint16x4_t c) {
return vbsl_u16(a, b, c); }
523template <
int lane>[[gnu::always_inline]] nce uint16x4_t duplicate_lane(uint16x4_t a) {
return vdup_lane_u16(a, lane); }
524template <
int lane>[[gnu::always_inline]] nce uint16x8_t duplicate_lane_quad(uint16x4_t a) {
return vdupq_lane_u16(a, lane); }
525[[gnu::always_inline]] nce uint16x8_t combine(uint16x4_t low, uint16x4_t high) {
return vcombine_u16(low, high); }
526template <
int lane>[[gnu::always_inline]] nce uint16_t get_lane(uint16x4_t v) {
return vget_lane_u16(v, lane); }
527template <
int n>[[gnu::always_inline]] nce uint16x4_t extract(uint16x4_t a, uint16x4_t b) {
return vext_u16(a, b, n); }
528[[gnu::always_inline]] nce uint16x4_t reverse_64bit(uint16x4_t a) {
return vrev64_u16(a); }
529[[gnu::always_inline]] nce uint16x4_t reverse_32bit(uint16x4_t a) {
return vrev32_u16(a); }
530[[gnu::always_inline]] nce uint16x4x2_t zip(uint16x4_t a, uint16x4_t b) {
return vzip_u16(a, b); }
531[[gnu::always_inline]] nce uint16x4x2_t unzip(uint16x4_t a, uint16x4_t b) {
return vuzp_u16(a, b); }
532[[gnu::always_inline]] nce uint16x4x2_t transpose(uint16x4_t a, uint16x4_t b) {
return vtrn_u16(a, b); }
533[[gnu::always_inline]] nce int16x4_t bitwise_select(uint16x4_t a, int16x4_t b, int16x4_t c) {
return vbsl_s16(a, b, c); }
534[[gnu::always_inline]] nce poly16x4_t bitwise_select(uint16x4_t a, poly16x4_t b, poly16x4_t c) {
return vbsl_p16(a, b, c); }
535[[gnu::always_inline]] nce uint16x4_t multiply(uint16x4_t a, uint16_t b) {
return vmul_n_u16(a, b); }
536[[gnu::always_inline]] nce uint32x4_t multiply_long(uint16x4_t a, uint16_t b) {
return vmull_n_u16(a, b); }
537[[gnu::always_inline]] nce uint16x8_t add(uint16x8_t a, uint8x8_t b) {
return vaddw_u8(a, b); }
538[[gnu::always_inline]] nce uint16x8_t multiply_add_long(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
return vmlal_u8(a, b, c); }
539[[gnu::always_inline]] nce uint16x8_t multiply_subtract_long(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
return vmlsl_u8(a, b, c); }
540[[gnu::always_inline]] nce uint16x8_t subtract(uint16x8_t a, uint8x8_t b) {
return vsubw_u8(a, b); }
541[[gnu::always_inline]] nce uint16x8_t subtract_absolute_add(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
return vabal_u8(a, b, c); }
542[[gnu::always_inline]] nce uint8x8_t add_narrow(uint16x8_t a, uint16x8_t b) {
return vaddhn_u16(a, b); }
543[[gnu::always_inline]] nce uint8x8_t add_round_narrow(uint16x8_t a, uint16x8_t b) {
return vraddhn_u16(a, b); }
544[[gnu::always_inline]] nce uint16x8_t multiply_add(uint16x8_t a, uint16x8_t b, uint16x8_t c) {
return vmlaq_u16(a, b, c); }
545[[gnu::always_inline]] nce uint16x8_t multiply_subtract(uint16x8_t a, uint16x8_t b, uint16x8_t c) {
return vmlsq_u16(a, b, c); }
546[[gnu::always_inline]] nce uint8x8_t subtract_narrow(uint16x8_t a, uint16x8_t b) {
return vsubhn_u16(a, b); }
547[[gnu::always_inline]] nce uint8x8_t subtract_round_narrow(uint16x8_t a, uint16x8_t b) {
return vrsubhn_u16(a, b); }
548[[gnu::always_inline]] nce uint16x8_t subtract_absolute_add(uint16x8_t a, uint16x8_t b, uint16x8_t c) {
return vabaq_u16(a, b, c); }
549[[gnu::always_inline]] nce uint32x4_t pairwise_add_long(uint16x8_t a) {
return vpaddlq_u16(a); }
550[[gnu::always_inline]] nce uint16x8_t pairwise_add_accumulate_long(uint16x8_t a, uint8x16_t b) {
return vpadalq_u8(a, b); }
551[[gnu::always_inline]] nce uint16x8_t equal(uint16x8_t a, uint16x8_t b) {
return vceqq_u16(a, b); }
552[[gnu::always_inline]] nce uint16x8_t greater_than_or_equal(uint16x8_t a, uint16x8_t b) {
return vcgeq_u16(a, b); }
553[[gnu::always_inline]] nce uint16x8_t less_than_or_equal(uint16x8_t a, uint16x8_t b) {
return vcleq_u16(a, b); }
554[[gnu::always_inline]] nce uint16x8_t greater_than(uint16x8_t a, uint16x8_t b) {
return vcgtq_u16(a, b); }
555[[gnu::always_inline]] nce uint16x8_t less_than(uint16x8_t a, uint16x8_t b) {
return vcltq_u16(a, b); }
556[[gnu::always_inline]] nce uint16x8_t compare_test_nonzero(uint16x8_t a, uint16x8_t b) {
return vtstq_u16(a, b); }
557template <
int n>[[gnu::always_inline]] nce uint16x8_t shift_right_accumulate(uint16x8_t a, uint16x8_t b) {
return vsraq_n_u16(a, b, n); }
558template <
int n>[[gnu::always_inline]] nce uint16x8_t shift_right_accumulate_round(uint16x8_t a, uint16x8_t b) {
return vrsraq_n_u16(a, b, n); }
559template <
int n>[[gnu::always_inline]] nce uint8x8_t shift_right_narrow(uint16x8_t a) {
return vshrn_n_u16(a, n); }
560template <
int n>[[gnu::always_inline]] nce uint8x8_t shift_right_saturate_narrow(uint16x8_t a) {
return vqshrn_n_u16(a, n); }
561template <
int n>[[gnu::always_inline]] nce uint8x8_t shift_right_round_saturate_narrow(uint16x8_t a) {
return vqrshrn_n_u16(a, n); }
562template <
int n>[[gnu::always_inline]] nce uint8x8_t shift_right_round_narrow(uint16x8_t a) {
return vrshrn_n_u16(a, n); }
563template <> [[gnu::always_inline]] nce poly8x16_t reinterpret(uint16x8_t a) {
return vreinterpretq_p8_u16(a); }
564template <> [[gnu::always_inline]] nce poly16x8_t reinterpret(uint16x8_t a) {
return vreinterpretq_p16_u16(a); }
565[[gnu::always_inline]] nce uint8x8_t move_narrow(uint16x8_t a) {
return vmovn_u16(a); }
566[[gnu::always_inline]] nce uint8x8_t move_saturate_narrow(uint16x8_t a) {
return vqmovn_u16(a); }
567template <
int lane> [[gnu::always_inline]] nce uint16x8_t multiply_add_lane(uint16x8_t a, uint16x8_t b, uint16x4_t v) {
return vmlaq_lane_u16(a, b, v, lane); }
568template <
int n>[[gnu::always_inline]] nce uint16x8_t shift_left(uint16x8_t a) {
return vshlq_n_u16(a, n); }
569template <
int lane> [[gnu::always_inline]] nce uint16x8_t multiply_lane(uint16x8_t a, uint16x4_t v) {
return vmulq_lane_u16(a, v, lane); }
570template <
int lane> [[gnu::always_inline]] nce uint16x8_t multiply_subtract_lane(uint16x8_t a, uint16x8_t b, uint16x4_t v) {
return vmlsq_lane_u16(a, b, v, lane); }
571[[gnu::always_inline]] nce uint16x8_t multiply_add(uint16x8_t a, uint16x8_t b, uint16_t c) {
return vmlaq_n_u16(a, b, c); }
572[[gnu::always_inline]] nce uint16x8_t multiply_subtract(uint16x8_t a, uint16x8_t b, uint16_t c) {
return vmlsq_n_u16(a, b, c); }
574[[gnu::always_inline]] nce int16x8_t count_leading_sign_bits(uint16x8_t a) {
return vclsq_u16(a); }
576[[gnu::always_inline]] nce uint16x8_t bitwise_select(uint16x8_t a, uint16x8_t b, uint16x8_t c) {
return vbslq_u16(a, b, c); }
577[[gnu::always_inline]] nce uint16x4_t get_high(uint16x8_t a) {
return vget_high_u16(a); }
578[[gnu::always_inline]] nce uint16x4_t get_low(uint16x8_t a) {
return vget_low_u16(a); }
579template <
int n>[[gnu::always_inline]] nce uint16x8_t extract(uint16x8_t a, uint16x8_t b) {
return vextq_u16(a, b, n); }
580[[gnu::always_inline]] nce uint16x8x2_t zip(uint16x8_t a, uint16x8_t b) {
return vzipq_u16(a, b); }
581[[gnu::always_inline]] nce uint16x8x2_t unzip(uint16x8_t a, uint16x8_t b) {
return vuzpq_u16(a, b); }
582[[gnu::always_inline]] nce uint16x8x2_t transpose(uint16x8_t a, uint16x8_t b) {
return vtrnq_u16(a, b); }
583[[gnu::always_inline]] nce int16x8_t bitwise_select(uint16x8_t a, int16x8_t b, int16x8_t c) {
return vbslq_s16(a, b, c); }
584[[gnu::always_inline]] nce poly16x8_t bitwise_select(uint16x8_t a, poly16x8_t b, poly16x8_t c) {
return vbslq_p16(a, b, c); }
585[[gnu::always_inline]] nce int16x4_t pairwise_add_accumulate_long(int16x4_t a, int8x8_t b) {
return vpadal_s8(a, b); }
586[[gnu::always_inline]] nce int16x4_t add(int16x4_t a, int16x4_t b) {
return vadd_s16(a, b); }
587[[gnu::always_inline]] nce int32x4_t add_long(int16x4_t a, int16x4_t b) {
return vaddl_s16(a, b); }
588[[gnu::always_inline]] nce int16x4_t add_halve(int16x4_t a, int16x4_t b) {
return vhadd_s16(a, b); }
589[[gnu::always_inline]] nce int16x4_t add_halve_round(int16x4_t a, int16x4_t b) {
return vrhadd_s16(a, b); }
590[[gnu::always_inline]] nce int16x4_t add_saturate(int16x4_t a, int16x4_t b) {
return vqadd_s16(a, b); }
591[[gnu::always_inline]] nce int16x4_t multiply(int16x4_t a, int16x4_t b) {
return vmul_s16(a, b); }
592[[gnu::always_inline]] nce int16x4_t multiply_add(int16x4_t a, int16x4_t b, int16x4_t c) {
return vmla_s16(a, b, c); }
593[[gnu::always_inline]] nce int16x4_t multiply_subtract(int16x4_t a, int16x4_t b, int16x4_t c) {
return vmls_s16(a, b, c); }
594[[gnu::always_inline]] nce int16x4_t multiply_double_saturate_high(int16x4_t a, int16x4_t b) {
return vqdmulh_s16(a, b); }
595[[gnu::always_inline]] nce int16x4_t multiply_double_round_saturate_high(int16x4_t a, int16x4_t b) {
return vqrdmulh_s16(a, b); }
596[[gnu::always_inline]] nce int32x4_t multiply_double_saturate_long(int16x4_t a, int16x4_t b) {
return vqdmull_s16(a, b); }
597[[gnu::always_inline]] nce int32x4_t multiply_long(int16x4_t a, int16x4_t b) {
return vmull_s16(a, b); }
598template <
int lane> [[gnu::always_inline]] nce int32x4_t multiply_double_saturate_long_lane(int16x4_t a, int16x4_t v) {
return vqdmull_lane_s16(a, v, lane); }
599template <
int lane> [[gnu::always_inline]] nce int16x4_t multiply_double_saturate_high_lane(int16x4_t a, int16x4_t v) {
return vqdmulh_lane_s16(a, v, lane); }
600template <
int lane> [[gnu::always_inline]] nce int16x4_t multiply_double_round_saturate_high_lane(int16x4_t a, int16x4_t v) {
return vqrdmulh_lane_s16(a, v, lane); }
601[[gnu::always_inline]] nce int16x4_t subtract(int16x4_t a, int16x4_t b) {
return vsub_s16(a, b); }
602[[gnu::always_inline]] nce int32x4_t subtract_long(int16x4_t a, int16x4_t b) {
return vsubl_s16(a, b); }
603[[gnu::always_inline]] nce int16x4_t subtract_halve(int16x4_t a, int16x4_t b) {
return vhsub_s16(a, b); }
604[[gnu::always_inline]] nce int16x4_t subtract_saturate(int16x4_t a, int16x4_t b) {
return vqsub_s16(a, b); }
605[[gnu::always_inline]] nce int16x4_t subtract_absolute(int16x4_t a, int16x4_t b) {
return vabd_s16(a, b); }
606[[gnu::always_inline]] nce int32x4_t subtract_absolute_long(int16x4_t a, int16x4_t b) {
return vabdl_s16(a, b); }
607[[gnu::always_inline]] nce int16x4_t subtract_absolute_add(int16x4_t a, int16x4_t b, int16x4_t c) {
return vaba_s16(a, b, c); }
608[[gnu::always_inline]] nce int32x4_t multiply_double_saturate_long(int16x4_t a, int16_t b) {
return vqdmull_n_s16(a, b); }
609[[gnu::always_inline]] nce int16x4_t multiply_double_saturate_high(int16x4_t a, int16_t b) {
return vqdmulh_n_s16(a, b); }
610[[gnu::always_inline]] nce int16x4_t multiply_double_round_saturate_high(int16x4_t a, int16_t b) {
return vqrdmulh_n_s16(a, b); }
611[[gnu::always_inline]] nce int16x4_t absolute(int16x4_t a) {
return vabs_s16(a); }
612[[gnu::always_inline]] nce int16x4_t absolute_saturate(int16x4_t a) {
return vqabs_s16(a); }
613[[gnu::always_inline]] nce int16x4_t max(int16x4_t a, int16x4_t b) {
return vmax_s16(a, b); }
614[[gnu::always_inline]] nce int16x4_t min(int16x4_t a, int16x4_t b) {
return vmin_s16(a, b); }
615[[gnu::always_inline]] nce int16x4_t pairwise_add(int16x4_t a, int16x4_t b) {
return vpadd_s16(a, b); }
616[[gnu::always_inline]] nce int32x2_t pairwise_add_long(int16x4_t a) {
return vpaddl_s16(a); }
617[[gnu::always_inline]] nce int16x4_t pairwise_max(int16x4_t a, int16x4_t b) {
return vpmax_s16(a, b); }
618[[gnu::always_inline]] nce int16x4_t pairwise_min(int16x4_t a, int16x4_t b) {
return vpmin_s16(a, b); }
619[[gnu::always_inline]] nce uint16x4_t equal(int16x4_t a, int16x4_t b) {
return vceq_s16(a, b); }
620[[gnu::always_inline]] nce uint16x4_t greater_than_or_equal(int16x4_t a, int16x4_t b) {
return vcge_s16(a, b); }
621[[gnu::always_inline]] nce uint16x4_t less_than_or_equal(int16x4_t a, int16x4_t b) {
return vcle_s16(a, b); }
622[[gnu::always_inline]] nce uint16x4_t greater_than(int16x4_t a, int16x4_t b) {
return vcgt_s16(a, b); }
623[[gnu::always_inline]] nce uint16x4_t less_than(int16x4_t a, int16x4_t b) {
return vclt_s16(a, b); }
624[[gnu::always_inline]] nce uint16x4_t compare_test_nonzero(int16x4_t a, int16x4_t b) {
return vtst_s16(a, b); }
625[[gnu::always_inline]] nce int16x4_t shift_left(int16x4_t a, int16x4_t b) {
return vshl_s16(a, b); }
626template <
int n>[[gnu::always_inline]] nce int16x4_t shift_left(int16x4_t a) {
return vshl_n_s16(a, n); }
627[[gnu::always_inline]] nce int16x4_t shift_left_saturate(int16x4_t a, int16x4_t b) {
return vqshl_s16(a, b); }
628template <
int n>[[gnu::always_inline]] nce int16x4_t shift_left_saturate(int16x4_t a) {
return vqshl_n_s16(a, n); }
629template <
int n>[[gnu::always_inline]] nce uint16x4_t shift_left_unsigned_saturate(int16x4_t a) {
return vqshlu_n_s16(a, n); }
630[[gnu::always_inline]] nce int16x4_t shift_left_round(int16x4_t a, int16x4_t b) {
return vrshl_s16(a, b); }
631[[gnu::always_inline]] nce int16x4_t shift_left_round_saturate(int16x4_t a, int16x4_t b) {
return vqrshl_s16(a, b); }
632template <
int n>[[gnu::always_inline]] nce int32x4_t shift_left_long(int16x4_t a) {
return vshll_n_s16(a, n); }
633template <
int n>[[gnu::always_inline]] nce int16x4_t shift_left_insert(int16x4_t a, int16x4_t b) {
return vsli_n_s16(a, b, n); }
634template <
int n>[[gnu::always_inline]] nce int16x4_t shift_right(int16x4_t a) {
return vshr_n_s16(a, n); }
635template <
int n>[[gnu::always_inline]] nce int16x4_t shift_right_round(int16x4_t a) {
return vrshr_n_s16(a, n); }
636template <
int n>[[gnu::always_inline]] nce int16x4_t shift_right_accumulate(int16x4_t a, int16x4_t b) {
return vsra_n_s16(a, b, n); }
637template <
int n>[[gnu::always_inline]] nce int16x4_t shift_right_accumulate_round(int16x4_t a, int16x4_t b) {
return vrsra_n_s16(a, b, n); }
638template <
int n>[[gnu::always_inline]] nce int16x4_t shift_right_insert(int16x4_t a, int16x4_t b) {
return vsri_n_s16(a, b, n); }
639template <> [[gnu::always_inline]] nce int8x8_t reinterpret(int16x4_t a) {
return vreinterpret_s8_s16(a); }
640template <> [[gnu::always_inline]] nce int32x2_t reinterpret(int16x4_t a) {
return vreinterpret_s32_s16(a); }
641template <> [[gnu::always_inline]] nce float32x2_t reinterpret(int16x4_t a) {
return vreinterpret_f32_s16(a); }
642template <> [[gnu::always_inline]] nce uint8x8_t reinterpret(int16x4_t a) {
return vreinterpret_u8_s16(a); }
643template <> [[gnu::always_inline]] nce uint16x4_t reinterpret(int16x4_t a) {
return vreinterpret_u16_s16(a); }
644template <> [[gnu::always_inline]] nce uint32x2_t reinterpret(int16x4_t a) {
return vreinterpret_u32_s16(a); }
645template <> [[gnu::always_inline]] nce poly8x8_t reinterpret(int16x4_t a) {
return vreinterpret_p8_s16(a); }
646template <> [[gnu::always_inline]] nce poly16x4_t reinterpret(int16x4_t a) {
return vreinterpret_p16_s16(a); }
647template <> [[gnu::always_inline]] nce uint64x1_t reinterpret(int16x4_t a) {
return vreinterpret_u64_s16(a); }
648template <> [[gnu::always_inline]] nce int64x1_t reinterpret(int16x4_t a) {
return vreinterpret_s64_s16(a); }
649[[gnu::always_inline]] nce int32x4_t move_long(int16x4_t a) {
return vmovl_s16(a); }
650template <
int lane> [[gnu::always_inline]] nce int16x4_t multiply_add_lane(int16x4_t a, int16x4_t b, int16x4_t v) {
return vmla_lane_s16(a, b, v, lane); }
651template <
int lane> [[gnu::always_inline]] nce int16x4_t multiply_subtract_lane(int16x4_t a, int16x4_t b, int16x4_t v) {
return vmls_lane_s16(a, b, v, lane); }
652[[gnu::always_inline]] nce int16x4_t multiply_add(int16x4_t a, int16x4_t b, int16_t c) {
return vmla_n_s16(a, b, c); }
653template <
int lane> [[gnu::always_inline]] nce int16x4_t multiply_lane(int16x4_t a, int16x4_t v) {
return vmul_lane_s16(a, v, lane); }
654template <
int lane> [[gnu::always_inline]] nce int32x4_t multiply_long_lane(int16x4_t a, int16x4_t v) {
return vmull_lane_s16(a, v, lane); }
655[[gnu::always_inline]] nce int16x4_t multiply_subtract(int16x4_t a, int16x4_t b, int16_t c) {
return vmls_n_s16(a, b, c); }
656[[gnu::always_inline]] nce int16x4_t negate(int16x4_t a) {
return vneg_s16(a); }
657[[gnu::always_inline]] nce int16x4_t negate_saturate(int16x4_t a) {
return vqneg_s16(a); }
658[[gnu::always_inline]] nce int16x4_t bitwise_not(int16x4_t a) {
return vmvn_s16(a); }
659[[gnu::always_inline]] nce int16x4_t bitwise_and(int16x4_t a, int16x4_t b) {
return vand_s16(a, b); }
660[[gnu::always_inline]] nce int16x4_t bitwise_or(int16x4_t a, int16x4_t b) {
return vorr_s16(a, b); }
661[[gnu::always_inline]] nce int16x4_t bitwise_xor(int16x4_t a, int16x4_t b) {
return veor_s16(a, b); }
662[[gnu::always_inline]] nce int16x4_t bitwise_or_not(int16x4_t a, int16x4_t b) {
return vorn_s16(a, b); }
663[[gnu::always_inline]] nce int16x4_t count_leading_sign_bits(int16x4_t a) {
return vcls_s16(a); }
664[[gnu::always_inline]] nce int16x4_t count_leading_zero_bits(int16x4_t a) {
return vclz_s16(a); }
665[[gnu::always_inline]] nce int16x4_t bitwise_clear(int16x4_t a, int16x4_t b) {
return vbic_s16(a, b); }
666template <
int lane>[[gnu::always_inline]] nce int16x4_t duplicate_lane(int16x4_t a) {
return vdup_lane_s16(a, lane); }
667template <
int lane>[[gnu::always_inline]] nce int16x8_t duplicate_lane_quad(int16x4_t a) {
return vdupq_lane_s16(a, lane); }
668[[gnu::always_inline]] nce int16x8_t combine(int16x4_t low, int16x4_t high) {
return vcombine_s16(low, high); }
669template <
int lane>[[gnu::always_inline]] nce int16_t get_lane(int16x4_t v) {
return vget_lane_s16(v, lane); }
670template <
int n>[[gnu::always_inline]] nce int16x4_t extract(int16x4_t a, int16x4_t b) {
return vext_s16(a, b, n); }
671[[gnu::always_inline]] nce int16x4_t reverse_64bit(int16x4_t a) {
return vrev64_s16(a); }
672[[gnu::always_inline]] nce int16x4_t reverse_32bit(int16x4_t a) {
return vrev32_s16(a); }
673[[gnu::always_inline]] nce int16x4x2_t zip(int16x4_t a, int16x4_t b) {
return vzip_s16(a, b); }
674[[gnu::always_inline]] nce int16x4x2_t unzip(int16x4_t a, int16x4_t b) {
return vuzp_s16(a, b); }
675[[gnu::always_inline]] nce int16x4x2_t transpose(int16x4_t a, int16x4_t b) {
return vtrn_s16(a, b); }
676[[gnu::always_inline]] nce int16x4_t multiply(int16x4_t a, int16_t b) {
return vmul_n_s16(a, b); }
677[[gnu::always_inline]] nce int32x4_t multiply_long(int16x4_t a, int16_t b) {
return vmull_n_s16(a, b); }
678[[gnu::always_inline]] nce int16x8_t add(int16x8_t a, int8x8_t b) {
return vaddw_s8(a, b); }
679[[gnu::always_inline]] nce int16x8_t multiply_add_long(int16x8_t a, int8x8_t b, int8x8_t c) {
return vmlal_s8(a, b, c); }
680[[gnu::always_inline]] nce int16x8_t multiply_subtract_long(int16x8_t a, int8x8_t b, int8x8_t c) {
return vmlsl_s8(a, b, c); }
681[[gnu::always_inline]] nce int16x8_t subtract(int16x8_t a, int8x8_t b) {
return vsubw_s8(a, b); }
682[[gnu::always_inline]] nce int16x8_t subtract_absolute_add(int16x8_t a, int8x8_t b, int8x8_t c) {
return vabal_s8(a, b, c); }
683template <
int lane> [[gnu::always_inline]] nce int16x8_t multiply_double_saturate_high_lane(int16x8_t a, int16x4_t v) {
return vqdmulhq_lane_s16(a, v, lane); }
684template <
int lane> [[gnu::always_inline]] nce int16x8_t multiply_double_round_saturate_high_lane(int16x8_t a, int16x4_t v) {
return vqrdmulhq_lane_s16(a, v, lane); }
685[[gnu::always_inline]] nce int8x8_t add_narrow(int16x8_t a, int16x8_t b) {
return vaddhn_s16(a, b); }
686[[gnu::always_inline]] nce int8x8_t add_round_narrow(int16x8_t a, int16x8_t b) {
return vraddhn_s16(a, b); }
687[[gnu::always_inline]] nce int16x8_t multiply_add(int16x8_t a, int16x8_t b, int16x8_t c) {
return vmlaq_s16(a, b, c); }
688[[gnu::always_inline]] nce int16x8_t multiply_subtract(int16x8_t a, int16x8_t b, int16x8_t c) {
return vmlsq_s16(a, b, c); }
689[[gnu::always_inline]] nce int8x8_t subtract_narrow(int16x8_t a, int16x8_t b) {
return vsubhn_s16(a, b); }
690[[gnu::always_inline]] nce int8x8_t subtract_round_narrow(int16x8_t a, int16x8_t b) {
return vrsubhn_s16(a, b); }
691[[gnu::always_inline]] nce int16x8_t subtract_absolute_add(int16x8_t a, int16x8_t b, int16x8_t c) {
return vabaq_s16(a, b, c); }
692[[gnu::always_inline]] nce int32x4_t pairwise_add_long(int16x8_t a) {
return vpaddlq_s16(a); }
693[[gnu::always_inline]] nce int16x8_t pairwise_add_accumulate_long(int16x8_t a, int8x16_t b) {
return vpadalq_s8(a, b); }
694[[gnu::always_inline]] nce uint16x8_t equal(int16x8_t a, int16x8_t b) {
return vceqq_s16(a, b); }
695[[gnu::always_inline]] nce uint16x8_t greater_than_or_equal(int16x8_t a, int16x8_t b) {
return vcgeq_s16(a, b); }
696[[gnu::always_inline]] nce uint16x8_t less_than_or_equal(int16x8_t a, int16x8_t b) {
return vcleq_s16(a, b); }
697[[gnu::always_inline]] nce uint16x8_t greater_than(int16x8_t a, int16x8_t b) {
return vcgtq_s16(a, b); }
698[[gnu::always_inline]] nce uint16x8_t less_than(int16x8_t a, int16x8_t b) {
return vcltq_s16(a, b); }
699[[gnu::always_inline]] nce uint16x8_t compare_test_nonzero(int16x8_t a, int16x8_t b) {
return vtstq_s16(a, b); }
700template <
int n>[[gnu::always_inline]] nce int16x8_t shift_left(int16x8_t a) {
return vshlq_n_s16(a, n); }
701template <
int n>[[gnu::always_inline]] nce uint16x8_t shift_left_unsigned_saturate(int16x8_t a) {
return vqshluq_n_s16(a, n); }
702template <
int n>[[gnu::always_inline]] nce int16x8_t shift_right_accumulate(int16x8_t a, int16x8_t b) {
return vsraq_n_s16(a, b, n); }
703template <
int n>[[gnu::always_inline]] nce int16x8_t shift_right_accumulate_round(int16x8_t a, int16x8_t b) {
return vrsraq_n_s16(a, b, n); }
704template <
int n>[[gnu::always_inline]] nce int8x8_t shift_right_narrow(int16x8_t a) {
return vshrn_n_s16(a, n); }
705template <
int n>[[gnu::always_inline]] nce uint8x8_t shift_right_saturate_narrow_unsigned(int16x8_t a) {
return vqshrun_n_s16(a, n); }
706template <
int n>[[gnu::always_inline]] nce int8x8_t shift_right_saturate_narrow(int16x8_t a) {
return vqshrn_n_s16(a, n); }
707template <
int n>[[gnu::always_inline]] nce uint8x8_t shift_right_round_saturate_narrow_unsigned(int16x8_t a) {
return vqrshrun_n_s16(a, n); }
708template <
int n>[[gnu::always_inline]] nce int8x8_t shift_right_round_saturate_narrow(int16x8_t a) {
return vqrshrn_n_s16(a, n); }
709template <
int n>[[gnu::always_inline]] nce int8x8_t shift_right_round_narrow(int16x8_t a) {
return vrshrn_n_s16(a, n); }
710template <> [[gnu::always_inline]] nce poly8x16_t reinterpret(int16x8_t a) {
return vreinterpretq_p8_s16(a); }
711template <> [[gnu::always_inline]] nce poly16x8_t reinterpret(int16x8_t a) {
return vreinterpretq_p16_s16(a); }
712[[gnu::always_inline]] nce int8x8_t move_narrow(int16x8_t a) {
return vmovn_s16(a); }
713[[gnu::always_inline]] nce int8x8_t move_saturate_narrow(int16x8_t a) {
return vqmovn_s16(a); }
714[[gnu::always_inline]] nce uint8x8_t move_unsigned_saturate_narrow(int16x8_t a) {
return vqmovun_s16(a); }
715template <
int lane> [[gnu::always_inline]] nce int16x8_t multiply_lane(int16x8_t a, int16x4_t v) {
return vmulq_lane_s16(a, v, lane); }
716template <
int lane> [[gnu::always_inline]] nce int16x8_t multiply_add_lane(int16x8_t a, int16x8_t b, int16x4_t v) {
return vmlaq_lane_s16(a, b, v, lane); }
717template <
int lane> [[gnu::always_inline]] nce int16x8_t multiply_subtract_lane(int16x8_t a, int16x8_t b, int16x4_t v) {
return vmlsq_lane_s16(a, b, v, lane); }
718[[gnu::always_inline]] nce int16x8_t multiply_add(int16x8_t a, int16x8_t b, int16_t c) {
return vmlaq_n_s16(a, b, c); }
719[[gnu::always_inline]] nce int16x8_t multiply_subtract(int16x8_t a, int16x8_t b, int16_t c) {
return vmlsq_n_s16(a, b, c); }
720[[gnu::always_inline]] nce int16x4_t get_high(int16x8_t a) {
return vget_high_s16(a); }
721[[gnu::always_inline]] nce int16x4_t get_low(int16x8_t a) {
return vget_low_s16(a); }
722template <
int n>[[gnu::always_inline]] nce int16x8_t extract(int16x8_t a, int16x8_t b) {
return vextq_s16(a, b, n); }
723[[gnu::always_inline]] nce int16x8x2_t zip(int16x8_t a, int16x8_t b) {
return vzipq_s16(a, b); }
724[[gnu::always_inline]] nce int16x8x2_t unzip(int16x8_t a, int16x8_t b) {
return vuzpq_s16(a, b); }
725[[gnu::always_inline]] nce int16x8x2_t transpose(int16x8_t a, int16x8_t b) {
return vtrnq_s16(a, b); }
726[[gnu::always_inline]] nce int32x2_t add(int32x2_t a, int32x2_t b) {
return vadd_s32(a, b); }
727[[gnu::always_inline]] nce int64x2_t add_long(int32x2_t a, int32x2_t b) {
return vaddl_s32(a, b); }
728[[gnu::always_inline]] nce int32x2_t add_halve(int32x2_t a, int32x2_t b) {
return vhadd_s32(a, b); }
729[[gnu::always_inline]] nce int32x2_t add_halve_round(int32x2_t a, int32x2_t b) {
return vrhadd_s32(a, b); }
730[[gnu::always_inline]] nce int32x2_t add_saturate(int32x2_t a, int32x2_t b) {
return vqadd_s32(a, b); }
731[[gnu::always_inline]] nce int32x2_t multiply(int32x2_t a, int32x2_t b) {
return vmul_s32(a, b); }
732[[gnu::always_inline]] nce int32x2_t multiply_add(int32x2_t a, int32x2_t b, int32x2_t c) {
return vmla_s32(a, b, c); }
733[[gnu::always_inline]] nce int32x2_t multiply_subtract(int32x2_t a, int32x2_t b, int32x2_t c) {
return vmls_s32(a, b, c); }
734[[gnu::always_inline]] nce int32x2_t multiply_double_saturate_high(int32x2_t a, int32x2_t b) {
return vqdmulh_s32(a, b); }
735[[gnu::always_inline]] nce int32x2_t multiply_double_round_saturate_high(int32x2_t a, int32x2_t b) {
return vqrdmulh_s32(a, b); }
736[[gnu::always_inline]] nce int64x2_t multiply_double_saturate_long(int32x2_t a, int32x2_t b) {
return vqdmull_s32(a, b); }
737[[gnu::always_inline]] nce int64x2_t multiply_long(int32x2_t a, int32x2_t b) {
return vmull_s32(a, b); }
738template <
int lane> [[gnu::always_inline]] nce int64x2_t multiply_double_saturate_long_lane(int32x2_t a, int32x2_t v) {
return vqdmull_lane_s32(a, v, lane); }
739template <
int lane> [[gnu::always_inline]] nce int32x2_t multiply_double_saturate_high_lane(int32x2_t a, int32x2_t v) {
return vqdmulh_lane_s32(a, v, lane); }
740template <
int lane> [[gnu::always_inline]] nce int32x2_t multiply_double_round_saturate_high_lane(int32x2_t a, int32x2_t v) {
return vqrdmulh_lane_s32(a, v, lane); }
741[[gnu::always_inline]] nce int32x2_t subtract(int32x2_t a, int32x2_t b) {
return vsub_s32(a, b); }
742[[gnu::always_inline]] nce int64x2_t subtract_long(int32x2_t a, int32x2_t b) {
return vsubl_s32(a, b); }
743[[gnu::always_inline]] nce int32x2_t subtract_halve(int32x2_t a, int32x2_t b) {
return vhsub_s32(a, b); }
744[[gnu::always_inline]] nce int32x2_t subtract_saturate(int32x2_t a, int32x2_t b) {
return vqsub_s32(a, b); }
745[[gnu::always_inline]] nce int32x2_t subtract_absolute(int32x2_t a, int32x2_t b) {
return vabd_s32(a, b); }
746[[gnu::always_inline]] nce int64x2_t subtract_absolute_long(int32x2_t a, int32x2_t b) {
return vabdl_s32(a, b); }
747[[gnu::always_inline]] nce int32x2_t subtract_absolute_add(int32x2_t a, int32x2_t b, int32x2_t c) {
return vaba_s32(a, b, c); }
748[[gnu::always_inline]] nce int64x2_t multiply_double_saturate_long(int32x2_t a, int32_t b) {
return vqdmull_n_s32(a, b); }
749[[gnu::always_inline]] nce int32x2_t multiply_double_saturate_high(int32x2_t a, int32_t b) {
return vqdmulh_n_s32(a, b); }
750[[gnu::always_inline]] nce int32x2_t multiply_double_round_saturate_high(int32x2_t a, int32_t b) {
return vqrdmulh_n_s32(a, b); }
751[[gnu::always_inline]] nce int32x2_t absolute(int32x2_t a) {
return vabs_s32(a); }
752[[gnu::always_inline]] nce int32x2_t absolute_saturate(int32x2_t a) {
return vqabs_s32(a); }
753[[gnu::always_inline]] nce int32x2_t max(int32x2_t a, int32x2_t b) {
return vmax_s32(a, b); }
754[[gnu::always_inline]] nce int32x2_t min(int32x2_t a, int32x2_t b) {
return vmin_s32(a, b); }
755[[gnu::always_inline]] nce int32x2_t pairwise_add(int32x2_t a, int32x2_t b) {
return vpadd_s32(a, b); }
756[[gnu::always_inline]] nce int64x1_t pairwise_add_long(int32x2_t a) {
return vpaddl_s32(a); }
757[[gnu::always_inline]] nce int32x2_t pairwise_add_accumulate_long(int32x2_t a, int16x4_t b) {
return vpadal_s16(a, b); }
758[[gnu::always_inline]] nce int32x2_t pairwise_max(int32x2_t a, int32x2_t b) {
return vpmax_s32(a, b); }
759[[gnu::always_inline]] nce int32x2_t pairwise_min(int32x2_t a, int32x2_t b) {
return vpmin_s32(a, b); }
760[[gnu::always_inline]] nce uint32x2_t equal(int32x2_t a, int32x2_t b) {
return vceq_s32(a, b); }
761[[gnu::always_inline]] nce uint32x2_t greater_than_or_equal(int32x2_t a, int32x2_t b) {
return vcge_s32(a, b); }
762[[gnu::always_inline]] nce uint32x2_t less_than_or_equal(int32x2_t a, int32x2_t b) {
return vcle_s32(a, b); }
763[[gnu::always_inline]] nce uint32x2_t greater_than(int32x2_t a, int32x2_t b) {
return vcgt_s32(a, b); }
764[[gnu::always_inline]] nce uint32x2_t less_than(int32x2_t a, int32x2_t b) {
return vclt_s32(a, b); }
765[[gnu::always_inline]] nce uint32x2_t compare_test_nonzero(int32x2_t a, int32x2_t b) {
return vtst_s32(a, b); }
766[[gnu::always_inline]] nce int32x2_t shift_left(int32x2_t a, int32x2_t b) {
return vshl_s32(a, b); }
767template <
int n>[[gnu::always_inline]] nce int32x2_t shift_left(int32x2_t a) {
return vshl_n_s32(a, n); }
768[[gnu::always_inline]] nce int32x2_t shift_left_saturate(int32x2_t a, int32x2_t b) {
return vqshl_s32(a, b); }
769template <
int n>[[gnu::always_inline]] nce int32x2_t shift_left_saturate(int32x2_t a) {
return vqshl_n_s32(a, n); }
770template <
int n>[[gnu::always_inline]] nce uint32x2_t shift_left_unsigned_saturate(int32x2_t a) {
return vqshlu_n_s32(a, n); }
771[[gnu::always_inline]] nce int32x2_t shift_left_round(int32x2_t a, int32x2_t b) {
return vrshl_s32(a, b); }
772[[gnu::always_inline]] nce int32x2_t shift_left_round_saturate(int32x2_t a, int32x2_t b) {
return vqrshl_s32(a, b); }
773template <
int n>[[gnu::always_inline]] nce int64x2_t shift_left_long(int32x2_t a) {
return vshll_n_s32(a, n); }
774template <
int n>[[gnu::always_inline]] nce int32x2_t shift_left_insert(int32x2_t a, int32x2_t b) {
return vsli_n_s32(a, b, n); }
775template <
int n>[[gnu::always_inline]] nce int32x2_t shift_right(int32x2_t a) {
return vshr_n_s32(a, n); }
776template <
int n>[[gnu::always_inline]] nce int32x2_t shift_right_round(int32x2_t a) {
return vrshr_n_s32(a, n); }
777template <
int n>[[gnu::always_inline]] nce int32x2_t shift_right_accumulate(int32x2_t a, int32x2_t b) {
return vsra_n_s32(a, b, n); }
778template <
int n>[[gnu::always_inline]] nce int32x2_t shift_right_accumulate_round(int32x2_t a, int32x2_t b) {
return vrsra_n_s32(a, b, n); }
779template <
int n>[[gnu::always_inline]] nce int32x2_t shift_right_insert(int32x2_t a, int32x2_t b) {
return vsri_n_s32(a, b, n); }
780template <> [[gnu::always_inline]] nce float32x2_t convert(int32x2_t a) {
return vcvt_f32_s32(a); }
781template <
int fracbits> [[gnu::always_inline]] nce float32x2_t convert_n(int32x2_t a) {
return vcvt_n_f32_s32(a, fracbits); }
782template <> [[gnu::always_inline]] nce int8x8_t reinterpret(int32x2_t a) {
return vreinterpret_s8_s32(a); }
783template <> [[gnu::always_inline]] nce int16x4_t reinterpret(int32x2_t a) {
return vreinterpret_s16_s32(a); }
784template <> [[gnu::always_inline]] nce float32x2_t reinterpret(int32x2_t a) {
return vreinterpret_f32_s32(a); }
785template <> [[gnu::always_inline]] nce uint8x8_t reinterpret(int32x2_t a) {
return vreinterpret_u8_s32(a); }
786template <> [[gnu::always_inline]] nce uint16x4_t reinterpret(int32x2_t a) {
return vreinterpret_u16_s32(a); }
787template <> [[gnu::always_inline]] nce uint32x2_t reinterpret(int32x2_t a) {
return vreinterpret_u32_s32(a); }
788template <> [[gnu::always_inline]] nce poly8x8_t reinterpret(int32x2_t a) {
return vreinterpret_p8_s32(a); }
789template <> [[gnu::always_inline]] nce poly16x4_t reinterpret(int32x2_t a) {
return vreinterpret_p16_s32(a); }
790template <> [[gnu::always_inline]] nce uint64x1_t reinterpret(int32x2_t a) {
return vreinterpret_u64_s32(a); }
791template <> [[gnu::always_inline]] nce int64x1_t reinterpret(int32x2_t a) {
return vreinterpret_s64_s32(a); }
792[[gnu::always_inline]] nce int64x2_t move_long(int32x2_t a) {
return vmovl_s32(a); }
793template <
int lane> [[gnu::always_inline]] nce int32x2_t multiply_add_lane(int32x2_t a, int32x2_t b, int32x2_t v) {
return vmla_lane_s32(a, b, v, lane); }
794template <
int lane> [[gnu::always_inline]] nce int32x2_t multiply_subtract_lane(int32x2_t a, int32x2_t b, int32x2_t v) {
return vmls_lane_s32(a, b, v, lane); }
795[[gnu::always_inline]] nce int32x2_t multiply_add(int32x2_t a, int32x2_t b, int32_t c) {
return vmla_n_s32(a, b, c); }
796template <
int lane> [[gnu::always_inline]] nce int32x2_t multiply_lane(int32x2_t a, int32x2_t v) {
return vmul_lane_s32(a, v, lane); }
797template <
int lane> [[gnu::always_inline]] nce int64x2_t multiply_long_lane(int32x2_t a, int32x2_t v) {
return vmull_lane_s32(a, v, lane); }
798[[gnu::always_inline]] nce int32x2_t multiply_subtract(int32x2_t a, int32x2_t b, int32_t c) {
return vmls_n_s32(a, b, c); }
799[[gnu::always_inline]] nce int32x2_t negate(int32x2_t a) {
return vneg_s32(a); }
800[[gnu::always_inline]] nce int32x2_t negate_saturate(int32x2_t a) {
return vqneg_s32(a); }
801[[gnu::always_inline]] nce int32x2_t bitwise_not(int32x2_t a) {
return vmvn_s32(a); }
802[[gnu::always_inline]] nce int32x2_t bitwise_and(int32x2_t a, int32x2_t b) {
return vand_s32(a, b); }
803[[gnu::always_inline]] nce int32x2_t bitwise_or(int32x2_t a, int32x2_t b) {
return vorr_s32(a, b); }
804[[gnu::always_inline]] nce int32x2_t bitwise_xor(int32x2_t a, int32x2_t b) {
return veor_s32(a, b); }
805[[gnu::always_inline]] nce int32x2_t bitwise_or_not(int32x2_t a, int32x2_t b) {
return vorn_s32(a, b); }
806[[gnu::always_inline]] nce int32x2_t count_leading_sign_bits(int32x2_t a) {
return vcls_s32(a); }
807[[gnu::always_inline]] nce int32x2_t count_leading_zero_bits(int32x2_t a) {
return vclz_s32(a); }
808[[gnu::always_inline]] nce int32x2_t bitwise_clear(int32x2_t a, int32x2_t b) {
return vbic_s32(a, b); }
809template <
int lane>[[gnu::always_inline]] nce int32x2_t duplicate_lane(int32x2_t a) {
return vdup_lane_s32(a, lane); }
810template <
int lane>[[gnu::always_inline]] nce int32x4_t duplicate_lane_quad(int32x2_t a) {
return vdupq_lane_s32(a, lane); }
811[[gnu::always_inline]] nce int32x4_t combine(int32x2_t low, int32x2_t high) {
return vcombine_s32(low, high); }
812template <
int lane>[[gnu::always_inline]] nce int32_t get_lane(int32x2_t v) {
return vget_lane_s32(v, lane); }
813template <
int n>[[gnu::always_inline]] nce int32x2_t extract(int32x2_t a, int32x2_t b) {
return vext_s32(a, b, n); }
814[[gnu::always_inline]] nce int32x2_t reverse_64bit(int32x2_t a) {
return vrev64_s32(a); }
815[[gnu::always_inline]] nce int32x2x2_t zip(int32x2_t a, int32x2_t b) {
return vzip_s32(a, b); }
816[[gnu::always_inline]] nce int32x2x2_t unzip(int32x2_t a, int32x2_t b) {
return vuzp_s32(a, b); }
817[[gnu::always_inline]] nce int32x2x2_t transpose(int32x2_t a, int32x2_t b) {
return vtrn_s32(a, b); }
818[[gnu::always_inline]] nce int32x2_t multiply(int32x2_t a, int32_t b) {
return vmul_n_s32(a, b); }
819[[gnu::always_inline]] nce int64x2_t multiply_long(int32x2_t a, int32_t b) {
return vmull_n_s32(a, b); }
820[[gnu::always_inline]] nce int32x4_t add(int32x4_t a, int16x4_t b) {
return vaddw_s16(a, b); }
821[[gnu::always_inline]] nce int32x4_t multiply_add_long(int32x4_t a, int16x4_t b, int16x4_t c) {
return vmlal_s16(a, b, c); }
822[[gnu::always_inline]] nce int32x4_t multiply_subtract_long(int32x4_t a, int16x4_t b, int16x4_t c) {
return vmlsl_s16(a, b, c); }
823[[gnu::always_inline]] nce int32x4_t multiply_double_add_saturate_long(int32x4_t a, int16x4_t b, int16x4_t c) {
return vqdmlal_s16(a, b, c); }
824[[gnu::always_inline]] nce int32x4_t multiply_double_subtract_saturate_long(int32x4_t a, int16x4_t b, int16x4_t c) {
return vqdmlsl_s16(a, b, c); }
825template <
int lane> [[gnu::always_inline]] nce int32x4_t multiply_double_add_saturate_long_lane(int32x4_t a, int16x4_t b, int16x4_t v) {
return vqdmlal_lane_s16(a, b, v, lane); }
826template <
int lane> [[gnu::always_inline]] nce int32x4_t multiply_double_subtract_saturate_long_lane(int32x4_t a, int16x4_t b, int16x4_t v) {
return vqdmlsl_lane_s16(a, b, v, lane); }
827[[gnu::always_inline]] nce int32x4_t subtract_absolute_add(int32x4_t a, int16x4_t b, int16x4_t c) {
return vabal_s16(a, b, c); }
828[[gnu::always_inline]] nce int32x4_t multiply_double_add_saturate_long(int32x4_t a, int16x4_t b, int16_t c) {
return vqdmlal_n_s16(a, b, c); }
829[[gnu::always_inline]] nce int32x4_t multiply_double_subtract_saturate_long(int32x4_t a, int16x4_t b, int16_t c) {
return vqdmlsl_n_s16(a, b, c); }
830[[gnu::always_inline]] nce int32x4_t subtract(int32x4_t a, int16x4_t b) {
return vsubw_s16(a, b); }
831template <
int lane> [[gnu::always_inline]] nce int32x4_t multiply_double_saturate_high_lane(int32x4_t a, int32x2_t v) {
return vqdmulhq_lane_s32(a, v, lane); }
832template <
int lane> [[gnu::always_inline]] nce int32x4_t multiply_double_round_saturate_high_lane(int32x4_t a, int32x2_t v) {
return vqrdmulhq_lane_s32(a, v, lane); }
833[[gnu::always_inline]] nce int16x4_t add_narrow(int32x4_t a, int32x4_t b) {
return vaddhn_s32(a, b); }
834[[gnu::always_inline]] nce int16x4_t add_round_narrow(int32x4_t a, int32x4_t b) {
return vraddhn_s32(a, b); }
835[[gnu::always_inline]] nce int32x4_t multiply_add(int32x4_t a, int32x4_t b, int32x4_t c) {
return vmlaq_s32(a, b, c); }
836[[gnu::always_inline]] nce int32x4_t multiply_subtract(int32x4_t a, int32x4_t b, int32x4_t c) {
return vmlsq_s32(a, b, c); }
837[[gnu::always_inline]] nce int16x4_t subtract_narrow(int32x4_t a, int32x4_t b) {
return vsubhn_s32(a, b); }
838[[gnu::always_inline]] nce int16x4_t subtract_round_narrow(int32x4_t a, int32x4_t b) {
return vrsubhn_s32(a, b); }
839[[gnu::always_inline]] nce int32x4_t subtract_absolute_add(int32x4_t a, int32x4_t b, int32x4_t c) {
return vabaq_s32(a, b, c); }
840[[gnu::always_inline]] nce int64x2_t pairwise_add_long(int32x4_t a) {
return vpaddlq_s32(a); }
841[[gnu::always_inline]] nce int32x4_t pairwise_add_accumulate_long(int32x4_t a, int16x8_t b) {
return vpadalq_s16(a, b); }
842[[gnu::always_inline]] nce uint32x4_t equal(int32x4_t a, int32x4_t b) {
return vceqq_s32(a, b); }
843[[gnu::always_inline]] nce uint32x4_t greater_than_or_equal(int32x4_t a, int32x4_t b) {
return vcgeq_s32(a, b); }
844[[gnu::always_inline]] nce uint32x4_t less_than_or_equal(int32x4_t a, int32x4_t b) {
return vcleq_s32(a, b); }
845[[gnu::always_inline]] nce uint32x4_t greater_than(int32x4_t a, int32x4_t b) {
return vcgtq_s32(a, b); }
846[[gnu::always_inline]] nce uint32x4_t less_than(int32x4_t a, int32x4_t b) {
return vcltq_s32(a, b); }
847[[gnu::always_inline]] nce uint32x4_t compare_test_nonzero(int32x4_t a, int32x4_t b) {
return vtstq_s32(a, b); }
848template <
int n>[[gnu::always_inline]] nce int32x4_t shift_left(int32x4_t a) {
return vshlq_n_s32(a, n); }
849template <
int n>[[gnu::always_inline]] nce uint32x4_t shift_left_unsigned_saturate(int32x4_t a) {
return vqshluq_n_s32(a, n); }
850template <
int n>[[gnu::always_inline]] nce int32x4_t shift_right_accumulate(int32x4_t a, int32x4_t b) {
return vsraq_n_s32(a, b, n); }
851template <
int n>[[gnu::always_inline]] nce int32x4_t shift_right_accumulate_round(int32x4_t a, int32x4_t b) {
return vrsraq_n_s32(a, b, n); }
852template <
int n>[[gnu::always_inline]] nce int16x4_t shift_right_narrow(int32x4_t a) {
return vshrn_n_s32(a, n); }
853template <
int n>[[gnu::always_inline]] nce uint16x4_t shift_right_saturate_narrow_unsigned(int32x4_t a) {
return vqshrun_n_s32(a, n); }
854template <
int n>[[gnu::always_inline]] nce int16x4_t shift_right_saturate_narrow(int32x4_t a) {
return vqshrn_n_s32(a, n); }
855template <
int n>[[gnu::always_inline]] nce uint16x4_t shift_right_round_saturate_narrow_unsigned(int32x4_t a) {
return vqrshrun_n_s32(a, n); }
856template <
int n>[[gnu::always_inline]] nce int16x4_t shift_right_round_saturate_narrow(int32x4_t a) {
return vqrshrn_n_s32(a, n); }
857template <
int n>[[gnu::always_inline]] nce int16x4_t shift_right_round_narrow(int32x4_t a) {
return vrshrn_n_s32(a, n); }
858template <
int fracbits> [[gnu::always_inline]] nce float32x4_t convert_n(int32x4_t a) {
return vcvtq_n_f32_s32(a, fracbits); }
859template <> [[gnu::always_inline]] nce poly8x16_t reinterpret(int32x4_t a) {
return vreinterpretq_p8_s32(a); }
860template <> [[gnu::always_inline]] nce poly16x8_t reinterpret(int32x4_t a) {
return vreinterpretq_p16_s32(a); }
861[[gnu::always_inline]] nce int16x4_t move_narrow(int32x4_t a) {
return vmovn_s32(a); }
862[[gnu::always_inline]] nce int16x4_t move_saturate_narrow(int32x4_t a) {
return vqmovn_s32(a); }
863[[gnu::always_inline]] nce uint16x4_t move_unsigned_saturate_narrow(int32x4_t a) {
return vqmovun_s32(a); }
864template <
int lane> [[gnu::always_inline]] nce int32x4_t multiply_add_long_lane(int32x4_t a, int16x4_t b, int16x4_t v) {
return vmlal_lane_s16(a, b, v, lane); }
865template <
int lane> [[gnu::always_inline]] nce int32x4_t multiply_subtract_long_lane(int32x4_t a, int16x4_t b, int16x4_t v) {
return vmlsl_lane_s16(a, b, v, lane); }
866[[gnu::always_inline]] nce int32x4_t multiply_add_long(int32x4_t a, int16x4_t b, int16_t c) {
return vmlal_n_s16(a, b, c); }
867[[gnu::always_inline]] nce int32x4_t multiply_subtract_long(int32x4_t a, int16x4_t b, int16_t c) {
return vmlsl_n_s16(a, b, c); }
868template <
int lane> [[gnu::always_inline]] nce int32x4_t multiply_lane(int32x4_t a, int32x2_t v) {
return vmulq_lane_s32(a, v, lane); }
869template <
int lane> [[gnu::always_inline]] nce int32x4_t multiply_add_lane(int32x4_t a, int32x4_t b, int32x2_t v) {
return vmlaq_lane_s32(a, b, v, lane); }
870template <
int lane> [[gnu::always_inline]] nce int32x4_t multiply_subtract_lane(int32x4_t a, int32x4_t b, int32x2_t v) {
return vmlsq_lane_s32(a, b, v, lane); }
871[[gnu::always_inline]] nce int32x4_t multiply_add(int32x4_t a, int32x4_t b, int32_t c) {
return vmlaq_n_s32(a, b, c); }
872[[gnu::always_inline]] nce int32x4_t multiply_subtract(int32x4_t a, int32x4_t b, int32_t c) {
return vmlsq_n_s32(a, b, c); }
873[[gnu::always_inline]] nce int32x2_t get_high(int32x4_t a) {
return vget_high_s32(a); }
874[[gnu::always_inline]] nce int32x2_t get_low(int32x4_t a) {
return vget_low_s32(a); }
875template <
int n>[[gnu::always_inline]] nce int32x4_t extract(int32x4_t a, int32x4_t b) {
return vextq_s32(a, b, n); }
876[[gnu::always_inline]] nce int32x4x2_t zip(int32x4_t a, int32x4_t b) {
return vzipq_s32(a, b); }
877[[gnu::always_inline]] nce int32x4x2_t unzip(int32x4_t a, int32x4_t b) {
return vuzpq_s32(a, b); }
878[[gnu::always_inline]] nce int32x4x2_t transpose(int32x4_t a, int32x4_t b) {
return vtrnq_s32(a, b); }
879[[gnu::always_inline]] nce uint64x1_t add(uint64x1_t a, uint64x1_t b) {
return vadd_u64(a, b); }
880[[gnu::always_inline]] nce uint64x1_t add_saturate(uint64x1_t a, uint64x1_t b) {
return vqadd_u64(a, b); }
881[[gnu::always_inline]] nce uint64x1_t subtract(uint64x1_t a, uint64x1_t b) {
return vsub_u64(a, b); }
882[[gnu::always_inline]] nce uint64x1_t subtract_saturate(uint64x1_t a, uint64x1_t b) {
return vqsub_u64(a, b); }
883[[gnu::always_inline]] nce uint64x1_t pairwise_add_accumulate_long(uint64x1_t a, uint32x2_t b) {
return vpadal_u32(a, b); }
884[[gnu::always_inline]] nce uint64x1_t shift_left(uint64x1_t a, int64x1_t b) {
return vshl_u64(a, b); }
885template <
int n>[[gnu::always_inline]] nce uint64x1_t shift_left(uint64x1_t a) {
return vshl_n_u64(a, n); }
886template <
int n>[[gnu::always_inline]] nce uint64x1_t shift_right(uint64x1_t a) {
return vshr_n_u64(a, n); }
887template <
int n>[[gnu::always_inline]] nce uint64x1_t shift_right_round(uint64x1_t a) {
return vrshr_n_u64(a, n); }
888template <
int n>[[gnu::always_inline]] nce uint64x1_t shift_right_accumulate(uint64x1_t a, uint64x1_t b) {
return vsra_n_u64(a, b, n); }
889template <
int n>[[gnu::always_inline]] nce uint64x1_t shift_right_accumulate_round(uint64x1_t a, uint64x1_t b) {
return vrsra_n_u64(a, b, n); }
890template <
int n>[[gnu::always_inline]] nce uint64x1_t shift_right_insert(uint64x1_t a, uint64x1_t b) {
return vsri_n_u64(a, b, n); }
891[[gnu::always_inline]] nce uint64x1_t shift_left_saturate(uint64x1_t a, int64x1_t b) {
return vqshl_u64(a, b); }
892template <
int n>[[gnu::always_inline]] nce uint64x1_t shift_left_saturate(uint64x1_t a) {
return vqshl_n_u64(a, n); }
893template <
int n>[[gnu::always_inline]] nce uint64x1_t shift_left_insert(uint64x1_t a, uint64x1_t b) {
return vsli_n_u64(a, b, n); }
894[[gnu::always_inline]] nce uint64x1_t shift_left_round(uint64x1_t a, int64x1_t b) {
return vrshl_u64(a, b); }
895[[gnu::always_inline]] nce uint64x1_t shift_left_round_saturate(uint64x1_t a, int64x1_t b) {
return vqrshl_u64(a, b); }
896template <> [[gnu::always_inline]] nce int8x8_t reinterpret(uint64x1_t a) {
return vreinterpret_s8_u64(a); }
897template <> [[gnu::always_inline]] nce int16x4_t reinterpret(uint64x1_t a) {
return vreinterpret_s16_u64(a); }
898template <> [[gnu::always_inline]] nce int32x2_t reinterpret(uint64x1_t a) {
return vreinterpret_s32_u64(a); }
899template <> [[gnu::always_inline]] nce float32x2_t reinterpret(uint64x1_t a) {
return vreinterpret_f32_u64(a); }
900template <> [[gnu::always_inline]] nce uint8x8_t reinterpret(uint64x1_t a) {
return vreinterpret_u8_u64(a); }
901template <> [[gnu::always_inline]] nce uint16x4_t reinterpret(uint64x1_t a) {
return vreinterpret_u16_u64(a); }
902template <> [[gnu::always_inline]] nce uint32x2_t reinterpret(uint64x1_t a) {
return vreinterpret_u32_u64(a); }
903template <> [[gnu::always_inline]] nce poly8x8_t reinterpret(uint64x1_t a) {
return vreinterpret_p8_u64(a); }
904template <> [[gnu::always_inline]] nce poly16x4_t reinterpret(uint64x1_t a) {
return vreinterpret_p16_u64(a); }
905template <> [[gnu::always_inline]] nce int64x1_t reinterpret(uint64x1_t a) {
return vreinterpret_s64_u64(a); }
906[[gnu::always_inline]] nce uint64x1_t bitwise_and(uint64x1_t a, uint64x1_t b) {
return vand_u64(a, b); }
907[[gnu::always_inline]] nce uint64x1_t bitwise_or(uint64x1_t a, uint64x1_t b) {
return vorr_u64(a, b); }
908[[gnu::always_inline]] nce uint64x1_t bitwise_xor(uint64x1_t a, uint64x1_t b) {
return veor_u64(a, b); }
909[[gnu::always_inline]] nce uint64x1_t bitwise_or_not(uint64x1_t a, uint64x1_t b) {
return vorn_u64(a, b); }
910[[gnu::always_inline]] nce uint64x1_t bitwise_clear(uint64x1_t a, uint64x1_t b) {
return vbic_u64(a, b); }
911[[gnu::always_inline]] nce uint64x1_t bitwise_select(uint64x1_t a, uint64x1_t b, uint64x1_t c) {
return vbsl_u64(a, b, c); }
912template <
int lane>[[gnu::always_inline]] nce uint64x1_t duplicate_lane(uint64x1_t a) {
return vdup_lane_u64(a, lane); }
913template <
int lane>[[gnu::always_inline]] nce uint64x2_t duplicate_lane_quad(uint64x1_t a) {
return vdupq_lane_u64(a, lane); }
914[[gnu::always_inline]] nce uint64x2_t combine(uint64x1_t low, uint64x1_t high) {
return vcombine_u64(low, high); }
915template <
int lane>[[gnu::always_inline]] nce uint64_t get_lane(uint64x1_t v) {
return vget_lane_u64(v, lane); }
916template <
int n>[[gnu::always_inline]] nce uint64x1_t extract(uint64x1_t a, uint64x1_t b) {
return vext_u64(a, b, n); }
917[[gnu::always_inline]] nce int64x1_t bitwise_select(uint64x1_t a, int64x1_t b, int64x1_t c) {
return vbsl_s64(a, b, c); }
918[[gnu::always_inline]] nce uint64x2_t add(uint64x2_t a, uint64x2_t b) {
return vaddq_u64(a, b); }
919[[gnu::always_inline]] nce uint32x2_t add_narrow(uint64x2_t a, uint64x2_t b) {
return vaddhn_u64(a, b); }
920[[gnu::always_inline]] nce uint32x2_t add_round_narrow(uint64x2_t a, uint64x2_t b) {
return vraddhn_u64(a, b); }
921[[gnu::always_inline]] nce uint64x2_t add_saturate(uint64x2_t a, uint64x2_t b) {
return vqaddq_u64(a, b); }
922[[gnu::always_inline]] nce uint64x2_t subtract(uint64x2_t a, uint64x2_t b) {
return vsubq_u64(a, b); }
923[[gnu::always_inline]] nce uint32x2_t subtract_narrow(uint64x2_t a, uint64x2_t b) {
return vsubhn_u64(a, b); }
924[[gnu::always_inline]] nce uint32x2_t subtract_round_narrow(uint64x2_t a, uint64x2_t b) {
return vrsubhn_u64(a, b); }
925[[gnu::always_inline]] nce uint64x2_t subtract_saturate(uint64x2_t a, uint64x2_t b) {
return vqsubq_u64(a, b); }
926[[gnu::always_inline]] nce uint64x2_t shift_left_saturate(uint64x2_t a, int64x2_t b) {
return vqshlq_u64(a, b); }
927template <
int n>[[gnu::always_inline]] nce uint64x2_t shift_left_saturate(uint64x2_t a) {
return vqshlq_n_u64(a, n); }
928template <
int n>[[gnu::always_inline]] nce uint64x2_t shift_left_insert(uint64x2_t a, uint64x2_t b) {
return vsliq_n_u64(a, b, n); }
929[[gnu::always_inline]] nce uint64x2_t add(uint64x2_t a, uint32x2_t b) {
return vaddw_u32(a, b); }
930[[gnu::always_inline]] nce uint64x2_t multiply_add_long(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
return vmlal_u32(a, b, c); }
931[[gnu::always_inline]] nce uint64x2_t multiply_subtract_long(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
return vmlsl_u32(a, b, c); }
932[[gnu::always_inline]] nce uint64x2_t subtract(uint64x2_t a, uint32x2_t b) {
return vsubw_u32(a, b); }
933[[gnu::always_inline]] nce uint64x2_t subtract_absolute_add(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
return vabal_u32(a, b, c); }
934[[gnu::always_inline]] nce uint64x2_t shift_left_round(uint64x2_t a, int64x2_t b) {
return vrshlq_u64(a, b); }
935[[gnu::always_inline]] nce uint64x2_t shift_left_round_saturate(uint64x2_t a, int64x2_t b) {
return vqrshlq_u64(a, b); }
936template <
int n>[[gnu::always_inline]] nce uint64x2_t shift_right(uint64x2_t a) {
return vshrq_n_u64(a, n); }
937template <
int n>[[gnu::always_inline]] nce uint64x2_t shift_right_round(uint64x2_t a) {
return vrshrq_n_u64(a, n); }
938template <
int n>[[gnu::always_inline]] nce uint64x2_t shift_right_accumulate(uint64x2_t a, uint64x2_t b) {
return vsraq_n_u64(a, b, n); }
939template <
int n>[[gnu::always_inline]] nce uint64x2_t shift_right_accumulate_round(uint64x2_t a, uint64x2_t b) {
return vrsraq_n_u64(a, b, n); }
940template <
int n>[[gnu::always_inline]] nce uint32x2_t shift_right_narrow(uint64x2_t a) {
return vshrn_n_u64(a, n); }
941template <
int n>[[gnu::always_inline]] nce uint32x2_t shift_right_saturate_narrow(uint64x2_t a) {
return vqshrn_n_u64(a, n); }
942template <
int n>[[gnu::always_inline]] nce uint32x2_t shift_right_round_saturate_narrow(uint64x2_t a) {
return vqrshrn_n_u64(a, n); }
943template <
int n>[[gnu::always_inline]] nce uint32x2_t shift_right_round_narrow(uint64x2_t a) {
return vrshrn_n_u64(a, n); }
944template <
int n>[[gnu::always_inline]] nce uint64x2_t shift_right_insert(uint64x2_t a, uint64x2_t b) {
return vsriq_n_u64(a, b, n); }
945template <> [[gnu::always_inline]] nce poly8x16_t reinterpret(uint64x2_t a) {
return vreinterpretq_p8_u64(a); }
946template <> [[gnu::always_inline]] nce poly16x8_t reinterpret(uint64x2_t a) {
return vreinterpretq_p16_u64(a); }
947[[gnu::always_inline]] nce uint32x2_t move_narrow(uint64x2_t a) {
return vmovn_u64(a); }
948[[gnu::always_inline]] nce uint32x2_t move_saturate_narrow(uint64x2_t a) {
return vqmovn_u64(a); }
949template <
int lane> [[gnu::always_inline]] nce uint64x2_t multiply_add_long_lane(uint64x2_t a, uint32x2_t b, uint32x2_t v) {
return vmlal_lane_u32(a, b, v, lane); }
950[[gnu::always_inline]] nce uint64x2_t pairwise_add_accumulate_long(uint64x2_t a, uint32x4_t b) {
return vpadalq_u32(a, b); }
951[[gnu::always_inline]] nce uint64x2_t shift_left(uint64x2_t a, int64x2_t b) {
return vshlq_u64(a, b); }
952template <
int n>[[gnu::always_inline]] nce uint64x2_t shift_left(uint64x2_t a) {
return vshlq_n_u64(a, n); }
953[[gnu::always_inline]] nce uint64x2_t bitwise_and(uint64x2_t a, uint64x2_t b) {
return vandq_u64(a, b); }
954[[gnu::always_inline]] nce uint64x2_t bitwise_or(uint64x2_t a, uint64x2_t b) {
return vorrq_u64(a, b); }
955[[gnu::always_inline]] nce uint64x2_t bitwise_xor(uint64x2_t a, uint64x2_t b) {
return veorq_u64(a, b); }
956[[gnu::always_inline]] nce uint64x2_t bitwise_or_not(uint64x2_t a, uint64x2_t b) {
return vornq_u64(a, b); }
957[[gnu::always_inline]] nce uint64x2_t bitwise_clear(uint64x2_t a, uint64x2_t b) {
return vbicq_u64(a, b); }
958[[gnu::always_inline]] nce uint64x2_t bitwise_select(uint64x2_t a, uint64x2_t b, uint64x2_t c) {
return vbslq_u64(a, b, c); }
959[[gnu::always_inline]] nce uint64x1_t get_high(uint64x2_t a) {
return vget_high_u64(a); }
960[[gnu::always_inline]] nce uint64x1_t get_low(uint64x2_t a) {
return vget_low_u64(a); }
961template <
int n>[[gnu::always_inline]] nce uint64x2_t extract(uint64x2_t a, uint64x2_t b) {
return vextq_u64(a, b, n); }
962template <
int lane>[[gnu::always_inline]] nce uint64x2_t multiply_subtract_long_lane(uint64x2_t a, uint32x2_t b, uint32x2_t v) {
return vmlsl_lane_u32(a, b, v, lane); }
963[[gnu::always_inline]] nce uint64x2_t multiply_add_long(uint64x2_t a, uint32x2_t b, uint32_t c) {
return vmlal_n_u32(a, b, c); }
964[[gnu::always_inline]] nce uint64x2_t multiply_subtract_long(uint64x2_t a, uint32x2_t b, uint32_t c) {
return vmlsl_n_u32(a, b, c); }
965[[gnu::always_inline]] nce int64x2_t bitwise_select(uint64x2_t a, int64x2_t b, int64x2_t c) {
return vbslq_s64(a, b, c); }
966[[gnu::always_inline]] nce uint32x2_t shift_left(uint32x2_t a, int32x2_t b) {
return vshl_u32(a, b); }
967[[gnu::always_inline]] nce uint32x2_t add(uint32x2_t a, uint32x2_t b) {
return vadd_u32(a, b); }
968[[gnu::always_inline]] nce uint64x2_t add_long(uint32x2_t a, uint32x2_t b) {
return vaddl_u32(a, b); }
969[[gnu::always_inline]] nce uint32x2_t add_halve(uint32x2_t a, uint32x2_t b) {
return vhadd_u32(a, b); }
970[[gnu::always_inline]] nce uint32x2_t add_halve_round(uint32x2_t a, uint32x2_t b) {
return vrhadd_u32(a, b); }
971[[gnu::always_inline]] nce uint32x2_t add_saturate(uint32x2_t a, uint32x2_t b) {
return vqadd_u32(a, b); }
972[[gnu::always_inline]] nce uint32x2_t multiply(uint32x2_t a, uint32x2_t b) {
return vmul_u32(a, b); }
973[[gnu::always_inline]] nce uint32x2_t multiply_add(uint32x2_t a, uint32x2_t b, uint32x2_t c) {
return vmla_u32(a, b, c); }
974[[gnu::always_inline]] nce uint32x2_t multiply_subtract(uint32x2_t a, uint32x2_t b, uint32x2_t c) {
return vmls_u32(a, b, c); }
975[[gnu::always_inline]] nce uint64x2_t multiply_long(uint32x2_t a, uint32x2_t b) {
return vmull_u32(a, b); }
976[[gnu::always_inline]] nce uint32x2_t subtract(uint32x2_t a, uint32x2_t b) {
return vsub_u32(a, b); }
977[[gnu::always_inline]] nce uint64x2_t subtract_long(uint32x2_t a, uint32x2_t b) {
return vsubl_u32(a, b); }
978[[gnu::always_inline]] nce uint32x2_t subtract_halve(uint32x2_t a, uint32x2_t b) {
return vhsub_u32(a, b); }
979[[gnu::always_inline]] nce uint32x2_t subtract_saturate(uint32x2_t a, uint32x2_t b) {
return vqsub_u32(a, b); }
980[[gnu::always_inline]] nce uint32x2_t subtract_absolute(uint32x2_t a, uint32x2_t b) {
return vabd_u32(a, b); }
981[[gnu::always_inline]] nce uint64x2_t subtract_absolute_long(uint32x2_t a, uint32x2_t b) {
return vabdl_u32(a, b); }
982[[gnu::always_inline]] nce uint32x2_t subtract_absolute_add(uint32x2_t a, uint32x2_t b, uint32x2_t c) {
return vaba_u32(a, b, c); }
983[[gnu::always_inline]] nce uint32x2_t max(uint32x2_t a, uint32x2_t b) {
return vmax_u32(a, b); }
984[[gnu::always_inline]] nce uint32x2_t min(uint32x2_t a, uint32x2_t b) {
return vmin_u32(a, b); }
985[[gnu::always_inline]] nce uint32x2_t reciprocal_estimate(uint32x2_t a) {
return vrecpe_u32(a); }
986[[gnu::always_inline]] nce uint32x2_t reciprocal_sqrt_estimate(uint32x2_t a) {
return vrsqrte_u32(a); }
987[[gnu::always_inline]] nce uint32x2_t pairwise_add(uint32x2_t a, uint32x2_t b) {
return vpadd_u32(a, b); }
988[[gnu::always_inline]] nce uint64x1_t pairwise_add_long(uint32x2_t a) {
return vpaddl_u32(a); }
989[[gnu::always_inline]] nce uint32x2_t pairwise_add_accumulate_long(uint32x2_t a, uint16x4_t b) {
return vpadal_u16(a, b); }
990[[gnu::always_inline]] nce uint32x2_t pairwise_max(uint32x2_t a, uint32x2_t b) {
return vpmax_u32(a, b); }
991[[gnu::always_inline]] nce uint32x2_t pairwise_min(uint32x2_t a, uint32x2_t b) {
return vpmin_u32(a, b); }
992[[gnu::always_inline]] nce uint32x2_t equal(uint32x2_t a, uint32x2_t b) {
return vceq_u32(a, b); }
993[[gnu::always_inline]] nce uint32x2_t greater_than_or_equal(uint32x2_t a, uint32x2_t b) {
return vcge_u32(a, b); }
994[[gnu::always_inline]] nce uint32x2_t less_than_or_equal(uint32x2_t a, uint32x2_t b) {
return vcle_u32(a, b); }
995[[gnu::always_inline]] nce uint32x2_t greater_than(uint32x2_t a, uint32x2_t b) {
return vcgt_u32(a, b); }
996[[gnu::always_inline]] nce uint32x2_t less_than(uint32x2_t a, uint32x2_t b) {
return vclt_u32(a, b); }
997[[gnu::always_inline]] nce uint32x2_t compare_test_nonzero(uint32x2_t a, uint32x2_t b) {
return vtst_u32(a, b); }
998template <
int n>[[gnu::always_inline]] nce uint32x2_t shift_left(uint32x2_t a) {
return vshl_n_u32(a, n); }
999[[gnu::always_inline]] nce uint32x2_t shift_left_saturate(uint32x2_t a, int32x2_t b) {
return vqshl_u32(a, b); }
1000template <
int n>[[gnu::always_inline]] nce uint32x2_t shift_left_saturate(uint32x2_t a) {
return vqshl_n_u32(a, n); }
1001[[gnu::always_inline]] nce uint32x2_t shift_left_round(uint32x2_t a, int32x2_t b) {
return vrshl_u32(a, b); }
1002[[gnu::always_inline]] nce uint32x2_t shift_left_round_saturate(uint32x2_t a, int32x2_t b) {
return vqrshl_u32(a, b); }
1003template <
int n>[[gnu::always_inline]] nce uint64x2_t shift_left_long(uint32x2_t a) {
return vshll_n_u32(a, n); }
1004template <
int n>[[gnu::always_inline]] nce uint32x2_t shift_left_insert(uint32x2_t a, uint32x2_t b) {
return vsli_n_u32(a, b, n); }
1005template <
int n>[[gnu::always_inline]] nce uint32x2_t shift_right(uint32x2_t a) {
return vshr_n_u32(a, n); }
1006template <
int n>[[gnu::always_inline]] nce uint32x2_t shift_right_round(uint32x2_t a) {
return vrshr_n_u32(a, n); }
1007template <
int n>[[gnu::always_inline]] nce uint32x2_t shift_right_accumulate(uint32x2_t a, uint32x2_t b) {
return vsra_n_u32(a, b, n); }
1008template <
int n>[[gnu::always_inline]] nce uint32x2_t shift_right_accumulate_round(uint32x2_t a, uint32x2_t b) {
return vrsra_n_u32(a, b, n); }
1009template <
int n>[[gnu::always_inline]] nce uint32x2_t shift_right_insert(uint32x2_t a, uint32x2_t b) {
return vsri_n_u32(a, b, n); }
1010template <> [[gnu::always_inline]] nce float32x2_t convert(uint32x2_t a) {
return vcvt_f32_u32(a); }
1011template <
int fracbits> [[gnu::always_inline]] nce float32x2_t convert_n(uint32x2_t a) {
return vcvt_n_f32_u32(a, fracbits); }
1012template <> [[gnu::always_inline]] nce int8x8_t reinterpret(uint32x2_t a) {
return vreinterpret_s8_u32(a); }
1013template <> [[gnu::always_inline]] nce int16x4_t reinterpret(uint32x2_t a) {
return vreinterpret_s16_u32(a); }
1014template <> [[gnu::always_inline]] nce int32x2_t reinterpret(uint32x2_t a) {
return vreinterpret_s32_u32(a); }
1015template <> [[gnu::always_inline]] nce float32x2_t reinterpret(uint32x2_t a) {
return vreinterpret_f32_u32(a); }
1016template <> [[gnu::always_inline]] nce uint8x8_t reinterpret(uint32x2_t a) {
return vreinterpret_u8_u32(a); }
1017template <> [[gnu::always_inline]] nce uint16x4_t reinterpret(uint32x2_t a) {
return vreinterpret_u16_u32(a); }
1018template <> [[gnu::always_inline]] nce poly8x8_t reinterpret(uint32x2_t a) {
return vreinterpret_p8_u32(a); }
1019template <> [[gnu::always_inline]] nce poly16x4_t reinterpret(uint32x2_t a) {
return vreinterpret_p16_u32(a); }
1020template <> [[gnu::always_inline]] nce uint64x1_t reinterpret(uint32x2_t a) {
return vreinterpret_u64_u32(a); }
1021template <> [[gnu::always_inline]] nce int64x1_t reinterpret(uint32x2_t a) {
return vreinterpret_s64_u32(a); }
1022[[gnu::always_inline]] nce uint64x2_t move_long(uint32x2_t a) {
return vmovl_u32(a); }
1023template <
int lane> [[gnu::always_inline]] nce uint32x2_t multiply_add_lane(uint32x2_t a, uint32x2_t b, uint32x2_t v) {
return vmla_lane_u32(a, b, v, lane); }
1024template <
int lane> [[gnu::always_inline]] nce uint32x2_t multiply_subtract_lane(uint32x2_t a, uint32x2_t b, uint32x2_t v) {
return vmls_lane_u32(a, b, v, lane); }
1025[[gnu::always_inline]] nce uint32x2_t multiply_add(uint32x2_t a, uint32x2_t b, uint32_t c) {
return vmla_n_u32(a, b, c); }
1026template <
int lane> [[gnu::always_inline]] nce uint32x2_t multiply_lane(uint32x2_t a, uint32x2_t v) {
return vmul_lane_u32(a, v, lane); }
1027template <
int lane> [[gnu::always_inline]] nce uint64x2_t multiply_long_lane(uint32x2_t a, uint32x2_t v) {
return vmull_lane_u32(a, v, lane); }
1028[[gnu::always_inline]] nce uint32x2_t multiply_subtract(uint32x2_t a, uint32x2_t b, uint32_t c) {
return vmls_n_u32(a, b, c); }
1029[[gnu::always_inline]] nce uint32x2_t bitwise_not(uint32x2_t a) {
return vmvn_u32(a); }
1030[[gnu::always_inline]] nce uint32x2_t bitwise_and(uint32x2_t a, uint32x2_t b) {
return vand_u32(a, b); }
1031[[gnu::always_inline]] nce uint32x2_t bitwise_or(uint32x2_t a, uint32x2_t b) {
return vorr_u32(a, b); }
1032[[gnu::always_inline]] nce uint32x2_t bitwise_xor(uint32x2_t a, uint32x2_t b) {
return veor_u32(a, b); }
1033[[gnu::always_inline]] nce uint32x2_t bitwise_or_not(uint32x2_t a, uint32x2_t b) {
return vorn_u32(a, b); }
1035[[gnu::always_inline]] nce int32x2_t count_leading_sign_bits(uint32x2_t a) {
return vcls_u32(a); }
1037[[gnu::always_inline]] nce uint32x2_t count_leading_zero_bits(uint32x2_t a) {
return vclz_u32(a); }
1038[[gnu::always_inline]] nce int32x2_t bitwise_select(uint32x2_t a, int32x2_t b, int32x2_t c) {
return vbsl_s32(a, b, c); }
1039[[gnu::always_inline]] nce uint32x2_t bitwise_clear(uint32x2_t a, uint32x2_t b) {
return vbic_u32(a, b); }
1040[[gnu::always_inline]] nce uint32x2_t bitwise_select(uint32x2_t a, uint32x2_t b, uint32x2_t c) {
return vbsl_u32(a, b, c); }
1041template <
int lane>[[gnu::always_inline]] nce uint32x2_t duplicate_lane(uint32x2_t a) {
return vdup_lane_u32(a, lane); }
1042template <
int lane>[[gnu::always_inline]] nce uint32x4_t duplicate_lane_quad(uint32x2_t a) {
return vdupq_lane_u32(a, lane); }
1043[[gnu::always_inline]] nce uint32x4_t combine(uint32x2_t low, uint32x2_t high) {
return vcombine_u32(low, high); }
1044template <
int lane>[[gnu::always_inline]] nce uint32_t get_lane(uint32x2_t v) {
return vget_lane_u32(v, lane); }
1045template <
int n>[[gnu::always_inline]] nce uint32x2_t extract(uint32x2_t a, uint32x2_t b) {
return vext_u32(a, b, n); }
1046[[gnu::always_inline]] nce uint32x2_t reverse_64bit(uint32x2_t a) {
return vrev64_u32(a); }
1047[[gnu::always_inline]] nce uint32x2x2_t zip(uint32x2_t a, uint32x2_t b) {
return vzip_u32(a, b); }
1048[[gnu::always_inline]] nce uint32x2x2_t unzip(uint32x2_t a, uint32x2_t b) {
return vuzp_u32(a, b); }
1049[[gnu::always_inline]] nce uint32x2x2_t transpose(uint32x2_t a, uint32x2_t b) {
return vtrn_u32(a, b); }
1050[[gnu::always_inline]] nce float32x2_t bitwise_select(uint32x2_t a, float32x2_t b, float32x2_t c) {
return vbsl_f32(a, b, c); }
1051[[gnu::always_inline]] nce uint32x2_t multiply(uint32x2_t a, uint32_t b) {
return vmul_n_u32(a, b); }
1052[[gnu::always_inline]] nce uint64x2_t multiply_long(uint32x2_t a, uint32_t b) {
return vmull_n_u32(a, b); }
1053[[gnu::always_inline]] nce uint32x4_t add(uint32x4_t a, uint16x4_t b) {
return vaddw_u16(a, b); }
1054[[gnu::always_inline]] nce uint32x4_t multiply_add_long(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
return vmlal_u16(a, b, c); }
1055[[gnu::always_inline]] nce uint32x4_t multiply_subtract_long(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
return vmlsl_u16(a, b, c); }
1056[[gnu::always_inline]] nce uint32x4_t subtract(uint32x4_t a, uint16x4_t b) {
return vsubw_u16(a, b); }
1057[[gnu::always_inline]] nce uint32x4_t subtract_absolute_add(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
return vabal_u16(a, b, c); }
1058[[gnu::always_inline]] nce uint16x4_t add_narrow(uint32x4_t a, uint32x4_t b) {
return vaddhn_u32(a, b); }
1059[[gnu::always_inline]] nce uint16x4_t add_round_narrow(uint32x4_t a, uint32x4_t b) {
return vraddhn_u32(a, b); }
1060[[gnu::always_inline]] nce uint32x4_t multiply_add(uint32x4_t a, uint32x4_t b, uint32x4_t c) {
return vmlaq_u32(a, b, c); }
1061[[gnu::always_inline]] nce uint32x4_t multiply_subtract(uint32x4_t a, uint32x4_t b, uint32x4_t c) {
return vmlsq_u32(a, b, c); }
1062[[gnu::always_inline]] nce uint16x4_t subtract_narrow(uint32x4_t a, uint32x4_t b) {
return vsubhn_u32(a, b); }
1063[[gnu::always_inline]] nce uint16x4_t subtract_round_narrow(uint32x4_t a, uint32x4_t b) {
return vrsubhn_u32(a, b); }
1064[[gnu::always_inline]] nce uint32x4_t subtract_absolute_add(uint32x4_t a, uint32x4_t b, uint32x4_t c) {
return vabaq_u32(a, b, c); }
1065[[gnu::always_inline]] nce uint32x4_t reciprocal_estimate(uint32x4_t a) {
return vrecpeq_u32(a); }
1066[[gnu::always_inline]] nce uint32x4_t reciprocal_sqrt_estimate(uint32x4_t a) {
return vrsqrteq_u32(a); }
1067[[gnu::always_inline]] nce uint64x2_t pairwise_add_long(uint32x4_t a) {
return vpaddlq_u32(a); }
1068[[gnu::always_inline]] nce uint32x4_t pairwise_add_accumulate_long(uint32x4_t a, uint16x8_t b) {
return vpadalq_u16(a, b); }
1069[[gnu::always_inline]] nce uint32x4_t equal(uint32x4_t a, uint32x4_t b) {
return vceqq_u32(a, b); }
1070[[gnu::always_inline]] nce uint32x4_t greater_than_or_equal(uint32x4_t a, uint32x4_t b) {
return vcgeq_u32(a, b); }
1071[[gnu::always_inline]] nce uint32x4_t less_than_or_equal(uint32x4_t a, uint32x4_t b) {
return vcleq_u32(a, b); }
1072[[gnu::always_inline]] nce uint32x4_t greater_than(uint32x4_t a, uint32x4_t b) {
return vcgtq_u32(a, b); }
1073[[gnu::always_inline]] nce uint32x4_t less_than(uint32x4_t a, uint32x4_t b) {
return vcltq_u32(a, b); }
1074[[gnu::always_inline]] nce uint32x4_t compare_test_nonzero(uint32x4_t a, uint32x4_t b) {
return vtstq_u32(a, b); }
1075template <
int n>[[gnu::always_inline]] nce uint32x4_t shift_left(uint32x4_t a) {
return vshlq_n_u32(a, n); }
1076template <
int n>[[gnu::always_inline]] nce uint32x4_t shift_right_accumulate(uint32x4_t a, uint32x4_t b) {
return vsraq_n_u32(a, b, n); }
1077template <
int n>[[gnu::always_inline]] nce uint32x4_t shift_right_accumulate_round(uint32x4_t a, uint32x4_t b) {
return vrsraq_n_u32(a, b, n); }
1078template <
int n>[[gnu::always_inline]] nce uint16x4_t shift_right_narrow(uint32x4_t a) {
return vshrn_n_u32(a, n); }
1079template <
int n>[[gnu::always_inline]] nce uint16x4_t shift_right_saturate_narrow(uint32x4_t a) {
return vqshrn_n_u32(a, n); }
1080template <
int n>[[gnu::always_inline]] nce uint16x4_t shift_right_round_saturate_narrow(uint32x4_t a) {
return vqrshrn_n_u32(a, n); }
1081template <
int n>[[gnu::always_inline]] nce uint16x4_t shift_right_round_narrow(uint32x4_t a) {
return vrshrn_n_u32(a, n); }
1082template <
int fracbits> [[gnu::always_inline]] nce float32x4_t convert_n(uint32x4_t a) {
return vcvtq_n_f32_u32(a, fracbits); }
1083template <> [[gnu::always_inline]] nce poly8x16_t reinterpret(uint32x4_t a) {
return vreinterpretq_p8_u32(a); }
1084template <> [[gnu::always_inline]] nce poly16x8_t reinterpret(uint32x4_t a) {
return vreinterpretq_p16_u32(a); }
1085[[gnu::always_inline]] nce uint16x4_t move_narrow(uint32x4_t a) {
return vmovn_u32(a); }
1086[[gnu::always_inline]] nce uint16x4_t move_saturate_narrow(uint32x4_t a) {
return vqmovn_u32(a); }
1087template <
int lane> [[gnu::always_inline]] nce uint32x4_t multiply_add_long_lane(uint32x4_t a, uint16x4_t b, uint16x4_t v) {
return vmlal_lane_u16(a, b, v, lane); }
1088template <
int lane> [[gnu::always_inline]] nce uint32x4_t multiply_subtract_long_lane(uint32x4_t a, uint16x4_t b, uint16x4_t v) {
return vmlsl_lane_u16(a, b, v, lane); }
1089[[gnu::always_inline]] nce uint32x4_t multiply_add_long(uint32x4_t a, uint16x4_t b, uint16_t c) {
return vmlal_n_u16(a, b, c); }
1090[[gnu::always_inline]] nce uint32x4_t multiply_subtract_long(uint32x4_t a, uint16x4_t b, uint16_t c) {
return vmlsl_n_u16(a, b, c); }
1091template <
int lane> [[gnu::always_inline]] nce uint32x4_t multiply_lane(uint32x4_t a, uint32x2_t v) {
return vmulq_lane_u32(a, v, lane); }
1092template <
int lane> [[gnu::always_inline]] nce uint32x4_t multiply_add_lane(uint32x4_t a, uint32x4_t b, uint32x2_t v) {
return vmlaq_lane_u32(a, b, v, lane); }
1093template <
int lane> [[gnu::always_inline]] nce uint32x4_t multiply_subtract_lane(uint32x4_t a, uint32x4_t b, uint32x2_t v) {
return vmlsq_lane_u32(a, b, v, lane); }
1094[[gnu::always_inline]] nce uint32x4_t multiply_add(uint32x4_t a, uint32x4_t b, uint32_t c) {
return vmlaq_n_u32(a, b, c); }
1095[[gnu::always_inline]] nce uint32x4_t multiply_subtract(uint32x4_t a, uint32x4_t b, uint32_t c) {
return vmlsq_n_u32(a, b, c); }
1097[[gnu::always_inline]] nce int32x4_t count_leading_sign_bits(uint32x4_t a) {
return vclsq_u32(a); }
1099[[gnu::always_inline]] nce int32x4_t bitwise_select(uint32x4_t a, int32x4_t b, int32x4_t c) {
return vbslq_s32(a, b, c); }
1100[[gnu::always_inline]] nce uint32x4_t bitwise_select(uint32x4_t a, uint32x4_t b, uint32x4_t c) {
return vbslq_u32(a, b, c); }
1101[[gnu::always_inline]] nce uint32x2_t get_high(uint32x4_t a) {
return vget_high_u32(a); }
1102[[gnu::always_inline]] nce uint32x2_t get_low(uint32x4_t a) {
return vget_low_u32(a); }
1103template <
int n>[[gnu::always_inline]] nce uint32x4_t extract(uint32x4_t a, uint32x4_t b) {
return vextq_u32(a, b, n); }
1104[[gnu::always_inline]] nce uint32x4x2_t zip(uint32x4_t a, uint32x4_t b) {
return vzipq_u32(a, b); }
1105[[gnu::always_inline]] nce uint32x4x2_t unzip(uint32x4_t a, uint32x4_t b) {
return vuzpq_u32(a, b); }
1106[[gnu::always_inline]] nce uint32x4x2_t transpose(uint32x4_t a, uint32x4_t b) {
return vtrnq_u32(a, b); }
1107[[gnu::always_inline]] nce float32x4_t bitwise_select(uint32x4_t a, float32x4_t b, float32x4_t c) {
return vbslq_f32(a, b, c); }
1108[[gnu::always_inline]] nce float32x2_t add(float32x2_t a, float32x2_t b) {
return vadd_f32(a, b); }
1109[[gnu::always_inline]] nce float32x2_t multiply(float32x2_t a, float32x2_t b) {
return vmul_f32(a, b); }
1110[[gnu::always_inline]] nce float32x2_t multiply_add(float32x2_t a, float32x2_t b, float32x2_t c) {
return vmla_f32(a, b, c); }
1111[[gnu::always_inline]] nce float32x2_t multiply_subtract(float32x2_t a, float32x2_t b, float32x2_t c) {
return vmls_f32(a, b, c); }
1112[[gnu::always_inline]] nce float32x2_t subtract(float32x2_t a, float32x2_t b) {
return vsub_f32(a, b); }
1113[[gnu::always_inline]] nce float32x2_t subtract_absolute(float32x2_t a, float32x2_t b) {
return vabd_f32(a, b); }
1114[[gnu::always_inline]] nce float32x2_t absolute(float32x2_t a) {
return vabs_f32(a); }
1115[[gnu::always_inline]] nce float32x2_t max(float32x2_t a, float32x2_t b) {
return vmax_f32(a, b); }
1116[[gnu::always_inline]] nce float32x2_t min(float32x2_t a, float32x2_t b) {
return vmin_f32(a, b); }
1117[[gnu::always_inline]] nce float32x2_t reciprocal_estimate(float32x2_t a) {
return vrecpe_f32(a); }
1118[[gnu::always_inline]] nce float32x2_t reciprocal_step(float32x2_t a, float32x2_t b) {
return vrecps_f32(a, b); }
1119[[gnu::always_inline]] nce float32x2_t reciprocal_sqrt_estimate(float32x2_t a) {
return vrsqrte_f32(a); }
1120[[gnu::always_inline]] nce float32x2_t reciprocal_sqrt_step(float32x2_t a, float32x2_t b) {
return vrsqrts_f32(a, b); }
1121[[gnu::always_inline]] nce float32x2_t pairwise_add(float32x2_t a, float32x2_t b) {
return vpadd_f32(a, b); }
1122[[gnu::always_inline]] nce float32x2_t pairwise_max(float32x2_t a, float32x2_t b) {
return vpmax_f32(a, b); }
1123[[gnu::always_inline]] nce float32x2_t pairwise_min(float32x2_t a, float32x2_t b) {
return vpmin_f32(a, b); }
1124[[gnu::always_inline]] nce uint32x2_t equal(float32x2_t a, float32x2_t b) {
return vceq_f32(a, b); }
1125[[gnu::always_inline]] nce uint32x2_t greater_than_or_equal(float32x2_t a, float32x2_t b) {
return vcge_f32(a, b); }
1126[[gnu::always_inline]] nce uint32x2_t less_than_or_equal(float32x2_t a, float32x2_t b) {
return vcle_f32(a, b); }
1127[[gnu::always_inline]] nce uint32x2_t greater_than(float32x2_t a, float32x2_t b) {
return vcgt_f32(a, b); }
1128[[gnu::always_inline]] nce uint32x2_t less_than(float32x2_t a, float32x2_t b) {
return vclt_f32(a, b); }
1129[[gnu::always_inline]] nce uint32x2_t absolute_greater_than_or_equal(float32x2_t a, float32x2_t b) {
return vcage_f32(a, b); }
1130[[gnu::always_inline]] nce uint32x2_t absolute_less_than_or_equal(float32x2_t a, float32x2_t b) {
return vcale_f32(a, b); }
1131[[gnu::always_inline]] nce uint32x2_t absolute_greater_than(float32x2_t a, float32x2_t b) {
return vcagt_f32(a, b); }
1132[[gnu::always_inline]] nce uint32x2_t absolute_less_than(float32x2_t a, float32x2_t b) {
return vcalt_f32(a, b); }
1133template <> [[gnu::always_inline]] nce int32x2_t convert(float32x2_t a) {
return vcvt_s32_f32(a); }
1134template <> [[gnu::always_inline]] nce uint32x2_t convert(float32x2_t a) {
return vcvt_u32_f32(a); }
1135template <
int fracbits> [[gnu::always_inline]] nce int32x2_t convert_n_signed(float32x2_t a) {
return vcvt_n_s32_f32(a, fracbits); }
1136template <
int fracbits> [[gnu::always_inline]] nce uint32x2_t convert_n_unsigned(float32x2_t a) {
return vcvt_n_u32_f32(a, fracbits); }
1137template <> [[gnu::always_inline]] nce int8x8_t reinterpret(float32x2_t a) {
return vreinterpret_s8_f32(a); }
1138template <> [[gnu::always_inline]] nce int16x4_t reinterpret(float32x2_t a) {
return vreinterpret_s16_f32(a); }
1139template <> [[gnu::always_inline]] nce int32x2_t reinterpret(float32x2_t a) {
return vreinterpret_s32_f32(a); }
1140template <> [[gnu::always_inline]] nce uint8x8_t reinterpret(float32x2_t a) {
return vreinterpret_u8_f32(a); }
1141template <> [[gnu::always_inline]] nce uint16x4_t reinterpret(float32x2_t a) {
return vreinterpret_u16_f32(a); }
1142template <> [[gnu::always_inline]] nce uint32x2_t reinterpret(float32x2_t a) {
return vreinterpret_u32_f32(a); }
1143template <> [[gnu::always_inline]] nce poly8x8_t reinterpret(float32x2_t a) {
return vreinterpret_p8_f32(a); }
1144template <> [[gnu::always_inline]] nce poly16x4_t reinterpret(float32x2_t a) {
return vreinterpret_p16_f32(a); }
1145template <> [[gnu::always_inline]] nce uint64x1_t reinterpret(float32x2_t a) {
return vreinterpret_u64_f32(a); }
1146template <> [[gnu::always_inline]] nce int64x1_t reinterpret(float32x2_t a) {
return vreinterpret_s64_f32(a); }
1147template <
int lane> [[gnu::always_inline]] nce float32x2_t multiply_add_lane(float32x2_t a, float32x2_t b, float32x2_t v) {
return vmla_lane_f32(a, b, v, lane); }
1148template <
int lane> [[gnu::always_inline]] nce float32x2_t multiply_subtract_lane(float32x2_t a, float32x2_t b, float32x2_t v) {
return vmls_lane_f32(a, b, v, lane); }
1149[[gnu::always_inline]] nce float32x2_t multiply_add(float32x2_t a, float32x2_t b, float32_t c) {
return vmla_n_f32(a, b, c); }
1150template <
int lane> [[gnu::always_inline]] nce float32x2_t multiply_lane(float32x2_t a, float32x2_t v) {
return vmul_lane_f32(a, v, lane); }
1151[[gnu::always_inline]] nce float32x2_t multiply_subtract(float32x2_t a, float32x2_t b, float32_t c) {
return vmls_n_f32(a, b, c); }
1152template <
int lane>[[gnu::always_inline]] nce float32x2_t duplicate_lane(float32x2_t a) {
return vdup_lane_f32(a, lane); }
1153template <
int lane>[[gnu::always_inline]] nce float32x4_t duplicate_lane_quad(float32x2_t a) {
return vdupq_lane_f32(a, lane); }
1154[[gnu::always_inline]] nce float32x4_t combine(float32x2_t low, float32x2_t high) {
return vcombine_f32(low, high); }
1155template <
int lane>[[gnu::always_inline]] nce float32_t get_lane(float32x2_t v) {
return vget_lane_f32(v, lane); }
1156template <
int n>[[gnu::always_inline]] nce float32x2_t extract(float32x2_t a, float32x2_t b) {
return vext_f32(a, b, n); }
1157[[gnu::always_inline]] nce float32x2_t reverse_64bit(float32x2_t a) {
return vrev64_f32(a); }
1158[[gnu::always_inline]] nce float32x2x2_t zip(float32x2_t a, float32x2_t b) {
return vzip_f32(a, b); }
1159[[gnu::always_inline]] nce float32x2x2_t unzip(float32x2_t a, float32x2_t b) {
return vuzp_f32(a, b); }
1160[[gnu::always_inline]] nce float32x2x2_t transpose(float32x2_t a, float32x2_t b) {
return vtrn_f32(a, b); }
1161[[gnu::always_inline]] nce float32x2_t multiply(float32x2_t a, float32_t b) {
return vmul_n_f32(a, b); }
1162[[gnu::always_inline]] nce float32x2_t negate(float32x2_t a) {
return vneg_f32(a); }
1163[[gnu::always_inline]] nce float32x4_t multiply_add(float32x4_t a, float32x4_t b, float32x4_t c) {
return vmlaq_f32(a, b, c); }
1164[[gnu::always_inline]] nce float32x4_t multiply_subtract(float32x4_t a, float32x4_t b, float32x4_t c) {
return vmlsq_f32(a, b, c); }
1165[[gnu::always_inline]] nce float32x4_t max(float32x4_t a, float32x4_t b) {
return vmaxq_f32(a, b); }
1166[[gnu::always_inline]] nce float32x4_t min(float32x4_t a, float32x4_t b) {
return vminq_f32(a, b); }
1167[[gnu::always_inline]] nce float32x4_t reciprocal_estimate(float32x4_t a) {
return vrecpeq_f32(a); }
1168[[gnu::always_inline]] nce float32x4_t reciprocal_step(float32x4_t a, float32x4_t b) {
return vrecpsq_f32(a, b); }
1169[[gnu::always_inline]] nce float32x4_t reciprocal_sqrt_estimate(float32x4_t a) {
return vrsqrteq_f32(a); }
1170[[gnu::always_inline]] nce float32x4_t reciprocal_sqrt_step(float32x4_t a, float32x4_t b) {
return vrsqrtsq_f32(a, b); }
1171[[gnu::always_inline]] nce uint32x4_t equal(float32x4_t a, float32x4_t b) {
return vceqq_f32(a, b); }
1172[[gnu::always_inline]] nce uint32x4_t greater_than_or_equal(float32x4_t a, float32x4_t b) {
return vcgeq_f32(a, b); }
1173[[gnu::always_inline]] nce uint32x4_t less_than_or_equal(float32x4_t a, float32x4_t b) {
return vcleq_f32(a, b); }
1174[[gnu::always_inline]] nce uint32x4_t greater_than(float32x4_t a, float32x4_t b) {
return vcgtq_f32(a, b); }
1175[[gnu::always_inline]] nce uint32x4_t less_than(float32x4_t a, float32x4_t b) {
return vcltq_f32(a, b); }
1176[[gnu::always_inline]] nce uint32x4_t absolute_greater_than_or_equal(float32x4_t a, float32x4_t b) {
return vcageq_f32(a, b); }
1177[[gnu::always_inline]] nce uint32x4_t absolute_less_than_or_equal(float32x4_t a, float32x4_t b) {
return vcaleq_f32(a, b); }
1178[[gnu::always_inline]] nce uint32x4_t absolute_greater_than(float32x4_t a, float32x4_t b) {
return vcagtq_f32(a, b); }
1179[[gnu::always_inline]] nce uint32x4_t absolute_less_than(float32x4_t a, float32x4_t b) {
return vcaltq_f32(a, b); }
1180template <
int fracbits> [[gnu::always_inline]] nce int32x4_t convert_n_signed(float32x4_t a) {
return vcvtq_n_s32_f32(a, fracbits); }
1181template <
int fracbits> [[gnu::always_inline]] nce uint32x4_t convert_n_unsigned(float32x4_t a) {
return vcvtq_n_u32_f32(a, fracbits); }
1182template <> [[gnu::always_inline]] nce poly8x16_t reinterpret(float32x4_t a) {
return vreinterpretq_p8_f32(a); }
1183template <> [[gnu::always_inline]] nce poly16x8_t reinterpret(float32x4_t a) {
return vreinterpretq_p16_f32(a); }
1184template <
int lane> [[gnu::always_inline]] nce float32x4_t multiply_lane(float32x4_t a, float32x2_t v) {
return vmulq_lane_f32(a, v, lane); }
1185template <
int lane> [[gnu::always_inline]] nce float32x4_t multiply_add_lane(float32x4_t a, float32x4_t b, float32x2_t v) {
return vmlaq_lane_f32(a, b, v, lane); }
1186template <
int lane> [[gnu::always_inline]] nce float32x4_t multiply_subtract_lane(float32x4_t a, float32x4_t b, float32x2_t v) {
return vmlsq_lane_f32(a, b, v, lane); }
1187[[gnu::always_inline]] nce float32x4_t multiply_add(float32x4_t a, float32x4_t b, float32_t c) {
return vmlaq_n_f32(a, b, c); }
1188[[gnu::always_inline]] nce float32x4_t multiply_subtract(float32x4_t a, float32x4_t b, float32_t c) {
return vmlsq_n_f32(a, b, c); }
1189[[gnu::always_inline]] nce float32x2_t get_high(float32x4_t a) {
return vget_high_f32(a); }
1190[[gnu::always_inline]] nce float32x2_t get_low(float32x4_t a) {
return vget_low_f32(a); }
1191template <
int n>[[gnu::always_inline]] nce float32x4_t extract(float32x4_t a, float32x4_t b) {
return vextq_f32(a, b, n); }
1192[[gnu::always_inline]] nce float32x4x2_t zip(float32x4_t a, float32x4_t b) {
return vzipq_f32(a, b); }
1193[[gnu::always_inline]] nce float32x4x2_t unzip(float32x4_t a, float32x4_t b) {
return vuzpq_f32(a, b); }
1194[[gnu::always_inline]] nce float32x4x2_t transpose(float32x4_t a, float32x4_t b) {
return vtrnq_f32(a, b); }
1195[[gnu::always_inline]] nce poly8x8_t multiply(poly8x8_t a, poly8x8_t b) {
return vmul_p8(a, b); }
1196[[gnu::always_inline]] nce poly16x8_t multiply_long(poly8x8_t a, poly8x8_t b) {
return vmull_p8(a, b); }
1197[[gnu::always_inline]] nce uint8x8_t equal(poly8x8_t a, poly8x8_t b) {
return vceq_p8(a, b); }
1198[[gnu::always_inline]] nce uint8x8_t compare_test_nonzero(poly8x8_t a, poly8x8_t b) {
return vtst_p8(a, b); }
1199template <
int n>[[gnu::always_inline]] nce poly8x8_t shift_left_insert(poly8x8_t a, poly8x8_t b) {
return vsli_n_p8(a, b, n); }
1200template <
int n>[[gnu::always_inline]] nce poly8x8_t shift_right_insert(poly8x8_t a, poly8x8_t b) {
return vsri_n_p8(a, b, n); }
1201template <> [[gnu::always_inline]] nce int8x8_t reinterpret(poly8x8_t a) {
return vreinterpret_s8_p8(a); }
1202template <> [[gnu::always_inline]] nce int16x4_t reinterpret(poly8x8_t a) {
return vreinterpret_s16_p8(a); }
1203template <> [[gnu::always_inline]] nce int32x2_t reinterpret(poly8x8_t a) {
return vreinterpret_s32_p8(a); }
1204template <> [[gnu::always_inline]] nce float32x2_t reinterpret(poly8x8_t a) {
return vreinterpret_f32_p8(a); }
1205template <> [[gnu::always_inline]] nce uint8x8_t reinterpret(poly8x8_t a) {
return vreinterpret_u8_p8(a); }
1206template <> [[gnu::always_inline]] nce uint16x4_t reinterpret(poly8x8_t a) {
return vreinterpret_u16_p8(a); }
1207template <> [[gnu::always_inline]] nce uint32x2_t reinterpret(poly8x8_t a) {
return vreinterpret_u32_p8(a); }
1208template <> [[gnu::always_inline]] nce poly16x4_t reinterpret(poly8x8_t a) {
return vreinterpret_p16_p8(a); }
1209template <> [[gnu::always_inline]] nce uint64x1_t reinterpret(poly8x8_t a) {
return vreinterpret_u64_p8(a); }
1210template <> [[gnu::always_inline]] nce int64x1_t reinterpret(poly8x8_t a) {
return vreinterpret_s64_p8(a); }
1211[[gnu::always_inline]] nce poly8x8_t bitwise_not(poly8x8_t a) {
return vmvn_p8(a); }
1212[[gnu::always_inline]] nce poly8x8_t count_active_bits(poly8x8_t a) {
return vcnt_p8(a); }
1213template <
int lane>[[gnu::always_inline]] nce poly8x8_t duplicate_lane(poly8x8_t a) {
return vdup_lane_p8(a, lane); }
1214template <
int lane>[[gnu::always_inline]] nce poly8x16_t duplicate_lane_quad(poly8x8_t a) {
return vdupq_lane_p8(a, lane); }
1215[[gnu::always_inline]] nce poly8x16_t combine(poly8x8_t low, poly8x8_t high) {
return vcombine_p8(low, high); }
1216template <
int lane>[[gnu::always_inline]] nce poly8_t get_lane(poly8x8_t v) {
return vget_lane_p8(v, lane); }
1217template <
int n>[[gnu::always_inline]] nce poly8x8_t extract(poly8x8_t a, poly8x8_t b) {
return vext_p8(a, b, n); }
1218[[gnu::always_inline]] nce poly8x8_t reverse_64bit(poly8x8_t a) {
return vrev64_p8(a); }
1219[[gnu::always_inline]] nce poly8x8_t reverse_32bit(poly8x8_t a) {
return vrev32_p8(a); }
1220[[gnu::always_inline]] nce poly8x8_t reverse_16bit(poly8x8_t a) {
return vrev16_p8(a); }
1221[[gnu::always_inline]] nce poly8x8_t table_lookup1(poly8x8_t a, uint8x8_t idx) {
return vtbl1_p8(a, idx); }
1222[[gnu::always_inline]] nce poly8x8x2_t zip(poly8x8_t a, poly8x8_t b) {
return vzip_p8(a, b); }
1223[[gnu::always_inline]] nce poly8x8x2_t unzip(poly8x8_t a, poly8x8_t b) {
return vuzp_p8(a, b); }
1224[[gnu::always_inline]] nce poly8x8x2_t transpose(poly8x8_t a, poly8x8_t b) {
return vtrn_p8(a, b); }
1225[[gnu::always_inline]] nce poly8x8_t table_extension1(poly8x8_t a, poly8x8_t b, uint8x8_t idx) {
return vtbx1_p8(a, b, idx); }
1227[[gnu::always_inline]] nce poly8x8_t add(poly8x8_t a, poly8x8_t b) {
return vadd_p8(a, b); }
1229[[gnu::always_inline]] nce poly8x8_t table_extension2(poly8x8_t a, poly8x8x2_t b, uint8x8_t idx) {
return vtbx2_p8(a, b, idx); }
1230[[gnu::always_inline]] nce poly8x8_t table_extension3(poly8x8_t a, poly8x8x3_t b, uint8x8_t idx) {
return vtbx3_p8(a, b, idx); }
1231[[gnu::always_inline]] nce poly8x8_t table_extension4(poly8x8_t a, poly8x8x4_t b, uint8x8_t idx) {
return vtbx4_p8(a, b, idx); }
1232template <
int n>[[gnu::always_inline]] nce poly16x4_t shift_left_insert(poly16x4_t a, poly16x4_t b) {
return vsli_n_p16(a, b, n); }
1233template <
int n>[[gnu::always_inline]] nce poly16x4_t shift_right_insert(poly16x4_t a, poly16x4_t b) {
return vsri_n_p16(a, b, n); }
1234template <> [[gnu::always_inline]] nce int8x8_t reinterpret(poly16x4_t a) {
return vreinterpret_s8_p16(a); }
1235template <> [[gnu::always_inline]] nce int16x4_t reinterpret(poly16x4_t a) {
return vreinterpret_s16_p16(a); }
1236template <> [[gnu::always_inline]] nce int32x2_t reinterpret(poly16x4_t a) {
return vreinterpret_s32_p16(a); }
1237template <> [[gnu::always_inline]] nce float32x2_t reinterpret(poly16x4_t a) {
return vreinterpret_f32_p16(a); }
1238template <> [[gnu::always_inline]] nce uint8x8_t reinterpret(poly16x4_t a) {
return vreinterpret_u8_p16(a); }
1239template <> [[gnu::always_inline]] nce uint16x4_t reinterpret(poly16x4_t a) {
return vreinterpret_u16_p16(a); }
1240template <> [[gnu::always_inline]] nce uint32x2_t reinterpret(poly16x4_t a) {
return vreinterpret_u32_p16(a); }
1241template <> [[gnu::always_inline]] nce poly8x8_t reinterpret(poly16x4_t a) {
return vreinterpret_p8_p16(a); }
1242template <> [[gnu::always_inline]] nce uint64x1_t reinterpret(poly16x4_t a) {
return vreinterpret_u64_p16(a); }
1243template <> [[gnu::always_inline]] nce int64x1_t reinterpret(poly16x4_t a) {
return vreinterpret_s64_p16(a); }
1244template <
int lane>[[gnu::always_inline]] nce poly16x4_t duplicate_lane(poly16x4_t a) {
return vdup_lane_p16(a, lane); }
1245template <
int lane>[[gnu::always_inline]] nce poly16x8_t duplicate_lane_quad(poly16x4_t a) {
return vdupq_lane_p16(a, lane); }
1246[[gnu::always_inline]] nce poly16x8_t combine(poly16x4_t low, poly16x4_t high) {
return vcombine_p16(low, high); }
1247template <
int lane>[[gnu::always_inline]] nce poly16_t get_lane(poly16x4_t v) {
return vget_lane_p16(v, lane); }
1248template <
int n>[[gnu::always_inline]] nce poly16x4_t extract(poly16x4_t a, poly16x4_t b) {
return vext_p16(a, b, n); }
1249[[gnu::always_inline]] nce poly16x4_t reverse_64bit(poly16x4_t a) {
return vrev64_p16(a); }
1250[[gnu::always_inline]] nce poly16x4_t reverse_32bit(poly16x4_t a) {
return vrev32_p16(a); }
1251[[gnu::always_inline]] nce poly16x4x2_t zip(poly16x4_t a, poly16x4_t b) {
return vzip_p16(a, b); }
1252[[gnu::always_inline]] nce poly16x4x2_t unzip(poly16x4_t a, poly16x4_t b) {
return vuzp_p16(a, b); }
1253[[gnu::always_inline]] nce poly16x4x2_t transpose(poly16x4_t a, poly16x4_t b) {
return vtrn_p16(a, b); }
1255[[gnu::always_inline]] nce poly16x4_t add(poly16x4_t a, poly16x4_t b) {
return vadd_p16(a, b); }
1257[[gnu::always_inline]] nce int64x1_t add(int64x1_t a, int64x1_t b) {
return vadd_s64(a, b); }
1258[[gnu::always_inline]] nce int64x2_t add(int64x2_t a, int64x2_t b) {
return vaddq_s64(a, b); }
1259[[gnu::always_inline]] nce int64x2_t add(int64x2_t a, int32x2_t b) {
return vaddw_s32(a, b); }
1260[[gnu::always_inline]] nce int32x2_t add_narrow(int64x2_t a, int64x2_t b) {
return vaddhn_s64(a, b); }
1261[[gnu::always_inline]] nce int32x2_t add_round_narrow(int64x2_t a, int64x2_t b) {
return vraddhn_s64(a, b); }
1262[[gnu::always_inline]] nce int64x1_t add_saturate(int64x1_t a, int64x1_t b) {
return vqadd_s64(a, b); }
1263[[gnu::always_inline]] nce int64x2_t add_saturate(int64x2_t a, int64x2_t b) {
return vqaddq_s64(a, b); }
1264[[gnu::always_inline]] nce int64x2_t multiply_add_long(int64x2_t a, int32x2_t b, int32x2_t c) {
return vmlal_s32(a, b, c); }
1265[[gnu::always_inline]] nce int64x2_t multiply_subtract_long(int64x2_t a, int32x2_t b, int32x2_t c) {
return vmlsl_s32(a, b, c); }
1266[[gnu::always_inline]] nce int64x2_t multiply_double_add_saturate_long(int64x2_t a, int32x2_t b, int32x2_t c) {
return vqdmlal_s32(a, b, c); }
1267[[gnu::always_inline]] nce int64x2_t multiply_double_subtract_saturate_long(int64x2_t a, int32x2_t b, int32x2_t c) {
return vqdmlsl_s32(a, b, c); }
1268template <
int lane> [[gnu::always_inline]] nce int64x2_t multiply_double_add_saturate_long_lane(int64x2_t a, int32x2_t b, int32x2_t v) {
return vqdmlal_lane_s32(a, b, v, lane); }
1269template <
int lane> [[gnu::always_inline]] nce int64x2_t multiply_double_subtract_saturate_long_lane(int64x2_t a, int32x2_t b, int32x2_t v) {
return vqdmlsl_lane_s32(a, b, v, lane); }
1270[[gnu::always_inline]] nce int64x2_t multiply_double_add_saturate_long(int64x2_t a, int32x2_t b, int32_t c) {
return vqdmlal_n_s32(a, b, c); }
1271[[gnu::always_inline]] nce int64x2_t multiply_double_subtract_saturate_long(int64x2_t a, int32x2_t b, int32_t c) {
return vqdmlsl_n_s32(a, b, c); }
1272[[gnu::always_inline]] nce poly8x16_t multiply(poly8x16_t a, poly8x16_t b) {
return vmulq_p8(a, b); }
1273[[gnu::always_inline]] nce int64x1_t subtract(int64x1_t a, int64x1_t b) {
return vsub_s64(a, b); }
1274[[gnu::always_inline]] nce int64x2_t subtract(int64x2_t a, int64x2_t b) {
return vsubq_s64(a, b); }
1275[[gnu::always_inline]] nce int64x2_t subtract(int64x2_t a, int32x2_t b) {
return vsubw_s32(a, b); }
1276[[gnu::always_inline]] nce int32x2_t subtract_narrow(int64x2_t a, int64x2_t b) {
return vsubhn_s64(a, b); }
1277[[gnu::always_inline]] nce int32x2_t subtract_round_narrow(int64x2_t a, int64x2_t b) {
return vrsubhn_s64(a, b); }
1278[[gnu::always_inline]] nce int64x1_t subtract_saturate(int64x1_t a, int64x1_t b) {
return vqsub_s64(a, b); }
1279[[gnu::always_inline]] nce int64x2_t subtract_saturate(int64x2_t a, int64x2_t b) {
return vqsubq_s64(a, b); }
1280[[gnu::always_inline]] nce int64x2_t subtract_absolute_add(int64x2_t a, int32x2_t b, int32x2_t c) {
return vabal_s32(a, b, c); }
1281[[gnu::always_inline]] nce int64x1_t pairwise_add_accumulate_long(int64x1_t a, int32x2_t b) {
return vpadal_s32(a, b); }
1282[[gnu::always_inline]] nce int64x2_t pairwise_add_accumulate_long(int64x2_t a, int32x4_t b) {
return vpadalq_s32(a, b); }
1283[[gnu::always_inline]] nce uint8x16_t equal(poly8x16_t a, poly8x16_t b) {
return vceqq_p8(a, b); }
1284[[gnu::always_inline]] nce uint8x16_t compare_test_nonzero(poly8x16_t a, poly8x16_t b) {
return vtstq_p8(a, b); }
1285[[gnu::always_inline]] nce int64x1_t shift_left(int64x1_t a, int64x1_t b) {
return vshl_s64(a, b); }
1286[[gnu::always_inline]] nce int64x2_t shift_left(int64x2_t a, int64x2_t b) {
return vshlq_s64(a, b); }
1287template <
int n>[[gnu::always_inline]] nce int64x1_t shift_left(int64x1_t a) {
return vshl_n_s64(a, n); }
1288template <
int n>[[gnu::always_inline]] nce int64x2_t shift_left(int64x2_t a) {
return vshlq_n_s64(a, n); }
1289[[gnu::always_inline]] nce int64x1_t shift_left_saturate(int64x1_t a, int64x1_t b) {
return vqshl_s64(a, b); }
1290[[gnu::always_inline]] nce int64x2_t shift_left_saturate(int64x2_t a, int64x2_t b) {
return vqshlq_s64(a, b); }
1291template <
int n>[[gnu::always_inline]] nce int64x1_t shift_left_saturate(int64x1_t a) {
return vqshl_n_s64(a, n); }
1292template <
int n>[[gnu::always_inline]] nce int64x2_t shift_left_saturate(int64x2_t a) {
return vqshlq_n_s64(a, n); }
1293template <
int n>[[gnu::always_inline]] nce uint64x1_t shift_left_unsigned_saturate(int64x1_t a) {
return vqshlu_n_s64(a, n); }
1294template <
int n>[[gnu::always_inline]] nce uint64x2_t shift_left_unsigned_saturate(int64x2_t a) {
return vqshluq_n_s64(a, n); }
1295[[gnu::always_inline]] nce int64x1_t shift_left_round(int64x1_t a, int64x1_t b) {
return vrshl_s64(a, b); }
1296[[gnu::always_inline]] nce int64x2_t shift_left_round(int64x2_t a, int64x2_t b) {
return vrshlq_s64(a, b); }
1297[[gnu::always_inline]] nce int64x1_t shift_left_round_saturate(int64x1_t a, int64x1_t b) {
return vqrshl_s64(a, b); }
1298[[gnu::always_inline]] nce int64x2_t shift_left_round_saturate(int64x2_t a, int64x2_t b) {
return vqrshlq_s64(a, b); }
1299template <
int n>[[gnu::always_inline]] nce int64x1_t shift_left_insert(int64x1_t a, int64x1_t b) {
return vsli_n_s64(a, b, n); }
1300template <
int n>[[gnu::always_inline]] nce int64x2_t shift_left_insert(int64x2_t a, int64x2_t b) {
return vsliq_n_s64(a, b, n); }
1301template <
int n>[[gnu::always_inline]] nce poly8x16_t shift_left_insert(poly8x16_t a, poly8x16_t b) {
return vsliq_n_p8(a, b, n); }
1302template <
int n>[[gnu::always_inline]] nce poly16x8_t shift_left_insert(poly16x8_t a, poly16x8_t b) {
return vsliq_n_p16(a, b, n); }
1303template <
int n>[[gnu::always_inline]] nce int64x1_t shift_right(int64x1_t a) {
return vshr_n_s64(a, n); }
1304template <
int n>[[gnu::always_inline]] nce int64x2_t shift_right(int64x2_t a) {
return vshrq_n_s64(a, n); }
1305template <
int n>[[gnu::always_inline]] nce int64x1_t shift_right_round(int64x1_t a) {
return vrshr_n_s64(a, n); }
1306template <
int n>[[gnu::always_inline]] nce int64x2_t shift_right_round(int64x2_t a) {
return vrshrq_n_s64(a, n); }
1307template <
int n>[[gnu::always_inline]] nce int64x1_t shift_right_accumulate(int64x1_t a, int64x1_t b) {
return vsra_n_s64(a, b, n); }
1308template <
int n>[[gnu::always_inline]] nce int64x2_t shift_right_accumulate(int64x2_t a, int64x2_t b) {
return vsraq_n_s64(a, b, n); }
1309template <
int n>[[gnu::always_inline]] nce int64x1_t shift_right_accumulate_round(int64x1_t a, int64x1_t b) {
return vrsra_n_s64(a, b, n); }
1310template <
int n>[[gnu::always_inline]] nce int64x2_t shift_right_accumulate_round(int64x2_t a, int64x2_t b) {
return vrsraq_n_s64(a, b, n); }
1311template <
int n>[[gnu::always_inline]] nce int32x2_t shift_right_narrow(int64x2_t a) {
return vshrn_n_s64(a, n); }
1312template <
int n>[[gnu::always_inline]] nce uint32x2_t shift_right_saturate_narrow_unsigned(int64x2_t a) {
return vqshrun_n_s64(a, n); }
1313template <
int n>[[gnu::always_inline]] nce int32x2_t shift_right_saturate_narrow(int64x2_t a) {
return vqshrn_n_s64(a, n); }
1314template <
int n>[[gnu::always_inline]] nce uint32x2_t shift_right_round_saturate_narrow_unsigned(int64x2_t a) {
return vqrshrun_n_s64(a, n); }
1315template <
int n>[[gnu::always_inline]] nce int32x2_t shift_right_round_saturate_narrow(int64x2_t a) {
return vqrshrn_n_s64(a, n); }
1316template <
int n>[[gnu::always_inline]] nce int32x2_t shift_right_round_narrow(int64x2_t a) {
return vrshrn_n_s64(a, n); }
1317template <
int n>[[gnu::always_inline]] nce int64x1_t shift_right_insert(int64x1_t a, int64x1_t b) {
return vsri_n_s64(a, b, n); }
1318template <
int n>[[gnu::always_inline]] nce int64x2_t shift_right_insert(int64x2_t a, int64x2_t b) {
return vsriq_n_s64(a, b, n); }
1319template <
int n>[[gnu::always_inline]] nce poly8x16_t shift_right_insert(poly8x16_t a, poly8x16_t b) {
return vsriq_n_p8(a, b, n); }
1320template <
int n>[[gnu::always_inline]] nce poly16x8_t shift_right_insert(poly16x8_t a, poly16x8_t b) {
return vsriq_n_p16(a, b, n); }
1321template <> [[gnu::always_inline]] nce int8x8_t reinterpret(int64x1_t a) {
return vreinterpret_s8_s64(a); }
1322template <> [[gnu::always_inline]] nce int16x4_t reinterpret(int64x1_t a) {
return vreinterpret_s16_s64(a); }
1323template <> [[gnu::always_inline]] nce int32x2_t reinterpret(int64x1_t a) {
return vreinterpret_s32_s64(a); }
1324template <> [[gnu::always_inline]] nce float32x2_t reinterpret(int64x1_t a) {
return vreinterpret_f32_s64(a); }
1325template <> [[gnu::always_inline]] nce uint8x8_t reinterpret(int64x1_t a) {
return vreinterpret_u8_s64(a); }
1326template <> [[gnu::always_inline]] nce uint16x4_t reinterpret(int64x1_t a) {
return vreinterpret_u16_s64(a); }
1327template <> [[gnu::always_inline]] nce uint32x2_t reinterpret(int64x1_t a) {
return vreinterpret_u32_s64(a); }
1328template <> [[gnu::always_inline]] nce poly8x8_t reinterpret(int64x1_t a) {
return vreinterpret_p8_s64(a); }
1329template <> [[gnu::always_inline]] nce poly16x4_t reinterpret(int64x1_t a) {
return vreinterpret_p16_s64(a); }
1330template <> [[gnu::always_inline]] nce uint64x1_t reinterpret(int64x1_t a) {
return vreinterpret_u64_s64(a); }
1331template <> [[gnu::always_inline]] nce int8x16_t reinterpret(poly8x16_t a) {
return vreinterpretq_s8_p8(a); }
1332template <> [[gnu::always_inline]] nce int16x8_t reinterpret(poly8x16_t a) {
return vreinterpretq_s16_p8(a); }
1333template <> [[gnu::always_inline]] nce int32x4_t reinterpret(poly8x16_t a) {
return vreinterpretq_s32_p8(a); }
1334template <> [[gnu::always_inline]] nce float32x4_t reinterpret(poly8x16_t a) {
return vreinterpretq_f32_p8(a); }
1335template <> [[gnu::always_inline]] nce uint8x16_t reinterpret(poly8x16_t a) {
return vreinterpretq_u8_p8(a); }
1336template <> [[gnu::always_inline]] nce uint16x8_t reinterpret(poly8x16_t a) {
return vreinterpretq_u16_p8(a); }
1337template <> [[gnu::always_inline]] nce uint32x4_t reinterpret(poly8x16_t a) {
return vreinterpretq_u32_p8(a); }
1338template <> [[gnu::always_inline]] nce poly16x8_t reinterpret(poly8x16_t a) {
return vreinterpretq_p16_p8(a); }
1339template <> [[gnu::always_inline]] nce uint64x2_t reinterpret(poly8x16_t a) {
return vreinterpretq_u64_p8(a); }
1340template <> [[gnu::always_inline]] nce int64x2_t reinterpret(poly8x16_t a) {
return vreinterpretq_s64_p8(a); }
1341template <> [[gnu::always_inline]] nce int8x16_t reinterpret(poly16x8_t a) {
return vreinterpretq_s8_p16(a); }
1342template <> [[gnu::always_inline]] nce int16x8_t reinterpret(poly16x8_t a) {
return vreinterpretq_s16_p16(a); }
1343template <> [[gnu::always_inline]] nce int32x4_t reinterpret(poly16x8_t a) {
return vreinterpretq_s32_p16(a); }
1344template <> [[gnu::always_inline]] nce float32x4_t reinterpret(poly16x8_t a) {
return vreinterpretq_f32_p16(a); }
1345template <> [[gnu::always_inline]] nce uint8x16_t reinterpret(poly16x8_t a) {
return vreinterpretq_u8_p16(a); }
1346template <> [[gnu::always_inline]] nce uint16x8_t reinterpret(poly16x8_t a) {
return vreinterpretq_u16_p16(a); }
1347template <> [[gnu::always_inline]] nce uint32x4_t reinterpret(poly16x8_t a) {
return vreinterpretq_u32_p16(a); }
1348template <> [[gnu::always_inline]] nce poly8x16_t reinterpret(poly16x8_t a) {
return vreinterpretq_p8_p16(a); }
1349template <> [[gnu::always_inline]] nce uint64x2_t reinterpret(poly16x8_t a) {
return vreinterpretq_u64_p16(a); }
1350template <> [[gnu::always_inline]] nce int64x2_t reinterpret(poly16x8_t a) {
return vreinterpretq_s64_p16(a); }
1351template <> [[gnu::always_inline]] nce poly8x16_t reinterpret(int64x2_t a) {
return vreinterpretq_p8_s64(a); }
1352template <> [[gnu::always_inline]] nce poly16x8_t reinterpret(int64x2_t a) {
return vreinterpretq_p16_s64(a); }
1353[[gnu::always_inline]] nce int32x2_t move_narrow(int64x2_t a) {
return vmovn_s64(a); }
1354[[gnu::always_inline]] nce int32x2_t move_saturate_narrow(int64x2_t a) {
return vqmovn_s64(a); }
1355[[gnu::always_inline]] nce uint32x2_t move_unsigned_saturate_narrow(int64x2_t a) {
return vqmovun_s64(a); }
1356template <
int lane> [[gnu::always_inline]] nce int64x2_t multiply_add_long_lane(int64x2_t a, int32x2_t b, int32x2_t v) {
return vmlal_lane_s32(a, b, v, lane); }
1357template <
int lane> [[gnu::always_inline]] nce int64x2_t multiply_subtract_long_lane(int64x2_t a, int32x2_t b, int32x2_t v) {
return vmlsl_lane_s32(a, b, v, lane); }
1358[[gnu::always_inline]] nce int64x2_t multiply_add_long(int64x2_t a, int32x2_t b, int32_t c) {
return vmlal_n_s32(a, b, c); }
1359[[gnu::always_inline]] nce int64x2_t multiply_subtract_long(int64x2_t a, int32x2_t b, int32_t c) {
return vmlsl_n_s32(a, b, c); }
1360[[gnu::always_inline]] nce poly8x16_t bitwise_not(poly8x16_t a) {
return vmvnq_p8(a); }
1361[[gnu::always_inline]] nce int64x1_t bitwise_and(int64x1_t a, int64x1_t b) {
return vand_s64(a, b); }
1362[[gnu::always_inline]] nce int64x2_t bitwise_and(int64x2_t a, int64x2_t b) {
return vandq_s64(a, b); }
1363[[gnu::always_inline]] nce int64x1_t bitwise_or(int64x1_t a, int64x1_t b) {
return vorr_s64(a, b); }
1364[[gnu::always_inline]] nce int64x2_t bitwise_or(int64x2_t a, int64x2_t b) {
return vorrq_s64(a, b); }
1365[[gnu::always_inline]] nce int64x1_t bitwise_xor(int64x1_t a, int64x1_t b) {
return veor_s64(a, b); }
1366[[gnu::always_inline]] nce int64x2_t bitwise_xor(int64x2_t a, int64x2_t b) {
return veorq_s64(a, b); }
1367[[gnu::always_inline]] nce int64x1_t bitwise_or_not(int64x1_t a, int64x1_t b) {
return vorn_s64(a, b); }
1368[[gnu::always_inline]] nce int64x2_t bitwise_or_not(int64x2_t a, int64x2_t b) {
return vornq_s64(a, b); }
1369[[gnu::always_inline]] nce poly8x16_t count_active_bits(poly8x16_t a) {
return vcntq_p8(a); }
1370[[gnu::always_inline]] nce int64x1_t bitwise_clear(int64x1_t a, int64x1_t b) {
return vbic_s64(a, b); }
1371[[gnu::always_inline]] nce int64x2_t bitwise_clear(int64x2_t a, int64x2_t b) {
return vbicq_s64(a, b); }
1372template <> [[gnu::always_inline]] nce int8x8_t create(uint64_t a) {
return vcreate_s8(a); }
1373template <> [[gnu::always_inline]] nce int16x4_t create(uint64_t a) {
return vcreate_s16(a); }
1374template <> [[gnu::always_inline]] nce int32x2_t create(uint64_t a) {
return vcreate_s32(a); }
1375template <> [[gnu::always_inline]] nce int64x1_t create(uint64_t a) {
return vcreate_s64(a); }
1376template <> [[gnu::always_inline]] nce uint8x8_t create(uint64_t a) {
return vcreate_u8(a); }
1377template <> [[gnu::always_inline]] nce uint16x4_t create(uint64_t a) {
return vcreate_u16(a); }
1378template <> [[gnu::always_inline]] nce uint32x2_t create(uint64_t a) {
return vcreate_u32(a); }
1379template <> [[gnu::always_inline]] nce uint64x1_t create(uint64_t a) {
return vcreate_u64(a); }
1380template <> [[gnu::always_inline]] nce float32x2_t create(uint64_t a) {
return vcreate_f32(a); }
1381template <> [[gnu::always_inline]] nce poly8x8_t create(uint64_t a) {
return vcreate_p8(a); }
1382template <> [[gnu::always_inline]] nce poly16x4_t create(uint64_t a) {
return vcreate_p16(a); }
1383template <> [[gnu::always_inline]] nce int8x8_t duplicate(int8_t value) {
return vdup_n_s8(value); }
1384template <> [[gnu::always_inline]] nce int8x16_t duplicate(int8_t value) {
return vdupq_n_s8(value); }
1385template <> [[gnu::always_inline]] nce int16x4_t duplicate(int16_t value) {
return vdup_n_s16(value); }
1386template <> [[gnu::always_inline]] nce int16x8_t duplicate(int16_t value) {
return vdupq_n_s16(value); }
1387template <> [[gnu::always_inline]] nce int32x2_t duplicate(int32_t value) {
return vdup_n_s32(value); }
1388template <> [[gnu::always_inline]] nce int32x4_t duplicate(int32_t value) {
return vdupq_n_s32(value); }
1389template <> [[gnu::always_inline]] nce int64x1_t duplicate(int64_t value) {
return vdup_n_s64(value); }
1390template <> [[gnu::always_inline]] nce int64x2_t duplicate(int64_t value) {
return vdupq_n_s64(value); }
1391template <> [[gnu::always_inline]] nce uint8x8_t duplicate(uint8_t value) {
return vdup_n_u8(value); }
1392template <> [[gnu::always_inline]] nce uint8x16_t duplicate(uint8_t value) {
return vdupq_n_u8(value); }
1393template <> [[gnu::always_inline]] nce uint16x4_t duplicate(uint16_t value) {
return vdup_n_u16(value); }
1394template <> [[gnu::always_inline]] nce uint16x8_t duplicate(uint16_t value) {
return vdupq_n_u16(value); }
1395template <> [[gnu::always_inline]] nce uint32x2_t duplicate(uint32_t value) {
return vdup_n_u32(value); }
1396template <> [[gnu::always_inline]] nce uint32x4_t duplicate(uint32_t value) {
return vdupq_n_u32(value); }
1397template <> [[gnu::always_inline]] nce uint64x1_t duplicate(uint64_t value) {
return vdup_n_u64(value); }
1398template <> [[gnu::always_inline]] nce uint64x2_t duplicate(uint64_t value) {
return vdupq_n_u64(value); }
1399template <> [[gnu::always_inline]] nce float32x2_t duplicate(float32_t value) {
return vdup_n_f32(value); }
1400template <> [[gnu::always_inline]] nce float32x4_t duplicate(float32_t value) {
return vdupq_n_f32(value); }
1401template <> [[gnu::always_inline]] nce poly8x8_t duplicate(poly8_t value) {
return vdup_n_p8(value); }
1402template <> [[gnu::always_inline]] nce poly8x16_t duplicate(poly8_t value) {
return vdupq_n_p8(value); }
1403template <> [[gnu::always_inline]] nce poly16x4_t duplicate(poly16_t value) {
return vdup_n_p16(value); }
1404template <> [[gnu::always_inline]] nce poly16x8_t duplicate(poly16_t value) {
return vdupq_n_p16(value); }
1405template <
int lane>[[gnu::always_inline]] nce int64x1_t duplicate_lane(int64x1_t a) {
return vdup_lane_s64(a, lane); }
1406template <
int lane>[[gnu::always_inline]] nce int64x2_t duplicate_lane_quad(int64x1_t a) {
return vdupq_lane_s64(a, lane); }
1407[[gnu::always_inline]] nce int64x2_t combine(int64x1_t low, int64x1_t high) {
return vcombine_s64(low, high); }
1408[[gnu::always_inline]] nce int64x1_t get_high(int64x2_t a) {
return vget_high_s64(a); }
1409[[gnu::always_inline]] nce poly8x8_t get_high(poly8x16_t a) {
return vget_high_p8(a); }
1410[[gnu::always_inline]] nce poly16x4_t get_high(poly16x8_t a) {
return vget_high_p16(a); }
1411[[gnu::always_inline]] nce int64x1_t get_low(int64x2_t a) {
return vget_low_s64(a); }
1412[[gnu::always_inline]] nce poly8x8_t get_low(poly8x16_t a) {
return vget_low_p8(a); }
1413[[gnu::always_inline]] nce poly16x4_t get_low(poly16x8_t a) {
return vget_low_p16(a); }
1414template <
int lane>[[gnu::always_inline]] nce int64_t get_lane(int64x1_t v) {
return vget_lane_s64(v, lane); }
1415template <
int lane>[[gnu::always_inline]] nce poly8_t get_lane(poly8x16_t v) {
return vgetq_lane_p8(v, lane); }
1416template <
int lane>[[gnu::always_inline]] nce poly16_t get_lane(poly16x8_t v) {
return vgetq_lane_p16(v, lane); }
1417template <
int n>[[gnu::always_inline]] nce int64x1_t extract(int64x1_t a, int64x1_t b) {
return vext_s64(a, b, n); }
1418template <
int n>[[gnu::always_inline]] nce int64x2_t extract(int64x2_t a, int64x2_t b) {
return vextq_s64(a, b, n); }
1419template <
int n>[[gnu::always_inline]] nce poly8x16_t extract(poly8x16_t a, poly8x16_t b) {
return vextq_p8(a, b, n); }
1420template <
int n>[[gnu::always_inline]] nce poly16x8_t extract(poly16x8_t a, poly16x8_t b) {
return vextq_p16(a, b, n); }
1421[[gnu::always_inline]] nce poly8x16_t reverse_64bit(poly8x16_t a) {
return vrev64q_p8(a); }
1422[[gnu::always_inline]] nce poly16x8_t reverse_64bit(poly16x8_t a) {
return vrev64q_p16(a); }
1423[[gnu::always_inline]] nce poly8x16_t reverse_32bit(poly8x16_t a) {
return vrev32q_p8(a); }
1424[[gnu::always_inline]] nce poly16x8_t reverse_32bit(poly16x8_t a) {
return vrev32q_p16(a); }
1425[[gnu::always_inline]] nce poly8x16_t reverse_16bit(poly8x16_t a) {
return vrev16q_p8(a); }
1426[[gnu::always_inline]] nce poly8x16x2_t zip(poly8x16_t a, poly8x16_t b) {
return vzipq_p8(a, b); }
1427[[gnu::always_inline]] nce poly16x8x2_t zip(poly16x8_t a, poly16x8_t b) {
return vzipq_p16(a, b); }
1428[[gnu::always_inline]] nce poly8x16x2_t unzip(poly8x16_t a, poly8x16_t b) {
return vuzpq_p8(a, b); }
1429[[gnu::always_inline]] nce poly16x8x2_t unzip(poly16x8_t a, poly16x8_t b) {
return vuzpq_p16(a, b); }
1430[[gnu::always_inline]] nce poly8x16x2_t transpose(poly8x16_t a, poly8x16_t b) {
return vtrnq_p8(a, b); }
1431[[gnu::always_inline]] nce poly16x8x2_t transpose(poly16x8_t a, poly16x8_t b) {
return vtrnq_p16(a, b); }
1432template <
int lane>[[gnu::always_inline]] nce uint8x8_t set_lane(uint8_t a, uint8x8_t v) {
return vset_lane_u8(a, v, lane); }
1433template <
int lane>[[gnu::always_inline]] nce uint16x4_t set_lane(uint16_t a, uint16x4_t v) {
return vset_lane_u16(a, v, lane); }
1434template <
int lane>[[gnu::always_inline]] nce uint32x2_t set_lane(uint32_t a, uint32x2_t v) {
return vset_lane_u32(a, v, lane); }
1435template <
int lane>[[gnu::always_inline]] nce uint64x1_t set_lane(uint64_t a, uint64x1_t v) {
return vset_lane_u64(a, v, lane); }
1436template <
int lane>[[gnu::always_inline]] nce int8x8_t set_lane(int8_t a, int8x8_t v) {
return vset_lane_s8(a, v, lane); }
1437template <
int lane>[[gnu::always_inline]] nce int16x4_t set_lane(int16_t a, int16x4_t v) {
return vset_lane_s16(a, v, lane); }
1438template <
int lane>[[gnu::always_inline]] nce int32x2_t set_lane(int32_t a, int32x2_t v) {
return vset_lane_s32(a, v, lane); }
1439template <
int lane>[[gnu::always_inline]] nce int64x1_t set_lane(int64_t a, int64x1_t v) {
return vset_lane_s64(a, v, lane); }
1440template <
int lane>[[gnu::always_inline]] nce poly8x8_t set_lane(poly8_t a, poly8x8_t v) {
return vset_lane_p8(a, v, lane); }
1441template <
int lane>[[gnu::always_inline]] nce poly16x4_t set_lane(poly16_t a, poly16x4_t v) {
return vset_lane_p16(a, v, lane); }
1442template <
int lane>[[gnu::always_inline]] nce float32x2_t set_lane(float32_t a, float32x2_t v) {
return vset_lane_f32(a, v, lane); }
1443template <
int lane>[[gnu::always_inline]] nce poly8x16_t set_lane(poly8_t a, poly8x16_t v) {
return vsetq_lane_p8(a, v, lane); }
1444template <
int lane>[[gnu::always_inline]] nce poly16x8_t set_lane(poly16_t a, poly16x8_t v) {
return vsetq_lane_p16(a, v, lane); }
1445template <> [[gnu::always_inline]]
inline int8x8_t load1(int8_t
const *ptr) {
return vld1_s8(ptr); }
1446template <> [[gnu::always_inline]]
inline int8x16_t load1(int8_t
const *ptr) {
return vld1q_s8(ptr); }
1447template <> [[gnu::always_inline]]
inline int16x4_t load1(int16_t
const *ptr) {
return vld1_s16(ptr); }
1448template <> [[gnu::always_inline]]
inline int16x8_t load1(int16_t
const *ptr) {
return vld1q_s16(ptr); }
1449template <> [[gnu::always_inline]]
inline int32x2_t load1(int32_t
const *ptr) {
return vld1_s32(ptr); }
1450template <> [[gnu::always_inline]]
inline int32x4_t load1(int32_t
const *ptr) {
return vld1q_s32(ptr); }
1451template <> [[gnu::always_inline]]
inline int64x1_t load1(int64_t
const *ptr) {
return vld1_s64(ptr); }
1452template <> [[gnu::always_inline]]
inline int64x2_t load1(int64_t
const *ptr) {
return vld1q_s64(ptr); }
1453template <> [[gnu::always_inline]]
inline uint8x8_t load1(uint8_t
const *ptr) {
return vld1_u8(ptr); }
1454template <> [[gnu::always_inline]]
inline uint8x16_t load1(uint8_t
const *ptr) {
return vld1q_u8(ptr); }
1455template <> [[gnu::always_inline]]
inline uint16x4_t load1(uint16_t
const *ptr) {
return vld1_u16(ptr); }
1456template <> [[gnu::always_inline]]
inline uint16x8_t load1(uint16_t
const *ptr) {
return vld1q_u16(ptr); }
1457template <> [[gnu::always_inline]]
inline uint32x2_t load1(uint32_t
const *ptr) {
return vld1_u32(ptr); }
1458template <> [[gnu::always_inline]]
inline uint32x4_t load1(uint32_t
const *ptr) {
return vld1q_u32(ptr); }
1459template <> [[gnu::always_inline]]
inline uint64x1_t load1(uint64_t
const *ptr) {
return vld1_u64(ptr); }
1460template <> [[gnu::always_inline]]
inline uint64x2_t load1(uint64_t
const *ptr) {
return vld1q_u64(ptr); }
1461template <> [[gnu::always_inline]]
inline float32x2_t load1(float32_t
const *ptr) {
return vld1_f32(ptr); }
1462template <> [[gnu::always_inline]]
inline float32x4_t load1(float32_t
const *ptr) {
return vld1q_f32(ptr); }
1463template <> [[gnu::always_inline]]
inline poly8x8_t load1(poly8_t
const *ptr) {
return vld1_p8(ptr); }
1464template <> [[gnu::always_inline]]
inline poly8x16_t load1(poly8_t
const *ptr) {
return vld1q_p8(ptr); }
1465template <> [[gnu::always_inline]]
inline poly16x4_t load1(poly16_t
const *ptr) {
return vld1_p16(ptr); }
1466template <> [[gnu::always_inline]]
inline poly16x8_t load1(poly16_t
const *ptr) {
return vld1q_p16(ptr); }
1467template <
int lane>[[gnu::always_inline]] nce int8x8_t load1_lane(int8_t
const *ptr, int8x8_t src) {
return vld1_lane_s8(ptr, src, lane); }
1468template <
int lane>[[gnu::always_inline]] nce int8x16_t load1_lane_quad(int8_t
const *ptr, int8x16_t src) {
return vld1q_lane_s8(ptr, src, lane); }
1469template <
int lane>[[gnu::always_inline]] nce int16x4_t load1_lane(int16_t
const *ptr, int16x4_t src) {
return vld1_lane_s16(ptr, src, lane); }
1470template <
int lane>[[gnu::always_inline]] nce int16x8_t load1_lane_quad(int16_t
const *ptr, int16x8_t src) {
return vld1q_lane_s16(ptr, src, lane); }
1471template <
int lane>[[gnu::always_inline]] nce int32x2_t load1_lane(int32_t
const *ptr, int32x2_t src) {
return vld1_lane_s32(ptr, src, lane); }
1472template <
int lane>[[gnu::always_inline]] nce int32x4_t load1_lane_quad(int32_t
const *ptr, int32x4_t src) {
return vld1q_lane_s32(ptr, src, lane); }
1473template <
int lane>[[gnu::always_inline]] nce int64x1_t load1_lane(int64_t
const *ptr, int64x1_t src) {
return vld1_lane_s64(ptr, src, lane); }
1474template <
int lane>[[gnu::always_inline]] nce int64x2_t load1_lane_quad(int64_t
const *ptr, int64x2_t src) {
return vld1q_lane_s64(ptr, src, lane); }
1475template <
int lane>[[gnu::always_inline]] nce uint8x8_t load1_lane(uint8_t
const *ptr, uint8x8_t src) {
return vld1_lane_u8(ptr, src, lane); }
1476template <
int lane>[[gnu::always_inline]] nce uint8x16_t load1_lane_quad(uint8_t
const *ptr, uint8x16_t src) {
return vld1q_lane_u8(ptr, src, lane); }
1477template <
int lane>[[gnu::always_inline]] nce uint16x4_t load1_lane(uint16_t
const *ptr, uint16x4_t src) {
return vld1_lane_u16(ptr, src, lane); }
1478template <
int lane>[[gnu::always_inline]] nce uint16x8_t load1_lane_quad(uint16_t
const *ptr, uint16x8_t src) {
return vld1q_lane_u16(ptr, src, lane); }
1479template <
int lane>[[gnu::always_inline]] nce uint32x2_t load1_lane(uint32_t
const *ptr, uint32x2_t src) {
return vld1_lane_u32(ptr, src, lane); }
1480template <
int lane>[[gnu::always_inline]] nce uint32x4_t load1_lane_quad(uint32_t
const *ptr, uint32x4_t src) {
return vld1q_lane_u32(ptr, src, lane); }
1481template <
int lane>[[gnu::always_inline]] nce uint64x1_t load1_lane(uint64_t
const *ptr, uint64x1_t src) {
return vld1_lane_u64(ptr, src, lane); }
1482template <
int lane>[[gnu::always_inline]] nce uint64x2_t load1_lane_quad(uint64_t
const *ptr, uint64x2_t src) {
return vld1q_lane_u64(ptr, src, lane); }
1483template <
int lane>[[gnu::always_inline]] nce float32x2_t load1_lane(float32_t
const *ptr, float32x2_t src) {
return vld1_lane_f32(ptr, src, lane); }
1484template <
int lane>[[gnu::always_inline]] nce float32x4_t load1_lane_quad(float32_t
const *ptr, float32x4_t src) {
return vld1q_lane_f32(ptr, src, lane); }
1485template <
int lane>[[gnu::always_inline]] nce poly8x8_t load1_lane(poly8_t
const *ptr, poly8x8_t src) {
return vld1_lane_p8(ptr, src, lane); }
1486template <
int lane>[[gnu::always_inline]] nce poly8x16_t load1_lane_quad(poly8_t
const *ptr, poly8x16_t src) {
return vld1q_lane_p8(ptr, src, lane); }
1487template <
int lane>[[gnu::always_inline]] nce poly16x4_t load1_lane(poly16_t
const *ptr, poly16x4_t src) {
return vld1_lane_p16(ptr, src, lane); }
1488template <
int lane>[[gnu::always_inline]] nce poly16x8_t load1_lane_quad(poly16_t
const *ptr, poly16x8_t src) {
return vld1q_lane_p16(ptr, src, lane); }
1489template <> [[gnu::always_inline]]
inline int8x8_t load1_duplicate(int8_t
const *ptr) {
return vld1_dup_s8(ptr); }
1490template <> [[gnu::always_inline]]
inline int8x16_t load1_duplicate(int8_t
const *ptr) {
return vld1q_dup_s8(ptr); }
1491template <> [[gnu::always_inline]]
inline int16x4_t load1_duplicate(int16_t
const *ptr) {
return vld1_dup_s16(ptr); }
1492template <> [[gnu::always_inline]]
inline int16x8_t load1_duplicate(int16_t
const *ptr) {
return vld1q_dup_s16(ptr); }
1493template <> [[gnu::always_inline]]
inline int32x2_t load1_duplicate(int32_t
const *ptr) {
return vld1_dup_s32(ptr); }
1494template <> [[gnu::always_inline]]
inline int32x4_t load1_duplicate(int32_t
const *ptr) {
return vld1q_dup_s32(ptr); }
1495template <> [[gnu::always_inline]]
inline int64x1_t load1_duplicate(int64_t
const *ptr) {
return vld1_dup_s64(ptr); }
1496template <> [[gnu::always_inline]]
inline int64x2_t load1_duplicate(int64_t
const *ptr) {
return vld1q_dup_s64(ptr); }
1497template <> [[gnu::always_inline]]
inline uint8x8_t load1_duplicate(uint8_t
const *ptr) {
return vld1_dup_u8(ptr); }
1498template <> [[gnu::always_inline]]
inline uint8x16_t load1_duplicate(uint8_t
const *ptr) {
return vld1q_dup_u8(ptr); }
1499template <> [[gnu::always_inline]]
inline uint16x4_t load1_duplicate(uint16_t
const *ptr) {
return vld1_dup_u16(ptr); }
1500template <> [[gnu::always_inline]]
inline uint16x8_t load1_duplicate(uint16_t
const *ptr) {
return vld1q_dup_u16(ptr); }
1501template <> [[gnu::always_inline]]
inline uint32x2_t load1_duplicate(uint32_t
const *ptr) {
return vld1_dup_u32(ptr); }
1502template <> [[gnu::always_inline]]
inline uint32x4_t load1_duplicate(uint32_t
const *ptr) {
return vld1q_dup_u32(ptr); }
1503template <> [[gnu::always_inline]]
inline uint64x1_t load1_duplicate(uint64_t
const *ptr) {
return vld1_dup_u64(ptr); }
1504template <> [[gnu::always_inline]]
inline uint64x2_t load1_duplicate(uint64_t
const *ptr) {
return vld1q_dup_u64(ptr); }
1505template <> [[gnu::always_inline]]
inline float32x2_t load1_duplicate(float32_t
const *ptr) {
return vld1_dup_f32(ptr); }
1506template <> [[gnu::always_inline]]
inline float32x4_t load1_duplicate(float32_t
const *ptr) {
return vld1q_dup_f32(ptr); }
1507template <> [[gnu::always_inline]]
inline poly8x8_t load1_duplicate(poly8_t
const *ptr) {
return vld1_dup_p8(ptr); }
1508template <> [[gnu::always_inline]]
inline poly8x16_t load1_duplicate(poly8_t
const *ptr) {
return vld1q_dup_p8(ptr); }
1509template <> [[gnu::always_inline]]
inline poly16x4_t load1_duplicate(poly16_t
const *ptr) {
return vld1_dup_p16(ptr); }
1510template <> [[gnu::always_inline]]
inline poly16x8_t load1_duplicate(poly16_t
const *ptr) {
return vld1q_dup_p16(ptr); }
1511template <> [[gnu::always_inline]]
inline int8x8x2_t load2(int8_t
const *ptr) {
return vld2_s8(ptr); }
1512template <> [[gnu::always_inline]]
inline int8x16x2_t load2(int8_t
const *ptr) {
return vld2q_s8(ptr); }
1513template <> [[gnu::always_inline]]
inline int16x4x2_t load2(int16_t
const *ptr) {
return vld2_s16(ptr); }
1514template <> [[gnu::always_inline]]
inline int16x8x2_t load2(int16_t
const *ptr) {
return vld2q_s16(ptr); }
1515template <> [[gnu::always_inline]]
inline int32x2x2_t load2(int32_t
const *ptr) {
return vld2_s32(ptr); }
1516template <> [[gnu::always_inline]]
inline int32x4x2_t load2(int32_t
const *ptr) {
return vld2q_s32(ptr); }
1517template <> [[gnu::always_inline]]
inline uint8x8x2_t load2(uint8_t
const *ptr) {
return vld2_u8(ptr); }
1518template <> [[gnu::always_inline]]
inline uint8x16x2_t load2(uint8_t
const *ptr) {
return vld2q_u8(ptr); }
1519template <> [[gnu::always_inline]]
inline uint16x4x2_t load2(uint16_t
const *ptr) {
return vld2_u16(ptr); }
1520template <> [[gnu::always_inline]]
inline uint16x8x2_t load2(uint16_t
const *ptr) {
return vld2q_u16(ptr); }
1521template <> [[gnu::always_inline]]
inline uint32x2x2_t load2(uint32_t
const *ptr) {
return vld2_u32(ptr); }
1522template <> [[gnu::always_inline]]
inline uint32x4x2_t load2(uint32_t
const *ptr) {
return vld2q_u32(ptr); }
1523template <> [[gnu::always_inline]]
inline float32x2x2_t load2(float32_t
const *ptr) {
return vld2_f32(ptr); }
1524template <> [[gnu::always_inline]]
inline float32x4x2_t load2(float32_t
const *ptr) {
return vld2q_f32(ptr); }
1525template <> [[gnu::always_inline]]
inline poly8x8x2_t load2(poly8_t
const *ptr) {
return vld2_p8(ptr); }
1526template <> [[gnu::always_inline]]
inline poly8x16x2_t load2(poly8_t
const *ptr) {
return vld2q_p8(ptr); }
1527template <> [[gnu::always_inline]]
inline poly16x4x2_t load2(poly16_t
const *ptr) {
return vld2_p16(ptr); }
1528template <> [[gnu::always_inline]]
inline poly16x8x2_t load2(poly16_t
const *ptr) {
return vld2q_p16(ptr); }
1529template <> [[gnu::always_inline]]
inline int64x1x2_t load2(int64_t
const *ptr) {
return vld2_s64(ptr); }
1530template <> [[gnu::always_inline]]
inline uint64x1x2_t load2(uint64_t
const *ptr) {
return vld2_u64(ptr); }
1531template <> [[gnu::always_inline]]
inline int8x8x3_t load3(int8_t
const *ptr) {
return vld3_s8(ptr); }
1532template <> [[gnu::always_inline]]
inline int8x16x3_t load3(int8_t
const *ptr) {
return vld3q_s8(ptr); }
1533template <> [[gnu::always_inline]]
inline int16x4x3_t load3(int16_t
const *ptr) {
return vld3_s16(ptr); }
1534template <> [[gnu::always_inline]]
inline int16x8x3_t load3(int16_t
const *ptr) {
return vld3q_s16(ptr); }
1535template <> [[gnu::always_inline]]
inline int32x2x3_t load3(int32_t
const *ptr) {
return vld3_s32(ptr); }
1536template <> [[gnu::always_inline]]
inline int32x4x3_t load3(int32_t
const *ptr) {
return vld3q_s32(ptr); }
1537template <> [[gnu::always_inline]]
inline uint8x8x3_t load3(uint8_t
const *ptr) {
return vld3_u8(ptr); }
1538template <> [[gnu::always_inline]]
inline uint8x16x3_t load3(uint8_t
const *ptr) {
return vld3q_u8(ptr); }
1539template <> [[gnu::always_inline]]
inline uint16x4x3_t load3(uint16_t
const *ptr) {
return vld3_u16(ptr); }
1540template <> [[gnu::always_inline]]
inline uint16x8x3_t load3(uint16_t
const *ptr) {
return vld3q_u16(ptr); }
1541template <> [[gnu::always_inline]]
inline uint32x2x3_t load3(uint32_t
const *ptr) {
return vld3_u32(ptr); }
1542template <> [[gnu::always_inline]]
inline uint32x4x3_t load3(uint32_t
const *ptr) {
return vld3q_u32(ptr); }
1543template <> [[gnu::always_inline]]
inline float32x2x3_t load3(float32_t
const *ptr) {
return vld3_f32(ptr); }
1544template <> [[gnu::always_inline]]
inline float32x4x3_t load3(float32_t
const *ptr) {
return vld3q_f32(ptr); }
1545template <> [[gnu::always_inline]]
inline poly8x8x3_t load3(poly8_t
const *ptr) {
return vld3_p8(ptr); }
1546template <> [[gnu::always_inline]]
inline poly8x16x3_t load3(poly8_t
const *ptr) {
return vld3q_p8(ptr); }
1547template <> [[gnu::always_inline]]
inline poly16x4x3_t load3(poly16_t
const *ptr) {
return vld3_p16(ptr); }
1548template <> [[gnu::always_inline]]
inline poly16x8x3_t load3(poly16_t
const *ptr) {
return vld3q_p16(ptr); }
1549template <> [[gnu::always_inline]]
inline int64x1x3_t load3(int64_t
const *ptr) {
return vld3_s64(ptr); }
1550template <> [[gnu::always_inline]]
inline uint64x1x3_t load3(uint64_t
const *ptr) {
return vld3_u64(ptr); }
1551template <> [[gnu::always_inline]]
inline int8x8x4_t load4(int8_t
const *ptr) {
return vld4_s8(ptr); }
1552template <> [[gnu::always_inline]]
inline int8x16x4_t load4(int8_t
const *ptr) {
return vld4q_s8(ptr); }
1553template <> [[gnu::always_inline]]
inline int16x4x4_t load4(int16_t
const *ptr) {
return vld4_s16(ptr); }
1554template <> [[gnu::always_inline]]
inline int16x8x4_t load4(int16_t
const *ptr) {
return vld4q_s16(ptr); }
1555template <> [[gnu::always_inline]]
inline int32x2x4_t load4(int32_t
const *ptr) {
return vld4_s32(ptr); }
1556template <> [[gnu::always_inline]]
inline int32x4x4_t load4(int32_t
const *ptr) {
return vld4q_s32(ptr); }
1557template <> [[gnu::always_inline]]
inline uint8x8x4_t load4(uint8_t
const *ptr) {
return vld4_u8(ptr); }
1558template <> [[gnu::always_inline]]
inline uint8x16x4_t load4(uint8_t
const *ptr) {
return vld4q_u8(ptr); }
1559template <> [[gnu::always_inline]]
inline uint16x4x4_t load4(uint16_t
const *ptr) {
return vld4_u16(ptr); }
1560template <> [[gnu::always_inline]]
inline uint16x8x4_t load4(uint16_t
const *ptr) {
return vld4q_u16(ptr); }
1561template <> [[gnu::always_inline]]
inline uint32x2x4_t load4(uint32_t
const *ptr) {
return vld4_u32(ptr); }
1562template <> [[gnu::always_inline]]
inline uint32x4x4_t load4(uint32_t
const *ptr) {
return vld4q_u32(ptr); }
1563template <> [[gnu::always_inline]]
inline float32x2x4_t load4(float32_t
const *ptr) {
return vld4_f32(ptr); }
1564template <> [[gnu::always_inline]]
inline float32x4x4_t load4(float32_t
const *ptr) {
return vld4q_f32(ptr); }
1565template <> [[gnu::always_inline]]
inline poly8x8x4_t load4(poly8_t
const *ptr) {
return vld4_p8(ptr); }
1566template <> [[gnu::always_inline]]
inline poly8x16x4_t load4(poly8_t
const *ptr) {
return vld4q_p8(ptr); }
1567template <> [[gnu::always_inline]]
inline poly16x4x4_t load4(poly16_t
const *ptr) {
return vld4_p16(ptr); }
1568template <> [[gnu::always_inline]]
inline poly16x8x4_t load4(poly16_t
const *ptr) {
return vld4q_p16(ptr); }
1569template <> [[gnu::always_inline]]
inline int64x1x4_t load4(int64_t
const *ptr) {
return vld4_s64(ptr); }
1570template <> [[gnu::always_inline]]
inline uint64x1x4_t load4(uint64_t
const *ptr) {
return vld4_u64(ptr); }
1571template <> [[gnu::always_inline]]
inline int8x8x2_t load2_duplicate(int8_t
const *ptr) {
return vld2_dup_s8(ptr); }
1572template <> [[gnu::always_inline]]
inline int16x4x2_t load2_duplicate(int16_t
const *ptr) {
return vld2_dup_s16(ptr); }
1573template <> [[gnu::always_inline]]
inline int32x2x2_t load2_duplicate(int32_t
const *ptr) {
return vld2_dup_s32(ptr); }
1574template <> [[gnu::always_inline]]
inline uint8x8x2_t load2_duplicate(uint8_t
const *ptr) {
return vld2_dup_u8(ptr); }
1575template <> [[gnu::always_inline]]
inline uint16x4x2_t load2_duplicate(uint16_t
const *ptr) {
return vld2_dup_u16(ptr); }
1576template <> [[gnu::always_inline]]
inline uint32x2x2_t load2_duplicate(uint32_t
const *ptr) {
return vld2_dup_u32(ptr); }
1577template <> [[gnu::always_inline]]
inline float32x2x2_t load2_duplicate(float32_t
const *ptr) {
return vld2_dup_f32(ptr); }
1578template <> [[gnu::always_inline]]
inline poly8x8x2_t load2_duplicate(poly8_t
const *ptr) {
return vld2_dup_p8(ptr); }
1579template <> [[gnu::always_inline]]
inline poly16x4x2_t load2_duplicate(poly16_t
const *ptr) {
return vld2_dup_p16(ptr); }
1580template <> [[gnu::always_inline]]
inline int64x1x2_t load2_duplicate(int64_t
const *ptr) {
return vld2_dup_s64(ptr); }
1581template <> [[gnu::always_inline]]
inline uint64x1x2_t load2_duplicate(uint64_t
const *ptr) {
return vld2_dup_u64(ptr); }
1582template <> [[gnu::always_inline]]
inline int8x8x3_t load3_duplicate(int8_t
const *ptr) {
return vld3_dup_s8(ptr); }
1583template <> [[gnu::always_inline]]
inline int16x4x3_t load3_duplicate(int16_t
const *ptr) {
return vld3_dup_s16(ptr); }
1584template <> [[gnu::always_inline]]
inline int32x2x3_t load3_duplicate(int32_t
const *ptr) {
return vld3_dup_s32(ptr); }
1585template <> [[gnu::always_inline]]
inline uint8x8x3_t load3_duplicate(uint8_t
const *ptr) {
return vld3_dup_u8(ptr); }
1586template <> [[gnu::always_inline]]
inline uint16x4x3_t load3_duplicate(uint16_t
const *ptr) {
return vld3_dup_u16(ptr); }
1587template <> [[gnu::always_inline]]
inline uint32x2x3_t load3_duplicate(uint32_t
const *ptr) {
return vld3_dup_u32(ptr); }
1588template <> [[gnu::always_inline]]
inline float32x2x3_t load3_duplicate(float32_t
const *ptr) {
return vld3_dup_f32(ptr); }
1589template <> [[gnu::always_inline]]
inline poly8x8x3_t load3_duplicate(poly8_t
const *ptr) {
return vld3_dup_p8(ptr); }
1590template <> [[gnu::always_inline]]
inline poly16x4x3_t load3_duplicate(poly16_t
const *ptr) {
return vld3_dup_p16(ptr); }
1591template <> [[gnu::always_inline]]
inline int64x1x3_t load3_duplicate(int64_t
const *ptr) {
return vld3_dup_s64(ptr); }
1592template <> [[gnu::always_inline]]
inline uint64x1x3_t load3_duplicate(uint64_t
const *ptr) {
return vld3_dup_u64(ptr); }
1593template <> [[gnu::always_inline]]
inline int8x8x4_t load4_duplicate(int8_t
const *ptr) {
return vld4_dup_s8(ptr); }
1594template <> [[gnu::always_inline]]
inline int16x4x4_t load4_duplicate(int16_t
const *ptr) {
return vld4_dup_s16(ptr); }
1595template <> [[gnu::always_inline]]
inline int32x2x4_t load4_duplicate(int32_t
const *ptr) {
return vld4_dup_s32(ptr); }
1596template <> [[gnu::always_inline]]
inline uint8x8x4_t load4_duplicate(uint8_t
const *ptr) {
return vld4_dup_u8(ptr); }
1597template <> [[gnu::always_inline]]
inline uint16x4x4_t load4_duplicate(uint16_t
const *ptr) {
return vld4_dup_u16(ptr); }
1598template <> [[gnu::always_inline]]
inline uint32x2x4_t load4_duplicate(uint32_t
const *ptr) {
return vld4_dup_u32(ptr); }
1599template <> [[gnu::always_inline]]
inline float32x2x4_t load4_duplicate(float32_t
const *ptr) {
return vld4_dup_f32(ptr); }
1600template <> [[gnu::always_inline]]
inline poly8x8x4_t load4_duplicate(poly8_t
const *ptr) {
return vld4_dup_p8(ptr); }
1601template <> [[gnu::always_inline]]
inline poly16x4x4_t load4_duplicate(poly16_t
const *ptr) {
return vld4_dup_p16(ptr); }
1603template <> [[gnu::always_inline]]
inline int8x16x2_t load2_duplicate(int8_t
const *ptr) {
return vld2q_dup_s8(ptr); }
1604template <> [[gnu::always_inline]]
inline int16x8x2_t load2_duplicate(int16_t
const *ptr) {
return vld2q_dup_s16(ptr); }
1605template <> [[gnu::always_inline]]
inline int32x4x2_t load2_duplicate(int32_t
const *ptr) {
return vld2q_dup_s32(ptr); }
1606template <> [[gnu::always_inline]]
inline uint8x16x2_t load2_duplicate(uint8_t
const *ptr) {
return vld2q_dup_u8(ptr); }
1607template <> [[gnu::always_inline]]
inline uint16x8x2_t load2_duplicate(uint16_t
const *ptr) {
return vld2q_dup_u16(ptr); }
1608template <> [[gnu::always_inline]]
inline uint32x4x2_t load2_duplicate(uint32_t
const *ptr) {
return vld2q_dup_u32(ptr); }
1609template <> [[gnu::always_inline]]
inline float32x4x2_t load2_duplicate(float32_t
const *ptr) {
return vld2q_dup_f32(ptr); }
1610template <> [[gnu::always_inline]]
inline poly8x16x2_t load2_duplicate(poly8_t
const *ptr) {
return vld2q_dup_p8(ptr); }
1611template <> [[gnu::always_inline]]
inline poly16x8x2_t load2_duplicate(poly16_t
const *ptr) {
return vld2q_dup_p16(ptr); }
1612template <> [[gnu::always_inline]]
inline int8x16x3_t load3_duplicate(int8_t
const *ptr) {
return vld3q_dup_s8(ptr); }
1613template <> [[gnu::always_inline]]
inline int16x8x3_t load3_duplicate(int16_t
const *ptr) {
return vld3q_dup_s16(ptr); }
1614template <> [[gnu::always_inline]]
inline int32x4x3_t load3_duplicate(int32_t
const *ptr) {
return vld3q_dup_s32(ptr); }
1615template <> [[gnu::always_inline]]
inline uint8x16x3_t load3_duplicate(uint8_t
const *ptr) {
return vld3q_dup_u8(ptr); }
1616template <> [[gnu::always_inline]]
inline uint16x8x3_t load3_duplicate(uint16_t
const *ptr) {
return vld3q_dup_u16(ptr); }
1617template <> [[gnu::always_inline]]
inline uint32x4x3_t load3_duplicate(uint32_t
const *ptr) {
return vld3q_dup_u32(ptr); }
1618template <> [[gnu::always_inline]]
inline float32x4x3_t load3_duplicate(float32_t
const *ptr) {
return vld3q_dup_f32(ptr); }
1619template <> [[gnu::always_inline]]
inline poly8x16x3_t load3_duplicate(poly8_t
const *ptr) {
return vld3q_dup_p8(ptr); }
1620template <> [[gnu::always_inline]]
inline poly16x8x3_t load3_duplicate(poly16_t
const *ptr) {
return vld3q_dup_p16(ptr); }
1621template <> [[gnu::always_inline]]
inline int8x16x4_t load4_duplicate(int8_t
const *ptr) {
return vld4q_dup_s8(ptr); }
1622template <> [[gnu::always_inline]]
inline int16x8x4_t load4_duplicate(int16_t
const *ptr) {
return vld4q_dup_s16(ptr); }
1623template <> [[gnu::always_inline]]
inline int32x4x4_t load4_duplicate(int32_t
const *ptr) {
return vld4q_dup_s32(ptr); }
1624template <> [[gnu::always_inline]]
inline uint8x16x4_t load4_duplicate(uint8_t
const *ptr) {
return vld4q_dup_u8(ptr); }
1625template <> [[gnu::always_inline]]
inline uint16x8x4_t load4_duplicate(uint16_t
const *ptr) {
return vld4q_dup_u16(ptr); }
1626template <> [[gnu::always_inline]]
inline uint32x4x4_t load4_duplicate(uint32_t
const *ptr) {
return vld4q_dup_u32(ptr); }
1627template <> [[gnu::always_inline]]
inline float32x4x4_t load4_duplicate(float32_t
const *ptr) {
return vld4q_dup_f32(ptr); }
1628template <> [[gnu::always_inline]]
inline poly8x16x4_t load4_duplicate(poly8_t
const *ptr) {
return vld4q_dup_p8(ptr); }
1629template <> [[gnu::always_inline]]
inline poly16x8x4_t load4_duplicate(poly16_t
const *ptr) {
return vld4q_dup_p16(ptr); }
1631[[gnu::always_inline]]
inline int64x1x4_t load4_duplicate(int64_t
const *ptr) {
return vld4_dup_s64(ptr); }
1632[[gnu::always_inline]]
inline uint64x1x4_t load4_duplicate(uint64_t
const *ptr) {
return vld4_dup_u64(ptr); }
1633template <
int lane>[[gnu::always_inline]] nce int16x4x2_t load2_lane(int16_t
const *ptr, int16x4x2_t src) {
return vld2_lane_s16(ptr, src, lane); }
1634template <
int lane>[[gnu::always_inline]] nce int16x8x2_t load2_lane_quad(int16_t
const *ptr, int16x8x2_t src) {
return vld2q_lane_s16(ptr, src, lane); }
1635template <
int lane>[[gnu::always_inline]] nce int32x2x2_t load2_lane(int32_t
const *ptr, int32x2x2_t src) {
return vld2_lane_s32(ptr, src, lane); }
1636template <
int lane>[[gnu::always_inline]] nce int32x4x2_t load2_lane_quad(int32_t
const *ptr, int32x4x2_t src) {
return vld2q_lane_s32(ptr, src, lane); }
1637template <
int lane>[[gnu::always_inline]] nce uint16x4x2_t load2_lane(uint16_t
const *ptr, uint16x4x2_t src) {
return vld2_lane_u16(ptr, src, lane); }
1638template <
int lane>[[gnu::always_inline]] nce uint16x8x2_t load2_lane_quad(uint16_t
const *ptr, uint16x8x2_t src) {
return vld2q_lane_u16(ptr, src, lane); }
1639template <
int lane>[[gnu::always_inline]] nce uint32x2x2_t load2_lane(uint32_t
const *ptr, uint32x2x2_t src) {
return vld2_lane_u32(ptr, src, lane); }
1640template <
int lane>[[gnu::always_inline]] nce uint32x4x2_t load2_lane_quad(uint32_t
const *ptr, uint32x4x2_t src) {
return vld2q_lane_u32(ptr, src, lane); }
1641template <
int lane>[[gnu::always_inline]] nce float32x2x2_t load2_lane(float32_t
const *ptr, float32x2x2_t src) {
return vld2_lane_f32(ptr, src, lane); }
1642template <
int lane>[[gnu::always_inline]] nce float32x4x2_t load2_lane_quad(float32_t
const *ptr, float32x4x2_t src) {
return vld2q_lane_f32(ptr, src, lane); }
1643template <
int lane>[[gnu::always_inline]] nce poly16x4x2_t load2_lane(poly16_t
const *ptr, poly16x4x2_t src) {
return vld2_lane_p16(ptr, src, lane); }
1644template <
int lane>[[gnu::always_inline]] nce poly16x8x2_t load2_lane_quad(poly16_t
const *ptr, poly16x8x2_t src) {
return vld2q_lane_p16(ptr, src, lane); }
1645template <
int lane>[[gnu::always_inline]] nce int8x8x2_t load2_lane(int8_t
const *ptr, int8x8x2_t src) {
return vld2_lane_s8(ptr, src, lane); }
1646template <
int lane>[[gnu::always_inline]] nce uint8x8x2_t load2_lane(uint8_t
const *ptr, uint8x8x2_t src) {
return vld2_lane_u8(ptr, src, lane); }
1647template <
int lane>[[gnu::always_inline]] nce poly8x8x2_t load2_lane(poly8_t
const *ptr, poly8x8x2_t src) {
return vld2_lane_p8(ptr, src, lane); }
1648template <
int lane>[[gnu::always_inline]] nce int16x4x3_t load3_lane(int16_t
const *ptr, int16x4x3_t src) {
return vld3_lane_s16(ptr, src, lane); }
1649template <
int lane>[[gnu::always_inline]] nce int16x8x3_t load3_lane_quad(int16_t
const *ptr, int16x8x3_t src) {
return vld3q_lane_s16(ptr, src, lane); }
1650template <
int lane>[[gnu::always_inline]] nce int32x2x3_t load3_lane(int32_t
const *ptr, int32x2x3_t src) {
return vld3_lane_s32(ptr, src, lane); }
1651template <
int lane>[[gnu::always_inline]] nce int32x4x3_t load3_lane_quad(int32_t
const *ptr, int32x4x3_t src) {
return vld3q_lane_s32(ptr, src, lane); }
1652template <
int lane>[[gnu::always_inline]] nce uint16x4x3_t load3_lane(uint16_t
const *ptr, uint16x4x3_t src) {
return vld3_lane_u16(ptr, src, lane); }
1653template <
int lane>[[gnu::always_inline]] nce uint16x8x3_t load3_lane_quad(uint16_t
const *ptr, uint16x8x3_t src) {
return vld3q_lane_u16(ptr, src, lane); }
1654template <
int lane>[[gnu::always_inline]] nce uint32x2x3_t load3_lane(uint32_t
const *ptr, uint32x2x3_t src) {
return vld3_lane_u32(ptr, src, lane); }
1655template <
int lane>[[gnu::always_inline]] nce uint32x4x3_t load3_lane_quad(uint32_t
const *ptr, uint32x4x3_t src) {
return vld3q_lane_u32(ptr, src, lane); }
1656template <
int lane>[[gnu::always_inline]] nce float32x2x3_t load3_lane(float32_t
const *ptr, float32x2x3_t src) {
return vld3_lane_f32(ptr, src, lane); }
1657template <
int lane>[[gnu::always_inline]] nce float32x4x3_t load3_lane_quad(float32_t
const *ptr, float32x4x3_t src) {
return vld3q_lane_f32(ptr, src, lane); }
1658template <
int lane>[[gnu::always_inline]] nce poly16x4x3_t load3_lane(poly16_t
const *ptr, poly16x4x3_t src) {
return vld3_lane_p16(ptr, src, lane); }
1659template <
int lane>[[gnu::always_inline]] nce poly16x8x3_t load3_lane_quad(poly16_t
const *ptr, poly16x8x3_t src) {
return vld3q_lane_p16(ptr, src, lane); }
1660template <
int lane>[[gnu::always_inline]] nce int8x8x3_t load3_lane(int8_t
const *ptr, int8x8x3_t src) {
return vld3_lane_s8(ptr, src, lane); }
1661template <
int lane>[[gnu::always_inline]] nce uint8x8x3_t load3_lane(uint8_t
const *ptr, uint8x8x3_t src) {
return vld3_lane_u8(ptr, src, lane); }
1662template <
int lane>[[gnu::always_inline]] nce poly8x8x3_t load3_lane(poly8_t
const *ptr, poly8x8x3_t src) {
return vld3_lane_p8(ptr, src, lane); }
1663template <
int lane>[[gnu::always_inline]] nce int16x4x4_t load4_lane(int16_t
const *ptr, int16x4x4_t src) {
return vld4_lane_s16(ptr, src, lane); }
1664template <
int lane>[[gnu::always_inline]] nce int16x8x4_t load4_lane_quad(int16_t
const *ptr, int16x8x4_t src) {
return vld4q_lane_s16(ptr, src, lane); }
1665template <
int lane>[[gnu::always_inline]] nce int32x2x4_t load4_lane(int32_t
const *ptr, int32x2x4_t src) {
return vld4_lane_s32(ptr, src, lane); }
1666template <
int lane>[[gnu::always_inline]] nce int32x4x4_t load4_lane_quad(int32_t
const *ptr, int32x4x4_t src) {
return vld4q_lane_s32(ptr, src, lane); }
1667template <
int lane>[[gnu::always_inline]] nce uint16x4x4_t load4_lane(uint16_t
const *ptr, uint16x4x4_t src) {
return vld4_lane_u16(ptr, src, lane); }
1668template <
int lane>[[gnu::always_inline]] nce uint16x8x4_t load4_lane_quad(uint16_t
const *ptr, uint16x8x4_t src) {
return vld4q_lane_u16(ptr, src, lane); }
1669template <
int lane>[[gnu::always_inline]] nce uint32x2x4_t load4_lane(uint32_t
const *ptr, uint32x2x4_t src) {
return vld4_lane_u32(ptr, src, lane); }
1670template <
int lane>[[gnu::always_inline]] nce uint32x4x4_t load4_lane_quad(uint32_t
const *ptr, uint32x4x4_t src) {
return vld4q_lane_u32(ptr, src, lane); }
1671template <
int lane>[[gnu::always_inline]] nce float32x2x4_t load4_lane(float32_t
const *ptr, float32x2x4_t src) {
return vld4_lane_f32(ptr, src, lane); }
1672template <
int lane>[[gnu::always_inline]] nce float32x4x4_t load4_lane_quad(float32_t
const *ptr, float32x4x4_t src) {
return vld4q_lane_f32(ptr, src, lane); }
1673template <
int lane>[[gnu::always_inline]] nce poly16x4x4_t load4_lane(poly16_t
const *ptr, poly16x4x4_t src) {
return vld4_lane_p16(ptr, src, lane); }
1674template <
int lane>[[gnu::always_inline]] nce poly16x8x4_t load4_lane_quad(poly16_t
const *ptr, poly16x8x4_t src) {
return vld4q_lane_p16(ptr, src, lane); }
1675template <
int lane>[[gnu::always_inline]] nce int8x8x4_t load4_lane(int8_t
const *ptr, int8x8x4_t src) {
return vld4_lane_s8(ptr, src, lane); }
1676template <
int lane>[[gnu::always_inline]] nce uint8x8x4_t load4_lane(uint8_t
const *ptr, uint8x8x4_t src) {
return vld4_lane_u8(ptr, src, lane); }
1677template <
int lane>[[gnu::always_inline]] nce poly8x8x4_t load4_lane(poly8_t
const *ptr, poly8x8x4_t src) {
return vld4_lane_p8(ptr, src, lane); }
1679template <
int lane>[[gnu::always_inline]] nce int8x16x2_t load2_lane_quad(int8_t
const *ptr, int8x16x2_t src) {
return vld2q_lane_s8(ptr, src, lane); }
1680template <
int lane>[[gnu::always_inline]] nce uint8x16x2_t load2_lane_quad(uint8_t
const *ptr, uint8x16x2_t src) {
return vld2q_lane_u8(ptr, src, lane); }
1681template <
int lane>[[gnu::always_inline]] nce int8x16x3_t load3_lane_quad(int8_t
const *ptr, int8x16x3_t src) {
return vld3q_lane_s8(ptr, src, lane); }
1682template <
int lane>[[gnu::always_inline]] nce uint8x16x3_t load3_lane_quad(uint8_t
const *ptr, uint8x16x3_t src) {
return vld3q_lane_u8(ptr, src, lane); }
1683template <
int lane>[[gnu::always_inline]] nce int8x16x4_t load4_lane_quad(int8_t
const *ptr, int8x16x4_t src) {
return vld4q_lane_s8(ptr, src, lane); }
1684template <
int lane>[[gnu::always_inline]] nce uint8x16x4_t load4_lane_quad(uint8_t
const *ptr, uint8x16x4_t src) {
return vld4q_lane_u8(ptr, src, lane); }
1686#if defined(__clang__) || (__GNUC__ > 13)
1687template <> [[gnu::always_inline]]
inline int8x8x2_t load1_x2(int8_t
const *ptr) {
return vld1_s8_x2(ptr); }
1688template <> [[gnu::always_inline]]
inline int8x16x2_t load1_x2(int8_t
const *ptr) {
return vld1q_s8_x2(ptr); }
1689template <> [[gnu::always_inline]]
inline int16x4x2_t load1_x2(int16_t
const *ptr) {
return vld1_s16_x2(ptr); }
1690template <> [[gnu::always_inline]]
inline int16x8x2_t load1_x2(int16_t
const *ptr) {
return vld1q_s16_x2(ptr); }
1691template <> [[gnu::always_inline]]
inline int32x2x2_t load1_x2(int32_t
const *ptr) {
return vld1_s32_x2(ptr); }
1692template <> [[gnu::always_inline]]
inline int32x4x2_t load1_x2(int32_t
const *ptr) {
return vld1q_s32_x2(ptr); }
1693template <> [[gnu::always_inline]]
inline uint8x8x2_t load1_x2(uint8_t
const *ptr) {
return vld1_u8_x2(ptr); }
1694template <> [[gnu::always_inline]]
inline uint8x16x2_t load1_x2(uint8_t
const *ptr) {
return vld1q_u8_x2(ptr); }
1695template <> [[gnu::always_inline]]
inline uint16x4x2_t load1_x2(uint16_t
const *ptr) {
return vld1_u16_x2(ptr); }
1696template <> [[gnu::always_inline]]
inline uint16x8x2_t load1_x2(uint16_t
const *ptr) {
return vld1q_u16_x2(ptr); }
1697template <> [[gnu::always_inline]]
inline uint32x2x2_t load1_x2(uint32_t
const *ptr) {
return vld1_u32_x2(ptr); }
1698template <> [[gnu::always_inline]]
inline uint32x4x2_t load1_x2(uint32_t
const *ptr) {
return vld1q_u32_x2(ptr); }
1699template <> [[gnu::always_inline]]
inline float32x2x2_t load1_x2(float32_t
const *ptr) {
return vld1_f32_x2(ptr); }
1700template <> [[gnu::always_inline]]
inline float32x4x2_t load1_x2(float32_t
const *ptr) {
return vld1q_f32_x2(ptr); }
1701template <> [[gnu::always_inline]]
inline poly8x8x2_t load1_x2(poly8_t
const *ptr) {
return vld1_p8_x2(ptr); }
1702template <> [[gnu::always_inline]]
inline poly8x16x2_t load1_x2(poly8_t
const *ptr) {
return vld1q_p8_x2(ptr); }
1703template <> [[gnu::always_inline]]
inline poly16x4x2_t load1_x2(poly16_t
const *ptr) {
return vld1_p16_x2(ptr); }
1704template <> [[gnu::always_inline]]
inline poly16x8x2_t load1_x2(poly16_t
const *ptr) {
return vld1q_p16_x2(ptr); }
1705template <> [[gnu::always_inline]]
inline int64x1x2_t load1_x2(int64_t
const *ptr) {
return vld1_s64_x2(ptr); }
1706template <> [[gnu::always_inline]]
inline uint64x1x2_t load1_x2(uint64_t
const *ptr) {
return vld1_u64_x2(ptr); }
1707template <> [[gnu::always_inline]]
inline int64x2x2_t load1_x2(int64_t
const *ptr) {
return vld1q_s64_x2(ptr); }
1708template <> [[gnu::always_inline]]
inline uint64x2x2_t load1_x2(uint64_t
const *ptr) {
return vld1q_u64_x2(ptr); }
1709template <> [[gnu::always_inline]]
inline int8x8x3_t load1_x3(int8_t
const *ptr) {
return vld1_s8_x3(ptr); }
1710template <> [[gnu::always_inline]]
inline int16x4x3_t load1_x3(int16_t
const *ptr) {
return vld1_s16_x3(ptr); }
1712#if defined(__GNUC__) && __GNUC__ == 14 && defined(__ARM_ARCH) && (__ARM_ARCH < 8)
1713template <> [[gnu::always_inline]]
inline int8x16x3_t load1_x3(int8_t
const *ptr) {
return vld1q_s8_x3((
const uint8_t*)ptr); }
1714template <> [[gnu::always_inline]]
inline int16x8x3_t load1_x3(int16_t
const *ptr) {
return vld1q_s16_x3((
const uint16_t*)ptr); }
1716template <> [[gnu::always_inline]]
inline int8x16x3_t load1_x3(int8_t
const *ptr) {
return vld1q_s8_x3(ptr); }
1717template <> [[gnu::always_inline]]
inline int16x8x3_t load1_x3(int16_t
const *ptr) {
return vld1q_s16_x3(ptr); }
1720template <> [[gnu::always_inline]]
inline int32x2x3_t load1_x3(int32_t
const *ptr) {
return vld1_s32_x3(ptr); }
1721template <> [[gnu::always_inline]]
inline int32x4x3_t load1_x3(int32_t
const *ptr) {
return vld1q_s32_x3(ptr); }
1722template <> [[gnu::always_inline]]
inline uint8x8x3_t load1_x3(uint8_t
const *ptr) {
return vld1_u8_x3(ptr); }
1723template <> [[gnu::always_inline]]
inline uint8x16x3_t load1_x3(uint8_t
const *ptr) {
return vld1q_u8_x3(ptr); }
1724template <> [[gnu::always_inline]]
inline uint16x4x3_t load1_x3(uint16_t
const *ptr) {
return vld1_u16_x3(ptr); }
1725template <> [[gnu::always_inline]]
inline uint16x8x3_t load1_x3(uint16_t
const *ptr) {
return vld1q_u16_x3(ptr); }
1726template <> [[gnu::always_inline]]
inline uint32x2x3_t load1_x3(uint32_t
const *ptr) {
return vld1_u32_x3(ptr); }
1727template <> [[gnu::always_inline]]
inline uint32x4x3_t load1_x3(uint32_t
const *ptr) {
return vld1q_u32_x3(ptr); }
1728template <> [[gnu::always_inline]]
inline float32x2x3_t load1_x3(float32_t
const *ptr) {
return vld1_f32_x3(ptr); }
1729template <> [[gnu::always_inline]]
inline float32x4x3_t load1_x3(float32_t
const *ptr) {
return vld1q_f32_x3(ptr); }
1730template <> [[gnu::always_inline]]
inline poly8x8x3_t load1_x3(poly8_t
const *ptr) {
return vld1_p8_x3(ptr); }
1731template <> [[gnu::always_inline]]
inline poly8x16x3_t load1_x3(poly8_t
const *ptr) {
return vld1q_p8_x3(ptr); }
1732template <> [[gnu::always_inline]]
inline poly16x4x3_t load1_x3(poly16_t
const *ptr) {
return vld1_p16_x3(ptr); }
1733template <> [[gnu::always_inline]]
inline poly16x8x3_t load1_x3(poly16_t
const *ptr) {
return vld1q_p16_x3(ptr); }
1734template <> [[gnu::always_inline]]
inline int64x1x3_t load1_x3(int64_t
const *ptr) {
return vld1_s64_x3(ptr); }
1735template <> [[gnu::always_inline]]
inline uint64x1x3_t load1_x3(uint64_t
const *ptr) {
return vld1_u64_x3(ptr); }
1736template <> [[gnu::always_inline]]
inline int64x2x3_t load1_x3(int64_t
const *ptr) {
return vld1q_s64_x3(ptr); }
1737template <> [[gnu::always_inline]]
inline uint64x2x3_t load1_x3(uint64_t
const *ptr) {
return vld1q_u64_x3(ptr); }
1738template <> [[gnu::always_inline]]
inline int8x8x4_t load1_x4(int8_t
const *ptr) {
return vld1_s8_x4(ptr); }
1740#if defined(__GNUC__) && __GNUC__ == 14 && defined(__ARM_ARCH) && (__ARM_ARCH < 8)
1741template <> [[gnu::always_inline]]
inline int8x16x4_t load1_x4(int8_t
const *ptr) {
return vld1q_s8_x4((
const uint8_t*)ptr); }
1742template <> [[gnu::always_inline]]
inline int16x8x4_t load1_x4(int16_t
const *ptr) {
return vld1q_s16_x4((
const uint16_t*)ptr); }
1744template <> [[gnu::always_inline]]
inline int8x16x4_t load1_x4(int8_t
const *ptr) {
return vld1q_s8_x4(ptr); }
1745template <> [[gnu::always_inline]]
inline int16x8x4_t load1_x4(int16_t
const *ptr) {
return vld1q_s16_x4(ptr); }
1748template <> [[gnu::always_inline]]
inline int16x4x4_t load1_x4(int16_t
const *ptr) {
return vld1_s16_x4(ptr); }
1749template <> [[gnu::always_inline]]
inline int32x2x4_t load1_x4(int32_t
const *ptr) {
return vld1_s32_x4(ptr); }
1750template <> [[gnu::always_inline]]
inline int32x4x4_t load1_x4(int32_t
const *ptr) {
return vld1q_s32_x4(ptr); }
1751template <> [[gnu::always_inline]]
inline uint8x8x4_t load1_x4(uint8_t
const *ptr) {
return vld1_u8_x4(ptr); }
1752template <> [[gnu::always_inline]]
inline uint8x16x4_t load1_x4(uint8_t
const *ptr) {
return vld1q_u8_x4(ptr); }
1753template <> [[gnu::always_inline]]
inline uint16x4x4_t load1_x4(uint16_t
const *ptr) {
return vld1_u16_x4(ptr); }
1754template <> [[gnu::always_inline]]
inline uint16x8x4_t load1_x4(uint16_t
const *ptr) {
return vld1q_u16_x4(ptr); }
1755template <> [[gnu::always_inline]]
inline uint32x2x4_t load1_x4(uint32_t
const *ptr) {
return vld1_u32_x4(ptr); }
1756template <> [[gnu::always_inline]]
inline uint32x4x4_t load1_x4(uint32_t
const *ptr) {
return vld1q_u32_x4(ptr); }
1757template <> [[gnu::always_inline]]
inline float32x2x4_t load1_x4(float32_t
const *ptr) {
return vld1_f32_x4(ptr); }
1758template <> [[gnu::always_inline]]
inline float32x4x4_t load1_x4(float32_t
const *ptr) {
return vld1q_f32_x4(ptr); }
1759template <> [[gnu::always_inline]]
inline poly8x8x4_t load1_x4(poly8_t
const *ptr) {
return vld1_p8_x4(ptr); }
1760template <> [[gnu::always_inline]]
inline poly8x16x4_t load1_x4(poly8_t
const *ptr) {
return vld1q_p8_x4(ptr); }
1761template <> [[gnu::always_inline]]
inline poly16x4x4_t load1_x4(poly16_t
const *ptr) {
return vld1_p16_x4(ptr); }
1762template <> [[gnu::always_inline]]
inline poly16x8x4_t load1_x4(poly16_t
const *ptr) {
return vld1q_p16_x4(ptr); }
1763template <> [[gnu::always_inline]]
inline int64x1x4_t load1_x4(int64_t
const *ptr) {
return vld1_s64_x4(ptr); }
1764template <> [[gnu::always_inline]]
inline uint64x1x4_t load1_x4(uint64_t
const *ptr) {
return vld1_u64_x4(ptr); }
1765template <> [[gnu::always_inline]]
inline int64x2x4_t load1_x4(int64_t
const *ptr) {
return vld1q_s64_x4(ptr); }
1766template <> [[gnu::always_inline]]
inline uint64x2x4_t load1_x4(uint64_t
const *ptr) {
return vld1q_u64_x4(ptr); }
1768[[gnu::always_inline]]
inline void store1(int8_t *ptr, int8x8_t val) {
return vst1_s8(ptr, val); }
1769[[gnu::always_inline]]
inline void store1(int16_t *ptr, int16x4_t val) {
return vst1_s16(ptr, val); }
1770[[gnu::always_inline]]
inline void store1(int32_t *ptr, int32x2_t val) {
return vst1_s32(ptr, val); }
1771[[gnu::always_inline]]
inline void store1(int64_t *ptr, int64x1_t val) {
return vst1_s64(ptr, val); }
1772[[gnu::always_inline]]
inline void store1(int64_t *ptr, int64x2_t val) {
return vst1q_s64(ptr, val); }
1773[[gnu::always_inline]]
inline void store1(uint8_t *ptr, uint8x8_t val) {
return vst1_u8(ptr, val); }
1774[[gnu::always_inline]]
inline void store1(uint16_t *ptr, uint16x4_t val) {
return vst1_u16(ptr, val); }
1775[[gnu::always_inline]]
inline void store1(uint32_t *ptr, uint32x2_t val) {
return vst1_u32(ptr, val); }
1776[[gnu::always_inline]]
inline void store1(uint64_t *ptr, uint64x1_t val) {
return vst1_u64(ptr, val); }
1777[[gnu::always_inline]]
inline void store1(uint64_t *ptr, uint64x2_t val) {
return vst1q_u64(ptr, val); }
1778[[gnu::always_inline]]
inline void store1(float32_t *ptr, float32x2_t val) {
return vst1_f32(ptr, val); }
1779[[gnu::always_inline]]
inline void store1(poly8_t *ptr, poly8x8_t val) {
return vst1_p8(ptr, val); }
1780[[gnu::always_inline]]
inline void store1(poly8_t *ptr, poly8x16_t val) {
return vst1q_p8(ptr, val); }
1781[[gnu::always_inline]]
inline void store1(poly16_t *ptr, poly16x4_t val) {
return vst1_p16(ptr, val); }
1782[[gnu::always_inline]]
inline void store1(poly16_t *ptr, poly16x8_t val) {
return vst1q_p16(ptr, val); }
1783template <
int lane>[[gnu::always_inline]] nce
void store1_lane(int8_t *ptr, int8x8_t val) {
return vst1_lane_s8(ptr, val, lane); }
1784template <
int lane>[[gnu::always_inline]] nce
void store1_lane(int8_t *ptr, int8x16_t val) {
return vst1q_lane_s8(ptr, val, lane); }
1785template <
int lane>[[gnu::always_inline]] nce
void store1_lane(int16_t *ptr, int16x4_t val) {
return vst1_lane_s16(ptr, val, lane); }
1786template <
int lane>[[gnu::always_inline]] nce
void store1_lane(int16_t *ptr, int16x8_t val) {
return vst1q_lane_s16(ptr, val, lane); }
1787template <
int lane>[[gnu::always_inline]] nce
void store1_lane(int32_t *ptr, int32x2_t val) {
return vst1_lane_s32(ptr, val, lane); }
1788template <
int lane>[[gnu::always_inline]] nce
void store1_lane(int32_t *ptr, int32x4_t val) {
return vst1q_lane_s32(ptr, val, lane); }
1789template <
int lane>[[gnu::always_inline]] nce
void store1_lane(int64_t *ptr, int64x1_t val) {
return vst1_lane_s64(ptr, val, lane); }
1790template <
int lane>[[gnu::always_inline]] nce
void store1_lane(int64_t *ptr, int64x2_t val) {
return vst1q_lane_s64(ptr, val, lane); }
1791template <
int lane>[[gnu::always_inline]] nce
void store1_lane(uint8_t *ptr, uint8x8_t val) {
return vst1_lane_u8(ptr, val, lane); }
1792template <
int lane>[[gnu::always_inline]] nce
void store1_lane(uint8_t *ptr, uint8x16_t val) {
return vst1q_lane_u8(ptr, val, lane); }
1793template <
int lane>[[gnu::always_inline]] nce
void store1_lane(uint16_t *ptr, uint16x4_t val) {
return vst1_lane_u16(ptr, val, lane); }
1794template <
int lane>[[gnu::always_inline]] nce
void store1_lane(uint16_t *ptr, uint16x8_t val) {
return vst1q_lane_u16(ptr, val, lane); }
1795template <
int lane>[[gnu::always_inline]] nce
void store1_lane(uint32_t *ptr, uint32x2_t val) {
return vst1_lane_u32(ptr, val, lane); }
1796template <
int lane>[[gnu::always_inline]] nce
void store1_lane(uint32_t *ptr, uint32x4_t val) {
return vst1q_lane_u32(ptr, val, lane); }
1797template <
int lane>[[gnu::always_inline]] nce
void store1_lane(uint64_t *ptr, uint64x1_t val) {
return vst1_lane_u64(ptr, val, lane); }
1798template <
int lane>[[gnu::always_inline]] nce
void store1_lane(uint64_t *ptr, uint64x2_t val) {
return vst1q_lane_u64(ptr, val, lane); }
1799template <
int lane>[[gnu::always_inline]] nce
void store1_lane(float32_t *ptr, float32x2_t val) {
return vst1_lane_f32(ptr, val, lane); }
1800template <
int lane>[[gnu::always_inline]] nce
void store1_lane(float32_t *ptr, float32x4_t val) {
return vst1q_lane_f32(ptr, val, lane); }
1801template <
int lane>[[gnu::always_inline]] nce
void store1_lane(poly8_t *ptr, poly8x8_t val) {
return vst1_lane_p8(ptr, val, lane); }
1802template <
int lane>[[gnu::always_inline]] nce
void store1_lane(poly8_t *ptr, poly8x16_t val) {
return vst1q_lane_p8(ptr, val, lane); }
1803template <
int lane>[[gnu::always_inline]] nce
void store1_lane(poly16_t *ptr, poly16x4_t val) {
return vst1_lane_p16(ptr, val, lane); }
1804template <
int lane>[[gnu::always_inline]] nce
void store1_lane(poly16_t *ptr, poly16x8_t val) {
return vst1q_lane_p16(ptr, val, lane); }
1805[[gnu::always_inline]]
inline void store2(int8_t *ptr, int8x8x2_t val) {
return vst2_s8(ptr, val); }
1806[[gnu::always_inline]]
inline void store2(int8_t *ptr, int8x16x2_t val) {
return vst2q_s8(ptr, val); }
1807[[gnu::always_inline]]
inline void store2(int16_t *ptr, int16x4x2_t val) {
return vst2_s16(ptr, val); }
1808[[gnu::always_inline]]
inline void store2(int16_t *ptr, int16x8x2_t val) {
return vst2q_s16(ptr, val); }
1809[[gnu::always_inline]]
inline void store2(int32_t *ptr, int32x2x2_t val) {
return vst2_s32(ptr, val); }
1810[[gnu::always_inline]]
inline void store2(int32_t *ptr, int32x4x2_t val) {
return vst2q_s32(ptr, val); }
1811[[gnu::always_inline]]
inline void store2(uint8_t *ptr, uint8x8x2_t val) {
return vst2_u8(ptr, val); }
1812[[gnu::always_inline]]
inline void store2(uint8_t *ptr, uint8x16x2_t val) {
return vst2q_u8(ptr, val); }
1813[[gnu::always_inline]]
inline void store2(uint16_t *ptr, uint16x4x2_t val) {
return vst2_u16(ptr, val); }
1814[[gnu::always_inline]]
inline void store2(uint16_t *ptr, uint16x8x2_t val) {
return vst2q_u16(ptr, val); }
1815[[gnu::always_inline]]
inline void store2(uint32_t *ptr, uint32x2x2_t val) {
return vst2_u32(ptr, val); }
1816[[gnu::always_inline]]
inline void store2(uint32_t *ptr, uint32x4x2_t val) {
return vst2q_u32(ptr, val); }
1817[[gnu::always_inline]]
inline void store2(float32_t *ptr, float32x2x2_t val) {
return vst2_f32(ptr, val); }
1818[[gnu::always_inline]]
inline void store2(float32_t *ptr, float32x4x2_t val) {
return vst2q_f32(ptr, val); }
1819[[gnu::always_inline]]
inline void store2(poly8_t *ptr, poly8x8x2_t val) {
return vst2_p8(ptr, val); }
1820[[gnu::always_inline]]
inline void store2(poly8_t *ptr, poly8x16x2_t val) {
return vst2q_p8(ptr, val); }
1821[[gnu::always_inline]]
inline void store2(poly16_t *ptr, poly16x4x2_t val) {
return vst2_p16(ptr, val); }
1822[[gnu::always_inline]]
inline void store2(poly16_t *ptr, poly16x8x2_t val) {
return vst2q_p16(ptr, val); }
1823[[gnu::always_inline]]
inline void store2(int64_t *ptr, int64x1x2_t val) {
return vst2_s64(ptr, val); }
1824[[gnu::always_inline]]
inline void store2(uint64_t *ptr, uint64x1x2_t val) {
return vst2_u64(ptr, val); }
1825[[gnu::always_inline]]
inline void store3(int8_t *ptr, int8x8x3_t val) {
return vst3_s8(ptr, val); }
1826[[gnu::always_inline]]
inline void store3(int8_t *ptr, int8x16x3_t val) {
return vst3q_s8(ptr, val); }
1827[[gnu::always_inline]]
inline void store3(int16_t *ptr, int16x4x3_t val) {
return vst3_s16(ptr, val); }
1828[[gnu::always_inline]]
inline void store3(int16_t *ptr, int16x8x3_t val) {
return vst3q_s16(ptr, val); }
1829[[gnu::always_inline]]
inline void store3(int32_t *ptr, int32x2x3_t val) {
return vst3_s32(ptr, val); }
1830[[gnu::always_inline]]
inline void store3(int32_t *ptr, int32x4x3_t val) {
return vst3q_s32(ptr, val); }
1831[[gnu::always_inline]]
inline void store3(uint8_t *ptr, uint8x8x3_t val) {
return vst3_u8(ptr, val); }
1832[[gnu::always_inline]]
inline void store3(uint8_t *ptr, uint8x16x3_t val) {
return vst3q_u8(ptr, val); }
1833[[gnu::always_inline]]
inline void store3(uint16_t *ptr, uint16x4x3_t val) {
return vst3_u16(ptr, val); }
1834[[gnu::always_inline]]
inline void store3(uint16_t *ptr, uint16x8x3_t val) {
return vst3q_u16(ptr, val); }
1835[[gnu::always_inline]]
inline void store3(uint32_t *ptr, uint32x2x3_t val) {
return vst3_u32(ptr, val); }
1836[[gnu::always_inline]]
inline void store3(uint32_t *ptr, uint32x4x3_t val) {
return vst3q_u32(ptr, val); }
1837[[gnu::always_inline]]
inline void store3(float32_t *ptr, float32x2x3_t val) {
return vst3_f32(ptr, val); }
1838[[gnu::always_inline]]
inline void store3(float32_t *ptr, float32x4x3_t val) {
return vst3q_f32(ptr, val); }
1839[[gnu::always_inline]]
inline void store3(poly8_t *ptr, poly8x8x3_t val) {
return vst3_p8(ptr, val); }
1840[[gnu::always_inline]]
inline void store3(poly8_t *ptr, poly8x16x3_t val) {
return vst3q_p8(ptr, val); }
1841[[gnu::always_inline]]
inline void store3(poly16_t *ptr, poly16x4x3_t val) {
return vst3_p16(ptr, val); }
1842[[gnu::always_inline]]
inline void store3(poly16_t *ptr, poly16x8x3_t val) {
return vst3q_p16(ptr, val); }
1843[[gnu::always_inline]]
inline void store3(int64_t *ptr, int64x1x3_t val) {
return vst3_s64(ptr, val); }
1844[[gnu::always_inline]]
inline void store3(uint64_t *ptr, uint64x1x3_t val) {
return vst3_u64(ptr, val); }
1845[[gnu::always_inline]]
inline void store4(int8_t *ptr, int8x8x4_t val) {
return vst4_s8(ptr, val); }
1846[[gnu::always_inline]]
inline void store4(int8_t *ptr, int8x16x4_t val) {
return vst4q_s8(ptr, val); }
1847[[gnu::always_inline]]
inline void store4(int16_t *ptr, int16x4x4_t val) {
return vst4_s16(ptr, val); }
1848[[gnu::always_inline]]
inline void store4(int16_t *ptr, int16x8x4_t val) {
return vst4q_s16(ptr, val); }
1849[[gnu::always_inline]]
inline void store4(int32_t *ptr, int32x2x4_t val) {
return vst4_s32(ptr, val); }
1850[[gnu::always_inline]]
inline void store4(int32_t *ptr, int32x4x4_t val) {
return vst4q_s32(ptr, val); }
1851[[gnu::always_inline]]
inline void store4(uint8_t *ptr, uint8x8x4_t val) {
return vst4_u8(ptr, val); }
1852[[gnu::always_inline]]
inline void store4(uint8_t *ptr, uint8x16x4_t val) {
return vst4q_u8(ptr, val); }
1853[[gnu::always_inline]]
inline void store4(uint16_t *ptr, uint16x4x4_t val) {
return vst4_u16(ptr, val); }
1854[[gnu::always_inline]]
inline void store4(uint16_t *ptr, uint16x8x4_t val) {
return vst4q_u16(ptr, val); }
1855[[gnu::always_inline]]
inline void store4(uint32_t *ptr, uint32x2x4_t val) {
return vst4_u32(ptr, val); }
1856[[gnu::always_inline]]
inline void store4(uint32_t *ptr, uint32x4x4_t val) {
return vst4q_u32(ptr, val); }
1857[[gnu::always_inline]]
inline void store4(float32_t *ptr, float32x2x4_t val) {
return vst4_f32(ptr, val); }
1858[[gnu::always_inline]]
inline void store4(float32_t *ptr, float32x4x4_t val) {
return vst4q_f32(ptr, val); }
1859[[gnu::always_inline]]
inline void store4(poly8_t *ptr, poly8x8x4_t val) {
return vst4_p8(ptr, val); }
1860[[gnu::always_inline]]
inline void store4(poly8_t *ptr, poly8x16x4_t val) {
return vst4q_p8(ptr, val); }
1861[[gnu::always_inline]]
inline void store4(poly16_t *ptr, poly16x4x4_t val) {
return vst4_p16(ptr, val); }
1862[[gnu::always_inline]]
inline void store4(poly16_t *ptr, poly16x8x4_t val) {
return vst4q_p16(ptr, val); }
1863[[gnu::always_inline]]
inline void store4(int64_t *ptr, int64x1x4_t val) {
return vst4_s64(ptr, val); }
1864[[gnu::always_inline]]
inline void store4(uint64_t *ptr, uint64x1x4_t val) {
return vst4_u64(ptr, val); }
1865template <
int lane>[[gnu::always_inline]] nce
void store2_lane(int8_t *ptr, int8x8x2_t val) {
return vst2_lane_s8(ptr, val, lane); }
1866template <
int lane>[[gnu::always_inline]] nce
void store2_lane(uint8_t *ptr, uint8x8x2_t val) {
return vst2_lane_u8(ptr, val, lane); }
1867template <
int lane>[[gnu::always_inline]] nce
void store2_lane(poly8_t *ptr, poly8x8x2_t val) {
return vst2_lane_p8(ptr, val, lane); }
1868template <
int lane>[[gnu::always_inline]] nce
void store3_lane(int8_t *ptr, int8x8x3_t val) {
return vst3_lane_s8(ptr, val, lane); }
1869template <
int lane>[[gnu::always_inline]] nce
void store3_lane(uint8_t *ptr, uint8x8x3_t val) {
return vst3_lane_u8(ptr, val, lane); }
1870template <
int lane>[[gnu::always_inline]] nce
void store3_lane(poly8_t *ptr, poly8x8x3_t val) {
return vst3_lane_p8(ptr, val, lane); }
1871template <
int lane>[[gnu::always_inline]] nce
void store4_lane(int8_t *ptr, int8x8x4_t val) {
return vst4_lane_s8(ptr, val, lane); }
1872template <
int lane>[[gnu::always_inline]] nce
void store4_lane(uint8_t *ptr, uint8x8x4_t val) {
return vst4_lane_u8(ptr, val, lane); }
1873template <
int lane>[[gnu::always_inline]] nce
void store4_lane(poly8_t *ptr, poly8x8x4_t val) {
return vst4_lane_p8(ptr, val, lane); }
1874template <
int lane>[[gnu::always_inline]] nce
void store2_lane(int16_t *ptr, int16x4x2_t val) {
return vst2_lane_s16(ptr, val, lane); }
1875template <
int lane>[[gnu::always_inline]] nce
void store2_lane(int16_t *ptr, int16x8x2_t val) {
return vst2q_lane_s16(ptr, val, lane); }
1876template <
int lane>[[gnu::always_inline]] nce
void store2_lane(int32_t *ptr, int32x2x2_t val) {
return vst2_lane_s32(ptr, val, lane); }
1877template <
int lane>[[gnu::always_inline]] nce
void store2_lane(int32_t *ptr, int32x4x2_t val) {
return vst2q_lane_s32(ptr, val, lane); }
1878template <
int lane>[[gnu::always_inline]] nce
void store2_lane(uint16_t *ptr, uint16x4x2_t val) {
return vst2_lane_u16(ptr, val, lane); }
1879template <
int lane>[[gnu::always_inline]] nce
void store2_lane(uint16_t *ptr, uint16x8x2_t val) {
return vst2q_lane_u16(ptr, val, lane); }
1880template <
int lane>[[gnu::always_inline]] nce
void store2_lane(uint32_t *ptr, uint32x2x2_t val) {
return vst2_lane_u32(ptr, val, lane); }
1881template <
int lane>[[gnu::always_inline]] nce
void store2_lane(uint32_t *ptr, uint32x4x2_t val) {
return vst2q_lane_u32(ptr, val, lane); }
1882template <
int lane>[[gnu::always_inline]] nce
void store2_lane(float32_t *ptr, float32x2x2_t val) {
return vst2_lane_f32(ptr, val, lane); }
1883template <
int lane>[[gnu::always_inline]] nce
void store2_lane(float32_t *ptr, float32x4x2_t val) {
return vst2q_lane_f32(ptr, val, lane); }
1884template <
int lane>[[gnu::always_inline]] nce
void store2_lane(poly16_t *ptr, poly16x4x2_t val) {
return vst2_lane_p16(ptr, val, lane); }
1885template <
int lane>[[gnu::always_inline]] nce
void store2_lane(poly16_t *ptr, poly16x8x2_t val) {
return vst2q_lane_p16(ptr, val, lane); }
1886template <
int lane>[[gnu::always_inline]] nce
void store3_lane(int16_t *ptr, int16x4x3_t val) {
return vst3_lane_s16(ptr, val, lane); }
1887template <
int lane>[[gnu::always_inline]] nce
void store3_lane(int16_t *ptr, int16x8x3_t val) {
return vst3q_lane_s16(ptr, val, lane); }
1888template <
int lane>[[gnu::always_inline]] nce
void store3_lane(int32_t *ptr, int32x2x3_t val) {
return vst3_lane_s32(ptr, val, lane); }
1889template <
int lane>[[gnu::always_inline]] nce
void store3_lane(int32_t *ptr, int32x4x3_t val) {
return vst3q_lane_s32(ptr, val, lane); }
1890template <
int lane>[[gnu::always_inline]] nce
void store3_lane(uint16_t *ptr, uint16x4x3_t val) {
return vst3_lane_u16(ptr, val, lane); }
1891template <
int lane>[[gnu::always_inline]] nce
void store3_lane(uint16_t *ptr, uint16x8x3_t val) {
return vst3q_lane_u16(ptr, val, lane); }
1892template <
int lane>[[gnu::always_inline]] nce
void store3_lane(uint32_t *ptr, uint32x2x3_t val) {
return vst3_lane_u32(ptr, val, lane); }
1893template <
int lane>[[gnu::always_inline]] nce
void store3_lane(uint32_t *ptr, uint32x4x3_t val) {
return vst3q_lane_u32(ptr, val, lane); }
1894template <
int lane>[[gnu::always_inline]] nce
void store3_lane(float32_t *ptr, float32x2x3_t val) {
return vst3_lane_f32(ptr, val, lane); }
1895template <
int lane>[[gnu::always_inline]] nce
void store3_lane(float32_t *ptr, float32x4x3_t val) {
return vst3q_lane_f32(ptr, val, lane); }
1896template <
int lane>[[gnu::always_inline]] nce
void store3_lane(poly16_t *ptr, poly16x4x3_t val) {
return vst3_lane_p16(ptr, val, lane); }
1897template <
int lane>[[gnu::always_inline]] nce
void store3_lane(poly16_t *ptr, poly16x8x3_t val) {
return vst3q_lane_p16(ptr, val, lane); }
1901template <
int lane>[[gnu::always_inline]] nce
void store4_lane(int16_t *ptr, int16x4x4_t val) {
return vst4_lane_s16(ptr, val, lane); }
1902template <
int lane>[[gnu::always_inline]] nce
void store4_lane(int16_t *ptr, int16x8x4_t val) {
return vst4q_lane_s16(ptr, val, lane); }
1903template <
int lane>[[gnu::always_inline]] nce
void store4_lane(int32_t *ptr, int32x2x4_t val) {
return vst4_lane_s32(ptr, val, lane); }
1904template <
int lane>[[gnu::always_inline]] nce
void store4_lane(int32_t *ptr, int32x4x4_t val) {
return vst4q_lane_s32(ptr, val, lane); }
1905template <
int lane>[[gnu::always_inline]] nce
void store4_lane(uint16_t *ptr, uint16x4x4_t val) {
return vst4_lane_u16(ptr, val, lane); }
1906template <
int lane>[[gnu::always_inline]] nce
void store4_lane(uint16_t *ptr, uint16x8x4_t val) {
return vst4q_lane_u16(ptr, val, lane); }
1907template <
int lane>[[gnu::always_inline]] nce
void store4_lane(uint32_t *ptr, uint32x2x4_t val) {
return vst4_lane_u32(ptr, val, lane); }
1908template <
int lane>[[gnu::always_inline]] nce
void store4_lane(uint32_t *ptr, uint32x4x4_t val) {
return vst4q_lane_u32(ptr, val, lane); }
1909template <
int lane>[[gnu::always_inline]] nce
void store4_lane(float32_t *ptr, float32x2x4_t val) {
return vst4_lane_f32(ptr, val, lane); }
1910template <
int lane>[[gnu::always_inline]] nce
void store4_lane(float32_t *ptr, float32x4x4_t val) {
return vst4q_lane_f32(ptr, val, lane); }
1911template <
int lane>[[gnu::always_inline]] nce
void store4_lane(poly16_t *ptr, poly16x4x4_t val) {
return vst4_lane_p16(ptr, val, lane); }
1912template <
int lane>[[gnu::always_inline]] nce
void store4_lane(poly16_t *ptr, poly16x8x4_t val) {
return vst4q_lane_p16(ptr, val, lane); }
1913#if defined(__clang__) || (__GNUC__ > 13)
1914[[gnu::always_inline]]
inline void store1_x2(int8_t *ptr, int8x8x2_t val) {
return vst1_s8_x2(ptr, val); }
1915[[gnu::always_inline]]
inline void store1_x2(int8_t *ptr, int8x16x2_t val) {
return vst1q_s8_x2(ptr, val); }
1916[[gnu::always_inline]]
inline void store1_x2(int16_t *ptr, int16x4x2_t val) {
return vst1_s16_x2(ptr, val); }
1917[[gnu::always_inline]]
inline void store1_x2(int16_t *ptr, int16x8x2_t val) {
return vst1q_s16_x2(ptr, val); }
1918[[gnu::always_inline]]
inline void store1_x2(int32_t *ptr, int32x2x2_t val) {
return vst1_s32_x2(ptr, val); }
1919[[gnu::always_inline]]
inline void store1_x2(int32_t *ptr, int32x4x2_t val) {
return vst1q_s32_x2(ptr, val); }
1920[[gnu::always_inline]]
inline void store1_x2(uint8_t *ptr, uint8x8x2_t val) {
return vst1_u8_x2(ptr, val); }
1921[[gnu::always_inline]]
inline void store1_x2(uint8_t *ptr, uint8x16x2_t val) {
return vst1q_u8_x2(ptr, val); }
1922[[gnu::always_inline]]
inline void store1_x2(uint16_t *ptr, uint16x4x2_t val) {
return vst1_u16_x2(ptr, val); }
1923[[gnu::always_inline]]
inline void store1_x2(uint16_t *ptr, uint16x8x2_t val) {
return vst1q_u16_x2(ptr, val); }
1924[[gnu::always_inline]]
inline void store1_x2(uint32_t *ptr, uint32x2x2_t val) {
return vst1_u32_x2(ptr, val); }
1925[[gnu::always_inline]]
inline void store1_x2(uint32_t *ptr, uint32x4x2_t val) {
return vst1q_u32_x2(ptr, val); }
1926[[gnu::always_inline]]
inline void store1_x2(float32_t *ptr, float32x2x2_t val) {
return vst1_f32_x2(ptr, val); }
1927[[gnu::always_inline]]
inline void store1_x2(float32_t *ptr, float32x4x2_t val) {
return vst1q_f32_x2(ptr, val); }
1928[[gnu::always_inline]]
inline void store1_x2(poly8_t *ptr, poly8x8x2_t val) {
return vst1_p8_x2(ptr, val); }
1929[[gnu::always_inline]]
inline void store1_x2(poly8_t *ptr, poly8x16x2_t val) {
return vst1q_p8_x2(ptr, val); }
1930[[gnu::always_inline]]
inline void store1_x2(poly16_t *ptr, poly16x4x2_t val) {
return vst1_p16_x2(ptr, val); }
1931[[gnu::always_inline]]
inline void store1_x2(poly16_t *ptr, poly16x8x2_t val) {
return vst1q_p16_x2(ptr, val); }
1932[[gnu::always_inline]]
inline void store1_x2(int64_t *ptr, int64x1x2_t val) {
return vst1_s64_x2(ptr, val); }
1933[[gnu::always_inline]]
inline void store1_x2(uint64_t *ptr, uint64x1x2_t val) {
return vst1_u64_x2(ptr, val); }
1934[[gnu::always_inline]]
inline void store1_x2(int64_t *ptr, int64x2x2_t val) {
return vst1q_s64_x2(ptr, val); }
1935[[gnu::always_inline]]
inline void store1_x2(uint64_t *ptr, uint64x2x2_t val) {
return vst1q_u64_x2(ptr, val); }
1936[[gnu::always_inline]]
inline void store1_x3(int8_t *ptr, int8x8x3_t val) {
return vst1_s8_x3(ptr, val); }
1937[[gnu::always_inline]]
inline void store1_x3(int8_t *ptr, int8x16x3_t val) {
return vst1q_s8_x3(ptr, val); }
1938[[gnu::always_inline]]
inline void store1_x3(int16_t *ptr, int16x4x3_t val) {
return vst1_s16_x3(ptr, val); }
1939[[gnu::always_inline]]
inline void store1_x3(int16_t *ptr, int16x8x3_t val) {
return vst1q_s16_x3(ptr, val); }
1940[[gnu::always_inline]]
inline void store1_x3(int32_t *ptr, int32x2x3_t val) {
return vst1_s32_x3(ptr, val); }
1941[[gnu::always_inline]]
inline void store1_x3(int32_t *ptr, int32x4x3_t val) {
return vst1q_s32_x3(ptr, val); }
1942[[gnu::always_inline]]
inline void store1_x3(uint8_t *ptr, uint8x8x3_t val) {
return vst1_u8_x3(ptr, val); }
1943[[gnu::always_inline]]
inline void store1_x3(uint8_t *ptr, uint8x16x3_t val) {
return vst1q_u8_x3(ptr, val); }
1944[[gnu::always_inline]]
inline void store1_x3(uint16_t *ptr, uint16x4x3_t val) {
return vst1_u16_x3(ptr, val); }
1945[[gnu::always_inline]]
inline void store1_x3(uint16_t *ptr, uint16x8x3_t val) {
return vst1q_u16_x3(ptr, val); }
1946[[gnu::always_inline]]
inline void store1_x3(uint32_t *ptr, uint32x2x3_t val) {
return vst1_u32_x3(ptr, val); }
1947[[gnu::always_inline]]
inline void store1_x3(uint32_t *ptr, uint32x4x3_t val) {
return vst1q_u32_x3(ptr, val); }
1948[[gnu::always_inline]]
inline void store1_x3(float32_t *ptr, float32x2x3_t val) {
return vst1_f32_x3(ptr, val); }
1949[[gnu::always_inline]]
inline void store1_x3(float32_t *ptr, float32x4x3_t val) {
return vst1q_f32_x3(ptr, val); }
1950[[gnu::always_inline]]
inline void store1_x3(poly8_t *ptr, poly8x8x3_t val) {
return vst1_p8_x3(ptr, val); }
1951[[gnu::always_inline]]
inline void store1_x3(poly8_t *ptr, poly8x16x3_t val) {
return vst1q_p8_x3(ptr, val); }
1952[[gnu::always_inline]]
inline void store1_x3(poly16_t *ptr, poly16x4x3_t val) {
return vst1_p16_x3(ptr, val); }
1953[[gnu::always_inline]]
inline void store1_x3(poly16_t *ptr, poly16x8x3_t val) {
return vst1q_p16_x3(ptr, val); }
1954[[gnu::always_inline]]
inline void store1_x3(int64_t *ptr, int64x1x3_t val) {
return vst1_s64_x3(ptr, val); }
1955[[gnu::always_inline]]
inline void store1_x3(uint64_t *ptr, uint64x1x3_t val) {
return vst1_u64_x3(ptr, val); }
1956[[gnu::always_inline]]
inline void store1_x3(int64_t *ptr, int64x2x3_t val) {
return vst1q_s64_x3(ptr, val); }
1957[[gnu::always_inline]]
inline void store1_x3(uint64_t *ptr, uint64x2x3_t val) {
return vst1q_u64_x3(ptr, val); }
1959[[gnu::always_inline]]
inline void store1_x4(int8_t *ptr, int8x8x4_t val) {
return vst1_s8_x4(ptr, val); }
1960[[gnu::always_inline]]
inline void store1_x4(int8_t *ptr, int8x16x4_t val) {
return vst1q_s8_x4(ptr, val); }
1961[[gnu::always_inline]]
inline void store1_x4(int16_t *ptr, int16x4x4_t val) {
return vst1_s16_x4(ptr, val); }
1962[[gnu::always_inline]]
inline void store1_x4(int16_t *ptr, int16x8x4_t val) {
return vst1q_s16_x4(ptr, val); }
1963[[gnu::always_inline]]
inline void store1_x4(int32_t *ptr, int32x2x4_t val) {
return vst1_s32_x4(ptr, val); }
1964[[gnu::always_inline]]
inline void store1_x4(int32_t *ptr, int32x4x4_t val) {
return vst1q_s32_x4(ptr, val); }
1965[[gnu::always_inline]]
inline void store1_x4(uint8_t *ptr, uint8x8x4_t val) {
return vst1_u8_x4(ptr, val); }
1966[[gnu::always_inline]]
inline void store1_x4(uint8_t *ptr, uint8x16x4_t val) {
return vst1q_u8_x4(ptr, val); }
1967[[gnu::always_inline]]
inline void store1_x4(uint16_t *ptr, uint16x4x4_t val) {
return vst1_u16_x4(ptr, val); }
1968[[gnu::always_inline]]
inline void store1_x4(uint16_t *ptr, uint16x8x4_t val) {
return vst1q_u16_x4(ptr, val); }
1969[[gnu::always_inline]]
inline void store1_x4(uint32_t *ptr, uint32x2x4_t val) {
return vst1_u32_x4(ptr, val); }
1970[[gnu::always_inline]]
inline void store1_x4(uint32_t *ptr, uint32x4x4_t val) {
return vst1q_u32_x4(ptr, val); }
1971[[gnu::always_inline]]
inline void store1_x4(float32_t *ptr, float32x2x4_t val) {
return vst1_f32_x4(ptr, val); }
1972[[gnu::always_inline]]
inline void store1_x4(float32_t *ptr, float32x4x4_t val) {
return vst1q_f32_x4(ptr, val); }
1973[[gnu::always_inline]]
inline void store1_x4(poly8_t *ptr, poly8x8x4_t val) {
return vst1_p8_x4(ptr, val); }
1974[[gnu::always_inline]]
inline void store1_x4(poly8_t *ptr, poly8x16x4_t val) {
return vst1q_p8_x4(ptr, val); }
1975[[gnu::always_inline]]
inline void store1_x4(poly16_t *ptr, poly16x4x4_t val) {
return vst1_p16_x4(ptr, val); }
1976[[gnu::always_inline]]
inline void store1_x4(poly16_t *ptr, poly16x8x4_t val) {
return vst1q_p16_x4(ptr, val); }
1977[[gnu::always_inline]]
inline void store1_x4(int64_t *ptr, int64x1x4_t val) {
return vst1_s64_x4(ptr, val); }
1978[[gnu::always_inline]]
inline void store1_x4(uint64_t *ptr, uint64x1x4_t val) {
return vst1_u64_x4(ptr, val); }
1979[[gnu::always_inline]]
inline void store1_x4(int64_t *ptr, int64x2x4_t val) {
return vst1q_s64_x4(ptr, val); }
1980[[gnu::always_inline]]
inline void store1_x4(uint64_t *ptr, uint64x2x4_t val) {
return vst1q_u64_x4(ptr, val); }
1982[[gnu::always_inline]] nce int8x8_t table_lookup2(int8x8x2_t a, int8x8_t idx) {
return vtbl2_s8(a, idx); }
1983[[gnu::always_inline]] nce uint8x8_t table_lookup2(uint8x8x2_t a, uint8x8_t idx) {
return vtbl2_u8(a, idx); }
1984[[gnu::always_inline]] nce poly8x8_t table_lookup2(poly8x8x2_t a, uint8x8_t idx) {
return vtbl2_p8(a, idx); }
1985[[gnu::always_inline]] nce int8x8_t table_lookup3(int8x8x3_t a, int8x8_t idx) {
return vtbl3_s8(a, idx); }
1986[[gnu::always_inline]] nce uint8x8_t table_lookup3(uint8x8x3_t a, uint8x8_t idx) {
return vtbl3_u8(a, idx); }
1987[[gnu::always_inline]] nce poly8x8_t table_lookup3(poly8x8x3_t a, uint8x8_t idx) {
return vtbl3_p8(a, idx); }
1988[[gnu::always_inline]] nce int8x8_t table_lookup4(int8x8x4_t a, int8x8_t idx) {
return vtbl4_s8(a, idx); }
1989[[gnu::always_inline]] nce uint8x8_t table_lookup4(uint8x8x4_t a, uint8x8_t idx) {
return vtbl4_u8(a, idx); }
1990[[gnu::always_inline]] nce poly8x8_t table_lookup4(poly8x8x4_t a, uint8x8_t idx) {
return vtbl4_p8(a, idx); }
1992[[gnu::always_inline]] nce poly64x1_t add(poly64x1_t a, poly64x1_t b) {
return vadd_p64(a, b); }
1993[[gnu::always_inline]] nce poly8x16_t add(poly8x16_t a, poly8x16_t b) {
return vaddq_p8(a, b); }
1994[[gnu::always_inline]] nce poly16x8_t add(poly16x8_t a, poly16x8_t b) {
return vaddq_p16(a, b); }
1995[[gnu::always_inline]] nce poly64x2_t add(poly64x2_t a, poly64x2_t b) {
return vaddq_p64(a, b); }