13template <
typename T> nce T max(float64x1_t a, float64x1_t b);
14template <
typename T> nce T max(float64x2_t a, float64x2_t b);
15template <
typename T> nce T min(float64x1_t a, float64x1_t b);
16template <
typename T> nce T min(float64x2_t a, float64x2_t b);
17template <
typename T> nce T max_strict(float64x1_t a, float64x1_t b);
18template <
typename T> nce T max_strict(float64x2_t a, float64x2_t b);
19template <
typename T> nce T min_strict(float64x1_t a, float64x1_t b);
20template <
typename T> nce T min_strict(float64x2_t a, float64x2_t b);
22template <
typename T> nce T shift_right_saturate_narrow(int16_t a);
23template <
typename T> nce T shift_right_saturate_narrow(int32_t a);
24template <
typename T> nce T shift_right_saturate_narrow(int64_t a);
25template <
typename T> nce T shift_right_saturate_narrow(uint16_t a);
26template <
typename T> nce T shift_right_saturate_narrow(uint32_t a);
27template <
typename T> nce T shift_right_saturate_narrow(uint64_t a);
28template <
typename T> nce T shift_right_saturate_narrow(int8x8_t r, int16x8_t a);
29template <
typename T> nce T shift_right_saturate_narrow(int16x4_t r, int32x4_t a);
30template <
typename T> nce T shift_right_saturate_narrow(int32x2_t r, int64x2_t a);
31template <
typename T> nce T shift_right_saturate_narrow(uint8x8_t r, uint16x8_t a);
32template <
typename T> nce T shift_right_saturate_narrow(uint16x4_t r, uint32x4_t a);
33template <
typename T> nce T shift_right_saturate_narrow(uint32x2_t r, uint64x2_t a);
34template <
typename T> nce T convert(float32_t a);
35template <
typename T> nce T convert_round_to_nearest_with_ties_to_even(float32_t a);
36template <
typename T> nce T convert_round_toward_negative_infinity(float32_t a);
37template <
typename T> nce T convert_round_toward_positive_infinity(float32_t a);
38template <
typename T> nce T convert_round_to_nearest_with_ties_away_from_zero(float32_t a);
39template <
typename T> nce T convert(float64x1_t a);
40template <
typename T> nce T convert(float64x2_t a);
41template <
typename T> nce T convert_round_to_nearest_with_ties_to_even(float64x1_t a);
42template <
typename T> nce T convert_round_to_nearest_with_ties_to_even(float64x2_t a);
43template <
typename T> nce T convert_round_toward_negative_infinity(float64x1_t a);
44template <
typename T> nce T convert_round_toward_negative_infinity(float64x2_t a);
45template <
typename T> nce T convert_round_toward_positive_infinity(float64x1_t a);
46template <
typename T> nce T convert_round_toward_positive_infinity(float64x2_t a);
47template <
typename T> nce T convert_round_to_nearest_with_ties_away_from_zero(float64x1_t a);
48template <
typename T> nce T convert_round_to_nearest_with_ties_away_from_zero(float64x2_t a);
49template <
typename T> nce T convert(float64_t a);
50template <
typename T> nce T convert_round_to_nearest_with_ties_to_even(float64_t a);
51template <
typename T> nce T convert_round_toward_negative_infinity(float64_t a);
52template <
typename T> nce T convert_round_toward_positive_infinity(float64_t a);
53template <
typename T> nce T convert_round_to_nearest_with_ties_away_from_zero(float64_t a);
54template <
typename T> nce T convert(int32_t a);
55template <
typename T> nce T convert(uint32_t a);
56template <
typename T> nce T convert(int64x1_t a);
57template <
typename T> nce T convert(int64x2_t a);
58template <
typename T> nce T convert(uint64x1_t a);
59template <
typename T> nce T convert(uint64x2_t a);
60template <
typename T> nce T convert(int64_t a);
61template <
typename T> nce T convert(uint64_t a);
62template <
typename T> nce T reinterpret(int8x8_t a);
63template <
typename T> nce T reinterpret(int16x4_t a);
64template <
typename T> nce T reinterpret(int32x2_t a);
65template <
typename T> nce T reinterpret(float32x2_t a);
66template <
typename T> nce T reinterpret(float64x1_t a);
67template <
typename T> nce T reinterpret(uint8x8_t a);
68template <
typename T> nce T reinterpret(uint16x4_t a);
69template <
typename T> nce T reinterpret(uint32x2_t a);
70template <
typename T> nce T reinterpret(poly16x4_t a);
71template <
typename T> nce T reinterpret(uint64x1_t a);
72template <
typename T> nce T reinterpret(int64x1_t a);
73template <
typename T> nce T reinterpret(float16x4_t a);
74template <
typename T> nce T reinterpret(int8x16_t a);
75template <
typename T> nce T reinterpret(int16x8_t a);
76template <
typename T> nce T reinterpret(int32x4_t a);
77template <
typename T> nce T reinterpret(float32x4_t a);
78template <
typename T> nce T reinterpret(float64x2_t a);
79template <
typename T> nce T reinterpret(uint8x16_t a);
80template <
typename T> nce T reinterpret(uint16x8_t a);
81template <
typename T> nce T reinterpret(uint32x4_t a);
82template <
typename T> nce T reinterpret(poly16x8_t a);
83template <
typename T> nce T reinterpret(int64x2_t a);
84template <
typename T> nce T reinterpret(float16x8_t a);
85template <
typename T> nce T reinterpret(poly64x2_t a);
86template <
typename T> nce T reinterpret(poly128_t a);
87template <
typename T> nce T create(uint64_t a);
88template <
typename T> nce T duplicate(float64_t value);
89template <
typename T> nce T move(float64_t value);
90template <
typename T> nce T duplicate(float64x1_t vec);
91template <
typename T> nce T duplicate(int8x16_t vec);
92template <
typename T> nce T duplicate(int16x8_t vec);
93template <
typename T> nce T duplicate(int32x4_t vec);
94template <
typename T> nce T duplicate(int64x2_t vec);
95template <
typename T> nce T duplicate(uint8x16_t vec);
96template <
typename T> nce T duplicate(uint16x8_t vec);
97template <
typename T> nce T duplicate(uint32x4_t vec);
98template <
typename T> nce T duplicate(uint64x2_t vec);
99template <
typename T> nce T duplicate(poly64x2_t vec);
100template <
typename T> nce T duplicate(float32x4_t vec);
101template <
typename T> nce T duplicate(poly8x16_t vec);
102template <
typename T> nce T duplicate(poly16x8_t vec);
103template <
typename T> nce T duplicate(float64x2_t vec);
104template <
typename T> nce T get(float64x2_t a);
105template <
typename T> nce T load1(float64_t
const *ptr);
106template <
typename T> nce T load1_duplicate(float64_t
const *ptr);
107template <
typename T> nce T load2(float64_t
const *ptr);
108template <
typename T> nce T load3(float64_t
const *ptr);
109template <
typename T> nce T load3(int8_t
const *ptr);
110template <
typename T> nce T load4(float64_t
const *ptr);
111template <
typename T> nce T load2_duplicate(float64_t
const *ptr);
112template <
typename T> nce T load3_duplicate(float64_t
const *ptr);
113template <
typename T> nce T load4_duplicate(float64_t
const *ptr);
114template <
typename T> nce T load1_x2(float64_t
const *ptr);
115template <
typename T> nce T load1_x3(float64_t
const *ptr);
116template <
typename T> nce T load1_x4(float64_t
const *ptr);
117template <
typename T> nce T store1(float64_t *ptr, float64x1_t val);
118template <
typename T> nce T store1(float64_t *ptr, float64x2_t val);
119template <
typename T> nce T store2(int64_t *ptr, int64x2x2_t val);
120template <
typename T> nce T store2(uint64_t *ptr, uint64x2x2_t val);
121template <
typename T> nce T store2(poly64_t *ptr, poly64x2x2_t val);
122template <
typename T> nce T store2(float64_t *ptr, float64x1x2_t val);
123template <
typename T> nce T store2(float64_t *ptr, float64x2x2_t val);
124template <
typename T> nce T store3(int64_t *ptr, int64x2x3_t val);
125template <
typename T> nce T store3(uint64_t *ptr, uint64x2x3_t val);
126template <
typename T> nce T store3(poly64_t *ptr, poly64x2x3_t val);
127template <
typename T> nce T store3(float64_t *ptr, float64x1x3_t val);
128template <
typename T> nce T store3(float64_t *ptr, float64x2x3_t val);
129template <
typename T> nce T store4(int64_t *ptr, int64x2x4_t val);
130template <
typename T> nce T store4(uint64_t *ptr, uint64x2x4_t val);
131template <
typename T> nce T store4(poly64_t *ptr, poly64x2x4_t val);
132template <
typename T> nce T store4(float64_t *ptr, float64x1x4_t val);
133template <
typename T> nce T store4(float64_t *ptr, float64x2x4_t val);
134template <
typename T> nce T convert(int16_t a);
135template <
typename T> nce T convert(uint16_t a);
136template <
typename T> nce T convert(float16_t a);
137template <
typename T> nce T convert_round_to_nearest_with_ties_away_from_zero(float16_t a);
138template <
typename T> nce T convert_round_toward_negative_infinity(float16_t a);
139template <
typename T> nce T convert_round_to_nearest_with_ties_to_even(float16_t a);
140template <
typename T> nce T convert_round_toward_positive_infinity(float16_t a);
141template <
typename T> nce T duplicate(float16x8_t vec);
142template <
typename T> nce T reinterpret(bfloat16x4_t a);
143template <
typename T> nce T reinterpret(bfloat16x8_t a);
144template <
typename T> nce T get_high(float64x2_t a);
145template <
typename T> nce T get_low(float64x2_t a);
149[[gnu::always_inline]] nce uint8x8_t add_saturate(uint8x8_t a, int8x8_t b) {
return vsqadd_u8(a, b); }
150[[gnu::always_inline]] nce uint8x16_t add_narrow_high_high(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
return vaddhn_high_u16(r, a, b); }
151[[gnu::always_inline]] nce uint8x16_t add_round_narrow_high_high(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
return vraddhn_high_u16(r, a, b); }
152[[gnu::always_inline]] nce uint8x16_t subtract_narrow_high_high(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
return vsubhn_high_u16(r, a, b); }
153[[gnu::always_inline]] nce uint8x16_t subtract_round_narrow_high_high(uint8x8_t r, uint16x8_t a, uint16x8_t b) {
return vrsubhn_high_u16(r, a, b); }
154[[gnu::always_inline]] nce uint8_t reduce_add(uint8x8_t a) {
return vaddv_u8(a); }
155[[gnu::always_inline]] nce uint16_t reduce_add_long(uint8x8_t a) {
return vaddlv_u8(a); }
156[[gnu::always_inline]]
inline uint8_t reduce_max(uint8x8_t a) {
return vmaxv_u8(a); }
157[[gnu::always_inline]]
inline uint8_t reduce_min(uint8x8_t a) {
return vminv_u8(a); }
158[[gnu::always_inline]] nce uint8x8_t equal_to_zero(uint8x8_t a) {
return vceqz_u8(a); }
159template <
int n>[[gnu::always_inline]] nce uint8x16_t shift_right_narrow_high(uint8x8_t r, uint16x8_t a) {
return vshrn_high_n_u16(r, a, n); }
160template <
int n>[[gnu::always_inline]] nce uint8x16_t shift_right_saturate_narrow_high(uint8x8_t r, uint16x8_t a) {
return vqshrn_high_n_u16(r, a, n); }
161template <
int n>[[gnu::always_inline]] nce uint8x16_t shift_right_round_saturate_narrow_high(uint8x8_t r, uint16x8_t a) {
return vqrshrn_high_n_u16(r, a, n); }
162template <
int n>[[gnu::always_inline]] nce uint8x16_t shift_right_round_narrow_high(uint8x8_t r, uint16x8_t a) {
return vrshrn_high_n_u16(r, a, n); }
163template <
int n>[[gnu::always_inline]] nce uint8x16_t shift_right_saturate_narrow_unsigned_high(uint8x8_t r, int16x8_t a) {
return vqshrun_high_n_s16(r, a, n); }
164template <
int n>[[gnu::always_inline]] nce uint8x16_t shift_right_unsigned_saturate_narrow_high(uint8x8_t r, int16x8_t a) {
return vqrshrun_high_n_s16(r, a, n); }
165template <> [[gnu::always_inline]] nce float64x1_t reinterpret(uint8x8_t a) {
return vreinterpret_f64_u8(a); }
166[[gnu::always_inline]] nce uint8x8_t zip2(uint8x8_t a, uint8x8_t b) {
return vzip2_u8(a, b); }
167template <
int lane1,
int lane2>[[gnu::always_inline]] nce uint8x8_t copy_lane(uint8x8_t a, uint8x8_t b) {
return vcopy_lane_u8(a, lane1, b, lane2); }
168template <
int lane1,
int lane2>[[gnu::always_inline]] nce uint8x8_t copy_lane(uint8x8_t a, uint8x16_t b) {
return vcopy_laneq_u8(a, lane1, b, lane2); }
169[[gnu::always_inline]] nce uint8x8_t reverse_bits(uint8x8_t a) {
return vrbit_u8(a); }
170[[gnu::always_inline]] nce uint8x8_t zip1(uint8x8_t a, uint8x8_t b) {
return vzip1_u8(a, b); }
171[[gnu::always_inline]] nce uint8x8_t unzip1(uint8x8_t a, uint8x8_t b) {
return vuzp1_u8(a, b); }
172[[gnu::always_inline]] nce uint8x8_t unzip2(uint8x8_t a, uint8x8_t b) {
return vuzp2_u8(a, b); }
173[[gnu::always_inline]] nce uint8x8_t transpose_step_1(uint8x8_t a, uint8x8_t b) {
return vtrn1_u8(a, b); }
174[[gnu::always_inline]] nce uint8x8_t transpose_step_2(uint8x8_t a, uint8x8_t b) {
return vtrn2_u8(a, b); }
175[[gnu::always_inline]] nce uint8x8_t table_extend1_saturate(uint8x8_t a, uint8x16_t t, uint8x8_t idx) {
return vqtbx1_u8(a, t, idx); }
176[[gnu::always_inline]] nce uint8x16_t move_saturate_narrow_high(uint8x8_t r, uint16x8_t a) {
return vqmovn_high_u16(r, a); }
177[[gnu::always_inline]] nce uint8x16_t move_unsigned_saturate_narrow_high(uint8x8_t r, int16x8_t a) {
return vqmovun_high_s16(r, a); }
178[[gnu::always_inline]] nce uint8x8_t table_extend2_saturate(uint8x8_t a, uint8x16x2_t t, uint8x8_t idx) {
return vqtbx2_u8(a, t, idx); }
179[[gnu::always_inline]] nce uint8x8_t table_extend3_saturate(uint8x8_t a, uint8x16x3_t t, uint8x8_t idx) {
return vqtbx3_u8(a, t, idx); }
180[[gnu::always_inline]] nce uint8x8_t table_extend4_saturate(uint8x8_t a, uint8x16x4_t t, uint8x8_t idx) {
return vqtbx4_u8(a, t, idx); }
181[[gnu::always_inline]] nce uint16x8_t add_long_high(uint8x16_t a, uint8x16_t b) {
return vaddl_high_u8(a, b); }
182[[gnu::always_inline]] nce uint16x8_t multiply_long_high(uint8x16_t a, uint8x16_t b) {
return vmull_high_u8(a, b); }
183[[gnu::always_inline]] nce uint16x8_t subtract_long_high(uint8x16_t a, uint8x16_t b) {
return vsubl_high_u8(a, b); }
184[[gnu::always_inline]] nce uint16x8_t subtract_absolute_long_high(uint8x16_t a, uint8x16_t b) {
return vabdl_high_u8(a, b); }
185[[gnu::always_inline]] nce uint8x16_t pairwise_add(uint8x16_t a, uint8x16_t b) {
return vpaddq_u8(a, b); }
186[[gnu::always_inline]] nce uint8x16_t pairwise_max(uint8x16_t a, uint8x16_t b) {
return vpmaxq_u8(a, b); }
187[[gnu::always_inline]] nce uint8x16_t pairwise_min(uint8x16_t a, uint8x16_t b) {
return vpminq_u8(a, b); }
188[[gnu::always_inline]] nce uint8x16_t add_saturate(uint8x16_t a, int8x16_t b) {
return vsqaddq_u8(a, b); }
189[[gnu::always_inline]] nce uint8_t reduce_add(uint8x16_t a) {
return vaddvq_u8(a); }
190[[gnu::always_inline]] nce uint16_t reduce_add_long(uint8x16_t a) {
return vaddlvq_u8(a); }
191[[gnu::always_inline]]
inline uint8_t reduce_max(uint8x16_t a) {
return vmaxvq_u8(a); }
192[[gnu::always_inline]]
inline uint8_t reduce_min(uint8x16_t a) {
return vminvq_u8(a); }
193[[gnu::always_inline]] nce uint8x16_t equal_to_zero(uint8x16_t a) {
return vceqzq_u8(a); }
194template <
int n>[[gnu::always_inline]] nce uint16x8_t shift_left_long_high(uint8x16_t a) {
return vshll_high_n_u8(a, n); }
195template <> [[gnu::always_inline]] nce float64x2_t reinterpret(uint8x16_t a) {
return vreinterpretq_f64_u8(a); }
196[[gnu::always_inline]] nce uint16x8_t move_long_high(uint8x16_t a) {
return vmovl_high_u8(a); }
197[[gnu::always_inline]] nce uint8x16_t reverse_bits(uint8x16_t a) {
return vrbitq_u8(a); }
198template <
int lane>[[gnu::always_inline]] nce uint8x8_t duplicate_lane(uint8x16_t vec) {
return vdup_laneq_u8(vec, lane); }
199template <
int lane>[[gnu::always_inline]] nce uint8x16_t duplicate_lane(uint8x16_t vec) {
return vdupq_laneq_u8(vec, lane); }
200[[gnu::always_inline]] nce uint8x8_t table_lookup1_saturate(uint8x16_t t, uint8x8_t idx) {
return vqtbl1_u8(t, idx); }
201[[gnu::always_inline]] nce uint8x16_t zip1(uint8x16_t a, uint8x16_t b) {
return vzip1q_u8(a, b); }
202[[gnu::always_inline]] nce uint8x16_t zip2(uint8x16_t a, uint8x16_t b) {
return vzip2q_u8(a, b); }
203[[gnu::always_inline]] nce uint8x16_t unzip1(uint8x16_t a, uint8x16_t b) {
return vuzp1q_u8(a, b); }
204[[gnu::always_inline]] nce uint8x16_t unzip2(uint8x16_t a, uint8x16_t b) {
return vuzp2q_u8(a, b); }
205[[gnu::always_inline]] nce uint8x16_t transpose_step_1(uint8x16_t a, uint8x16_t b) {
return vtrn1q_u8(a, b); }
206[[gnu::always_inline]] nce uint8x16_t transpose_step_2(uint8x16_t a, uint8x16_t b) {
return vtrn2q_u8(a, b); }
207[[gnu::always_inline]] nce uint8x16_t table_lookup1_saturate(uint8x16_t t, uint8x16_t idx) {
return vqtbl1q_u8(t, idx); }
208[[gnu::always_inline]] nce uint8x16_t table_extend1_saturate(uint8x16_t a, uint8x16_t t, uint8x16_t idx) {
return vqtbx1q_u8(a, t, idx); }
211template <
int lane1,
int lane2>[[gnu::always_inline]] nce uint8x16_t copy_lane(uint8x16_t a, uint8x8_t b) {
return vcopyq_lane_u8(a, lane1, b, lane2); }
212template <
int lane1,
int lane2>[[gnu::always_inline]] nce uint8x16_t copy_lane(uint8x16_t a, uint8x16_t b) {
return vcopyq_laneq_u8(a, lane1, b, lane2); }
213[[gnu::always_inline]] nce uint8x16_t table_extend2_saturate(uint8x16_t a, uint8x16x2_t t, uint8x16_t idx) {
return vqtbx2q_u8(a, t, idx); }
214[[gnu::always_inline]] nce uint8x16_t table_extend3_saturate(uint8x16_t a, uint8x16x3_t t, uint8x16_t idx) {
return vqtbx3q_u8(a, t, idx); }
215[[gnu::always_inline]] nce uint8x16_t table_extend4_saturate(uint8x16_t a, uint8x16x4_t t, uint8x16_t idx) {
return vqtbx4q_u8(a, t, idx); }
216[[gnu::always_inline]] nce int8x8_t add_saturate(int8x8_t a, uint8x8_t b) {
return vuqadd_s8(a, b); }
217[[gnu::always_inline]] nce int8x16_t add_narrow_high_high(int8x8_t r, int16x8_t a, int16x8_t b) {
return vaddhn_high_s16(r, a, b); }
218[[gnu::always_inline]] nce int8x16_t add_round_narrow_high_high(int8x8_t r, int16x8_t a, int16x8_t b) {
return vraddhn_high_s16(r, a, b); }
219[[gnu::always_inline]] nce int8x16_t subtract_narrow_high_high(int8x8_t r, int16x8_t a, int16x8_t b) {
return vsubhn_high_s16(r, a, b); }
220[[gnu::always_inline]] nce int8x16_t subtract_round_narrow_high_high(int8x8_t r, int16x8_t a, int16x8_t b) {
return vrsubhn_high_s16(r, a, b); }
221[[gnu::always_inline]] nce int8_t reduce_add(int8x8_t a) {
return vaddv_s8(a); }
222[[gnu::always_inline]] nce int16_t reduce_add_long(int8x8_t a) {
return vaddlv_s8(a); }
223[[gnu::always_inline]]
inline int8_t reduce_max(int8x8_t a) {
return vmaxv_s8(a); }
224[[gnu::always_inline]]
inline int8_t reduce_min(int8x8_t a) {
return vminv_s8(a); }
225[[gnu::always_inline]] nce uint8x8_t equal_to_zero(int8x8_t a) {
return vceqz_s8(a); }
226[[gnu::always_inline]] nce uint8x8_t greater_than_or_equal_to_zero(int8x8_t a) {
return vcgez_s8(a); }
227[[gnu::always_inline]] nce uint8x8_t less_than_or_equal_to_zero(int8x8_t a) {
return vclez_s8(a); }
228[[gnu::always_inline]] nce uint8x8_t greater_than_zero(int8x8_t a) {
return vcgtz_s8(a); }
229[[gnu::always_inline]] nce uint8x8_t less_than_zero(int8x8_t a) {
return vcltz_s8(a); }
230template <
int n>[[gnu::always_inline]] nce int8x16_t shift_right_narrow_high(int8x8_t r, int16x8_t a) {
return vshrn_high_n_s16(r, a, n); }
231template <
int n>[[gnu::always_inline]] nce int8x16_t shift_right_saturate_narrow_high(int8x8_t r, int16x8_t a) {
return vqshrn_high_n_s16(r, a, n); }
232template <
int n>[[gnu::always_inline]] nce int8x16_t shift_right_round_saturate_narrow_high(int8x8_t r, int16x8_t a) {
return vqrshrn_high_n_s16(r, a, n); }
233template <
int n>[[gnu::always_inline]] nce int8x16_t shift_right_round_narrow_high(int8x8_t r, int16x8_t a) {
return vrshrn_high_n_s16(r, a, n); }
234template <> [[gnu::always_inline]] nce float64x1_t reinterpret(int8x8_t a) {
return vreinterpret_f64_s8(a); }
235[[gnu::always_inline]] nce int8x8_t reverse_bits(int8x8_t a) {
return vrbit_s8(a); }
236[[gnu::always_inline]] nce int8x8_t zip1(int8x8_t a, int8x8_t b) {
return vzip1_s8(a, b); }
237[[gnu::always_inline]] nce int8x8_t zip2(int8x8_t a, int8x8_t b) {
return vzip2_s8(a, b); }
238[[gnu::always_inline]] nce int8x8_t unzip1(int8x8_t a, int8x8_t b) {
return vuzp1_s8(a, b); }
239[[gnu::always_inline]] nce int8x8_t unzip2(int8x8_t a, int8x8_t b) {
return vuzp2_s8(a, b); }
240[[gnu::always_inline]] nce int8x8_t transpose_step_1(int8x8_t a, int8x8_t b) {
return vtrn1_s8(a, b); }
241[[gnu::always_inline]] nce int8x8_t transpose_step_2(int8x8_t a, int8x8_t b) {
return vtrn2_s8(a, b); }
242[[gnu::always_inline]] nce int8x8_t table_extend1_saturate(int8x8_t a, int8x16_t t, uint8x8_t idx) {
return vqtbx1_s8(a, t, idx); }
243[[gnu::always_inline]] nce int8x16_t move_saturate_narrow_high(int8x8_t r, int16x8_t a) {
return vqmovn_high_s16(r, a); }
244template <
int lane1,
int lane2>[[gnu::always_inline]] nce int8x8_t copy_lane(int8x8_t a, int8x8_t b) {
return vcopy_lane_s8(a, lane1, b, lane2); }
245template <
int lane1,
int lane2>[[gnu::always_inline]] nce int8x8_t copy_lane(int8x8_t a, int8x16_t b) {
return vcopy_laneq_s8(a, lane1, b, lane2); }
246[[gnu::always_inline]] nce int8x8_t table_extend2_saturate(int8x8_t a, int8x16x2_t t, uint8x8_t idx) {
return vqtbx2_s8(a, t, idx); }
247[[gnu::always_inline]] nce int8x8_t table_extend3_saturate(int8x8_t a, int8x16x3_t t, uint8x8_t idx) {
return vqtbx3_s8(a, t, idx); }
248[[gnu::always_inline]] nce int8x8_t table_extend4_saturate(int8x8_t a, int8x16x4_t t, uint8x8_t idx) {
return vqtbx4_s8(a, t, idx); }
249[[gnu::always_inline]] nce int8x16_t add_saturate(int8x16_t a, uint8x16_t b) {
return vuqaddq_s8(a, b); }
250[[gnu::always_inline]] nce int16x8_t add_long_high(int8x16_t a, int8x16_t b) {
return vaddl_high_s8(a, b); }
251[[gnu::always_inline]] nce int16x8_t multiply_long_high(int8x16_t a, int8x16_t b) {
return vmull_high_s8(a, b); }
252[[gnu::always_inline]] nce int16x8_t subtract_long_high(int8x16_t a, int8x16_t b) {
return vsubl_high_s8(a, b); }
253[[gnu::always_inline]] nce int16x8_t subtract_absolute_long_high(int8x16_t a, int8x16_t b) {
return vabdl_high_s8(a, b); }
254[[gnu::always_inline]] nce int8x16_t pairwise_add(int8x16_t a, int8x16_t b) {
return vpaddq_s8(a, b); }
255[[gnu::always_inline]] nce int8x16_t pairwise_max(int8x16_t a, int8x16_t b) {
return vpmaxq_s8(a, b); }
256[[gnu::always_inline]] nce int8x16_t pairwise_min(int8x16_t a, int8x16_t b) {
return vpminq_s8(a, b); }
257[[gnu::always_inline]] nce int8_t reduce_add(int8x16_t a) {
return vaddvq_s8(a); }
258[[gnu::always_inline]] nce int16_t reduce_add_long(int8x16_t a) {
return vaddlvq_s8(a); }
259[[gnu::always_inline]]
inline int8_t reduce_max(int8x16_t a) {
return vmaxvq_s8(a); }
260[[gnu::always_inline]]
inline int8_t reduce_min(int8x16_t a) {
return vminvq_s8(a); }
261[[gnu::always_inline]] nce uint8x16_t equal_to_zero(int8x16_t a) {
return vceqzq_s8(a); }
262[[gnu::always_inline]] nce uint8x16_t greater_than_or_equal_to_zero(int8x16_t a) {
return vcgezq_s8(a); }
263[[gnu::always_inline]] nce uint8x16_t less_than_or_equal_to_zero(int8x16_t a) {
return vclezq_s8(a); }
264[[gnu::always_inline]] nce uint8x16_t greater_than_zero(int8x16_t a) {
return vcgtzq_s8(a); }
265[[gnu::always_inline]] nce uint8x16_t less_than_zero(int8x16_t a) {
return vcltzq_s8(a); }
266template <
int n>[[gnu::always_inline]] nce int16x8_t shift_left_long_high(int8x16_t a) {
return vshll_high_n_s8(a, n); }
267template <> [[gnu::always_inline]] nce float64x2_t reinterpret(int8x16_t a) {
return vreinterpretq_f64_s8(a); }
268[[gnu::always_inline]] nce int16x8_t move_long_high(int8x16_t a) {
return vmovl_high_s8(a); }
269[[gnu::always_inline]] nce int8x16_t reverse_bits(int8x16_t a) {
return vrbitq_s8(a); }
270template <
int lane>[[gnu::always_inline]] nce int8x8_t duplicate_lane(int8x16_t vec) {
return vdup_laneq_s8(vec, lane); }
271template <
int lane>[[gnu::always_inline]] nce int8x16_t duplicate_lane(int8x16_t vec) {
return vdupq_laneq_s8(vec, lane); }
272[[gnu::always_inline]] nce int8x8_t table_lookup1_saturate(int8x16_t t, uint8x8_t idx) {
return vqtbl1_s8(t, idx); }
273[[gnu::always_inline]] nce int8x16_t table_lookup1_saturate(int8x16_t t, uint8x16_t idx) {
return vqtbl1q_s8(t, idx); }
274[[gnu::always_inline]] nce int8x16_t zip1(int8x16_t a, int8x16_t b) {
return vzip1q_s8(a, b); }
275[[gnu::always_inline]] nce int8x16_t zip2(int8x16_t a, int8x16_t b) {
return vzip2q_s8(a, b); }
276[[gnu::always_inline]] nce int8x16_t unzip1(int8x16_t a, int8x16_t b) {
return vuzp1q_s8(a, b); }
277[[gnu::always_inline]] nce int8x16_t unzip2(int8x16_t a, int8x16_t b) {
return vuzp2q_s8(a, b); }
278[[gnu::always_inline]] nce int8x16_t transpose_step_1(int8x16_t a, int8x16_t b) {
return vtrn1q_s8(a, b); }
279[[gnu::always_inline]] nce int8x16_t transpose_step_2(int8x16_t a, int8x16_t b) {
return vtrn2q_s8(a, b); }
280[[gnu::always_inline]] nce int8x16_t table_extend1_saturate(int8x16_t a, int8x16_t t, uint8x16_t idx) {
return vqtbx1q_s8(a, t, idx); }
283template <
int lane1,
int lane2>[[gnu::always_inline]] nce int8x16_t copy_lane(int8x16_t a, int8x8_t b) {
return vcopyq_lane_s8(a, lane1, b, lane2); }
284template <
int lane1,
int lane2>[[gnu::always_inline]] nce int8x16_t copy_lane(int8x16_t a, int8x16_t b) {
return vcopyq_laneq_s8(a, lane1, b, lane2); }
285[[gnu::always_inline]] nce int8x16_t table_extend2_saturate(int8x16_t a, int8x16x2_t t, uint8x16_t idx) {
return vqtbx2q_s8(a, t, idx); }
286[[gnu::always_inline]] nce int8x16_t table_extend3_saturate(int8x16_t a, int8x16x3_t t, uint8x16_t idx) {
return vqtbx3q_s8(a, t, idx); }
287[[gnu::always_inline]] nce int8x16_t table_extend4_saturate(int8x16_t a, int8x16x4_t t, uint8x16_t idx) {
return vqtbx4q_s8(a, t, idx); }
288[[gnu::always_inline]] nce uint16x4_t add_saturate(uint16x4_t a, int16x4_t b) {
return vsqadd_u16(a, b); }
289[[gnu::always_inline]] nce uint16x8_t add_narrow_high_high(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
return vaddhn_high_u32(r, a, b); }
290[[gnu::always_inline]] nce uint16x8_t add_round_narrow_high_high(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
return vraddhn_high_u32(r, a, b); }
291[[gnu::always_inline]] nce uint16x8_t subtract_narrow_high_high(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
return vsubhn_high_u32(r, a, b); }
292[[gnu::always_inline]] nce uint16x8_t subtract_round_narrow_high_high(uint16x4_t r, uint32x4_t a, uint32x4_t b) {
return vrsubhn_high_u32(r, a, b); }
293[[gnu::always_inline]] nce uint16_t reduce_add(uint16x4_t a) {
return vaddv_u16(a); }
294[[gnu::always_inline]] nce uint32_t reduce_add_long(uint16x4_t a) {
return vaddlv_u16(a); }
295[[gnu::always_inline]]
inline uint16_t reduce_max(uint16x4_t a) {
return vmaxv_u16(a); }
296[[gnu::always_inline]]
inline uint16_t reduce_min(uint16x4_t a) {
return vminv_u16(a); }
297[[gnu::always_inline]] nce uint16x4_t equal_to_zero(uint16x4_t a) {
return vceqz_u16(a); }
298template <> [[gnu::always_inline]] nce float64x1_t reinterpret(uint16x4_t a) {
return vreinterpret_f64_u16(a); }
299template <
int lane>[[gnu::always_inline]] nce uint16x4_t multiply_add_lane(uint16x4_t a, uint16x4_t b, uint16x8_t v) {
return vmla_laneq_u16(a, b, v, lane); }
300template <
int lane>[[gnu::always_inline]] nce uint16x4_t multiply_subtract_lane(uint16x4_t a, uint16x4_t b, uint16x8_t v) {
return vmls_laneq_u16(a, b, v, lane); }
301[[gnu::always_inline]] nce uint16x4_t zip1(uint16x4_t a, uint16x4_t b) {
return vzip1_u16(a, b); }
302[[gnu::always_inline]] nce uint16x4_t zip2(uint16x4_t a, uint16x4_t b) {
return vzip2_u16(a, b); }
303[[gnu::always_inline]] nce uint16x4_t unzip1(uint16x4_t a, uint16x4_t b) {
return vuzp1_u16(a, b); }
304[[gnu::always_inline]] nce uint16x4_t unzip2(uint16x4_t a, uint16x4_t b) {
return vuzp2_u16(a, b); }
305[[gnu::always_inline]] nce uint16x4_t transpose_step_1(uint16x4_t a, uint16x4_t b) {
return vtrn1_u16(a, b); }
306[[gnu::always_inline]] nce uint16x4_t transpose_step_2(uint16x4_t a, uint16x4_t b) {
return vtrn2_u16(a, b); }
307template <
int lane>[[gnu::always_inline]] nce uint16x4_t multiply_lane(uint16x4_t a, uint16x8_t v) {
return vmul_laneq_u16(a, v, lane); }
308template <
int lane>[[gnu::always_inline]] nce uint32x4_t multiply_long_lane(uint16x4_t a, uint16x8_t v) {
return vmull_laneq_u16(a, v, lane); }
309template <
int n>[[gnu::always_inline]] nce uint16x8_t shift_right_saturate_narrow_unsigned_high(uint16x4_t r, int32x4_t a) {
return vqshrun_high_n_s32(r, a, n); }
310template <
int n>[[gnu::always_inline]] nce uint16x8_t shift_right_unsigned_saturate_narrow_high(uint16x4_t r, int32x4_t a) {
return vqrshrun_high_n_s32(r, a, n); }
311[[gnu::always_inline]] nce uint16x8_t move_unsigned_saturate_narrow_high(uint16x4_t r, int32x4_t a) {
return vqmovun_high_s32(r, a); }
312template <
int n>[[gnu::always_inline]] nce uint16x8_t shift_right_narrow_high(uint16x4_t r, uint32x4_t a) {
return vshrn_high_n_u32(r, a, n); }
313template <
int n>[[gnu::always_inline]] nce uint16x8_t shift_right_saturate_narrow_high(uint16x4_t r, uint32x4_t a) {
return vqshrn_high_n_u32(r, a, n); }
314template <
int n>[[gnu::always_inline]] nce uint16x8_t shift_right_round_saturate_narrow_high(uint16x4_t r, uint32x4_t a) {
return vqrshrn_high_n_u32(r, a, n); }
315template <
int n>[[gnu::always_inline]] nce uint16x8_t shift_right_round_narrow_high(uint16x4_t r, uint32x4_t a) {
return vrshrn_high_n_u32(r, a, n); }
316[[gnu::always_inline]] nce uint16x8_t move_saturate_narrow_high(uint16x4_t r, uint32x4_t a) {
return vqmovn_high_u32(r, a); }
317template <
int lane1,
int lane2>[[gnu::always_inline]] nce uint16x4_t copy_lane(uint16x4_t a, uint16x4_t b) {
return vcopy_lane_u16(a, lane1, b, lane2); }
318template <
int lane1,
int lane2>[[gnu::always_inline]] nce uint16x4_t copy_lane(uint16x4_t a, uint16x8_t b) {
return vcopy_laneq_u16(a, lane1, b, lane2); }
319[[gnu::always_inline]] nce uint16x8_t add_high(uint16x8_t a, uint8x16_t b) {
return vaddw_high_u8(a, b); }
320[[gnu::always_inline]] nce uint16x8_t multiply_add_long_high(uint16x8_t a, uint8x16_t b, uint8x16_t c) {
return vmlal_high_u8(a, b, c); }
321[[gnu::always_inline]] nce uint16x8_t multiply_subtract_long_high(uint16x8_t a, uint8x16_t b, uint8x16_t c) {
return vmlsl_high_u8(a, b, c); }
322[[gnu::always_inline]] nce uint16x8_t subtract_high(uint16x8_t a, uint8x16_t b) {
return vsubw_high_u8(a, b); }
323[[gnu::always_inline]] nce uint16x8_t subtract_absolute_add_high(uint16x8_t a, uint8x16_t b, uint8x16_t c) {
return vabal_high_u8(a, b, c); }
324[[gnu::always_inline]] nce uint32x4_t add_long_high(uint16x8_t a, uint16x8_t b) {
return vaddl_high_u16(a, b); }
325[[gnu::always_inline]] nce uint32x4_t multiply_long_high(uint16x8_t a, uint16x8_t b) {
return vmull_high_u16(a, b); }
326[[gnu::always_inline]] nce uint32x4_t subtract_long_high(uint16x8_t a, uint16x8_t b) {
return vsubl_high_u16(a, b); }
327[[gnu::always_inline]] nce uint32x4_t subtract_absolute_long_high(uint16x8_t a, uint16x8_t b) {
return vabdl_high_u16(a, b); }
328[[gnu::always_inline]] nce uint16x8_t pairwise_add(uint16x8_t a, uint16x8_t b) {
return vpaddq_u16(a, b); }
329[[gnu::always_inline]] nce uint16x8_t pairwise_max(uint16x8_t a, uint16x8_t b) {
return vpmaxq_u16(a, b); }
330[[gnu::always_inline]] nce uint16x8_t pairwise_min(uint16x8_t a, uint16x8_t b) {
return vpminq_u16(a, b); }
331[[gnu::always_inline]] nce uint16x8_t add_saturate(uint16x8_t a, int16x8_t b) {
return vsqaddq_u16(a, b); }
332[[gnu::always_inline]] nce uint16_t reduce_add(uint16x8_t a) {
return vaddvq_u16(a); }
333[[gnu::always_inline]] nce uint32_t reduce_add_long(uint16x8_t a) {
return vaddlvq_u16(a); }
334[[gnu::always_inline]]
inline uint16_t reduce_max(uint16x8_t a) {
return vmaxvq_u16(a); }
335[[gnu::always_inline]]
inline uint16_t reduce_min(uint16x8_t a) {
return vminvq_u16(a); }
336[[gnu::always_inline]] nce uint16x8_t equal_to_zero(uint16x8_t a) {
return vceqzq_u16(a); }
337template <
int n>[[gnu::always_inline]] nce uint32x4_t shift_left_long_high(uint16x8_t a) {
return vshll_high_n_u16(a, n); }
338template <> [[gnu::always_inline]] nce float64x2_t reinterpret(uint16x8_t a) {
return vreinterpretq_f64_u16(a); }
339[[gnu::always_inline]] nce uint32x4_t move_long_high(uint16x8_t a) {
return vmovl_high_u16(a); }
340template <
int lane>[[gnu::always_inline]] nce uint32x4_t multiply_long_lane_high(uint16x8_t a, uint16x4_t v) {
return vmull_high_lane_u16(a, v, lane); }
341template <
int lane>[[gnu::always_inline]] nce uint16x8_t multiply_add_lane(uint16x8_t a, uint16x8_t b, uint16x8_t v) {
return vmlaq_laneq_u16(a, b, v, lane); }
342template <
int lane>[[gnu::always_inline]] nce uint16x8_t multiply_subtract_lane(uint16x8_t a, uint16x8_t b, uint16x8_t v) {
return vmlsq_laneq_u16(a, b, v, lane); }
343template <
int lane>[[gnu::always_inline]] nce uint16x8_t multiply_lane(uint16x8_t a, uint16x8_t v) {
return vmulq_laneq_u16(a, v, lane); }
344template <
int lane>[[gnu::always_inline]] nce uint32x4_t multiply_long_lane_high(uint16x8_t a, uint16x8_t v) {
return vmull_high_laneq_u16(a, v, lane); }
345template <
int lane>[[gnu::always_inline]] nce uint16x4_t duplicate_lane(uint16x8_t vec) {
return vdup_laneq_u16(vec, lane); }
346template <
int lane>[[gnu::always_inline]] nce uint16x8_t duplicate_lane(uint16x8_t vec) {
return vdupq_laneq_u16(vec, lane); }
347[[gnu::always_inline]] nce uint16x8_t zip1(uint16x8_t a, uint16x8_t b) {
return vzip1q_u16(a, b); }
348[[gnu::always_inline]] nce uint16x8_t zip2(uint16x8_t a, uint16x8_t b) {
return vzip2q_u16(a, b); }
349[[gnu::always_inline]] nce uint16x8_t unzip1(uint16x8_t a, uint16x8_t b) {
return vuzp1q_u16(a, b); }
350[[gnu::always_inline]] nce uint16x8_t unzip2(uint16x8_t a, uint16x8_t b) {
return vuzp2q_u16(a, b); }
351[[gnu::always_inline]] nce uint16x8_t transpose_step_1(uint16x8_t a, uint16x8_t b) {
return vtrn1q_u16(a, b); }
352[[gnu::always_inline]] nce uint16x8_t transpose_step_2(uint16x8_t a, uint16x8_t b) {
return vtrn2q_u16(a, b); }
355[[gnu::always_inline]] nce uint32x4_t multiply_long_high(uint16x8_t a, uint16_t b) {
return vmull_high_n_u16(a, b); }
356template <
int lane1,
int lane2>[[gnu::always_inline]] nce uint16x8_t copy_lane(uint16x8_t a, uint16x4_t b) {
return vcopyq_lane_u16(a, lane1, b, lane2); }
357template <
int lane1,
int lane2>[[gnu::always_inline]] nce uint16x8_t copy_lane(uint16x8_t a, uint16x8_t b) {
return vcopyq_laneq_u16(a, lane1, b, lane2); }
358[[gnu::always_inline]] nce int16x4_t add_saturate(int16x4_t a, uint16x4_t b) {
return vuqadd_s16(a, b); }
359template <
int lane>[[gnu::always_inline]] nce int32x4_t multiply_double_saturate_long_lane(int16x4_t a, int16x8_t v) {
return vqdmull_laneq_s16(a, v, lane); }
360template <
int lane>[[gnu::always_inline]] nce int16x4_t multiply_double_saturate_high_lane(int16x4_t a, int16x8_t v) {
return vqdmulh_laneq_s16(a, v, lane); }
361template <
int lane>[[gnu::always_inline]] nce int16x4_t multiply_double_round_saturate_high_lane(int16x4_t a, int16x8_t v) {
return vqrdmulh_laneq_s16(a, v, lane); }
362[[gnu::always_inline]] nce int16x8_t add_narrow_high_high(int16x4_t r, int32x4_t a, int32x4_t b) {
return vaddhn_high_s32(r, a, b); }
363[[gnu::always_inline]] nce int16x8_t add_round_narrow_high_high(int16x4_t r, int32x4_t a, int32x4_t b) {
return vraddhn_high_s32(r, a, b); }
364[[gnu::always_inline]] nce int16x8_t subtract_narrow_high_high(int16x4_t r, int32x4_t a, int32x4_t b) {
return vsubhn_high_s32(r, a, b); }
365[[gnu::always_inline]] nce int16x8_t subtract_round_narrow_high_high(int16x4_t r, int32x4_t a, int32x4_t b) {
return vrsubhn_high_s32(r, a, b); }
366[[gnu::always_inline]] nce int16_t reduce_add(int16x4_t a) {
return vaddv_s16(a); }
367[[gnu::always_inline]] nce int32_t reduce_add_long(int16x4_t a) {
return vaddlv_s16(a); }
368[[gnu::always_inline]]
inline int16_t reduce_max(int16x4_t a) {
return vmaxv_s16(a); }
369[[gnu::always_inline]]
inline int16_t reduce_min(int16x4_t a) {
return vminv_s16(a); }
370[[gnu::always_inline]] nce uint16x4_t equal_to_zero(int16x4_t a) {
return vceqz_s16(a); }
371[[gnu::always_inline]] nce uint16x4_t greater_than_or_equal_to_zero(int16x4_t a) {
return vcgez_s16(a); }
372[[gnu::always_inline]] nce uint16x4_t less_than_or_equal_to_zero(int16x4_t a) {
return vclez_s16(a); }
373[[gnu::always_inline]] nce uint16x4_t greater_than_zero(int16x4_t a) {
return vcgtz_s16(a); }
374[[gnu::always_inline]] nce uint16x4_t less_than_zero(int16x4_t a) {
return vcltz_s16(a); }
375template <> [[gnu::always_inline]] nce float64x1_t reinterpret(int16x4_t a) {
return vreinterpret_f64_s16(a); }
376[[gnu::always_inline]] nce int16x4_t zip1(int16x4_t a, int16x4_t b) {
return vzip1_s16(a, b); }
377[[gnu::always_inline]] nce int16x4_t zip2(int16x4_t a, int16x4_t b) {
return vzip2_s16(a, b); }
378[[gnu::always_inline]] nce int16x4_t unzip1(int16x4_t a, int16x4_t b) {
return vuzp1_s16(a, b); }
379[[gnu::always_inline]] nce int16x4_t unzip2(int16x4_t a, int16x4_t b) {
return vuzp2_s16(a, b); }
380[[gnu::always_inline]] nce int16x4_t transpose_step_1(int16x4_t a, int16x4_t b) {
return vtrn1_s16(a, b); }
381[[gnu::always_inline]] nce int16x4_t transpose_step_2(int16x4_t a, int16x4_t b) {
return vtrn2_s16(a, b); }
382[[gnu::always_inline]] nce int16x4_t multiply_double_add_round_saturate_high(int16x4_t a, int16x4_t b, int16x4_t c) {
return vqrdmlah_s16(a, b, c); }
383[[gnu::always_inline]] nce int16x4_t multiply_double_subtract_round_saturate_high(int16x4_t a, int16x4_t b, int16x4_t c) {
return vqrdmlsh_s16(a, b, c); }
384template <
int lane>[[gnu::always_inline]] nce int16x4_t multiply_double_add_round_saturate_high_lane(int16x4_t a, int16x4_t b, int16x4_t v) {
return vqrdmlah_lane_s16(a, b, v, lane); }
385template <
int lane>[[gnu::always_inline]] nce int16x4_t multiply_double_subtract_round_saturate_high_lane(int16x4_t a, int16x4_t b, int16x4_t v) {
return vqrdmlsh_lane_s16(a, b, v, lane); }
386template <
int lane>[[gnu::always_inline]] nce int16x4_t multiply_add_lane(int16x4_t a, int16x4_t b, int16x8_t v) {
return vmla_laneq_s16(a, b, v, lane); }
387template <
int lane>[[gnu::always_inline]] nce int16x4_t multiply_subtract_lane(int16x4_t a, int16x4_t b, int16x8_t v) {
return vmls_laneq_s16(a, b, v, lane); }
388template <
int lane>[[gnu::always_inline]] nce int16x4_t multiply_double_add_round_saturate_high_lane(int16x4_t a, int16x4_t b, int16x8_t v) {
return vqrdmlah_laneq_s16(a, b, v, lane); }
389template <
int lane>[[gnu::always_inline]] nce int16x4_t multiply_double_subtract_round_saturate_high_lane(int16x4_t a, int16x4_t b, int16x8_t v) {
return vqrdmlsh_laneq_s16(a, b, v, lane); }
390template <
int lane>[[gnu::always_inline]] nce int16x4_t multiply_lane(int16x4_t a, int16x8_t v) {
return vmul_laneq_s16(a, v, lane); }
391template <
int lane>[[gnu::always_inline]] nce int32x4_t multiply_long_lane(int16x4_t a, int16x8_t v) {
return vmull_laneq_s16(a, v, lane); }
392template <
int n>[[gnu::always_inline]] nce int16x8_t shift_right_narrow_high(int16x4_t r, int32x4_t a) {
return vshrn_high_n_s32(r, a, n); }
393template <
int n>[[gnu::always_inline]] nce int16x8_t shift_right_saturate_narrow_high(int16x4_t r, int32x4_t a) {
return vqshrn_high_n_s32(r, a, n); }
394template <
int n>[[gnu::always_inline]] nce int16x8_t shift_right_round_saturate_narrow_high(int16x4_t r, int32x4_t a) {
return vqrshrn_high_n_s32(r, a, n); }
395template <
int n>[[gnu::always_inline]] nce int16x8_t shift_right_round_narrow_high(int16x4_t r, int32x4_t a) {
return vrshrn_high_n_s32(r, a, n); }
396[[gnu::always_inline]] nce int16x8_t move_saturate_narrow_high(int16x4_t r, int32x4_t a) {
return vqmovn_high_s32(r, a); }
397template <
int lane1,
int lane2>[[gnu::always_inline]] nce int16x4_t copy_lane(int16x4_t a, int16x4_t b) {
return vcopy_lane_s16(a, lane1, b, lane2); }
398template <
int lane1,
int lane2>[[gnu::always_inline]] nce int16x4_t copy_lane(int16x4_t a, int16x8_t b) {
return vcopy_laneq_s16(a, lane1, b, lane2); }
399[[gnu::always_inline]] nce int16x8_t add_high(int16x8_t a, int8x16_t b) {
return vaddw_high_s8(a, b); }
400[[gnu::always_inline]] nce int16x8_t multiply_add_long_high(int16x8_t a, int8x16_t b, int8x16_t c) {
return vmlal_high_s8(a, b, c); }
401[[gnu::always_inline]] nce int16x8_t multiply_subtract_long_high(int16x8_t a, int8x16_t b, int8x16_t c) {
return vmlsl_high_s8(a, b, c); }
402[[gnu::always_inline]] nce int16x8_t subtract_high(int16x8_t a, int8x16_t b) {
return vsubw_high_s8(a, b); }
403[[gnu::always_inline]] nce int16x8_t subtract_absolute_add_high(int16x8_t a, int8x16_t b, int8x16_t c) {
return vabal_high_s8(a, b, c); }
404[[gnu::always_inline]] nce int16x8_t add_saturate(int16x8_t a, uint16x8_t b) {
return vuqaddq_s16(a, b); }
405template <
int lane>[[gnu::always_inline]] nce int32x4_t multiply_double_saturate_long_lane_high(int16x8_t a, int16x4_t v) {
return vqdmull_high_lane_s16(a, v, lane); }
406[[gnu::always_inline]] nce int32x4_t add_long_high(int16x8_t a, int16x8_t b) {
return vaddl_high_s16(a, b); }
407[[gnu::always_inline]] nce int32x4_t multiply_double_saturate_long_high(int16x8_t a, int16x8_t b) {
return vqdmull_high_s16(a, b); }
408[[gnu::always_inline]] nce int32x4_t multiply_long_high(int16x8_t a, int16x8_t b) {
return vmull_high_s16(a, b); }
409template <
int lane>[[gnu::always_inline]] nce int32x4_t multiply_double_saturate_long_lane_high(int16x8_t a, int16x8_t v) {
return vqdmull_high_laneq_s16(a, v, lane); }
410template <
int lane>[[gnu::always_inline]] nce int16x8_t multiply_double_saturate_high_lane(int16x8_t a, int16x8_t v) {
return vqdmulhq_laneq_s16(a, v, lane); }
411template <
int lane>[[gnu::always_inline]] nce int16x8_t multiply_double_round_saturate_high_lane(int16x8_t a, int16x8_t v) {
return vqrdmulhq_laneq_s16(a, v, lane); }
412[[gnu::always_inline]] nce int32x4_t subtract_long_high(int16x8_t a, int16x8_t b) {
return vsubl_high_s16(a, b); }
413[[gnu::always_inline]] nce int32x4_t subtract_absolute_long_high(int16x8_t a, int16x8_t b) {
return vabdl_high_s16(a, b); }
414[[gnu::always_inline]] nce int16x8_t pairwise_add(int16x8_t a, int16x8_t b) {
return vpaddq_s16(a, b); }
415[[gnu::always_inline]] nce int16x8_t pairwise_max(int16x8_t a, int16x8_t b) {
return vpmaxq_s16(a, b); }
416[[gnu::always_inline]] nce int16x8_t pairwise_min(int16x8_t a, int16x8_t b) {
return vpminq_s16(a, b); }
417[[gnu::always_inline]] nce int32x4_t multiply_double_saturate_long_high(int16x8_t a, int16_t b) {
return vqdmull_high_n_s16(a, b); }
418[[gnu::always_inline]] nce int16_t reduce_add(int16x8_t a) {
return vaddvq_s16(a); }
419[[gnu::always_inline]] nce int32_t reduce_add_long(int16x8_t a) {
return vaddlvq_s16(a); }
420[[gnu::always_inline]]
inline int16_t reduce_max(int16x8_t a) {
return vmaxvq_s16(a); }
421[[gnu::always_inline]]
inline int16_t reduce_min(int16x8_t a) {
return vminvq_s16(a); }
422[[gnu::always_inline]] nce uint16x8_t equal_to_zero(int16x8_t a) {
return vceqzq_s16(a); }
423[[gnu::always_inline]] nce uint16x8_t greater_than_or_equal_to_zero(int16x8_t a) {
return vcgezq_s16(a); }
424[[gnu::always_inline]] nce uint16x8_t less_than_or_equal_to_zero(int16x8_t a) {
return vclezq_s16(a); }
425[[gnu::always_inline]] nce uint16x8_t greater_than_zero(int16x8_t a) {
return vcgtzq_s16(a); }
426[[gnu::always_inline]] nce uint16x8_t less_than_zero(int16x8_t a) {
return vcltzq_s16(a); }
427template <
int n>[[gnu::always_inline]] nce int32x4_t shift_left_long_high(int16x8_t a) {
return vshll_high_n_s16(a, n); }
428template <> [[gnu::always_inline]] nce float64x2_t reinterpret(int16x8_t a) {
return vreinterpretq_f64_s16(a); }
429[[gnu::always_inline]] nce int32x4_t move_long_high(int16x8_t a) {
return vmovl_high_s16(a); }
430template <
int lane>[[gnu::always_inline]] nce int32x4_t multiply_long_lane_high(int16x8_t a, int16x4_t v) {
return vmull_high_lane_s16(a, v, lane); }
431template <
int lane>[[gnu::always_inline]] nce int16x8_t multiply_add_lane(int16x8_t a, int16x8_t b, int16x8_t v) {
return vmlaq_laneq_s16(a, b, v, lane); }
432template <
int lane>[[gnu::always_inline]] nce int16x8_t multiply_subtract_lane(int16x8_t a, int16x8_t b, int16x8_t v) {
return vmlsq_laneq_s16(a, b, v, lane); }
433template <
int lane>[[gnu::always_inline]] nce int16x8_t multiply_lane(int16x8_t a, int16x8_t v) {
return vmulq_laneq_s16(a, v, lane); }
434template <
int lane>[[gnu::always_inline]] nce int32x4_t multiply_long_lane_high(int16x8_t a, int16x8_t v) {
return vmull_high_laneq_s16(a, v, lane); }
435template <
int lane>[[gnu::always_inline]] nce int16x4_t duplicate_lane(int16x8_t vec) {
return vdup_laneq_s16(vec, lane); }
436template <
int lane>[[gnu::always_inline]] nce int16x8_t duplicate_lane(int16x8_t vec) {
return vdupq_laneq_s16(vec, lane); }
437[[gnu::always_inline]] nce int16x8_t zip1(int16x8_t a, int16x8_t b) {
return vzip1q_s16(a, b); }
438[[gnu::always_inline]] nce int16x8_t zip2(int16x8_t a, int16x8_t b) {
return vzip2q_s16(a, b); }
439[[gnu::always_inline]] nce int16x8_t unzip1(int16x8_t a, int16x8_t b) {
return vuzp1q_s16(a, b); }
440[[gnu::always_inline]] nce int16x8_t unzip2(int16x8_t a, int16x8_t b) {
return vuzp2q_s16(a, b); }
441[[gnu::always_inline]] nce int16x8_t transpose_step_1(int16x8_t a, int16x8_t b) {
return vtrn1q_s16(a, b); }
442[[gnu::always_inline]] nce int16x8_t transpose_step_2(int16x8_t a, int16x8_t b) {
return vtrn2q_s16(a, b); }
443template <
int lane>[[gnu::always_inline]] nce int16x8_t multiply_double_add_round_saturate_high_lane(int16x8_t a, int16x8_t b, int16x4_t v) {
return vqrdmlahq_lane_s16(a, b, v, lane); }
444template <
int lane>[[gnu::always_inline]] nce int16x8_t multiply_double_subtract_round_saturate_high_lane(int16x8_t a, int16x8_t b, int16x4_t v) {
return vqrdmlshq_lane_s16(a, b, v, lane); }
445[[gnu::always_inline]] nce int16x8_t multiply_double_add_round_saturate_high(int16x8_t a, int16x8_t b, int16x8_t c) {
return vqrdmlahq_s16(a, b, c); }
446[[gnu::always_inline]] nce int16x8_t multiply_double_subtract_round_saturate_high(int16x8_t a, int16x8_t b, int16x8_t c) {
return vqrdmlshq_s16(a, b, c); }
447template <
int lane>[[gnu::always_inline]] nce int16x8_t multiply_double_add_round_saturate_high_lane(int16x8_t a, int16x8_t b, int16x8_t v) {
return vqrdmlahq_laneq_s16(a, b, v, lane); }
448template <
int lane>[[gnu::always_inline]] nce int16x8_t multiply_double_subtract_round_saturate_high_lane(int16x8_t a, int16x8_t b, int16x8_t v) {
return vqrdmlshq_laneq_s16(a, b, v, lane); }
451[[gnu::always_inline]] nce int32x4_t multiply_long_high(int16x8_t a, int16_t b) {
return vmull_high_n_s16(a, b); }
452template <
int lane1,
int lane2>[[gnu::always_inline]] nce int16x8_t copy_lane(int16x8_t a, int16x4_t b) {
return vcopyq_lane_s16(a, lane1, b, lane2); }
453template <
int lane1,
int lane2>[[gnu::always_inline]] nce int16x8_t copy_lane(int16x8_t a, int16x8_t b) {
return vcopyq_laneq_s16(a, lane1, b, lane2); }
454template <
int lane>[[gnu::always_inline]] nce int64x2_t multiply_double_saturate_long_lane(int32x2_t a, int32x4_t v) {
return vqdmull_laneq_s32(a, v, lane); }
455template <
int lane>[[gnu::always_inline]] nce int32x2_t multiply_double_saturate_high_lane(int32x2_t a, int32x4_t v) {
return vqdmulh_laneq_s32(a, v, lane); }
456template <
int lane>[[gnu::always_inline]] nce int32x2_t multiply_double_round_saturate_high_lane(int32x2_t a, int32x4_t v) {
return vqrdmulh_laneq_s32(a, v, lane); }
457[[gnu::always_inline]] nce int32x2_t add_saturate(int32x2_t a, uint32x2_t b) {
return vuqadd_s32(a, b); }
458[[gnu::always_inline]] nce int32x4_t add_narrow_high_high(int32x2_t r, int64x2_t a, int64x2_t b) {
return vaddhn_high_s64(r, a, b); }
459[[gnu::always_inline]] nce int32x4_t add_round_narrow_high_high(int32x2_t r, int64x2_t a, int64x2_t b) {
return vraddhn_high_s64(r, a, b); }
460[[gnu::always_inline]] nce int32x4_t subtract_narrow_high_high(int32x2_t r, int64x2_t a, int64x2_t b) {
return vsubhn_high_s64(r, a, b); }
461[[gnu::always_inline]] nce int32x4_t subtract_round_narrow_high_high(int32x2_t r, int64x2_t a, int64x2_t b) {
return vrsubhn_high_s64(r, a, b); }
462[[gnu::always_inline]] nce int32_t reduce_add(int32x2_t a) {
return vaddv_s32(a); }
463[[gnu::always_inline]] nce int64_t reduce_add_long(int32x2_t a) {
return vaddlv_s32(a); }
464[[gnu::always_inline]]
inline int32_t reduce_max(int32x2_t a) {
return vmaxv_s32(a); }
465[[gnu::always_inline]]
inline int32_t reduce_min(int32x2_t a) {
return vminv_s32(a); }
466[[gnu::always_inline]] nce uint32x2_t equal_to_zero(int32x2_t a) {
return vceqz_s32(a); }
467[[gnu::always_inline]] nce uint32x2_t greater_than_or_equal_to_zero(int32x2_t a) {
return vcgez_s32(a); }
468[[gnu::always_inline]] nce uint32x2_t less_than_or_equal_to_zero(int32x2_t a) {
return vclez_s32(a); }
469[[gnu::always_inline]] nce uint32x2_t greater_than_zero(int32x2_t a) {
return vcgtz_s32(a); }
470[[gnu::always_inline]] nce uint32x2_t less_than_zero(int32x2_t a) {
return vcltz_s32(a); }
471template <> [[gnu::always_inline]] nce float64x1_t reinterpret(int32x2_t a) {
return vreinterpret_f64_s32(a); }
472[[gnu::always_inline]] nce int32x2_t zip1(int32x2_t a, int32x2_t b) {
return vzip1_s32(a, b); }
473[[gnu::always_inline]] nce int32x2_t zip2(int32x2_t a, int32x2_t b) {
return vzip2_s32(a, b); }
474[[gnu::always_inline]] nce int32x2_t unzip1(int32x2_t a, int32x2_t b) {
return vuzp1_s32(a, b); }
475[[gnu::always_inline]] nce int32x2_t unzip2(int32x2_t a, int32x2_t b) {
return vuzp2_s32(a, b); }
476[[gnu::always_inline]] nce int32x2_t transpose_step_1(int32x2_t a, int32x2_t b) {
return vtrn1_s32(a, b); }
477[[gnu::always_inline]] nce int32x2_t transpose_step_2(int32x2_t a, int32x2_t b) {
return vtrn2_s32(a, b); }
478[[gnu::always_inline]] nce int32x2_t multiply_double_add_round_saturate_high(int32x2_t a, int32x2_t b, int32x2_t c) {
return vqrdmlah_s32(a, b, c); }
479[[gnu::always_inline]] nce int32x2_t multiply_double_subtract_round_saturate_high(int32x2_t a, int32x2_t b, int32x2_t c) {
return vqrdmlsh_s32(a, b, c); }
480template <
int lane>[[gnu::always_inline]] nce int32x2_t multiply_double_add_round_saturate_high_lane(int32x2_t a, int32x2_t b, int32x2_t v) {
return vqrdmlah_lane_s32(a, b, v, lane); }
481template <
int lane>[[gnu::always_inline]] nce int32x2_t multiply_double_subtract_round_saturate_high_lane(int32x2_t a, int32x2_t b, int32x2_t v) {
return vqrdmlsh_lane_s32(a, b, v, lane); }
482template <
int lane>[[gnu::always_inline]] nce int32x2_t multiply_add_lane(int32x2_t a, int32x2_t b, int32x4_t v) {
return vmla_laneq_s32(a, b, v, lane); }
483template <
int lane>[[gnu::always_inline]] nce int32x2_t multiply_subtract_lane(int32x2_t a, int32x2_t b, int32x4_t v) {
return vmls_laneq_s32(a, b, v, lane); }
484template <
int lane>[[gnu::always_inline]] nce int32x2_t multiply_double_add_round_saturate_high_lane(int32x2_t a, int32x2_t b, int32x4_t v) {
return vqrdmlah_laneq_s32(a, b, v, lane); }
485template <
int lane>[[gnu::always_inline]] nce int32x2_t multiply_double_subtract_round_saturate_high_lane(int32x2_t a, int32x2_t b, int32x4_t v) {
return vqrdmlsh_laneq_s32(a, b, v, lane); }
486template <
int lane>[[gnu::always_inline]] nce int32x2_t multiply_lane(int32x2_t a, int32x4_t v) {
return vmul_laneq_s32(a, v, lane); }
487template <
int lane>[[gnu::always_inline]] nce int64x2_t multiply_long_lane(int32x2_t a, int32x4_t v) {
return vmull_laneq_s32(a, v, lane); }
488template <
int n>[[gnu::always_inline]] nce int32x4_t shift_right_narrow_high(int32x2_t r, int64x2_t a) {
return vshrn_high_n_s64(r, a, n); }
489template <
int n>[[gnu::always_inline]] nce int32x4_t shift_right_saturate_narrow_high(int32x2_t r, int64x2_t a) {
return vqshrn_high_n_s64(r, a, n); }
490template <
int n>[[gnu::always_inline]] nce int32x4_t shift_right_round_saturate_narrow_high(int32x2_t r, int64x2_t a) {
return vqrshrn_high_n_s64(r, a, n); }
491template <
int n>[[gnu::always_inline]] nce int32x4_t shift_right_round_narrow_high(int32x2_t r, int64x2_t a) {
return vrshrn_high_n_s64(r, a, n); }
492[[gnu::always_inline]] nce int32x4_t move_saturate_narrow_high(int32x2_t r, int64x2_t a) {
return vqmovn_high_s64(r, a); }
493template <
int lane1,
int lane2>[[gnu::always_inline]] nce int32x2_t copy_lane(int32x2_t a, int32x2_t b) {
return vcopy_lane_s32(a, lane1, b, lane2); }
494template <
int lane1,
int lane2>[[gnu::always_inline]] nce int32x2_t copy_lane(int32x2_t a, int32x4_t b) {
return vcopy_laneq_s32(a, lane1, b, lane2); }
495template <
int lane>[[gnu::always_inline]] nce int32x4_t multiply_double_add_saturate_long_lane(int32x4_t a, int16x4_t b, int16x8_t v) {
return vqdmlal_laneq_s16(a, b, v, lane); }
496template <
int lane>[[gnu::always_inline]] nce int32x4_t multiply_double_subtract_saturate_long_lane(int32x4_t a, int16x4_t b, int16x8_t v) {
return vqdmlsl_laneq_s16(a, b, v, lane); }
497[[gnu::always_inline]] nce int32x4_t add_high(int32x4_t a, int16x8_t b) {
return vaddw_high_s16(a, b); }
498template <
int lane>[[gnu::always_inline]] nce int32x4_t multiply_double_add_saturate_long_lane_high(int32x4_t a, int16x8_t b, int16x4_t v) {
return vqdmlal_high_lane_s16(a, b, v, lane); }
499template <
int lane>[[gnu::always_inline]] nce int32x4_t multiply_double_subtract_saturate_long_lane_high(int32x4_t a, int16x8_t b, int16x4_t v) {
return vqdmlsl_high_lane_s16(a, b, v, lane); }
500[[gnu::always_inline]] nce int32x4_t multiply_add_long_high(int32x4_t a, int16x8_t b, int16x8_t c) {
return vmlal_high_s16(a, b, c); }
501[[gnu::always_inline]] nce int32x4_t multiply_subtract_long_high(int32x4_t a, int16x8_t b, int16x8_t c) {
return vmlsl_high_s16(a, b, c); }
502[[gnu::always_inline]] nce int32x4_t multiply_double_add_saturate_long_high(int32x4_t a, int16x8_t b, int16x8_t c) {
return vqdmlal_high_s16(a, b, c); }
503[[gnu::always_inline]] nce int32x4_t multiply_double_subtract_saturate_long_high(int32x4_t a, int16x8_t b, int16x8_t c) {
return vqdmlsl_high_s16(a, b, c); }
504template <
int lane>[[gnu::always_inline]] nce int32x4_t multiply_double_add_saturate_long_lane_high(int32x4_t a, int16x8_t b, int16x8_t v) {
return vqdmlal_high_laneq_s16(a, b, v, lane); }
505template <
int lane>[[gnu::always_inline]] nce int32x4_t multiply_double_subtract_saturate_long_lane_high(int32x4_t a, int16x8_t b, int16x8_t v) {
return vqdmlsl_high_laneq_s16(a, b, v, lane); }
506[[gnu::always_inline]] nce int32x4_t subtract_absolute_add_high(int32x4_t a, int16x8_t b, int16x8_t c) {
return vabal_high_s16(a, b, c); }
507[[gnu::always_inline]] nce int32x4_t multiply_double_add_saturate_long_high(int32x4_t a, int16x8_t b, int16_t c) {
return vqdmlal_high_n_s16(a, b, c); }
508[[gnu::always_inline]] nce int32x4_t multiply_double_subtract_saturate_long_high(int32x4_t a, int16x8_t b, int16_t c) {
return vqdmlsl_high_n_s16(a, b, c); }
509[[gnu::always_inline]] nce int32x4_t subtract_high(int32x4_t a, int16x8_t b) {
return vsubw_high_s16(a, b); }
510template <
int lane>[[gnu::always_inline]] nce int64x2_t multiply_double_saturate_long_lane_high(int32x4_t a, int32x2_t v) {
return vqdmull_high_lane_s32(a, v, lane); }
511[[gnu::always_inline]] nce int64x2_t add_long_high(int32x4_t a, int32x4_t b) {
return vaddl_high_s32(a, b); }
512[[gnu::always_inline]] nce int64x2_t multiply_double_saturate_long_high(int32x4_t a, int32x4_t b) {
return vqdmull_high_s32(a, b); }
513[[gnu::always_inline]] nce int64x2_t multiply_long_high(int32x4_t a, int32x4_t b) {
return vmull_high_s32(a, b); }
514template <
int lane>[[gnu::always_inline]] nce int64x2_t multiply_double_saturate_long_lane_high(int32x4_t a, int32x4_t v) {
return vqdmull_high_laneq_s32(a, v, lane); }
515template <
int lane>[[gnu::always_inline]] nce int32x4_t multiply_double_saturate_high_lane(int32x4_t a, int32x4_t v) {
return vqdmulhq_laneq_s32(a, v, lane); }
516template <
int lane>[[gnu::always_inline]] nce int32x4_t multiply_double_round_saturate_high_lane(int32x4_t a, int32x4_t v) {
return vqrdmulhq_laneq_s32(a, v, lane); }
517[[gnu::always_inline]] nce int64x2_t subtract_long_high(int32x4_t a, int32x4_t b) {
return vsubl_high_s32(a, b); }
518[[gnu::always_inline]] nce int64x2_t subtract_absolute_long_high(int32x4_t a, int32x4_t b) {
return vabdl_high_s32(a, b); }
519[[gnu::always_inline]] nce int32x4_t pairwise_add(int32x4_t a, int32x4_t b) {
return vpaddq_s32(a, b); }
520[[gnu::always_inline]] nce int32x4_t pairwise_max(int32x4_t a, int32x4_t b) {
return vpmaxq_s32(a, b); }
521[[gnu::always_inline]] nce int32x4_t pairwise_min(int32x4_t a, int32x4_t b) {
return vpminq_s32(a, b); }
522[[gnu::always_inline]] nce int32x4_t add_saturate(int32x4_t a, uint32x4_t b) {
return vuqaddq_s32(a, b); }
523[[gnu::always_inline]] nce int64x2_t multiply_double_saturate_long_high(int32x4_t a, int32_t b) {
return vqdmull_high_n_s32(a, b); }
524[[gnu::always_inline]] nce int32_t reduce_add(int32x4_t a) {
return vaddvq_s32(a); }
525[[gnu::always_inline]] nce int64_t reduce_add_long(int32x4_t a) {
return vaddlvq_s32(a); }
526[[gnu::always_inline]]
inline int32_t reduce_max(int32x4_t a) {
return vmaxvq_s32(a); }
527[[gnu::always_inline]]
inline int32_t reduce_min(int32x4_t a) {
return vminvq_s32(a); }
528[[gnu::always_inline]] nce uint32x4_t equal_to_zero(int32x4_t a) {
return vceqzq_s32(a); }
529[[gnu::always_inline]] nce uint32x4_t greater_than_or_equal_to_zero(int32x4_t a) {
return vcgezq_s32(a); }
530[[gnu::always_inline]] nce uint32x4_t less_than_or_equal_to_zero(int32x4_t a) {
return vclezq_s32(a); }
531[[gnu::always_inline]] nce uint32x4_t greater_than_zero(int32x4_t a) {
return vcgtzq_s32(a); }
532[[gnu::always_inline]] nce uint32x4_t less_than_zero(int32x4_t a) {
return vcltzq_s32(a); }
533template <
int n>[[gnu::always_inline]] nce int64x2_t shift_left_long_high(int32x4_t a) {
return vshll_high_n_s32(a, n); }
534template <> [[gnu::always_inline]] nce float64x2_t reinterpret(int32x4_t a) {
return vreinterpretq_f64_s32(a); }
535[[gnu::always_inline]] nce int64x2_t move_long_high(int32x4_t a) {
return vmovl_high_s32(a); }
536template <
int lane>[[gnu::always_inline]] nce int32x4_t multiply_add_long_lane(int32x4_t a, int16x4_t b, int16x8_t v) {
return vmlal_laneq_s16(a, b, v, lane); }
537template <
int lane>[[gnu::always_inline]] nce int32x4_t multiply_subtract_long_lane(int32x4_t a, int16x4_t b, int16x8_t v) {
return vmlsl_laneq_s16(a, b, v, lane); }
538template <
int lane>[[gnu::always_inline]] nce int32x4_t multiply_add_long_lane_high(int32x4_t a, int16x8_t b, int16x4_t v) {
return vmlal_high_lane_s16(a, b, v, lane); }
539template <
int lane>[[gnu::always_inline]] nce int32x4_t multiply_subtract_long_lane_high(int32x4_t a, int16x8_t b, int16x4_t v) {
return vmlsl_high_lane_s16(a, b, v, lane); }
540template <
int lane>[[gnu::always_inline]] nce int32x4_t multiply_add_long_lane_high(int32x4_t a, int16x8_t b, int16x8_t v) {
return vmlal_high_laneq_s16(a, b, v, lane); }
541template <
int lane>[[gnu::always_inline]] nce int32x4_t multiply_subtract_long_lane_high(int32x4_t a, int16x8_t b, int16x8_t v) {
return vmlsl_high_laneq_s16(a, b, v, lane); }
542[[gnu::always_inline]] nce int32x4_t multiply_add_long_high(int32x4_t a, int16x8_t b, int16_t c) {
return vmlal_high_n_s16(a, b, c); }
543[[gnu::always_inline]] nce int32x4_t multiply_subtract_long_high(int32x4_t a, int16x8_t b, int16_t c) {
return vmlsl_high_n_s16(a, b, c); }
544template <
int lane>[[gnu::always_inline]] nce int64x2_t multiply_long_lane_high(int32x4_t a, int32x2_t v) {
return vmull_high_lane_s32(a, v, lane); }
545template <
int lane>[[gnu::always_inline]] nce int32x4_t multiply_add_lane(int32x4_t a, int32x4_t b, int32x4_t v) {
return vmlaq_laneq_s32(a, b, v, lane); }
546template <
int lane>[[gnu::always_inline]] nce int32x4_t multiply_subtract_lane(int32x4_t a, int32x4_t b, int32x4_t v) {
return vmlsq_laneq_s32(a, b, v, lane); }
547template <
int lane>[[gnu::always_inline]] nce int32x4_t multiply_lane(int32x4_t a, int32x4_t v) {
return vmulq_laneq_s32(a, v, lane); }
548template <
int lane>[[gnu::always_inline]] nce int64x2_t multiply_long_lane_high(int32x4_t a, int32x4_t v) {
return vmull_high_laneq_s32(a, v, lane); }
549template <
int lane>[[gnu::always_inline]] nce int32x2_t duplicate_lane(int32x4_t vec) {
return vdup_laneq_s32(vec, lane); }
550template <
int lane>[[gnu::always_inline]] nce int32x4_t duplicate_lane_quad(int32x4_t vec) {
return vdupq_laneq_s32(vec, lane); }
551[[gnu::always_inline]] nce int32x4_t zip1(int32x4_t a, int32x4_t b) {
return vzip1q_s32(a, b); }
552[[gnu::always_inline]] nce int32x4_t zip2(int32x4_t a, int32x4_t b) {
return vzip2q_s32(a, b); }
553[[gnu::always_inline]] nce int32x4_t unzip1(int32x4_t a, int32x4_t b) {
return vuzp1q_s32(a, b); }
554[[gnu::always_inline]] nce int32x4_t unzip2(int32x4_t a, int32x4_t b) {
return vuzp2q_s32(a, b); }
555[[gnu::always_inline]] nce int32x4_t transpose_step_1(int32x4_t a, int32x4_t b) {
return vtrn1q_s32(a, b); }
556[[gnu::always_inline]] nce int32x4_t transpose_step_2(int32x4_t a, int32x4_t b) {
return vtrn2q_s32(a, b); }
557template <
int lane>[[gnu::always_inline]] nce int32x4_t multiply_double_add_round_saturate_high_lane(int32x4_t a, int32x4_t b, int32x2_t v) {
return vqrdmlahq_lane_s32(a, b, v, lane); }
558template <
int lane>[[gnu::always_inline]] nce int32x4_t multiply_double_subtract_round_saturate_high_lane(int32x4_t a, int32x4_t b, int32x2_t v) {
return vqrdmlshq_lane_s32(a, b, v, lane); }
559[[gnu::always_inline]] nce int32x4_t multiply_double_add_round_saturate_high(int32x4_t a, int32x4_t b, int32x4_t c) {
return vqrdmlahq_s32(a, b, c); }
560[[gnu::always_inline]] nce int32x4_t multiply_double_subtract_round_saturate_high(int32x4_t a, int32x4_t b, int32x4_t c) {
return vqrdmlshq_s32(a, b, c); }
561template <
int lane>[[gnu::always_inline]] nce int32x4_t multiply_double_add_round_saturate_high_lane(int32x4_t a, int32x4_t b, int32x4_t v) {
return vqrdmlahq_laneq_s32(a, b, v, lane); }
562template <
int lane>[[gnu::always_inline]] nce int32x4_t multiply_double_subtract_round_saturate_high_lane(int32x4_t a, int32x4_t b, int32x4_t v) {
return vqrdmlshq_laneq_s32(a, b, v, lane); }
565[[gnu::always_inline]] nce int64x2_t multiply_long_high(int32x4_t a, int32_t b) {
return vmull_high_n_s32(a, b); }
566template <
int lane1,
int lane2>[[gnu::always_inline]] nce int32x4_t copy_lane(int32x4_t a, int32x2_t b) {
return vcopyq_lane_s32(a, lane1, b, lane2); }
567template <
int lane1,
int lane2>[[gnu::always_inline]] nce int32x4_t copy_lane(int32x4_t a, int32x4_t b) {
return vcopyq_laneq_s32(a, lane1, b, lane2); }
568[[gnu::always_inline]] nce uint64x1_t equal(uint64x1_t a, uint64x1_t b) {
return vceq_u64(a, b); }
569[[gnu::always_inline]] nce uint64x1_t add_saturate(uint64x1_t a, int64x1_t b) {
return vsqadd_u64(a, b); }
570[[gnu::always_inline]] nce uint64x1_t equal_to_zero(uint64x1_t a) {
return vceqz_u64(a); }
571[[gnu::always_inline]] nce uint64x1_t greater_than_or_equal(uint64x1_t a, uint64x1_t b) {
return vcge_u64(a, b); }
572[[gnu::always_inline]] nce uint64x1_t less_than_or_equal(uint64x1_t a, uint64x1_t b) {
return vcle_u64(a, b); }
573[[gnu::always_inline]] nce uint64x1_t greater_than(uint64x1_t a, uint64x1_t b) {
return vcgt_u64(a, b); }
574[[gnu::always_inline]] nce uint64x1_t less_than(uint64x1_t a, uint64x1_t b) {
return vclt_u64(a, b); }
575[[gnu::always_inline]] nce uint64x1_t compare_test_nonzero(uint64x1_t a, uint64x1_t b) {
return vtst_u64(a, b); }
576template <> [[gnu::always_inline]] nce float64x1_t convert(uint64x1_t a) {
return vcvt_f64_u64(a); }
577template <
int n>[[gnu::always_inline]] nce float64x1_t convert(uint64x1_t a) {
return vcvt_n_f64_u64(a, n); }
578template <> [[gnu::always_inline]] nce float64x1_t reinterpret(uint64x1_t a) {
return vreinterpret_f64_u64(a); }
579[[gnu::always_inline]] nce float64x1_t bitwise_select(uint64x1_t a, float64x1_t b, float64x1_t c) {
return vbsl_f64(a, b, c); }
580template <
int lane1,
int lane2>[[gnu::always_inline]] nce uint64x1_t copy_lane(uint64x1_t a, uint64x1_t b) {
return vcopy_lane_u64(a, lane1, b, lane2); }
581template <
int lane1,
int lane2>[[gnu::always_inline]] nce uint64x1_t copy_lane(uint64x1_t a, uint64x2_t b) {
return vcopy_laneq_u64(a, lane1, b, lane2); }
582template <
int lane>[[gnu::always_inline]] nce uint64_t duplicate_lane(uint64x1_t vec) {
return vdupd_lane_u64(vec, lane); }
583[[gnu::always_inline]] nce uint64x2_t pairwise_add(uint64x2_t a, uint64x2_t b) {
return vpaddq_u64(a, b); }
584[[gnu::always_inline]] nce uint64x2_t subtract_absolute_add_high(uint64x2_t a, uint32x4_t b, uint32x4_t c) {
return vabal_high_u32(a, b, c); }
585[[gnu::always_inline]] nce uint64_t pairwise_add(uint64x2_t a) {
return vpaddd_u64(a); }
586[[gnu::always_inline]] nce uint64_t reduce_add(uint64x2_t a) {
return vaddvq_u64(a); }
587[[gnu::always_inline]] nce uint64x2_t equal(uint64x2_t a, uint64x2_t b) {
return vceqq_u64(a, b); }
588[[gnu::always_inline]] nce uint64x2_t add_high(uint64x2_t a, uint32x4_t b) {
return vaddw_high_u32(a, b); }
589[[gnu::always_inline]] nce uint64x2_t multiply_add_long_high(uint64x2_t a, uint32x4_t b, uint32x4_t c) {
return vmlal_high_u32(a, b, c); }
590[[gnu::always_inline]] nce uint64x2_t multiply_subtract_long_high(uint64x2_t a, uint32x4_t b, uint32x4_t c) {
return vmlsl_high_u32(a, b, c); }
591[[gnu::always_inline]] nce uint64x2_t subtract_high(uint64x2_t a, uint32x4_t b) {
return vsubw_high_u32(a, b); }
592[[gnu::always_inline]] nce uint64x2_t add_saturate(uint64x2_t a, int64x2_t b) {
return vsqaddq_u64(a, b); }
593[[gnu::always_inline]] nce uint64x2_t equal_to_zero(uint64x2_t a) {
return vceqzq_u64(a); }
594[[gnu::always_inline]] nce uint64x2_t greater_than_or_equal(uint64x2_t a, uint64x2_t b) {
return vcgeq_u64(a, b); }
595[[gnu::always_inline]] nce uint64x2_t less_than_or_equal(uint64x2_t a, uint64x2_t b) {
return vcleq_u64(a, b); }
596[[gnu::always_inline]] nce uint64x2_t greater_than(uint64x2_t a, uint64x2_t b) {
return vcgtq_u64(a, b); }
597[[gnu::always_inline]] nce uint64x2_t less_than(uint64x2_t a, uint64x2_t b) {
return vcltq_u64(a, b); }
598[[gnu::always_inline]] nce uint64x2_t compare_test_nonzero(uint64x2_t a, uint64x2_t b) {
return vtstq_u64(a, b); }
599template <> [[gnu::always_inline]] nce float64x2_t convert(uint64x2_t a) {
return vcvtq_f64_u64(a); }
600template <
int n>[[gnu::always_inline]] nce float64x2_t convert(uint64x2_t a) {
return vcvtq_n_f64_u64(a, n); }
601template <
int lane>[[gnu::always_inline]] nce uint64x1_t duplicate_lane(uint64x2_t vec) {
return vdup_laneq_u64(vec, lane); }
602template <
int lane>[[gnu::always_inline]] nce uint64x2_t duplicate_lane_quad(uint64x2_t vec) {
return vdupq_laneq_u64(vec, lane); }
603template <
int lane>[[gnu::always_inline]] nce uint64_t duplicate_lane(uint64x2_t vec) {
return vdupd_laneq_u64(vec, lane); }
604[[gnu::always_inline]] nce uint64x2_t zip1(uint64x2_t a, uint64x2_t b) {
return vzip1q_u64(a, b); }
605[[gnu::always_inline]] nce uint64x2_t zip2(uint64x2_t a, uint64x2_t b) {
return vzip2q_u64(a, b); }
606[[gnu::always_inline]] nce uint64x2_t unzip1(uint64x2_t a, uint64x2_t b) {
return vuzp1q_u64(a, b); }
607[[gnu::always_inline]] nce uint64x2_t unzip2(uint64x2_t a, uint64x2_t b) {
return vuzp2q_u64(a, b); }
608[[gnu::always_inline]] nce uint64x2_t transpose_step_1(uint64x2_t a, uint64x2_t b) {
return vtrn1q_u64(a, b); }
609[[gnu::always_inline]] nce uint64x2_t transpose_step_2(uint64x2_t a, uint64x2_t b) {
return vtrn2q_u64(a, b); }
612template <
int lane>[[gnu::always_inline]] nce uint64x2_t multiply_add_long_lane(uint64x2_t a, uint32x2_t b, uint32x4_t v) {
return vmlal_laneq_u32(a, b, v, lane); }
613template <
int lane>[[gnu::always_inline]] nce uint64x2_t multiply_subtract_long_lane(uint64x2_t a, uint32x2_t b, uint32x4_t v) {
return vmlsl_laneq_u32(a, b, v, lane); }
614template <
int lane>[[gnu::always_inline]] nce uint64x2_t multiply_add_long_lane_high(uint64x2_t a, uint32x4_t b, uint32x2_t v) {
return vmlal_high_lane_u32(a, b, v, lane); }
615template <
int lane>[[gnu::always_inline]] nce uint64x2_t multiply_subtract_long_lane_high(uint64x2_t a, uint32x4_t b, uint32x2_t v) {
return vmlsl_high_lane_u32(a, b, v, lane); }
616template <
int lane>[[gnu::always_inline]] nce uint64x2_t multiply_add_long_lane_high(uint64x2_t a, uint32x4_t b, uint32x4_t v) {
return vmlal_high_laneq_u32(a, b, v, lane); }
617template <
int lane>[[gnu::always_inline]] nce uint64x2_t multiply_subtract_long_lane_high(uint64x2_t a, uint32x4_t b, uint32x4_t v) {
return vmlsl_high_laneq_u32(a, b, v, lane); }
618[[gnu::always_inline]] nce uint64x2_t multiply_add_long_high(uint64x2_t a, uint32x4_t b, uint32_t c) {
return vmlal_high_n_u32(a, b, c); }
619[[gnu::always_inline]] nce uint64x2_t multiply_subtract_long_high(uint64x2_t a, uint32x4_t b, uint32_t c) {
return vmlsl_high_n_u32(a, b, c); }
620[[gnu::always_inline]] nce float64x2_t bitwise_select(uint64x2_t a, float64x2_t b, float64x2_t c) {
return vbslq_f64(a, b, c); }
621template <
int lane1,
int lane2>[[gnu::always_inline]] nce uint64x2_t copy_lane(uint64x2_t a, uint64x1_t b) {
return vcopyq_lane_u64(a, lane1, b, lane2); }
622template <
int lane1,
int lane2>[[gnu::always_inline]] nce uint64x2_t copy_lane(uint64x2_t a, uint64x2_t b) {
return vcopyq_laneq_u64(a, lane1, b, lane2); }
623[[gnu::always_inline]] nce uint32x2_t add_saturate(uint32x2_t a, int32x2_t b) {
return vsqadd_u32(a, b); }
624[[gnu::always_inline]] nce uint32x4_t add_narrow_high_high(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
return vaddhn_high_u64(r, a, b); }
625[[gnu::always_inline]] nce uint32x4_t add_round_narrow_high_high(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
return vraddhn_high_u64(r, a, b); }
626[[gnu::always_inline]] nce uint32x4_t subtract_narrow_high_high(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
return vsubhn_high_u64(r, a, b); }
627[[gnu::always_inline]] nce uint32x4_t subtract_round_narrow_high_high(uint32x2_t r, uint64x2_t a, uint64x2_t b) {
return vrsubhn_high_u64(r, a, b); }
628[[gnu::always_inline]] nce uint32_t reduce_add(uint32x2_t a) {
return vaddv_u32(a); }
629[[gnu::always_inline]] nce uint64_t reduce_add_long(uint32x2_t a) {
return vaddlv_u32(a); }
630[[gnu::always_inline]]
inline uint32_t reduce_max(uint32x2_t a) {
return vmaxv_u32(a); }
631[[gnu::always_inline]]
inline uint32_t reduce_min(uint32x2_t a) {
return vminv_u32(a); }
632[[gnu::always_inline]] nce uint32x2_t equal_to_zero(uint32x2_t a) {
return vceqz_u32(a); }
633template <
int n>[[gnu::always_inline]] nce uint32x4_t shift_right_narrow_high(uint32x2_t r, uint64x2_t a) {
return vshrn_high_n_u64(r, a, n); }
634template <
int n>[[gnu::always_inline]] nce uint32x4_t shift_right_saturate_narrow_high(uint32x2_t r, uint64x2_t a) {
return vqshrn_high_n_u64(r, a, n); }
635template <
int n>[[gnu::always_inline]] nce uint32x4_t shift_right_round_saturate_narrow_high(uint32x2_t r, uint64x2_t a) {
return vqrshrn_high_n_u64(r, a, n); }
636template <
int n>[[gnu::always_inline]] nce uint32x4_t shift_right_round_narrow_high(uint32x2_t r, uint64x2_t a) {
return vrshrn_high_n_u64(r, a, n); }
637template <> [[gnu::always_inline]] nce float64x1_t reinterpret(uint32x2_t a) {
return vreinterpret_f64_u32(a); }
638[[gnu::always_inline]] nce uint32x4_t move_saturate_narrow_high(uint32x2_t r, uint64x2_t a) {
return vqmovn_high_u64(r, a); }
639template <
int lane>[[gnu::always_inline]] nce uint32x2_t multiply_add_lane(uint32x2_t a, uint32x2_t b, uint32x4_t v) {
return vmla_laneq_u32(a, b, v, lane); }
640template <
int lane>[[gnu::always_inline]] nce uint32x2_t multiply_subtract_lane(uint32x2_t a, uint32x2_t b, uint32x4_t v) {
return vmls_laneq_u32(a, b, v, lane); }
641[[gnu::always_inline]] nce uint32x2_t zip1(uint32x2_t a, uint32x2_t b) {
return vzip1_u32(a, b); }
642[[gnu::always_inline]] nce uint32x2_t zip2(uint32x2_t a, uint32x2_t b) {
return vzip2_u32(a, b); }
643[[gnu::always_inline]] nce uint32x2_t unzip1(uint32x2_t a, uint32x2_t b) {
return vuzp1_u32(a, b); }
644[[gnu::always_inline]] nce uint32x2_t unzip2(uint32x2_t a, uint32x2_t b) {
return vuzp2_u32(a, b); }
645[[gnu::always_inline]] nce uint32x2_t transpose_step_1(uint32x2_t a, uint32x2_t b) {
return vtrn1_u32(a, b); }
646[[gnu::always_inline]] nce uint32x2_t transpose_step_2(uint32x2_t a, uint32x2_t b) {
return vtrn2_u32(a, b); }
647template <
int lane>[[gnu::always_inline]] nce uint32x2_t multiply_lane(uint32x2_t a, uint32x4_t v) {
return vmul_laneq_u32(a, v, lane); }
648template <
int lane>[[gnu::always_inline]] nce uint64x2_t multiply_long_lane(uint32x2_t a, uint32x4_t v) {
return vmull_laneq_u32(a, v, lane); }
649template <
int n>[[gnu::always_inline]] nce uint32x4_t shift_right_saturate_narrow_unsigned_high(uint32x2_t r, int64x2_t a) {
return vqshrun_high_n_s64(r, a, n); }
650template <
int n>[[gnu::always_inline]] nce uint32x4_t shift_right_unsigned_saturate_narrow_high(uint32x2_t r, int64x2_t a) {
return vqrshrun_high_n_s64(r, a, n); }
651[[gnu::always_inline]] nce uint32x4_t move_unsigned_saturate_narrow_high(uint32x2_t r, int64x2_t a) {
return vqmovun_high_s64(r, a); }
652template <
int lane1,
int lane2>[[gnu::always_inline]] nce uint32x2_t copy_lane(uint32x2_t a, uint32x2_t b) {
return vcopy_lane_u32(a, lane1, b, lane2); }
653template <
int lane1,
int lane2>[[gnu::always_inline]] nce uint32x2_t copy_lane(uint32x2_t a, uint32x4_t b) {
return vcopy_laneq_u32(a, lane1, b, lane2); }
654[[gnu::always_inline]] nce uint32x4_t add_high(uint32x4_t a, uint16x8_t b) {
return vaddw_high_u16(a, b); }
655[[gnu::always_inline]] nce uint32x4_t multiply_add_long_high(uint32x4_t a, uint16x8_t b, uint16x8_t c) {
return vmlal_high_u16(a, b, c); }
656[[gnu::always_inline]] nce uint32x4_t multiply_subtract_long_high(uint32x4_t a, uint16x8_t b, uint16x8_t c) {
return vmlsl_high_u16(a, b, c); }
657[[gnu::always_inline]] nce uint32x4_t subtract_high(uint32x4_t a, uint16x8_t b) {
return vsubw_high_u16(a, b); }
658[[gnu::always_inline]] nce uint32x4_t subtract_absolute_add_high(uint32x4_t a, uint16x8_t b, uint16x8_t c) {
return vabal_high_u16(a, b, c); }
659[[gnu::always_inline]] nce uint32x4_t add_saturate(uint32x4_t a, int32x4_t b) {
return vsqaddq_u32(a, b); }
660[[gnu::always_inline]] nce uint64x2_t add_long_high(uint32x4_t a, uint32x4_t b) {
return vaddl_high_u32(a, b); }
661[[gnu::always_inline]] nce uint64x2_t multiply_long_high(uint32x4_t a, uint32x4_t b) {
return vmull_high_u32(a, b); }
662[[gnu::always_inline]] nce uint64x2_t subtract_long_high(uint32x4_t a, uint32x4_t b) {
return vsubl_high_u32(a, b); }
663[[gnu::always_inline]] nce uint64x2_t subtract_absolute_long_high(uint32x4_t a, uint32x4_t b) {
return vabdl_high_u32(a, b); }
664[[gnu::always_inline]] nce uint32x4_t pairwise_add(uint32x4_t a, uint32x4_t b) {
return vpaddq_u32(a, b); }
665[[gnu::always_inline]] nce uint32x4_t pairwise_max(uint32x4_t a, uint32x4_t b) {
return vpmaxq_u32(a, b); }
666[[gnu::always_inline]] nce uint32x4_t pairwise_min(uint32x4_t a, uint32x4_t b) {
return vpminq_u32(a, b); }
667[[gnu::always_inline]] nce uint32_t reduce_add(uint32x4_t a) {
return vaddvq_u32(a); }
668[[gnu::always_inline]] nce uint64_t reduce_add_long(uint32x4_t a) {
return vaddlvq_u32(a); }
669[[gnu::always_inline]]
inline uint32_t reduce_max(uint32x4_t a) {
return vmaxvq_u32(a); }
670[[gnu::always_inline]]
inline uint32_t reduce_min(uint32x4_t a) {
return vminvq_u32(a); }
671[[gnu::always_inline]] nce uint32x4_t equal_to_zero(uint32x4_t a) {
return vceqzq_u32(a); }
672template <
int n>[[gnu::always_inline]] nce uint64x2_t shift_left_long_high(uint32x4_t a) {
return vshll_high_n_u32(a, n); }
673template <> [[gnu::always_inline]] nce float64x2_t reinterpret(uint32x4_t a) {
return vreinterpretq_f64_u32(a); }
674[[gnu::always_inline]] nce uint64x2_t move_long_high(uint32x4_t a) {
return vmovl_high_u32(a); }
675template <
int lane>[[gnu::always_inline]] nce uint32x4_t multiply_add_long_lane(uint32x4_t a, uint16x4_t b, uint16x8_t v) {
return vmlal_laneq_u16(a, b, v, lane); }
676template <
int lane>[[gnu::always_inline]] nce uint32x4_t multiply_subtract_long_lane(uint32x4_t a, uint16x4_t b, uint16x8_t v) {
return vmlsl_laneq_u16(a, b, v, lane); }
677template <
int lane>[[gnu::always_inline]] nce uint32x4_t multiply_add_long_lane_high(uint32x4_t a, uint16x8_t b, uint16x4_t v) {
return vmlal_high_lane_u16(a, b, v, lane); }
678template <
int lane>[[gnu::always_inline]] nce uint32x4_t multiply_subtract_long_lane_high(uint32x4_t a, uint16x8_t b, uint16x4_t v) {
return vmlsl_high_lane_u16(a, b, v, lane); }
679template <
int lane>[[gnu::always_inline]] nce uint32x4_t multiply_add_long_lane_high(uint32x4_t a, uint16x8_t b, uint16x8_t v) {
return vmlal_high_laneq_u16(a, b, v, lane); }
680template <
int lane>[[gnu::always_inline]] nce uint32x4_t multiply_subtract_long_lane_high(uint32x4_t a, uint16x8_t b, uint16x8_t v) {
return vmlsl_high_laneq_u16(a, b, v, lane); }
681[[gnu::always_inline]] nce uint32x4_t multiply_add_long_high(uint32x4_t a, uint16x8_t b, uint16_t c) {
return vmlal_high_n_u16(a, b, c); }
682[[gnu::always_inline]] nce uint32x4_t multiply_subtract_long_high(uint32x4_t a, uint16x8_t b, uint16_t c) {
return vmlsl_high_n_u16(a, b, c); }
683template <
int lane>[[gnu::always_inline]] nce uint64x2_t multiply_long_lane_high(uint32x4_t a, uint32x2_t v) {
return vmull_high_lane_u32(a, v, lane); }
684template <
int lane>[[gnu::always_inline]] nce uint32x4_t multiply_add_lane(uint32x4_t a, uint32x4_t b, uint32x4_t v) {
return vmlaq_laneq_u32(a, b, v, lane); }
685template <
int lane>[[gnu::always_inline]] nce uint32x4_t multiply_subtract_lane(uint32x4_t a, uint32x4_t b, uint32x4_t v) {
return vmlsq_laneq_u32(a, b, v, lane); }
686template <
int lane>[[gnu::always_inline]] nce uint32x4_t multiply_lane(uint32x4_t a, uint32x4_t v) {
return vmulq_laneq_u32(a, v, lane); }
687template <
int lane>[[gnu::always_inline]] nce uint64x2_t multiply_long_lane_high(uint32x4_t a, uint32x4_t v) {
return vmull_high_laneq_u32(a, v, lane); }
688template <
int lane>[[gnu::always_inline]] nce uint32x2_t duplicate_lane(uint32x4_t vec) {
return vdup_laneq_u32(vec, lane); }
689template <
int lane>[[gnu::always_inline]] nce uint32x4_t duplicate_lane_quad(uint32x4_t vec) {
return vdupq_laneq_u32(vec, lane); }
690[[gnu::always_inline]] nce uint32x4_t zip1(uint32x4_t a, uint32x4_t b) {
return vzip1q_u32(a, b); }
691[[gnu::always_inline]] nce uint32x4_t zip2(uint32x4_t a, uint32x4_t b) {
return vzip2q_u32(a, b); }
692[[gnu::always_inline]] nce uint32x4_t unzip1(uint32x4_t a, uint32x4_t b) {
return vuzp1q_u32(a, b); }
693[[gnu::always_inline]] nce uint32x4_t unzip2(uint32x4_t a, uint32x4_t b) {
return vuzp2q_u32(a, b); }
694[[gnu::always_inline]] nce uint32x4_t transpose_step_1(uint32x4_t a, uint32x4_t b) {
return vtrn1q_u32(a, b); }
695[[gnu::always_inline]] nce uint32x4_t transpose_step_2(uint32x4_t a, uint32x4_t b) {
return vtrn2q_u32(a, b); }
700[[gnu::always_inline]] nce uint64x2_t multiply_long_high(uint32x4_t a, uint32_t b) {
return vmull_high_n_u32(a, b); }
701template <
int lane1,
int lane2>[[gnu::always_inline]] nce uint32x4_t copy_lane(uint32x4_t a, uint32x2_t b) {
return vcopyq_lane_u32(a, lane1, b, lane2); }
702template <
int lane1,
int lane2>[[gnu::always_inline]] nce uint32x4_t copy_lane(uint32x4_t a, uint32x4_t b) {
return vcopyq_laneq_u32(a, lane1, b, lane2); }
703[[gnu::always_inline]] nce float16x8_t convert_high(float16x4_t r, float32x4_t a) {
return vcvt_high_f16_f32(r, a); }
704template <> [[gnu::always_inline]] nce float64x1_t reinterpret(float16x4_t a) {
return vreinterpret_f64_f16(a); }
705[[gnu::always_inline]] nce float16x4_t round_using_current_mode(float16x4_t a) {
return vrndi_f16(a); }
706[[gnu::always_inline]] nce float16x4_t square_root(float16x4_t a) {
return vsqrt_f16(a); }
707[[gnu::always_inline]] nce float16x4_t divide(float16x4_t a, float16x4_t b) {
return vdiv_f16(a, b); }
708[[gnu::always_inline]]
inline float16_t reduce_max(float16x4_t a) {
return vmaxv_f16(a); }
709[[gnu::always_inline]]
inline float16_t reduce_max_strict(float16x4_t a) {
return vmaxnmv_f16(a); }
710[[gnu::always_inline]]
inline float16_t reduce_min(float16x4_t a) {
return vminv_f16(a); }
711[[gnu::always_inline]]
inline float16_t reduce_min_strict(float16x4_t a) {
return vminnmv_f16(a); }
712[[gnu::always_inline]] nce float16x4_t multiply_extended(float16x4_t a, float16x4_t b) {
return vmulx_f16(a, b); }
713template <
int lane>[[gnu::always_inline]] nce float16x4_t multiply_extended_lane(float16x4_t a, float16x4_t v) {
return vmulx_lane_f16(a, v, lane); }
714template <
int lane>[[gnu::always_inline]] nce float16x4_t multiply_add_fused_lane(float16x4_t a, float16x4_t b, float16x4_t v) {
return vfma_lane_f16(a, b, v, lane); }
715template <
int lane>[[gnu::always_inline]] nce float16x4_t multiply_subtract_fused_lane(float16x4_t a, float16x4_t b, float16x4_t v) {
return vfms_lane_f16(a, b, v, lane); }
716template <
int lane>[[gnu::always_inline]] nce float16x4_t multiply_add_fused_lane(float16x4_t a, float16x4_t b, float16x8_t v) {
return vfma_laneq_f16(a, b, v, lane); }
717template <
int lane>[[gnu::always_inline]] nce float16x4_t multiply_subtract_fused_lane(float16x4_t a, float16x4_t b, float16x8_t v) {
return vfms_laneq_f16(a, b, v, lane); }
718[[gnu::always_inline]] nce float16x4_t multiply_add_fused(float16x4_t a, float16x4_t b, float16_t n) {
return vfma_n_f16(a, b, n); }
719[[gnu::always_inline]] nce float16x4_t multiply_subtract_fused(float16x4_t a, float16x4_t b, float16_t n) {
return vfms_n_f16(a, b, n); }
720[[gnu::always_inline]] nce float16x4_t pairwise_max_strict(float16x4_t a, float16x4_t b) {
return vpmaxnm_f16(a, b); }
721[[gnu::always_inline]] nce float16x4_t pairwise_min_strict(float16x4_t a, float16x4_t b) {
return vpminnm_f16(a, b); }
722[[gnu::always_inline]] nce float16x4_t zip1(float16x4_t a, float16x4_t b) {
return vzip1_f16(a, b); }
723[[gnu::always_inline]] nce float16x4_t zip2(float16x4_t a, float16x4_t b) {
return vzip2_f16(a, b); }
724[[gnu::always_inline]] nce float16x4_t unzip1(float16x4_t a, float16x4_t b) {
return vuzp1_f16(a, b); }
725[[gnu::always_inline]] nce float16x4_t unzip2(float16x4_t a, float16x4_t b) {
return vuzp2_f16(a, b); }
726[[gnu::always_inline]] nce float16x4_t transpose_step_1(float16x4_t a, float16x4_t b) {
return vtrn1_f16(a, b); }
727[[gnu::always_inline]] nce float16x4_t transpose_step_2(float16x4_t a, float16x4_t b) {
return vtrn2_f16(a, b); }
728template <
int lane>[[gnu::always_inline]] nce float16x4_t multiply_lane(float16x4_t a, float16x8_t v) {
return vmul_laneq_f16(a, v, lane); }
729template <
int lane>[[gnu::always_inline]] nce float16x4_t multiply_extended_lane(float16x4_t a, float16x8_t v) {
return vmulx_laneq_f16(a, v, lane); }
730[[gnu::always_inline]] nce float16x4_t multiply_extended(float16x4_t a, float16_t n) {
return vmulx_n_f16(a, n); }
731[[gnu::always_inline]] nce float32x4_t convert_high(float16x8_t a) {
return vcvt_high_f32_f16(a); }
732template <> [[gnu::always_inline]] nce float64x2_t reinterpret(float16x8_t a) {
return vreinterpretq_f64_f16(a); }
733[[gnu::always_inline]] nce float16x8_t round_using_current_mode(float16x8_t a) {
return vrndiq_f16(a); }
734[[gnu::always_inline]] nce float16x8_t square_root(float16x8_t a) {
return vsqrtq_f16(a); }
735[[gnu::always_inline]] nce float16x8_t divide(float16x8_t a, float16x8_t b) {
return vdivq_f16(a, b); }
736[[gnu::always_inline]]
inline float16_t reduce_max(float16x8_t a) {
return vmaxvq_f16(a); }
737[[gnu::always_inline]]
inline float16_t reduce_max_strict(float16x8_t a) {
return vmaxnmvq_f16(a); }
738[[gnu::always_inline]]
inline float16_t reduce_min(float16x8_t a) {
return vminvq_f16(a); }
739[[gnu::always_inline]]
inline float16_t reduce_min_strict(float16x8_t a) {
return vminnmvq_f16(a); }
740template <
int lane>[[gnu::always_inline]] nce float16x8_t multiply_extended_lane(float16x8_t a, float16x4_t v) {
return vmulxq_lane_f16(a, v, lane); }
741template <
int lane>[[gnu::always_inline]] nce float16x8_t multiply_lane(float16x8_t a, float16x8_t v) {
return vmulq_laneq_f16(a, v, lane); }
742[[gnu::always_inline]] nce float16x8_t multiply_extended(float16x8_t a, float16x8_t b) {
return vmulxq_f16(a, b); }
743template <
int lane>[[gnu::always_inline]] nce float16x8_t multiply_extended_lane(float16x8_t a, float16x8_t v) {
return vmulxq_laneq_f16(a, v, lane); }
744template <
int lane>[[gnu::always_inline]] nce float16x8_t multiply_add_fused_lane(float16x8_t a, float16x8_t b, float16x4_t v) {
return vfmaq_lane_f16(a, b, v, lane); }
745template <
int lane>[[gnu::always_inline]] nce float16x8_t multiply_subtract_fused_lane(float16x8_t a, float16x8_t b, float16x4_t v) {
return vfmsq_lane_f16(a, b, v, lane); }
746template <
int lane>[[gnu::always_inline]] nce float16x8_t multiply_add_fused_lane(float16x8_t a, float16x8_t b, float16x8_t v) {
return vfmaq_laneq_f16(a, b, v, lane); }
747template <
int lane>[[gnu::always_inline]] nce float16x8_t multiply_subtract_fused_lane(float16x8_t a, float16x8_t b, float16x8_t v) {
return vfmsq_laneq_f16(a, b, v, lane); }
748[[gnu::always_inline]] nce float16x8_t multiply_add_fused(float16x8_t a, float16x8_t b, float16_t n) {
return vfmaq_n_f16(a, b, n); }
749[[gnu::always_inline]] nce float16x8_t multiply_subtract_fused(float16x8_t a, float16x8_t b, float16_t n) {
return vfmsq_n_f16(a, b, n); }
750[[gnu::always_inline]] nce float16x8_t pairwise_add(float16x8_t a, float16x8_t b) {
return vpaddq_f16(a, b); }
751[[gnu::always_inline]] nce float16x8_t pairwise_max(float16x8_t a, float16x8_t b) {
return vpmaxq_f16(a, b); }
752[[gnu::always_inline]] nce float16x8_t pairwise_max_strict(float16x8_t a, float16x8_t b) {
return vpmaxnmq_f16(a, b); }
753[[gnu::always_inline]] nce float16x8_t pairwise_min(float16x8_t a, float16x8_t b) {
return vpminq_f16(a, b); }
754[[gnu::always_inline]] nce float16x8_t pairwise_min_strict(float16x8_t a, float16x8_t b) {
return vpminnmq_f16(a, b); }
755[[gnu::always_inline]] nce float16x8_t zip1(float16x8_t a, float16x8_t b) {
return vzip1q_f16(a, b); }
756[[gnu::always_inline]] nce float16x8_t zip2(float16x8_t a, float16x8_t b) {
return vzip2q_f16(a, b); }
757[[gnu::always_inline]] nce float16x8_t unzip1(float16x8_t a, float16x8_t b) {
return vuzp1q_f16(a, b); }
758[[gnu::always_inline]] nce float16x8_t unzip2(float16x8_t a, float16x8_t b) {
return vuzp2q_f16(a, b); }
759[[gnu::always_inline]] nce float16x8_t transpose_step_1(float16x8_t a, float16x8_t b) {
return vtrn1q_f16(a, b); }
760[[gnu::always_inline]] nce float16x8_t transpose_step_2(float16x8_t a, float16x8_t b) {
return vtrn2q_f16(a, b); }
761[[gnu::always_inline]] nce float16x8_t multiply_extended(float16x8_t a, float16_t n) {
return vmulxq_n_f16(a, n); }
762template <
int lane>[[gnu::always_inline]] nce float16x4_t duplicate_lane(float16x8_t vec) {
return vdup_laneq_f16(vec, lane); }
763template <
int lane>[[gnu::always_inline]] nce float16x8_t duplicate_lane(float16x8_t vec) {
return vdupq_laneq_f16(vec, lane); }
764[[gnu::always_inline]] nce float32x2_t multiply_extended(float32x2_t a, float32x2_t b) {
return vmulx_f32(a, b); }
765template <
int lane>[[gnu::always_inline]] nce float32x2_t multiply_extended_lane(float32x2_t a, float32x2_t v) {
return vmulx_lane_f32(a, v, lane); }
766template <
int lane>[[gnu::always_inline]] nce float32x2_t multiply_add_fused_lane(float32x2_t a, float32x2_t b, float32x2_t v) {
return vfma_lane_f32(a, b, v, lane); }
767template <
int lane>[[gnu::always_inline]] nce float32x2_t multiply_subtract_fused_lane(float32x2_t a, float32x2_t b, float32x2_t v) {
return vfms_lane_f32(a, b, v, lane); }
768template <
int lane>[[gnu::always_inline]] nce float32x2_t multiply_add_fused_lane(float32x2_t a, float32x2_t b, float32x4_t v) {
return vfma_laneq_f32(a, b, v, lane); }
769template <
int lane>[[gnu::always_inline]] nce float32x2_t multiply_subtract_fused_lane(float32x2_t a, float32x2_t b, float32x4_t v) {
return vfms_laneq_f32(a, b, v, lane); }
770[[gnu::always_inline]] nce float32x2_t divide(float32x2_t a, float32x2_t b) {
return vdiv_f32(a, b); }
771[[gnu::always_inline]] nce float32x2_t square_root(float32x2_t a) {
return vsqrt_f32(a); }
772[[gnu::always_inline]] nce float32_t pairwise_add(float32x2_t a) {
return vpadds_f32(a); }
773[[gnu::always_inline]] nce float32_t pairwise_max(float32x2_t a) {
return vpmaxs_f32(a); }
774[[gnu::always_inline]] nce float32_t pairwise_max_strict(float32x2_t a) {
return vpmaxnms_f32(a); }
775[[gnu::always_inline]] nce float32_t pairwise_min(float32x2_t a) {
return vpmins_f32(a); }
776[[gnu::always_inline]] nce float32x2_t pairwise_max_strict(float32x2_t a, float32x2_t b) {
return vpmaxnm_f32(a, b); }
777[[gnu::always_inline]] nce float32x2_t pairwise_min_strict(float32x2_t a, float32x2_t b) {
return vpminnm_f32(a, b); }
778template <
int lane>[[gnu::always_inline]] nce float32x2_t multiply_extended_lane(float32x2_t a, float32x4_t v) {
return vmulx_laneq_f32(a, v, lane); }
779[[gnu::always_inline]] nce float32_t pairwise_min_strict(float32x2_t a) {
return vpminnms_f32(a); }
780[[gnu::always_inline]] nce float32_t reduce_add(float32x2_t a) {
return vaddv_f32(a); }
781[[gnu::always_inline]]
inline float32_t reduce_max(float32x2_t a) {
return vmaxv_f32(a); }
782[[gnu::always_inline]]
inline float32_t reduce_min(float32x2_t a) {
return vminv_f32(a); }
783[[gnu::always_inline]]
inline float32_t reduce_max_strict(float32x2_t a) {
return vmaxnmv_f32(a); }
784[[gnu::always_inline]]
inline float32_t reduce_min_strict(float32x2_t a) {
return vminnmv_f32(a); }
785[[gnu::always_inline]] nce uint32x2_t equal_to_zero(float32x2_t a) {
return vceqz_f32(a); }
786[[gnu::always_inline]] nce uint32x2_t greater_than_or_equal_to_zero(float32x2_t a) {
return vcgez_f32(a); }
787[[gnu::always_inline]] nce uint32x2_t less_than_or_equal_to_zero(float32x2_t a) {
return vclez_f32(a); }
788[[gnu::always_inline]] nce uint32x2_t greater_than_zero(float32x2_t a) {
return vcgtz_f32(a); }
789[[gnu::always_inline]] nce uint32x2_t less_than_zero(float32x2_t a) {
return vcltz_f32(a); }
790[[gnu::always_inline]] nce float32x4_t convert_high(float32x2_t r, float64x2_t a) {
return vcvt_high_f32_f64(r, a); }
791[[gnu::always_inline]] nce float64x2_t convert(float32x2_t a) {
return vcvt_f64_f32(a); }
792[[gnu::always_inline]] nce float32x4_t convert_round_to_odd_high(float32x2_t r, float64x2_t a) {
return vcvtx_high_f32_f64(r, a); }
793template <> [[gnu::always_inline]] nce float64x1_t reinterpret(float32x2_t a) {
return vreinterpret_f64_f32(a); }
794template <
int lane>[[gnu::always_inline]] nce float32x2_t multiply_add_lane(float32x2_t a, float32x2_t b, float32x4_t v) {
return vmla_laneq_f32(a, b, v, lane); }
795template <
int lane>[[gnu::always_inline]] nce float32x2_t multiply_subtract_lane(float32x2_t a, float32x2_t b, float32x4_t v) {
return vmls_laneq_f32(a, b, v, lane); }
796[[gnu::always_inline]] nce float32x2_t round_to_32bit_integer(float32x2_t a) {
return vrnd32z_f32(a); }
797[[gnu::always_inline]] nce float32x2_t round_to_64bit_integer(float32x2_t a) {
return vrnd64z_f32(a); }
798[[gnu::always_inline]] nce float32x2_t round_to_32bit_integer_using_current_mode(float32x2_t a) {
return vrnd32x_f32(a); }
799[[gnu::always_inline]] nce float32x2_t round_to_64bit_integer_using_current_mode(float32x2_t a) {
return vrnd64x_f32(a); }
800[[gnu::always_inline]] nce float32x2_t zip1(float32x2_t a, float32x2_t b) {
return vzip1_f32(a, b); }
801[[gnu::always_inline]] nce float32x2_t zip2(float32x2_t a, float32x2_t b) {
return vzip2_f32(a, b); }
802[[gnu::always_inline]] nce float32x2_t unzip1(float32x2_t a, float32x2_t b) {
return vuzp1_f32(a, b); }
803[[gnu::always_inline]] nce float32x2_t unzip2(float32x2_t a, float32x2_t b) {
return vuzp2_f32(a, b); }
804[[gnu::always_inline]] nce float32x2_t transpose_step_1(float32x2_t a, float32x2_t b) {
return vtrn1_f32(a, b); }
805[[gnu::always_inline]] nce float32x2_t transpose_step_2(float32x2_t a, float32x2_t b) {
return vtrn2_f32(a, b); }
806template <
int lane>[[gnu::always_inline]] nce float32x2_t multiply_lane(float32x2_t a, float32x4_t v) {
return vmul_laneq_f32(a, v, lane); }
807template <
int lane1,
int lane2>[[gnu::always_inline]] nce float32x2_t copy_lane(float32x2_t a, float32x2_t b) {
return vcopy_lane_f32(a, lane1, b, lane2); }
808template <
int lane1,
int lane2>[[gnu::always_inline]] nce float32x2_t copy_lane(float32x2_t a, float32x4_t b) {
return vcopy_laneq_f32(a, lane1, b, lane2); }
809template <
int lane>[[gnu::always_inline]] nce float32x4_t multiply_extended_lane(float32x4_t a, float32x2_t v) {
return vmulxq_lane_f32(a, v, lane); }
810[[gnu::always_inline]] nce float32x4_t multiply_extended(float32x4_t a, float32x4_t b) {
return vmulxq_f32(a, b); }
811template <
int lane>[[gnu::always_inline]] nce float32x4_t multiply_extended_lane(float32x4_t a, float32x4_t v) {
return vmulxq_laneq_f32(a, v, lane); }
812template <
int lane>[[gnu::always_inline]] nce float32x4_t multiply_add_fused_lane(float32x4_t a, float32x4_t b, float32x2_t v) {
return vfmaq_lane_f32(a, b, v, lane); }
813template <
int lane>[[gnu::always_inline]] nce float32x4_t multiply_subtract_fused_lane(float32x4_t a, float32x4_t b, float32x2_t v) {
return vfmsq_lane_f32(a, b, v, lane); }
814template <
int lane>[[gnu::always_inline]] nce float32x4_t multiply_add_fused_lane(float32x4_t a, float32x4_t b, float32x4_t v) {
return vfmaq_laneq_f32(a, b, v, lane); }
815template <
int lane>[[gnu::always_inline]] nce float32x4_t multiply_subtract_fused_lane(float32x4_t a, float32x4_t b, float32x4_t v) {
return vfmsq_laneq_f32(a, b, v, lane); }
816[[gnu::always_inline]] nce float32x4_t divide(float32x4_t a, float32x4_t b) {
return vdivq_f32(a, b); }
817[[gnu::always_inline]] nce float32x4_t square_root(float32x4_t a) {
return vsqrtq_f32(a); }
818[[gnu::always_inline]] nce float32x4_t pairwise_add(float32x4_t a, float32x4_t b) {
return vpaddq_f32(a, b); }
819[[gnu::always_inline]] nce float32x4_t pairwise_max(float32x4_t a, float32x4_t b) {
return vpmaxq_f32(a, b); }
820[[gnu::always_inline]] nce float32x4_t pairwise_min(float32x4_t a, float32x4_t b) {
return vpminq_f32(a, b); }
821[[gnu::always_inline]] nce float32x4_t pairwise_max_strict(float32x4_t a, float32x4_t b) {
return vpmaxnmq_f32(a, b); }
822[[gnu::always_inline]] nce float32x4_t pairwise_min_strict(float32x4_t a, float32x4_t b) {
return vpminnmq_f32(a, b); }
823[[gnu::always_inline]] nce float32_t reduce_add(float32x4_t a) {
return vaddvq_f32(a); }
824[[gnu::always_inline]]
inline float32_t reduce_max(float32x4_t a) {
return vmaxvq_f32(a); }
825[[gnu::always_inline]]
inline float32_t reduce_min(float32x4_t a) {
return vminvq_f32(a); }
826[[gnu::always_inline]]
inline float32_t reduce_max_strict(float32x4_t a) {
return vmaxnmvq_f32(a); }
827[[gnu::always_inline]]
inline float32_t reduce_min_strict(float32x4_t a) {
return vminnmvq_f32(a); }
828[[gnu::always_inline]] nce uint32x4_t equal_to_zero(float32x4_t a) {
return vceqzq_f32(a); }
829[[gnu::always_inline]] nce uint32x4_t greater_than_or_equal_to_zero(float32x4_t a) {
return vcgezq_f32(a); }
830[[gnu::always_inline]] nce uint32x4_t less_than_or_equal_to_zero(float32x4_t a) {
return vclezq_f32(a); }
831[[gnu::always_inline]] nce uint32x4_t greater_than_zero(float32x4_t a) {
return vcgtzq_f32(a); }
832[[gnu::always_inline]] nce uint32x4_t less_than_zero(float32x4_t a) {
return vcltzq_f32(a); }
833[[gnu::always_inline]] nce float64x2_t convert_high(float32x4_t a) {
return vcvt_high_f64_f32(a); }
834template <> [[gnu::always_inline]] nce float64x2_t reinterpret(float32x4_t a) {
return vreinterpretq_f64_f32(a); }
835template <
int lane>[[gnu::always_inline]] nce float32x4_t multiply_lane(float32x4_t a, float32x4_t v) {
return vmulq_laneq_f32(a, v, lane); }
836template <
int lane>[[gnu::always_inline]] nce float32x2_t duplicate_lane(float32x4_t vec) {
return vdup_laneq_f32(vec, lane); }
837template <
int lane>[[gnu::always_inline]] nce float32x4_t duplicate_lane_quad(float32x4_t vec) {
return vdupq_laneq_f32(vec, lane); }
838[[gnu::always_inline]] nce float32x4_t round_to_32bit_integer(float32x4_t a) {
return vrnd32zq_f32(a); }
839[[gnu::always_inline]] nce float32x4_t round_to_64bit_integer(float32x4_t a) {
return vrnd64zq_f32(a); }
840[[gnu::always_inline]] nce float32x4_t round_to_32bit_integer_using_current_mode(float32x4_t a) {
return vrnd32xq_f32(a); }
841[[gnu::always_inline]] nce float32x4_t round_to_64bit_integer_using_current_mode(float32x4_t a) {
return vrnd64xq_f32(a); }
842template <
int lane>[[gnu::always_inline]] nce float32x4_t multiply_add_lane(float32x4_t a, float32x4_t b, float32x4_t v) {
return vmlaq_laneq_f32(a, b, v, lane); }
843template <
int lane>[[gnu::always_inline]] nce float32x4_t multiply_subtract_lane(float32x4_t a, float32x4_t b, float32x4_t v) {
return vmlsq_laneq_f32(a, b, v, lane); }
844[[gnu::always_inline]] nce float32x4_t zip1(float32x4_t a, float32x4_t b) {
return vzip1q_f32(a, b); }
845[[gnu::always_inline]] nce float32x4_t zip2(float32x4_t a, float32x4_t b) {
return vzip2q_f32(a, b); }
846[[gnu::always_inline]] nce float32x4_t unzip1(float32x4_t a, float32x4_t b) {
return vuzp1q_f32(a, b); }
847[[gnu::always_inline]] nce float32x4_t unzip2(float32x4_t a, float32x4_t b) {
return vuzp2q_f32(a, b); }
848[[gnu::always_inline]] nce float32x4_t transpose_step_1(float32x4_t a, float32x4_t b) {
return vtrn1q_f32(a, b); }
849[[gnu::always_inline]] nce float32x4_t transpose_step_2(float32x4_t a, float32x4_t b) {
return vtrn2q_f32(a, b); }
850template <
int lane1,
int lane2>[[gnu::always_inline]] nce float32x4_t copy_lane(float32x4_t a, float32x2_t b) {
return vcopyq_lane_f32(a, lane1, b, lane2); }
851template <
int lane1,
int lane2>[[gnu::always_inline]] nce float32x4_t copy_lane(float32x4_t a, float32x4_t b) {
return vcopyq_laneq_f32(a, lane1, b, lane2); }
852[[gnu::always_inline]] nce float64x1_t add(float64x1_t a, float64x1_t b) {
return vadd_f64(a, b); }
853[[gnu::always_inline]] nce float64x1_t multiply(float64x1_t a, float64x1_t b) {
return vmul_f64(a, b); }
854[[gnu::always_inline]] nce float64x1_t multiply_extended(float64x1_t a, float64x1_t b) {
return vmulx_f64(a, b); }
855template <
int lane>[[gnu::always_inline]] nce float64x1_t multiply_extended_lane(float64x1_t a, float64x1_t v) {
return vmulx_lane_f64(a, v, lane); }
856[[gnu::always_inline]] nce float64x1_t multiply_add(float64x1_t a, float64x1_t b, float64x1_t c) {
return vmla_f64(a, b, c); }
857[[gnu::always_inline]] nce float64x1_t multiply_subtract(float64x1_t a, float64x1_t b, float64x1_t c) {
return vmls_f64(a, b, c); }
858[[gnu::always_inline]] nce float64x1_t multiply_add_fused(float64x1_t a, float64x1_t b, float64x1_t c) {
return vfma_f64(a, b, c); }
859template <
int lane>[[gnu::always_inline]] nce float64x1_t multiply_add_fused_lane(float64x1_t a, float64x1_t b, float64x1_t v) {
return vfma_lane_f64(a, b, v, lane); }
860[[gnu::always_inline]] nce float64x1_t multiply_subtract_fused(float64x1_t a, float64x1_t b, float64x1_t c) {
return vfms_f64(a, b, c); }
861template <
int lane>[[gnu::always_inline]] nce float64x1_t multiply_subtract_fused_lane(float64x1_t a, float64x1_t b, float64x1_t v) {
return vfms_lane_f64(a, b, v, lane); }
862template <
int lane>[[gnu::always_inline]] nce float64x1_t multiply_add_fused_lane(float64x1_t a, float64x1_t b, float64x2_t v) {
return vfma_laneq_f64(a, b, v, lane); }
863template <
int lane>[[gnu::always_inline]] nce float64x1_t multiply_subtract_fused_lane(float64x1_t a, float64x1_t b, float64x2_t v) {
return vfms_laneq_f64(a, b, v, lane); }
864[[gnu::always_inline]] nce float64x1_t divide(float64x1_t a, float64x1_t b) {
return vdiv_f64(a, b); }
865[[gnu::always_inline]] nce float64x1_t subtract(float64x1_t a, float64x1_t b) {
return vsub_f64(a, b); }
866[[gnu::always_inline]] nce float64x1_t subtract_absolute(float64x1_t a, float64x1_t b) {
return vabd_f64(a, b); }
867[[gnu::always_inline]] nce float64x1_t absolute(float64x1_t a) {
return vabs_f64(a); }
868template <> [[gnu::always_inline]] nce float64x1_t max(float64x1_t a, float64x1_t b) {
return vmax_f64(a, b); }
869template <> [[gnu::always_inline]] nce float64x1_t min(float64x1_t a, float64x1_t b) {
return vmin_f64(a, b); }
870template <> [[gnu::always_inline]] nce float64x1_t max_strict(float64x1_t a, float64x1_t b) {
return vmaxnm_f64(a, b); }
871template <> [[gnu::always_inline]] nce float64x1_t min_strict(float64x1_t a, float64x1_t b) {
return vminnm_f64(a, b); }
872[[gnu::always_inline]] nce float64x1_t round(float64x1_t a) {
return vrnd_f64(a); }
873[[gnu::always_inline]] nce float64x1_t round_toward_negative_infinity(float64x1_t a) {
return vrndm_f64(a); }
874[[gnu::always_inline]] nce float64x1_t round_toward_positive_infinity(float64x1_t a) {
return vrndp_f64(a); }
875[[gnu::always_inline]] nce float64x1_t round_to_nearest_with_ties_away_from_zero(float64x1_t a) {
return vrnda_f64(a); }
876[[gnu::always_inline]] nce float64x1_t round_using_current_mode(float64x1_t a) {
return vrndi_f64(a); }
877[[gnu::always_inline]] nce float64x1_t round_inexact(float64x1_t a) {
return vrndx_f64(a); }
878[[gnu::always_inline]] nce float64x1_t reciprocal_estimate(float64x1_t a) {
return vrecpe_f64(a); }
879[[gnu::always_inline]] nce float64x1_t reciprocal_step(float64x1_t a, float64x1_t b) {
return vrecps_f64(a, b); }
880[[gnu::always_inline]] nce float64x1_t reciprocal_sqrt_estimate(float64x1_t a) {
return vrsqrte_f64(a); }
881[[gnu::always_inline]] nce float64x1_t reciprocal_sqrt_step(float64x1_t a, float64x1_t b) {
return vrsqrts_f64(a, b); }
882[[gnu::always_inline]] nce float64x1_t square_root(float64x1_t a) {
return vsqrt_f64(a); }
883[[gnu::always_inline]] nce uint64x1_t equal(float64x1_t a, float64x1_t b) {
return vceq_f64(a, b); }
884template <
int lane>[[gnu::always_inline]] nce float64x1_t multiply_extended_lane(float64x1_t a, float64x2_t v) {
return vmulx_laneq_f64(a, v, lane); }
885[[gnu::always_inline]] nce uint64x1_t equal_to_zero(float64x1_t a) {
return vceqz_f64(a); }
886[[gnu::always_inline]] nce uint64x1_t greater_than_or_equal(float64x1_t a, float64x1_t b) {
return vcge_f64(a, b); }
887[[gnu::always_inline]] nce uint64x1_t greater_than_or_equal_to_zero(float64x1_t a) {
return vcgez_f64(a); }
888[[gnu::always_inline]] nce uint64x1_t less_than_or_equal(float64x1_t a, float64x1_t b) {
return vcle_f64(a, b); }
889[[gnu::always_inline]] nce uint64x1_t less_than_or_equal_to_zero(float64x1_t a) {
return vclez_f64(a); }
890[[gnu::always_inline]] nce uint64x1_t greater_than(float64x1_t a, float64x1_t b) {
return vcgt_f64(a, b); }
891[[gnu::always_inline]] nce uint64x1_t greater_than_zero(float64x1_t a) {
return vcgtz_f64(a); }
892[[gnu::always_inline]] nce uint64x1_t less_than(float64x1_t a, float64x1_t b) {
return vclt_f64(a, b); }
893[[gnu::always_inline]] nce uint64x1_t less_than_zero(float64x1_t a) {
return vcltz_f64(a); }
894[[gnu::always_inline]] nce uint64x1_t absolute_greater_than_or_equal(float64x1_t a, float64x1_t b) {
return vcage_f64(a, b); }
895[[gnu::always_inline]] nce uint64x1_t absolute_less_than_or_equal(float64x1_t a, float64x1_t b) {
return vcale_f64(a, b); }
896[[gnu::always_inline]] nce uint64x1_t absolute_greater_than(float64x1_t a, float64x1_t b) {
return vcagt_f64(a, b); }
897[[gnu::always_inline]] nce uint64x1_t absolute_less_than(float64x1_t a, float64x1_t b) {
return vcalt_f64(a, b); }
898template <> [[gnu::always_inline]] nce int64x1_t convert(float64x1_t a) {
return vcvt_s64_f64(a); }
899template <> [[gnu::always_inline]] nce uint64x1_t convert(float64x1_t a) {
return vcvt_u64_f64(a); }
900template <> [[gnu::always_inline]] nce int64x1_t convert_round_to_nearest_with_ties_to_even(float64x1_t a) {
return vcvtn_s64_f64(a); }
901template <> [[gnu::always_inline]] nce uint64x1_t convert_round_to_nearest_with_ties_to_even(float64x1_t a) {
return vcvtn_u64_f64(a); }
902template <> [[gnu::always_inline]] nce int64x1_t convert_round_toward_negative_infinity(float64x1_t a) {
return vcvtm_s64_f64(a); }
903template <> [[gnu::always_inline]] nce uint64x1_t convert_round_toward_negative_infinity(float64x1_t a) {
return vcvtm_u64_f64(a); }
904template <> [[gnu::always_inline]] nce int64x1_t convert_round_toward_positive_infinity(float64x1_t a) {
return vcvtp_s64_f64(a); }
905template <> [[gnu::always_inline]] nce uint64x1_t convert_round_toward_positive_infinity(float64x1_t a) {
return vcvtp_u64_f64(a); }
906template <> [[gnu::always_inline]] nce int64x1_t convert_round_to_nearest_with_ties_away_from_zero(float64x1_t a) {
return vcvta_s64_f64(a); }
907template <> [[gnu::always_inline]] nce uint64x1_t convert_round_to_nearest_with_ties_away_from_zero(float64x1_t a) {
return vcvta_u64_f64(a); }
908template <
int n>[[gnu::always_inline]] nce int64x1_t convert(float64x1_t a) {
return vcvt_n_s64_f64(a, n); }
909template <
int n>[[gnu::always_inline]] nce uint64x1_t convert(float64x1_t a) {
return vcvt_n_u64_f64(a, n); }
910template <> [[gnu::always_inline]] nce poly64x1_t reinterpret(float64x1_t a) {
return vreinterpret_p64_f64(a); }
911template <> [[gnu::always_inline]] nce int8x8_t reinterpret(float64x1_t a) {
return vreinterpret_s8_f64(a); }
912template <> [[gnu::always_inline]] nce int16x4_t reinterpret(float64x1_t a) {
return vreinterpret_s16_f64(a); }
913template <> [[gnu::always_inline]] nce int32x2_t reinterpret(float64x1_t a) {
return vreinterpret_s32_f64(a); }
914template <> [[gnu::always_inline]] nce uint8x8_t reinterpret(float64x1_t a) {
return vreinterpret_u8_f64(a); }
915template <> [[gnu::always_inline]] nce uint16x4_t reinterpret(float64x1_t a) {
return vreinterpret_u16_f64(a); }
916template <> [[gnu::always_inline]] nce uint32x2_t reinterpret(float64x1_t a) {
return vreinterpret_u32_f64(a); }
917template <> [[gnu::always_inline]] nce poly8x8_t reinterpret(float64x1_t a) {
return vreinterpret_p8_f64(a); }
918template <> [[gnu::always_inline]] nce poly16x4_t reinterpret(float64x1_t a) {
return vreinterpret_p16_f64(a); }
919template <> [[gnu::always_inline]] nce uint64x1_t reinterpret(float64x1_t a) {
return vreinterpret_u64_f64(a); }
920template <> [[gnu::always_inline]] nce int64x1_t reinterpret(float64x1_t a) {
return vreinterpret_s64_f64(a); }
921template <> [[gnu::always_inline]] nce float16x4_t reinterpret(float64x1_t a) {
return vreinterpret_f16_f64(a); }
922template <> [[gnu::always_inline]] nce float32x2_t reinterpret(float64x1_t a) {
return vreinterpret_f32_f64(a); }
923template <
int lane>[[gnu::always_inline]] nce float64x1_t multiply_lane(float64x1_t a, float64x1_t v) {
return vmul_lane_f64(a, v, lane); }
924[[gnu::always_inline]] nce float64x1_t multiply_add_fused(float64x1_t a, float64x1_t b, float64_t n) {
return vfma_n_f64(a, b, n); }
925[[gnu::always_inline]] nce float64x1_t multiply_subtract_fused(float64x1_t a, float64x1_t b, float64_t n) {
return vfms_n_f64(a, b, n); }
926template <
int lane>[[gnu::always_inline]] nce float64x1_t multiply_lane(float64x1_t a, float64x2_t v) {
return vmul_laneq_f64(a, v, lane); }
927[[gnu::always_inline]] nce float64x1_t multiply(float64x1_t a, float64_t b) {
return vmul_n_f64(a, b); }
928[[gnu::always_inline]] nce float64x1_t negate(float64x1_t a) {
return vneg_f64(a); }
929template <
int lane>[[gnu::always_inline]] nce float64x1_t duplicate_lane(float64x1_t vec) {
return vdup_lane_f64(vec, lane); }
930template <
int lane>[[gnu::always_inline]] nce float64x2_t duplicate_lane_quad(float64x1_t vec) {
return vdupq_lane_f64(vec, lane); }
931[[gnu::always_inline]] nce float64x2_t combine(float64x1_t low, float64x1_t high) {
return vcombine_f64(low, high); }
932template <
int lane>[[gnu::always_inline]] nce float64_t duplicate_lane(float64x1_t vec) {
return vdupd_lane_f64(vec, lane); }
933template <
int lane>[[gnu::always_inline]] nce float64_t get_lane(float64x1_t v) {
return vget_lane_f64(v, lane); }
934template <
int n>[[gnu::always_inline]] nce float64x1_t extract(float64x1_t a, float64x1_t b) {
return vext_f64(a, b, n); }
935template <
int lane1,
int lane2>[[gnu::always_inline]] nce float64x1_t copy_lane(float64x1_t a, float64x1_t b) {
return vcopy_lane_f64(a, lane1, b, lane2); }
936template <
int lane1,
int lane2>[[gnu::always_inline]] nce float64x1_t copy_lane(float64x1_t a, float64x2_t b) {
return vcopy_laneq_f64(a, lane1, b, lane2); }
937[[gnu::always_inline]] nce float64x1_t round_to_32bit_integer(float64x1_t a) {
return vrnd32z_f64(a); }
938[[gnu::always_inline]] nce float64x1_t round_to_64bit_integer(float64x1_t a) {
return vrnd64z_f64(a); }
939[[gnu::always_inline]] nce float64x1_t round_to_32bit_integer_using_current_mode(float64x1_t a) {
return vrnd32x_f64(a); }
940[[gnu::always_inline]] nce float64x1_t round_to_64bit_integer_using_current_mode(float64x1_t a) {
return vrnd64x_f64(a); }
941template <> [[gnu::always_inline]] nce bfloat16x4_t reinterpret(float64x1_t a) {
return vreinterpret_bf16_f64(a); }
942template <
int lane>[[gnu::always_inline]] nce float64x2_t multiply_extended_lane(float64x2_t a, float64x1_t v) {
return vmulxq_lane_f64(a, v, lane); }
943[[gnu::always_inline]] nce float64x2_t add(float64x2_t a, float64x2_t b) {
return vaddq_f64(a, b); }
944[[gnu::always_inline]] nce float64x2_t multiply(float64x2_t a, float64x2_t b) {
return vmulq_f64(a, b); }
945[[gnu::always_inline]] nce float64x2_t multiply_extended(float64x2_t a, float64x2_t b) {
return vmulxq_f64(a, b); }
946template <
int lane>[[gnu::always_inline]] nce float64x2_t multiply_extended_lane(float64x2_t a, float64x2_t v) {
return vmulxq_laneq_f64(a, v, lane); }
947template <
int lane>[[gnu::always_inline]] nce float64x2_t multiply_add_fused_lane(float64x2_t a, float64x2_t b, float64x1_t v) {
return vfmaq_lane_f64(a, b, v, lane); }
948template <
int lane>[[gnu::always_inline]] nce float64x2_t multiply_subtract_fused_lane(float64x2_t a, float64x2_t b, float64x1_t v) {
return vfmsq_lane_f64(a, b, v, lane); }
949[[gnu::always_inline]] nce float64x2_t multiply_add(float64x2_t a, float64x2_t b, float64x2_t c) {
return vmlaq_f64(a, b, c); }
950[[gnu::always_inline]] nce float64x2_t multiply_subtract(float64x2_t a, float64x2_t b, float64x2_t c) {
return vmlsq_f64(a, b, c); }
951[[gnu::always_inline]] nce float64x2_t multiply_add_fused(float64x2_t a, float64x2_t b, float64x2_t c) {
return vfmaq_f64(a, b, c); }
952template <
int lane>[[gnu::always_inline]] nce float64x2_t multiply_add_fused_lane(float64x2_t a, float64x2_t b, float64x2_t v) {
return vfmaq_laneq_f64(a, b, v, lane); }
953[[gnu::always_inline]] nce float64x2_t multiply_subtract_fused(float64x2_t a, float64x2_t b, float64x2_t c) {
return vfmsq_f64(a, b, c); }
954template <
int lane>[[gnu::always_inline]] nce float64x2_t multiply_subtract_fused_lane(float64x2_t a, float64x2_t b, float64x2_t v) {
return vfmsq_laneq_f64(a, b, v, lane); }
955[[gnu::always_inline]] nce float64x2_t divide(float64x2_t a, float64x2_t b) {
return vdivq_f64(a, b); }
956[[gnu::always_inline]] nce float64x2_t subtract(float64x2_t a, float64x2_t b) {
return vsubq_f64(a, b); }
957[[gnu::always_inline]] nce float64x2_t subtract_absolute(float64x2_t a, float64x2_t b) {
return vabdq_f64(a, b); }
958[[gnu::always_inline]] nce float64x2_t absolute(float64x2_t a) {
return vabsq_f64(a); }
959template <> [[gnu::always_inline]] nce float64x2_t max(float64x2_t a, float64x2_t b) {
return vmaxq_f64(a, b); }
960template <> [[gnu::always_inline]] nce float64x2_t min(float64x2_t a, float64x2_t b) {
return vminq_f64(a, b); }
961template <> [[gnu::always_inline]] nce float64x2_t max_strict(float64x2_t a, float64x2_t b) {
return vmaxnmq_f64(a, b); }
962template <> [[gnu::always_inline]] nce float64x2_t min_strict(float64x2_t a, float64x2_t b) {
return vminnmq_f64(a, b); }
963[[gnu::always_inline]] nce float64x2_t round(float64x2_t a) {
return vrndq_f64(a); }
964[[gnu::always_inline]] nce float64x2_t round_toward_negative_infinity(float64x2_t a) {
return vrndmq_f64(a); }
965[[gnu::always_inline]] nce float64x2_t round_toward_positive_infinity(float64x2_t a) {
return vrndpq_f64(a); }
966[[gnu::always_inline]] nce float64x2_t round_to_nearest_with_ties_away_from_zero(float64x2_t a) {
return vrndaq_f64(a); }
967[[gnu::always_inline]] nce float64x2_t round_using_current_mode(float64x2_t a) {
return vrndiq_f64(a); }
968[[gnu::always_inline]] nce float64x2_t round_inexact(float64x2_t a) {
return vrndxq_f64(a); }
969[[gnu::always_inline]] nce float64x2_t reciprocal_estimate(float64x2_t a) {
return vrecpeq_f64(a); }
970[[gnu::always_inline]] nce float64x2_t reciprocal_step(float64x2_t a, float64x2_t b) {
return vrecpsq_f64(a, b); }
971[[gnu::always_inline]] nce float64x2_t reciprocal_sqrt_estimate(float64x2_t a) {
return vrsqrteq_f64(a); }
972[[gnu::always_inline]] nce float64x2_t reciprocal_sqrt_step(float64x2_t a, float64x2_t b) {
return vrsqrtsq_f64(a, b); }
973[[gnu::always_inline]] nce float64x2_t square_root(float64x2_t a) {
return vsqrtq_f64(a); }
974[[gnu::always_inline]] nce float64x2_t pairwise_add(float64x2_t a, float64x2_t b) {
return vpaddq_f64(a, b); }
975[[gnu::always_inline]] nce float64_t pairwise_add(float64x2_t a) {
return vpaddd_f64(a); }
976[[gnu::always_inline]] nce float64x2_t pairwise_max(float64x2_t a, float64x2_t b) {
return vpmaxq_f64(a, b); }
977[[gnu::always_inline]] nce float64_t pairwise_max(float64x2_t a) {
return vpmaxqd_f64(a); }
978[[gnu::always_inline]] nce float64_t pairwise_max_strict(float64x2_t a) {
return vpmaxnmqd_f64(a); }
979[[gnu::always_inline]] nce float64x2_t pairwise_min(float64x2_t a, float64x2_t b) {
return vpminq_f64(a, b); }
980[[gnu::always_inline]] nce float64x2_t pairwise_max_strict(float64x2_t a, float64x2_t b) {
return vpmaxnmq_f64(a, b); }
981[[gnu::always_inline]] nce float64x2_t pairwise_min_strict(float64x2_t a, float64x2_t b) {
return vpminnmq_f64(a, b); }
982[[gnu::always_inline]] nce float64_t pairwise_min(float64x2_t a) {
return vpminqd_f64(a); }
983[[gnu::always_inline]] nce float64_t pairwise_min_strict(float64x2_t a) {
return vpminnmqd_f64(a); }
984[[gnu::always_inline]] nce float64_t reduce_add(float64x2_t a) {
return vaddvq_f64(a); }
985[[gnu::always_inline]]
inline float64_t reduce_max(float64x2_t a) {
return vmaxvq_f64(a); }
986[[gnu::always_inline]]
inline float64_t reduce_min(float64x2_t a) {
return vminvq_f64(a); }
987[[gnu::always_inline]]
inline float64_t reduce_max_strict(float64x2_t a) {
return vmaxnmvq_f64(a); }
988[[gnu::always_inline]]
inline float64_t reduce_min_strict(float64x2_t a) {
return vminnmvq_f64(a); }
989[[gnu::always_inline]] nce uint64x2_t equal(float64x2_t a, float64x2_t b) {
return vceqq_f64(a, b); }
990[[gnu::always_inline]] nce uint64x2_t equal_to_zero(float64x2_t a) {
return vceqzq_f64(a); }
991[[gnu::always_inline]] nce uint64x2_t greater_than_or_equal(float64x2_t a, float64x2_t b) {
return vcgeq_f64(a, b); }
992[[gnu::always_inline]] nce uint64x2_t greater_than_or_equal_to_zero(float64x2_t a) {
return vcgezq_f64(a); }
993[[gnu::always_inline]] nce uint64x2_t less_than_or_equal(float64x2_t a, float64x2_t b) {
return vcleq_f64(a, b); }
994[[gnu::always_inline]] nce uint64x2_t less_than_or_equal_to_zero(float64x2_t a) {
return vclezq_f64(a); }
995[[gnu::always_inline]] nce uint64x2_t greater_than(float64x2_t a, float64x2_t b) {
return vcgtq_f64(a, b); }
996[[gnu::always_inline]] nce uint64x2_t greater_than_zero(float64x2_t a) {
return vcgtzq_f64(a); }
997[[gnu::always_inline]] nce uint64x2_t less_than(float64x2_t a, float64x2_t b) {
return vcltq_f64(a, b); }
998[[gnu::always_inline]] nce uint64x2_t less_than_zero(float64x2_t a) {
return vcltzq_f64(a); }
999[[gnu::always_inline]] nce uint64x2_t absolute_greater_than_or_equal(float64x2_t a, float64x2_t b) {
return vcageq_f64(a, b); }
1000[[gnu::always_inline]] nce uint64x2_t absolute_less_than_or_equal(float64x2_t a, float64x2_t b) {
return vcaleq_f64(a, b); }
1001[[gnu::always_inline]] nce uint64x2_t absolute_greater_than(float64x2_t a, float64x2_t b) {
return vcagtq_f64(a, b); }
1002[[gnu::always_inline]] nce uint64x2_t absolute_less_than(float64x2_t a, float64x2_t b) {
return vcaltq_f64(a, b); }
1003template <> [[gnu::always_inline]] nce int64x2_t convert(float64x2_t a) {
return vcvtq_s64_f64(a); }
1004template <> [[gnu::always_inline]] nce uint64x2_t convert(float64x2_t a) {
return vcvtq_u64_f64(a); }
1005template <> [[gnu::always_inline]] nce int64x2_t convert_round_to_nearest_with_ties_to_even(float64x2_t a) {
return vcvtnq_s64_f64(a); }
1006template <> [[gnu::always_inline]] nce uint64x2_t convert_round_to_nearest_with_ties_to_even(float64x2_t a) {
return vcvtnq_u64_f64(a); }
1007template <> [[gnu::always_inline]] nce int64x2_t convert_round_toward_negative_infinity(float64x2_t a) {
return vcvtmq_s64_f64(a); }
1008template <> [[gnu::always_inline]] nce uint64x2_t convert_round_toward_negative_infinity(float64x2_t a) {
return vcvtmq_u64_f64(a); }
1009template <> [[gnu::always_inline]] nce int64x2_t convert_round_toward_positive_infinity(float64x2_t a) {
return vcvtpq_s64_f64(a); }
1010template <> [[gnu::always_inline]] nce uint64x2_t convert_round_toward_positive_infinity(float64x2_t a) {
return vcvtpq_u64_f64(a); }
1011template <> [[gnu::always_inline]] nce int64x2_t convert_round_to_nearest_with_ties_away_from_zero(float64x2_t a) {
return vcvtaq_s64_f64(a); }
1012template <> [[gnu::always_inline]] nce uint64x2_t convert_round_to_nearest_with_ties_away_from_zero(float64x2_t a) {
return vcvtaq_u64_f64(a); }
1013template <
int n>[[gnu::always_inline]] nce int64x2_t convert(float64x2_t a) {
return vcvtq_n_s64_f64(a, n); }
1014template <
int n>[[gnu::always_inline]] nce uint64x2_t convert(float64x2_t a) {
return vcvtq_n_u64_f64(a, n); }
1015template <> [[gnu::always_inline]] nce float32x2_t convert(float64x2_t a) {
return vcvt_f32_f64(a); }
1016[[gnu::always_inline]] nce float32x2_t convert_round_to_odd(float64x2_t a) {
return vcvtx_f32_f64(a); }
1017template <> [[gnu::always_inline]] nce poly64x2_t reinterpret(float64x2_t a) {
return vreinterpretq_p64_f64(a); }
1018template <> [[gnu::always_inline]] nce poly128_t reinterpret(float64x2_t a) {
return vreinterpretq_p128_f64(a); }
1019template <> [[gnu::always_inline]] nce int8x16_t reinterpret(float64x2_t a) {
return vreinterpretq_s8_f64(a); }
1020template <> [[gnu::always_inline]] nce int16x8_t reinterpret(float64x2_t a) {
return vreinterpretq_s16_f64(a); }
1021template <> [[gnu::always_inline]] nce int32x4_t reinterpret(float64x2_t a) {
return vreinterpretq_s32_f64(a); }
1022template <> [[gnu::always_inline]] nce uint8x16_t reinterpret(float64x2_t a) {
return vreinterpretq_u8_f64(a); }
1023template <> [[gnu::always_inline]] nce uint16x8_t reinterpret(float64x2_t a) {
return vreinterpretq_u16_f64(a); }
1024template <> [[gnu::always_inline]] nce uint32x4_t reinterpret(float64x2_t a) {
return vreinterpretq_u32_f64(a); }
1025template <> [[gnu::always_inline]] nce poly8x16_t reinterpret(float64x2_t a) {
return vreinterpretq_p8_f64(a); }
1026template <> [[gnu::always_inline]] nce poly16x8_t reinterpret(float64x2_t a) {
return vreinterpretq_p16_f64(a); }
1027template <> [[gnu::always_inline]] nce uint64x2_t reinterpret(float64x2_t a) {
return vreinterpretq_u64_f64(a); }
1028template <> [[gnu::always_inline]] nce int64x2_t reinterpret(float64x2_t a) {
return vreinterpretq_s64_f64(a); }
1029template <> [[gnu::always_inline]] nce float16x8_t reinterpret(float64x2_t a) {
return vreinterpretq_f16_f64(a); }
1030template <> [[gnu::always_inline]] nce float32x4_t reinterpret(float64x2_t a) {
return vreinterpretq_f32_f64(a); }
1031template <
int lane>[[gnu::always_inline]] nce float64x2_t multiply_lane(float64x2_t a, float64x1_t v) {
return vmulq_lane_f64(a, v, lane); }
1032template <
int lane>[[gnu::always_inline]] nce float64x2_t multiply_lane(float64x2_t a, float64x2_t v) {
return vmulq_laneq_f64(a, v, lane); }
1033[[gnu::always_inline]] nce float64x2_t multiply_add_fused(float64x2_t a, float64x2_t b, float64_t n) {
return vfmaq_n_f64(a, b, n); }
1034[[gnu::always_inline]] nce float64x2_t multiply_subtract_fused(float64x2_t a, float64x2_t b, float64_t n) {
return vfmsq_n_f64(a, b, n); }
1035[[gnu::always_inline]] nce float64x2_t multiply(float64x2_t a, float64_t b) {
return vmulq_n_f64(a, b); }
1036[[gnu::always_inline]] nce float64x2_t negate(float64x2_t a) {
return vnegq_f64(a); }
1037template <
int lane>[[gnu::always_inline]] nce float64x1_t duplicate_lane(float64x2_t vec) {
return vdup_laneq_f64(vec, lane); }
1038template <
int lane>[[gnu::always_inline]] nce float64x2_t duplicate_lane_quad(float64x2_t vec) {
return vdupq_laneq_f64(vec, lane); }
1039template <> [[gnu::always_inline]] nce float64x1_t get_high(float64x2_t a) {
return vget_high_f64(a); }
1040template <> [[gnu::always_inline]] nce float64x1_t get_low(float64x2_t a) {
return vget_low_f64(a); }
1041template <
int lane>[[gnu::always_inline]] nce float64_t duplicate_lane(float64x2_t vec) {
return vdupd_laneq_f64(vec, lane); }
1042template <
int lane>[[gnu::always_inline]] nce float64_t get_lane(float64x2_t v) {
return vgetq_lane_f64(v, lane); }
1043template <
int n>[[gnu::always_inline]] nce float64x2_t extract(float64x2_t a, float64x2_t b) {
return vextq_f64(a, b, n); }
1044[[gnu::always_inline]] nce float64x2_t zip1(float64x2_t a, float64x2_t b) {
return vzip1q_f64(a, b); }
1045[[gnu::always_inline]] nce float64x2_t zip2(float64x2_t a, float64x2_t b) {
return vzip2q_f64(a, b); }
1046[[gnu::always_inline]] nce float64x2_t unzip1(float64x2_t a, float64x2_t b) {
return vuzp1q_f64(a, b); }
1047[[gnu::always_inline]] nce float64x2_t unzip2(float64x2_t a, float64x2_t b) {
return vuzp2q_f64(a, b); }
1048[[gnu::always_inline]] nce float64x2_t transpose_step_1(float64x2_t a, float64x2_t b) {
return vtrn1q_f64(a, b); }
1049[[gnu::always_inline]] nce float64x2_t transpose_step_2(float64x2_t a, float64x2_t b) {
return vtrn2q_f64(a, b); }
1050[[gnu::always_inline]] nce float64x2_t complex_add_rotate_90(float64x2_t a, float64x2_t b) {
return vcaddq_rot90_f64(a, b); }
1051[[gnu::always_inline]] nce float64x2_t complex_add_rotate_270(float64x2_t a, float64x2_t b) {
return vcaddq_rot270_f64(a, b); }
1052[[gnu::always_inline]] nce float64x2_t complex_multiply_add(float64x2_t r, float64x2_t a, float64x2_t b) {
return vcmlaq_f64(r, a, b); }
1053[[gnu::always_inline]] nce float64x2_t complex_multiply_add_rotate_90(float64x2_t r, float64x2_t a, float64x2_t b) {
return vcmlaq_rot90_f64(r, a, b); }
1054[[gnu::always_inline]] nce float64x2_t complex_multiply_add_rotate_180(float64x2_t r, float64x2_t a, float64x2_t b) {
return vcmlaq_rot180_f64(r, a, b); }
1055[[gnu::always_inline]] nce float64x2_t complex_multiply_add_rotate_270(float64x2_t r, float64x2_t a, float64x2_t b) {
return vcmlaq_rot270_f64(r, a, b); }
1056template <
int lane1,
int lane2>[[gnu::always_inline]] nce float64x2_t copy_lane(float64x2_t a, float64x1_t b) {
return vcopyq_lane_f64(a, lane1, b, lane2); }
1057template <
int lane1,
int lane2>[[gnu::always_inline]] nce float64x2_t copy_lane(float64x2_t a, float64x2_t b) {
return vcopyq_laneq_f64(a, lane1, b, lane2); }
1058[[gnu::always_inline]] nce float64x2_t round_to_32bit_integer(float64x2_t a) {
return vrnd32zq_f64(a); }
1059[[gnu::always_inline]] nce float64x2_t round_to_64bit_integer(float64x2_t a) {
return vrnd64zq_f64(a); }
1060[[gnu::always_inline]] nce float64x2_t round_to_32bit_integer_using_current_mode(float64x2_t a) {
return vrnd32xq_f64(a); }
1061[[gnu::always_inline]] nce float64x2_t round_to_64bit_integer_using_current_mode(float64x2_t a) {
return vrnd64xq_f64(a); }
1062template <> [[gnu::always_inline]] nce bfloat16x8_t reinterpret(float64x2_t a) {
return vreinterpretq_bf16_f64(a); }
1063[[gnu::always_inline]] nce uint8x8_t equal_to_zero(poly8x8_t a) {
return vceqz_p8(a); }
1064[[gnu::always_inline]] nce float64x1_t reinterpret(poly8x8_t a) {
return vreinterpret_f64_p8(a); }
1065[[gnu::always_inline]] nce poly8x8_t reverse_bits(poly8x8_t a) {
return vrbit_p8(a); }
1066[[gnu::always_inline]] nce poly8x8_t zip1(poly8x8_t a, poly8x8_t b) {
return vzip1_p8(a, b); }
1067[[gnu::always_inline]] nce poly8x8_t zip2(poly8x8_t a, poly8x8_t b) {
return vzip2_p8(a, b); }
1068[[gnu::always_inline]] nce poly8x8_t unzip1(poly8x8_t a, poly8x8_t b) {
return vuzp1_p8(a, b); }
1069[[gnu::always_inline]] nce poly8x8_t unzip2(poly8x8_t a, poly8x8_t b) {
return vuzp2_p8(a, b); }
1070[[gnu::always_inline]] nce poly8x8_t transpose_step_1(poly8x8_t a, poly8x8_t b) {
return vtrn1_p8(a, b); }
1071[[gnu::always_inline]] nce poly8x8_t transpose_step_2(poly8x8_t a, poly8x8_t b) {
return vtrn2_p8(a, b); }
1072template <
int lane1,
int lane2>[[gnu::always_inline]] nce poly8x8_t copy_lane(poly8x8_t a, poly8x8_t b) {
return vcopy_lane_p8(a, lane1, b, lane2); }
1073template <
int lane1,
int lane2>[[gnu::always_inline]] nce poly8x8_t copy_lane(poly8x8_t a, poly8x16_t b) {
return vcopy_laneq_p8(a, lane1, b, lane2); }
1074[[gnu::always_inline]] nce poly8x8_t table_extend1_saturate(poly8x8_t a, poly8x16_t t, uint8x8_t idx) {
return vqtbx1_p8(a, t, idx); }
1075[[gnu::always_inline]] nce poly8x8_t table_extend2_saturate(poly8x8_t a, poly8x16x2_t t, uint8x8_t idx) {
return vqtbx2_p8(a, t, idx); }
1076[[gnu::always_inline]] nce poly8x8_t table_extend3_saturate(poly8x8_t a, poly8x16x3_t t, uint8x8_t idx) {
return vqtbx3_p8(a, t, idx); }
1077[[gnu::always_inline]] nce poly8x8_t table_extend4_saturate(poly8x8_t a, poly8x16x4_t t, uint8x8_t idx) {
return vqtbx4_p8(a, t, idx); }
1078template <> [[gnu::always_inline]] nce float64x1_t reinterpret(poly16x4_t a) {
return vreinterpret_f64_p16(a); }
1079[[gnu::always_inline]] nce poly16x4_t zip1(poly16x4_t a, poly16x4_t b) {
return vzip1_p16(a, b); }
1080[[gnu::always_inline]] nce poly16x4_t zip2(poly16x4_t a, poly16x4_t b) {
return vzip2_p16(a, b); }
1081[[gnu::always_inline]] nce poly16x4_t unzip1(poly16x4_t a, poly16x4_t b) {
return vuzp1_p16(a, b); }
1082[[gnu::always_inline]] nce poly16x4_t unzip2(poly16x4_t a, poly16x4_t b) {
return vuzp2_p16(a, b); }
1083[[gnu::always_inline]] nce poly16x4_t transpose_step_1(poly16x4_t a, poly16x4_t b) {
return vtrn1_p16(a, b); }
1084[[gnu::always_inline]] nce poly16x4_t transpose_step_2(poly16x4_t a, poly16x4_t b) {
return vtrn2_p16(a, b); }
1085template <
int lane1,
int lane2>[[gnu::always_inline]] nce poly16x4_t copy_lane(poly16x4_t a, poly16x4_t b) {
return vcopy_lane_p16(a, lane1, b, lane2); }
1086template <
int lane1,
int lane2>[[gnu::always_inline]] nce poly16x4_t copy_lane(poly16x4_t a, poly16x8_t b) {
return vcopy_laneq_p16(a, lane1, b, lane2); }
1087[[gnu::always_inline]] nce int64_t add(int64_t a, int64_t b) {
return vaddd_s64(a, b); }
1088[[gnu::always_inline]] nce uint64_t add(uint64_t a, uint64_t b) {
return vaddd_u64(a, b); }
1089[[gnu::always_inline]] nce int64x2_t add_high(int64x2_t a, int32x4_t b) {
return vaddw_high_s32(a, b); }
1090[[gnu::always_inline]] nce int8_t add_saturate(int8_t a, int8_t b) {
return vqaddb_s8(a, b); }
1091[[gnu::always_inline]] nce int16_t add_saturate(int16_t a, int16_t b) {
return vqaddh_s16(a, b); }
1092[[gnu::always_inline]] nce int32_t add_saturate(int32_t a, int32_t b) {
return vqadds_s32(a, b); }
1093[[gnu::always_inline]] nce int64_t add_saturate(int64_t a, int64_t b) {
return vqaddd_s64(a, b); }
1094[[gnu::always_inline]] nce uint8_t add_saturate(uint8_t a, uint8_t b) {
return vqaddb_u8(a, b); }
1095[[gnu::always_inline]] nce uint16_t add_saturate(uint16_t a, uint16_t b) {
return vqaddh_u16(a, b); }
1096[[gnu::always_inline]] nce uint32_t add_saturate(uint32_t a, uint32_t b) {
return vqadds_u32(a, b); }
1097[[gnu::always_inline]] nce uint64_t add_saturate(uint64_t a, uint64_t b) {
return vqaddd_u64(a, b); }
1098[[gnu::always_inline]] nce int64x1_t add_saturate(int64x1_t a, uint64x1_t b) {
return vuqadd_s64(a, b); }
1099[[gnu::always_inline]] nce int64x2_t add_saturate(int64x2_t a, uint64x2_t b) {
return vuqaddq_s64(a, b); }
1100[[gnu::always_inline]] nce int8_t add_saturate(int8_t a, uint8_t b) {
return vuqaddb_s8(a, b); }
1101[[gnu::always_inline]] nce int16_t add_saturate(int16_t a, uint16_t b) {
return vuqaddh_s16(a, b); }
1102[[gnu::always_inline]] nce int32_t add_saturate(int32_t a, uint32_t b) {
return vuqadds_s32(a, b); }
1103[[gnu::always_inline]] nce int64_t add_saturate(int64_t a, uint64_t b) {
return vuqaddd_s64(a, b); }
1104[[gnu::always_inline]] nce uint8_t add_saturate(uint8_t a, int8_t b) {
return vsqaddb_u8(a, b); }
1105[[gnu::always_inline]] nce uint16_t add_saturate(uint16_t a, int16_t b) {
return vsqaddh_u16(a, b); }
1106[[gnu::always_inline]] nce uint32_t add_saturate(uint32_t a, int32_t b) {
return vsqadds_u32(a, b); }
1107[[gnu::always_inline]] nce uint64_t add_saturate(uint64_t a, int64_t b) {
return vsqaddd_u64(a, b); }
1108[[gnu::always_inline]] nce float32_t multiply_extended(float32_t a, float32_t b) {
return vmulxs_f32(a, b); }
1109[[gnu::always_inline]] nce float64_t multiply_extended(float64_t a, float64_t b) {
return vmulxd_f64(a, b); }
1110template <
int lane>[[gnu::always_inline]] nce float32_t multiply_extended_lane(float32_t a, float32x2_t v) {
return vmulxs_lane_f32(a, v, lane); }
1111template <
int lane>[[gnu::always_inline]] nce float64_t multiply_extended_lane(float64_t a, float64x1_t v) {
return vmulxd_lane_f64(a, v, lane); }
1112template <
int lane>[[gnu::always_inline]] nce float32_t multiply_extended_lane(float32_t a, float32x4_t v) {
return vmulxs_laneq_f32(a, v, lane); }
1113template <
int lane>[[gnu::always_inline]] nce float64_t multiply_extended_lane(float64_t a, float64x2_t v) {
return vmulxd_laneq_f64(a, v, lane); }
1114[[gnu::always_inline]] nce int64x2_t multiply_add_long_high(int64x2_t a, int32x4_t b, int32x4_t c) {
return vmlal_high_s32(a, b, c); }
1115[[gnu::always_inline]] nce int64x2_t multiply_subtract_long_high(int64x2_t a, int32x4_t b, int32x4_t c) {
return vmlsl_high_s32(a, b, c); }
1116template <
int lane>[[gnu::always_inline]] nce float32_t multiply_add_fused_lane(float32_t a, float32_t b, float32x2_t v) {
return vfmas_lane_f32(a, b, v, lane); }
1117template <
int lane>[[gnu::always_inline]] nce float64_t multiply_add_fused_lane(float64_t a, float64_t b, float64x1_t v) {
return vfmad_lane_f64(a, b, v, lane); }
1118template <
int lane>[[gnu::always_inline]] nce float32_t multiply_add_fused_lane(float32_t a, float32_t b, float32x4_t v) {
return vfmas_laneq_f32(a, b, v, lane); }
1119template <
int lane>[[gnu::always_inline]] nce float64_t multiply_add_fused_lane(float64_t a, float64_t b, float64x2_t v) {
return vfmad_laneq_f64(a, b, v, lane); }
1120template <
int lane>[[gnu::always_inline]] nce float32_t multiply_subtract_fused_lane(float32_t a, float32_t b, float32x2_t v) {
return vfmss_lane_f32(a, b, v, lane); }
1121template <
int lane>[[gnu::always_inline]] nce float64_t multiply_subtract_fused_lane(float64_t a, float64_t b, float64x1_t v) {
return vfmsd_lane_f64(a, b, v, lane); }
1122template <
int lane>[[gnu::always_inline]] nce float32_t multiply_subtract_fused_lane(float32_t a, float32_t b, float32x4_t v) {
return vfmss_laneq_f32(a, b, v, lane); }
1123template <
int lane>[[gnu::always_inline]] nce float64_t multiply_subtract_fused_lane(float64_t a, float64_t b, float64x2_t v) {
return vfmsd_laneq_f64(a, b, v, lane); }
1124[[gnu::always_inline]] nce int16_t multiply_double_saturate_high(int16_t a, int16_t b) {
return vqdmulhh_s16(a, b); }
1125[[gnu::always_inline]] nce int32_t multiply_double_saturate_high(int32_t a, int32_t b) {
return vqdmulhs_s32(a, b); }
1126[[gnu::always_inline]] nce int16_t multiply_double_round_saturate_high(int16_t a, int16_t b) {
return vqrdmulhh_s16(a, b); }
1127[[gnu::always_inline]] nce int32_t multiply_double_round_saturate_high(int32_t a, int32_t b) {
return vqrdmulhs_s32(a, b); }
1128[[gnu::always_inline]] nce int32_t multiply_double_saturate_long(int16_t a, int16_t b) {
return vqdmullh_s16(a, b); }
1129[[gnu::always_inline]] nce int64_t multiply_double_saturate_long(int32_t a, int32_t b) {
return vqdmulls_s32(a, b); }
1130[[gnu::always_inline]] nce int32_t multiply_double_add_saturate_long(int32_t a, int16_t b, int16_t c) {
return vqdmlalh_s16(a, b, c); }
1131[[gnu::always_inline]] nce int64_t multiply_double_add_saturate_long(int64_t a, int32_t b, int32_t c) {
return vqdmlals_s32(a, b, c); }
1132[[gnu::always_inline]] nce int64x2_t multiply_double_add_saturate_long_high(int64x2_t a, int32x4_t b, int32x4_t c) {
return vqdmlal_high_s32(a, b, c); }
1133[[gnu::always_inline]] nce int32_t multiply_double_subtract_saturate_long(int32_t a, int16_t b, int16_t c) {
return vqdmlslh_s16(a, b, c); }
1134[[gnu::always_inline]] nce int64_t multiply_double_subtract_saturate_long(int64_t a, int32_t b, int32_t c) {
return vqdmlsls_s32(a, b, c); }
1135[[gnu::always_inline]] nce int64x2_t multiply_double_subtract_saturate_long_high(int64x2_t a, int32x4_t b, int32x4_t c) {
return vqdmlsl_high_s32(a, b, c); }
1136template <
int lane>[[gnu::always_inline]] nce int32_t multiply_double_add_saturate_long_lane(int32_t a, int16_t b, int16x4_t v) {
return vqdmlalh_lane_s16(a, b, v, lane); }
1137template <
int lane>[[gnu::always_inline]] nce int64_t multiply_double_add_saturate_long_lane(int64_t a, int32_t b, int32x2_t v) {
return vqdmlals_lane_s32(a, b, v, lane); }
1138template <
int lane>[[gnu::always_inline]] nce int64x2_t multiply_double_add_saturate_long_lane_high(int64x2_t a, int32x4_t b, int32x2_t v) {
return vqdmlal_high_lane_s32(a, b, v, lane); }
1139template <
int lane>[[gnu::always_inline]] nce int64x2_t multiply_double_add_saturate_long_lane(int64x2_t a, int32x2_t b, int32x4_t v) {
return vqdmlal_laneq_s32(a, b, v, lane); }
1140template <
int lane>[[gnu::always_inline]] nce int32_t multiply_double_add_saturate_long_lane(int32_t a, int16_t b, int16x8_t v) {
return vqdmlalh_laneq_s16(a, b, v, lane); }
1141template <
int lane>[[gnu::always_inline]] nce int64_t multiply_double_add_saturate_long_lane(int64_t a, int32_t b, int32x4_t v) {
return vqdmlals_laneq_s32(a, b, v, lane); }
1142template <
int lane>[[gnu::always_inline]] nce int64x2_t multiply_double_add_saturate_long_lane_high(int64x2_t a, int32x4_t b, int32x4_t v) {
return vqdmlal_high_laneq_s32(a, b, v, lane); }
1143template <
int lane>[[gnu::always_inline]] nce int32_t multiply_double_subtract_saturate_long_lane(int32_t a, int16_t b, int16x4_t v) {
return vqdmlslh_lane_s16(a, b, v, lane); }
1144template <
int lane>[[gnu::always_inline]] nce int64_t multiply_double_subtract_saturate_long_lane(int64_t a, int32_t b, int32x2_t v) {
return vqdmlsls_lane_s32(a, b, v, lane); }
1145template <
int lane>[[gnu::always_inline]] nce int64x2_t multiply_double_subtract_saturate_long_lane_high(int64x2_t a, int32x4_t b, int32x2_t v) {
return vqdmlsl_high_lane_s32(a, b, v, lane); }
1146template <
int lane>[[gnu::always_inline]] nce int64x2_t multiply_double_subtract_saturate_long_lane(int64x2_t a, int32x2_t b, int32x4_t v) {
return vqdmlsl_laneq_s32(a, b, v, lane); }
1147template <
int lane>[[gnu::always_inline]] nce int32_t multiply_double_subtract_saturate_long_lane(int32_t a, int16_t b, int16x8_t v) {
return vqdmlslh_laneq_s16(a, b, v, lane); }
1148template <
int lane>[[gnu::always_inline]] nce int64_t multiply_double_subtract_saturate_long_lane(int64_t a, int32_t b, int32x4_t v) {
return vqdmlsls_laneq_s32(a, b, v, lane); }
1149template <
int lane>[[gnu::always_inline]] nce int64x2_t multiply_double_subtract_saturate_long_lane_high(int64x2_t a, int32x4_t b, int32x4_t v) {
return vqdmlsl_high_laneq_s32(a, b, v, lane); }
1150template <
int lane>[[gnu::always_inline]] nce int32_t multiply_double_saturate_long_lane(int16_t a, int16x4_t v) {
return vqdmullh_lane_s16(a, v, lane); }
1151template <
int lane>[[gnu::always_inline]] nce int64_t multiply_double_saturate_long_lane(int32_t a, int32x2_t v) {
return vqdmulls_lane_s32(a, v, lane); }
1152template <
int lane>[[gnu::always_inline]] nce int32_t multiply_double_saturate_long_lane(int16_t a, int16x8_t v) {
return vqdmullh_laneq_s16(a, v, lane); }
1153template <
int lane>[[gnu::always_inline]] nce int64_t multiply_double_saturate_long_lane(int32_t a, int32x4_t v) {
return vqdmulls_laneq_s32(a, v, lane); }
1154template <
int lane>[[gnu::always_inline]] nce int16_t multiply_double_saturate_high_lane(int16_t a, int16x4_t v) {
return vqdmulhh_lane_s16(a, v, lane); }
1155template <
int lane>[[gnu::always_inline]] nce int32_t multiply_double_saturate_high_lane(int32_t a, int32x2_t v) {
return vqdmulhs_lane_s32(a, v, lane); }
1156template <
int lane>[[gnu::always_inline]] nce int16_t multiply_double_saturate_high_lane(int16_t a, int16x8_t v) {
return vqdmulhh_laneq_s16(a, v, lane); }
1157template <
int lane>[[gnu::always_inline]] nce int32_t multiply_double_saturate_high_lane(int32_t a, int32x4_t v) {
return vqdmulhs_laneq_s32(a, v, lane); }
1158template <
int lane>[[gnu::always_inline]] nce int16_t multiply_double_round_saturate_high_lane(int16_t a, int16x4_t v) {
return vqrdmulhh_lane_s16(a, v, lane); }
1159template <
int lane>[[gnu::always_inline]] nce int32_t multiply_double_round_saturate_high_lane(int32_t a, int32x2_t v) {
return vqrdmulhs_lane_s32(a, v, lane); }
1160template <
int lane>[[gnu::always_inline]] nce int16_t multiply_double_round_saturate_high_lane(int16_t a, int16x8_t v) {
return vqrdmulhh_laneq_s16(a, v, lane); }
1161template <
int lane>[[gnu::always_inline]] nce int32_t multiply_double_round_saturate_high_lane(int32_t a, int32x4_t v) {
return vqrdmulhs_laneq_s32(a, v, lane); }
1162[[gnu::always_inline]] nce int64x2_t multiply_double_add_saturate_long_high(int64x2_t a, int32x4_t b, int32_t c) {
return vqdmlal_high_n_s32(a, b, c); }
1163[[gnu::always_inline]] nce int64x2_t multiply_double_subtract_saturate_long_high(int64x2_t a, int32x4_t b, int32_t c) {
return vqdmlsl_high_n_s32(a, b, c); }
1164[[gnu::always_inline]] nce poly16x8_t multiply_long_high(poly8x16_t a, poly8x16_t b) {
return vmull_high_p8(a, b); }
1165[[gnu::always_inline]] nce int64_t subtract(int64_t a, int64_t b) {
return vsubd_s64(a, b); }
1166[[gnu::always_inline]] nce uint64_t subtract(uint64_t a, uint64_t b) {
return vsubd_u64(a, b); }
1167[[gnu::always_inline]] nce int64x2_t subtract_high(int64x2_t a, int32x4_t b) {
return vsubw_high_s32(a, b); }
1168[[gnu::always_inline]] nce int8_t subtract_saturate(int8_t a, int8_t b) {
return vqsubb_s8(a, b); }
1169[[gnu::always_inline]] nce int16_t subtract_saturate(int16_t a, int16_t b) {
return vqsubh_s16(a, b); }
1170[[gnu::always_inline]] nce int32_t subtract_saturate(int32_t a, int32_t b) {
return vqsubs_s32(a, b); }
1171[[gnu::always_inline]] nce int64_t subtract_saturate(int64_t a, int64_t b) {
return vqsubd_s64(a, b); }
1172[[gnu::always_inline]] nce uint8_t subtract_saturate(uint8_t a, uint8_t b) {
return vqsubb_u8(a, b); }
1173[[gnu::always_inline]] nce uint16_t subtract_saturate(uint16_t a, uint16_t b) {
return vqsubh_u16(a, b); }
1174[[gnu::always_inline]] nce uint32_t subtract_saturate(uint32_t a, uint32_t b) {
return vqsubs_u32(a, b); }
1175[[gnu::always_inline]] nce uint64_t subtract_saturate(uint64_t a, uint64_t b) {
return vqsubd_u64(a, b); }
1176[[gnu::always_inline]] nce float32_t absolute_difference(float32_t a, float32_t b) {
return vabds_f32(a, b); }
1177[[gnu::always_inline]] nce float64_t absolute_difference(float64_t a, float64_t b) {
return vabdd_f64(a, b); }
1178[[gnu::always_inline]] nce int64x2_t subtract_absolute_add_high(int64x2_t a, int32x4_t b, int32x4_t c) {
return vabal_high_s32(a, b, c); }
1179[[gnu::always_inline]] nce int64x1_t absolute(int64x1_t a) {
return vabs_s64(a); }
1180[[gnu::always_inline]] nce int64_t absolute(int64_t a) {
return vabsd_s64(a); }
1181[[gnu::always_inline]] nce int64x2_t absolute(int64x2_t a) {
return vabsq_s64(a); }
1182[[gnu::always_inline]] nce int64x1_t absolute_saturate(int64x1_t a) {
return vqabs_s64(a); }
1183[[gnu::always_inline]] nce int64x2_t absolute_saturate(int64x2_t a) {
return vqabsq_s64(a); }
1184[[gnu::always_inline]] nce int8_t absolute_saturate(int8_t a) {
return vqabsb_s8(a); }
1185[[gnu::always_inline]] nce int16_t absolute_saturate(int16_t a) {
return vqabsh_s16(a); }
1186[[gnu::always_inline]] nce int32_t absolute_saturate(int32_t a) {
return vqabss_s32(a); }
1187[[gnu::always_inline]] nce int64_t absolute_saturate(int64_t a) {
return vqabsd_s64(a); }
1188[[gnu::always_inline]] nce float32_t reciprocal_estimate(float32_t a) {
return vrecpes_f32(a); }
1189[[gnu::always_inline]] nce float64_t reciprocal_estimate(float64_t a) {
return vrecped_f64(a); }
1190[[gnu::always_inline]] nce float32_t reciprocal_step(float32_t a, float32_t b) {
return vrecpss_f32(a, b); }
1191[[gnu::always_inline]] nce float64_t reciprocal_step(float64_t a, float64_t b) {
return vrecpsd_f64(a, b); }
1192[[gnu::always_inline]] nce float32_t reciprocal_sqrt_estimate(float32_t a) {
return vrsqrtes_f32(a); }
1193[[gnu::always_inline]] nce float64_t reciprocal_sqrt_estimate(float64_t a) {
return vrsqrted_f64(a); }
1194[[gnu::always_inline]] nce float32_t reciprocal_sqrt_step(float32_t a, float32_t b) {
return vrsqrtss_f32(a, b); }
1195[[gnu::always_inline]] nce float64_t reciprocal_sqrt_step(float64_t a, float64_t b) {
return vrsqrtsd_f64(a, b); }
1196[[gnu::always_inline]] nce float32_t reciprocal_exponent(float32_t a) {
return vrecpxs_f32(a); }
1197[[gnu::always_inline]] nce float64_t reciprocal_exponent(float64_t a) {
return vrecpxd_f64(a); }
1198[[gnu::always_inline]] nce int64x2_t pairwise_add(int64x2_t a, int64x2_t b) {
return vpaddq_s64(a, b); }
1199[[gnu::always_inline]] nce int64_t pairwise_add(int64x2_t a) {
return vpaddd_s64(a); }
1200[[gnu::always_inline]] nce int64_t reduce_add(int64x2_t a) {
return vaddvq_s64(a); }
1201[[gnu::always_inline]] nce uint64x1_t equal(int64x1_t a, int64x1_t b) {
return vceq_s64(a, b); }
1202[[gnu::always_inline]] nce uint64x2_t equal(int64x2_t a, int64x2_t b) {
return vceqq_s64(a, b); }
1203[[gnu::always_inline]] nce uint64_t equal(int64_t a, int64_t b) {
return vceqd_s64(a, b); }
1204[[gnu::always_inline]] nce uint64_t equal(uint64_t a, uint64_t b) {
return vceqd_u64(a, b); }
1205[[gnu::always_inline]] nce uint32_t equal(float32_t a, float32_t b) {
return vceqs_f32(a, b); }
1206[[gnu::always_inline]] nce uint64_t equal(float64_t a, float64_t b) {
return vceqd_f64(a, b); }
1207[[gnu::always_inline]] nce uint8x16_t equal_to_zero(poly8x16_t a) {
return vceqzq_p8(a); }
1208[[gnu::always_inline]] nce uint64x1_t equal_to_zero(int64x1_t a) {
return vceqz_s64(a); }
1209[[gnu::always_inline]] nce uint64x2_t equal_to_zero(int64x2_t a) {
return vceqzq_s64(a); }
1210[[gnu::always_inline]] nce uint64_t equal_to_zero(int64_t a) {
return vceqzd_s64(a); }
1211[[gnu::always_inline]] nce uint64_t equal_to_zero(uint64_t a) {
return vceqzd_u64(a); }
1212[[gnu::always_inline]] nce uint32_t equal_to_zero(float32_t a) {
return vceqzs_f32(a); }
1213[[gnu::always_inline]] nce uint64_t equal_to_zero(float64_t a) {
return vceqzd_f64(a); }
1214[[gnu::always_inline]] nce uint64x1_t greater_than_or_equal(int64x1_t a, int64x1_t b) {
return vcge_s64(a, b); }
1215[[gnu::always_inline]] nce uint64x2_t greater_than_or_equal(int64x2_t a, int64x2_t b) {
return vcgeq_s64(a, b); }
1216[[gnu::always_inline]] nce uint64_t greater_than_or_equal(int64_t a, int64_t b) {
return vcged_s64(a, b); }
1217[[gnu::always_inline]] nce uint64_t greater_than_or_equal(uint64_t a, uint64_t b) {
return vcged_u64(a, b); }
1218[[gnu::always_inline]] nce uint32_t greater_than_or_equal(float32_t a, float32_t b) {
return vcges_f32(a, b); }
1219[[gnu::always_inline]] nce uint64_t greater_than_or_equal(float64_t a, float64_t b) {
return vcged_f64(a, b); }
1220[[gnu::always_inline]] nce uint64x1_t greater_than_or_equal_to_zero(int64x1_t a) {
return vcgez_s64(a); }
1221[[gnu::always_inline]] nce uint64x2_t greater_than_or_equal_to_zero(int64x2_t a) {
return vcgezq_s64(a); }
1222[[gnu::always_inline]] nce uint64_t greater_than_or_equal_to_zero(int64_t a) {
return vcgezd_s64(a); }
1223[[gnu::always_inline]] nce uint32_t greater_than_or_equal_to_zero(float32_t a) {
return vcgezs_f32(a); }
1224[[gnu::always_inline]] nce uint64_t greater_than_or_equal_to_zero(float64_t a) {
return vcgezd_f64(a); }
1225[[gnu::always_inline]] nce uint64x1_t less_than_or_equal(int64x1_t a, int64x1_t b) {
return vcle_s64(a, b); }
1226[[gnu::always_inline]] nce uint64x2_t less_than_or_equal(int64x2_t a, int64x2_t b) {
return vcleq_s64(a, b); }
1227[[gnu::always_inline]] nce uint64_t less_than_or_equal(int64_t a, int64_t b) {
return vcled_s64(a, b); }
1228[[gnu::always_inline]] nce uint64_t less_than_or_equal(uint64_t a, uint64_t b) {
return vcled_u64(a, b); }
1229[[gnu::always_inline]] nce uint32_t less_than_or_equal(float32_t a, float32_t b) {
return vcles_f32(a, b); }
1230[[gnu::always_inline]] nce uint64_t less_than_or_equal(float64_t a, float64_t b) {
return vcled_f64(a, b); }
1231[[gnu::always_inline]] nce uint64x1_t less_than_or_equal_to_zero(int64x1_t a) {
return vclez_s64(a); }
1232[[gnu::always_inline]] nce uint64x2_t less_than_or_equal_to_zero(int64x2_t a) {
return vclezq_s64(a); }
1233[[gnu::always_inline]] nce uint64_t less_than_or_equal_to_zero(int64_t a) {
return vclezd_s64(a); }
1234[[gnu::always_inline]] nce uint32_t less_than_or_equal_to_zero(float32_t a) {
return vclezs_f32(a); }
1235[[gnu::always_inline]] nce uint64_t less_than_or_equal_to_zero(float64_t a) {
return vclezd_f64(a); }
1236[[gnu::always_inline]] nce uint64x1_t greater_than(int64x1_t a, int64x1_t b) {
return vcgt_s64(a, b); }
1237[[gnu::always_inline]] nce uint64x2_t greater_than(int64x2_t a, int64x2_t b) {
return vcgtq_s64(a, b); }
1238[[gnu::always_inline]] nce uint64_t greater_than(int64_t a, int64_t b) {
return vcgtd_s64(a, b); }
1239[[gnu::always_inline]] nce uint64_t greater_than(uint64_t a, uint64_t b) {
return vcgtd_u64(a, b); }
1240[[gnu::always_inline]] nce uint32_t greater_than(float32_t a, float32_t b) {
return vcgts_f32(a, b); }
1241[[gnu::always_inline]] nce uint64_t greater_than(float64_t a, float64_t b) {
return vcgtd_f64(a, b); }
1242[[gnu::always_inline]] nce uint64x1_t greater_than_zero(int64x1_t a) {
return vcgtz_s64(a); }
1243[[gnu::always_inline]] nce uint64x2_t greater_than_zero(int64x2_t a) {
return vcgtzq_s64(a); }
1244[[gnu::always_inline]] nce uint64_t greater_than_zero(int64_t a) {
return vcgtzd_s64(a); }
1245[[gnu::always_inline]] nce uint32_t greater_than_zero(float32_t a) {
return vcgtzs_f32(a); }
1246[[gnu::always_inline]] nce uint64_t greater_than_zero(float64_t a) {
return vcgtzd_f64(a); }
1247[[gnu::always_inline]] nce uint64x1_t less_than(int64x1_t a, int64x1_t b) {
return vclt_s64(a, b); }
1248[[gnu::always_inline]] nce uint64x2_t less_than(int64x2_t a, int64x2_t b) {
return vcltq_s64(a, b); }
1249[[gnu::always_inline]] nce uint64_t less_than(int64_t a, int64_t b) {
return vcltd_s64(a, b); }
1250[[gnu::always_inline]] nce uint64_t less_than(uint64_t a, uint64_t b) {
return vcltd_u64(a, b); }
1251[[gnu::always_inline]] nce uint32_t less_than(float32_t a, float32_t b) {
return vclts_f32(a, b); }
1252[[gnu::always_inline]] nce uint64_t less_than(float64_t a, float64_t b) {
return vcltd_f64(a, b); }
1253[[gnu::always_inline]] nce uint64x1_t less_than_zero(int64x1_t a) {
return vcltz_s64(a); }
1254[[gnu::always_inline]] nce uint64x2_t less_than_zero(int64x2_t a) {
return vcltzq_s64(a); }
1255[[gnu::always_inline]] nce uint64_t less_than_zero(int64_t a) {
return vcltzd_s64(a); }
1256[[gnu::always_inline]] nce uint32_t less_than_zero(float32_t a) {
return vcltzs_f32(a); }
1257[[gnu::always_inline]] nce uint64_t less_than_zero(float64_t a) {
return vcltzd_f64(a); }
1258[[gnu::always_inline]] nce uint32_t absolute_greater_than_or_equal(float32_t a, float32_t b) {
return vcages_f32(a, b); }
1259[[gnu::always_inline]] nce uint64_t absolute_greater_than_or_equal(float64_t a, float64_t b) {
return vcaged_f64(a, b); }
1260[[gnu::always_inline]] nce uint32_t absolute_less_than_or_equal(float32_t a, float32_t b) {
return vcales_f32(a, b); }
1261[[gnu::always_inline]] nce uint64_t absolute_less_than_or_equal(float64_t a, float64_t b) {
return vcaled_f64(a, b); }
1262[[gnu::always_inline]] nce uint32_t absolute_greater_than(float32_t a, float32_t b) {
return vcagts_f32(a, b); }
1263[[gnu::always_inline]] nce uint64_t absolute_greater_than(float64_t a, float64_t b) {
return vcagtd_f64(a, b); }
1264[[gnu::always_inline]] nce uint32_t absolute_less_than(float32_t a, float32_t b) {
return vcalts_f32(a, b); }
1265[[gnu::always_inline]] nce uint64_t absolute_less_than(float64_t a, float64_t b) {
return vcaltd_f64(a, b); }
1266[[gnu::always_inline]] nce uint64x1_t compare_test_nonzero(int64x1_t a, int64x1_t b) {
return vtst_s64(a, b); }
1267[[gnu::always_inline]] nce uint64x2_t compare_test_nonzero(int64x2_t a, int64x2_t b) {
return vtstq_s64(a, b); }
1268[[gnu::always_inline]] nce uint64_t compare_test_nonzero(int64_t a, int64_t b) {
return vtstd_s64(a, b); }
1269[[gnu::always_inline]] nce uint64_t compare_test_nonzero(uint64_t a, uint64_t b) {
return vtstd_u64(a, b); }
1270[[gnu::always_inline]] nce int64_t shift_left(int64_t a, int64_t b) {
return vshld_s64(a, b); }
1271[[gnu::always_inline]] nce uint64_t shift_left(uint64_t a, int64_t b) {
return vshld_u64(a, b); }
1272template <
int n>[[gnu::always_inline]] nce int64_t shift_left(int64_t a) {
return vshld_n_s64(a, n); }
1273template <
int n>[[gnu::always_inline]] nce uint64_t shift_left(uint64_t a) {
return vshld_n_u64(a, n); }
1274[[gnu::always_inline]] nce int8_t shift_left_saturate(int8_t a, int8_t b) {
return vqshlb_s8(a, b); }
1275[[gnu::always_inline]] nce int16_t shift_left_saturate(int16_t a, int16_t b) {
return vqshlh_s16(a, b); }
1276[[gnu::always_inline]] nce int32_t shift_left_saturate(int32_t a, int32_t b) {
return vqshls_s32(a, b); }
1277[[gnu::always_inline]] nce int64_t shift_left_saturate(int64_t a, int64_t b) {
return vqshld_s64(a, b); }
1278[[gnu::always_inline]] nce uint8_t shift_left_saturate(uint8_t a, int8_t b) {
return vqshlb_u8(a, b); }
1279[[gnu::always_inline]] nce uint16_t shift_left_saturate(uint16_t a, int16_t b) {
return vqshlh_u16(a, b); }
1280[[gnu::always_inline]] nce uint32_t shift_left_saturate(uint32_t a, int32_t b) {
return vqshls_u32(a, b); }
1281[[gnu::always_inline]] nce uint64_t shift_left_saturate(uint64_t a, int64_t b) {
return vqshld_u64(a, b); }
1282template <
int n>[[gnu::always_inline]] nce int8_t shift_left_saturate(int8_t a) {
return vqshlb_n_s8(a, n); }
1283template <
int n>[[gnu::always_inline]] nce int16_t shift_left_saturate(int16_t a) {
return vqshlh_n_s16(a, n); }
1284template <
int n>[[gnu::always_inline]] nce int32_t shift_left_saturate(int32_t a) {
return vqshls_n_s32(a, n); }
1285template <
int n>[[gnu::always_inline]] nce int64_t shift_left_saturate(int64_t a) {
return vqshld_n_s64(a, n); }
1286template <
int n>[[gnu::always_inline]] nce uint8_t shift_left_saturate(uint8_t a) {
return vqshlb_n_u8(a, n); }
1287template <
int n>[[gnu::always_inline]] nce uint16_t shift_left_saturate(uint16_t a) {
return vqshlh_n_u16(a, n); }
1288template <
int n>[[gnu::always_inline]] nce uint32_t shift_left_saturate(uint32_t a) {
return vqshls_n_u32(a, n); }
1289template <
int n>[[gnu::always_inline]] nce uint64_t shift_left_saturate(uint64_t a) {
return vqshld_n_u64(a, n); }
1290template <
int n>[[gnu::always_inline]] nce uint8_t shift_left_unsigned_saturate(int8_t a) {
return vqshlub_n_s8(a, n); }
1291template <
int n>[[gnu::always_inline]] nce uint16_t shift_left_unsigned_saturate(int16_t a) {
return vqshluh_n_s16(a, n); }
1292template <
int n>[[gnu::always_inline]] nce uint32_t shift_left_unsigned_saturate(int32_t a) {
return vqshlus_n_s32(a, n); }
1293template <
int n>[[gnu::always_inline]] nce uint64_t shift_left_unsigned_saturate(int64_t a) {
return vqshlud_n_s64(a, n); }
1294[[gnu::always_inline]] nce int64_t shift_left_round(int64_t a, int64_t b) {
return vrshld_s64(a, b); }
1295[[gnu::always_inline]] nce uint64_t shift_left_round(uint64_t a, int64_t b) {
return vrshld_u64(a, b); }
1296[[gnu::always_inline]] nce int8_t shift_left_round_saturate(int8_t a, int8_t b) {
return vqrshlb_s8(a, b); }
1297[[gnu::always_inline]] nce int16_t shift_left_round_saturate(int16_t a, int16_t b) {
return vqrshlh_s16(a, b); }
1298[[gnu::always_inline]] nce int32_t shift_left_round_saturate(int32_t a, int32_t b) {
return vqrshls_s32(a, b); }
1299[[gnu::always_inline]] nce int64_t shift_left_round_saturate(int64_t a, int64_t b) {
return vqrshld_s64(a, b); }
1300[[gnu::always_inline]] nce uint8_t shift_left_round_saturate(uint8_t a, int8_t b) {
return vqrshlb_u8(a, b); }
1301[[gnu::always_inline]] nce uint16_t shift_left_round_saturate(uint16_t a, int16_t b) {
return vqrshlh_u16(a, b); }
1302[[gnu::always_inline]] nce uint32_t shift_left_round_saturate(uint32_t a, int32_t b) {
return vqrshls_u32(a, b); }
1303[[gnu::always_inline]] nce uint64_t shift_left_round_saturate(uint64_t a, int64_t b) {
return vqrshld_u64(a, b); }
1304template <
int n>[[gnu::always_inline]] nce int64_t shift_left_insert(int64_t a, int64_t b) {
return vslid_n_s64(a, b, n); }
1305template <
int n>[[gnu::always_inline]] nce uint64_t shift_left_insert(uint64_t a, uint64_t b) {
return vslid_n_u64(a, b, n); }
1306template <
int n>[[gnu::always_inline]] nce int64_t shift_right(int64_t a) {
return vshrd_n_s64(a, n); }
1307template <
int n>[[gnu::always_inline]] nce uint64_t shift_right(uint64_t a) {
return vshrd_n_u64(a, n); }
1308template <
int n>[[gnu::always_inline]] nce int64_t shift_right_round(int64_t a) {
return vrshrd_n_s64(a, n); }
1309template <
int n>[[gnu::always_inline]] nce uint64_t shift_right_round(uint64_t a) {
return vrshrd_n_u64(a, n); }
1310template <
int n>[[gnu::always_inline]] nce int64_t shift_right_add(int64_t a, int64_t b) {
return vsrad_n_s64(a, b, n); }
1311template <
int n>[[gnu::always_inline]] nce uint64_t shift_right_add(uint64_t a, uint64_t b) {
return vsrad_n_u64(a, b, n); }
1312template <
int n>[[gnu::always_inline]] nce int64_t shift_right_accumulate_round(int64_t a, int64_t b) {
return vrsrad_n_s64(a, b, n); }
1313template <
int n>[[gnu::always_inline]] nce uint64_t shift_right_accumulate_round(uint64_t a, uint64_t b) {
return vrsrad_n_u64(a, b, n); }
1314template <
int n>[[gnu::always_inline]] nce uint8_t shift_right_saturate_narrow_unsigned(int16_t a) {
return vqshrunh_n_s16(a, n); }
1315template <
int n>[[gnu::always_inline]] nce uint16_t shift_right_saturate_narrow_unsigned(int32_t a) {
return vqshruns_n_s32(a, n); }
1316template <
int n>[[gnu::always_inline]] nce uint32_t shift_right_saturate_narrow_unsigned(int64_t a) {
return vqshrund_n_s64(a, n); }
1317template <
int n>[[gnu::always_inline]] nce int8_t shift_right_saturate_narrow(int16_t a) {
return vqshrnh_n_s16(a, n); }
1318template <
int n>[[gnu::always_inline]] nce int16_t shift_right_saturate_narrow(int32_t a) {
return vqshrns_n_s32(a, n); }
1319template <
int n>[[gnu::always_inline]] nce int32_t shift_right_saturate_narrow(int64_t a) {
return vqshrnd_n_s64(a, n); }
1320template <
int n>[[gnu::always_inline]] nce uint8_t shift_right_saturate_narrow(uint16_t a) {
return vqshrnh_n_u16(a, n); }
1321template <
int n>[[gnu::always_inline]] nce uint16_t shift_right_saturate_narrow(uint32_t a) {
return vqshrns_n_u32(a, n); }
1322template <
int n>[[gnu::always_inline]] nce uint32_t shift_right_saturate_narrow(uint64_t a) {
return vqshrnd_n_u64(a, n); }
1323template <
int n>[[gnu::always_inline]] nce uint8_t shift_right_unsigned_round_saturate_narrow(int16_t a) {
return vqrshrunh_n_s16(a, n); }
1324template <
int n>[[gnu::always_inline]] nce uint16_t shift_right_unsigned_round_saturate_narrow(int32_t a) {
return vqrshruns_n_s32(a, n); }
1325template <
int n>[[gnu::always_inline]] nce uint32_t shift_right_unsigned_round_saturate_narrow(int64_t a) {
return vqrshrund_n_s64(a, n); }
1326template <
int n>[[gnu::always_inline]] nce int8_t shift_right_round_saturate_narrow(int16_t a) {
return vqrshrnh_n_s16(a, n); }
1327template <
int n>[[gnu::always_inline]] nce int16_t shift_right_round_saturate_narrow(int32_t a) {
return vqrshrns_n_s32(a, n); }
1328template <
int n>[[gnu::always_inline]] nce int32_t shift_right_round_saturate_narrow(int64_t a) {
return vqrshrnd_n_s64(a, n); }
1329template <
int n>[[gnu::always_inline]] nce uint8_t shift_right_round_saturate_narrow(uint16_t a) {
return vqrshrnh_n_u16(a, n); }
1330template <
int n>[[gnu::always_inline]] nce uint16_t shift_right_round_saturate_narrow(uint32_t a) {
return vqrshrns_n_u32(a, n); }
1331template <
int n>[[gnu::always_inline]] nce uint32_t shift_right_round_saturate_narrow(uint64_t a) {
return vqrshrnd_n_u64(a, n); }
1332template <
int n>[[gnu::always_inline]] nce int64_t shift_right_insert(int64_t a, int64_t b) {
return vsrid_n_s64(a, b, n); }
1333template <
int n>[[gnu::always_inline]] nce uint64_t shift_right_insert(uint64_t a, uint64_t b) {
return vsrid_n_u64(a, b, n); }
1334template <> [[gnu::always_inline]] nce int32_t convert(float32_t a) {
return vcvts_s32_f32(a); }
1335template <> [[gnu::always_inline]] nce uint32_t convert(float32_t a) {
return vcvts_u32_f32(a); }
1336template <> [[gnu::always_inline]] nce int32_t convert_round_to_nearest_with_ties_to_even(float32_t a) {
return vcvtns_s32_f32(a); }
1337template <> [[gnu::always_inline]] nce uint32_t convert_round_to_nearest_with_ties_to_even(float32_t a) {
return vcvtns_u32_f32(a); }
1338template <> [[gnu::always_inline]] nce int32_t convert_round_toward_negative_infinity(float32_t a) {
return vcvtms_s32_f32(a); }
1339template <> [[gnu::always_inline]] nce uint32_t convert_round_toward_negative_infinity(float32_t a) {
return vcvtms_u32_f32(a); }
1340template <> [[gnu::always_inline]] nce int32_t convert_round_toward_positive_infinity(float32_t a) {
return vcvtps_s32_f32(a); }
1341template <> [[gnu::always_inline]] nce uint32_t convert_round_toward_positive_infinity(float32_t a) {
return vcvtps_u32_f32(a); }
1342template <> [[gnu::always_inline]] nce int32_t convert_round_to_nearest_with_ties_away_from_zero(float32_t a) {
return vcvtas_s32_f32(a); }
1343template <> [[gnu::always_inline]] nce uint32_t convert_round_to_nearest_with_ties_away_from_zero(float32_t a) {
return vcvtas_u32_f32(a); }
1344template <> [[gnu::always_inline]] nce int64_t convert(float64_t a) {
return vcvtd_s64_f64(a); }
1345template <> [[gnu::always_inline]] nce uint64_t convert(float64_t a) {
return vcvtd_u64_f64(a); }
1346template <> [[gnu::always_inline]] nce int64_t convert_round_to_nearest_with_ties_to_even(float64_t a) {
return vcvtnd_s64_f64(a); }
1347template <> [[gnu::always_inline]] nce uint64_t convert_round_to_nearest_with_ties_to_even(float64_t a) {
return vcvtnd_u64_f64(a); }
1348template <> [[gnu::always_inline]] nce int64_t convert_round_toward_negative_infinity(float64_t a) {
return vcvtmd_s64_f64(a); }
1349template <> [[gnu::always_inline]] nce uint64_t convert_round_toward_negative_infinity(float64_t a) {
return vcvtmd_u64_f64(a); }
1350template <> [[gnu::always_inline]] nce int64_t convert_round_toward_positive_infinity(float64_t a) {
return vcvtpd_s64_f64(a); }
1351template <> [[gnu::always_inline]] nce uint64_t convert_round_toward_positive_infinity(float64_t a) {
return vcvtpd_u64_f64(a); }
1352template <> [[gnu::always_inline]] nce int64_t convert_round_to_nearest_with_ties_away_from_zero(float64_t a) {
return vcvtad_s64_f64(a); }
1353template <> [[gnu::always_inline]] nce uint64_t convert_round_to_nearest_with_ties_away_from_zero(float64_t a) {
return vcvtad_u64_f64(a); }
1354template <
int n>[[gnu::always_inline]] nce int32_t convert(float32_t a) {
return vcvts_n_s32_f32(a, n); }
1355template <
int n>[[gnu::always_inline]] nce uint32_t convert(float32_t a) {
return vcvts_n_u32_f32(a, n); }
1356template <
int n>[[gnu::always_inline]] nce int64_t convert(float64_t a) {
return vcvtd_n_s64_f64(a, n); }
1357template <
int n>[[gnu::always_inline]] nce uint64_t convert(float64_t a) {
return vcvtd_n_u64_f64(a, n); }
1358template <> [[gnu::always_inline]] nce float32_t convert(int32_t a) {
return vcvts_f32_s32(a); }
1359template <> [[gnu::always_inline]] nce float32_t convert(uint32_t a) {
return vcvts_f32_u32(a); }
1360template <> [[gnu::always_inline]] nce float64x1_t convert(int64x1_t a) {
return vcvt_f64_s64(a); }
1361template <> [[gnu::always_inline]] nce float64x2_t convert(int64x2_t a) {
return vcvtq_f64_s64(a); }
1362template <> [[gnu::always_inline]] nce float64_t convert(int64_t a) {
return vcvtd_f64_s64(a); }
1363template <> [[gnu::always_inline]] nce float64_t convert(uint64_t a) {
return vcvtd_f64_u64(a); }
1364template <
int n>[[gnu::always_inline]] nce float32_t convert(int32_t a) {
return vcvts_n_f32_s32(a, n); }
1365template <
int n>[[gnu::always_inline]] nce float32_t convert(uint32_t a) {
return vcvts_n_f32_u32(a, n); }
1366template <
int n>[[gnu::always_inline]] nce float64x1_t convert(int64x1_t a) {
return vcvt_n_f64_s64(a, n); }
1367template <
int n>[[gnu::always_inline]] nce float64x2_t convert(int64x2_t a) {
return vcvtq_n_f64_s64(a, n); }
1368template <
int n>[[gnu::always_inline]] nce float64_t convert(int64_t a) {
return vcvtd_n_f64_s64(a, n); }
1369template <
int n>[[gnu::always_inline]] nce float64_t convert(uint64_t a) {
return vcvtd_n_f64_u64(a, n); }
1370[[gnu::always_inline]] nce float32_t convert_round_to_odd(float64_t a) {
return vcvtxd_f32_f64(a); }
1371template <> [[gnu::always_inline]] nce float64x1_t reinterpret(int64x1_t a) {
return vreinterpret_f64_s64(a); }
1372[[gnu::always_inline]] nce float64x2_t reinterpret(poly8x16_t a) {
return vreinterpretq_f64_p8(a); }
1373template <> [[gnu::always_inline]] nce float64x2_t reinterpret(poly16x8_t a) {
return vreinterpretq_f64_p16(a); }
1374template <> [[gnu::always_inline]] nce float64x2_t reinterpret(int64x2_t a) {
return vreinterpretq_f64_s64(a); }
1375[[gnu::always_inline]] nce float64x1_t reinterpret(poly64x1_t a) {
return vreinterpret_f64_p64(a); }
1376template <> [[gnu::always_inline]] nce float64x2_t reinterpret(poly64x2_t a) {
return vreinterpretq_f64_p64(a); }
1377template <> [[gnu::always_inline]] nce float64x2_t reinterpret(poly128_t a) {
return vreinterpretq_f64_p128(a); }
1378[[gnu::always_inline]] nce int8_t move_saturate_narrow(int16_t a) {
return vqmovnh_s16(a); }
1379[[gnu::always_inline]] nce int16_t move_saturate_narrow(int32_t a) {
return vqmovns_s32(a); }
1380[[gnu::always_inline]] nce int32_t move_saturate_narrow(int64_t a) {
return vqmovnd_s64(a); }
1381[[gnu::always_inline]] nce uint8_t move_saturate_narrow(uint16_t a) {
return vqmovnh_u16(a); }
1382[[gnu::always_inline]] nce uint16_t move_saturate_narrow(uint32_t a) {
return vqmovns_u32(a); }
1383[[gnu::always_inline]] nce uint32_t move_saturate_narrow(uint64_t a) {
return vqmovnd_u64(a); }
1384[[gnu::always_inline]] nce uint8_t move_unsigned_saturate_narrow(int16_t a) {
return vqmovunh_s16(a); }
1385[[gnu::always_inline]] nce uint16_t move_unsigned_saturate_narrow(int32_t a) {
return vqmovuns_s32(a); }
1386[[gnu::always_inline]] nce uint32_t move_unsigned_saturate_narrow(int64_t a) {
return vqmovund_s64(a); }
1387template <
int lane>[[gnu::always_inline]] nce int64x2_t multiply_add_long_lane_high(int64x2_t a, int32x4_t b, int32x2_t v) {
return vmlal_high_lane_s32(a, b, v, lane); }
1388template <
int lane>[[gnu::always_inline]] nce int64x2_t multiply_add_long_lane(int64x2_t a, int32x2_t b, int32x4_t v) {
return vmlal_laneq_s32(a, b, v, lane); }
1389template <
int lane>[[gnu::always_inline]] nce int64x2_t multiply_add_long_lane_high(int64x2_t a, int32x4_t b, int32x4_t v) {
return vmlal_high_laneq_s32(a, b, v, lane); }
1390template <
int lane>[[gnu::always_inline]] nce int64x2_t multiply_subtract_long_lane_high(int64x2_t a, int32x4_t b, int32x2_t v) {
return vmlsl_high_lane_s32(a, b, v, lane); }
1391template <
int lane>[[gnu::always_inline]] nce int64x2_t multiply_subtract_long_lane(int64x2_t a, int32x2_t b, int32x4_t v) {
return vmlsl_laneq_s32(a, b, v, lane); }
1392template <
int lane>[[gnu::always_inline]] nce int64x2_t multiply_subtract_long_lane_high(int64x2_t a, int32x4_t b, int32x4_t v) {
return vmlsl_high_laneq_s32(a, b, v, lane); }
1393template <
int lane>[[gnu::always_inline]] nce float32_t multiply_lane(float32_t a, float32x2_t v) {
return vmuls_lane_f32(a, v, lane); }
1394template <
int lane>[[gnu::always_inline]] nce float64_t multiply_lane(float64_t a, float64x1_t v) {
return vmuld_lane_f64(a, v, lane); }
1395template <
int lane>[[gnu::always_inline]] nce float32_t multiply_lane(float32_t a, float32x4_t v) {
return vmuls_laneq_f32(a, v, lane); }
1396template <
int lane>[[gnu::always_inline]] nce float64_t multiply_lane(float64_t a, float64x2_t v) {
return vmuld_laneq_f64(a, v, lane); }
1397[[gnu::always_inline]] nce int64x2_t multiply_add_long_high(int64x2_t a, int32x4_t b, int32_t c) {
return vmlal_high_n_s32(a, b, c); }
1398[[gnu::always_inline]] nce int64x2_t multiply_subtract_long_high(int64x2_t a, int32x4_t b, int32_t c) {
return vmlsl_high_n_s32(a, b, c); }
1399[[gnu::always_inline]] nce int64x1_t negate(int64x1_t a) {
return vneg_s64(a); }
1400[[gnu::always_inline]] nce int64_t negate(int64_t a) {
return vnegd_s64(a); }
1401[[gnu::always_inline]] nce int64x2_t negate(int64x2_t a) {
return vnegq_s64(a); }
1402[[gnu::always_inline]] nce int64x1_t negate_saturate(int64x1_t a) {
return vqneg_s64(a); }
1403[[gnu::always_inline]] nce int64x2_t negate_saturate(int64x2_t a) {
return vqnegq_s64(a); }
1404[[gnu::always_inline]] nce int8_t negate_saturate(int8_t a) {
return vqnegb_s8(a); }
1405[[gnu::always_inline]] nce int16_t negate_saturate(int16_t a) {
return vqnegh_s16(a); }
1406[[gnu::always_inline]] nce int32_t negate_saturate(int32_t a) {
return vqnegs_s32(a); }
1407[[gnu::always_inline]] nce int64_t negate_saturate(int64_t a) {
return vqnegd_s64(a); }
1408template <
int lane1,
int lane2>[[gnu::always_inline]] nce int64x1_t copy_lane(int64x1_t a, int64x1_t b) {
return vcopy_lane_s64(a, lane1, b, lane2); }
1409template <
int lane1,
int lane2>[[gnu::always_inline]] nce int64x2_t copy_lane(int64x2_t a, int64x1_t b) {
return vcopyq_lane_s64(a, lane1, b, lane2); }
1410template <
int lane1,
int lane2>[[gnu::always_inline]] nce poly8x16_t copy_lane(poly8x16_t a, poly8x8_t b) {
return vcopyq_lane_p8(a, lane1, b, lane2); }
1411template <
int lane1,
int lane2>[[gnu::always_inline]] nce poly16x8_t copy_lane(poly16x8_t a, poly16x4_t b) {
return vcopyq_lane_p16(a, lane1, b, lane2); }
1412template <
int lane1,
int lane2>[[gnu::always_inline]] nce int64x1_t copy_lane(int64x1_t a, int64x2_t b) {
return vcopy_laneq_s64(a, lane1, b, lane2); }
1413template <
int lane1,
int lane2>[[gnu::always_inline]] nce int64x2_t copy_lane(int64x2_t a, int64x2_t b) {
return vcopyq_laneq_s64(a, lane1, b, lane2); }
1414template <
int lane1,
int lane2>[[gnu::always_inline]] nce poly8x16_t copy_lane(poly8x16_t a, poly8x16_t b) {
return vcopyq_laneq_p8(a, lane1, b, lane2); }
1415template <
int lane1,
int lane2>[[gnu::always_inline]] nce poly16x8_t copy_lane(poly16x8_t a, poly16x8_t b) {
return vcopyq_laneq_p16(a, lane1, b, lane2); }
1416[[gnu::always_inline]] nce poly8x16_t reverse_bits(poly8x16_t a) {
return vrbitq_p8(a); }
1417template <> [[gnu::always_inline]] nce float64x1_t create(uint64_t a) {
return vcreate_f64(a); }
1418template <> [[gnu::always_inline]] nce float64x1_t duplicate(float64_t value) {
return vdup_n_f64(value); }
1419template <> [[gnu::always_inline]] nce float64x2_t duplicate(float64_t value) {
return vdupq_n_f64(value); }
1420template <> [[gnu::always_inline]] nce float64x1_t move(float64_t value) {
return vmov_n_f64(value); }
1421template <> [[gnu::always_inline]] nce float64x2_t move(float64_t value) {
return vmovq_n_f64(value); }
1422template <
int lane>[[gnu::always_inline]] nce int64x1_t duplicate_lane(int64x2_t vec) {
return vdup_laneq_s64(vec, lane); }
1423template <
int lane>[[gnu::always_inline]] nce int64x2_t duplicate_lane_quad(int64x2_t vec) {
return vdupq_laneq_s64(vec, lane); }
1424template <
int lane>[[gnu::always_inline]] nce poly64x1_t duplicate_lane(poly64x2_t vec) {
return vdup_laneq_p64(vec, lane); }
1425template <
int lane>[[gnu::always_inline]] nce poly64x2_t duplicate_lane_quad(poly64x2_t vec) {
return vdupq_laneq_p64(vec, lane); }
1426template <
int lane>[[gnu::always_inline]] nce poly8x8_t duplicate_lane(poly8x16_t vec) {
return vdup_laneq_p8(vec, lane); }
1427template <
int lane>[[gnu::always_inline]] nce poly8x16_t duplicate_lane_quad(poly8x16_t vec) {
return vdupq_laneq_p8(vec, lane); }
1428template <
int lane>[[gnu::always_inline]] nce poly16x4_t duplicate_lane(poly16x8_t vec) {
return vdup_laneq_p16(vec, lane); }
1429template <
int lane>[[gnu::always_inline]] nce poly16x8_t duplicate_lane_quad(poly16x8_t vec) {
return vdupq_laneq_p16(vec, lane); }
1430template <
int lane>[[gnu::always_inline]] nce int64_t duplicate_lane(int64x1_t vec) {
return vdupd_lane_s64(vec, lane); }
1431template <
int lane>[[gnu::always_inline]] nce int64_t duplicate_lane(int64x2_t vec) {
return vdupd_laneq_s64(vec, lane); }
1432[[gnu::always_inline]] nce int64x2_t zip1(int64x2_t a, int64x2_t b) {
return vzip1q_s64(a, b); }
1433[[gnu::always_inline]] nce poly64x2_t zip1(poly64x2_t a, poly64x2_t b) {
return vzip1q_p64(a, b); }
1434[[gnu::always_inline]] nce poly8x16_t zip1(poly8x16_t a, poly8x16_t b) {
return vzip1q_p8(a, b); }
1435[[gnu::always_inline]] nce poly16x8_t zip1(poly16x8_t a, poly16x8_t b) {
return vzip1q_p16(a, b); }
1436[[gnu::always_inline]] nce int64x2_t zip2(int64x2_t a, int64x2_t b) {
return vzip2q_s64(a, b); }
1437[[gnu::always_inline]] nce poly64x2_t zip2(poly64x2_t a, poly64x2_t b) {
return vzip2q_p64(a, b); }
1438[[gnu::always_inline]] nce poly8x16_t zip2(poly8x16_t a, poly8x16_t b) {
return vzip2q_p8(a, b); }
1439[[gnu::always_inline]] nce poly16x8_t zip2(poly16x8_t a, poly16x8_t b) {
return vzip2q_p16(a, b); }
1440[[gnu::always_inline]] nce int64x2_t unzip1(int64x2_t a, int64x2_t b) {
return vuzp1q_s64(a, b); }
1441[[gnu::always_inline]] nce poly64x2_t unzip1(poly64x2_t a, poly64x2_t b) {
return vuzp1q_p64(a, b); }
1442[[gnu::always_inline]] nce poly8x16_t unzip1(poly8x16_t a, poly8x16_t b) {
return vuzp1q_p8(a, b); }
1443[[gnu::always_inline]] nce poly16x8_t unzip1(poly16x8_t a, poly16x8_t b) {
return vuzp1q_p16(a, b); }
1444[[gnu::always_inline]] nce int64x2_t unzip2(int64x2_t a, int64x2_t b) {
return vuzp2q_s64(a, b); }
1445[[gnu::always_inline]] nce poly64x2_t unzip2(poly64x2_t a, poly64x2_t b) {
return vuzp2q_p64(a, b); }
1446[[gnu::always_inline]] nce poly8x16_t unzip2(poly8x16_t a, poly8x16_t b) {
return vuzp2q_p8(a, b); }
1447[[gnu::always_inline]] nce poly16x8_t unzip2(poly16x8_t a, poly16x8_t b) {
return vuzp2q_p16(a, b); }
1448[[gnu::always_inline]] nce int64x2_t transpose_step_1(int64x2_t a, int64x2_t b) {
return vtrn1q_s64(a, b); }
1449[[gnu::always_inline]] nce poly64x2_t transpose_step_1(poly64x2_t a, poly64x2_t b) {
return vtrn1q_p64(a, b); }
1450[[gnu::always_inline]] nce poly8x16_t transpose_step_1(poly8x16_t a, poly8x16_t b) {
return vtrn1q_p8(a, b); }
1451[[gnu::always_inline]] nce poly16x8_t transpose_step_1(poly16x8_t a, poly16x8_t b) {
return vtrn1q_p16(a, b); }
1452[[gnu::always_inline]] nce int64x2_t transpose_step_2(int64x2_t a, int64x2_t b) {
return vtrn2q_s64(a, b); }
1453[[gnu::always_inline]] nce poly64x2_t transpose_step_2(poly64x2_t a, poly64x2_t b) {
return vtrn2q_p64(a, b); }
1454[[gnu::always_inline]] nce poly8x16_t transpose_step_2(poly8x16_t a, poly8x16_t b) {
return vtrn2q_p8(a, b); }
1455[[gnu::always_inline]] nce poly16x8_t transpose_step_2(poly16x8_t a, poly16x8_t b) {
return vtrn2q_p16(a, b); }
1456template <
int lane>[[gnu::always_inline]] nce float64x1_t set_lane(float64_t a, float64x1_t v) {
return vset_lane_f64(a, v, lane); }
1457template <
int lane>[[gnu::always_inline]] nce float64x2_t set_lane(float64_t a, float64x2_t v) {
return vsetq_lane_f64(a, v, lane); }
1458template <> [[gnu::always_inline]]
inline float64x1_t load1(float64_t
const *ptr) {
return vld1_f64(ptr); }
1459template <> [[gnu::always_inline]]
inline float64x2_t load1(float64_t
const *ptr) {
return vld1q_f64(ptr); }
1460template <
int lane>[[gnu::always_inline]] nce float64x1_t load1_lane(float64_t
const *ptr, float64x1_t src) {
return vld1_lane_f64(ptr, src, lane); }
1461template <
int lane>[[gnu::always_inline]] nce float64x2_t load1_lane(float64_t
const *ptr, float64x2_t src) {
return vld1q_lane_f64(ptr, src, lane); }
1462template <
int lane>[[gnu::always_inline]] nce uint64x1_t load_acquire1_lane(uint64_t
const *ptr, uint64x1_t src) {
return vldap1_lane_u64(ptr, src, lane); }
1463template <
int lane>[[gnu::always_inline]] nce uint64x2_t load_acquire1_lane(uint64_t
const *ptr, uint64x2_t src) {
return vldap1q_lane_u64(ptr, src, lane); }
1464template <
int lane>[[gnu::always_inline]] nce int64x1_t load_acquire1_lane(int64_t
const *ptr, int64x1_t src) {
return vldap1_lane_s64(ptr, src, lane); }
1465template <
int lane>[[gnu::always_inline]] nce int64x2_t load_acquire1_lane(int64_t
const *ptr, int64x2_t src) {
return vldap1q_lane_s64(ptr, src, lane); }
1466template <
int lane>[[gnu::always_inline]] nce float64x1_t load_acquire1_lane(float64_t
const *ptr, float64x1_t src) {
return vldap1_lane_f64(ptr, src, lane); }
1467template <
int lane>[[gnu::always_inline]] nce float64x2_t load_acquire1_lane(float64_t
const *ptr, float64x2_t src) {
return vldap1q_lane_f64(ptr, src, lane); }
1468template <
int lane>[[gnu::always_inline]] nce poly64x1_t load_acquire1_lane(poly64_t
const *ptr, poly64x1_t src) {
return vldap1_lane_p64(ptr, src, lane); }
1469template <
int lane>[[gnu::always_inline]] nce poly64x2_t load_acquire1_lane(poly64_t
const *ptr, poly64x2_t src) {
return vldap1q_lane_p64(ptr, src, lane); }
1470template <> [[gnu::always_inline]]
inline float64x1_t load1_duplicate(float64_t
const *ptr) {
return vld1_dup_f64(ptr); }
1471template <> [[gnu::always_inline]]
inline float64x2_t load1_duplicate(float64_t
const *ptr) {
return vld1q_dup_f64(ptr); }
1472template <
int lane>[[gnu::always_inline]] nce
void store_release1_lane(uint64_t *ptr, uint64x1_t val) {
return vstl1_lane_u64(ptr, val, lane); }
1473template <
int lane>[[gnu::always_inline]] nce
void store_release1_lane(uint64_t *ptr, uint64x2_t val) {
return vstl1q_lane_u64(ptr, val, lane); }
1474template <
int lane>[[gnu::always_inline]] nce
void store_release1_lane(int64_t *ptr, int64x1_t val) {
return vstl1_lane_s64(ptr, val, lane); }
1475template <
int lane>[[gnu::always_inline]] nce
void store_release1_lane(int64_t *ptr, int64x2_t val) {
return vstl1q_lane_s64(ptr, val, lane); }
1476template <
int lane>[[gnu::always_inline]] nce
void store_release1_lane(float64_t *ptr, float64x1_t val) {
return vstl1_lane_f64(ptr, val, lane); }
1477template <
int lane>[[gnu::always_inline]] nce
void store_release1_lane(float64_t *ptr, float64x2_t val) {
return vstl1q_lane_f64(ptr, val, lane); }
1478template <
int lane>[[gnu::always_inline]] nce
void store_release1_lane(poly64_t *ptr, poly64x1_t val) {
return vstl1_lane_p64(ptr, val, lane); }
1479template <
int lane>[[gnu::always_inline]] nce
void store_release1_lane(poly64_t *ptr, poly64x2_t val) {
return vstl1q_lane_p64(ptr, val, lane); }
1480template <> [[gnu::always_inline]]
inline int64x2x2_t load2(int64_t
const *ptr) {
return vld2q_s64(ptr); }
1481template <> [[gnu::always_inline]]
inline uint64x2x2_t load2(uint64_t
const *ptr) {
return vld2q_u64(ptr); }
1482template <> [[gnu::always_inline]]
inline poly64x2x2_t load2(poly64_t
const *ptr) {
return vld2q_p64(ptr); }
1483template <> [[gnu::always_inline]]
inline float64x1x2_t load2(float64_t
const *ptr) {
return vld2_f64(ptr); }
1484template <> [[gnu::always_inline]]
inline float64x2x2_t load2(float64_t
const *ptr) {
return vld2q_f64(ptr); }
1485template <> [[gnu::always_inline]]
inline int64x2x3_t load3(int64_t
const *ptr) {
return vld3q_s64(ptr); }
1486template <> [[gnu::always_inline]]
inline uint64x2x3_t load3(uint64_t
const *ptr) {
return vld3q_u64(ptr); }
1487template <> [[gnu::always_inline]]
inline poly64x2x3_t load3(poly64_t
const *ptr) {
return vld3q_p64(ptr); }
1488template <> [[gnu::always_inline]]
inline float64x1x3_t load3(float64_t
const *ptr) {
return vld3_f64(ptr); }
1489template <> [[gnu::always_inline]]
inline float64x2x3_t load3(float64_t
const *ptr) {
return vld3q_f64(ptr); }
1490template <> [[gnu::always_inline]]
inline int64x2x4_t load4(int64_t
const *ptr) {
return vld4q_s64(ptr); }
1491template <> [[gnu::always_inline]]
inline uint64x2x4_t load4(uint64_t
const *ptr) {
return vld4q_u64(ptr); }
1492template <> [[gnu::always_inline]]
inline poly64x2x4_t load4(poly64_t
const *ptr) {
return vld4q_p64(ptr); }
1493template <> [[gnu::always_inline]]
inline float64x1x4_t load4(float64_t
const *ptr) {
return vld4_f64(ptr); }
1494template <> [[gnu::always_inline]]
inline float64x2x4_t load4(float64_t
const *ptr) {
return vld4q_f64(ptr); }
1495template <> [[gnu::always_inline]]
inline int64x2x2_t load2_duplicate(int64_t
const *ptr) {
return vld2q_dup_s64(ptr); }
1496template <> [[gnu::always_inline]]
inline uint64x2x2_t load2_duplicate(uint64_t
const *ptr) {
return vld2q_dup_u64(ptr); }
1497template <> [[gnu::always_inline]]
inline poly64x2x2_t load2_duplicate(poly64_t
const *ptr) {
return vld2q_dup_p64(ptr); }
1498template <> [[gnu::always_inline]]
inline float64x1x2_t load2_duplicate(float64_t
const *ptr) {
return vld2_dup_f64(ptr); }
1499template <> [[gnu::always_inline]]
inline float64x2x2_t load2_duplicate(float64_t
const *ptr) {
return vld2q_dup_f64(ptr); }
1500template <> [[gnu::always_inline]]
inline int64x2x3_t load3_duplicate(int64_t
const *ptr) {
return vld3q_dup_s64(ptr); }
1501template <> [[gnu::always_inline]]
inline uint64x2x3_t load3_duplicate(uint64_t
const *ptr) {
return vld3q_dup_u64(ptr); }
1502template <> [[gnu::always_inline]]
inline poly64x2x3_t load3_duplicate(poly64_t
const *ptr) {
return vld3q_dup_p64(ptr); }
1503template <> [[gnu::always_inline]]
inline float64x1x3_t load3_duplicate(float64_t
const *ptr) {
return vld3_dup_f64(ptr); }
1504template <> [[gnu::always_inline]]
inline float64x2x3_t load3_duplicate(float64_t
const *ptr) {
return vld3q_dup_f64(ptr); }
1505template <> [[gnu::always_inline]]
inline int64x2x4_t load4_duplicate(int64_t
const *ptr) {
return vld4q_dup_s64(ptr); }
1506template <> [[gnu::always_inline]]
inline uint64x2x4_t load4_duplicate(uint64_t
const *ptr) {
return vld4q_dup_u64(ptr); }
1507template <> [[gnu::always_inline]]
inline poly64x2x4_t load4_duplicate(poly64_t
const *ptr) {
return vld4q_dup_p64(ptr); }
1508template <> [[gnu::always_inline]]
inline float64x1x4_t load4_duplicate(float64_t
const *ptr) {
return vld4_dup_f64(ptr); }
1509template <> [[gnu::always_inline]]
inline float64x2x4_t load4_duplicate(float64_t
const *ptr) {
return vld4q_dup_f64(ptr); }
1510template <
int lane>[[gnu::always_inline]] nce int8x16x2_t load2_lane(int8_t
const *ptr, int8x16x2_t src) {
return vld2q_lane_s8(ptr, src, lane); }
1511template <
int lane>[[gnu::always_inline]] nce uint8x16x2_t load2_lane(uint8_t
const *ptr, uint8x16x2_t src) {
return vld2q_lane_u8(ptr, src, lane); }
1512template <
int lane>[[gnu::always_inline]] nce poly8x16x2_t load2_lane(poly8_t
const *ptr, poly8x16x2_t src) {
return vld2q_lane_p8(ptr, src, lane); }
1513template <
int lane>[[gnu::always_inline]] nce int64x1x2_t load2_lane(int64_t
const *ptr, int64x1x2_t src) {
return vld2_lane_s64(ptr, src, lane); }
1514template <
int lane>[[gnu::always_inline]] nce int64x2x2_t load2_lane(int64_t
const *ptr, int64x2x2_t src) {
return vld2q_lane_s64(ptr, src, lane); }
1515template <
int lane>[[gnu::always_inline]] nce uint64x1x2_t load2_lane(uint64_t
const *ptr, uint64x1x2_t src) {
return vld2_lane_u64(ptr, src, lane); }
1516template <
int lane>[[gnu::always_inline]] nce uint64x2x2_t load2_lane(uint64_t
const *ptr, uint64x2x2_t src) {
return vld2q_lane_u64(ptr, src, lane); }
1517template <
int lane>[[gnu::always_inline]] nce poly64x1x2_t load2_lane(poly64_t
const *ptr, poly64x1x2_t src) {
return vld2_lane_p64(ptr, src, lane); }
1518template <
int lane>[[gnu::always_inline]] nce poly64x2x2_t load2_lane(poly64_t
const *ptr, poly64x2x2_t src) {
return vld2q_lane_p64(ptr, src, lane); }
1519template <
int lane>[[gnu::always_inline]] nce float64x1x2_t load2_lane(float64_t
const *ptr, float64x1x2_t src) {
return vld2_lane_f64(ptr, src, lane); }
1520template <
int lane>[[gnu::always_inline]] nce float64x2x2_t load2_lane(float64_t
const *ptr, float64x2x2_t src) {
return vld2q_lane_f64(ptr, src, lane); }
1521template <
int lane>[[gnu::always_inline]] nce int8x16x3_t load3_lane(int8_t
const *ptr, int8x16x3_t src) {
return vld3q_lane_s8(ptr, src, lane); }
1522template <
int lane>[[gnu::always_inline]] nce uint8x16x3_t load3_lane(uint8_t
const *ptr, uint8x16x3_t src) {
return vld3q_lane_u8(ptr, src, lane); }
1523template <
int lane>[[gnu::always_inline]] nce poly8x16x3_t load3_lane(poly8_t
const *ptr, poly8x16x3_t src) {
return vld3q_lane_p8(ptr, src, lane); }
1524template <
int lane>[[gnu::always_inline]] nce int64x1x3_t load3_lane(int64_t
const *ptr, int64x1x3_t src) {
return vld3_lane_s64(ptr, src, lane); }
1525template <
int lane>[[gnu::always_inline]] nce int64x2x3_t load3_lane(int64_t
const *ptr, int64x2x3_t src) {
return vld3q_lane_s64(ptr, src, lane); }
1526template <
int lane>[[gnu::always_inline]] nce uint64x1x3_t load3_lane(uint64_t
const *ptr, uint64x1x3_t src) {
return vld3_lane_u64(ptr, src, lane); }
1527template <
int lane>[[gnu::always_inline]] nce uint64x2x3_t load3_lane(uint64_t
const *ptr, uint64x2x3_t src) {
return vld3q_lane_u64(ptr, src, lane); }
1528template <
int lane>[[gnu::always_inline]] nce poly64x1x3_t load3_lane(poly64_t
const *ptr, poly64x1x3_t src) {
return vld3_lane_p64(ptr, src, lane); }
1529template <
int lane>[[gnu::always_inline]] nce poly64x2x3_t load3_lane(poly64_t
const *ptr, poly64x2x3_t src) {
return vld3q_lane_p64(ptr, src, lane); }
1530template <
int lane>[[gnu::always_inline]] nce float64x1x3_t load3_lane(float64_t
const *ptr, float64x1x3_t src) {
return vld3_lane_f64(ptr, src, lane); }
1531template <
int lane>[[gnu::always_inline]] nce float64x2x3_t load3_lane(float64_t
const *ptr, float64x2x3_t src) {
return vld3q_lane_f64(ptr, src, lane); }
1532template <
int lane>[[gnu::always_inline]] nce int8x16x4_t load4_lane(int8_t
const *ptr, int8x16x4_t src) {
return vld4q_lane_s8(ptr, src, lane); }
1533template <
int lane>[[gnu::always_inline]] nce uint8x16x4_t load4_lane(uint8_t
const *ptr, uint8x16x4_t src) {
return vld4q_lane_u8(ptr, src, lane); }
1534template <
int lane>[[gnu::always_inline]] nce poly8x16x4_t load4_lane(poly8_t
const *ptr, poly8x16x4_t src) {
return vld4q_lane_p8(ptr, src, lane); }
1535template <
int lane>[[gnu::always_inline]] nce int64x1x4_t load4_lane(int64_t
const *ptr, int64x1x4_t src) {
return vld4_lane_s64(ptr, src, lane); }
1536template <
int lane>[[gnu::always_inline]] nce int64x2x4_t load4_lane(int64_t
const *ptr, int64x2x4_t src) {
return vld4q_lane_s64(ptr, src, lane); }
1537template <
int lane>[[gnu::always_inline]] nce uint64x1x4_t load4_lane(uint64_t
const *ptr, uint64x1x4_t src) {
return vld4_lane_u64(ptr, src, lane); }
1538template <
int lane>[[gnu::always_inline]] nce uint64x2x4_t load4_lane(uint64_t
const *ptr, uint64x2x4_t src) {
return vld4q_lane_u64(ptr, src, lane); }
1539template <
int lane>[[gnu::always_inline]] nce poly64x1x4_t load4_lane(poly64_t
const *ptr, poly64x1x4_t src) {
return vld4_lane_p64(ptr, src, lane); }
1540template <
int lane>[[gnu::always_inline]] nce poly64x2x4_t load4_lane(poly64_t
const *ptr, poly64x2x4_t src) {
return vld4q_lane_p64(ptr, src, lane); }
1541template <
int lane>[[gnu::always_inline]] nce float64x1x4_t load4_lane(float64_t
const *ptr, float64x1x4_t src) {
return vld4_lane_f64(ptr, src, lane); }
1542template <
int lane>[[gnu::always_inline]] nce float64x2x4_t load4_lane(float64_t
const *ptr, float64x2x4_t src) {
return vld4q_lane_f64(ptr, src, lane); }
1543template <> [[gnu::always_inline]]
inline float64x1x2_t load1_x2(float64_t
const *ptr) {
return vld1_f64_x2(ptr); }
1544template <> [[gnu::always_inline]]
inline float64x2x2_t load1_x2(float64_t
const *ptr) {
return vld1q_f64_x2(ptr); }
1545template <> [[gnu::always_inline]]
inline float64x1x3_t load1_x3(float64_t
const *ptr) {
return vld1_f64_x3(ptr); }
1546template <> [[gnu::always_inline]]
inline float64x2x3_t load1_x3(float64_t
const *ptr) {
return vld1q_f64_x3(ptr); }
1547template <> [[gnu::always_inline]]
inline float64x1x4_t load1_x4(float64_t
const *ptr) {
return vld1_f64_x4(ptr); }
1548template <> [[gnu::always_inline]]
inline float64x2x4_t load1_x4(float64_t
const *ptr) {
return vld1q_f64_x4(ptr); }
1549template <> [[gnu::always_inline]]
inline void store1(float64_t *ptr, float64x1_t val) {
return vst1_f64(ptr, val); }
1550template <> [[gnu::always_inline]]
inline void store1(float64_t *ptr, float64x2_t val) {
return vst1q_f64(ptr, val); }
1551template <
int lane>[[gnu::always_inline]] nce
void store1_lane(float64_t *ptr, float64x1_t val) {
return vst1_lane_f64(ptr, val, lane); }
1552template <
int lane>[[gnu::always_inline]] nce
void store1_lane(float64_t *ptr, float64x2_t val) {
return vst1q_lane_f64(ptr, val, lane); }
1553template <> [[gnu::always_inline]]
inline void store2(int64_t *ptr, int64x2x2_t val) {
return vst2q_s64(ptr, val); }
1554template <> [[gnu::always_inline]]
inline void store2(uint64_t *ptr, uint64x2x2_t val) {
return vst2q_u64(ptr, val); }
1555template <> [[gnu::always_inline]]
inline void store2(poly64_t *ptr, poly64x2x2_t val) {
return vst2q_p64(ptr, val); }
1556template <> [[gnu::always_inline]]
inline void store2(float64_t *ptr, float64x1x2_t val) {
return vst2_f64(ptr, val); }
1557template <> [[gnu::always_inline]]
inline void store2(float64_t *ptr, float64x2x2_t val) {
return vst2q_f64(ptr, val); }
1558template <> [[gnu::always_inline]]
inline void store3(int64_t *ptr, int64x2x3_t val) {
return vst3q_s64(ptr, val); }
1559template <> [[gnu::always_inline]]
inline void store3(uint64_t *ptr, uint64x2x3_t val) {
return vst3q_u64(ptr, val); }
1560template <> [[gnu::always_inline]]
inline void store3(poly64_t *ptr, poly64x2x3_t val) {
return vst3q_p64(ptr, val); }
1561template <> [[gnu::always_inline]]
inline void store3(float64_t *ptr, float64x1x3_t val) {
return vst3_f64(ptr, val); }
1562template <> [[gnu::always_inline]]
inline void store3(float64_t *ptr, float64x2x3_t val) {
return vst3q_f64(ptr, val); }
1563template <> [[gnu::always_inline]]
inline void store4(int64_t *ptr, int64x2x4_t val) {
return vst4q_s64(ptr, val); }
1564template <> [[gnu::always_inline]]
inline void store4(uint64_t *ptr, uint64x2x4_t val) {
return vst4q_u64(ptr, val); }
1565template <> [[gnu::always_inline]]
inline void store4(poly64_t *ptr, poly64x2x4_t val) {
return vst4q_p64(ptr, val); }
1566template <> [[gnu::always_inline]]
inline void store4(float64_t *ptr, float64x1x4_t val) {
return vst4_f64(ptr, val); }
1567template <> [[gnu::always_inline]]
inline void store4(float64_t *ptr, float64x2x4_t val) {
return vst4q_f64(ptr, val); }
1568template <
int lane>[[gnu::always_inline]] nce
void store2_lane(int8_t *ptr, int8x16x2_t val) {
return vst2q_lane_s8(ptr, val, lane); }
1569template <
int lane>[[gnu::always_inline]] nce
void store2_lane(uint8_t *ptr, uint8x16x2_t val) {
return vst2q_lane_u8(ptr, val, lane); }
1570template <
int lane>[[gnu::always_inline]] nce
void store2_lane(poly8_t *ptr, poly8x16x2_t val) {
return vst2q_lane_p8(ptr, val, lane); }
1571template <
int lane>[[gnu::always_inline]] nce
void store2_lane(int64_t *ptr, int64x1x2_t val) {
return vst2_lane_s64(ptr, val, lane); }
1572template <
int lane>[[gnu::always_inline]] nce
void store2_lane(int64_t *ptr, int64x2x2_t val) {
return vst2q_lane_s64(ptr, val, lane); }
1573template <
int lane>[[gnu::always_inline]] nce
void store2_lane(uint64_t *ptr, uint64x1x2_t val) {
return vst2_lane_u64(ptr, val, lane); }
1574template <
int lane>[[gnu::always_inline]] nce
void store2_lane(uint64_t *ptr, uint64x2x2_t val) {
return vst2q_lane_u64(ptr, val, lane); }
1575template <
int lane>[[gnu::always_inline]] nce
void store2_lane(poly64_t *ptr, poly64x1x2_t val) {
return vst2_lane_p64(ptr, val, lane); }
1576template <
int lane>[[gnu::always_inline]] nce
void store2_lane(poly64_t *ptr, poly64x2x2_t val) {
return vst2q_lane_p64(ptr, val, lane); }
1577template <
int lane>[[gnu::always_inline]] nce
void store2_lane(float64_t *ptr, float64x1x2_t val) {
return vst2_lane_f64(ptr, val, lane); }
1578template <
int lane>[[gnu::always_inline]] nce
void store2_lane(float64_t *ptr, float64x2x2_t val) {
return vst2q_lane_f64(ptr, val, lane); }
1579template <
int lane>[[gnu::always_inline]] nce
void store3_lane(int64_t *ptr, int64x1x3_t val) {
return vst3_lane_s64(ptr, val, lane); }
1580template <
int lane>[[gnu::always_inline]] nce
void store3_lane(int64_t *ptr, int64x2x3_t val) {
return vst3q_lane_s64(ptr, val, lane); }
1581template <
int lane>[[gnu::always_inline]] nce
void store3_lane(uint64_t *ptr, uint64x1x3_t val) {
return vst3_lane_u64(ptr, val, lane); }
1582template <
int lane>[[gnu::always_inline]] nce
void store3_lane(uint64_t *ptr, uint64x2x3_t val) {
return vst3q_lane_u64(ptr, val, lane); }
1583template <
int lane>[[gnu::always_inline]] nce
void store3_lane(poly64_t *ptr, poly64x1x3_t val) {
return vst3_lane_p64(ptr, val, lane); }
1584template <
int lane>[[gnu::always_inline]] nce
void store3_lane(poly64_t *ptr, poly64x2x3_t val) {
return vst3q_lane_p64(ptr, val, lane); }
1585template <
int lane>[[gnu::always_inline]] nce
void store3_lane(float64_t *ptr, float64x1x3_t val) {
return vst3_lane_f64(ptr, val, lane); }
1586template <
int lane>[[gnu::always_inline]] nce
void store3_lane(float64_t *ptr, float64x2x3_t val) {
return vst3q_lane_f64(ptr, val, lane); }
1587template <
int lane>[[gnu::always_inline]] nce
void store4_lane(int8_t *ptr, int8x16x4_t val) {
return vst4q_lane_s8(ptr, val, lane); }
1588template <
int lane>[[gnu::always_inline]] nce
void store4_lane(uint8_t *ptr, uint8x16x4_t val) {
return vst4q_lane_u8(ptr, val, lane); }
1589template <
int lane>[[gnu::always_inline]] nce
void store4_lane(poly8_t *ptr, poly8x16x4_t val) {
return vst4q_lane_p8(ptr, val, lane); }
1590template <
int lane>[[gnu::always_inline]] nce
void store4_lane(int64_t *ptr, int64x1x4_t val) {
return vst4_lane_s64(ptr, val, lane); }
1591template <
int lane>[[gnu::always_inline]] nce
void store4_lane(int64_t *ptr, int64x2x4_t val) {
return vst4q_lane_s64(ptr, val, lane); }
1592template <
int lane>[[gnu::always_inline]] nce
void store4_lane(uint64_t *ptr, uint64x1x4_t val) {
return vst4_lane_u64(ptr, val, lane); }
1593template <
int lane>[[gnu::always_inline]] nce
void store4_lane(uint64_t *ptr, uint64x2x4_t val) {
return vst4q_lane_u64(ptr, val, lane); }
1594template <
int lane>[[gnu::always_inline]] nce
void store4_lane(poly64_t *ptr, poly64x1x4_t val) {
return vst4_lane_p64(ptr, val, lane); }
1595template <
int lane>[[gnu::always_inline]] nce
void store4_lane(poly64_t *ptr, poly64x2x4_t val) {
return vst4q_lane_p64(ptr, val, lane); }
1596template <
int lane>[[gnu::always_inline]] nce
void store4_lane(float64_t *ptr, float64x1x4_t val) {
return vst4_lane_f64(ptr, val, lane); }
1597template <
int lane>[[gnu::always_inline]] nce
void store4_lane(float64_t *ptr, float64x2x4_t val) {
return vst4q_lane_f64(ptr, val, lane); }
1598[[gnu::always_inline]]
inline void store1_x2(float64_t *ptr, float64x1x2_t val) {
return vst1_f64_x2(ptr, val); }
1599[[gnu::always_inline]]
inline void store1_x2(float64_t *ptr, float64x2x2_t val) {
return vst1q_f64_x2(ptr, val); }
1600[[gnu::always_inline]]
inline void store1_x3(float64_t *ptr, float64x1x3_t val) {
return vst1_f64_x3(ptr, val); }
1601[[gnu::always_inline]]
inline void store1_x3(float64_t *ptr, float64x2x3_t val) {
return vst1q_f64_x3(ptr, val); }
1602[[gnu::always_inline]]
inline void store1_x4(float64_t *ptr, float64x1x4_t val) {
return vst1_f64_x4(ptr, val); }
1603[[gnu::always_inline]]
inline void store1_x4(float64_t *ptr, float64x2x4_t val) {
return vst1q_f64_x4(ptr, val); }
1604[[gnu::always_inline]] nce poly8x8_t table_lookup1_saturate(poly8x16_t t, uint8x8_t idx) {
return vqtbl1_p8(t, idx); }
1605[[gnu::always_inline]] nce poly8x16_t table_lookup1_saturate(poly8x16_t t, uint8x16_t idx) {
return vqtbl1q_p8(t, idx); }
1606[[gnu::always_inline]] nce int8x8_t table_lookup2_saturate(int8x16x2_t t, uint8x8_t idx) {
return vqtbl2_s8(t, idx); }
1607[[gnu::always_inline]] nce int8x16_t table_lookup2_saturate(int8x16x2_t t, uint8x16_t idx) {
return vqtbl2q_s8(t, idx); }
1608[[gnu::always_inline]] nce uint8x8_t table_lookup2_saturate(uint8x16x2_t t, uint8x8_t idx) {
return vqtbl2_u8(t, idx); }
1609[[gnu::always_inline]] nce uint8x16_t table_lookup2_saturate(uint8x16x2_t t, uint8x16_t idx) {
return vqtbl2q_u8(t, idx); }
1610[[gnu::always_inline]] nce poly8x8_t table_lookup2_saturate(poly8x16x2_t t, uint8x8_t idx) {
return vqtbl2_p8(t, idx); }
1611[[gnu::always_inline]] nce poly8x16_t table_lookup2_saturate(poly8x16x2_t t, uint8x16_t idx) {
return vqtbl2q_p8(t, idx); }
1612[[gnu::always_inline]] nce int8x8_t table_lookup3_saturate(int8x16x3_t t, uint8x8_t idx) {
return vqtbl3_s8(t, idx); }
1613[[gnu::always_inline]] nce int8x16_t table_lookup3_saturate(int8x16x3_t t, uint8x16_t idx) {
return vqtbl3q_s8(t, idx); }
1614[[gnu::always_inline]] nce uint8x8_t table_lookup3_saturate(uint8x16x3_t t, uint8x8_t idx) {
return vqtbl3_u8(t, idx); }
1615[[gnu::always_inline]] nce uint8x16_t table_lookup3_saturate(uint8x16x3_t t, uint8x16_t idx) {
return vqtbl3q_u8(t, idx); }
1616[[gnu::always_inline]] nce poly8x8_t table_lookup3_saturate(poly8x16x3_t t, uint8x8_t idx) {
return vqtbl3_p8(t, idx); }
1617[[gnu::always_inline]] nce poly8x16_t table_lookup3_saturate(poly8x16x3_t t, uint8x16_t idx) {
return vqtbl3q_p8(t, idx); }
1618[[gnu::always_inline]] nce int8x8_t table_lookup4_saturate(int8x16x4_t t, uint8x8_t idx) {
return vqtbl4_s8(t, idx); }
1619[[gnu::always_inline]] nce int8x16_t table_lookup4_saturate(int8x16x4_t t, uint8x16_t idx) {
return vqtbl4q_s8(t, idx); }
1620[[gnu::always_inline]] nce uint8x8_t table_lookup4_saturate(uint8x16x4_t t, uint8x8_t idx) {
return vqtbl4_u8(t, idx); }
1621[[gnu::always_inline]] nce uint8x16_t table_lookup4_saturate(uint8x16x4_t t, uint8x16_t idx) {
return vqtbl4q_u8(t, idx); }
1622[[gnu::always_inline]] nce poly8x8_t table_lookup4_saturate(poly8x16x4_t t, uint8x8_t idx) {
return vqtbl4_p8(t, idx); }
1623[[gnu::always_inline]] nce poly8x16_t table_lookup4_saturate(poly8x16x4_t t, uint8x16_t idx) {
return vqtbl4q_p8(t, idx); }
1624[[gnu::always_inline]] nce poly8x16_t table_extend1_saturate(poly8x16_t a, poly8x16_t t, uint8x16_t idx) {
return vqtbx1q_p8(a, t, idx); }
1625[[gnu::always_inline]] nce poly8x16_t table_extend2_saturate(poly8x16_t a, poly8x16x2_t t, uint8x16_t idx) {
return vqtbx2q_p8(a, t, idx); }
1626[[gnu::always_inline]] nce poly8x16_t table_extend3_saturate(poly8x16_t a, poly8x16x3_t t, uint8x16_t idx) {
return vqtbx3q_p8(a, t, idx); }
1627[[gnu::always_inline]] nce poly8x16_t table_extend4_saturate(poly8x16_t a, poly8x16x4_t t, uint8x16_t idx) {
return vqtbx4q_p8(a, t, idx); }
1628#ifdef __ARM_FEATURE_LUT
1629template <
int index>[[gnu::always_inline]] nce uint8x16_t lookup_table_2bit_index_lane(uint8x8_t vn, uint8x8_t vm) {
return vluti2_lane_u8(vn, vm, index); }
1630template <
int index>[[gnu::always_inline]] nce uint8x16_t lookup_table_2bit_index_lane(uint8x8_t vn, uint8x16_t vm) {
return vluti2_laneq_u8(vn, vm, index); }
1631template <
int index>[[gnu::always_inline]] nce uint8x16_t lookup_table_2bit_index_lane(uint8x16_t vn, uint8x8_t vm) {
return vluti2q_lane_u8(vn, vm, index); }
1632template <
int index>[[gnu::always_inline]] nce uint8x16_t lookup_table_4bit_index_lane(uint8x16_t vn, uint8x8_t vm) {
return vluti4q_lane_u8(vn, vm, index); }
1633template <
int index>[[gnu::always_inline]] nce uint8x16_t lookup_table_2bit_index_lane(uint8x16_t vn, uint8x16_t vm) {
return vluti2q_laneq_u8(vn, vm, index); }
1634template <
int index>[[gnu::always_inline]] nce uint8x16_t lookup_table_4bit_index_lane(uint8x16_t vn, uint8x16_t vm) {
return vluti4q_laneq_u8(vn, vm, index); }
1635template <
int index>[[gnu::always_inline]] nce int8x16_t lookup_table_2bit_index_lane(int8x8_t vn, uint8x8_t vm) {
return vluti2_lane_s8(vn, vm, index); }
1636template <
int index>[[gnu::always_inline]] nce int8x16_t lookup_table_2bit_index_lane(int8x8_t vn, uint8x16_t vm) {
return vluti2_laneq_s8(vn, vm, index); }
1637template <
int index>[[gnu::always_inline]] nce int8x16_t lookup_table_2bit_index_lane(int8x16_t vn, uint8x8_t vm) {
return vluti2q_lane_s8(vn, vm, index); }
1638template <
int index>[[gnu::always_inline]] nce int8x16_t lookup_table_4bit_index_lane(int8x16_t vn, uint8x8_t vm) {
return vluti4q_lane_s8(vn, vm, index); }
1639template <
int index>[[gnu::always_inline]] nce int8x16_t lookup_table_2bit_index_lane(int8x16_t vn, uint8x16_t vm) {
return vluti2q_laneq_s8(vn, vm, index); }
1640template <
int index>[[gnu::always_inline]] nce int8x16_t lookup_table_4bit_index_lane(int8x16_t vn, uint8x16_t vm) {
return vluti4q_laneq_s8(vn, vm, index); }
1641template <
int index>[[gnu::always_inline]] nce uint16x8_t lookup_table_2bit_index_lane(uint16x4_t vn, uint8x8_t vm) {
return vluti2_lane_u16(vn, vm, index); }
1642template <
int index>[[gnu::always_inline]] nce uint16x8_t lookup_table_2bit_index_lane(uint16x4_t vn, uint8x16_t vm) {
return vluti2_laneq_u16(vn, vm, index); }
1643template <
int index>[[gnu::always_inline]] nce uint16x8_t lookup_table_2bit_index_lane(uint16x8_t vn, uint8x8_t vm) {
return vluti2q_lane_u16(vn, vm, index); }
1644template <
int index>[[gnu::always_inline]] nce uint16x8_t lookup_table_2bit_index_lane(uint16x8_t vn, uint8x16_t vm) {
return vluti2q_laneq_u16(vn, vm, index); }
1645template <
int index>[[gnu::always_inline]] nce int16x8_t lookup_table_2bit_index_lane(int16x4_t vn, uint8x8_t vm) {
return vluti2_lane_s16(vn, vm, index); }
1646template <
int index>[[gnu::always_inline]] nce int16x8_t lookup_table_2bit_index_lane(int16x4_t vn, uint8x16_t vm) {
return vluti2_laneq_s16(vn, vm, index); }
1647template <
int index>[[gnu::always_inline]] nce int16x8_t lookup_table_2bit_index_lane(int16x8_t vn, uint8x8_t vm) {
return vluti2q_lane_s16(vn, vm, index); }
1648template <
int index>[[gnu::always_inline]] nce int16x8_t lookup_table_2bit_index_lane(int16x8_t vn, uint8x16_t vm) {
return vluti2q_laneq_s16(vn, vm, index); }
1649template <
int index>[[gnu::always_inline]] nce float16x8_t lookup_table_2bit_index_lane(float16x4_t vn, uint8x8_t vm) {
return vluti2_lane_f16(vn, vm, index); }
1650template <
int index>[[gnu::always_inline]] nce float16x8_t lookup_table_2bit_index_lane(float16x4_t vn, uint8x16_t vm) {
return vluti2_laneq_f16(vn, vm, index); }
1651template <
int index>[[gnu::always_inline]] nce float16x8_t lookup_table_2bit_index_lane(float16x8_t vn, uint8x8_t vm) {
return vluti2q_lane_f16(vn, vm, index); }
1652template <
int index>[[gnu::always_inline]] nce float16x8_t lookup_table_2bit_index_lane(float16x8_t vn, uint8x16_t vm) {
return vluti2q_laneq_f16(vn, vm, index); }
1653template <
int index>[[gnu::always_inline]] nce poly8x16_t lookup_table_2bit_index_lane(poly8x8_t vn, uint8x8_t vm) {
return vluti2_lane_p8(vn, vm, index); }
1654template <
int index>[[gnu::always_inline]] nce poly8x16_t lookup_table_2bit_index_lane(poly8x8_t vn, uint8x16_t vm) {
return vluti2_laneq_p8(vn, vm, index); }
1655template <
int index>[[gnu::always_inline]] nce poly16x8_t lookup_table_2bit_index_lane(poly16x4_t vn, uint8x8_t vm) {
return vluti2_lane_p16(vn, vm, index); }
1656template <
int index>[[gnu::always_inline]] nce poly16x8_t lookup_table_2bit_index_lane(poly16x4_t vn, uint8x16_t vm) {
return vluti2_laneq_p16(vn, vm, index); }
1657template <
int index>[[gnu::always_inline]] nce poly8x16_t lookup_table_2bit_index_lane(poly8x16_t vn, uint8x8_t vm) {
return vluti2q_lane_p8(vn, vm, index); }
1658template <
int index>[[gnu::always_inline]] nce poly8x16_t lookup_table_2bit_index_lane(poly8x16_t vn, uint8x16_t vm) {
return vluti2q_laneq_p8(vn, vm, index); }
1659template <
int index>[[gnu::always_inline]] nce bfloat16x8_t lookup_table_2bit_index_lane(bfloat16x4_t vn, uint8x8_t vm) {
return vluti2_lane_bf16(vn, vm, index); }
1660template <
int index>[[gnu::always_inline]] nce bfloat16x8_t lookup_table_2bit_index_lane(bfloat16x4_t vn, uint8x16_t vm) {
return vluti2_laneq_bf16(vn, vm, index); }
1661template <
int index>[[gnu::always_inline]] nce bfloat16x8_t lookup_table_2bit_index_lane(bfloat16x8_t vn, uint8x8_t vm) {
return vluti2q_lane_bf16(vn, vm, index); }
1662template <
int index>[[gnu::always_inline]] nce bfloat16x8_t lookup_table_2bit_index_lane(bfloat16x8_t vn, uint8x16_t vm) {
return vluti2q_laneq_bf16(vn, vm, index); }
1663template <
int index>[[gnu::always_inline]] nce poly16x8_t lookup_table_2bit_index_lane(poly16x8_t vn, uint8x8_t vm) {
return vluti2q_lane_p16(vn, vm, index); }
1664template <
int index>[[gnu::always_inline]] nce poly16x8_t lookup_table_2bit_index_lane(poly16x8_t vn, uint8x16_t vm) {
return vluti2q_laneq_p16(vn, vm, index); }
1665template <
int index>[[gnu::always_inline]] nce poly8x16_t lookup_table_4bit_index_lane(poly8x16_t vn, uint8x8_t vm) {
return vluti4q_lane_p8(vn, vm, index); }
1666template <
int index>[[gnu::always_inline]] nce poly8x16_t lookup_table_4bit_index_lane(poly8x16_t vn, uint8x16_t vm) {
return vluti4q_laneq_p8(vn, vm, index); }
1667template <
int index>[[gnu::always_inline]] nce uint16x8_t lookup_table_4bit_index_lane(uint16x8x2_t vn, uint8x8_t vm) {
return vluti4q_lane_u16_x2(vn, vm, index); }
1668template <
int index>[[gnu::always_inline]] nce uint16x8_t lookup_table_4bit_index_lane(uint16x8x2_t vn, uint8x16_t vm) {
return vluti4q_laneq_u16_x2(vn, vm, index); }
1669template <
int index>[[gnu::always_inline]] nce int16x8_t lookup_table_4bit_index_lane(int16x8x2_t vn, uint8x8_t vm) {
return vluti4q_lane_s16_x2(vn, vm, index); }
1670template <
int index>[[gnu::always_inline]] nce int16x8_t lookup_table_4bit_index_lane(int16x8x2_t vn, uint8x16_t vm) {
return vluti4q_laneq_s16_x2(vn, vm, index); }
1671template <
int index>[[gnu::always_inline]] nce float16x8_t lookup_table_4bit_index_lane(float16x8x2_t vn, uint8x8_t vm) {
return vluti4q_lane_f16_x2(vn, vm, index); }
1672template <
int index>[[gnu::always_inline]] nce float16x8_t lookup_table_4bit_index_lane(float16x8x2_t vn, uint8x16_t vm) {
return vluti4q_laneq_f16_x2(vn, vm, index); }
1673template <
int index>[[gnu::always_inline]] nce bfloat16x8_t lookup_table_4bit_index_lane(bfloat16x8x2_t vn, uint8x8_t vm) {
return vluti4q_lane_bf16_x2(vn, vm, index); }
1674template <
int index>[[gnu::always_inline]] nce bfloat16x8_t lookup_table_4bit_index_lane(bfloat16x8x2_t vn, uint8x16_t vm) {
return vluti4q_laneq_bf16_x2(vn, vm, index); }
1675template <
int index>[[gnu::always_inline]] nce poly16x8_t lookup_table_4bit_index_lane(poly16x8x2_t vn, uint8x8_t vm) {
return vluti4q_lane_p16_x2(vn, vm, index); }
1676template <
int index>[[gnu::always_inline]] nce poly16x8_t lookup_table_4bit_index_lane(poly16x8x2_t vn, uint8x16_t vm) {
return vluti4q_laneq_p16_x2(vn, vm, index); }
1678[[gnu::always_inline]] nce int16_t multiply_double_add_round_saturate_high(int16_t a, int16_t b, int16_t c) {
return vqrdmlahh_s16(a, b, c); }
1679[[gnu::always_inline]] nce int32_t multiply_double_add_round_saturate_high(int32_t a, int32_t b, int32_t c) {
return vqrdmlahs_s32(a, b, c); }
1680[[gnu::always_inline]] nce int16_t multiply_double_subtract_round_saturate_high(int16_t a, int16_t b, int16_t c) {
return vqrdmlshh_s16(a, b, c); }
1681[[gnu::always_inline]] nce int32_t multiply_double_subtract_round_saturate_high(int32_t a, int32_t b, int32_t c) {
return vqrdmlshs_s32(a, b, c); }
1682template <
int lane>[[gnu::always_inline]] nce int16_t multiply_double_add_round_saturate_high_lane(int16_t a, int16_t b, int16x4_t v) {
return vqrdmlahh_lane_s16(a, b, v, lane); }
1683template <
int lane>[[gnu::always_inline]] nce int16_t multiply_double_add_round_saturate_high_lane(int16_t a, int16_t b, int16x8_t v) {
return vqrdmlahh_laneq_s16(a, b, v, lane); }
1684template <
int lane>[[gnu::always_inline]] nce int32_t multiply_double_add_round_saturate_high_lane(int32_t a, int32_t b, int32x2_t v) {
return vqrdmlahs_lane_s32(a, b, v, lane); }
1685template <
int lane>[[gnu::always_inline]] nce int32_t multiply_double_add_round_saturate_high_lane(int32_t a, int32_t b, int32x4_t v) {
return vqrdmlahs_laneq_s32(a, b, v, lane); }
1686template <
int lane>[[gnu::always_inline]] nce int16_t multiply_double_subtract_round_saturate_high_lane(int16_t a, int16_t b, int16x4_t v) {
return vqrdmlshh_lane_s16(a, b, v, lane); }
1687template <
int lane>[[gnu::always_inline]] nce int16_t multiply_double_subtract_round_saturate_high_lane(int16_t a, int16_t b, int16x8_t v) {
return vqrdmlshh_laneq_s16(a, b, v, lane); }
1688template <
int lane>[[gnu::always_inline]] nce int32_t multiply_double_subtract_round_saturate_high_lane(int32_t a, int32_t b, int32x2_t v) {
return vqrdmlshs_lane_s32(a, b, v, lane); }
1689template <
int lane>[[gnu::always_inline]] nce int32_t multiply_double_subtract_round_saturate_high_lane(int32_t a, int32_t b, int32x4_t v) {
return vqrdmlshs_laneq_s32(a, b, v, lane); }
1690#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
1691[[gnu::always_inline]] nce float16_t absolute_difference(float16_t a, float16_t b) {
return vabdh_f16(a, b); }
1692[[gnu::always_inline]] nce float16_t reciprocal_estimate(float16_t a) {
return vrecpeh_f16(a); }
1693[[gnu::always_inline]] nce float16_t reciprocal_exponent(float16_t a) {
return vrecpxh_f16(a); }
1694[[gnu::always_inline]] nce float16_t reciprocal_sqrt_estimate(float16_t a) {
return vrsqrteh_f16(a); }
1695[[gnu::always_inline]] nce float16_t reciprocal_sqrt_step(float16_t a, float16_t b) {
return vrsqrtsh_f16(a, b); }
1696[[gnu::always_inline]] nce float16_t reciprocal_step(float16_t a, float16_t b) {
return vrecpsh_f16(a, b); }
1697[[gnu::always_inline]] nce float16_t max(float16_t a, float16_t b) {
return vmaxh_f16(a, b); }
1698[[gnu::always_inline]] nce float16_t min(float16_t a, float16_t b) {
return vminh_f16(a, b); }
1699[[gnu::always_inline]] nce float16_t multiply_extended(float16_t a, float16_t b) {
return vmulxh_f16(a, b); }
1700[[gnu::always_inline]] nce uint16_t equal_to_zero(float16_t a) {
return vceqzh_f16(a); }
1701[[gnu::always_inline]] nce uint16_t greater_than_or_equal_to_zero(float16_t a) {
return vcgezh_f16(a); }
1702[[gnu::always_inline]] nce uint16_t greater_than_zero(float16_t a) {
return vcgtzh_f16(a); }
1703[[gnu::always_inline]] nce uint16_t less_than_or_equal_to_zero(float16_t a) {
return vclezh_f16(a); }
1704[[gnu::always_inline]] nce uint16_t less_than_zero(float16_t a) {
return vcltzh_f16(a); }
1705[[gnu::always_inline]] nce uint16_t absolute_greater_than_or_equal(float16_t a, float16_t b) {
return vcageh_f16(a, b); }
1706[[gnu::always_inline]] nce uint16_t absolute_greater_than(float16_t a, float16_t b) {
return vcagth_f16(a, b); }
1707[[gnu::always_inline]] nce uint16_t absolute_less_than_or_equal(float16_t a, float16_t b) {
return vcaleh_f16(a, b); }
1708[[gnu::always_inline]] nce uint16_t absolute_less_than(float16_t a, float16_t b) {
return vcalth_f16(a, b); }
1709[[gnu::always_inline]] nce uint16_t equal(float16_t a, float16_t b) {
return vceqh_f16(a, b); }
1710[[gnu::always_inline]] nce uint16_t greater_than_or_equal(float16_t a, float16_t b) {
return vcgeh_f16(a, b); }
1711[[gnu::always_inline]] nce uint16_t greater_than(float16_t a, float16_t b) {
return vcgth_f16(a, b); }
1712[[gnu::always_inline]] nce uint16_t less_than_or_equal(float16_t a, float16_t b) {
return vcleh_f16(a, b); }
1713[[gnu::always_inline]] nce uint16_t less_than(float16_t a, float16_t b) {
return vclth_f16(a, b); }
1714template <> [[gnu::always_inline]] nce float16_t convert(int16_t a) {
return vcvth_f16_s16(a); }
1715template <> [[gnu::always_inline]] nce float16_t convert(int64_t a) {
return vcvth_f16_s64(a); }
1716template <> [[gnu::always_inline]] nce float16_t convert(uint16_t a) {
return vcvth_f16_u16(a); }
1717template <> [[gnu::always_inline]] nce float16_t convert(uint64_t a) {
return vcvth_f16_u64(a); }
1718template <> [[gnu::always_inline]] nce int16_t convert(float16_t a) {
return vcvth_s16_f16(a); }
1719template <> [[gnu::always_inline]] nce int64_t convert(float16_t a) {
return vcvth_s64_f16(a); }
1720template <> [[gnu::always_inline]] nce uint16_t convert(float16_t a) {
return vcvth_u16_f16(a); }
1721template <> [[gnu::always_inline]] nce uint64_t convert(float16_t a) {
return vcvth_u64_f16(a); }
1722template <> [[gnu::always_inline]] nce int16_t convert_round_to_nearest_with_ties_away_from_zero(float16_t a) {
return vcvtah_s16_f16(a); }
1723template <> [[gnu::always_inline]] nce int64_t convert_round_to_nearest_with_ties_away_from_zero(float16_t a) {
return vcvtah_s64_f16(a); }
1724template <> [[gnu::always_inline]] nce uint16_t convert_round_to_nearest_with_ties_away_from_zero(float16_t a) {
return vcvtah_u16_f16(a); }
1725template <> [[gnu::always_inline]] nce uint64_t convert_round_to_nearest_with_ties_away_from_zero(float16_t a) {
return vcvtah_u64_f16(a); }
1726template <> [[gnu::always_inline]] nce int16_t convert_round_toward_negative_infinity(float16_t a) {
return vcvtmh_s16_f16(a); }
1727template <> [[gnu::always_inline]] nce int64_t convert_round_toward_negative_infinity(float16_t a) {
return vcvtmh_s64_f16(a); }
1728template <> [[gnu::always_inline]] nce uint16_t convert_round_toward_negative_infinity(float16_t a) {
return vcvtmh_u16_f16(a); }
1729template <> [[gnu::always_inline]] nce uint64_t convert_round_toward_negative_infinity(float16_t a) {
return vcvtmh_u64_f16(a); }
1730template <> [[gnu::always_inline]] nce int16_t convert_round_to_nearest_with_ties_to_even(float16_t a) {
return vcvtnh_s16_f16(a); }
1731template <> [[gnu::always_inline]] nce int64_t convert_round_to_nearest_with_ties_to_even(float16_t a) {
return vcvtnh_s64_f16(a); }
1732template <> [[gnu::always_inline]] nce uint16_t convert_round_to_nearest_with_ties_to_even(float16_t a) {
return vcvtnh_u16_f16(a); }
1733template <> [[gnu::always_inline]] nce uint64_t convert_round_to_nearest_with_ties_to_even(float16_t a) {
return vcvtnh_u64_f16(a); }
1734template <> [[gnu::always_inline]] nce int16_t convert_round_toward_positive_infinity(float16_t a) {
return vcvtph_s16_f16(a); }
1735template <> [[gnu::always_inline]] nce int64_t convert_round_toward_positive_infinity(float16_t a) {
return vcvtph_s64_f16(a); }
1736template <> [[gnu::always_inline]] nce uint16_t convert_round_toward_positive_infinity(float16_t a) {
return vcvtph_u16_f16(a); }
1737template <> [[gnu::always_inline]] nce uint64_t convert_round_toward_positive_infinity(float16_t a) {
return vcvtph_u64_f16(a); }
1738template <
int n>[[gnu::always_inline]] nce float16_t convert(int16_t a) {
return vcvth_n_f16_s16(a, n); }
1739template <
int n>[[gnu::always_inline]] nce float16_t convert(int64_t a) {
return vcvth_n_f16_s64(a, n); }
1740template <
int n>[[gnu::always_inline]] nce float16_t convert(uint16_t a) {
return vcvth_n_f16_u16(a, n); }
1741template <
int n>[[gnu::always_inline]] nce float16_t convert(uint64_t a) {
return vcvth_n_f16_u64(a, n); }
1742template <
int n>[[gnu::always_inline]] nce int16_t convert(float16_t a) {
return vcvth_n_s16_f16(a, n); }
1743template <
int n>[[gnu::always_inline]] nce int64_t convert(float16_t a) {
return vcvth_n_s64_f16(a, n); }
1744template <
int n>[[gnu::always_inline]] nce uint16_t convert(float16_t a) {
return vcvth_n_u16_f16(a, n); }
1745template <
int n>[[gnu::always_inline]] nce uint64_t convert(float16_t a) {
return vcvth_n_u64_f16(a, n); }
1746template <
int lane>[[gnu::always_inline]] nce float16_t multiply_lane(float16_t a, float16x4_t v) {
return vmulh_lane_f16(a, v, lane); }
1747template <
int lane>[[gnu::always_inline]] nce float16_t multiply_lane(float16_t a, float16x8_t v) {
return vmulh_laneq_f16(a, v, lane); }
1748template <
int lane>[[gnu::always_inline]] nce float16_t multiply_extended_lane(float16_t a, float16x4_t v) {
return vmulxh_lane_f16(a, v, lane); }
1749template <
int lane>[[gnu::always_inline]] nce float16_t multiply_extended_lane(float16_t a, float16x8_t v) {
return vmulxh_laneq_f16(a, v, lane); }
1750template <
int lane>[[gnu::always_inline]] nce float16_t multiply_add_fused_lane(float16_t a, float16_t b, float16x4_t v) {
return vfmah_lane_f16(a, b, v, lane); }
1751template <
int lane>[[gnu::always_inline]] nce float16_t multiply_add_fused_lane(float16_t a, float16_t b, float16x8_t v) {
return vfmah_laneq_f16(a, b, v, lane); }
1752template <
int lane>[[gnu::always_inline]] nce float16_t multiply_subtract_fused_lane(float16_t a, float16_t b, float16x4_t v) {
return vfmsh_lane_f16(a, b, v, lane); }
1753template <
int lane>[[gnu::always_inline]] nce float16_t multiply_subtract_fused_lane(float16_t a, float16_t b, float16x8_t v) {
return vfmsh_laneq_f16(a, b, v, lane); }
1756template <
int lane1,
int lane2>[[gnu::always_inline]] nce bfloat16x4_t copy_lane(bfloat16x4_t a, bfloat16x4_t b) {
return vcopy_lane_bf16(a, lane1, b, lane2); }
1757template <
int lane1,
int lane2>[[gnu::always_inline]] nce bfloat16x8_t copy_lane(bfloat16x8_t a, bfloat16x4_t b) {
return vcopyq_lane_bf16(a, lane1, b, lane2); }
1758template <
int lane1,
int lane2>[[gnu::always_inline]] nce bfloat16x4_t copy_lane(bfloat16x4_t a, bfloat16x8_t b) {
return vcopy_laneq_bf16(a, lane1, b, lane2); }
1759template <
int lane1,
int lane2>[[gnu::always_inline]] nce bfloat16x8_t copy_lane(bfloat16x8_t a, bfloat16x8_t b) {
return vcopyq_laneq_bf16(a, lane1, b, lane2); }
1760template <> [[gnu::always_inline]] nce float64x1_t reinterpret(bfloat16x4_t a) {
return vreinterpret_f64_bf16(a); }
1761template <> [[gnu::always_inline]] nce float64x2_t reinterpret(bfloat16x8_t a) {
return vreinterpretq_f64_bf16(a); }