Argon 0.1.0
int.hpp
#pragma once
#include <arm_mve.h>
#include "neon_int.hpp"
#ifdef __cplusplus
#ifdef __clang__
#define nce constexpr
#else
#define nce inline
#endif
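// 'nce' expands to 'constexpr' when compiling with clang and to 'inline' on
// other compilers; it is applied to every wrapper below.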

namespace mve {
// clang-format off
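// Forward declarations of the helpers that are selected by their return type
// (create, increment/decrement duplicate, uninitialized, and the load_* forms).
// Their per-type definitions are expected to appear later in this header.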
template <typename T> nce T create(uint64_t a, uint64_t b);
template <typename T> nce T decrement_duplicate(uint32_t a);
template <typename T> nce T decrement_duplicate(uint32_t *a);
template <typename T> nce T decrement_duplicate(uint32_t a, mve_pred16_t p);
template <typename T> nce T decrement_duplicate(uint32_t *a, mve_pred16_t p);
template <typename T> nce T decrement_wrap_duplicate(uint32_t a, uint32_t b);
template <typename T> nce T decrement_wrap_duplicate(uint32_t *a, uint32_t b);
template <typename T> nce T decrement_wrap_duplicate(uint32_t a, uint32_t b, mve_pred16_t p);
template <typename T> nce T decrement_wrap_duplicate(uint32_t *a, uint32_t b, mve_pred16_t p);
template <typename T> nce T increment_duplicate(uint32_t a);
template <typename T> nce T increment_duplicate(uint32_t *a);
template <typename T> nce T increment_duplicate(uint32_t a, mve_pred16_t p);
template <typename T> nce T increment_duplicate(uint32_t *a, mve_pred16_t p);
template <typename T> nce T increment_wrap_duplicate(uint32_t a, uint32_t b);
template <typename T> nce T increment_wrap_duplicate(uint32_t *a, uint32_t b);
template <typename T> nce T increment_wrap_duplicate(uint32_t a, uint32_t b, mve_pred16_t p);
template <typename T> nce T increment_wrap_duplicate(uint32_t *a, uint32_t b, mve_pred16_t p);
template <typename T> nce T uninitialized();
template <typename T> nce T load_byte(int8_t const *base);
template <typename T> nce T load_byte(uint8_t const *base);
template <typename T> nce T load_byte(int8_t const *base, mve_pred16_t p);
template <typename T> nce T load_byte(uint8_t const *base, mve_pred16_t p);
template <typename T> nce T load_halfword(int16_t const *base);
template <typename T> nce T load_halfword(uint16_t const *base);
template <typename T> nce T load_halfword(int16_t const *base, mve_pred16_t p);
template <typename T> nce T load_halfword(uint16_t const *base, mve_pred16_t p);
template <typename T> nce T load_word_gather_base(uint32x4_t addr);
template <typename T> nce T load_word_gather_base(uint32x4_t addr, mve_pred16_t p);
template <typename T> nce T load_word_gather_base(uint32x4_t *addr);
template <typename T> nce T load_word_gather_base(uint32x4_t *addr, mve_pred16_t p);
template <typename T> nce T load_doubleword_gather_base(uint64x2_t addr);
template <typename T> nce T load_doubleword_gather_base(uint64x2_t addr, mve_pred16_t p);
template <typename T> nce T load_doubleword_gather_base(uint64x2_t *addr);
template <typename T> nce T load_doubleword_gather_base(uint64x2_t *addr, mve_pred16_t p);
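// uint8x16_t operations. Predicated '_m' forms take an 'inactive' vector whose
// lanes are kept where the predicate is false; '_x' forms leave those lanes
// undefined. Example (illustrative; assumes an MVE-enabled target and that the
// wrappers forward to the intrinsics shown on each line):
//   uint8x16_t va = vdupq_n_u8(1);
//   uint8x16_t vb = vdupq_n_u8(2);
//   mve_pred16_t p = vctp8q(10);          // predicate covering the first 10 lanes
//   uint8x16_t vc = mve::add(va, vb, p);  // predicated add -> vaddq_x_u8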
[[gnu::always_inline]] nce uint8x16_t reverse_16bit(uint8x16_t inactive, uint8x16_t a, mve_pred16_t p) { return vrev16q_m_u8(inactive, a, p); }
[[gnu::always_inline]] nce uint8x16_t reverse_32bit(uint8x16_t inactive, uint8x16_t a, mve_pred16_t p) { return vrev32q_m_u8(inactive, a, p); }
[[gnu::always_inline]] nce uint8x16_t reverse_64bit(uint8x16_t inactive, uint8x16_t a, mve_pred16_t p) { return vrev64q_m_u8(inactive, a, p); }
[[gnu::always_inline]] nce mve_pred16_t equal(uint8x16_t a, uint8x16_t b) { return vcmpeqq_u8(a, b); }
[[gnu::always_inline]] nce mve_pred16_t equal(uint8x16_t a, uint8x16_t b, mve_pred16_t p) { return vcmpeqq_m_u8(a, b, p); }
[[gnu::always_inline]] nce mve_pred16_t not_equal(uint8x16_t a, uint8x16_t b) { return vcmpneq_u8(a, b); }
[[gnu::always_inline]] nce mve_pred16_t not_equal(uint8x16_t a, uint8x16_t b, mve_pred16_t p) { return vcmpneq_m_u8(a, b, p); }
[[gnu::always_inline]] nce mve_pred16_t higher_or_same(uint8x16_t a, uint8x16_t b) { return vcmpcsq_u8(a, b); }
[[gnu::always_inline]] nce mve_pred16_t higher_or_same(uint8x16_t a, uint8x16_t b, mve_pred16_t p) { return vcmpcsq_m_u8(a, b, p); }
[[gnu::always_inline]] nce mve_pred16_t higher(uint8x16_t a, uint8x16_t b) { return vcmphiq_u8(a, b); }
[[gnu::always_inline]] nce uint8x16_t min(uint8x16_t inactive, uint8x16_t a, uint8x16_t b, mve_pred16_t p) { return vminq_m_u8(inactive, a, b, p); }
[[gnu::always_inline]] nce uint8x16_t max(uint8x16_t inactive, uint8x16_t a, uint8x16_t b, mve_pred16_t p) { return vmaxq_m_u8(inactive, a, b, p); }
[[gnu::always_inline]] nce uint8x16_t subtract_absolute(uint8x16_t inactive, uint8x16_t a, uint8x16_t b, mve_pred16_t p) { return vabdq_m_u8(inactive, a, b, p); }
[[gnu::always_inline]] nce uint8x16_t add(uint8x16_t inactive, uint8x16_t a, uint8x16_t b, mve_pred16_t p) { return vaddq_m_u8(inactive, a, b, p); }
[[gnu::always_inline]] nce mve_pred16_t higher(uint8x16_t a, uint8x16_t b, mve_pred16_t p) { return vcmphiq_m_u8(a, b, p); }
[[gnu::always_inline]] nce uint8x16_t min(uint8x16_t a, uint8x16_t b, mve_pred16_t p) { return vminq_x_u8(a, b, p); }
[[gnu::always_inline]] nce uint8x16_t max(uint8x16_t a, uint8x16_t b, mve_pred16_t p) { return vmaxq_x_u8(a, b, p); }
[[gnu::always_inline]] nce uint8x16_t subtract_absolute(uint8x16_t a, uint8x16_t b, mve_pred16_t p) { return vabdq_x_u8(a, b, p); }
[[gnu::always_inline]] nce uint8x16_t add(uint8x16_t a, uint8x16_t b, mve_pred16_t p) { return vaddq_x_u8(a, b, p); }
[[gnu::always_inline]] nce uint8x16_t add(uint8x16_t inactive, uint8x16_t a, uint8_t b, mve_pred16_t p) { return vaddq_m_n_u8(inactive, a, b, p); }
[[gnu::always_inline]] nce uint8x16_t minimum_absolute(uint8x16_t a, int8x16_t b) { return vminaq_s8(a, b); }
[[gnu::always_inline]] nce uint8x16_t minimum_absolute(uint8x16_t a, int8x16_t b, mve_pred16_t p) { return vminaq_m_s8(a, b, p); }
[[gnu::always_inline]] nce uint8x16_t maximum_absolute(uint8x16_t a, int8x16_t b) { return vmaxaq_s8(a, b); }
[[gnu::always_inline]] nce uint8x16_t maximum_absolute(uint8x16_t a, int8x16_t b, mve_pred16_t p) { return vmaxaq_m_s8(a, b, p); }
[[gnu::always_inline]] nce uint8x16_t reverse_16bit(uint8x16_t a, mve_pred16_t p) { return vrev16q_x_u8(a, p); }
[[gnu::always_inline]] nce uint8x16_t reverse_32bit(uint8x16_t a, mve_pred16_t p) { return vrev32q_x_u8(a, p); }
[[gnu::always_inline]] nce uint8x16_t reverse_64bit(uint8x16_t a, mve_pred16_t p) { return vrev64q_x_u8(a, p); }
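// The wrappers below take the intrinsic's immediate operand as a template
// parameter so it stays a compile-time constant. Illustrative use, given some
// uint8x16_t 'inactive', uint32_t 'base' and mve_pred16_t 'p' in scope:
//   uint8x16_t v = mve::decrement_duplicate<4>(inactive, base, p); // vddupq_m_n_u8(inactive, base, 4, p)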
template <int imm>[[gnu::always_inline]] nce uint8x16_t decrement_duplicate(uint8x16_t inactive, uint32_t a, mve_pred16_t p) { return vddupq_m_n_u8(inactive, a, imm, p); }
template <int imm>[[gnu::always_inline]] nce uint8x16_t decrement_duplicate(uint8x16_t inactive, uint32_t *a, mve_pred16_t p) { return vddupq_m_wb_u8(inactive, a, imm, p); }
template <int imm>[[gnu::always_inline]] nce uint8x16_t decrement_wrap_duplicate(uint8x16_t inactive, uint32_t a, uint32_t b, mve_pred16_t p) { return vdwdupq_m_n_u8(inactive, a, b, imm, p); }
template <int imm>[[gnu::always_inline]] nce uint8x16_t decrement_wrap_duplicate(uint8x16_t inactive, uint32_t *a, uint32_t b, mve_pred16_t p) { return vdwdupq_m_wb_u8(inactive, a, b, imm, p); }
template <int imm>[[gnu::always_inline]] nce uint8x16_t increment_duplicate(uint8x16_t inactive, uint32_t a, mve_pred16_t p) { return vidupq_m_n_u8(inactive, a, imm, p); }
template <int imm>[[gnu::always_inline]] nce uint8x16_t increment_duplicate(uint8x16_t inactive, uint32_t *a, mve_pred16_t p) { return vidupq_m_wb_u8(inactive, a, imm, p); }
template <int imm>[[gnu::always_inline]] nce uint8x16_t increment_wrap_duplicate(uint8x16_t inactive, uint32_t a, uint32_t b, mve_pred16_t p) { return viwdupq_m_n_u8(inactive, a, b, imm, p); }
template <int imm>[[gnu::always_inline]] nce uint8x16_t increment_wrap_duplicate(uint8x16_t inactive, uint32_t *a, uint32_t b, mve_pred16_t p) { return viwdupq_m_wb_u8(inactive, a, b, imm, p); }
[[gnu::always_inline]] nce uint8x16_t duplicate(uint8x16_t inactive, uint8_t a, mve_pred16_t p) { return vdupq_m_n_u8(inactive, a, p); }
[[gnu::always_inline]] nce uint8x16_t uninitialized(uint8x16_t t) { return vuninitializedq(t); }
[[gnu::always_inline]] nce mve_pred16_t equal(uint8x16_t a, uint8_t b) { return vcmpeqq_n_u8(a, b); }
[[gnu::always_inline]] nce mve_pred16_t equal(uint8x16_t a, uint8_t b, mve_pred16_t p) { return vcmpeqq_m_n_u8(a, b, p); }
[[gnu::always_inline]] nce mve_pred16_t not_equal(uint8x16_t a, uint8_t b) { return vcmpneq_n_u8(a, b); }
[[gnu::always_inline]] nce mve_pred16_t not_equal(uint8x16_t a, uint8_t b, mve_pred16_t p) { return vcmpneq_m_n_u8(a, b, p); }
[[gnu::always_inline]] nce mve_pred16_t higher_or_same(uint8x16_t a, uint8_t b) { return vcmpcsq_n_u8(a, b); }
[[gnu::always_inline]] nce mve_pred16_t higher_or_same(uint8x16_t a, uint8_t b, mve_pred16_t p) { return vcmpcsq_m_n_u8(a, b, p); }
[[gnu::always_inline]] nce mve_pred16_t higher(uint8x16_t a, uint8_t b) { return vcmphiq_n_u8(a, b); }
[[gnu::always_inline]] nce mve_pred16_t higher(uint8x16_t a, uint8_t b, mve_pred16_t p) { return vcmphiq_m_n_u8(a, b, p); }
[[gnu::always_inline]] nce uint8x16_t add(uint8x16_t a, uint8_t b) { return vaddq_n_u8(a, b); }
[[gnu::always_inline]] nce uint8x16_t add(uint8x16_t a, uint8_t b, mve_pred16_t p) { return vaddq_x_n_u8(a, b, p); }
[[gnu::always_inline]] nce uint32_t reduce_add(uint8x16_t a) { return vaddvq_u8(a); }
[[gnu::always_inline]] nce uint8x16_t add_halve(uint8x16_t inactive, uint8x16_t a, uint8x16_t b, mve_pred16_t p) { return vhaddq_m_u8(inactive, a, b, p); }
[[gnu::always_inline]] nce uint8x16_t add_halve_round(uint8x16_t inactive, uint8x16_t a, uint8x16_t b, mve_pred16_t p) { return vrhaddq_m_u8(inactive, a, b, p); }
[[gnu::always_inline]] nce uint8x16_t add_saturate(uint8x16_t inactive, uint8x16_t a, uint8x16_t b, mve_pred16_t p) { return vqaddq_m_u8(inactive, a, b, p); }
[[gnu::always_inline]] nce uint8x16_t add_halve(uint8x16_t a, uint8x16_t b, mve_pred16_t p) { return vhaddq_x_u8(a, b, p); }
[[gnu::always_inline]] nce uint8x16_t add_halve_round(uint8x16_t a, uint8x16_t b, mve_pred16_t p) { return vrhaddq_x_u8(a, b, p); }
[[gnu::always_inline]] nce uint8x16_t add_halve(uint8x16_t inactive, uint8x16_t a, uint8_t b, mve_pred16_t p) { return vhaddq_m_n_u8(inactive, a, b, p); }
[[gnu::always_inline]] nce uint8x16_t add_saturate(uint8x16_t inactive, uint8x16_t a, uint8_t b, mve_pred16_t p) { return vqaddq_m_n_u8(inactive, a, b, p); }
[[gnu::always_inline]] nce uint8x16_t multiply_high(uint8x16_t a, uint8x16_t b) { return vmulhq_u8(a, b); }
[[gnu::always_inline]] nce uint8x16_t multiply_high(uint8x16_t inactive, uint8x16_t a, uint8x16_t b, mve_pred16_t p) { return vmulhq_m_u8(inactive, a, b, p); }
[[gnu::always_inline]] nce uint8x16_t multiply_high(uint8x16_t a, uint8x16_t b, mve_pred16_t p) { return vmulhq_x_u8(a, b, p); }
[[gnu::always_inline]] nce uint16x8_t multiply_long_bottom_poly(uint8x16_t a, uint8x16_t b) { return vmullbq_poly_p8(a, b); }
[[gnu::always_inline]] nce uint16x8_t multiply_long_bottom(uint8x16_t a, uint8x16_t b) { return vmullbq_int_u8(a, b); }
[[gnu::always_inline]] nce uint16x8_t multiply_long_bottom_poly(uint8x16_t a, uint8x16_t b, mve_pred16_t p) { return vmullbq_poly_x_p8(a, b, p); }
[[gnu::always_inline]] nce uint16x8_t multiply_long_bottom(uint8x16_t a, uint8x16_t b, mve_pred16_t p) { return vmullbq_int_x_u8(a, b, p); }
[[gnu::always_inline]] nce uint16x8_t multiply_long_top_poly(uint8x16_t a, uint8x16_t b) { return vmulltq_poly_p8(a, b); }
[[gnu::always_inline]] nce uint16x8_t multiply_long_top(uint8x16_t a, uint8x16_t b) { return vmulltq_int_u8(a, b); }
[[gnu::always_inline]] nce uint8x16_t multiply(uint8x16_t inactive, uint8x16_t a, uint8x16_t b, mve_pred16_t p) { return vmulq_m_u8(inactive, a, b, p); }
[[gnu::always_inline]] nce uint8x16_t multiply_round_high(uint8x16_t inactive, uint8x16_t a, uint8x16_t b, mve_pred16_t p) { return vrmulhq_m_u8(inactive, a, b, p); }
[[gnu::always_inline]] nce uint16x8_t multiply_long_top_poly(uint8x16_t a, uint8x16_t b, mve_pred16_t p) { return vmulltq_poly_x_p8(a, b, p); }
[[gnu::always_inline]] nce uint16x8_t multiply_long_top(uint8x16_t a, uint8x16_t b, mve_pred16_t p) { return vmulltq_int_x_u8(a, b, p); }
[[gnu::always_inline]] nce uint8x16_t multiply(uint8x16_t a, uint8x16_t b, mve_pred16_t p) { return vmulq_x_u8(a, b, p); }
[[gnu::always_inline]] nce uint8x16_t multiply_round_high(uint8x16_t a, uint8x16_t b, mve_pred16_t p) { return vrmulhq_x_u8(a, b, p); }
[[gnu::always_inline]] nce uint8x16_t multiply(uint8x16_t inactive, uint8x16_t a, uint8_t b, mve_pred16_t p) { return vmulq_m_n_u8(inactive, a, b, p); }
[[gnu::always_inline]] nce uint8x16_t multiply_round_high(uint8x16_t a, uint8x16_t b) { return vrmulhq_u8(a, b); }
[[gnu::always_inline]] nce uint32_t multiply_add_dual_reduce_add(uint8x16_t m1, uint8x16_t m2) { return vmladavq_u8(m1, m2); }
[[gnu::always_inline]] nce uint8x16_t subtract(uint8x16_t inactive, uint8x16_t a, uint8x16_t b, mve_pred16_t p) { return vsubq_m_u8(inactive, a, b, p); }
[[gnu::always_inline]] nce uint8x16_t subtract_halve(uint8x16_t inactive, uint8x16_t a, uint8x16_t b, mve_pred16_t p) { return vhsubq_m_u8(inactive, a, b, p); }
[[gnu::always_inline]] nce uint8x16_t subtract_saturate(uint8x16_t inactive, uint8x16_t a, uint8x16_t b, mve_pred16_t p) { return vqsubq_m_u8(inactive, a, b, p); }
[[gnu::always_inline]] nce uint8x16_t bitwise_clear(uint8x16_t inactive, uint8x16_t a, uint8x16_t b, mve_pred16_t p) { return vbicq_m_u8(inactive, a, b, p); }
[[gnu::always_inline]] nce uint8x16_t bitwise_and(uint8x16_t inactive, uint8x16_t a, uint8x16_t b, mve_pred16_t p) { return vandq_m_u8(inactive, a, b, p); }
[[gnu::always_inline]] nce uint8x16_t bitwise_xor(uint8x16_t inactive, uint8x16_t a, uint8x16_t b, mve_pred16_t p) { return veorq_m_u8(inactive, a, b, p); }
[[gnu::always_inline]] nce uint8x16_t bitwise_or_not(uint8x16_t inactive, uint8x16_t a, uint8x16_t b, mve_pred16_t p) { return vornq_m_u8(inactive, a, b, p); }
[[gnu::always_inline]] nce uint8x16_t bitwise_or(uint8x16_t inactive, uint8x16_t a, uint8x16_t b, mve_pred16_t p) { return vorrq_m_u8(inactive, a, b, p); }
[[gnu::always_inline]] nce uint8x16_t subtract(uint8x16_t a, uint8x16_t b, mve_pred16_t p) { return vsubq_x_u8(a, b, p); }
[[gnu::always_inline]] nce uint8x16_t subtract_halve(uint8x16_t a, uint8x16_t b, mve_pred16_t p) { return vhsubq_x_u8(a, b, p); }
[[gnu::always_inline]] nce uint8x16_t count_leading_zero_bits(uint8x16_t inactive, uint8x16_t a, mve_pred16_t p) { return vclzq_m_u8(inactive, a, p); }
[[gnu::always_inline]] nce uint8x16_t bitwise_clear(uint8x16_t a, uint8x16_t b, mve_pred16_t p) { return vbicq_x_u8(a, b, p); }
[[gnu::always_inline]] nce uint8x16_t bitwise_and(uint8x16_t a, uint8x16_t b, mve_pred16_t p) { return vandq_x_u8(a, b, p); }
[[gnu::always_inline]] nce uint8x16_t bitwise_xor(uint8x16_t a, uint8x16_t b, mve_pred16_t p) { return veorq_x_u8(a, b, p); }
[[gnu::always_inline]] nce uint8x16_t bitwise_not(uint8x16_t inactive, uint8x16_t a, mve_pred16_t p) { return vmvnq_m_u8(inactive, a, p); }
[[gnu::always_inline]] nce uint8x16_t bitwise_or_not(uint8x16_t a, uint8x16_t b, mve_pred16_t p) { return vornq_x_u8(a, b, p); }
[[gnu::always_inline]] nce uint8x16_t bitwise_or(uint8x16_t a, uint8x16_t b, mve_pred16_t p) { return vorrq_x_u8(a, b, p); }
[[gnu::always_inline]] nce uint8x16_t subtract(uint8x16_t inactive, uint8x16_t a, uint8_t b, mve_pred16_t p) { return vsubq_m_n_u8(inactive, a, b, p); }
[[gnu::always_inline]] nce uint8x16_t subtract_halve(uint8x16_t inactive, uint8x16_t a, uint8_t b, mve_pred16_t p) { return vhsubq_m_n_u8(inactive, a, b, p); }
[[gnu::always_inline]] nce uint8x16_t subtract_saturate(uint8x16_t inactive, uint8x16_t a, uint8_t b, mve_pred16_t p) { return vqsubq_m_n_u8(inactive, a, b, p); }
[[gnu::always_inline]] nce uint8x16_t complex_add_rotate_90(uint8x16_t a, uint8x16_t b) { return vcaddq_rot90_u8(a, b); }
[[gnu::always_inline]] nce uint8x16_t complex_add_rotate_270(uint8x16_t a, uint8x16_t b) { return vcaddq_rot270_u8(a, b); }
[[gnu::always_inline]] nce uint8x16_t complex_add_rotate_90(uint8x16_t inactive, uint8x16_t a, uint8x16_t b, mve_pred16_t p) { return vcaddq_rot90_m_u8(inactive, a, b, p); }
[[gnu::always_inline]] nce uint8x16_t complex_add_rotate_270(uint8x16_t inactive, uint8x16_t a, uint8x16_t b, mve_pred16_t p) { return vcaddq_rot270_m_u8(inactive, a, b, p); }
[[gnu::always_inline]] nce uint8x16_t shift_left_round_saturate(uint8x16_t inactive, uint8x16_t a, int8x16_t b, mve_pred16_t p) { return vqrshlq_m_u8(inactive, a, b, p); }
[[gnu::always_inline]] nce uint8x16_t shift_left_saturate(uint8x16_t inactive, uint8x16_t a, int8x16_t b, mve_pred16_t p) { return vqshlq_m_u8(inactive, a, b, p); }
[[gnu::always_inline]] nce uint8x16_t shift_left_round(uint8x16_t inactive, uint8x16_t a, int8x16_t b, mve_pred16_t p) { return vrshlq_m_u8(inactive, a, b, p); }
[[gnu::always_inline]] nce uint32_t multiply_add_dual_reduce_add(uint8x16_t m1, uint8x16_t m2, mve_pred16_t p) { return vmladavq_p_u8(m1, m2, p); }
[[gnu::always_inline]] nce uint8x16_t complex_add_rotate_90(uint8x16_t a, uint8x16_t b, mve_pred16_t p) { return vcaddq_rot90_x_u8(a, b, p); }
[[gnu::always_inline]] nce uint8x16_t complex_add_rotate_270(uint8x16_t a, uint8x16_t b, mve_pred16_t p) { return vcaddq_rot270_x_u8(a, b, p); }
template <int imm>[[gnu::always_inline]] nce uint8x16_t shift_right_round(uint8x16_t inactive, uint8x16_t a, mve_pred16_t p) { return vrshrq_m_n_u8(inactive, a, imm, p); }
template <int imm>[[gnu::always_inline]] nce uint8x16_t shift_right(uint8x16_t inactive, uint8x16_t a, mve_pred16_t p) { return vshrq_m_n_u8(inactive, a, imm, p); }
template <int imm>[[gnu::always_inline]] nce uint8x16_t shift_right_insert(uint8x16_t a, uint8x16_t b, mve_pred16_t p) { return vsriq_m_n_u8(a, b, imm, p); }
template <int imm>[[gnu::always_inline]] nce uint8x16_t shift_left_saturate(uint8x16_t inactive, uint8x16_t a, mve_pred16_t p) { return vqshlq_m_n_u8(inactive, a, imm, p); }
[[gnu::always_inline]] nce uint8x16_t multiply_add(uint8x16_t add, uint8x16_t m1, uint8_t m2) { return vmlaq_n_u8(add, m1, m2); }
[[gnu::always_inline]] nce uint8x16_t multiply_add(uint8x16_t add, uint8x16_t m1, uint8_t m2, mve_pred16_t p) { return vmlaq_m_n_u8(add, m1, m2, p); }
[[gnu::always_inline]] nce uint8x16_t multiply_add_scalar(uint8x16_t m1, uint8x16_t m2, uint8_t add) { return vmlasq_n_u8(m1, m2, add); }
[[gnu::always_inline]] nce uint8x16_t multiply_add_scalar(uint8x16_t m1, uint8x16_t m2, uint8_t add, mve_pred16_t p) { return vmlasq_m_n_u8(m1, m2, add, p); }
[[gnu::always_inline]] nce uint8x16_t shift_left_round(uint8x16_t a, int8x16_t b, mve_pred16_t p) { return vrshlq_x_u8(a, b, p); }
[[gnu::always_inline]] nce uint8x16_t shift_left_round(uint8x16_t a, int32_t b) { return vrshlq_n_u8(a, b); }
[[gnu::always_inline]] nce uint8x16_t shift_left_round(uint8x16_t a, int32_t b, mve_pred16_t p) { return vrshlq_m_n_u8(a, b, p); }
template <int imm>[[gnu::always_inline]] nce uint8x16_t shift_left_carry(uint8x16_t a, uint32_t *b) { return vshlcq_u8(a, b, imm); }
template <int imm>[[gnu::always_inline]] nce uint8x16_t shift_left_carry(uint8x16_t a, uint32_t *b, mve_pred16_t p) { return vshlcq_m_u8(a, b, imm, p); }
template <int imm>[[gnu::always_inline]] nce uint16x8_t shift_left_long_bottom(uint8x16_t a) { return vshllbq_n_u8(a, imm); }
template <int imm>[[gnu::always_inline]] nce uint16x8_t shift_left_long_bottom(uint8x16_t a, mve_pred16_t p) { return vshllbq_x_n_u8(a, imm, p); }
template <int imm>[[gnu::always_inline]] nce uint16x8_t shift_left_long_top(uint8x16_t a) { return vshlltq_n_u8(a, imm); }
[[gnu::always_inline]] nce uint8x16_t shift_left(uint8x16_t inactive, uint8x16_t a, int8x16_t b, mve_pred16_t p) { return vshlq_m_u8(inactive, a, b, p); }
template <int imm>[[gnu::always_inline]] nce uint8x16_t shift_left(uint8x16_t inactive, uint8x16_t a, mve_pred16_t p) { return vshlq_m_n_u8(inactive, a, imm, p); }
[[gnu::always_inline]] nce uint8x16_t shift_left(uint8x16_t a, int8x16_t b, mve_pred16_t p) { return vshlq_x_u8(a, b, p); }
template <int imm>[[gnu::always_inline]] nce uint16x8_t shift_left_long_top(uint8x16_t a, mve_pred16_t p) { return vshlltq_x_n_u8(a, imm, p); }
template <int imm>[[gnu::always_inline]] nce uint8x16_t shift_left(uint8x16_t a) { return vshlq_n_u8(a, imm); }
template <int imm>[[gnu::always_inline]] nce uint8x16_t shift_left_insert(uint8x16_t a, uint8x16_t b, mve_pred16_t p) { return vsliq_m_n_u8(a, b, imm, p); }
[[gnu::always_inline]] nce uint16x8_t move_long_bottom(uint8x16_t a) { return vmovlbq_u8(a); }
[[gnu::always_inline]] nce uint16x8_t move_long_bottom(uint8x16_t a, mve_pred16_t p) { return vmovlbq_x_u8(a, p); }
[[gnu::always_inline]] nce uint16x8_t move_long_top(uint8x16_t a) { return vmovltq_u8(a); }
[[gnu::always_inline]] nce uint8x16_t predicate_select(uint8x16_t a, uint8x16_t b, mve_pred16_t p) { return vpselq_u8(a, b, p); }
[[gnu::always_inline]] nce uint8x16_t bit_reverse_shift_right(uint8x16_t inactive, uint8x16_t a, int32_t b, mve_pred16_t p) { return vbrsrq_m_n_u8(inactive, a, b, p); }
template <int imm>[[gnu::always_inline]] nce uint8x16_t shift_left_unsigned_saturate(uint8x16_t inactive, int8x16_t a, mve_pred16_t p) { return vqshluq_m_n_s8(inactive, a, imm, p); }
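// Narrowing conversions: the '_bottom' forms write the narrowed results into
// the even byte lanes of 'a' and the '_top' forms into the odd byte lanes, so
// a bottom/top pair packs two halfword vectors into one uint8x16_t.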
template <int imm>[[gnu::always_inline]] nce uint8x16_t shift_right_narrow_round_saturate_bottom(uint8x16_t a, uint16x8_t b) { return vqrshrnbq_n_u16(a, b, imm); }
template <int imm>[[gnu::always_inline]] nce uint8x16_t shift_right_narrow_round_saturate_bottom(uint8x16_t a, uint16x8_t b, mve_pred16_t p) { return vqrshrnbq_m_n_u16(a, b, imm, p); }
template <int imm>[[gnu::always_inline]] nce uint8x16_t shift_right_narrow_round_saturate_top(uint8x16_t a, uint16x8_t b) { return vqrshrntq_n_u16(a, b, imm); }
template <int imm>[[gnu::always_inline]] nce uint8x16_t shift_right_narrow_round_saturate_top(uint8x16_t a, uint16x8_t b, mve_pred16_t p) { return vqrshrntq_m_n_u16(a, b, imm, p); }
template <int imm>[[gnu::always_inline]] nce uint8x16_t shift_right_narrow_saturate_bottom(uint8x16_t a, uint16x8_t b) { return vqshrnbq_n_u16(a, b, imm); }
template <int imm>[[gnu::always_inline]] nce uint8x16_t shift_right_narrow_saturate_bottom(uint8x16_t a, uint16x8_t b, mve_pred16_t p) { return vqshrnbq_m_n_u16(a, b, imm, p); }
template <int imm>[[gnu::always_inline]] nce uint8x16_t shift_right_narrow_saturate_top(uint8x16_t a, uint16x8_t b) { return vqshrntq_n_u16(a, b, imm); }
template <int imm>[[gnu::always_inline]] nce uint8x16_t shift_right_narrow_saturate_top(uint8x16_t a, uint16x8_t b, mve_pred16_t p) { return vqshrntq_m_n_u16(a, b, imm, p); }
template <int imm>[[gnu::always_inline]] nce uint8x16_t shift_right_narrow_round_bottom(uint8x16_t a, uint16x8_t b) { return vrshrnbq_n_u16(a, b, imm); }
template <int imm>[[gnu::always_inline]] nce uint8x16_t shift_right_narrow_round_bottom(uint8x16_t a, uint16x8_t b, mve_pred16_t p) { return vrshrnbq_m_n_u16(a, b, imm, p); }
template <int imm>[[gnu::always_inline]] nce uint8x16_t shift_right_narrow_round_top(uint8x16_t a, uint16x8_t b) { return vrshrntq_n_u16(a, b, imm); }
template <int imm>[[gnu::always_inline]] nce uint8x16_t shift_right_narrow_round_top(uint8x16_t a, uint16x8_t b, mve_pred16_t p) { return vrshrntq_m_n_u16(a, b, imm, p); }
template <int imm>[[gnu::always_inline]] nce uint8x16_t shift_right_narrow_bottom(uint8x16_t a, uint16x8_t b) { return vshrnbq_n_u16(a, b, imm); }
template <int imm>[[gnu::always_inline]] nce uint8x16_t shift_right_narrow_bottom(uint8x16_t a, uint16x8_t b, mve_pred16_t p) { return vshrnbq_m_n_u16(a, b, imm, p); }
template <int imm>[[gnu::always_inline]] nce uint8x16_t shift_right_narrow_top_signed(uint8x16_t a, uint16x8_t b) { return vshrntq_n_u16(a, b, imm); }
template <int imm>[[gnu::always_inline]] nce uint8x16_t shift_right_narrow_top_signed(uint8x16_t a, uint16x8_t b, mve_pred16_t p) { return vshrntq_m_n_u16(a, b, imm, p); }
[[gnu::always_inline]] nce uint8x16_t move_narrow_bottom(uint8x16_t a, uint16x8_t b) { return vmovnbq_u16(a, b); }
[[gnu::always_inline]] nce uint8x16_t move_narrow_bottom(uint8x16_t a, uint16x8_t b, mve_pred16_t p) { return vmovnbq_m_u16(a, b, p); }
[[gnu::always_inline]] nce uint8x16_t move_narrow_top(uint8x16_t a, uint16x8_t b) { return vmovntq_u16(a, b); }
[[gnu::always_inline]] nce uint8x16_t move_narrow_top(uint8x16_t a, uint16x8_t b, mve_pred16_t p) { return vmovntq_m_u16(a, b, p); }
[[gnu::always_inline]] nce uint8x16_t move_narrow_saturate_bottom(uint8x16_t a, uint16x8_t b) { return vqmovnbq_u16(a, b); }
[[gnu::always_inline]] nce uint8x16_t move_narrow_saturate_bottom(uint8x16_t a, uint16x8_t b, mve_pred16_t p) { return vqmovnbq_m_u16(a, b, p); }
[[gnu::always_inline]] nce uint8x16_t move_narrow_saturate_top(uint8x16_t a, uint16x8_t b) { return vqmovntq_u16(a, b); }
[[gnu::always_inline]] nce uint8x16_t move_narrow_saturate_top(uint8x16_t a, uint16x8_t b, mve_pred16_t p) { return vqmovntq_m_u16(a, b, p); }
template <int imm>[[gnu::always_inline]] nce uint8x16_t shift_right_narrow_round_saturate_bottom(uint8x16_t a, int16x8_t b) { return vqrshrunbq_n_s16(a, b, imm); }
template <int imm>[[gnu::always_inline]] nce uint8x16_t shift_right_narrow_round_saturate_bottom(uint8x16_t a, int16x8_t b, mve_pred16_t p) { return vqrshrunbq_m_n_s16(a, b, imm, p); }
template <int imm>[[gnu::always_inline]] nce uint8x16_t shift_right_narrow_round_saturate_top(uint8x16_t a, int16x8_t b) { return vqrshruntq_n_s16(a, b, imm); }
template <int imm>[[gnu::always_inline]] nce uint8x16_t shift_right_narrow_round_saturate_top(uint8x16_t a, int16x8_t b, mve_pred16_t p) { return vqrshruntq_m_n_s16(a, b, imm, p); }
template <int imm>[[gnu::always_inline]] nce uint8x16_t shift_right_narrow_saturate_bottom(uint8x16_t a, int16x8_t b) { return vqshrunbq_n_s16(a, b, imm); }
template <int imm>[[gnu::always_inline]] nce uint8x16_t shift_right_narrow_saturate_bottom(uint8x16_t a, int16x8_t b, mve_pred16_t p) { return vqshrunbq_m_n_s16(a, b, imm, p); }
template <int imm>[[gnu::always_inline]] nce uint8x16_t shift_right_narrow_saturate_top(uint8x16_t a, int16x8_t b) { return vqshruntq_n_s16(a, b, imm); }
template <int imm>[[gnu::always_inline]] nce uint8x16_t shift_right_narrow_saturate_top(uint8x16_t a, int16x8_t b, mve_pred16_t p) { return vqshruntq_m_n_s16(a, b, imm, p); }
[[gnu::always_inline]] nce uint8x16_t move_narrow_saturate_bottom(uint8x16_t a, int16x8_t b) { return vqmovunbq_s16(a, b); }
[[gnu::always_inline]] nce uint8x16_t move_narrow_saturate_bottom(uint8x16_t a, int16x8_t b, mve_pred16_t p) { return vqmovunbq_m_s16(a, b, p); }
[[gnu::always_inline]] nce uint8x16_t move_narrow_saturate_top(uint8x16_t a, int16x8_t b) { return vqmovuntq_s16(a, b); }
[[gnu::always_inline]] nce uint8x16_t move_narrow_saturate_top(uint8x16_t a, int16x8_t b, mve_pred16_t p) { return vqmovuntq_m_s16(a, b, p); }
[[gnu::always_inline]] nce uint32_t reduce_add(uint8x16_t a, mve_pred16_t p) { return vaddvq_p_u8(a, p); }
[[gnu::always_inline]] nce uint8x16_t count_leading_zero_bits(uint8x16_t a, mve_pred16_t p) { return vclzq_x_u8(a, p); }
[[gnu::always_inline]] nce uint8x16_t bitwise_not(uint8x16_t a, mve_pred16_t p) { return vmvnq_x_u8(a, p); }
template <int imm>[[gnu::always_inline]] nce uint8x16_t shift_right_round(uint8x16_t a, mve_pred16_t p) { return vrshrq_x_n_u8(a, imm, p); }
template <int imm>[[gnu::always_inline]] nce uint8x16_t shift_right(uint8x16_t a, mve_pred16_t p) { return vshrq_x_n_u8(a, imm, p); }
template <int imm>[[gnu::always_inline]] nce uint8x16_t shift_left(uint8x16_t a, mve_pred16_t p) { return vshlq_x_n_u8(a, imm, p); }
[[gnu::always_inline]] nce uint16x8_t move_long_top(uint8x16_t a, mve_pred16_t p) { return vmovltq_x_u8(a, p); }
[[gnu::always_inline]] nce uint8x16_t add_halve(uint8x16_t a, uint8_t b) { return vhaddq_n_u8(a, b); }
[[gnu::always_inline]] nce uint8x16_t add_halve(uint8x16_t a, uint8_t b, mve_pred16_t p) { return vhaddq_x_n_u8(a, b, p); }
[[gnu::always_inline]] nce uint8x16_t add_saturate(uint8x16_t a, uint8_t b) { return vqaddq_n_u8(a, b); }
[[gnu::always_inline]] nce uint8x16_t multiply(uint8x16_t a, uint8_t b, mve_pred16_t p) { return vmulq_x_n_u8(a, b, p); }
[[gnu::always_inline]] nce uint8x16_t subtract(uint8x16_t a, uint8_t b) { return vsubq_n_u8(a, b); }
[[gnu::always_inline]] nce uint8x16_t subtract(uint8x16_t a, uint8_t b, mve_pred16_t p) { return vsubq_x_n_u8(a, b, p); }
[[gnu::always_inline]] nce uint8x16_t subtract_halve(uint8x16_t a, uint8_t b) { return vhsubq_n_u8(a, b); }
[[gnu::always_inline]] nce uint8x16_t subtract_halve(uint8x16_t a, uint8_t b, mve_pred16_t p) { return vhsubq_x_n_u8(a, b, p); }
[[gnu::always_inline]] nce uint8x16_t subtract_saturate(uint8x16_t a, uint8_t b) { return vqsubq_n_u8(a, b); }
[[gnu::always_inline]] nce uint8x16_t bit_reverse_shift_right(uint8x16_t a, int32_t b) { return vbrsrq_n_u8(a, b); }
[[gnu::always_inline]] nce uint8x16_t bit_reverse_shift_right(uint8x16_t a, int32_t b, mve_pred16_t p) { return vbrsrq_x_n_u8(a, b, p); }
[[gnu::always_inline]] nce uint8x16_t shift_left_round_saturate(uint8x16_t a, int32_t b) { return vqrshlq_n_u8(a, b); }
[[gnu::always_inline]] nce uint8x16_t shift_left_round_saturate(uint8x16_t a, int32_t b, mve_pred16_t p) { return vqrshlq_m_n_u8(a, b, p); }
[[gnu::always_inline]] nce uint8x16_t shift_left_saturate(uint8x16_t a, int32_t b) { return vqshlq_r_u8(a, b); }
[[gnu::always_inline]] nce uint8x16_t shift_left_saturate(uint8x16_t a, int32_t b, mve_pred16_t p) { return vqshlq_m_r_u8(a, b, p); }
[[gnu::always_inline]] nce uint8x16_t shift_left(uint8x16_t a, int32_t b) { return vshlq_r_u8(a, b); }
[[gnu::always_inline]] nce uint8x16_t shift_left(uint8x16_t a, int32_t b, mve_pred16_t p) { return vshlq_m_r_u8(a, b, p); }
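// int8x16_t operations. Example (illustrative): clamp negative lanes to zero
// with a compare predicate and predicate_select (vpselq picks 'a' where the
// predicate bit is set, 'b' elsewhere):
//   int8x16_t v    = vdupq_n_s8(-3);
//   mve_pred16_t p = mve::less_than(v, (int8_t)0);
//   int8x16_t r    = mve::predicate_select(vdupq_n_s8(0), v, p);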
[[gnu::always_inline]] nce int8x16_t reverse_16bit(int8x16_t inactive, int8x16_t a, mve_pred16_t p) { return vrev16q_m_s8(inactive, a, p); }
[[gnu::always_inline]] nce int8x16_t reverse_32bit(int8x16_t inactive, int8x16_t a, mve_pred16_t p) { return vrev32q_m_s8(inactive, a, p); }
[[gnu::always_inline]] nce int8x16_t reverse_64bit(int8x16_t inactive, int8x16_t a, mve_pred16_t p) { return vrev64q_m_s8(inactive, a, p); }
[[gnu::always_inline]] nce mve_pred16_t equal(int8x16_t a, int8x16_t b) { return vcmpeqq_s8(a, b); }
[[gnu::always_inline]] nce mve_pred16_t not_equal(int8x16_t a, int8x16_t b, mve_pred16_t p) { return vcmpneq_m_s8(a, b, p); }
[[gnu::always_inline]] nce mve_pred16_t greater_than_or_equal(int8x16_t a, int8x16_t b) { return vcmpgeq_s8(a, b); }
[[gnu::always_inline]] nce mve_pred16_t greater_than_or_equal(int8x16_t a, int8x16_t b, mve_pred16_t p) { return vcmpgeq_m_s8(a, b, p); }
[[gnu::always_inline]] nce mve_pred16_t greater_than(int8x16_t a, int8x16_t b) { return vcmpgtq_s8(a, b); }
[[gnu::always_inline]] nce mve_pred16_t greater_than(int8x16_t a, int8x16_t b, mve_pred16_t p) { return vcmpgtq_m_s8(a, b, p); }
[[gnu::always_inline]] nce int8x16_t reverse_64bit(int8x16_t a, mve_pred16_t p) { return vrev64q_x_s8(a, p); }
[[gnu::always_inline]] nce int8x16_t uninitialized(int8x16_t t) { return vuninitializedq(t); }
[[gnu::always_inline]] nce mve_pred16_t equal(int8x16_t a, int8x16_t b, mve_pred16_t p) { return vcmpeqq_m_s8(a, b, p); }
[[gnu::always_inline]] nce mve_pred16_t not_equal(int8x16_t a, int8x16_t b) { return vcmpneq_s8(a, b); }
[[gnu::always_inline]] nce mve_pred16_t less_than_or_equal(int8x16_t a, int8x16_t b) { return vcmpleq_s8(a, b); }
[[gnu::always_inline]] nce int8x16_t min(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p) { return vminq_m_s8(inactive, a, b, p); }
[[gnu::always_inline]] nce mve_pred16_t less_than_or_equal(int8x16_t a, int8x16_t b, mve_pred16_t p) { return vcmpleq_m_s8(a, b, p); }
[[gnu::always_inline]] nce mve_pred16_t less_than(int8x16_t a, int8x16_t b) { return vcmpltq_s8(a, b); }
[[gnu::always_inline]] nce int8x16_t max(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p) { return vmaxq_m_s8(inactive, a, b, p); }
[[gnu::always_inline]] nce int8x16_t subtract_absolute(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p) { return vabdq_m_s8(inactive, a, b, p); }
[[gnu::always_inline]] nce int8x16_t add(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p) { return vaddq_m_s8(inactive, a, b, p); }
[[gnu::always_inline]] nce int8x16_t add_halve(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p) { return vhaddq_m_s8(inactive, a, b, p); }
[[gnu::always_inline]] nce int8x16_t add_halve_round(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p) { return vrhaddq_m_s8(inactive, a, b, p); }
[[gnu::always_inline]] nce int8x16_t add_saturate(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p) { return vqaddq_m_s8(inactive, a, b, p); }
[[gnu::always_inline]] nce int8x16_t add_halve(int8x16_t a, int8x16_t b, mve_pred16_t p) { return vhaddq_x_s8(a, b, p); }
[[gnu::always_inline]] nce int8x16_t add_halve_round(int8x16_t a, int8x16_t b, mve_pred16_t p) { return vrhaddq_x_s8(a, b, p); }
[[gnu::always_inline]] nce int8x16_t add_halve(int8x16_t inactive, int8x16_t a, int8_t b, mve_pred16_t p) { return vhaddq_m_n_s8(inactive, a, b, p); }
[[gnu::always_inline]] nce int8x16_t add_saturate(int8x16_t inactive, int8x16_t a, int8_t b, mve_pred16_t p) { return vqaddq_m_n_s8(inactive, a, b, p); }
[[gnu::always_inline]] nce int8x16_t multiply_high(int8x16_t a, int8x16_t b) { return vmulhq_s8(a, b); }
[[gnu::always_inline]] nce int8x16_t multiply_high(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p) { return vmulhq_m_s8(inactive, a, b, p); }
[[gnu::always_inline]] nce int8x16_t multiply_high(int8x16_t a, int8x16_t b, mve_pred16_t p) { return vmulhq_x_s8(a, b, p); }
[[gnu::always_inline]] nce int16x8_t multiply_long_bottom(int8x16_t a, int8x16_t b) { return vmullbq_int_s8(a, b); }
[[gnu::always_inline]] nce int16x8_t multiply_long_bottom(int8x16_t a, int8x16_t b, mve_pred16_t p) { return vmullbq_int_x_s8(a, b, p); }
[[gnu::always_inline]] nce int16x8_t multiply_long_top(int8x16_t a, int8x16_t b) { return vmulltq_int_s8(a, b); }
[[gnu::always_inline]] nce int8x16_t multiply(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p) { return vmulq_m_s8(inactive, a, b, p); }
[[gnu::always_inline]] nce int8x16_t multiply_round_high(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p) { return vrmulhq_m_s8(inactive, a, b, p); }
[[gnu::always_inline]] nce int8x16_t multiply_add_double_saturate_high(int8x16_t inactive, int8x16_t a, int8x16_t b) { return vqdmladhq_s8(inactive, a, b); }
[[gnu::always_inline]] nce int8x16_t multiply_add_double_saturate_high(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p) { return vqdmladhq_m_s8(inactive, a, b, p); }
[[gnu::always_inline]] nce int8x16_t multiply_add_double_saturate_high_exchange_pairs(int8x16_t inactive, int8x16_t a, int8x16_t b) { return vqdmladhxq_s8(inactive, a, b); }
[[gnu::always_inline]] nce int8x16_t multiply_add_double_saturate_high_exchange_pairs(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p) { return vqdmladhxq_m_s8(inactive, a, b, p); }
[[gnu::always_inline]] nce int8x16_t multiply_add_dual_double_round_saturate_high(int8x16_t inactive, int8x16_t a, int8x16_t b) { return vqrdmladhq_s8(inactive, a, b); }
[[gnu::always_inline]] nce int8x16_t multiply_add_dual_double_round_saturate_high(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p) { return vqrdmladhq_m_s8(inactive, a, b, p); }
[[gnu::always_inline]] nce int8x16_t multiply_add_dual_double_round_saturate_high_exchange_pairs(int8x16_t inactive, int8x16_t a, int8x16_t b) { return vqrdmladhxq_s8(inactive, a, b); }
[[gnu::always_inline]] nce int8x16_t multiply_add_dual_double_round_saturate_high_exchange_pairs(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p) { return vqrdmladhxq_m_s8(inactive, a, b, p); }
[[gnu::always_inline]] nce int16x8_t multiply_long_top(int8x16_t a, int8x16_t b, mve_pred16_t p) { return vmulltq_int_x_s8(a, b, p); }
[[gnu::always_inline]] nce int8x16_t multiply(int8x16_t a, int8x16_t b, mve_pred16_t p) { return vmulq_x_s8(a, b, p); }
[[gnu::always_inline]] nce int8x16_t multiply_round_high(int8x16_t a, int8x16_t b, mve_pred16_t p) { return vrmulhq_x_s8(a, b, p); }
[[gnu::always_inline]] nce int8x16_t multiply(int8x16_t inactive, int8x16_t a, int8_t b, mve_pred16_t p) { return vmulq_m_n_s8(inactive, a, b, p); }
[[gnu::always_inline]] nce int8x16_t multiply_round_high(int8x16_t a, int8x16_t b) { return vrmulhq_s8(a, b); }
[[gnu::always_inline]] nce int8x16_t multiply_subtract_dual_double_saturate_high(int8x16_t inactive, int8x16_t a, int8x16_t b) { return vqdmlsdhq_s8(inactive, a, b); }
[[gnu::always_inline]] nce int8x16_t multiply_subtract_dual_double_saturate_high(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p) { return vqdmlsdhq_m_s8(inactive, a, b, p); }
[[gnu::always_inline]] nce int8x16_t multiply_subtract_dual_double_saturate_high_exchange_pairs(int8x16_t inactive, int8x16_t a, int8x16_t b) { return vqdmlsdhxq_s8(inactive, a, b); }
[[gnu::always_inline]] nce int8x16_t multiply_subtract_dual_double_saturate_high_exchange_pairs(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p) { return vqdmlsdhxq_m_s8(inactive, a, b, p); }
[[gnu::always_inline]] nce int8x16_t multiply_subtract_dual_double_round_saturate_high(int8x16_t inactive, int8x16_t a, int8x16_t b) { return vqrdmlsdhq_s8(inactive, a, b); }
[[gnu::always_inline]] nce int8x16_t multiply_subtract_dual_double_round_saturate_high(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p) { return vqrdmlsdhq_m_s8(inactive, a, b, p); }
[[gnu::always_inline]] nce int8x16_t multiply_subtract_dual_double_round_saturate_high_exchange_pairs(int8x16_t inactive, int8x16_t a, int8x16_t b) { return vqrdmlsdhxq_s8(inactive, a, b); }
[[gnu::always_inline]] nce int8x16_t multiply_subtract_dual_double_round_saturate_high_exchange_pairs(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p) { return vqrdmlsdhxq_m_s8(inactive, a, b, p); }
[[gnu::always_inline]] nce int8x16_t multiply_double_saturate_high(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p) { return vqdmulhq_m_s8(inactive, a, b, p); }
[[gnu::always_inline]] nce int8x16_t multiply_double_round_saturate_high(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p) { return vqrdmulhq_m_s8(inactive, a, b, p); }
[[gnu::always_inline]] nce mve_pred16_t less_than(int8x16_t a, int8x16_t b, mve_pred16_t p) { return vcmpltq_m_s8(a, b, p); }
[[gnu::always_inline]] nce int8x16_t min(int8x16_t a, int8x16_t b, mve_pred16_t p) { return vminq_x_s8(a, b, p); }
[[gnu::always_inline]] nce int8x16_t max(int8x16_t a, int8x16_t b, mve_pred16_t p) { return vmaxq_x_s8(a, b, p); }
[[gnu::always_inline]] nce int8x16_t subtract_absolute(int8x16_t a, int8x16_t b, mve_pred16_t p) { return vabdq_x_s8(a, b, p); }
[[gnu::always_inline]] nce int8x16_t abs(int8x16_t inactive, int8x16_t a, mve_pred16_t p) { return vabsq_m_s8(inactive, a, p); }
[[gnu::always_inline]] nce int8x16_t abs_saturate(int8x16_t inactive, int8x16_t a, mve_pred16_t p) { return vqabsq_m_s8(inactive, a, p); }
[[gnu::always_inline]] nce int8x16_t add(int8x16_t a, int8x16_t b, mve_pred16_t p) { return vaddq_x_s8(a, b, p); }
[[gnu::always_inline]] nce int8x16_t multiply_add_double_round_saturate_high_scalar(int8x16_t m1, int8x16_t m2, int8_t add) { return vqrdmlashq_n_s8(m1, m2, add); }
[[gnu::always_inline]] nce int8x16_t multiply_add_double_round_saturate_high_scalar(int8x16_t m1, int8x16_t m2, int8_t add, mve_pred16_t p) { return vqrdmlashq_m_n_s8(m1, m2, add, p); }
[[gnu::always_inline]] nce int8x16_t multiply_double_saturate_high(int8x16_t inactive, int8x16_t a, int8_t b, mve_pred16_t p) { return vqdmulhq_m_n_s8(inactive, a, b, p); }
[[gnu::always_inline]] nce int8x16_t multiply_double_saturate_high(int8x16_t a, int8x16_t b) { return vqdmulhq_s8(a, b); }
[[gnu::always_inline]] nce int8x16_t multiply_double_round_saturate_high(int8x16_t inactive, int8x16_t a, int8_t b, mve_pred16_t p) { return vqrdmulhq_m_n_s8(inactive, a, b, p); }
[[gnu::always_inline]] nce int8x16_t multiply_double_round_saturate_high(int8x16_t a, int8x16_t b) { return vqrdmulhq_s8(a, b); }
[[gnu::always_inline]] nce int32_t multiply_add_dual_reduce_add(int8x16_t m1, int8x16_t m2) { return vmladavq_s8(m1, m2); }
[[gnu::always_inline]] nce int32_t multiply_add_dual_reduce_add(int8x16_t m1, int8x16_t m2, mve_pred16_t p) { return vmladavq_p_s8(m1, m2, p); }
[[gnu::always_inline]] nce int32_t multiply_add_dual_reduce_add_exchange_pairs(int8x16_t m1, int8x16_t m2) { return vmladavxq_s8(m1, m2); }
[[gnu::always_inline]] nce int32_t multiply_add_dual_reduce_add_exchange_pairs(int8x16_t m1, int8x16_t m2, mve_pred16_t p) { return vmladavxq_p_s8(m1, m2, p); }
[[gnu::always_inline]] nce int32_t multiply_subtract_dual_reduce_add(int8x16_t a, int8x16_t b, mve_pred16_t p) { return vmlsdavq_p_s8(a, b, p); }
[[gnu::always_inline]] nce int8x16_t add(int8x16_t inactive, int8x16_t a, int8_t b, mve_pred16_t p) { return vaddq_m_n_s8(inactive, a, b, p); }
[[gnu::always_inline]] nce int8x16_t multiply_add(int8x16_t add, int8x16_t m1, int8_t m2) { return vmlaq_n_s8(add, m1, m2); }
[[gnu::always_inline]] nce int8x16_t multiply_add(int8x16_t add, int8x16_t m1, int8_t m2, mve_pred16_t p) { return vmlaq_m_n_s8(add, m1, m2, p); }
[[gnu::always_inline]] nce int8x16_t multiply_add_scalar(int8x16_t m1, int8x16_t m2, int8_t add) { return vmlasq_n_s8(m1, m2, add); }
[[gnu::always_inline]] nce int8x16_t multiply_add_scalar(int8x16_t m1, int8x16_t m2, int8_t add, mve_pred16_t p) { return vmlasq_m_n_s8(m1, m2, add, p); }
[[gnu::always_inline]] nce int32_t multiply_subtract_dual_reduce_add(int8x16_t a, int8x16_t b) { return vmlsdavq_s8(a, b); }
[[gnu::always_inline]] nce int32_t multiply_subtract_dual_reduce_add_exchange_pairs(int8x16_t a, int8x16_t b) { return vmlsdavxq_s8(a, b); }
[[gnu::always_inline]] nce int8x16_t subtract(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p) { return vsubq_m_s8(inactive, a, b, p); }
[[gnu::always_inline]] nce int8x16_t subtract_halve(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p) { return vhsubq_m_s8(inactive, a, b, p); }
[[gnu::always_inline]] nce int8x16_t subtract_saturate(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p) { return vqsubq_m_s8(inactive, a, b, p); }
[[gnu::always_inline]] nce int8x16_t bitwise_clear(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p) { return vbicq_m_s8(inactive, a, b, p); }
[[gnu::always_inline]] nce int8x16_t bitwise_and(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p) { return vandq_m_s8(inactive, a, b, p); }
[[gnu::always_inline]] nce int8x16_t bitwise_xor(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p) { return veorq_m_s8(inactive, a, b, p); }
[[gnu::always_inline]] nce int8x16_t bitwise_or_not(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p) { return vornq_m_s8(inactive, a, b, p); }
[[gnu::always_inline]] nce int8x16_t bitwise_or(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p) { return vorrq_m_s8(inactive, a, b, p); }
[[gnu::always_inline]] nce int8x16_t complex_add_rotate_270(int8x16_t a, int8x16_t b) { return vcaddq_rot270_s8(a, b); }
[[gnu::always_inline]] nce int8x16_t complex_add_rotate_90(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p) { return vcaddq_rot90_m_s8(inactive, a, b, p); }
[[gnu::always_inline]] nce int8x16_t complex_add_rotate_270(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p) { return vcaddq_rot270_m_s8(inactive, a, b, p); }
[[gnu::always_inline]] nce int8x16_t complex_add_rotate_90_halve(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p) { return vhcaddq_rot90_m_s8(inactive, a, b, p); }
[[gnu::always_inline]] nce int32_t multiply_subtract_dual_reduce_add_exchange_pairs(int8x16_t a, int8x16_t b, mve_pred16_t p) { return vmlsdavxq_p_s8(a, b, p); }
[[gnu::always_inline]] nce int8x16_t complex_add_rotate_90(int8x16_t a, int8x16_t b, mve_pred16_t p) { return vcaddq_rot90_x_s8(a, b, p); }
[[gnu::always_inline]] nce int8x16_t complex_add_rotate_270(int8x16_t a, int8x16_t b, mve_pred16_t p) { return vcaddq_rot270_x_s8(a, b, p); }
[[gnu::always_inline]] nce int8x16_t complex_add_rotate_90_halve(int8x16_t a, int8x16_t b) { return vhcaddq_rot90_s8(a, b); }
[[gnu::always_inline]] nce int8x16_t complex_add_rotate_90_halve(int8x16_t a, int8x16_t b, mve_pred16_t p) { return vhcaddq_rot90_x_s8(a, b, p); }
[[gnu::always_inline]] nce int8x16_t complex_add_rotate_270_halve(int8x16_t a, int8x16_t b) { return vhcaddq_rot270_s8(a, b); }
[[gnu::always_inline]] nce int8x16_t complex_add_rotate_270_halve(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p) { return vhcaddq_rot270_m_s8(inactive, a, b, p); }
[[gnu::always_inline]] nce int8x16_t subtract(int8x16_t a, int8x16_t b, mve_pred16_t p) { return vsubq_x_s8(a, b, p); }
[[gnu::always_inline]] nce int8x16_t subtract_halve(int8x16_t a, int8x16_t b, mve_pred16_t p) { return vhsubq_x_s8(a, b, p); }
[[gnu::always_inline]] nce int8x16_t count_leading_sign_bits(int8x16_t inactive, int8x16_t a, mve_pred16_t p) { return vclsq_m_s8(inactive, a, p); }
[[gnu::always_inline]] nce int8x16_t count_leading_zero_bits(int8x16_t inactive, int8x16_t a, mve_pred16_t p) { return vclzq_m_s8(inactive, a, p); }
[[gnu::always_inline]] nce int8x16_t bitwise_clear(int8x16_t a, int8x16_t b, mve_pred16_t p) { return vbicq_x_s8(a, b, p); }
[[gnu::always_inline]] nce int8x16_t negate(int8x16_t inactive, int8x16_t a, mve_pred16_t p) { return vnegq_m_s8(inactive, a, p); }
[[gnu::always_inline]] nce int8x16_t negate_saturate(int8x16_t inactive, int8x16_t a, mve_pred16_t p) { return vqnegq_m_s8(inactive, a, p); }
[[gnu::always_inline]] nce int8x16_t bitwise_and(int8x16_t a, int8x16_t b, mve_pred16_t p) { return vandq_x_s8(a, b, p); }
[[gnu::always_inline]] nce int8x16_t bitwise_xor(int8x16_t a, int8x16_t b, mve_pred16_t p) { return veorq_x_s8(a, b, p); }
[[gnu::always_inline]] nce int8x16_t bitwise_not(int8x16_t inactive, int8x16_t a, mve_pred16_t p) { return vmvnq_m_s8(inactive, a, p); }
[[gnu::always_inline]] nce int8x16_t bitwise_or_not(int8x16_t a, int8x16_t b, mve_pred16_t p) { return vornq_x_s8(a, b, p); }
[[gnu::always_inline]] nce int8x16_t bitwise_or(int8x16_t a, int8x16_t b, mve_pred16_t p) { return vorrq_x_s8(a, b, p); }
[[gnu::always_inline]] nce int8x16_t complex_add_rotate_270_halve(int8x16_t a, int8x16_t b, mve_pred16_t p) { return vhcaddq_rot270_x_s8(a, b, p); }
[[gnu::always_inline]] nce int8x16_t subtract(int8x16_t inactive, int8x16_t a, int8_t b, mve_pred16_t p) { return vsubq_m_n_s8(inactive, a, b, p); }
[[gnu::always_inline]] nce int8x16_t subtract_halve(int8x16_t inactive, int8x16_t a, int8_t b, mve_pred16_t p) { return vhsubq_m_n_s8(inactive, a, b, p); }
[[gnu::always_inline]] nce int8x16_t subtract_saturate(int8x16_t inactive, int8x16_t a, int8_t b, mve_pred16_t p) { return vqsubq_m_n_s8(inactive, a, b, p); }
[[gnu::always_inline]] nce int8x16_t complex_add_rotate_90(int8x16_t a, int8x16_t b) { return vcaddq_rot90_s8(a, b); }
[[gnu::always_inline]] nce int8x16_t shift_left_round_saturate(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p) { return vqrshlq_m_s8(inactive, a, b, p); }
[[gnu::always_inline]] nce int8x16_t shift_left_saturate(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p) { return vqshlq_m_s8(inactive, a, b, p); }
template <int imm>[[gnu::always_inline]] nce int8x16_t shift_right_round(int8x16_t inactive, int8x16_t a, mve_pred16_t p) { return vrshrq_m_n_s8(inactive, a, imm, p); }
template <int imm>[[gnu::always_inline]] nce int8x16_t shift_right(int8x16_t inactive, int8x16_t a, mve_pred16_t p) { return vshrq_m_n_s8(inactive, a, imm, p); }
template <int imm>[[gnu::always_inline]] nce int8x16_t shift_right_insert(int8x16_t a, int8x16_t b, mve_pred16_t p) { return vsriq_m_n_s8(a, b, imm, p); }
template <int imm>[[gnu::always_inline]] nce int8x16_t shift_left_saturate(int8x16_t inactive, int8x16_t a, mve_pred16_t p) { return vqshlq_m_n_s8(inactive, a, imm, p); }
[[gnu::always_inline]] nce int8x16_t bit_reverse_shift_right(int8x16_t inactive, int8x16_t a, int32_t b, mve_pred16_t p) { return vbrsrq_m_n_s8(inactive, a, b, p); }
template <int imm>[[gnu::always_inline]] nce int8x16_t shift_right_narrow_round_saturate_bottom(int8x16_t a, int16x8_t b) { return vqrshrnbq_n_s16(a, b, imm); }
template <int imm>[[gnu::always_inline]] nce int8x16_t shift_right_narrow_round_saturate_bottom(int8x16_t a, int16x8_t b, mve_pred16_t p) { return vqrshrnbq_m_n_s16(a, b, imm, p); }
template <int imm>[[gnu::always_inline]] nce int8x16_t shift_right_narrow_round_saturate_top(int8x16_t a, int16x8_t b) { return vqrshrntq_n_s16(a, b, imm); }
template <int imm>[[gnu::always_inline]] nce int8x16_t shift_right_narrow_round_saturate_top(int8x16_t a, int16x8_t b, mve_pred16_t p) { return vqrshrntq_m_n_s16(a, b, imm, p); }
template <int imm>[[gnu::always_inline]] nce int8x16_t shift_right_narrow_saturate_bottom(int8x16_t a, int16x8_t b) { return vqshrnbq_n_s16(a, b, imm); }
template <int imm>[[gnu::always_inline]] nce int8x16_t shift_right_narrow_saturate_bottom(int8x16_t a, int16x8_t b, mve_pred16_t p) { return vqshrnbq_m_n_s16(a, b, imm, p); }
template <int imm>[[gnu::always_inline]] nce int8x16_t shift_right_narrow_saturate_top(int8x16_t a, int16x8_t b) { return vqshrntq_n_s16(a, b, imm); }
template <int imm>[[gnu::always_inline]] nce int8x16_t shift_right_narrow_saturate_top(int8x16_t a, int16x8_t b, mve_pred16_t p) { return vqshrntq_m_n_s16(a, b, imm, p); }
template <int imm>[[gnu::always_inline]] nce int8x16_t shift_right_narrow_round_bottom(int8x16_t a, int16x8_t b) { return vrshrnbq_n_s16(a, b, imm); }
template <int imm>[[gnu::always_inline]] nce int8x16_t shift_right_narrow_round_bottom(int8x16_t a, int16x8_t b, mve_pred16_t p) { return vrshrnbq_m_n_s16(a, b, imm, p); }
template <int imm>[[gnu::always_inline]] nce int8x16_t shift_right_narrow_round_top(int8x16_t a, int16x8_t b) { return vrshrntq_n_s16(a, b, imm); }
template <int imm>[[gnu::always_inline]] nce int8x16_t shift_right_narrow_round_top(int8x16_t a, int16x8_t b, mve_pred16_t p) { return vrshrntq_m_n_s16(a, b, imm, p); }
template <int imm>[[gnu::always_inline]] nce int8x16_t shift_right_narrow_bottom(int8x16_t a, int16x8_t b) { return vshrnbq_n_s16(a, b, imm); }
template <int imm>[[gnu::always_inline]] nce int8x16_t shift_right_narrow_bottom(int8x16_t a, int16x8_t b, mve_pred16_t p) { return vshrnbq_m_n_s16(a, b, imm, p); }
template <int imm>[[gnu::always_inline]] nce int8x16_t shift_right_narrow_top_signed(int8x16_t a, int16x8_t b) { return vshrntq_n_s16(a, b, imm); }
template <int imm>[[gnu::always_inline]] nce int8x16_t shift_right_narrow_top_signed(int8x16_t a, int16x8_t b, mve_pred16_t p) { return vshrntq_m_n_s16(a, b, imm, p); }
template <int imm>[[gnu::always_inline]] nce int8x16_t shift_right_round(int8x16_t a, mve_pred16_t p) { return vrshrq_x_n_s8(a, imm, p); }
template <int imm>[[gnu::always_inline]] nce int8x16_t shift_right(int8x16_t a, mve_pred16_t p) { return vshrq_x_n_s8(a, imm, p); }
[[gnu::always_inline]] nce int8x16_t bit_reverse_shift_right(int8x16_t a, int32_t b) { return vbrsrq_n_s8(a, b); }
[[gnu::always_inline]] nce int8x16_t bit_reverse_shift_right(int8x16_t a, int32_t b, mve_pred16_t p) { return vbrsrq_x_n_s8(a, b, p); }
[[gnu::always_inline]] nce int8x16_t shift_left_round_saturate(int8x16_t a, int32_t b) { return vqrshlq_n_s8(a, b); }
[[gnu::always_inline]] nce int8x16_t shift_left_round_saturate(int8x16_t a, int32_t b, mve_pred16_t p) { return vqrshlq_m_n_s8(a, b, p); }
[[gnu::always_inline]] nce int8x16_t shift_left_saturate(int8x16_t a, int32_t b) { return vqshlq_r_s8(a, b); }
[[gnu::always_inline]] nce int8x16_t shift_left_saturate(int8x16_t a, int32_t b, mve_pred16_t p) { return vqshlq_m_r_s8(a, b, p); }
template <int imm>[[gnu::always_inline]] nce uint8x16_t shift_left_unsigned_saturate(int8x16_t a) { return vqshluq_n_s8(a, imm); }
[[gnu::always_inline]] nce int8x16_t shift_left_round(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p) { return vrshlq_m_s8(inactive, a, b, p); }
[[gnu::always_inline]] nce int8x16_t shift_left(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p) { return vshlq_m_s8(inactive, a, b, p); }
[[gnu::always_inline]] nce int8x16_t shift_left_round(int8x16_t a, int8x16_t b, mve_pred16_t p) { return vrshlq_x_s8(a, b, p); }
[[gnu::always_inline]] nce int8x16_t shift_left(int8x16_t a, int8x16_t b, mve_pred16_t p) { return vshlq_x_s8(a, b, p); }
template <int imm>[[gnu::always_inline]] nce int8x16_t shift_left(int8x16_t inactive, int8x16_t a, mve_pred16_t p) { return vshlq_m_n_s8(inactive, a, imm, p); }
template <int imm>[[gnu::always_inline]] nce int16x8_t shift_left_long_bottom(int8x16_t a, mve_pred16_t p) { return vshllbq_x_n_s8(a, imm, p); }
template <int imm>[[gnu::always_inline]] nce int16x8_t shift_left_long_top(int8x16_t a, mve_pred16_t p) { return vshlltq_x_n_s8(a, imm, p); }
[[gnu::always_inline]] nce int8x16_t shift_left_round(int8x16_t a, int32_t b) { return vrshlq_n_s8(a, b); }
[[gnu::always_inline]] nce int8x16_t shift_left_round(int8x16_t a, int32_t b, mve_pred16_t p) { return vrshlq_m_n_s8(a, b, p); }
template <int imm>[[gnu::always_inline]] nce int8x16_t shift_left_carry(int8x16_t a, uint32_t *b) { return vshlcq_s8(a, b, imm); }
template <int imm>[[gnu::always_inline]] nce int8x16_t shift_left_carry(int8x16_t a, uint32_t *b, mve_pred16_t p) { return vshlcq_m_s8(a, b, imm, p); }
template <int imm>[[gnu::always_inline]] nce int16x8_t shift_left_long_bottom(int8x16_t a) { return vshllbq_n_s8(a, imm); }
template <int imm>[[gnu::always_inline]] nce int16x8_t shift_left_long_top(int8x16_t a) { return vshlltq_n_s8(a, imm); }
template <int imm>[[gnu::always_inline]] nce int8x16_t shift_left(int8x16_t a) { return vshlq_n_s8(a, imm); }
template <int imm>[[gnu::always_inline]] nce int8x16_t shift_left_insert(int8x16_t a, int8x16_t b, mve_pred16_t p) { return vsliq_m_n_s8(a, b, imm, p); }
[[gnu::always_inline]] nce int16x8_t move_long_bottom(int8x16_t a) { return vmovlbq_s8(a); }
[[gnu::always_inline]] nce int16x8_t move_long_bottom(int8x16_t a, mve_pred16_t p) { return vmovlbq_x_s8(a, p); }
[[gnu::always_inline]] nce int16x8_t move_long_top(int8x16_t a) { return vmovltq_s8(a); }
[[gnu::always_inline]] nce int8x16_t predicate_select(int8x16_t a, int8x16_t b, mve_pred16_t p) { return vpselq_s8(a, b, p); }
[[gnu::always_inline]] nce int8x16_t multiply_add_double_saturate_high(int8x16_t add, int8x16_t m1, int8_t m2) { return vqdmlahq_n_s8(add, m1, m2); }
[[gnu::always_inline]] nce int8x16_t multiply_add_double_saturate_high(int8x16_t add, int8x16_t m1, int8_t m2, mve_pred16_t p) { return vqdmlahq_m_n_s8(add, m1, m2, p); }
[[gnu::always_inline]] nce int8x16_t multiply_add_double_round_saturate_high(int8x16_t add, int8x16_t m1, int8_t m2) { return vqrdmlahq_n_s8(add, m1, m2); }
[[gnu::always_inline]] nce int8x16_t multiply_add_double_round_saturate_high(int8x16_t add, int8x16_t m1, int8_t m2, mve_pred16_t p) { return vqrdmlahq_m_n_s8(add, m1, m2, p); }
[[gnu::always_inline]] nce int8x16_t multiply_subtract_double_saturate_high(int8x16_t m1, int8x16_t m2, int8_t add) { return vqdmlashq_n_s8(m1, m2, add); }
[[gnu::always_inline]] nce int8x16_t multiply_subtract_double_saturate_high(int8x16_t m1, int8x16_t m2, int8_t add, mve_pred16_t p) { return vqdmlashq_m_n_s8(m1, m2, add, p); }
[[gnu::always_inline]] nce int8x16_t move_narrow_bottom(int8x16_t a, int16x8_t b) { return vmovnbq_s16(a, b); }
[[gnu::always_inline]] nce int8x16_t move_narrow_bottom(int8x16_t a, int16x8_t b, mve_pred16_t p) { return vmovnbq_m_s16(a, b, p); }
[[gnu::always_inline]] nce int8x16_t move_narrow_top(int8x16_t a, int16x8_t b) { return vmovntq_s16(a, b); }
[[gnu::always_inline]] nce int8x16_t move_narrow_top(int8x16_t a, int16x8_t b, mve_pred16_t p) { return vmovntq_m_s16(a, b, p); }
[[gnu::always_inline]] nce int8x16_t move_narrow_saturate_bottom(int8x16_t a, int16x8_t b) { return vqmovnbq_s16(a, b); }
[[gnu::always_inline]] nce int8x16_t move_narrow_saturate_bottom(int8x16_t a, int16x8_t b, mve_pred16_t p) { return vqmovnbq_m_s16(a, b, p); }
[[gnu::always_inline]] nce int8x16_t move_narrow_saturate_top(int8x16_t a, int16x8_t b) { return vqmovntq_s16(a, b); }
[[gnu::always_inline]] nce int8x16_t move_narrow_saturate_top(int8x16_t a, int16x8_t b, mve_pred16_t p) { return vqmovntq_m_s16(a, b, p); }
[[gnu::always_inline]] nce int8x16_t reverse_16bit(int8x16_t a, mve_pred16_t p) { return vrev16q_x_s8(a, p); }
[[gnu::always_inline]] nce int8x16_t reverse_32bit(int8x16_t a, mve_pred16_t p) { return vrev32q_x_s8(a, p); }
[[gnu::always_inline]] nce int8x16_t abs(int8x16_t a, mve_pred16_t p) { return vabsq_x_s8(a, p); }
[[gnu::always_inline]] nce int8x16_t count_leading_sign_bits(int8x16_t a, mve_pred16_t p) { return vclsq_x_s8(a, p); }
[[gnu::always_inline]] nce int8x16_t count_leading_zero_bits(int8x16_t a, mve_pred16_t p) { return vclzq_x_s8(a, p); }
[[gnu::always_inline]] nce int8x16_t negate(int8x16_t a, mve_pred16_t p) { return vnegq_x_s8(a, p); }
[[gnu::always_inline]] nce int8x16_t bitwise_not(int8x16_t a, mve_pred16_t p) { return vmvnq_x_s8(a, p); }
template <int imm>[[gnu::always_inline]] nce int8x16_t shift_left(int8x16_t a, mve_pred16_t p) { return vshlq_x_n_s8(a, imm, p); }
[[gnu::always_inline]] nce int16x8_t move_long_top(int8x16_t a, mve_pred16_t p) { return vmovltq_x_s8(a, p); }
[[gnu::always_inline]] nce int8x16_t duplicate(int8x16_t inactive, int8_t a, mve_pred16_t p) { return vdupq_m_n_s8(inactive, a, p); }
[[gnu::always_inline]] nce mve_pred16_t equal(int8x16_t a, int8_t b) { return vcmpeqq_n_s8(a, b); }
[[gnu::always_inline]] nce mve_pred16_t equal(int8x16_t a, int8_t b, mve_pred16_t p) { return vcmpeqq_m_n_s8(a, b, p); }
[[gnu::always_inline]] nce mve_pred16_t not_equal(int8x16_t a, int8_t b) { return vcmpneq_n_s8(a, b); }
[[gnu::always_inline]] nce mve_pred16_t not_equal(int8x16_t a, int8_t b, mve_pred16_t p) { return vcmpneq_m_n_s8(a, b, p); }
[[gnu::always_inline]] nce mve_pred16_t greater_than_or_equal(int8x16_t a, int8_t b) { return vcmpgeq_n_s8(a, b); }
[[gnu::always_inline]] nce mve_pred16_t greater_than_or_equal(int8x16_t a, int8_t b, mve_pred16_t p) { return vcmpgeq_m_n_s8(a, b, p); }
[[gnu::always_inline]] nce mve_pred16_t greater_than(int8x16_t a, int8_t b) { return vcmpgtq_n_s8(a, b); }
[[gnu::always_inline]] nce int8x16_t add(int8x16_t a, int8_t b, mve_pred16_t p) { return vaddq_x_n_s8(a, b, p); }
[[gnu::always_inline]] nce int32_t reduce_add(int8x16_t a) { return vaddvq_s8(a); }
[[gnu::always_inline]] nce int32_t reduce_add(int8x16_t a, mve_pred16_t p) { return vaddvq_p_s8(a, p); }
[[gnu::always_inline]] nce mve_pred16_t greater_than(int8x16_t a, int8_t b, mve_pred16_t p) { return vcmpgtq_m_n_s8(a, b, p); }
[[gnu::always_inline]] nce mve_pred16_t less_than_or_equal(int8x16_t a, int8_t b) { return vcmpleq_n_s8(a, b); }
[[gnu::always_inline]] nce mve_pred16_t less_than_or_equal(int8x16_t a, int8_t b, mve_pred16_t p) { return vcmpleq_m_n_s8(a, b, p); }
[[gnu::always_inline]] nce mve_pred16_t less_than(int8x16_t a, int8_t b) { return vcmpltq_n_s8(a, b); }
[[gnu::always_inline]] nce mve_pred16_t less_than(int8x16_t a, int8_t b, mve_pred16_t p) { return vcmpltq_m_n_s8(a, b, p); }
[[gnu::always_inline]] nce int8x16_t add(int8x16_t a, int8_t b) { return vaddq_n_s8(a, b); }
[[gnu::always_inline]] nce int8x16_t add_halve(int8x16_t a, int8_t b) { return vhaddq_n_s8(a, b); }
[[gnu::always_inline]] nce int8x16_t add_halve(int8x16_t a, int8_t b, mve_pred16_t p) { return vhaddq_x_n_s8(a, b, p); }
450[[gnu::always_inline]] nce int8x16_t add_saturate(int8x16_t a, int8_t b) { return vqaddq_n_s8(a, b); }
451[[gnu::always_inline]] nce int8x16_t multiply(int8x16_t a, int8_t b, mve_pred16_t p) { return vmulq_x_n_s8(a, b, p); }
452[[gnu::always_inline]] nce int8x16_t multiply_double_saturate_high(int8x16_t a, int8_t b) { return vqdmulhq_n_s8(a, b); }
453[[gnu::always_inline]] nce int8x16_t multiply_double_round_saturate_high(int8x16_t a, int8_t b) { return vqrdmulhq_n_s8(a, b); }
454[[gnu::always_inline]] nce int8x16_t subtract(int8x16_t a, int8_t b) { return vsubq_n_s8(a, b); }
455[[gnu::always_inline]] nce int8x16_t subtract(int8x16_t a, int8_t b, mve_pred16_t p) { return vsubq_x_n_s8(a, b, p); }
456[[gnu::always_inline]] nce int8x16_t subtract_halve(int8x16_t a, int8_t b) { return vhsubq_n_s8(a, b); }
457[[gnu::always_inline]] nce int8x16_t subtract_halve(int8x16_t a, int8_t b, mve_pred16_t p) { return vhsubq_x_n_s8(a, b, p); }
458[[gnu::always_inline]] nce int8x16_t subtract_saturate(int8x16_t a, int8_t b) { return vqsubq_n_s8(a, b); }
459[[gnu::always_inline]] nce int8x16_t shift_left(int8x16_t a, int32_t b) { return vshlq_r_s8(a, b); }
460[[gnu::always_inline]] nce int8x16_t shift_left(int8x16_t a, int32_t b, mve_pred16_t p) { return vshlq_m_r_s8(a, b, p); }
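// uint16x8_t wrappers follow: comparisons, arithmetic, bitwise operations, widening/narrowing moves, and shifts (plain and predicated forms).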
461template <int imm>[[gnu::always_inline]] nce uint16x8_t shift_left_long_bottom(uint16x8_t inactive, uint8x16_t a, mve_pred16_t p) { return vshllbq_m_n_u8(inactive, a, imm, p); }
462[[gnu::always_inline]] nce uint16x8_t reverse_32bit(uint16x8_t inactive, uint16x8_t a, mve_pred16_t p) { return vrev32q_m_u16(inactive, a, p); }
463[[gnu::always_inline]] nce uint16x8_t reverse_64bit(uint16x8_t inactive, uint16x8_t a, mve_pred16_t p) { return vrev64q_m_u16(inactive, a, p); }
464[[gnu::always_inline]] nce mve_pred16_t not_equal(uint16x8_t a, uint16x8_t b) { return vcmpneq_u16(a, b); }
465[[gnu::always_inline]] nce mve_pred16_t not_equal(uint16x8_t a, uint16x8_t b, mve_pred16_t p) { return vcmpneq_m_u16(a, b, p); }
466[[gnu::always_inline]] nce mve_pred16_t higher_or_same(uint16x8_t a, uint16x8_t b) { return vcmpcsq_u16(a, b); }
467[[gnu::always_inline]] nce mve_pred16_t higher_or_same(uint16x8_t a, uint16x8_t b, mve_pred16_t p) { return vcmpcsq_m_u16(a, b, p); }
468[[gnu::always_inline]] nce uint16x8_t reverse_32bit(uint16x8_t a, mve_pred16_t p) { return vrev32q_x_u16(a, p); }
469[[gnu::always_inline]] nce uint16x8_t reverse_64bit(uint16x8_t a, mve_pred16_t p) { return vrev64q_x_u16(a, p); }
470template <int imm>[[gnu::always_inline]] nce uint16x8_t decrement_duplicate(uint16x8_t inactive, uint32_t a, mve_pred16_t p) { return vddupq_m_n_u16(inactive, a, imm, p); }
471template <int imm>[[gnu::always_inline]] nce uint16x8_t decrement_duplicate(uint16x8_t inactive, uint32_t *a, mve_pred16_t p) { return vddupq_m_wb_u16(inactive, a, imm, p); }
472template <int imm>[[gnu::always_inline]] nce uint16x8_t decrement_wrap_duplicate(uint16x8_t inactive, uint32_t a, uint32_t b, mve_pred16_t p) { return vdwdupq_m_n_u16(inactive, a, b, imm, p); }
473template <int imm>[[gnu::always_inline]] nce uint16x8_t decrement_wrap_duplicate(uint16x8_t inactive, uint32_t *a, uint32_t b, mve_pred16_t p) { return vdwdupq_m_wb_u16(inactive, a, b, imm, p); }
474template <int imm>[[gnu::always_inline]] nce uint16x8_t increment_duplicate(uint16x8_t inactive, uint32_t a, mve_pred16_t p) { return vidupq_m_n_u16(inactive, a, imm, p); }
475template <int imm>[[gnu::always_inline]] nce uint16x8_t increment_duplicate(uint16x8_t inactive, uint32_t *a, mve_pred16_t p) { return vidupq_m_wb_u16(inactive, a, imm, p); }
476template <int imm>[[gnu::always_inline]] nce uint16x8_t increment_wrap_duplicate(uint16x8_t inactive, uint32_t a, uint32_t b, mve_pred16_t p) { return viwdupq_m_n_u16(inactive, a, b, imm, p); }
477template <int imm>[[gnu::always_inline]] nce uint16x8_t increment_wrap_duplicate(uint16x8_t inactive, uint32_t *a, uint32_t b, mve_pred16_t p) { return viwdupq_m_wb_u16(inactive, a, b, imm, p); }
478[[gnu::always_inline]] nce uint16x8_t duplicate(uint16x8_t inactive, uint16_t a, mve_pred16_t p) { return vdupq_m_n_u16(inactive, a, p); }
479[[gnu::always_inline]] nce uint16x8_t uninitialized(uint16x8_t t) { return vuninitializedq(t); }
480[[gnu::always_inline]] nce mve_pred16_t equal(uint16x8_t a, uint16x8_t b) { return vcmpeqq_u16(a, b); }
481[[gnu::always_inline]] nce mve_pred16_t higher(uint16x8_t a, uint16x8_t b) { return vcmphiq_u16(a, b); }
482[[gnu::always_inline]] nce uint16x8_t min(uint16x8_t inactive, uint16x8_t a, uint16x8_t b, mve_pred16_t p) { return vminq_m_u16(inactive, a, b, p); }
483[[gnu::always_inline]] nce uint16x8_t max(uint16x8_t inactive, uint16x8_t a, uint16x8_t b, mve_pred16_t p) { return vmaxq_m_u16(inactive, a, b, p); }
484[[gnu::always_inline]] nce uint16x8_t subtract_absolute(uint16x8_t inactive, uint16x8_t a, uint16x8_t b, mve_pred16_t p) { return vabdq_m_u16(inactive, a, b, p); }
485[[gnu::always_inline]] nce uint16x8_t add(uint16x8_t inactive, uint16x8_t a, uint16x8_t b, mve_pred16_t p) { return vaddq_m_u16(inactive, a, b, p); }
486[[gnu::always_inline]] nce mve_pred16_t equal(uint16x8_t a, uint16x8_t b, mve_pred16_t p) { return vcmpeqq_m_u16(a, b, p); }
487[[gnu::always_inline]] nce mve_pred16_t higher(uint16x8_t a, uint16x8_t b, mve_pred16_t p) { return vcmphiq_m_u16(a, b, p); }
488[[gnu::always_inline]] nce uint16x8_t min(uint16x8_t a, uint16x8_t b, mve_pred16_t p) { return vminq_x_u16(a, b, p); }
489[[gnu::always_inline]] nce uint16x8_t max(uint16x8_t a, uint16x8_t b, mve_pred16_t p) { return vmaxq_x_u16(a, b, p); }
490[[gnu::always_inline]] nce uint16x8_t subtract_absolute(uint16x8_t a, uint16x8_t b, mve_pred16_t p) { return vabdq_x_u16(a, b, p); }
491[[gnu::always_inline]] nce uint16x8_t add(uint16x8_t a, uint16x8_t b, mve_pred16_t p) { return vaddq_x_u16(a, b, p); }
492[[gnu::always_inline]] nce uint16x8_t add(uint16x8_t inactive, uint16x8_t a, uint16_t b, mve_pred16_t p) { return vaddq_m_n_u16(inactive, a, b, p); }
493[[gnu::always_inline]] nce uint16x8_t minimum_absolute(uint16x8_t a, int16x8_t b) { return vminaq_s16(a, b); }
494[[gnu::always_inline]] nce uint16x8_t minimum_absolute(uint16x8_t a, int16x8_t b, mve_pred16_t p) { return vminaq_m_s16(a, b, p); }
495[[gnu::always_inline]] nce uint16x8_t maximum_absolute(uint16x8_t a, int16x8_t b) { return vmaxaq_s16(a, b); }
496[[gnu::always_inline]] nce uint16x8_t maximum_absolute(uint16x8_t a, int16x8_t b, mve_pred16_t p) { return vmaxaq_m_s16(a, b, p); }
497[[gnu::always_inline]] nce mve_pred16_t equal(uint16x8_t a, uint16_t b) { return vcmpeqq_n_u16(a, b); }
498[[gnu::always_inline]] nce mve_pred16_t equal(uint16x8_t a, uint16_t b, mve_pred16_t p) { return vcmpeqq_m_n_u16(a, b, p); }
499[[gnu::always_inline]] nce mve_pred16_t not_equal(uint16x8_t a, uint16_t b) { return vcmpneq_n_u16(a, b); }
500[[gnu::always_inline]] nce mve_pred16_t not_equal(uint16x8_t a, uint16_t b, mve_pred16_t p) { return vcmpneq_m_n_u16(a, b, p); }
501[[gnu::always_inline]] nce mve_pred16_t higher_or_same(uint16x8_t a, uint16_t b) { return vcmpcsq_n_u16(a, b); }
502[[gnu::always_inline]] nce mve_pred16_t higher_or_same(uint16x8_t a, uint16_t b, mve_pred16_t p) { return vcmpcsq_m_n_u16(a, b, p); }
503[[gnu::always_inline]] nce mve_pred16_t higher(uint16x8_t a, uint16_t b) { return vcmphiq_n_u16(a, b); }
504[[gnu::always_inline]] nce mve_pred16_t higher(uint16x8_t a, uint16_t b, mve_pred16_t p) { return vcmphiq_m_n_u16(a, b, p); }
505[[gnu::always_inline]] nce uint16x8_t add(uint16x8_t a, uint16_t b) { return vaddq_n_u16(a, b); }
506[[gnu::always_inline]] nce uint16x8_t add(uint16x8_t a, uint16_t b, mve_pred16_t p) { return vaddq_x_n_u16(a, b, p); }
507[[gnu::always_inline]] nce uint32_t reduce_add(uint16x8_t a) { return vaddvq_u16(a); }
508[[gnu::always_inline]] nce uint16x8_t multiply_long_bottom_poly(uint16x8_t inactive, uint8x16_t a, uint8x16_t b, mve_pred16_t p) { return vmullbq_poly_m_p8(inactive, a, b, p); }
509[[gnu::always_inline]] nce uint16x8_t multiply_long_bottom(uint16x8_t inactive, uint8x16_t a, uint8x16_t b, mve_pred16_t p) { return vmullbq_int_m_u8(inactive, a, b, p); }
510[[gnu::always_inline]] nce uint16x8_t multiply_long_top_poly(uint16x8_t inactive, uint8x16_t a, uint8x16_t b, mve_pred16_t p) { return vmulltq_poly_m_p8(inactive, a, b, p); }
511[[gnu::always_inline]] nce uint16x8_t multiply_long_top(uint16x8_t inactive, uint8x16_t a, uint8x16_t b, mve_pred16_t p) { return vmulltq_int_m_u8(inactive, a, b, p); }
512[[gnu::always_inline]] nce uint16x8_t add_halve(uint16x8_t inactive, uint16x8_t a, uint16x8_t b, mve_pred16_t p) { return vhaddq_m_u16(inactive, a, b, p); }
513[[gnu::always_inline]] nce uint16x8_t add_halve_round(uint16x8_t inactive, uint16x8_t a, uint16x8_t b, mve_pred16_t p) { return vrhaddq_m_u16(inactive, a, b, p); }
514[[gnu::always_inline]] nce uint16x8_t add_saturate(uint16x8_t inactive, uint16x8_t a, uint16x8_t b, mve_pred16_t p) { return vqaddq_m_u16(inactive, a, b, p); }
515[[gnu::always_inline]] nce uint16x8_t multiply_high(uint16x8_t inactive, uint16x8_t a, uint16x8_t b, mve_pred16_t p) { return vmulhq_m_u16(inactive, a, b, p); }
516[[gnu::always_inline]] nce uint16x8_t add_halve(uint16x8_t a, uint16x8_t b, mve_pred16_t p) { return vhaddq_x_u16(a, b, p); }
517[[gnu::always_inline]] nce uint16x8_t add_halve_round(uint16x8_t a, uint16x8_t b, mve_pred16_t p) { return vrhaddq_x_u16(a, b, p); }
518[[gnu::always_inline]] nce uint32x4_t multiply_long_bottom_poly(uint16x8_t a, uint16x8_t b, mve_pred16_t p) { return vmullbq_poly_x_p16(a, b, p); }
519[[gnu::always_inline]] nce uint16x8_t add_halve(uint16x8_t inactive, uint16x8_t a, uint16_t b, mve_pred16_t p) { return vhaddq_m_n_u16(inactive, a, b, p); }
520[[gnu::always_inline]] nce uint16x8_t add_saturate(uint16x8_t inactive, uint16x8_t a, uint16_t b, mve_pred16_t p) { return vqaddq_m_n_u16(inactive, a, b, p); }
521[[gnu::always_inline]] nce uint16x8_t multiply_high(uint16x8_t a, uint16x8_t b) { return vmulhq_u16(a, b); }
522[[gnu::always_inline]] nce uint16x8_t multiply_high(uint16x8_t a, uint16x8_t b, mve_pred16_t p) { return vmulhq_x_u16(a, b, p); }
523[[gnu::always_inline]] nce uint32x4_t multiply_long_bottom_poly(uint16x8_t a, uint16x8_t b) { return vmullbq_poly_p16(a, b); }
524[[gnu::always_inline]] nce uint32x4_t multiply_long_bottom(uint16x8_t a, uint16x8_t b) { return vmullbq_int_u16(a, b); }
525[[gnu::always_inline]] nce uint32x4_t multiply_long_bottom(uint16x8_t a, uint16x8_t b, mve_pred16_t p) { return vmullbq_int_x_u16(a, b, p); }
526[[gnu::always_inline]] nce uint32x4_t multiply_long_top_poly(uint16x8_t a, uint16x8_t b) { return vmulltq_poly_p16(a, b); }
527[[gnu::always_inline]] nce uint32x4_t multiply_long_top(uint16x8_t a, uint16x8_t b) { return vmulltq_int_u16(a, b); }
528[[gnu::always_inline]] nce uint16x8_t multiply(uint16x8_t inactive, uint16x8_t a, uint16x8_t b, mve_pred16_t p) { return vmulq_m_u16(inactive, a, b, p); }
529[[gnu::always_inline]] nce uint16x8_t multiply_round_high(uint16x8_t inactive, uint16x8_t a, uint16x8_t b, mve_pred16_t p) { return vrmulhq_m_u16(inactive, a, b, p); }
530[[gnu::always_inline]] nce uint32x4_t multiply_long_top_poly(uint16x8_t a, uint16x8_t b, mve_pred16_t p) { return vmulltq_poly_x_p16(a, b, p); }
531[[gnu::always_inline]] nce uint32x4_t multiply_long_top(uint16x8_t a, uint16x8_t b, mve_pred16_t p) { return vmulltq_int_x_u16(a, b, p); }
532[[gnu::always_inline]] nce uint16x8_t multiply(uint16x8_t a, uint16x8_t b, mve_pred16_t p) { return vmulq_x_u16(a, b, p); }
533[[gnu::always_inline]] nce uint16x8_t multiply_round_high(uint16x8_t a, uint16x8_t b, mve_pred16_t p) { return vrmulhq_x_u16(a, b, p); }
534[[gnu::always_inline]] nce uint16x8_t multiply(uint16x8_t inactive, uint16x8_t a, uint16_t b, mve_pred16_t p) { return vmulq_m_n_u16(inactive, a, b, p); }
535[[gnu::always_inline]] nce uint16x8_t multiply_round_high(uint16x8_t a, uint16x8_t b) { return vrmulhq_u16(a, b); }
536[[gnu::always_inline]] nce uint32_t multiply_add_dual_reduce_add(uint16x8_t m1, uint16x8_t m2) { return vmladavq_u16(m1, m2); }
537[[gnu::always_inline]] nce uint16x8_t subtract(uint16x8_t inactive, uint16x8_t a, uint16x8_t b, mve_pred16_t p) { return vsubq_m_u16(inactive, a, b, p); }
538[[gnu::always_inline]] nce uint16x8_t subtract_halve(uint16x8_t inactive, uint16x8_t a, uint16x8_t b, mve_pred16_t p) { return vhsubq_m_u16(inactive, a, b, p); }
539[[gnu::always_inline]] nce uint16x8_t subtract_saturate(uint16x8_t inactive, uint16x8_t a, uint16x8_t b, mve_pred16_t p) { return vqsubq_m_u16(inactive, a, b, p); }
540[[gnu::always_inline]] nce uint16x8_t bitwise_clear(uint16x8_t inactive, uint16x8_t a, uint16x8_t b, mve_pred16_t p) { return vbicq_m_u16(inactive, a, b, p); }
541[[gnu::always_inline]] nce uint16x8_t bitwise_and(uint16x8_t inactive, uint16x8_t a, uint16x8_t b, mve_pred16_t p) { return vandq_m_u16(inactive, a, b, p); }
542[[gnu::always_inline]] nce uint16x8_t bitwise_xor(uint16x8_t inactive, uint16x8_t a, uint16x8_t b, mve_pred16_t p) { return veorq_m_u16(inactive, a, b, p); }
543[[gnu::always_inline]] nce uint16x8_t bitwise_or_not(uint16x8_t inactive, uint16x8_t a, uint16x8_t b, mve_pred16_t p) { return vornq_m_u16(inactive, a, b, p); }
544[[gnu::always_inline]] nce uint16x8_t bitwise_or(uint16x8_t inactive, uint16x8_t a, uint16x8_t b, mve_pred16_t p) { return vorrq_m_u16(inactive, a, b, p); }
545[[gnu::always_inline]] nce uint32_t multiply_add_dual_reduce_add(uint16x8_t m1, uint16x8_t m2, mve_pred16_t p) { return vmladavq_p_u16(m1, m2, p); }
546[[gnu::always_inline]] nce uint64_t multiply_add_long_dual_reduce_add(uint16x8_t m1, uint16x8_t m2) { return vmlaldavq_u16(m1, m2); }
547[[gnu::always_inline]] nce uint64_t multiply_add_long_dual_reduce_add(uint16x8_t m1, uint16x8_t m2, mve_pred16_t p) { return vmlaldavq_p_u16(m1, m2, p); }
548[[gnu::always_inline]] nce uint16x8_t multiply_add(uint16x8_t add, uint16x8_t m1, uint16_t m2) { return vmlaq_n_u16(add, m1, m2); }
549[[gnu::always_inline]] nce uint16x8_t multiply_add(uint16x8_t add, uint16x8_t m1, uint16_t m2, mve_pred16_t p) { return vmlaq_m_n_u16(add, m1, m2, p); }
550[[gnu::always_inline]] nce uint16x8_t multiply_add_scalar(uint16x8_t m1, uint16x8_t m2, uint16_t add) { return vmlasq_n_u16(m1, m2, add); }
551[[gnu::always_inline]] nce uint16x8_t multiply_add_scalar(uint16x8_t m1, uint16x8_t m2, uint16_t add, mve_pred16_t p) { return vmlasq_m_n_u16(m1, m2, add, p); }
552[[gnu::always_inline]] nce uint16x8_t complex_add_rotate_90(uint16x8_t a, uint16x8_t b) { return vcaddq_rot90_u16(a, b); }
553[[gnu::always_inline]] nce uint16x8_t complex_add_rotate_270(uint16x8_t a, uint16x8_t b) { return vcaddq_rot270_u16(a, b); }
554[[gnu::always_inline]] nce uint16x8_t complex_add_rotate_90(uint16x8_t inactive, uint16x8_t a, uint16x8_t b, mve_pred16_t p) { return vcaddq_rot90_m_u16(inactive, a, b, p); }
555[[gnu::always_inline]] nce uint16x8_t complex_add_rotate_270(uint16x8_t inactive, uint16x8_t a, uint16x8_t b, mve_pred16_t p) { return vcaddq_rot270_m_u16(inactive, a, b, p); }
556[[gnu::always_inline]] nce uint16x8_t shift_left_round(uint16x8_t inactive, uint16x8_t a, int16x8_t b, mve_pred16_t p) { return vrshlq_m_u16(inactive, a, b, p); }
557[[gnu::always_inline]] nce uint16x8_t subtract(uint16x8_t a, uint16x8_t b, mve_pred16_t p) { return vsubq_x_u16(a, b, p); }
558[[gnu::always_inline]] nce uint16x8_t subtract_halve(uint16x8_t a, uint16x8_t b, mve_pred16_t p) { return vhsubq_x_u16(a, b, p); }
559[[gnu::always_inline]] nce uint16x8_t count_leading_zero_bits(uint16x8_t inactive, uint16x8_t a, mve_pred16_t p) { return vclzq_m_u16(inactive, a, p); }
560[[gnu::always_inline]] nce uint16x8_t bitwise_clear(uint16x8_t a, uint16x8_t b, mve_pred16_t p) { return vbicq_x_u16(a, b, p); }
561[[gnu::always_inline]] nce uint16x8_t bitwise_and(uint16x8_t a, uint16x8_t b, mve_pred16_t p) { return vandq_x_u16(a, b, p); }
562[[gnu::always_inline]] nce uint16x8_t bitwise_xor(uint16x8_t a, uint16x8_t b, mve_pred16_t p) { return veorq_x_u16(a, b, p); }
563[[gnu::always_inline]] nce uint16x8_t bitwise_not(uint16x8_t inactive, uint16x8_t a, mve_pred16_t p) { return vmvnq_m_u16(inactive, a, p); }
564[[gnu::always_inline]] nce uint16x8_t bitwise_or_not(uint16x8_t a, uint16x8_t b, mve_pred16_t p) { return vornq_x_u16(a, b, p); }
565[[gnu::always_inline]] nce uint16x8_t bitwise_or(uint16x8_t a, uint16x8_t b, mve_pred16_t p) { return vorrq_x_u16(a, b, p); }
566[[gnu::always_inline]] nce uint16x8_t complex_add_rotate_90(uint16x8_t a, uint16x8_t b, mve_pred16_t p) { return vcaddq_rot90_x_u16(a, b, p); }
567[[gnu::always_inline]] nce uint16x8_t complex_add_rotate_270(uint16x8_t a, uint16x8_t b, mve_pred16_t p) { return vcaddq_rot270_x_u16(a, b, p); }
568[[gnu::always_inline]] nce uint16x8_t subtract(uint16x8_t inactive, uint16x8_t a, uint16_t b, mve_pred16_t p) { return vsubq_m_n_u16(inactive, a, b, p); }
569[[gnu::always_inline]] nce uint16x8_t subtract_halve(uint16x8_t inactive, uint16x8_t a, uint16_t b, mve_pred16_t p) { return vhsubq_m_n_u16(inactive, a, b, p); }
570[[gnu::always_inline]] nce uint16x8_t subtract_saturate(uint16x8_t inactive, uint16x8_t a, uint16_t b, mve_pred16_t p) { return vqsubq_m_n_u16(inactive, a, b, p); }
571[[gnu::always_inline]] nce uint16x8_t shift_left_round(uint16x8_t a, int16x8_t b, mve_pred16_t p) { return vrshlq_x_u16(a, b, p); }
572template <int imm>[[gnu::always_inline]] nce uint32x4_t shift_left_long_bottom(uint16x8_t a, mve_pred16_t p) { return vshllbq_x_n_u16(a, imm, p); }
573[[gnu::always_inline]] nce uint16x8_t shift_left_round(uint16x8_t a, int32_t b) { return vrshlq_n_u16(a, b); }
574[[gnu::always_inline]] nce uint16x8_t shift_left_round(uint16x8_t a, int32_t b, mve_pred16_t p) { return vrshlq_m_n_u16(a, b, p); }
575template <int imm>[[gnu::always_inline]] nce uint16x8_t shift_left_carry(uint16x8_t a, uint32_t *b) { return vshlcq_u16(a, b, imm); }
576template <int imm>[[gnu::always_inline]] nce uint16x8_t shift_left_carry(uint16x8_t a, uint32_t *b, mve_pred16_t p) { return vshlcq_m_u16(a, b, imm, p); }
577template <int imm>[[gnu::always_inline]] nce uint32x4_t shift_left_long_bottom(uint16x8_t a) { return vshllbq_n_u16(a, imm); }
578template <int imm>[[gnu::always_inline]] nce uint32x4_t shift_left_long_top(uint16x8_t a) { return vshlltq_n_u16(a, imm); }
579template <int imm>[[gnu::always_inline]] nce uint16x8_t shift_left_long_top(uint16x8_t inactive, uint8x16_t a, mve_pred16_t p) { return vshlltq_m_n_u8(inactive, a, imm, p); }
580[[gnu::always_inline]] nce uint16x8_t shift_left(uint16x8_t inactive, uint16x8_t a, int16x8_t b, mve_pred16_t p) { return vshlq_m_u16(inactive, a, b, p); }
581[[gnu::always_inline]] nce uint16x8_t shift_left(uint16x8_t a, int16x8_t b, mve_pred16_t p) { return vshlq_x_u16(a, b, p); }
582template <int imm>[[gnu::always_inline]] nce uint32x4_t shift_left_long_top(uint16x8_t a, mve_pred16_t p) { return vshlltq_x_n_u16(a, imm, p); }
583template <int imm>[[gnu::always_inline]] nce uint16x8_t shift_left(uint16x8_t a) { return vshlq_n_u16(a, imm); }
584template <int imm>[[gnu::always_inline]] nce uint16x8_t shift_left_insert(uint16x8_t a, uint16x8_t b, mve_pred16_t p) { return vsliq_m_n_u16(a, b, imm, p); }
585[[gnu::always_inline]] nce uint32x4_t move_long_bottom(uint16x8_t a) { return vmovlbq_u16(a); }
586[[gnu::always_inline]] nce uint16x8_t move_long_bottom(uint16x8_t inactive, uint8x16_t a, mve_pred16_t p) { return vmovlbq_m_u8(inactive, a, p); }
587template <int imm>[[gnu::always_inline]] nce uint16x8_t shift_left(uint16x8_t inactive, uint16x8_t a, mve_pred16_t p) { return vshlq_m_n_u16(inactive, a, imm, p); }
588[[gnu::always_inline]] nce uint32x4_t move_long_bottom(uint16x8_t a, mve_pred16_t p) { return vmovlbq_x_u16(a, p); }
589[[gnu::always_inline]] nce uint32x4_t move_long_top(uint16x8_t a) { return vmovltq_u16(a); }
590[[gnu::always_inline]] nce uint16x8_t move_long_top(uint16x8_t inactive, uint8x16_t a, mve_pred16_t p) { return vmovltq_m_u8(inactive, a, p); }
591[[gnu::always_inline]] nce uint16x8_t shift_left_round_saturate(uint16x8_t inactive, uint16x8_t a, int16x8_t b, mve_pred16_t p) { return vqrshlq_m_u16(inactive, a, b, p); }
592[[gnu::always_inline]] nce uint16x8_t shift_left_saturate(uint16x8_t inactive, uint16x8_t a, int16x8_t b, mve_pred16_t p) { return vqshlq_m_u16(inactive, a, b, p); }
593template <int imm>[[gnu::always_inline]] nce uint16x8_t shift_right_round(uint16x8_t inactive, uint16x8_t a, mve_pred16_t p) { return vrshrq_m_n_u16(inactive, a, imm, p); }
594template <int imm>[[gnu::always_inline]] nce uint16x8_t shift_right(uint16x8_t inactive, uint16x8_t a, mve_pred16_t p) { return vshrq_m_n_u16(inactive, a, imm, p); }
595template <int imm>[[gnu::always_inline]] nce uint16x8_t shift_right_insert(uint16x8_t a, uint16x8_t b, mve_pred16_t p) { return vsriq_m_n_u16(a, b, imm, p); }
596template <int imm>[[gnu::always_inline]] nce uint16x8_t shift_left_saturate(uint16x8_t inactive, uint16x8_t a, mve_pred16_t p) { return vqshlq_m_n_u16(inactive, a, imm, p); }
597[[gnu::always_inline]] nce uint16x8_t predicate_select(uint16x8_t a, uint16x8_t b, mve_pred16_t p) { return vpselq_u16(a, b, p); }
598[[gnu::always_inline]] nce uint16x8_t bit_reverse_shift_right(uint16x8_t inactive, uint16x8_t a, int32_t b, mve_pred16_t p) { return vbrsrq_m_n_u16(inactive, a, b, p); }
599template <int imm>[[gnu::always_inline]] nce uint16x8_t shift_left_unsigned_saturate(uint16x8_t inactive, int16x8_t a, mve_pred16_t p) { return vqshluq_m_n_s16(inactive, a, imm, p); }
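// The narrowing shifts and moves below pack 32-bit source lanes into the even-numbered (bottom) or odd-numbered (top) 16-bit lanes of the destination vector.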
600template <int imm>[[gnu::always_inline]] nce uint16x8_t shift_right_narrow_round_saturate_bottom(uint16x8_t a, int32x4_t b) { return vqrshrunbq_n_s32(a, b, imm); }
601template <int imm>[[gnu::always_inline]] nce uint16x8_t shift_right_narrow_round_saturate_bottom(uint16x8_t a, int32x4_t b, mve_pred16_t p) { return vqrshrunbq_m_n_s32(a, b, imm, p); }
602template <int imm>[[gnu::always_inline]] nce uint16x8_t shift_right_narrow_round_saturate_top(uint16x8_t a, int32x4_t b) { return vqrshruntq_n_s32(a, b, imm); }
603template <int imm>[[gnu::always_inline]] nce uint16x8_t shift_right_narrow_round_saturate_top(uint16x8_t a, int32x4_t b, mve_pred16_t p) { return vqrshruntq_m_n_s32(a, b, imm, p); }
604template <int imm>[[gnu::always_inline]] nce uint16x8_t shift_right_narrow_saturate_bottom(uint16x8_t a, int32x4_t b) { return vqshrunbq_n_s32(a, b, imm); }
605template <int imm>[[gnu::always_inline]] nce uint16x8_t shift_right_narrow_saturate_bottom(uint16x8_t a, int32x4_t b, mve_pred16_t p) { return vqshrunbq_m_n_s32(a, b, imm, p); }
606template <int imm>[[gnu::always_inline]] nce uint16x8_t shift_right_narrow_saturate_top(uint16x8_t a, int32x4_t b) { return vqshruntq_n_s32(a, b, imm); }
607template <int imm>[[gnu::always_inline]] nce uint16x8_t shift_right_narrow_saturate_top(uint16x8_t a, int32x4_t b, mve_pred16_t p) { return vqshruntq_m_n_s32(a, b, imm, p); }
608[[gnu::always_inline]] nce uint16x8_t move_narrow_saturate_bottom(uint16x8_t a, int32x4_t b) { return vqmovunbq_s32(a, b); }
609[[gnu::always_inline]] nce uint16x8_t move_narrow_saturate_bottom(uint16x8_t a, int32x4_t b, mve_pred16_t p) { return vqmovunbq_m_s32(a, b, p); }
610[[gnu::always_inline]] nce uint16x8_t move_narrow_saturate_top(uint16x8_t a, int32x4_t b) { return vqmovuntq_s32(a, b); }
611[[gnu::always_inline]] nce uint16x8_t move_narrow_saturate_top(uint16x8_t a, int32x4_t b, mve_pred16_t p) { return vqmovuntq_m_s32(a, b, p); }
612template <int imm>[[gnu::always_inline]] nce uint16x8_t shift_right_narrow_round_saturate_bottom(uint16x8_t a, uint32x4_t b) { return vqrshrnbq_n_u32(a, b, imm); }
613template <int imm>[[gnu::always_inline]] nce uint16x8_t shift_right_narrow_round_saturate_bottom(uint16x8_t a, uint32x4_t b, mve_pred16_t p) { return vqrshrnbq_m_n_u32(a, b, imm, p); }
614template <int imm>[[gnu::always_inline]] nce uint16x8_t shift_right_narrow_round_saturate_top(uint16x8_t a, uint32x4_t b) { return vqrshrntq_n_u32(a, b, imm); }
615template <int imm>[[gnu::always_inline]] nce uint16x8_t shift_right_narrow_round_saturate_top(uint16x8_t a, uint32x4_t b, mve_pred16_t p) { return vqrshrntq_m_n_u32(a, b, imm, p); }
616template <int imm>[[gnu::always_inline]] nce uint16x8_t shift_right_narrow_saturate_bottom(uint16x8_t a, uint32x4_t b) { return vqshrnbq_n_u32(a, b, imm); }
617template <int imm>[[gnu::always_inline]] nce uint16x8_t shift_right_narrow_saturate_bottom(uint16x8_t a, uint32x4_t b, mve_pred16_t p) { return vqshrnbq_m_n_u32(a, b, imm, p); }
618template <int imm>[[gnu::always_inline]] nce uint16x8_t shift_right_narrow_saturate_top(uint16x8_t a, uint32x4_t b) { return vqshrntq_n_u32(a, b, imm); }
619template <int imm>[[gnu::always_inline]] nce uint16x8_t shift_right_narrow_saturate_top(uint16x8_t a, uint32x4_t b, mve_pred16_t p) { return vqshrntq_m_n_u32(a, b, imm, p); }
620template <int imm>[[gnu::always_inline]] nce uint16x8_t shift_right_narrow_round_bottom(uint16x8_t a, uint32x4_t b) { return vrshrnbq_n_u32(a, b, imm); }
621template <int imm>[[gnu::always_inline]] nce uint16x8_t shift_right_narrow_round_bottom(uint16x8_t a, uint32x4_t b, mve_pred16_t p) { return vrshrnbq_m_n_u32(a, b, imm, p); }
622template <int imm>[[gnu::always_inline]] nce uint16x8_t shift_right_narrow_round_top(uint16x8_t a, uint32x4_t b) { return vrshrntq_n_u32(a, b, imm); }
623template <int imm>[[gnu::always_inline]] nce uint16x8_t shift_right_narrow_round_top(uint16x8_t a, uint32x4_t b, mve_pred16_t p) { return vrshrntq_m_n_u32(a, b, imm, p); }
624template <int imm>[[gnu::always_inline]] nce uint16x8_t shift_right_narrow_bottom(uint16x8_t a, uint32x4_t b) { return vshrnbq_n_u32(a, b, imm); }
625template <int imm>[[gnu::always_inline]] nce uint16x8_t shift_right_narrow_bottom(uint16x8_t a, uint32x4_t b, mve_pred16_t p) { return vshrnbq_m_n_u32(a, b, imm, p); }
626template <int imm>[[gnu::always_inline]] nce uint16x8_t shift_right_narrow_top_signed(uint16x8_t a, uint32x4_t b) { return vshrntq_n_u32(a, b, imm); }
627template <int imm>[[gnu::always_inline]] nce uint16x8_t shift_right_narrow_top_signed(uint16x8_t a, uint32x4_t b, mve_pred16_t p) { return vshrntq_m_n_u32(a, b, imm, p); }
628[[gnu::always_inline]] nce uint16x8_t move_narrow_bottom(uint16x8_t a, uint32x4_t b) { return vmovnbq_u32(a, b); }
629[[gnu::always_inline]] nce uint16x8_t move_narrow_bottom(uint16x8_t a, uint32x4_t b, mve_pred16_t p) { return vmovnbq_m_u32(a, b, p); }
630[[gnu::always_inline]] nce uint16x8_t move_narrow_top(uint16x8_t a, uint32x4_t b) { return vmovntq_u32(a, b); }
631[[gnu::always_inline]] nce uint16x8_t move_narrow_top(uint16x8_t a, uint32x4_t b, mve_pred16_t p) { return vmovntq_m_u32(a, b, p); }
632[[gnu::always_inline]] nce uint16x8_t move_narrow_saturate_bottom(uint16x8_t a, uint32x4_t b) { return vqmovnbq_u32(a, b); }
633[[gnu::always_inline]] nce uint16x8_t move_narrow_saturate_bottom(uint16x8_t a, uint32x4_t b, mve_pred16_t p) { return vqmovnbq_m_u32(a, b, p); }
634[[gnu::always_inline]] nce uint16x8_t move_narrow_saturate_top(uint16x8_t a, uint32x4_t b) { return vqmovntq_u32(a, b); }
635[[gnu::always_inline]] nce uint16x8_t move_narrow_saturate_top(uint16x8_t a, uint32x4_t b, mve_pred16_t p) { return vqmovntq_m_u32(a, b, p); }
636[[gnu::always_inline]] nce uint32_t reduce_add(uint16x8_t a, mve_pred16_t p) { return vaddvq_p_u16(a, p); }
637[[gnu::always_inline]] nce uint16x8_t count_leading_zero_bits(uint16x8_t a, mve_pred16_t p) { return vclzq_x_u16(a, p); }
638[[gnu::always_inline]] nce uint16x8_t bitwise_not(uint16x8_t a, mve_pred16_t p) { return vmvnq_x_u16(a, p); }
639template <int imm>[[gnu::always_inline]] nce uint16x8_t shift_right_round(uint16x8_t a, mve_pred16_t p) { return vrshrq_x_n_u16(a, imm, p); }
640template <int imm>[[gnu::always_inline]] nce uint16x8_t shift_right(uint16x8_t a, mve_pred16_t p) { return vshrq_x_n_u16(a, imm, p); }
641template <int imm>[[gnu::always_inline]] nce uint16x8_t shift_left(uint16x8_t a, mve_pred16_t p) { return vshlq_x_n_u16(a, imm, p); }
642[[gnu::always_inline]] nce uint32x4_t move_long_top(uint16x8_t a, mve_pred16_t p) { return vmovltq_x_u16(a, p); }
643[[gnu::always_inline]] nce uint16x8_t add_halve(uint16x8_t a, uint16_t b) { return vhaddq_n_u16(a, b); }
644[[gnu::always_inline]] nce uint16x8_t add_halve(uint16x8_t a, uint16_t b, mve_pred16_t p) { return vhaddq_x_n_u16(a, b, p); }
645[[gnu::always_inline]] nce uint16x8_t add_saturate(uint16x8_t a, uint16_t b) { return vqaddq_n_u16(a, b); }
646[[gnu::always_inline]] nce uint16x8_t multiply(uint16x8_t a, uint16_t b, mve_pred16_t p) { return vmulq_x_n_u16(a, b, p); }
647[[gnu::always_inline]] nce uint16x8_t subtract(uint16x8_t a, uint16_t b) { return vsubq_n_u16(a, b); }
648[[gnu::always_inline]] nce uint16x8_t subtract(uint16x8_t a, uint16_t b, mve_pred16_t p) { return vsubq_x_n_u16(a, b, p); }
649[[gnu::always_inline]] nce uint16x8_t subtract_halve(uint16x8_t a, uint16_t b) { return vhsubq_n_u16(a, b); }
650[[gnu::always_inline]] nce uint16x8_t subtract_halve(uint16x8_t a, uint16_t b, mve_pred16_t p) { return vhsubq_x_n_u16(a, b, p); }
651[[gnu::always_inline]] nce uint16x8_t subtract_saturate(uint16x8_t a, uint16_t b) { return vqsubq_n_u16(a, b); }
652[[gnu::always_inline]] nce uint16x8_t bitwise_clear(uint16x8_t a, const uint16_t imm) { return vbicq_n_u16(a, imm); }
653[[gnu::always_inline]] nce uint16x8_t bitwise_clear(uint16x8_t a, const uint16_t imm, mve_pred16_t p) { return vbicq_m_n_u16(a, imm, p); }
654[[gnu::always_inline]] nce uint16x8_t bitwise_not(uint16x8_t inactive, const uint16_t imm, mve_pred16_t p) { return vmvnq_m_n_u16(inactive, imm, p); }
655[[gnu::always_inline]] nce uint16x8_t bitwise_or(uint16x8_t a, const uint16_t imm) { return vorrq_n_u16(a, imm); }
656[[gnu::always_inline]] nce uint16x8_t bitwise_or(uint16x8_t a, const uint16_t imm, mve_pred16_t p) { return vorrq_m_n_u16(a, imm, p); }
657[[gnu::always_inline]] nce uint16x8_t bit_reverse_shift_right(uint16x8_t a, int32_t b) { return vbrsrq_n_u16(a, b); }
658[[gnu::always_inline]] nce uint16x8_t bit_reverse_shift_right(uint16x8_t a, int32_t b, mve_pred16_t p) { return vbrsrq_x_n_u16(a, b, p); }
659[[gnu::always_inline]] nce uint16x8_t shift_left_round_saturate(uint16x8_t a, int32_t b) { return vqrshlq_n_u16(a, b); }
660[[gnu::always_inline]] nce uint16x8_t shift_left_round_saturate(uint16x8_t a, int32_t b, mve_pred16_t p) { return vqrshlq_m_n_u16(a, b, p); }
661[[gnu::always_inline]] nce uint16x8_t shift_left_saturate(uint16x8_t a, int32_t b) { return vqshlq_r_u16(a, b); }
662[[gnu::always_inline]] nce uint16x8_t shift_left_saturate(uint16x8_t a, int32_t b, mve_pred16_t p) { return vqshlq_m_r_u16(a, b, p); }
663[[gnu::always_inline]] nce uint16x8_t shift_left(uint16x8_t a, int32_t b) { return vshlq_r_u16(a, b); }
664[[gnu::always_inline]] nce uint16x8_t shift_left(uint16x8_t a, int32_t b, mve_pred16_t p) { return vshlq_m_r_u16(a, b, p); }
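// int16x8_t wrappers follow: predicated subtract and bitwise operations, complex adds, saturating and narrowing shifts, comparisons, and multiply-accumulate forms.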
665[[gnu::always_inline]] nce int16x8_t subtract(int16x8_t inactive, int16x8_t a, int16x8_t b, mve_pred16_t p) { return vsubq_m_s16(inactive, a, b, p); }
666[[gnu::always_inline]] nce int16x8_t subtract_halve(int16x8_t inactive, int16x8_t a, int16x8_t b, mve_pred16_t p) { return vhsubq_m_s16(inactive, a, b, p); }
667[[gnu::always_inline]] nce int16x8_t subtract_saturate(int16x8_t inactive, int16x8_t a, int16x8_t b, mve_pred16_t p) { return vqsubq_m_s16(inactive, a, b, p); }
668[[gnu::always_inline]] nce int16x8_t bitwise_clear(int16x8_t inactive, int16x8_t a, int16x8_t b, mve_pred16_t p) { return vbicq_m_s16(inactive, a, b, p); }
669[[gnu::always_inline]] nce int16x8_t bitwise_and(int16x8_t inactive, int16x8_t a, int16x8_t b, mve_pred16_t p) { return vandq_m_s16(inactive, a, b, p); }
670[[gnu::always_inline]] nce int16x8_t bitwise_xor(int16x8_t inactive, int16x8_t a, int16x8_t b, mve_pred16_t p) { return veorq_m_s16(inactive, a, b, p); }
671[[gnu::always_inline]] nce int16x8_t bitwise_or_not(int16x8_t inactive, int16x8_t a, int16x8_t b, mve_pred16_t p) { return vornq_m_s16(inactive, a, b, p); }
672[[gnu::always_inline]] nce int16x8_t bitwise_or(int16x8_t inactive, int16x8_t a, int16x8_t b, mve_pred16_t p) { return vorrq_m_s16(inactive, a, b, p); }
673[[gnu::always_inline]] nce int16x8_t complex_add_rotate_270(int16x8_t a, int16x8_t b) { return vcaddq_rot270_s16(a, b); }
674[[gnu::always_inline]] nce int16x8_t complex_add_rotate_90(int16x8_t inactive, int16x8_t a, int16x8_t b, mve_pred16_t p) { return vcaddq_rot90_m_s16(inactive, a, b, p); }
675[[gnu::always_inline]] nce int16x8_t complex_add_rotate_270(int16x8_t inactive, int16x8_t a, int16x8_t b, mve_pred16_t p) { return vcaddq_rot270_m_s16(inactive, a, b, p); }
676[[gnu::always_inline]] nce int16x8_t complex_add_rotate_270_halve(int16x8_t inactive, int16x8_t a, int16x8_t b, mve_pred16_t p) { return vhcaddq_rot270_m_s16(inactive, a, b, p); }
677[[gnu::always_inline]] nce int16x8_t subtract(int16x8_t a, int16x8_t b, mve_pred16_t p) { return vsubq_x_s16(a, b, p); }
678[[gnu::always_inline]] nce int16x8_t subtract_halve(int16x8_t a, int16x8_t b, mve_pred16_t p) { return vhsubq_x_s16(a, b, p); }
679[[gnu::always_inline]] nce int16x8_t count_leading_sign_bits(int16x8_t inactive, int16x8_t a, mve_pred16_t p) { return vclsq_m_s16(inactive, a, p); }
680[[gnu::always_inline]] nce int16x8_t count_leading_zero_bits(int16x8_t inactive, int16x8_t a, mve_pred16_t p) { return vclzq_m_s16(inactive, a, p); }
681[[gnu::always_inline]] nce int16x8_t bitwise_clear(int16x8_t a, int16x8_t b, mve_pred16_t p) { return vbicq_x_s16(a, b, p); }
682[[gnu::always_inline]] nce int16x8_t negate(int16x8_t inactive, int16x8_t a, mve_pred16_t p) { return vnegq_m_s16(inactive, a, p); }
683[[gnu::always_inline]] nce int16x8_t negate_saturate(int16x8_t inactive, int16x8_t a, mve_pred16_t p) { return vqnegq_m_s16(inactive, a, p); }
684[[gnu::always_inline]] nce int16x8_t bitwise_and(int16x8_t a, int16x8_t b, mve_pred16_t p) { return vandq_x_s16(a, b, p); }
685[[gnu::always_inline]] nce int16x8_t bitwise_xor(int16x8_t a, int16x8_t b, mve_pred16_t p) { return veorq_x_s16(a, b, p); }
686[[gnu::always_inline]] nce int16x8_t bitwise_not(int16x8_t inactive, int16x8_t a, mve_pred16_t p) { return vmvnq_m_s16(inactive, a, p); }
687[[gnu::always_inline]] nce int16x8_t bitwise_or_not(int16x8_t a, int16x8_t b, mve_pred16_t p) { return vornq_x_s16(a, b, p); }
688[[gnu::always_inline]] nce int16x8_t bitwise_or(int16x8_t a, int16x8_t b, mve_pred16_t p) { return vorrq_x_s16(a, b, p); }
689[[gnu::always_inline]] nce int16x8_t complex_add_rotate_90(int16x8_t a, int16x8_t b, mve_pred16_t p) { return vcaddq_rot90_x_s16(a, b, p); }
690[[gnu::always_inline]] nce int16x8_t complex_add_rotate_270(int16x8_t a, int16x8_t b, mve_pred16_t p) { return vcaddq_rot270_x_s16(a, b, p); }
691[[gnu::always_inline]] nce int16x8_t complex_add_rotate_90_halve(int16x8_t a, int16x8_t b) { return vhcaddq_rot90_s16(a, b); }
692[[gnu::always_inline]] nce int16x8_t complex_add_rotate_90_halve(int16x8_t inactive, int16x8_t a, int16x8_t b, mve_pred16_t p) { return vhcaddq_rot90_m_s16(inactive, a, b, p); }
693[[gnu::always_inline]] nce int16x8_t complex_add_rotate_90_halve(int16x8_t a, int16x8_t b, mve_pred16_t p) { return vhcaddq_rot90_x_s16(a, b, p); }
694[[gnu::always_inline]] nce int16x8_t complex_add_rotate_270_halve(int16x8_t a, int16x8_t b) { return vhcaddq_rot270_s16(a, b); }
695[[gnu::always_inline]] nce int16x8_t complex_add_rotate_270_halve(int16x8_t a, int16x8_t b, mve_pred16_t p) { return vhcaddq_rot270_x_s16(a, b, p); }
696[[gnu::always_inline]] nce int16x8_t subtract(int16x8_t inactive, int16x8_t a, int16_t b, mve_pred16_t p) { return vsubq_m_n_s16(inactive, a, b, p); }
697[[gnu::always_inline]] nce int16x8_t subtract_halve(int16x8_t inactive, int16x8_t a, int16_t b, mve_pred16_t p) { return vhsubq_m_n_s16(inactive, a, b, p); }
698[[gnu::always_inline]] nce int16x8_t subtract_saturate(int16x8_t inactive, int16x8_t a, int16_t b, mve_pred16_t p) { return vqsubq_m_n_s16(inactive, a, b, p); }
699[[gnu::always_inline]] nce int16x8_t complex_add_rotate_90(int16x8_t a, int16x8_t b) { return vcaddq_rot90_s16(a, b); }
700[[gnu::always_inline]] nce int16x8_t shift_left_round_saturate(int16x8_t inactive, int16x8_t a, int16x8_t b, mve_pred16_t p) { return vqrshlq_m_s16(inactive, a, b, p); }
701[[gnu::always_inline]] nce int16x8_t shift_left_saturate(int16x8_t inactive, int16x8_t a, int16x8_t b, mve_pred16_t p) { return vqshlq_m_s16(inactive, a, b, p); }
702template <int imm>[[gnu::always_inline]] nce int16x8_t shift_right_round(int16x8_t inactive, int16x8_t a, mve_pred16_t p) { return vrshrq_m_n_s16(inactive, a, imm, p); }
703template <int imm>[[gnu::always_inline]] nce int16x8_t shift_right(int16x8_t inactive, int16x8_t a, mve_pred16_t p) { return vshrq_m_n_s16(inactive, a, imm, p); }
704template <int imm>[[gnu::always_inline]] nce int16x8_t shift_right_insert(int16x8_t a, int16x8_t b, mve_pred16_t p) { return vsriq_m_n_s16(a, b, imm, p); }
705template <int imm>[[gnu::always_inline]] nce int16x8_t shift_left_saturate(int16x8_t inactive, int16x8_t a, mve_pred16_t p) { return vqshlq_m_n_s16(inactive, a, imm, p); }
706[[gnu::always_inline]] nce int16x8_t bit_reverse_shift_right(int16x8_t inactive, int16x8_t a, int32_t b, mve_pred16_t p) { return vbrsrq_m_n_s16(inactive, a, b, p); }
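// Narrowing shifts from int32x4_t below write the even-numbered (bottom) or odd-numbered (top) 16-bit lanes of the destination.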
707template <int imm>[[gnu::always_inline]] nce int16x8_t shift_right_narrow_round_saturate_bottom(int16x8_t a, int32x4_t b) { return vqrshrnbq_n_s32(a, b, imm); }
708template <int imm>[[gnu::always_inline]] nce int16x8_t shift_right_narrow_round_saturate_bottom(int16x8_t a, int32x4_t b, mve_pred16_t p) { return vqrshrnbq_m_n_s32(a, b, imm, p); }
709template <int imm>[[gnu::always_inline]] nce int16x8_t shift_right_narrow_round_saturate_top(int16x8_t a, int32x4_t b) { return vqrshrntq_n_s32(a, b, imm); }
710template <int imm>[[gnu::always_inline]] nce int16x8_t shift_right_narrow_round_saturate_top(int16x8_t a, int32x4_t b, mve_pred16_t p) { return vqrshrntq_m_n_s32(a, b, imm, p); }
711template <int imm>[[gnu::always_inline]] nce int16x8_t shift_right_narrow_saturate_bottom(int16x8_t a, int32x4_t b) { return vqshrnbq_n_s32(a, b, imm); }
712template <int imm>[[gnu::always_inline]] nce int16x8_t shift_right_narrow_saturate_bottom(int16x8_t a, int32x4_t b, mve_pred16_t p) { return vqshrnbq_m_n_s32(a, b, imm, p); }
713template <int imm>[[gnu::always_inline]] nce int16x8_t shift_right_narrow_saturate_top(int16x8_t a, int32x4_t b) { return vqshrntq_n_s32(a, b, imm); }
714template <int imm>[[gnu::always_inline]] nce int16x8_t shift_right_narrow_saturate_top(int16x8_t a, int32x4_t b, mve_pred16_t p) { return vqshrntq_m_n_s32(a, b, imm, p); }
715template <int imm>[[gnu::always_inline]] nce int16x8_t shift_right_narrow_round_bottom(int16x8_t a, int32x4_t b) { return vrshrnbq_n_s32(a, b, imm); }
716template <int imm>[[gnu::always_inline]] nce int16x8_t shift_right_narrow_round_bottom(int16x8_t a, int32x4_t b, mve_pred16_t p) { return vrshrnbq_m_n_s32(a, b, imm, p); }
717template <int imm>[[gnu::always_inline]] nce int16x8_t shift_right_narrow_round_top(int16x8_t a, int32x4_t b) { return vrshrntq_n_s32(a, b, imm); }
718template <int imm>[[gnu::always_inline]] nce int16x8_t shift_right_narrow_round_top(int16x8_t a, int32x4_t b, mve_pred16_t p) { return vrshrntq_m_n_s32(a, b, imm, p); }
719template <int imm>[[gnu::always_inline]] nce int16x8_t shift_right_narrow_bottom(int16x8_t a, int32x4_t b) { return vshrnbq_n_s32(a, b, imm); }
720template <int imm>[[gnu::always_inline]] nce int16x8_t shift_right_narrow_bottom(int16x8_t a, int32x4_t b, mve_pred16_t p) { return vshrnbq_m_n_s32(a, b, imm, p); }
721template <int imm>[[gnu::always_inline]] nce int16x8_t shift_right_narrow_top_signed(int16x8_t a, int32x4_t b) { return vshrntq_n_s32(a, b, imm); }
722template <int imm>[[gnu::always_inline]] nce int16x8_t shift_right_narrow_top_signed(int16x8_t a, int32x4_t b, mve_pred16_t p) { return vshrntq_m_n_s32(a, b, imm, p); }
723template <int imm>[[gnu::always_inline]] nce int16x8_t shift_right_round(int16x8_t a, mve_pred16_t p) { return vrshrq_x_n_s16(a, imm, p); }
724template <int imm>[[gnu::always_inline]] nce int16x8_t shift_right(int16x8_t a, mve_pred16_t p) { return vshrq_x_n_s16(a, imm, p); }
725[[gnu::always_inline]] nce int16x8_t bit_reverse_shift_right(int16x8_t a, int32_t b) { return vbrsrq_n_s16(a, b); }
726[[gnu::always_inline]] nce int16x8_t bit_reverse_shift_right(int16x8_t a, int32_t b, mve_pred16_t p) { return vbrsrq_x_n_s16(a, b, p); }
727[[gnu::always_inline]] nce int16x8_t shift_left_round_saturate(int16x8_t a, int32_t b) { return vqrshlq_n_s16(a, b); }
728[[gnu::always_inline]] nce int16x8_t shift_left_round_saturate(int16x8_t a, int32_t b, mve_pred16_t p) { return vqrshlq_m_n_s16(a, b, p); }
729[[gnu::always_inline]] nce int16x8_t shift_left_saturate(int16x8_t a, int32_t b) { return vqshlq_r_s16(a, b); }
730[[gnu::always_inline]] nce int16x8_t shift_left_saturate(int16x8_t a, int32_t b, mve_pred16_t p) { return vqshlq_m_r_s16(a, b, p); }
731template <int imm>[[gnu::always_inline]] nce uint16x8_t shift_left_unsigned_saturate(int16x8_t a) { return vqshluq_n_s16(a, imm); }
732template <int imm>[[gnu::always_inline]] nce int16x8_t shift_left_long_bottom(int16x8_t inactive, int8x16_t a, mve_pred16_t p) { return vshllbq_m_n_s8(inactive, a, imm, p); }
733template <int imm>[[gnu::always_inline]] nce int16x8_t shift_left_long_top(int16x8_t inactive, int8x16_t a, mve_pred16_t p) { return vshlltq_m_n_s8(inactive, a, imm, p); }
734[[gnu::always_inline]] nce int16x8_t shift_left_round(int16x8_t inactive, int16x8_t a, int16x8_t b, mve_pred16_t p) { return vrshlq_m_s16(inactive, a, b, p); }
735[[gnu::always_inline]] nce int16x8_t shift_left(int16x8_t inactive, int16x8_t a, int16x8_t b, mve_pred16_t p) { return vshlq_m_s16(inactive, a, b, p); }
736[[gnu::always_inline]] nce int16x8_t shift_left_round(int16x8_t a, int16x8_t b, mve_pred16_t p) { return vrshlq_x_s16(a, b, p); }
737[[gnu::always_inline]] nce int16x8_t shift_left(int16x8_t a, int16x8_t b, mve_pred16_t p) { return vshlq_x_s16(a, b, p); }
738template <int imm>[[gnu::always_inline]] nce int32x4_t shift_left_long_top(int16x8_t a, mve_pred16_t p) { return vshlltq_x_n_s16(a, imm, p); }
739[[gnu::always_inline]] nce int16x8_t shift_left_round(int16x8_t a, int32_t b) { return vrshlq_n_s16(a, b); }
740[[gnu::always_inline]] nce int16x8_t shift_left_round(int16x8_t a, int32_t b, mve_pred16_t p) { return vrshlq_m_n_s16(a, b, p); }
741template <int imm>[[gnu::always_inline]] nce int16x8_t shift_left_carry(int16x8_t a, uint32_t *b) { return vshlcq_s16(a, b, imm); }
742template <int imm>[[gnu::always_inline]] nce int16x8_t shift_left_carry(int16x8_t a, uint32_t *b, mve_pred16_t p) { return vshlcq_m_s16(a, b, imm, p); }
743template <int imm>[[gnu::always_inline]] nce int32x4_t shift_left_long_bottom(int16x8_t a) { return vshllbq_n_s16(a, imm); }
744template <int imm>[[gnu::always_inline]] nce int32x4_t shift_left_long_bottom(int16x8_t a, mve_pred16_t p) { return vshllbq_x_n_s16(a, imm, p); }
745template <int imm>[[gnu::always_inline]] nce int32x4_t shift_left_long_top(int16x8_t a) { return vshlltq_n_s16(a, imm); }
746template <int imm>[[gnu::always_inline]] nce int16x8_t shift_left(int16x8_t a) { return vshlq_n_s16(a, imm); }
747template <int imm>[[gnu::always_inline]] nce int16x8_t shift_left_insert(int16x8_t a, int16x8_t b, mve_pred16_t p) { return vsliq_m_n_s16(a, b, imm, p); }
748[[gnu::always_inline]] nce int32x4_t move_long_bottom(int16x8_t a) { return vmovlbq_s16(a); }
749[[gnu::always_inline]] nce int16x8_t move_long_bottom(int16x8_t inactive, int8x16_t a, mve_pred16_t p) { return vmovlbq_m_s8(inactive, a, p); }
750[[gnu::always_inline]] nce int32x4_t move_long_bottom(int16x8_t a, mve_pred16_t p) { return vmovlbq_x_s16(a, p); }
751[[gnu::always_inline]] nce int32x4_t move_long_top(int16x8_t a) { return vmovltq_s16(a); }
752[[gnu::always_inline]] nce int16x8_t move_long_top(int16x8_t inactive, int8x16_t a, mve_pred16_t p) { return vmovltq_m_s8(inactive, a, p); }
753[[gnu::always_inline]] nce int16x8_t reverse_32bit(int16x8_t inactive, int16x8_t a, mve_pred16_t p) { return vrev32q_m_s16(inactive, a, p); }
754[[gnu::always_inline]] nce int16x8_t reverse_64bit(int16x8_t inactive, int16x8_t a, mve_pred16_t p) { return vrev64q_m_s16(inactive, a, p); }
755[[gnu::always_inline]] nce mve_pred16_t not_equal(int16x8_t a, int16x8_t b, mve_pred16_t p) { return vcmpneq_m_s16(a, b, p); }
756[[gnu::always_inline]] nce mve_pred16_t greater_than_or_equal(int16x8_t a, int16x8_t b) { return vcmpgeq_s16(a, b); }
757[[gnu::always_inline]] nce mve_pred16_t greater_than_or_equal(int16x8_t a, int16x8_t b, mve_pred16_t p) { return vcmpgeq_m_s16(a, b, p); }
758[[gnu::always_inline]] nce mve_pred16_t greater_than(int16x8_t a, int16x8_t b) { return vcmpgtq_s16(a, b); }
759[[gnu::always_inline]] nce mve_pred16_t greater_than(int16x8_t a, int16x8_t b, mve_pred16_t p) { return vcmpgtq_m_s16(a, b, p); }
760[[gnu::always_inline]] nce int16x8_t reverse_64bit(int16x8_t a, mve_pred16_t p) { return vrev64q_x_s16(a, p); }
761[[gnu::always_inline]] nce int16x8_t uninitialized(int16x8_t t) { return vuninitializedq(t); }
762[[gnu::always_inline]] nce mve_pred16_t equal(int16x8_t a, int16x8_t b) { return vcmpeqq_s16(a, b); }
763[[gnu::always_inline]] nce mve_pred16_t equal(int16x8_t a, int16x8_t b, mve_pred16_t p) { return vcmpeqq_m_s16(a, b, p); }
764[[gnu::always_inline]] nce mve_pred16_t not_equal(int16x8_t a, int16x8_t b) { return vcmpneq_s16(a, b); }
765[[gnu::always_inline]] nce mve_pred16_t less_than_or_equal(int16x8_t a, int16x8_t b) { return vcmpleq_s16(a, b); }
766[[gnu::always_inline]] nce mve_pred16_t less_than_or_equal(int16x8_t a, int16x8_t b, mve_pred16_t p) { return vcmpleq_m_s16(a, b, p); }
767[[gnu::always_inline]] nce mve_pred16_t less_than(int16x8_t a, int16x8_t b) { return vcmpltq_s16(a, b); }
768[[gnu::always_inline]] nce int16x8_t min(int16x8_t inactive, int16x8_t a, int16x8_t b, mve_pred16_t p) { return vminq_m_s16(inactive, a, b, p); }
769[[gnu::always_inline]] nce int16x8_t max(int16x8_t inactive, int16x8_t a, int16x8_t b, mve_pred16_t p) { return vmaxq_m_s16(inactive, a, b, p); }
770[[gnu::always_inline]] nce int16x8_t subtract_absolute(int16x8_t inactive, int16x8_t a, int16x8_t b, mve_pred16_t p) { return vabdq_m_s16(inactive, a, b, p); }
771[[gnu::always_inline]] nce int16x8_t add(int16x8_t inactive, int16x8_t a, int16x8_t b, mve_pred16_t p) { return vaddq_m_s16(inactive, a, b, p); }
772[[gnu::always_inline]] nce mve_pred16_t less_than(int16x8_t a, int16x8_t b, mve_pred16_t p) { return vcmpltq_m_s16(a, b, p); }
773[[gnu::always_inline]] nce int16x8_t min(int16x8_t a, int16x8_t b, mve_pred16_t p) { return vminq_x_s16(a, b, p); }
774[[gnu::always_inline]] nce int16x8_t max(int16x8_t a, int16x8_t b, mve_pred16_t p) { return vmaxq_x_s16(a, b, p); }
775[[gnu::always_inline]] nce int16x8_t subtract_absolute(int16x8_t a, int16x8_t b, mve_pred16_t p) { return vabdq_x_s16(a, b, p); }
776[[gnu::always_inline]] nce int16x8_t abs(int16x8_t inactive, int16x8_t a, mve_pred16_t p) { return vabsq_m_s16(inactive, a, p); }
777[[gnu::always_inline]] nce int16x8_t abs_saturate(int16x8_t inactive, int16x8_t a, mve_pred16_t p) { return vqabsq_m_s16(inactive, a, p); }
778[[gnu::always_inline]] nce int16x8_t add(int16x8_t a, int16x8_t b, mve_pred16_t p) { return vaddq_x_s16(a, b, p); }
779[[gnu::always_inline]] nce int16x8_t add(int16x8_t inactive, int16x8_t a, int16_t b, mve_pred16_t p) { return vaddq_m_n_s16(inactive, a, b, p); }
780[[gnu::always_inline]] nce int16x8_t reverse_32bit(int16x8_t a, mve_pred16_t p) { return vrev32q_x_s16(a, p); }
781[[gnu::always_inline]] nce int16x8_t abs(int16x8_t a, mve_pred16_t p) { return vabsq_x_s16(a, p); }
782[[gnu::always_inline]] nce int32_t reduce_add(int16x8_t a, mve_pred16_t p) { return vaddvq_p_s16(a, p); }
783[[gnu::always_inline]] nce int16x8_t duplicate(int16x8_t inactive, int16_t a, mve_pred16_t p) { return vdupq_m_n_s16(inactive, a, p); }
784[[gnu::always_inline]] nce mve_pred16_t equal(int16x8_t a, int16_t b) { return vcmpeqq_n_s16(a, b); }
785[[gnu::always_inline]] nce mve_pred16_t equal(int16x8_t a, int16_t b, mve_pred16_t p) { return vcmpeqq_m_n_s16(a, b, p); }
786[[gnu::always_inline]] nce mve_pred16_t not_equal(int16x8_t a, int16_t b) { return vcmpneq_n_s16(a, b); }
787[[gnu::always_inline]] nce mve_pred16_t not_equal(int16x8_t a, int16_t b, mve_pred16_t p) { return vcmpneq_m_n_s16(a, b, p); }
788[[gnu::always_inline]] nce mve_pred16_t greater_than_or_equal(int16x8_t a, int16_t b) { return vcmpgeq_n_s16(a, b); }
789[[gnu::always_inline]] nce mve_pred16_t greater_than_or_equal(int16x8_t a, int16_t b, mve_pred16_t p) { return vcmpgeq_m_n_s16(a, b, p); }
790[[gnu::always_inline]] nce mve_pred16_t greater_than(int16x8_t a, int16_t b) { return vcmpgtq_n_s16(a, b); }
791[[gnu::always_inline]] nce mve_pred16_t greater_than(int16x8_t a, int16_t b, mve_pred16_t p) { return vcmpgtq_m_n_s16(a, b, p); }
792[[gnu::always_inline]] nce mve_pred16_t less_than_or_equal(int16x8_t a, int16_t b) { return vcmpleq_n_s16(a, b); }
793[[gnu::always_inline]] nce mve_pred16_t less_than_or_equal(int16x8_t a, int16_t b, mve_pred16_t p) { return vcmpleq_m_n_s16(a, b, p); }
794[[gnu::always_inline]] nce mve_pred16_t less_than(int16x8_t a, int16_t b) { return vcmpltq_n_s16(a, b); }
795[[gnu::always_inline]] nce mve_pred16_t less_than(int16x8_t a, int16_t b, mve_pred16_t p) { return vcmpltq_m_n_s16(a, b, p); }
796[[gnu::always_inline]] nce int16x8_t add(int16x8_t a, int16_t b) { return vaddq_n_s16(a, b); }
797[[gnu::always_inline]] nce int16x8_t add(int16x8_t a, int16_t b, mve_pred16_t p) { return vaddq_x_n_s16(a, b, p); }
798[[gnu::always_inline]] nce int32_t reduce_add(int16x8_t a) { return vaddvq_s16(a); }
799[[gnu::always_inline]] nce int16x8_t multiply_long_bottom(int16x8_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p) { return vmullbq_int_m_s8(inactive, a, b, p); }
800[[gnu::always_inline]] nce int16x8_t multiply_long_top(int16x8_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p) { return vmulltq_int_m_s8(inactive, a, b, p); }
801[[gnu::always_inline]] nce int16x8_t add_halve(int16x8_t inactive, int16x8_t a, int16x8_t b, mve_pred16_t p) { return vhaddq_m_s16(inactive, a, b, p); }
802[[gnu::always_inline]] nce int16x8_t add_halve_round(int16x8_t inactive, int16x8_t a, int16x8_t b, mve_pred16_t p) { return vrhaddq_m_s16(inactive, a, b, p); }
803[[gnu::always_inline]] nce int16x8_t add_saturate(int16x8_t inactive, int16x8_t a, int16x8_t b, mve_pred16_t p) { return vqaddq_m_s16(inactive, a, b, p); }
804[[gnu::always_inline]] nce int16x8_t add_halve(int16x8_t a, int16x8_t b, mve_pred16_t p) { return vhaddq_x_s16(a, b, p); }
805[[gnu::always_inline]] nce int16x8_t add_halve_round(int16x8_t a, int16x8_t b, mve_pred16_t p) { return vrhaddq_x_s16(a, b, p); }
806[[gnu::always_inline]] nce int16x8_t add_halve(int16x8_t inactive, int16x8_t a, int16_t b, mve_pred16_t p) { return vhaddq_m_n_s16(inactive, a, b, p); }
807[[gnu::always_inline]] nce int16x8_t add_saturate(int16x8_t inactive, int16x8_t a, int16_t b, mve_pred16_t p) { return vqaddq_m_n_s16(inactive, a, b, p); }
808[[gnu::always_inline]] nce int16x8_t multiply_high(int16x8_t a, int16x8_t b) { return vmulhq_s16(a, b); }
809[[gnu::always_inline]] nce int16x8_t multiply_high(int16x8_t inactive, int16x8_t a, int16x8_t b, mve_pred16_t p) { return vmulhq_m_s16(inactive, a, b, p); }
810[[gnu::always_inline]] nce int16x8_t multiply_high(int16x8_t a, int16x8_t b, mve_pred16_t p) { return vmulhq_x_s16(a, b, p); }
811[[gnu::always_inline]] nce int32x4_t multiply_long_bottom(int16x8_t a, int16x8_t b) { return vmullbq_int_s16(a, b); }
812[[gnu::always_inline]] nce int32x4_t multiply_long_bottom(int16x8_t a, int16x8_t b, mve_pred16_t p) { return vmullbq_int_x_s16(a, b, p); }
813[[gnu::always_inline]] nce int32x4_t multiply_long_top(int16x8_t a, int16x8_t b) { return vmulltq_int_s16(a, b); }
814[[gnu::always_inline]] nce int16x8_t multiply(int16x8_t inactive, int16x8_t a, int16x8_t b, mve_pred16_t p) { return vmulq_m_s16(inactive, a, b, p); }
815[[gnu::always_inline]] nce int16x8_t multiply_round_high(int16x8_t inactive, int16x8_t a, int16x8_t b, mve_pred16_t p) { return vrmulhq_m_s16(inactive, a, b, p); }
816[[gnu::always_inline]] nce int16x8_t multiply_add_double_saturate_high(int16x8_t inactive, int16x8_t a, int16x8_t b) { return vqdmladhq_s16(inactive, a, b); }
817[[gnu::always_inline]] nce int16x8_t multiply_add_double_saturate_high(int16x8_t inactive, int16x8_t a, int16x8_t b, mve_pred16_t p) { return vqdmladhq_m_s16(inactive, a, b, p); }
818[[gnu::always_inline]] nce int16x8_t multiply_add_double_saturate_high_exchange_pairs(int16x8_t inactive, int16x8_t a, int16x8_t b) { return vqdmladhxq_s16(inactive, a, b); }
819[[gnu::always_inline]] nce int16x8_t multiply_add_double_saturate_high_exchange_pairs(int16x8_t inactive, int16x8_t a, int16x8_t b, mve_pred16_t p) { return vqdmladhxq_m_s16(inactive, a, b, p); }
820[[gnu::always_inline]] nce int16x8_t multiply_add_dual_double_round_saturate_high(int16x8_t inactive, int16x8_t a, int16x8_t b) { return vqrdmladhq_s16(inactive, a, b); }
821[[gnu::always_inline]] nce int16x8_t multiply_add_dual_double_round_saturate_high(int16x8_t inactive, int16x8_t a, int16x8_t b, mve_pred16_t p) { return vqrdmladhq_m_s16(inactive, a, b, p); }
822[[gnu::always_inline]] nce int16x8_t multiply_add_dual_double_round_saturate_high_exchange_pairs(int16x8_t inactive, int16x8_t a, int16x8_t b) { return vqrdmladhxq_s16(inactive, a, b); }
823[[gnu::always_inline]] nce int16x8_t multiply_add_dual_double_round_saturate_high_exchange_pairs(int16x8_t inactive, int16x8_t a, int16x8_t b, mve_pred16_t p) { return vqrdmladhxq_m_s16(inactive, a, b, p); }
824[[gnu::always_inline]] nce int32x4_t multiply_long_top(int16x8_t a, int16x8_t b, mve_pred16_t p) { return vmulltq_int_x_s16(a, b, p); }
825[[gnu::always_inline]] nce int16x8_t multiply(int16x8_t a, int16x8_t b, mve_pred16_t p) { return vmulq_x_s16(a, b, p); }
826[[gnu::always_inline]] nce int16x8_t multiply_round_high(int16x8_t a, int16x8_t b, mve_pred16_t p) { return vrmulhq_x_s16(a, b, p); }
827[[gnu::always_inline]] nce int16x8_t multiply(int16x8_t inactive, int16x8_t a, int16_t b, mve_pred16_t p) { return vmulq_m_n_s16(inactive, a, b, p); }
828[[gnu::always_inline]] nce int16x8_t multiply_round_high(int16x8_t a, int16x8_t b) { return vrmulhq_s16(a, b); }
829[[gnu::always_inline]] nce int16x8_t multiply_subtract_dual_double_saturate_high(int16x8_t inactive, int16x8_t a, int16x8_t b) { return vqdmlsdhq_s16(inactive, a, b); }
830[[gnu::always_inline]] nce int16x8_t multiply_subtract_dual_double_saturate_high(int16x8_t inactive, int16x8_t a, int16x8_t b, mve_pred16_t p) { return vqdmlsdhq_m_s16(inactive, a, b, p); }
831[[gnu::always_inline]] nce int16x8_t multiply_subtract_dual_double_saturate_high_exchange_pairs(int16x8_t inactive, int16x8_t a, int16x8_t b) { return vqdmlsdhxq_s16(inactive, a, b); }
832[[gnu::always_inline]] nce int16x8_t multiply_subtract_dual_double_saturate_high_exchange_pairs(int16x8_t inactive, int16x8_t a, int16x8_t b, mve_pred16_t p) { return vqdmlsdhxq_m_s16(inactive, a, b, p); }
833[[gnu::always_inline]] nce int16x8_t multiply_subtract_dual_double_round_saturate_high(int16x8_t inactive, int16x8_t a, int16x8_t b) { return vqrdmlsdhq_s16(inactive, a, b); }
834[[gnu::always_inline]] nce int16x8_t multiply_subtract_dual_double_round_saturate_high(int16x8_t inactive, int16x8_t a, int16x8_t b, mve_pred16_t p) { return vqrdmlsdhq_m_s16(inactive, a, b, p); }
835[[gnu::always_inline]] nce int16x8_t multiply_subtract_dual_double_round_saturate_high_exchange_pairs(int16x8_t inactive, int16x8_t a, int16x8_t b) { return vqrdmlsdhxq_s16(inactive, a, b); }
836[[gnu::always_inline]] nce int16x8_t multiply_subtract_dual_double_round_saturate_high_exchange_pairs(int16x8_t inactive, int16x8_t a, int16x8_t b, mve_pred16_t p) { return vqrdmlsdhxq_m_s16(inactive, a, b, p); }
837[[gnu::always_inline]] nce int16x8_t multiply_double_saturate_high(int16x8_t inactive, int16x8_t a, int16x8_t b, mve_pred16_t p) { return vqdmulhq_m_s16(inactive, a, b, p); }
838[[gnu::always_inline]] nce int16x8_t multiply_double_round_saturate_high(int16x8_t inactive, int16x8_t a, int16x8_t b, mve_pred16_t p) { return vqrdmulhq_m_s16(inactive, a, b, p); }
839[[gnu::always_inline]] nce int16x8_t multiply_add_double_round_saturate_high_scalar(int16x8_t m1, int16x8_t m2, int16_t add) { return vqrdmlashq_n_s16(m1, m2, add); }
840[[gnu::always_inline]] nce int16x8_t multiply_add_double_round_saturate_high_scalar(int16x8_t m1, int16x8_t m2, int16_t add, mve_pred16_t p) { return vqrdmlashq_m_n_s16(m1, m2, add, p); }
841[[gnu::always_inline]] nce int16x8_t multiply_double_saturate_high(int16x8_t inactive, int16x8_t a, int16_t b, mve_pred16_t p) { return vqdmulhq_m_n_s16(inactive, a, b, p); }
842[[gnu::always_inline]] nce int16x8_t multiply_double_round_saturate_high(int16x8_t inactive, int16x8_t a, int16_t b, mve_pred16_t p) { return vqrdmulhq_m_n_s16(inactive, a, b, p); }
843[[gnu::always_inline]] nce int32x4_t multiply_double_saturate_long_bottom(int16x8_t a, int16x8_t b) { return vqdmullbq_s16(a, b); }
844[[gnu::always_inline]] nce int32x4_t multiply_double_saturate_long_top(int16x8_t a, int16x8_t b) { return vqdmulltq_s16(a, b); }
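// Usage sketch (illustrative comment, not part of the generated API): treating the lanes as
// Q15 fixed point, the doubling-saturating long multiplies above yield Q31 products, split
// across the even (bottom) and odd (top) source lanes. 'a' and 'b' are assumed int16x8_t values.
//   int32x4_t q31_even = multiply_double_saturate_long_bottom(a, b);  // vqdmullbq_s16
//   int32x4_t q31_odd  = multiply_double_saturate_long_top(a, b);     // vqdmulltq_s16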
845[[gnu::always_inline]] nce int32_t multiply_add_dual_reduce_add(int16x8_t m1, int16x8_t m2) { return vmladavq_s16(m1, m2); }
846[[gnu::always_inline]] nce int32_t multiply_add_dual_reduce_add(int16x8_t m1, int16x8_t m2, mve_pred16_t p) { return vmladavq_p_s16(m1, m2, p); }
847[[gnu::always_inline]] nce int32_t multiply_add_dual_reduce_add_exchange_pairs(int16x8_t m1, int16x8_t m2) { return vmladavxq_s16(m1, m2); }
848[[gnu::always_inline]] nce int32_t multiply_add_dual_reduce_add_exchange_pairs(int16x8_t m1, int16x8_t m2, mve_pred16_t p) { return vmladavxq_p_s16(m1, m2, p); }
849[[gnu::always_inline]] nce int64_t multiply_add_long_dual_reduce_add(int16x8_t m1, int16x8_t m2) { return vmlaldavq_s16(m1, m2); }
850[[gnu::always_inline]] nce int64_t multiply_add_long_dual_reduce_add(int16x8_t m1, int16x8_t m2, mve_pred16_t p) { return vmlaldavq_p_s16(m1, m2, p); }
851[[gnu::always_inline]] nce int64_t multiply_add_long_dual_reduce_add_exchange_pairs(int16x8_t m1, int16x8_t m2) { return vmlaldavxq_s16(m1, m2); }
852[[gnu::always_inline]] nce int64_t multiply_add_long_dual_reduce_add_exchange_pairs(int16x8_t m1, int16x8_t m2, mve_pred16_t p) { return vmlaldavxq_p_s16(m1, m2, p); }
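// Usage sketch (illustrative comment): a tail-predicated int16 dot product built on the
// predicated reduce-add wrapper above. vctp16q and vld1q_z_s16 are used directly from
// arm_mve.h; the helper name dot_s16 is an assumption for this example.
//   int32_t dot_s16(const int16_t *x, const int16_t *y, int n) {
//     int32_t acc = 0;
//     for (; n > 0; n -= 8, x += 8, y += 8) {
//       mve_pred16_t p = vctp16q((uint32_t)n);             // predicate covering the remaining lanes
//       int16x8_t va = vld1q_z_s16(x, p);
//       int16x8_t vb = vld1q_z_s16(y, p);
//       acc += multiply_add_dual_reduce_add(va, vb, p);    // vmladavq_p_s16
//     }
//     return acc;
//   }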
853[[gnu::always_inline]] nce int32_t multiply_subtract_dual_reduce_add(int16x8_t a, int16x8_t b, mve_pred16_t p) { return vmlsdavq_p_s16(a, b, p); }
854[[gnu::always_inline]] nce int16x8_t multiply_add_double_saturate_high(int16x8_t add, int16x8_t m1, int16_t m2) { return vqdmlahq_n_s16(add, m1, m2); }
855[[gnu::always_inline]] nce int16x8_t multiply_add_double_saturate_high(int16x8_t add, int16x8_t m1, int16_t m2, mve_pred16_t p) { return vqdmlahq_m_n_s16(add, m1, m2, p); }
856[[gnu::always_inline]] nce int16x8_t multiply_add_double_round_saturate_high(int16x8_t add, int16x8_t m1, int16_t m2) { return vqrdmlahq_n_s16(add, m1, m2); }
857[[gnu::always_inline]] nce int16x8_t multiply_add_double_round_saturate_high(int16x8_t add, int16x8_t m1, int16_t m2, mve_pred16_t p) { return vqrdmlahq_m_n_s16(add, m1, m2, p); }
858[[gnu::always_inline]] nce int16x8_t multiply_subtract_double_saturate_high(int16x8_t m1, int16x8_t m2, int16_t add) { return vqdmlashq_n_s16(m1, m2, add); }
859[[gnu::always_inline]] nce int16x8_t multiply_subtract_double_saturate_high(int16x8_t m1, int16x8_t m2, int16_t add, mve_pred16_t p) { return vqdmlashq_m_n_s16(m1, m2, add, p); }
860[[gnu::always_inline]] nce int32_t multiply_subtract_dual_reduce_add_exchange_pairs(int16x8_t a, int16x8_t b) { return vmlsdavxq_s16(a, b); }
861[[gnu::always_inline]] nce int32_t multiply_subtract_dual_reduce_add_exchange_pairs(int16x8_t a, int16x8_t b, mve_pred16_t p) { return vmlsdavxq_p_s16(a, b, p); }
862[[gnu::always_inline]] nce int64_t multiply_subtract_long_dual_reduce_add(int16x8_t a, int16x8_t b) { return vmlsldavq_s16(a, b); }
863[[gnu::always_inline]] nce int64_t multiply_subtract_long_dual_reduce_add(int16x8_t a, int16x8_t b, mve_pred16_t p) { return vmlsldavq_p_s16(a, b, p); }
864[[gnu::always_inline]] nce int64_t multiply_subtract_long_dual_reduce_add_exchange_pairs(int16x8_t a, int16x8_t b) { return vmlsldavxq_s16(a, b); }
865[[gnu::always_inline]] nce int64_t multiply_subtract_long_dual_reduce_add_exchange_pairs(int16x8_t a, int16x8_t b, mve_pred16_t p) { return vmlsldavxq_p_s16(a, b, p); }
866[[gnu::always_inline]] nce int16x8_t multiply_add(int16x8_t add, int16x8_t m1, int16_t m2) { return vmlaq_n_s16(add, m1, m2); }
867[[gnu::always_inline]] nce int16x8_t multiply_add(int16x8_t add, int16x8_t m1, int16_t m2, mve_pred16_t p) { return vmlaq_m_n_s16(add, m1, m2, p); }
868[[gnu::always_inline]] nce int16x8_t multiply_add_scalar(int16x8_t m1, int16x8_t m2, int16_t add) { return vmlasq_n_s16(m1, m2, add); }
869[[gnu::always_inline]] nce int16x8_t multiply_add_scalar(int16x8_t m1, int16x8_t m2, int16_t add, mve_pred16_t p) { return vmlasq_m_n_s16(m1, m2, add, p); }
870[[gnu::always_inline]] nce int32_t multiply_subtract_dual_reduce_add(int16x8_t a, int16x8_t b) { return vmlsdavq_s16(a, b); }
871template <int imm>[[gnu::always_inline]] nce int16x8_t shift_left(int16x8_t inactive, int16x8_t a, mve_pred16_t p) { return vshlq_m_n_s16(inactive, a, imm, p); }
872[[gnu::always_inline]] nce int16x8_t predicate_select(int16x8_t a, int16x8_t b, mve_pred16_t p) { return vpselq_s16(a, b, p); }
873[[gnu::always_inline]] nce int16x8_t move_narrow_bottom(int16x8_t a, int32x4_t b) { return vmovnbq_s32(a, b); }
874[[gnu::always_inline]] nce int16x8_t move_narrow_bottom(int16x8_t a, int32x4_t b, mve_pred16_t p) { return vmovnbq_m_s32(a, b, p); }
875[[gnu::always_inline]] nce int16x8_t move_narrow_top(int16x8_t a, int32x4_t b) { return vmovntq_s32(a, b); }
876[[gnu::always_inline]] nce int16x8_t move_narrow_top(int16x8_t a, int32x4_t b, mve_pred16_t p) { return vmovntq_m_s32(a, b, p); }
877[[gnu::always_inline]] nce int16x8_t move_narrow_saturate_bottom(int16x8_t a, int32x4_t b) { return vqmovnbq_s32(a, b); }
878[[gnu::always_inline]] nce int16x8_t move_narrow_saturate_bottom(int16x8_t a, int32x4_t b, mve_pred16_t p) { return vqmovnbq_m_s32(a, b, p); }
879[[gnu::always_inline]] nce int16x8_t move_narrow_saturate_top(int16x8_t a, int32x4_t b) { return vqmovntq_s32(a, b); }
880[[gnu::always_inline]] nce int16x8_t move_narrow_saturate_top(int16x8_t a, int32x4_t b, mve_pred16_t p) { return vqmovntq_m_s32(a, b, p); }
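// Usage sketch (illustrative comment): packing two int32x4_t results into one saturated
// int16x8_t with the bottom/top narrowing pair; the even lanes come from 'lo' and the odd
// lanes from 'hi', which pairs naturally with the bottom/top widening multiplies.
//   int16x8_t packed = move_narrow_saturate_bottom(dst, lo);   // vqmovnbq_s32
//   packed           = move_narrow_saturate_top(packed, hi);   // vqmovntq_s32
// 'dst', 'lo' and 'hi' are assumed to come from surrounding code.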
881[[gnu::always_inline]] nce int16x8_t count_leading_sign_bits(int16x8_t a, mve_pred16_t p) { return vclsq_x_s16(a, p); }
882[[gnu::always_inline]] nce int16x8_t count_leading_zero_bits(int16x8_t a, mve_pred16_t p) { return vclzq_x_s16(a, p); }
883[[gnu::always_inline]] nce int16x8_t negate(int16x8_t a, mve_pred16_t p) { return vnegq_x_s16(a, p); }
884[[gnu::always_inline]] nce int16x8_t bitwise_not(int16x8_t a, mve_pred16_t p) { return vmvnq_x_s16(a, p); }
885template <int imm>[[gnu::always_inline]] nce int16x8_t shift_left(int16x8_t a, mve_pred16_t p) { return vshlq_x_n_s16(a, imm, p); }
886[[gnu::always_inline]] nce int32x4_t move_long_top(int16x8_t a, mve_pred16_t p) { return vmovltq_x_s16(a, p); }
887[[gnu::always_inline]] nce int16x8_t add_halve(int16x8_t a, int16_t b) { return vhaddq_n_s16(a, b); }
888[[gnu::always_inline]] nce int16x8_t add_halve(int16x8_t a, int16_t b, mve_pred16_t p) { return vhaddq_x_n_s16(a, b, p); }
889[[gnu::always_inline]] nce int16x8_t add_saturate(int16x8_t a, int16_t b) { return vqaddq_n_s16(a, b); }
890[[gnu::always_inline]] nce int16x8_t multiply(int16x8_t a, int16_t b, mve_pred16_t p) { return vmulq_x_n_s16(a, b, p); }
891[[gnu::always_inline]] nce int32x4_t multiply_double_saturate_long_bottom(int16x8_t a, int16_t b) { return vqdmullbq_n_s16(a, b); }
892[[gnu::always_inline]] nce int32x4_t multiply_double_saturate_long_top(int16x8_t a, int16_t b) { return vqdmulltq_n_s16(a, b); }
893[[gnu::always_inline]] nce int16x8_t subtract(int16x8_t a, int16_t b) { return vsubq_n_s16(a, b); }
894[[gnu::always_inline]] nce int16x8_t subtract(int16x8_t a, int16_t b, mve_pred16_t p) { return vsubq_x_n_s16(a, b, p); }
895[[gnu::always_inline]] nce int16x8_t subtract_halve(int16x8_t a, int16_t b) { return vhsubq_n_s16(a, b); }
896[[gnu::always_inline]] nce int16x8_t subtract_halve(int16x8_t a, int16_t b, mve_pred16_t p) { return vhsubq_x_n_s16(a, b, p); }
897[[gnu::always_inline]] nce int16x8_t subtract_saturate(int16x8_t a, int16_t b) { return vqsubq_n_s16(a, b); }
898[[gnu::always_inline]] nce int16x8_t bitwise_clear(int16x8_t a, const int16_t imm) { return vbicq_n_s16(a, imm); }
899[[gnu::always_inline]] nce int16x8_t bitwise_clear(int16x8_t a, const int16_t imm, mve_pred16_t p) { return vbicq_m_n_s16(a, imm, p); }
900[[gnu::always_inline]] nce int16x8_t bitwise_not(int16x8_t inactive, const int16_t imm, mve_pred16_t p) { return vmvnq_m_n_s16(inactive, imm, p); }
901[[gnu::always_inline]] nce int16x8_t bitwise_or(int16x8_t a, const int16_t imm) { return vorrq_n_s16(a, imm); }
902[[gnu::always_inline]] nce int16x8_t bitwise_or(int16x8_t a, const int16_t imm, mve_pred16_t p) { return vorrq_m_n_s16(a, imm, p); }
903[[gnu::always_inline]] nce int16x8_t shift_left(int16x8_t a, int32_t b) { return vshlq_r_s16(a, b); }
904[[gnu::always_inline]] nce int16x8_t shift_left(int16x8_t a, int32_t b, mve_pred16_t p) { return vshlq_m_r_s16(a, b, p); }
905[[gnu::always_inline]] nce int32x4_t reverse_64bit(int32x4_t inactive, int32x4_t a, mve_pred16_t p) { return vrev64q_m_s32(inactive, a, p); }
906[[gnu::always_inline]] nce mve_pred16_t not_equal(int32x4_t a, int32x4_t b, mve_pred16_t p) { return vcmpneq_m_s32(a, b, p); }
907[[gnu::always_inline]] nce mve_pred16_t greater_than_or_equal(int32x4_t a, int32x4_t b) { return vcmpgeq_s32(a, b); }
908[[gnu::always_inline]] nce mve_pred16_t greater_than_or_equal(int32x4_t a, int32x4_t b, mve_pred16_t p) { return vcmpgeq_m_s32(a, b, p); }
909[[gnu::always_inline]] nce mve_pred16_t greater_than(int32x4_t a, int32x4_t b) { return vcmpgtq_s32(a, b); }
910[[gnu::always_inline]] nce mve_pred16_t greater_than(int32x4_t a, int32x4_t b, mve_pred16_t p) { return vcmpgtq_m_s32(a, b, p); }
911[[gnu::always_inline]] nce int32x4_t reverse_64bit(int32x4_t a, mve_pred16_t p) { return vrev64q_x_s32(a, p); }
912[[gnu::always_inline]] nce int32x4_t duplicate(int32x4_t inactive, int32_t a, mve_pred16_t p) { return vdupq_m_n_s32(inactive, a, p); }
913[[gnu::always_inline]] nce int32x4_t uninitialized(int32x4_t t) { return vuninitializedq(t); }
914[[gnu::always_inline]] nce mve_pred16_t equal(int32x4_t a, int32x4_t b) { return vcmpeqq_s32(a, b); }
915[[gnu::always_inline]] nce mve_pred16_t equal(int32x4_t a, int32x4_t b, mve_pred16_t p) { return vcmpeqq_m_s32(a, b, p); }
916[[gnu::always_inline]] nce mve_pred16_t not_equal(int32x4_t a, int32x4_t b) { return vcmpneq_s32(a, b); }
917[[gnu::always_inline]] nce mve_pred16_t less_than_or_equal(int32x4_t a, int32x4_t b) { return vcmpleq_s32(a, b); }
918[[gnu::always_inline]] nce mve_pred16_t less_than_or_equal(int32x4_t a, int32x4_t b, mve_pred16_t p) { return vcmpleq_m_s32(a, b, p); }
919[[gnu::always_inline]] nce mve_pred16_t less_than(int32x4_t a, int32x4_t b) { return vcmpltq_s32(a, b); }
920[[gnu::always_inline]] nce int32x4_t min(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p) { return vminq_m_s32(inactive, a, b, p); }
921[[gnu::always_inline]] nce int32x4_t max(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p) { return vmaxq_m_s32(inactive, a, b, p); }
922[[gnu::always_inline]] nce int32x4_t subtract_absolute(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p) { return vabdq_m_s32(inactive, a, b, p); }
923[[gnu::always_inline]] nce int32x4_t add(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p) { return vaddq_m_s32(inactive, a, b, p); }
924[[gnu::always_inline]] nce int32x4_t add_carry_initialized(int32x4_t inactive, int32x4_t a, int32x4_t b, unsigned *carry_out, mve_pred16_t p) { return vadciq_m_s32(inactive, a, b, carry_out, p); }
925[[gnu::always_inline]] nce int32x4_t add_carry(int32x4_t inactive, int32x4_t a, int32x4_t b, unsigned *carry, mve_pred16_t p) { return vadcq_m_s32(inactive, a, b, carry, p); }
926[[gnu::always_inline]] nce mve_pred16_t less_than(int32x4_t a, int32x4_t b, mve_pred16_t p) { return vcmpltq_m_s32(a, b, p); }
927[[gnu::always_inline]] nce int32x4_t min(int32x4_t a, int32x4_t b, mve_pred16_t p) { return vminq_x_s32(a, b, p); }
928[[gnu::always_inline]] nce int32x4_t max(int32x4_t a, int32x4_t b, mve_pred16_t p) { return vmaxq_x_s32(a, b, p); }
929[[gnu::always_inline]] nce int32x4_t subtract_absolute(int32x4_t a, int32x4_t b, mve_pred16_t p) { return vabdq_x_s32(a, b, p); }
930[[gnu::always_inline]] nce int32x4_t abs(int32x4_t inactive, int32x4_t a, mve_pred16_t p) { return vabsq_m_s32(inactive, a, p); }
931[[gnu::always_inline]] nce int32x4_t abs_saturate(int32x4_t inactive, int32x4_t a, mve_pred16_t p) { return vqabsq_m_s32(inactive, a, p); }
932[[gnu::always_inline]] nce int32x4_t add(int32x4_t a, int32x4_t b, mve_pred16_t p) { return vaddq_x_s32(a, b, p); }
933[[gnu::always_inline]] nce int32x4_t add_carry_initialized(int32x4_t a, int32x4_t b, unsigned *carry_out) { return vadciq_s32(a, b, carry_out); }
934[[gnu::always_inline]] nce int32x4_t add_carry(int32x4_t a, int32x4_t b, unsigned *carry) { return vadcq_s32(a, b, carry); }
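// Usage sketch (illustrative comment): chaining the carry between the two wrappers above,
// the usual VADCI/VADC pattern for multi-precision addition (my reading of the underlying
// vadciq/vadcq intrinsics; verify the carry semantics before relying on it).
//   unsigned carry;                                               // written by the first call
//   int32x4_t lo = add_carry_initialized(a_lo, b_lo, &carry);     // vadciq_s32, carry-in is zero
//   int32x4_t hi = add_carry(a_hi, b_hi, &carry);                 // vadcq_s32, consumes and updates carry
// a_lo/a_hi/b_lo/b_hi are assumed operand halves from surrounding code.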
935[[gnu::always_inline]] nce int32x4_t add(int32x4_t inactive, int32x4_t a, int32_t b, mve_pred16_t p) { return vaddq_m_n_s32(inactive, a, b, p); }
936[[gnu::always_inline]] nce int32x4_t abs(int32x4_t a, mve_pred16_t p) { return vabsq_x_s32(a, p); }
937[[gnu::always_inline]] nce int64_t reduce_add_long(int32x4_t a, mve_pred16_t p) { return vaddlvq_p_s32(a, p); }
938[[gnu::always_inline]] nce int32_t reduce_add(int32x4_t a, mve_pred16_t p) { return vaddvq_p_s32(a, p); }
939[[gnu::always_inline]] nce mve_pred16_t equal(int32x4_t a, int32_t b) { return vcmpeqq_n_s32(a, b); }
940[[gnu::always_inline]] nce mve_pred16_t equal(int32x4_t a, int32_t b, mve_pred16_t p) { return vcmpeqq_m_n_s32(a, b, p); }
941[[gnu::always_inline]] nce mve_pred16_t not_equal(int32x4_t a, int32_t b) { return vcmpneq_n_s32(a, b); }
942[[gnu::always_inline]] nce mve_pred16_t not_equal(int32x4_t a, int32_t b, mve_pred16_t p) { return vcmpneq_m_n_s32(a, b, p); }
943[[gnu::always_inline]] nce mve_pred16_t greater_than_or_equal(int32x4_t a, int32_t b) { return vcmpgeq_n_s32(a, b); }
944[[gnu::always_inline]] nce mve_pred16_t greater_than_or_equal(int32x4_t a, int32_t b, mve_pred16_t p) { return vcmpgeq_m_n_s32(a, b, p); }
945[[gnu::always_inline]] nce mve_pred16_t greater_than(int32x4_t a, int32_t b) { return vcmpgtq_n_s32(a, b); }
946[[gnu::always_inline]] nce mve_pred16_t greater_than(int32x4_t a, int32_t b, mve_pred16_t p) { return vcmpgtq_m_n_s32(a, b, p); }
947[[gnu::always_inline]] nce mve_pred16_t less_than_or_equal(int32x4_t a, int32_t b) { return vcmpleq_n_s32(a, b); }
948[[gnu::always_inline]] nce mve_pred16_t less_than_or_equal(int32x4_t a, int32_t b, mve_pred16_t p) { return vcmpleq_m_n_s32(a, b, p); }
949[[gnu::always_inline]] nce mve_pred16_t less_than(int32x4_t a, int32_t b) { return vcmpltq_n_s32(a, b); }
950[[gnu::always_inline]] nce mve_pred16_t less_than(int32x4_t a, int32_t b, mve_pred16_t p) { return vcmpltq_m_n_s32(a, b, p); }
951[[gnu::always_inline]] nce int32x4_t add(int32x4_t a, int32_t b) { return vaddq_n_s32(a, b); }
952[[gnu::always_inline]] nce int32x4_t add(int32x4_t a, int32_t b, mve_pred16_t p) { return vaddq_x_n_s32(a, b, p); }
953[[gnu::always_inline]] nce int64_t reduce_add_long(int32x4_t a) { return vaddlvq_s32(a); }
954[[gnu::always_inline]] nce int32_t reduce_add(int32x4_t a) { return vaddvq_s32(a); }
955[[gnu::always_inline]] nce int32x4_t multiply_long_bottom(int32x4_t inactive, int16x8_t a, int16x8_t b, mve_pred16_t p) { return vmullbq_int_m_s16(inactive, a, b, p); }
956[[gnu::always_inline]] nce int32x4_t multiply_long_top(int32x4_t inactive, int16x8_t a, int16x8_t b, mve_pred16_t p) { return vmulltq_int_m_s16(inactive, a, b, p); }
957[[gnu::always_inline]] nce int32x4_t multiply_double_saturate_long_bottom(int32x4_t inactive, int16x8_t a, int16x8_t b, mve_pred16_t p) { return vqdmullbq_m_s16(inactive, a, b, p); }
958[[gnu::always_inline]] nce int32x4_t multiply_double_saturate_long_top(int32x4_t inactive, int16x8_t a, int16x8_t b, mve_pred16_t p) { return vqdmulltq_m_s16(inactive, a, b, p); }
959[[gnu::always_inline]] nce int32x4_t multiply_double_saturate_long_bottom(int32x4_t inactive, int16x8_t a, int16_t b, mve_pred16_t p) { return vqdmullbq_m_n_s16(inactive, a, b, p); }
960[[gnu::always_inline]] nce int32x4_t multiply_double_saturate_long_top(int32x4_t inactive, int16x8_t a, int16_t b, mve_pred16_t p) { return vqdmulltq_m_n_s16(inactive, a, b, p); }
961[[gnu::always_inline]] nce int32x4_t add_halve(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p) { return vhaddq_m_s32(inactive, a, b, p); }
962[[gnu::always_inline]] nce int32x4_t add_halve_round(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p) { return vrhaddq_m_s32(inactive, a, b, p); }
963[[gnu::always_inline]] nce int32x4_t add_saturate(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p) { return vqaddq_m_s32(inactive, a, b, p); }
964[[gnu::always_inline]] nce int32x4_t add_halve(int32x4_t a, int32x4_t b, mve_pred16_t p) { return vhaddq_x_s32(a, b, p); }
965[[gnu::always_inline]] nce int32x4_t add_halve_round(int32x4_t a, int32x4_t b, mve_pred16_t p) { return vrhaddq_x_s32(a, b, p); }
966[[gnu::always_inline]] nce int32x4_t add_halve(int32x4_t inactive, int32x4_t a, int32_t b, mve_pred16_t p) { return vhaddq_m_n_s32(inactive, a, b, p); }
967[[gnu::always_inline]] nce int32x4_t add_saturate(int32x4_t inactive, int32x4_t a, int32_t b, mve_pred16_t p) { return vqaddq_m_n_s32(inactive, a, b, p); }
968[[gnu::always_inline]] nce int32x4_t multiply_high(int32x4_t a, int32x4_t b) { return vmulhq_s32(a, b); }
969[[gnu::always_inline]] nce int32x4_t multiply_high(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p) { return vmulhq_m_s32(inactive, a, b, p); }
970[[gnu::always_inline]] nce int32x4_t multiply_high(int32x4_t a, int32x4_t b, mve_pred16_t p) { return vmulhq_x_s32(a, b, p); }
971[[gnu::always_inline]] nce int64x2_t multiply_long_bottom(int32x4_t a, int32x4_t b) { return vmullbq_int_s32(a, b); }
972[[gnu::always_inline]] nce int64x2_t multiply_long_bottom(int32x4_t a, int32x4_t b, mve_pred16_t p) { return vmullbq_int_x_s32(a, b, p); }
973[[gnu::always_inline]] nce int64x2_t multiply_long_top(int32x4_t a, int32x4_t b) { return vmulltq_int_s32(a, b); }
974[[gnu::always_inline]] nce int32x4_t multiply(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p) { return vmulq_m_s32(inactive, a, b, p); }
975[[gnu::always_inline]] nce int32x4_t multiply_round_high(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p) { return vrmulhq_m_s32(inactive, a, b, p); }
976[[gnu::always_inline]] nce int32x4_t multiply_add_double_saturate_high(int32x4_t inactive, int32x4_t a, int32x4_t b) { return vqdmladhq_s32(inactive, a, b); }
977[[gnu::always_inline]] nce int32x4_t multiply_add_double_saturate_high(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p) { return vqdmladhq_m_s32(inactive, a, b, p); }
978[[gnu::always_inline]] nce int32x4_t multiply_add_double_saturate_high_exchange_pairs(int32x4_t inactive, int32x4_t a, int32x4_t b) { return vqdmladhxq_s32(inactive, a, b); }
979[[gnu::always_inline]] nce int32x4_t multiply_add_double_saturate_high_exchange_pairs(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p) { return vqdmladhxq_m_s32(inactive, a, b, p); }
980[[gnu::always_inline]] nce int32x4_t multiply_add_dual_double_round_saturate_high(int32x4_t inactive, int32x4_t a, int32x4_t b) { return vqrdmladhq_s32(inactive, a, b); }
981[[gnu::always_inline]] nce int32x4_t multiply_add_dual_double_round_saturate_high(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p) { return vqrdmladhq_m_s32(inactive, a, b, p); }
982[[gnu::always_inline]] nce int32x4_t multiply_add_dual_double_round_saturate_high_exchange_pairs(int32x4_t inactive, int32x4_t a, int32x4_t b) { return vqrdmladhxq_s32(inactive, a, b); }
983[[gnu::always_inline]] nce int32x4_t multiply_add_dual_double_round_saturate_high_exchange_pairs(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p) { return vqrdmladhxq_m_s32(inactive, a, b, p); }
984[[gnu::always_inline]] nce int32x4_t multiply_subtract_dual_double_saturate_high(int32x4_t inactive, int32x4_t a, int32x4_t b) { return vqdmlsdhq_s32(inactive, a, b); }
985[[gnu::always_inline]] nce int32x4_t multiply_subtract_dual_double_saturate_high(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p) { return vqdmlsdhq_m_s32(inactive, a, b, p); }
986[[gnu::always_inline]] nce int32x4_t multiply_subtract_dual_double_saturate_high_exchange_pairs(int32x4_t inactive, int32x4_t a, int32x4_t b) { return vqdmlsdhxq_s32(inactive, a, b); }
987[[gnu::always_inline]] nce int32x4_t multiply_subtract_dual_double_saturate_high_exchange_pairs(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p) { return vqdmlsdhxq_m_s32(inactive, a, b, p); }
988[[gnu::always_inline]] nce int32x4_t multiply_subtract_dual_double_round_saturate_high(int32x4_t inactive, int32x4_t a, int32x4_t b) { return vqrdmlsdhq_s32(inactive, a, b); }
989[[gnu::always_inline]] nce int32x4_t multiply_subtract_dual_double_round_saturate_high(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p) { return vqrdmlsdhq_m_s32(inactive, a, b, p); }
990[[gnu::always_inline]] nce int32x4_t multiply_subtract_dual_double_round_saturate_high_exchange_pairs(int32x4_t inactive, int32x4_t a, int32x4_t b) { return vqrdmlsdhxq_s32(inactive, a, b); }
991[[gnu::always_inline]] nce int32x4_t multiply_subtract_dual_double_round_saturate_high_exchange_pairs(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p) { return vqrdmlsdhxq_m_s32(inactive, a, b, p); }
992[[gnu::always_inline]] nce int32x4_t multiply_double_saturate_high(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p) { return vqdmulhq_m_s32(inactive, a, b, p); }
993[[gnu::always_inline]] nce int32x4_t multiply_double_round_saturate_high(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p) { return vqrdmulhq_m_s32(inactive, a, b, p); }
994[[gnu::always_inline]] nce int64x2_t multiply_long_top(int32x4_t a, int32x4_t b, mve_pred16_t p) { return vmulltq_int_x_s32(a, b, p); }
995[[gnu::always_inline]] nce int32x4_t multiply(int32x4_t a, int32x4_t b, mve_pred16_t p) { return vmulq_x_s32(a, b, p); }
996[[gnu::always_inline]] nce int32x4_t multiply_round_high(int32x4_t a, int32x4_t b, mve_pred16_t p) { return vrmulhq_x_s32(a, b, p); }
997[[gnu::always_inline]] nce int32x4_t multiply_subtract_double_saturate_high(int32x4_t m1, int32x4_t m2, int32_t add, mve_pred16_t p) { return vqdmlashq_m_n_s32(m1, m2, add, p); }
998[[gnu::always_inline]] nce int32x4_t multiply_add_double_round_saturate_high_scalar(int32x4_t m1, int32x4_t m2, int32_t add) { return vqrdmlashq_n_s32(m1, m2, add); }
999[[gnu::always_inline]] nce int32x4_t multiply_add_double_round_saturate_high_scalar(int32x4_t m1, int32x4_t m2, int32_t add, mve_pred16_t p) { return vqrdmlashq_m_n_s32(m1, m2, add, p); }
1000[[gnu::always_inline]] nce int32x4_t multiply_double_saturate_high(int32x4_t inactive, int32x4_t a, int32_t b, mve_pred16_t p) { return vqdmulhq_m_n_s32(inactive, a, b, p); }
1001[[gnu::always_inline]] nce int32x4_t multiply_double_round_saturate_high(int32x4_t inactive, int32x4_t a, int32_t b, mve_pred16_t p) { return vqrdmulhq_m_n_s32(inactive, a, b, p); }
1002[[gnu::always_inline]] nce int64x2_t multiply_double_saturate_long_bottom(int32x4_t a, int32x4_t b) { return vqdmullbq_s32(a, b); }
1003[[gnu::always_inline]] nce int64x2_t multiply_double_saturate_long_top(int32x4_t a, int32x4_t b) { return vqdmulltq_s32(a, b); }
1004[[gnu::always_inline]] nce int32_t multiply_add_dual_reduce_add(int32x4_t m1, int32x4_t m2) { return vmladavq_s32(m1, m2); }
1005[[gnu::always_inline]] nce int32_t multiply_add_dual_reduce_add(int32x4_t m1, int32x4_t m2, mve_pred16_t p) { return vmladavq_p_s32(m1, m2, p); }
1006[[gnu::always_inline]] nce int32_t multiply_add_dual_reduce_add_exchange_pairs(int32x4_t m1, int32x4_t m2) { return vmladavxq_s32(m1, m2); }
1007[[gnu::always_inline]] nce int32_t multiply_add_dual_reduce_add_exchange_pairs(int32x4_t m1, int32x4_t m2, mve_pred16_t p) { return vmladavxq_p_s32(m1, m2, p); }
1008[[gnu::always_inline]] nce int64_t multiply_add_long_dual_reduce_add(int32x4_t m1, int32x4_t m2) { return vmlaldavq_s32(m1, m2); }
1009[[gnu::always_inline]] nce int64_t multiply_add_long_dual_reduce_add(int32x4_t m1, int32x4_t m2, mve_pred16_t p) { return vmlaldavq_p_s32(m1, m2, p); }
1010[[gnu::always_inline]] nce int64_t multiply_add_long_dual_reduce_add_exchange_pairs(int32x4_t m1, int32x4_t m2) { return vmlaldavxq_s32(m1, m2); }
1011[[gnu::always_inline]] nce int64_t multiply_add_long_dual_reduce_add_exchange_pairs(int32x4_t m1, int32x4_t m2, mve_pred16_t p) { return vmlaldavxq_p_s32(m1, m2, p); }
1012[[gnu::always_inline]] nce int32_t multiply_subtract_dual_reduce_add(int32x4_t a, int32x4_t b, mve_pred16_t p) { return vmlsdavq_p_s32(a, b, p); }
1013[[gnu::always_inline]] nce int32x4_t multiply_add(int32x4_t add, int32x4_t m1, int32_t m2) { return vmlaq_n_s32(add, m1, m2); }
1014[[gnu::always_inline]] nce int32x4_t multiply_add(int32x4_t add, int32x4_t m1, int32_t m2, mve_pred16_t p) { return vmlaq_m_n_s32(add, m1, m2, p); }
1015[[gnu::always_inline]] nce int32x4_t multiply_add_scalar(int32x4_t m1, int32x4_t m2, int32_t add) { return vmlasq_n_s32(m1, m2, add); }
1016[[gnu::always_inline]] nce int32x4_t multiply_add_scalar(int32x4_t m1, int32x4_t m2, int32_t add, mve_pred16_t p) { return vmlasq_m_n_s32(m1, m2, add, p); }
1017[[gnu::always_inline]] nce int32_t multiply_subtract_dual_reduce_add(int32x4_t a, int32x4_t b) { return vmlsdavq_s32(a, b); }
1018[[gnu::always_inline]] nce int32_t multiply_subtract_dual_reduce_add_exchange_pairs(int32x4_t a, int32x4_t b) { return vmlsdavxq_s32(a, b); }
1019[[gnu::always_inline]] nce int32_t multiply_subtract_dual_reduce_add_exchange_pairs(int32x4_t a, int32x4_t b, mve_pred16_t p) { return vmlsdavxq_p_s32(a, b, p); }
1020[[gnu::always_inline]] nce int64_t multiply_subtract_long_dual_reduce_add(int32x4_t a, int32x4_t b) { return vmlsldavq_s32(a, b); }
1021[[gnu::always_inline]] nce int64_t multiply_subtract_long_dual_reduce_add(int32x4_t a, int32x4_t b, mve_pred16_t p) { return vmlsldavq_p_s32(a, b, p); }
1022[[gnu::always_inline]] nce int64_t multiply_subtract_long_dual_reduce_add_exchange_pairs(int32x4_t a, int32x4_t b) { return vmlsldavxq_s32(a, b); }
1023[[gnu::always_inline]] nce int64_t multiply_subtract_long_dual_reduce_add_exchange_pairs(int32x4_t a, int32x4_t b, mve_pred16_t p) { return vmlsldavxq_p_s32(a, b, p); }
1024[[gnu::always_inline]] nce int64_t multiply_add_long_round_dual_reduce_add_high(int32x4_t a, int32x4_t b) { return vrmlaldavhq_s32(a, b); }
1025[[gnu::always_inline]] nce int64_t multiply_add_long_round_dual_reduce_add_high(int32x4_t a, int32x4_t b, mve_pred16_t p) { return vrmlaldavhq_p_s32(a, b, p); }
1026[[gnu::always_inline]] nce int64_t multiply_add_long_round_dual_reduce_add_high_exchange_pairs(int32x4_t a, int32x4_t b) { return vrmlaldavhxq_s32(a, b); }
1027[[gnu::always_inline]] nce int64_t multiply_add_long_round_dual_reduce_add_high_exchange_pairs(int32x4_t a, int32x4_t b, mve_pred16_t p) { return vrmlaldavhxq_p_s32(a, b, p); }
1028[[gnu::always_inline]] nce int64_t multiply_subtract_long_round_dual_reduce_add_high(int32x4_t a, int32x4_t b) { return vrmlsldavhq_s32(a, b); }
1029[[gnu::always_inline]] nce int32x4_t subtract_with_carry_initialized(int32x4_t inactive, int32x4_t a, int32x4_t b, unsigned *carry_out, mve_pred16_t p) { return vsbciq_m_s32(inactive, a, b, carry_out, p); }
1030[[gnu::always_inline]] nce int64_t multiply_subtract_long_round_dual_reduce_add_high(int32x4_t a, int32x4_t b, mve_pred16_t p) { return vrmlsldavhq_p_s32(a, b, p); }
1031[[gnu::always_inline]] nce int64_t multiply_subtract_long_round_dual_reduce_add_high_exchange_pairs(int32x4_t a, int32x4_t b) { return vrmlsldavhxq_s32(a, b); }
1032[[gnu::always_inline]] nce int32x4_t shift_left_round_saturate(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p) { return vqrshlq_m_s32(inactive, a, b, p); }
1033[[gnu::always_inline]] nce int32x4_t shift_left_saturate(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p) { return vqshlq_m_s32(inactive, a, b, p); }
1034template <int imm>[[gnu::always_inline]] nce int32x4_t shift_right_round(int32x4_t inactive, int32x4_t a, mve_pred16_t p) { return vrshrq_m_n_s32(inactive, a, imm, p); }
1035template <int imm>[[gnu::always_inline]] nce int32x4_t shift_right(int32x4_t inactive, int32x4_t a, mve_pred16_t p) { return vshrq_m_n_s32(inactive, a, imm, p); }
1036template <int imm>[[gnu::always_inline]] nce int32x4_t shift_right_insert(int32x4_t a, int32x4_t b, mve_pred16_t p) { return vsriq_m_n_s32(a, b, imm, p); }
1037template <int imm>[[gnu::always_inline]] nce int32x4_t shift_left_saturate(int32x4_t inactive, int32x4_t a, mve_pred16_t p) { return vqshlq_m_n_s32(inactive, a, imm, p); }
1038[[gnu::always_inline]] nce int32x4_t bit_reverse_shift_right(int32x4_t inactive, int32x4_t a, int32_t b, mve_pred16_t p) { return vbrsrq_m_n_s32(inactive, a, b, p); }
1039template <int imm>[[gnu::always_inline]] nce int32x4_t shift_right_round(int32x4_t a, mve_pred16_t p) { return vrshrq_x_n_s32(a, imm, p); }
1040template <int imm>[[gnu::always_inline]] nce int32x4_t shift_right(int32x4_t a, mve_pred16_t p) { return vshrq_x_n_s32(a, imm, p); }
1041[[gnu::always_inline]] nce int32x4_t bit_reverse_shift_right(int32x4_t a, int32_t b) { return vbrsrq_n_s32(a, b); }
1042[[gnu::always_inline]] nce int32x4_t bit_reverse_shift_right(int32x4_t a, int32_t b, mve_pred16_t p) { return vbrsrq_x_n_s32(a, b, p); }
1043[[gnu::always_inline]] nce int32x4_t shift_left_round_saturate(int32x4_t a, int32_t b) { return vqrshlq_n_s32(a, b); }
1044[[gnu::always_inline]] nce int32x4_t shift_left_round_saturate(int32x4_t a, int32_t b, mve_pred16_t p) { return vqrshlq_m_n_s32(a, b, p); }
1045[[gnu::always_inline]] nce int32x4_t shift_left_saturate(int32x4_t a, int32_t b) { return vqshlq_r_s32(a, b); }
1046[[gnu::always_inline]] nce int32x4_t shift_left_saturate(int32x4_t a, int32_t b, mve_pred16_t p) { return vqshlq_m_r_s32(a, b, p); }
1047template <int imm>[[gnu::always_inline]] nce uint32x4_t shift_left_unsigned_saturate(int32x4_t a) { return vqshluq_n_s32(a, imm); }
1048template <int imm>[[gnu::always_inline]] nce int32x4_t shift_left_long_bottom(int32x4_t inactive, int16x8_t a, mve_pred16_t p) { return vshllbq_m_n_s16(inactive, a, imm, p); }
1049template <int imm>[[gnu::always_inline]] nce int32x4_t shift_left_long_top(int32x4_t inactive, int16x8_t a, mve_pred16_t p) { return vshlltq_m_n_s16(inactive, a, imm, p); }
1050[[gnu::always_inline]] nce int32x4_t shift_left_round(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p) { return vrshlq_m_s32(inactive, a, b, p); }
1051[[gnu::always_inline]] nce int32x4_t shift_left(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p) { return vshlq_m_s32(inactive, a, b, p); }
1052[[gnu::always_inline]] nce int32x4_t shift_left_round(int32x4_t a, int32x4_t b, mve_pred16_t p) { return vrshlq_x_s32(a, b, p); }
1053[[gnu::always_inline]] nce int32x4_t shift_left(int32x4_t a, int32x4_t b, mve_pred16_t p) { return vshlq_x_s32(a, b, p); }
1054[[gnu::always_inline]] nce int32x4_t shift_left_round(int32x4_t a, int32_t b) { return vrshlq_n_s32(a, b); }
1055[[gnu::always_inline]] nce int32x4_t shift_left_round(int32x4_t a, int32_t b, mve_pred16_t p) { return vrshlq_m_n_s32(a, b, p); }
1056template <int imm>[[gnu::always_inline]] nce int32x4_t shift_left_carry(int32x4_t a, uint32_t *b) { return vshlcq_s32(a, b, imm); }
1057template <int imm>[[gnu::always_inline]] nce int32x4_t shift_left_carry(int32x4_t a, uint32_t *b, mve_pred16_t p) { return vshlcq_m_s32(a, b, imm, p); }
1058template <int imm>[[gnu::always_inline]] nce int32x4_t shift_left(int32x4_t a) { return vshlq_n_s32(a, imm); }
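// Usage sketch (illustrative comment): the template parameter is the compile-time shift
// amount, so scaling every lane by 2^3 is written as
//   int32x4_t scaled = shift_left<3>(v);   // vshlq_n_s32(v, 3)
// where 'v' is assumed to be an int32x4_t from surrounding code.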
1059[[gnu::always_inline]] nce int32x4_t move_long_bottom(int32x4_t inactive, int16x8_t a, mve_pred16_t p) { return vmovlbq_m_s16(inactive, a, p); }
1060[[gnu::always_inline]] nce int32x4_t move_long_top(int32x4_t inactive, int16x8_t a, mve_pred16_t p) { return vmovltq_m_s16(inactive, a, p); }
1061[[gnu::always_inline]] nce int32x4_t subtract(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p) { return vsubq_m_s32(inactive, a, b, p); }
1062[[gnu::always_inline]] nce int32x4_t subtract_halve(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p) { return vhsubq_m_s32(inactive, a, b, p); }
1063[[gnu::always_inline]] nce int32x4_t subtract_saturate(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p) { return vqsubq_m_s32(inactive, a, b, p); }
1064[[gnu::always_inline]] nce int32x4_t bitwise_clear(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p) { return vbicq_m_s32(inactive, a, b, p); }
1065[[gnu::always_inline]] nce int32x4_t bitwise_and(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p) { return vandq_m_s32(inactive, a, b, p); }
1066[[gnu::always_inline]] nce int32x4_t bitwise_xor(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p) { return veorq_m_s32(inactive, a, b, p); }
1067[[gnu::always_inline]] nce int32x4_t bitwise_or_not(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p) { return vornq_m_s32(inactive, a, b, p); }
1068[[gnu::always_inline]] nce int32x4_t bitwise_or(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p) { return vorrq_m_s32(inactive, a, b, p); }
1069[[gnu::always_inline]] nce int32x4_t complex_add_rotate_270(int32x4_t a, int32x4_t b) { return vcaddq_rot270_s32(a, b); }
1070[[gnu::always_inline]] nce int32x4_t complex_add_rotate_90(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p) { return vcaddq_rot90_m_s32(inactive, a, b, p); }
1071[[gnu::always_inline]] nce int32x4_t complex_add_rotate_270(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p) { return vcaddq_rot270_m_s32(inactive, a, b, p); }
1072[[gnu::always_inline]] nce int32x4_t complex_add_rotate_90_halve(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p) { return vhcaddq_rot90_m_s32(inactive, a, b, p); }
1073[[gnu::always_inline]] nce int32x4_t complex_add_rotate_270_halve(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p) { return vhcaddq_rot270_m_s32(inactive, a, b, p); }
1074[[gnu::always_inline]] nce int32x4_t subtract_with_carry(int32x4_t inactive, int32x4_t a, int32x4_t b, unsigned *carry, mve_pred16_t p) { return vsbcq_m_s32(inactive, a, b, carry, p); }
1075[[gnu::always_inline]] nce int64_t multiply_subtract_long_round_dual_reduce_add_high_exchange_pairs(int32x4_t a, int32x4_t b, mve_pred16_t p) { return vrmlsldavhxq_p_s32(a, b, p); }
1076[[gnu::always_inline]] nce int32x4_t subtract(int32x4_t a, int32x4_t b, mve_pred16_t p) { return vsubq_x_s32(a, b, p); }
1077[[gnu::always_inline]] nce int32x4_t subtract_halve(int32x4_t a, int32x4_t b, mve_pred16_t p) { return vhsubq_x_s32(a, b, p); }
1078[[gnu::always_inline]] nce int32x4_t count_leading_sign_bits(int32x4_t inactive, int32x4_t a, mve_pred16_t p) { return vclsq_m_s32(inactive, a, p); }
1079[[gnu::always_inline]] nce int32x4_t count_leading_zero_bits(int32x4_t inactive, int32x4_t a, mve_pred16_t p) { return vclzq_m_s32(inactive, a, p); }
1080[[gnu::always_inline]] nce int32x4_t bitwise_clear(int32x4_t a, int32x4_t b, mve_pred16_t p) { return vbicq_x_s32(a, b, p); }
1081[[gnu::always_inline]] nce int32x4_t negate(int32x4_t inactive, int32x4_t a, mve_pred16_t p) { return vnegq_m_s32(inactive, a, p); }
1082[[gnu::always_inline]] nce int32x4_t negate_saturate(int32x4_t inactive, int32x4_t a, mve_pred16_t p) { return vqnegq_m_s32(inactive, a, p); }
1083[[gnu::always_inline]] nce int32x4_t bitwise_and(int32x4_t a, int32x4_t b, mve_pred16_t p) { return vandq_x_s32(a, b, p); }
1084[[gnu::always_inline]] nce int32x4_t bitwise_xor(int32x4_t a, int32x4_t b, mve_pred16_t p) { return veorq_x_s32(a, b, p); }
1085[[gnu::always_inline]] nce int32x4_t bitwise_not(int32x4_t inactive, int32x4_t a, mve_pred16_t p) { return vmvnq_m_s32(inactive, a, p); }
1086[[gnu::always_inline]] nce int32x4_t bitwise_or_not(int32x4_t a, int32x4_t b, mve_pred16_t p) { return vornq_x_s32(a, b, p); }
1087[[gnu::always_inline]] nce int32x4_t bitwise_or(int32x4_t a, int32x4_t b, mve_pred16_t p) { return vorrq_x_s32(a, b, p); }
1088[[gnu::always_inline]] nce int32x4_t complex_add_rotate_90(int32x4_t a, int32x4_t b, mve_pred16_t p) { return vcaddq_rot90_x_s32(a, b, p); }
1089[[gnu::always_inline]] nce int32x4_t complex_add_rotate_270(int32x4_t a, int32x4_t b, mve_pred16_t p) { return vcaddq_rot270_x_s32(a, b, p); }
1090[[gnu::always_inline]] nce int32x4_t complex_add_rotate_90_halve(int32x4_t a, int32x4_t b) { return vhcaddq_rot90_s32(a, b); }
1091[[gnu::always_inline]] nce int32x4_t complex_add_rotate_90_halve(int32x4_t a, int32x4_t b, mve_pred16_t p) { return vhcaddq_rot90_x_s32(a, b, p); }
1092[[gnu::always_inline]] nce int32x4_t complex_add_rotate_270_halve(int32x4_t a, int32x4_t b) { return vhcaddq_rot270_s32(a, b); }
1093[[gnu::always_inline]] nce int32x4_t complex_add_rotate_270_halve(int32x4_t a, int32x4_t b, mve_pred16_t p) { return vhcaddq_rot270_x_s32(a, b, p); }
1094[[gnu::always_inline]] nce int32x4_t multiply(int32x4_t inactive, int32x4_t a, int32_t b, mve_pred16_t p) { return vmulq_m_n_s32(inactive, a, b, p); }
1095[[gnu::always_inline]] nce int32x4_t multiply_round_high(int32x4_t a, int32x4_t b) { return vrmulhq_s32(a, b); }
1096[[gnu::always_inline]] nce int32x4_t multiply_add_double_saturate_high(int32x4_t add, int32x4_t m1, int32_t m2) { return vqdmlahq_n_s32(add, m1, m2); }
1097[[gnu::always_inline]] nce int32x4_t multiply_add_double_saturate_high(int32x4_t add, int32x4_t m1, int32_t m2, mve_pred16_t p) { return vqdmlahq_m_n_s32(add, m1, m2, p); }
1098[[gnu::always_inline]] nce int32x4_t multiply_add_double_round_saturate_high(int32x4_t add, int32x4_t m1, int32_t m2) { return vqrdmlahq_n_s32(add, m1, m2); }
1099[[gnu::always_inline]] nce int32x4_t multiply_add_double_round_saturate_high(int32x4_t add, int32x4_t m1, int32_t m2, mve_pred16_t p) { return vqrdmlahq_m_n_s32(add, m1, m2, p); }
1100[[gnu::always_inline]] nce int32x4_t multiply_subtract_double_saturate_high(int32x4_t m1, int32x4_t m2, int32_t add) { return vqdmlashq_n_s32(m1, m2, add); }
1101[[gnu::always_inline]] nce int32x4_t subtract_with_carry_initialized(int32x4_t a, int32x4_t b, unsigned *carry_out) { return vsbciq_s32(a, b, carry_out); }
1102[[gnu::always_inline]] nce int32x4_t subtract_with_carry(int32x4_t a, int32x4_t b, unsigned *carry) { return vsbcq_s32(a, b, carry); }
1103[[gnu::always_inline]] nce int32x4_t subtract(int32x4_t inactive, int32x4_t a, int32_t b, mve_pred16_t p) { return vsubq_m_n_s32(inactive, a, b, p); }
1104[[gnu::always_inline]] nce int32x4_t subtract_halve(int32x4_t inactive, int32x4_t a, int32_t b, mve_pred16_t p) { return vhsubq_m_n_s32(inactive, a, b, p); }
1105[[gnu::always_inline]] nce int32x4_t subtract_saturate(int32x4_t inactive, int32x4_t a, int32_t b, mve_pred16_t p) { return vqsubq_m_n_s32(inactive, a, b, p); }
1106[[gnu::always_inline]] nce int32x4_t complex_add_rotate_90(int32x4_t a, int32x4_t b) { return vcaddq_rot90_s32(a, b); }
1107template <int imm>[[gnu::always_inline]] nce int32x4_t shift_left(int32x4_t inactive, int32x4_t a, mve_pred16_t p) { return vshlq_m_n_s32(inactive, a, imm, p); }
1108template <int imm>[[gnu::always_inline]] nce int32x4_t shift_left_insert(int32x4_t a, int32x4_t b, mve_pred16_t p) { return vsliq_m_n_s32(a, b, imm, p); }
1109[[gnu::always_inline]] nce int32x4_t predicate_select(int32x4_t a, int32x4_t b, mve_pred16_t p) { return vpselq_s32(a, b, p); }
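// Usage sketch (illustrative comment): a branch-free per-lane maximum built from the compare
// and select wrappers in this file, shown only to illustrate how predicates flow between them.
//   mve_pred16_t gt = greater_than(a, b);         // vcmpgtq_s32
//   int32x4_t   mx = predicate_select(a, b, gt);  // lanes where a > b come from a, the rest from b
// 'a' and 'b' are assumed int32x4_t values.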
1110[[gnu::always_inline]] nce int32x4_t count_leading_sign_bits(int32x4_t a, mve_pred16_t p) { return vclsq_x_s32(a, p); }
1111[[gnu::always_inline]] nce int32x4_t count_leading_zero_bits(int32x4_t a, mve_pred16_t p) { return vclzq_x_s32(a, p); }
1112[[gnu::always_inline]] nce int32x4_t negate(int32x4_t a, mve_pred16_t p) { return vnegq_x_s32(a, p); }
1113[[gnu::always_inline]] nce int32x4_t bitwise_not(int32x4_t a, mve_pred16_t p) { return vmvnq_x_s32(a, p); }
1114template <int imm>[[gnu::always_inline]] nce int32x4_t shift_left(int32x4_t a, mve_pred16_t p) { return vshlq_x_n_s32(a, imm, p); }
1115[[gnu::always_inline]] nce int32x4_t add_halve(int32x4_t a, int32_t b) { return vhaddq_n_s32(a, b); }
1116[[gnu::always_inline]] nce int32x4_t add_halve(int32x4_t a, int32_t b, mve_pred16_t p) { return vhaddq_x_n_s32(a, b, p); }
1117[[gnu::always_inline]] nce int32x4_t add_saturate(int32x4_t a, int32_t b) { return vqaddq_n_s32(a, b); }
1118[[gnu::always_inline]] nce int32x4_t multiply(int32x4_t a, int32_t b, mve_pred16_t p) { return vmulq_x_n_s32(a, b, p); }
1119[[gnu::always_inline]] nce int64x2_t multiply_double_saturate_long_bottom(int32x4_t a, int32_t b) { return vqdmullbq_n_s32(a, b); }
1120[[gnu::always_inline]] nce int64x2_t multiply_double_saturate_long_top(int32x4_t a, int32_t b) { return vqdmulltq_n_s32(a, b); }
1121[[gnu::always_inline]] nce int32x4_t subtract(int32x4_t a, int32_t b) { return vsubq_n_s32(a, b); }
1122[[gnu::always_inline]] nce int32x4_t subtract(int32x4_t a, int32_t b, mve_pred16_t p) { return vsubq_x_n_s32(a, b, p); }
1123[[gnu::always_inline]] nce int32x4_t subtract_halve(int32x4_t a, int32_t b) { return vhsubq_n_s32(a, b); }
1124[[gnu::always_inline]] nce int32x4_t subtract_halve(int32x4_t a, int32_t b, mve_pred16_t p) { return vhsubq_x_n_s32(a, b, p); }
1125[[gnu::always_inline]] nce int32x4_t subtract_saturate(int32x4_t a, int32_t b) { return vqsubq_n_s32(a, b); }
1126[[gnu::always_inline]] nce int32x4_t bitwise_clear(int32x4_t a, const int32_t imm) { return vbicq_n_s32(a, imm); }
1127[[gnu::always_inline]] nce int32x4_t bitwise_clear(int32x4_t a, const int32_t imm, mve_pred16_t p) { return vbicq_m_n_s32(a, imm, p); }
1128[[gnu::always_inline]] nce int32x4_t bitwise_not(int32x4_t inactive, const int32_t imm, mve_pred16_t p) { return vmvnq_m_n_s32(inactive, imm, p); }
1129[[gnu::always_inline]] nce int32x4_t bitwise_or(int32x4_t a, const int32_t imm) { return vorrq_n_s32(a, imm); }
1130[[gnu::always_inline]] nce int32x4_t bitwise_or(int32x4_t a, const int32_t imm, mve_pred16_t p) { return vorrq_m_n_s32(a, imm, p); }
1131[[gnu::always_inline]] nce int32x4_t shift_left(int32x4_t a, int32_t b) { return vshlq_r_s32(a, b); }
1132[[gnu::always_inline]] nce int32x4_t shift_left(int32x4_t a, int32_t b, mve_pred16_t p) { return vshlq_m_r_s32(a, b, p); }
1133[[gnu::always_inline]] nce uint64x2_t uninitialized(uint64x2_t t) { return vuninitializedq(t); }
1134template <int offset>[[gnu::always_inline]] nce int64x2_t load_doubleword_gather_base(uint64x2_t addr) { return vldrdq_gather_base_s64(addr, offset); }
1135template <int offset>[[gnu::always_inline]] nce uint64x2_t load_doubleword_gather_base(uint64x2_t addr) { return vldrdq_gather_base_u64(addr, offset); }
1136template <int offset>[[gnu::always_inline]] nce int64x2_t load_doubleword_gather_base(uint64x2_t addr, mve_pred16_t p) { return vldrdq_gather_base_z_s64(addr, offset, p); }
1137template <int offset>[[gnu::always_inline]] nce uint64x2_t load_doubleword_gather_base(uint64x2_t addr, mve_pred16_t p) { return vldrdq_gather_base_z_u64(addr, offset, p); }
1138template <int offset>[[gnu::always_inline]] nce int64x2_t load_doubleword_gather_base(uint64x2_t *addr) { return vldrdq_gather_base_wb_s64(addr, offset); }
1139template <int offset>[[gnu::always_inline]] nce uint64x2_t load_doubleword_gather_base(uint64x2_t *addr) { return vldrdq_gather_base_wb_u64(addr, offset); }
1140template <int offset>[[gnu::always_inline]] nce void store_doubleword_scatter_base(uint64x2_t addr, uint64x2_t value) { return vstrdq_scatter_base_u64(addr, offset, value); }
1141template <int offset>[[gnu::always_inline]] nce void store_doubleword_scatter_base(uint64x2_t addr, uint64x2_t value, mve_pred16_t p) { return vstrdq_scatter_base_p_u64(addr, offset, value, p); }
1142template <int offset>[[gnu::always_inline]] nce void store_doubleword_scatter_base(uint64x2_t *addr, uint64x2_t value) { return vstrdq_scatter_base_wb_u64(addr, offset, value); }
1143template <int offset>[[gnu::always_inline]] nce void store_doubleword_scatter_base(uint64x2_t *addr, uint64x2_t value, mve_pred16_t p) { return vstrdq_scatter_base_wb_p_u64(addr, offset, value, p); }
1144[[gnu::always_inline]] nce uint64x2_t predicate_select(uint64x2_t a, uint64x2_t b, mve_pred16_t p) { return vpselq_u64(a, b, p); }
1145[[gnu::always_inline]] nce uint64x2_t multiply_long_bottom(uint64x2_t inactive, uint32x4_t a, uint32x4_t b, mve_pred16_t p) { return vmullbq_int_m_u32(inactive, a, b, p); }
1146[[gnu::always_inline]] nce uint64x2_t multiply_long_top(uint64x2_t inactive, uint32x4_t a, uint32x4_t b, mve_pred16_t p) { return vmulltq_int_m_u32(inactive, a, b, p); }
1147template <int offset>[[gnu::always_inline]] nce int64x2_t load_doubleword_gather_base(uint64x2_t *addr, mve_pred16_t p) { return vldrdq_gather_base_wb_z_s64(addr, offset, p); }
1148template <int offset>[[gnu::always_inline]] nce uint64x2_t load_doubleword_gather_base(uint64x2_t *addr, mve_pred16_t p) { return vldrdq_gather_base_wb_z_u64(addr, offset, p); }
1149template <int offset>[[gnu::always_inline]] nce void store_doubleword_scatter_base(uint64x2_t addr, int64x2_t value) { return vstrdq_scatter_base_s64(addr, offset, value); }
1150template <int offset>[[gnu::always_inline]] nce void store_doubleword_scatter_base(uint64x2_t addr, int64x2_t value, mve_pred16_t p) { return vstrdq_scatter_base_p_s64(addr, offset, value, p); }
1151template <int offset>[[gnu::always_inline]] nce void store_doubleword_scatter_base(uint64x2_t *addr, int64x2_t value) { return vstrdq_scatter_base_wb_s64(addr, offset, value); }
1152template <int offset>[[gnu::always_inline]] nce void store_doubleword_scatter_base(uint64x2_t *addr, int64x2_t value, mve_pred16_t p) { return vstrdq_scatter_base_wb_p_s64(addr, offset, value, p); }
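// Usage sketch (illustrative comment): 'bases' holds two absolute addresses in a uint64x2_t
// and the template argument is an immediate byte offset added to every lane, so this stores
// each lane of 'v' to bases[i] + 8.
//   store_doubleword_scatter_base<8>(bases, v);   // vstrdq_scatter_base_u64
// 'bases' and 'v' (both uint64x2_t) are assumed to come from surrounding code.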
1153[[gnu::always_inline]] nce uint32x4_t subtract(uint32x4_t inactive, uint32x4_t a, uint32x4_t b, mve_pred16_t p) { return vsubq_m_u32(inactive, a, b, p); }
1154[[gnu::always_inline]] nce uint32x4_t subtract_halve(uint32x4_t inactive, uint32x4_t a, uint32x4_t b, mve_pred16_t p) { return vhsubq_m_u32(inactive, a, b, p); }
1155[[gnu::always_inline]] nce uint32x4_t subtract_saturate(uint32x4_t inactive, uint32x4_t a, uint32x4_t b, mve_pred16_t p) { return vqsubq_m_u32(inactive, a, b, p); }
1156[[gnu::always_inline]] nce uint32x4_t bitwise_clear(uint32x4_t inactive, uint32x4_t a, uint32x4_t b, mve_pred16_t p) { return vbicq_m_u32(inactive, a, b, p); }
1157[[gnu::always_inline]] nce uint32x4_t bitwise_and(uint32x4_t inactive, uint32x4_t a, uint32x4_t b, mve_pred16_t p) { return vandq_m_u32(inactive, a, b, p); }
1158[[gnu::always_inline]] nce uint32x4_t bitwise_xor(uint32x4_t inactive, uint32x4_t a, uint32x4_t b, mve_pred16_t p) { return veorq_m_u32(inactive, a, b, p); }
1159[[gnu::always_inline]] nce uint32x4_t bitwise_or_not(uint32x4_t inactive, uint32x4_t a, uint32x4_t b, mve_pred16_t p) { return vornq_m_u32(inactive, a, b, p); }
1160[[gnu::always_inline]] nce uint32x4_t bitwise_or(uint32x4_t inactive, uint32x4_t a, uint32x4_t b, mve_pred16_t p) { return vorrq_m_u32(inactive, a, b, p); }
1161[[gnu::always_inline]] nce uint32x4_t complex_add_rotate_90(uint32x4_t a, uint32x4_t b) { return vcaddq_rot90_u32(a, b); }
1162[[gnu::always_inline]] nce uint32x4_t complex_add_rotate_270(uint32x4_t a, uint32x4_t b) { return vcaddq_rot270_u32(a, b); }
1163[[gnu::always_inline]] nce uint32x4_t complex_add_rotate_90(uint32x4_t inactive, uint32x4_t a, uint32x4_t b, mve_pred16_t p) { return vcaddq_rot90_m_u32(inactive, a, b, p); }
1164[[gnu::always_inline]] nce uint32x4_t complex_add_rotate_270(uint32x4_t inactive, uint32x4_t a, uint32x4_t b, mve_pred16_t p) { return vcaddq_rot270_m_u32(inactive, a, b, p); }
1165[[gnu::always_inline]] nce uint32x4_t subtract_with_carry(uint32x4_t inactive, uint32x4_t a, uint32x4_t b, unsigned *carry, mve_pred16_t p) { return vsbcq_m_u32(inactive, a, b, carry, p); }
1166[[gnu::always_inline]] nce uint32x4_t subtract(uint32x4_t a, uint32x4_t b, mve_pred16_t p) { return vsubq_x_u32(a, b, p); }
1167[[gnu::always_inline]] nce uint32x4_t subtract_halve(uint32x4_t a, uint32x4_t b, mve_pred16_t p) { return vhsubq_x_u32(a, b, p); }
1168[[gnu::always_inline]] nce uint32x4_t count_leading_zero_bits(uint32x4_t inactive, uint32x4_t a, mve_pred16_t p) { return vclzq_m_u32(inactive, a, p); }
1169[[gnu::always_inline]] nce uint32x4_t bitwise_clear(uint32x4_t a, uint32x4_t b, mve_pred16_t p) { return vbicq_x_u32(a, b, p); }
1170[[gnu::always_inline]] nce uint32x4_t bitwise_and(uint32x4_t a, uint32x4_t b, mve_pred16_t p) { return vandq_x_u32(a, b, p); }
1171[[gnu::always_inline]] nce uint32x4_t bitwise_xor(uint32x4_t a, uint32x4_t b, mve_pred16_t p) { return veorq_x_u32(a, b, p); }
1172[[gnu::always_inline]] nce uint32x4_t bitwise_not(uint32x4_t inactive, uint32x4_t a, mve_pred16_t p) { return vmvnq_m_u32(inactive, a, p); }
1173[[gnu::always_inline]] nce uint32x4_t bitwise_or_not(uint32x4_t a, uint32x4_t b, mve_pred16_t p) { return vornq_x_u32(a, b, p); }
1174[[gnu::always_inline]] nce uint32x4_t bitwise_or(uint32x4_t a, uint32x4_t b, mve_pred16_t p) { return vorrq_x_u32(a, b, p); }
1175[[gnu::always_inline]] nce uint32x4_t complex_add_rotate_90(uint32x4_t a, uint32x4_t b, mve_pred16_t p) { return vcaddq_rot90_x_u32(a, b, p); }
1176[[gnu::always_inline]] nce uint32x4_t complex_add_rotate_270(uint32x4_t a, uint32x4_t b, mve_pred16_t p) { return vcaddq_rot270_x_u32(a, b, p); }
1177[[gnu::always_inline]] nce uint32x4_t subtract_with_carry(uint32x4_t a, uint32x4_t b, unsigned *carry) { return vsbcq_u32(a, b, carry); }
1178[[gnu::always_inline]] nce uint32x4_t subtract(uint32x4_t inactive, uint32x4_t a, uint32_t b, mve_pred16_t p) { return vsubq_m_n_u32(inactive, a, b, p); }
1179[[gnu::always_inline]] nce uint32x4_t subtract_halve(uint32x4_t inactive, uint32x4_t a, uint32_t b, mve_pred16_t p) { return vhsubq_m_n_u32(inactive, a, b, p); }
1180[[gnu::always_inline]] nce uint32x4_t subtract_saturate(uint32x4_t inactive, uint32x4_t a, uint32_t b, mve_pred16_t p) { return vqsubq_m_n_u32(inactive, a, b, p); }
1181[[gnu::always_inline]] nce uint32x4_t count_leading_zero_bits(uint32x4_t a, mve_pred16_t p) { return vclzq_x_u32(a, p); }
1182[[gnu::always_inline]] nce uint32x4_t bitwise_not(uint32x4_t a, mve_pred16_t p) { return vmvnq_x_u32(a, p); }
1183template <int offset>[[gnu::always_inline]] nce int32x4_t load_word_gather_base(uint32x4_t addr) { return vldrwq_gather_base_s32(addr, offset); }
1184template <int offset>[[gnu::always_inline]] nce uint32x4_t load_word_gather_base(uint32x4_t addr) { return vldrwq_gather_base_u32(addr, offset); }
1185template <int offset>[[gnu::always_inline]] nce int32x4_t load_word_gather_base(uint32x4_t addr, mve_pred16_t p) { return vldrwq_gather_base_z_s32(addr, offset, p); }
1186template <int offset>[[gnu::always_inline]] nce uint32x4_t load_word_gather_base(uint32x4_t addr, mve_pred16_t p) { return vldrwq_gather_base_z_u32(addr, offset, p); }
1187template <int offset>[[gnu::always_inline]] nce int32x4_t load_word_gather_base(uint32x4_t *addr) { return vldrwq_gather_base_wb_s32(addr, offset); }
1188template <int offset>[[gnu::always_inline]] nce uint32x4_t load_word_gather_base(uint32x4_t *addr) { return vldrwq_gather_base_wb_u32(addr, offset); }
1189template <int imm>[[gnu::always_inline]] nce uint32x4_t shift_left_long_bottom(uint32x4_t inactive, uint16x8_t a, mve_pred16_t p) { return vshllbq_m_n_u16(inactive, a, imm, p); }
1190template <int imm>[[gnu::always_inline]] nce uint32x4_t shift_left_long_top(uint32x4_t inactive, uint16x8_t a, mve_pred16_t p) { return vshlltq_m_n_u16(inactive, a, imm, p); }
1191[[gnu::always_inline]] nce uint32x4_t shift_left_round(uint32x4_t a, int32x4_t b, mve_pred16_t p) { return vrshlq_x_u32(a, b, p); }
1192[[gnu::always_inline]] nce uint32x4_t shift_left(uint32x4_t a, int32x4_t b, mve_pred16_t p) { return vshlq_x_u32(a, b, p); }
1193[[gnu::always_inline]] nce uint32x4_t shift_left_round(uint32x4_t inactive, uint32x4_t a, int32x4_t b, mve_pred16_t p) { return vrshlq_m_u32(inactive, a, b, p); }
1194[[gnu::always_inline]] nce uint32x4_t shift_left(uint32x4_t inactive, uint32x4_t a, int32x4_t b, mve_pred16_t p) { return vshlq_m_u32(inactive, a, b, p); }
1195[[gnu::always_inline]] nce uint32x4_t shift_left_round(uint32x4_t a, int32_t b) { return vrshlq_n_u32(a, b); }
1196[[gnu::always_inline]] nce uint32x4_t shift_left_round(uint32x4_t a, int32_t b, mve_pred16_t p) { return vrshlq_m_n_u32(a, b, p); }
1197template <int imm>[[gnu::always_inline]] nce uint32x4_t shift_left_carry(uint32x4_t a, uint32_t *b) { return vshlcq_u32(a, b, imm); }
1198template <int imm>[[gnu::always_inline]] nce uint32x4_t shift_left_carry(uint32x4_t a, uint32_t *b, mve_pred16_t p) { return vshlcq_m_u32(a, b, imm, p); }
1199template <int imm>[[gnu::always_inline]] nce uint32x4_t shift_left(uint32x4_t a) { return vshlq_n_u32(a, imm); }
1200[[gnu::always_inline]] nce uint32x4_t move_long_bottom(uint32x4_t inactive, uint16x8_t a, mve_pred16_t p) { return vmovlbq_m_u16(inactive, a, p); }
1201[[gnu::always_inline]] nce uint32x4_t move_long_top(uint32x4_t inactive, uint16x8_t a, mve_pred16_t p) { return vmovltq_m_u16(inactive, a, p); }
1202template <int offset>[[gnu::always_inline]] nce void store_word_scatter_base(uint32x4_t addr, int32x4_t value) { return vstrwq_scatter_base_s32(addr, offset, value); }
1203template <int offset>[[gnu::always_inline]] nce void store_word_scatter_base(uint32x4_t addr, int32x4_t value, mve_pred16_t p) { return vstrwq_scatter_base_p_s32(addr, offset, value, p); }
1204template <int offset>[[gnu::always_inline]] nce void store_word_scatter_base(uint32x4_t *addr, int32x4_t value) { return vstrwq_scatter_base_wb_s32(addr, offset, value); }
1205template <int offset>[[gnu::always_inline]] nce void store_word_scatter_base(uint32x4_t *addr, int32x4_t value, mve_pred16_t p) { return vstrwq_scatter_base_wb_p_s32(addr, offset, value, p); }
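// --- Usage sketch (illustrative, not part of the header) ---
// Scatter store: each lane of the address vector holds a 32-bit pointer value, and the template
// argument is an immediate byte offset added to every lane. Here VIDUP builds the four addresses
// p, p+4, p+8, p+12, making this a plain contiguous store purely for demonstration; the helper
// name is hypothetical and assumes an M-profile target with 32-bit pointers.
inline void scatter_demo(int32_t *p, int32x4_t values) {
  uint32x4_t addrs = vidupq_n_u32((uint32_t)(uintptr_t)p, 4);   // lane i holds the address of p[i]
  mve::store_word_scatter_base<0>(addrs, values);               // stores values[i] at addrs[i] + 0
}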
1206template <int imm>[[gnu::always_inline]] nce uint32x4_t shift_left_unsigned_saturate(uint32x4_t inactive, int32x4_t a, mve_pred16_t p) { return vqshluq_m_n_s32(inactive, a, imm, p); }
1207[[gnu::always_inline]] nce uint32x4_t reverse_64bit(uint32x4_t inactive, uint32x4_t a, mve_pred16_t p) { return vrev64q_m_u32(inactive, a, p); }
1208[[gnu::always_inline]] nce mve_pred16_t not_equal(uint32x4_t a, uint32x4_t b) { return vcmpneq_u32(a, b); }
1209[[gnu::always_inline]] nce mve_pred16_t not_equal(uint32x4_t a, uint32x4_t b, mve_pred16_t p) { return vcmpneq_m_u32(a, b, p); }
1210[[gnu::always_inline]] nce mve_pred16_t higher_or_same(uint32x4_t a, uint32x4_t b) { return vcmpcsq_u32(a, b); }
1211[[gnu::always_inline]] nce mve_pred16_t higher_or_same(uint32x4_t a, uint32x4_t b, mve_pred16_t p) { return vcmpcsq_m_u32(a, b, p); }
1212[[gnu::always_inline]] nce uint32x4_t reverse_64bit(uint32x4_t a, mve_pred16_t p) { return vrev64q_x_u32(a, p); }
1213template <int imm>[[gnu::always_inline]] nce uint32x4_t decrement_duplicate(uint32x4_t inactive, uint32_t a, mve_pred16_t p) { return vddupq_m_n_u32(inactive, a, imm, p); }
1214template <int imm>[[gnu::always_inline]] nce uint32x4_t decrement_duplicate(uint32x4_t inactive, uint32_t *a, mve_pred16_t p) { return vddupq_m_wb_u32(inactive, a, imm, p); }
1215template <int imm>[[gnu::always_inline]] nce uint32x4_t decrement_wrap_duplicate(uint32x4_t inactive, uint32_t a, uint32_t b, mve_pred16_t p) { return vdwdupq_m_n_u32(inactive, a, b, imm, p); }
1216template <int imm>[[gnu::always_inline]] nce uint32x4_t decrement_wrap_duplicate(uint32x4_t inactive, uint32_t *a, uint32_t b, mve_pred16_t p) { return vdwdupq_m_wb_u32(inactive, a, b, imm, p); }
1217template <int imm>[[gnu::always_inline]] nce uint32x4_t increment_duplicate(uint32x4_t inactive, uint32_t a, mve_pred16_t p) { return vidupq_m_n_u32(inactive, a, imm, p); }
1218template <int imm>[[gnu::always_inline]] nce uint32x4_t increment_duplicate(uint32x4_t inactive, uint32_t *a, mve_pred16_t p) { return vidupq_m_wb_u32(inactive, a, imm, p); }
1219template <int imm>[[gnu::always_inline]] nce uint32x4_t increment_wrap_duplicate(uint32x4_t inactive, uint32_t a, uint32_t b, mve_pred16_t p) { return viwdupq_m_n_u32(inactive, a, b, imm, p); }
1220template <int imm>[[gnu::always_inline]] nce uint32x4_t increment_wrap_duplicate(uint32x4_t inactive, uint32_t *a, uint32_t b, mve_pred16_t p) { return viwdupq_m_wb_u32(inactive, a, b, imm, p); }
1221[[gnu::always_inline]] nce uint32x4_t duplicate(uint32x4_t inactive, uint32_t a, mve_pred16_t p) { return vdupq_m_n_u32(inactive, a, p); }
1222[[gnu::always_inline]] nce uint32x4_t uninitialized(uint32x4_t t) { return vuninitializedq(t); }
1223[[gnu::always_inline]] nce uint32x4_t minimum_absolute(uint32x4_t a, int32x4_t b) { return vminaq_s32(a, b); }
1224[[gnu::always_inline]] nce uint32x4_t minimum_absolute(uint32x4_t a, int32x4_t b, mve_pred16_t p) { return vminaq_m_s32(a, b, p); }
1225[[gnu::always_inline]] nce uint32x4_t maximum_absolute(uint32x4_t a, int32x4_t b) { return vmaxaq_s32(a, b); }
1226[[gnu::always_inline]] nce uint32x4_t maximum_absolute(uint32x4_t a, int32x4_t b, mve_pred16_t p) { return vmaxaq_m_s32(a, b, p); }
1227[[gnu::always_inline]] nce mve_pred16_t equal(uint32x4_t a, uint32x4_t b) { return vcmpeqq_u32(a, b); }
1228[[gnu::always_inline]] nce mve_pred16_t higher(uint32x4_t a, uint32x4_t b) { return vcmphiq_u32(a, b); }
1229[[gnu::always_inline]] nce uint32x4_t min(uint32x4_t inactive, uint32x4_t a, uint32x4_t b, mve_pred16_t p) { return vminq_m_u32(inactive, a, b, p); }
1230[[gnu::always_inline]] nce uint32x4_t max(uint32x4_t inactive, uint32x4_t a, uint32x4_t b, mve_pred16_t p) { return vmaxq_m_u32(inactive, a, b, p); }
1231[[gnu::always_inline]] nce uint32x4_t subtract_absolute(uint32x4_t inactive, uint32x4_t a, uint32x4_t b, mve_pred16_t p) { return vabdq_m_u32(inactive, a, b, p); }
1232[[gnu::always_inline]] nce uint32x4_t add(uint32x4_t inactive, uint32x4_t a, uint32x4_t b, mve_pred16_t p) { return vaddq_m_u32(inactive, a, b, p); }
1233[[gnu::always_inline]] nce uint32x4_t add_carry_initialized(uint32x4_t inactive, uint32x4_t a, uint32x4_t b, unsigned *carry_out, mve_pred16_t p) { return vadciq_m_u32(inactive, a, b, carry_out, p); }
1234[[gnu::always_inline]] nce uint32x4_t add_carry(uint32x4_t inactive, uint32x4_t a, uint32x4_t b, unsigned *carry, mve_pred16_t p) { return vadcq_m_u32(inactive, a, b, carry, p); }
1235[[gnu::always_inline]] nce mve_pred16_t equal(uint32x4_t a, uint32x4_t b, mve_pred16_t p) { return vcmpeqq_m_u32(a, b, p); }
1236[[gnu::always_inline]] nce mve_pred16_t higher(uint32x4_t a, uint32x4_t b, mve_pred16_t p) { return vcmphiq_m_u32(a, b, p); }
1237[[gnu::always_inline]] nce uint32x4_t min(uint32x4_t a, uint32x4_t b, mve_pred16_t p) { return vminq_x_u32(a, b, p); }
1238[[gnu::always_inline]] nce uint32x4_t max(uint32x4_t a, uint32x4_t b, mve_pred16_t p) { return vmaxq_x_u32(a, b, p); }
1239[[gnu::always_inline]] nce uint32x4_t subtract_absolute(uint32x4_t a, uint32x4_t b, mve_pred16_t p) { return vabdq_x_u32(a, b, p); }
1240[[gnu::always_inline]] nce uint32x4_t add(uint32x4_t a, uint32x4_t b, mve_pred16_t p) { return vaddq_x_u32(a, b, p); }
1241[[gnu::always_inline]] nce uint32x4_t add_carry_initialized(uint32x4_t a, uint32x4_t b, unsigned *carry_out) { return vadciq_u32(a, b, carry_out); }
1242[[gnu::always_inline]] nce uint32x4_t add_carry(uint32x4_t a, uint32x4_t b, unsigned *carry) { return vadcq_u32(a, b, carry); }
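// --- Usage sketch (illustrative, not part of the header) ---
// 256-bit unsigned addition built from the carry-chained adds above. VADCI/VADC propagate the
// carry from lane 0 up through lane 3, so each uint32x4_t acts as one 128-bit little-endian limb;
// the scalar `carry` forwards the overflow of the low limb into the high limb. Helper name is
// hypothetical.
inline void add_u256(const uint32x4_t a[2], const uint32x4_t b[2], uint32x4_t out[2]) {
  unsigned carry = 0;
  out[0] = mve::add_carry_initialized(a[0], b[0], &carry);   // carry-in fixed at zero, carry-out written
  out[1] = mve::add_carry(a[1], b[1], &carry);               // consumes and updates the carry
}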
1243[[gnu::always_inline]] nce uint32x4_t add(uint32x4_t inactive, uint32x4_t a, uint32_t b, mve_pred16_t p) { return vaddq_m_n_u32(inactive, a, b, p); }
1244[[gnu::always_inline]] nce uint64_t reduce_add_long(uint32x4_t a, mve_pred16_t p) { return vaddlvq_p_u32(a, p); }
1245[[gnu::always_inline]] nce mve_pred16_t equal(uint32x4_t a, uint32_t b) { return vcmpeqq_n_u32(a, b); }
1246[[gnu::always_inline]] nce mve_pred16_t equal(uint32x4_t a, uint32_t b, mve_pred16_t p) { return vcmpeqq_m_n_u32(a, b, p); }
1247[[gnu::always_inline]] nce mve_pred16_t not_equal(uint32x4_t a, uint32_t b) { return vcmpneq_n_u32(a, b); }
1248[[gnu::always_inline]] nce mve_pred16_t not_equal(uint32x4_t a, uint32_t b, mve_pred16_t p) { return vcmpneq_m_n_u32(a, b, p); }
1249[[gnu::always_inline]] nce mve_pred16_t higher_or_same(uint32x4_t a, uint32_t b) { return vcmpcsq_n_u32(a, b); }
1250[[gnu::always_inline]] nce mve_pred16_t higher_or_same(uint32x4_t a, uint32_t b, mve_pred16_t p) { return vcmpcsq_m_n_u32(a, b, p); }
1251[[gnu::always_inline]] nce mve_pred16_t higher(uint32x4_t a, uint32_t b) { return vcmphiq_n_u32(a, b); }
1252[[gnu::always_inline]] nce mve_pred16_t higher(uint32x4_t a, uint32_t b, mve_pred16_t p) { return vcmphiq_m_n_u32(a, b, p); }
1253[[gnu::always_inline]] nce uint32x4_t add(uint32x4_t a, uint32_t b) { return vaddq_n_u32(a, b); }
1254[[gnu::always_inline]] nce uint32x4_t add(uint32x4_t a, uint32_t b, mve_pred16_t p) { return vaddq_x_n_u32(a, b, p); }
1255[[gnu::always_inline]] nce uint64_t reduce_add_long(uint32x4_t a) { return vaddlvq_u32(a); }
1256[[gnu::always_inline]] nce uint32_t reduce_add(uint32x4_t a) { return vaddvq_u32(a); }
1257[[gnu::always_inline]] nce uint32x4_t multiply_long_bottom_poly(uint32x4_t inactive, uint16x8_t a, uint16x8_t b, mve_pred16_t p) { return vmullbq_poly_m_p16(inactive, a, b, p); }
1258[[gnu::always_inline]] nce uint32x4_t multiply_long_bottom(uint32x4_t inactive, uint16x8_t a, uint16x8_t b, mve_pred16_t p) { return vmullbq_int_m_u16(inactive, a, b, p); }
1259[[gnu::always_inline]] nce uint32x4_t multiply_long_top_poly(uint32x4_t inactive, uint16x8_t a, uint16x8_t b, mve_pred16_t p) { return vmulltq_poly_m_p16(inactive, a, b, p); }
1260[[gnu::always_inline]] nce uint32x4_t multiply_long_top(uint32x4_t inactive, uint16x8_t a, uint16x8_t b, mve_pred16_t p) { return vmulltq_int_m_u16(inactive, a, b, p); }
1261[[gnu::always_inline]] nce uint32x4_t add_halve(uint32x4_t inactive, uint32x4_t a, uint32x4_t b, mve_pred16_t p) { return vhaddq_m_u32(inactive, a, b, p); }
1262[[gnu::always_inline]] nce uint32x4_t add_halve_round(uint32x4_t inactive, uint32x4_t a, uint32x4_t b, mve_pred16_t p) { return vrhaddq_m_u32(inactive, a, b, p); }
1263[[gnu::always_inline]] nce uint32x4_t add_saturate(uint32x4_t inactive, uint32x4_t a, uint32x4_t b, mve_pred16_t p) { return vqaddq_m_u32(inactive, a, b, p); }
1264[[gnu::always_inline]] nce uint32x4_t multiply_high(uint32x4_t inactive, uint32x4_t a, uint32x4_t b, mve_pred16_t p) { return vmulhq_m_u32(inactive, a, b, p); }
1265[[gnu::always_inline]] nce uint32x4_t add_halve(uint32x4_t a, uint32x4_t b, mve_pred16_t p) { return vhaddq_x_u32(a, b, p); }
1266[[gnu::always_inline]] nce uint32x4_t add_halve_round(uint32x4_t a, uint32x4_t b, mve_pred16_t p) { return vrhaddq_x_u32(a, b, p); }
1267[[gnu::always_inline]] nce uint32x4_t add_halve(uint32x4_t inactive, uint32x4_t a, uint32_t b, mve_pred16_t p) { return vhaddq_m_n_u32(inactive, a, b, p); }
1268[[gnu::always_inline]] nce uint32x4_t add_saturate(uint32x4_t inactive, uint32x4_t a, uint32_t b, mve_pred16_t p) { return vqaddq_m_n_u32(inactive, a, b, p); }
1269[[gnu::always_inline]] nce uint32x4_t multiply_high(uint32x4_t a, uint32x4_t b) { return vmulhq_u32(a, b); }
1270[[gnu::always_inline]] nce uint32x4_t multiply_high(uint32x4_t a, uint32x4_t b, mve_pred16_t p) { return vmulhq_x_u32(a, b, p); }
1271[[gnu::always_inline]] nce uint64x2_t multiply_long_bottom(uint32x4_t a, uint32x4_t b) { return vmullbq_int_u32(a, b); }
1272[[gnu::always_inline]] nce uint64x2_t multiply_long_bottom(uint32x4_t a, uint32x4_t b, mve_pred16_t p) { return vmullbq_int_x_u32(a, b, p); }
1273[[gnu::always_inline]] nce uint64x2_t multiply_long_top(uint32x4_t a, uint32x4_t b) { return vmulltq_int_u32(a, b); }
1274[[gnu::always_inline]] nce uint32x4_t multiply(uint32x4_t inactive, uint32x4_t a, uint32x4_t b, mve_pred16_t p) { return vmulq_m_u32(inactive, a, b, p); }
1275[[gnu::always_inline]] nce uint32x4_t multiply_round_high(uint32x4_t inactive, uint32x4_t a, uint32x4_t b, mve_pred16_t p) { return vrmulhq_m_u32(inactive, a, b, p); }
1276[[gnu::always_inline]] nce uint64x2_t multiply_long_top(uint32x4_t a, uint32x4_t b, mve_pred16_t p) { return vmulltq_int_x_u32(a, b, p); }
1277[[gnu::always_inline]] nce uint32x4_t multiply(uint32x4_t a, uint32x4_t b, mve_pred16_t p) { return vmulq_x_u32(a, b, p); }
1278[[gnu::always_inline]] nce uint32x4_t multiply_round_high(uint32x4_t a, uint32x4_t b, mve_pred16_t p) { return vrmulhq_x_u32(a, b, p); }
1279[[gnu::always_inline]] nce uint32x4_t multiply(uint32x4_t inactive, uint32x4_t a, uint32_t b, mve_pred16_t p) { return vmulq_m_n_u32(inactive, a, b, p); }
1280[[gnu::always_inline]] nce uint32x4_t multiply_round_high(uint32x4_t a, uint32x4_t b) { return vrmulhq_u32(a, b); }
1281[[gnu::always_inline]] nce uint32_t multiply_add_dual_reduce_add(uint32x4_t m1, uint32x4_t m2) { return vmladavq_u32(m1, m2); }
1282[[gnu::always_inline]] nce uint32_t multiply_add_dual_reduce_add(uint32x4_t m1, uint32x4_t m2, mve_pred16_t p) { return vmladavq_p_u32(m1, m2, p); }
1283[[gnu::always_inline]] nce uint64_t multiply_add_long_dual_reduce_add(uint32x4_t m1, uint32x4_t m2) { return vmlaldavq_u32(m1, m2); }
1284[[gnu::always_inline]] nce uint64_t multiply_add_long_round_dual_reduce_add_high(uint32x4_t a, uint32x4_t b) { return vrmlaldavhq_u32(a, b); }
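// --- Usage sketch (illustrative, not part of the header) ---
// Dot product of four unsigned 32-bit lanes. multiply_add_dual_reduce_add wraps VMLADAV with a
// 32-bit accumulator; the _long form used here wraps VMLALDAV and keeps a 64-bit sum, which
// avoids overflow when the products are large. Helper name is hypothetical.
inline uint64_t dot4_u32(uint32x4_t x, uint32x4_t y) {
  return mve::multiply_add_long_dual_reduce_add(x, y);   // sum over i of x[i] * y[i]
}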
1285template <int offset>[[gnu::always_inline]] nce void store_word_scatter_base(uint32x4_t addr, uint32x4_t value) { return vstrwq_scatter_base_u32(addr, offset, value); }
1286template <int offset>[[gnu::always_inline]] nce void store_word_scatter_base(uint32x4_t addr, uint32x4_t value, mve_pred16_t p) { return vstrwq_scatter_base_p_u32(addr, offset, value, p); }
1287template <int offset>[[gnu::always_inline]] nce void store_word_scatter_base(uint32x4_t *addr, uint32x4_t value) { return vstrwq_scatter_base_wb_u32(addr, offset, value); }
1288[[gnu::always_inline]] nce uint32x4_t shift_left_round_saturate(uint32x4_t inactive, uint32x4_t a, int32x4_t b, mve_pred16_t p) { return vqrshlq_m_u32(inactive, a, b, p); }
1289[[gnu::always_inline]] nce uint32x4_t shift_left_saturate(uint32x4_t inactive, uint32x4_t a, int32x4_t b, mve_pred16_t p) { return vqshlq_m_u32(inactive, a, b, p); }
1290[[gnu::always_inline]] nce uint32x4_t subtract_with_carry_initialized(uint32x4_t inactive, uint32x4_t a, uint32x4_t b, unsigned *carry_out, mve_pred16_t p) { return vsbciq_m_u32(inactive, a, b, carry_out, p); }
1291[[gnu::always_inline]] nce uint64_t multiply_add_long_dual_reduce_add(uint32x4_t m1, uint32x4_t m2, mve_pred16_t p) { return vmlaldavq_p_u32(m1, m2, p); }
1292[[gnu::always_inline]] nce uint64_t multiply_add_long_round_dual_reduce_add_high(uint32x4_t a, uint32x4_t b, mve_pred16_t p) { return vrmlaldavhq_p_u32(a, b, p); }
1293template <int offset>[[gnu::always_inline]] nce void store_word_scatter_base(uint32x4_t *addr, uint32x4_t value, mve_pred16_t p) { return vstrwq_scatter_base_wb_p_u32(addr, offset, value, p); }
1294template <int imm>[[gnu::always_inline]] nce uint32x4_t shift_right_round(uint32x4_t inactive, uint32x4_t a, mve_pred16_t p) { return vrshrq_m_n_u32(inactive, a, imm, p); }
1295template <int imm>[[gnu::always_inline]] nce uint32x4_t shift_right(uint32x4_t inactive, uint32x4_t a, mve_pred16_t p) { return vshrq_m_n_u32(inactive, a, imm, p); }
1296template <int imm>[[gnu::always_inline]] nce uint32x4_t shift_right_insert(uint32x4_t a, uint32x4_t b, mve_pred16_t p) { return vsriq_m_n_u32(a, b, imm, p); }
1297template <int imm>[[gnu::always_inline]] nce uint32x4_t shift_left_saturate(uint32x4_t inactive, uint32x4_t a, mve_pred16_t p) { return vqshlq_m_n_u32(inactive, a, imm, p); }
1298template <int imm>[[gnu::always_inline]] nce uint32x4_t shift_left(uint32x4_t inactive, uint32x4_t a, mve_pred16_t p) { return vshlq_m_n_u32(inactive, a, imm, p); }
1299template <int imm>[[gnu::always_inline]] nce uint32x4_t shift_left_insert(uint32x4_t a, uint32x4_t b, mve_pred16_t p) { return vsliq_m_n_u32(a, b, imm, p); }
1300[[gnu::always_inline]] nce uint32x4_t predicate_select(uint32x4_t a, uint32x4_t b, mve_pred16_t p) { return vpselq_u32(a, b, p); }
1301[[gnu::always_inline]] nce uint32x4_t multiply_add(uint32x4_t add, uint32x4_t m1, uint32_t m2) { return vmlaq_n_u32(add, m1, m2); }
1302[[gnu::always_inline]] nce uint32x4_t multiply_add(uint32x4_t add, uint32x4_t m1, uint32_t m2, mve_pred16_t p) { return vmlaq_m_n_u32(add, m1, m2, p); }
1303[[gnu::always_inline]] nce uint32x4_t multiply_add_scalar(uint32x4_t m1, uint32x4_t m2, uint32_t add) { return vmlasq_n_u32(m1, m2, add); }
1304[[gnu::always_inline]] nce uint32x4_t multiply_add_scalar(uint32x4_t m1, uint32x4_t m2, uint32_t add, mve_pred16_t p) { return vmlasq_m_n_u32(m1, m2, add, p); }
1305[[gnu::always_inline]] nce uint32x4_t subtract_with_carry_initialized(uint32x4_t a, uint32x4_t b, unsigned *carry_out) { return vsbciq_u32(a, b, carry_out); }
1306[[gnu::always_inline]] nce uint32x4_t bit_reverse_shift_right(uint32x4_t inactive, uint32x4_t a, int32_t b, mve_pred16_t p) { return vbrsrq_m_n_u32(inactive, a, b, p); }
1307[[gnu::always_inline]] nce uint32_t reduce_add(uint32x4_t a, mve_pred16_t p) { return vaddvq_p_u32(a, p); }
1308template <int offset>[[gnu::always_inline]] nce int32x4_t load_word_gather_base(uint32x4_t *addr, mve_pred16_t p) { return vldrwq_gather_base_wb_z_s32(addr, offset, p); }
1309template <int offset>[[gnu::always_inline]] nce uint32x4_t load_word_gather_base(uint32x4_t *addr, mve_pred16_t p) { return vldrwq_gather_base_wb_z_u32(addr, offset, p); }
1310template <int imm>[[gnu::always_inline]] nce uint32x4_t shift_right_round(uint32x4_t a, mve_pred16_t p) { return vrshrq_x_n_u32(a, imm, p); }
1311template <int imm>[[gnu::always_inline]] nce uint32x4_t shift_right(uint32x4_t a, mve_pred16_t p) { return vshrq_x_n_u32(a, imm, p); }
1312template <int imm>[[gnu::always_inline]] nce uint32x4_t shift_left(uint32x4_t a, mve_pred16_t p) { return vshlq_x_n_u32(a, imm, p); }
1313[[gnu::always_inline]] nce uint32x4_t add_halve(uint32x4_t a, uint32_t b) { return vhaddq_n_u32(a, b); }
1314[[gnu::always_inline]] nce uint32x4_t add_halve(uint32x4_t a, uint32_t b, mve_pred16_t p) { return vhaddq_x_n_u32(a, b, p); }
1315[[gnu::always_inline]] nce uint32x4_t add_saturate(uint32x4_t a, uint32_t b) { return vqaddq_n_u32(a, b); }
1316[[gnu::always_inline]] nce uint32x4_t multiply(uint32x4_t a, uint32_t b, mve_pred16_t p) { return vmulq_x_n_u32(a, b, p); }
1317[[gnu::always_inline]] nce uint32x4_t subtract(uint32x4_t a, uint32_t b) { return vsubq_n_u32(a, b); }
1318[[gnu::always_inline]] nce uint32x4_t subtract(uint32x4_t a, uint32_t b, mve_pred16_t p) { return vsubq_x_n_u32(a, b, p); }
1319[[gnu::always_inline]] nce uint32x4_t subtract_halve(uint32x4_t a, uint32_t b) { return vhsubq_n_u32(a, b); }
1320[[gnu::always_inline]] nce uint32x4_t subtract_halve(uint32x4_t a, uint32_t b, mve_pred16_t p) { return vhsubq_x_n_u32(a, b, p); }
1321[[gnu::always_inline]] nce uint32x4_t subtract_saturate(uint32x4_t a, uint32_t b) { return vqsubq_n_u32(a, b); }
1322[[gnu::always_inline]] nce uint32x4_t bitwise_clear(uint32x4_t a, const uint32_t imm) { return vbicq_n_u32(a, imm); }
1323[[gnu::always_inline]] nce uint32x4_t bitwise_clear(uint32x4_t a, const uint32_t imm, mve_pred16_t p) { return vbicq_m_n_u32(a, imm, p); }
1324[[gnu::always_inline]] nce uint32x4_t bitwise_not(uint32x4_t inactive, const uint32_t imm, mve_pred16_t p) { return vmvnq_m_n_u32(inactive, imm, p); }
1325[[gnu::always_inline]] nce uint32x4_t bitwise_or(uint32x4_t a, const uint32_t imm) { return vorrq_n_u32(a, imm); }
1326[[gnu::always_inline]] nce uint32x4_t bitwise_or(uint32x4_t a, const uint32_t imm, mve_pred16_t p) { return vorrq_m_n_u32(a, imm, p); }
1327[[gnu::always_inline]] nce uint32x4_t bit_reverse_shift_right(uint32x4_t a, int32_t b) { return vbrsrq_n_u32(a, b); }
1328[[gnu::always_inline]] nce uint32x4_t bit_reverse_shift_right(uint32x4_t a, int32_t b, mve_pred16_t p) { return vbrsrq_x_n_u32(a, b, p); }
1329[[gnu::always_inline]] nce uint32x4_t shift_left_round_saturate(uint32x4_t a, int32_t b) { return vqrshlq_n_u32(a, b); }
1330[[gnu::always_inline]] nce uint32x4_t shift_left_round_saturate(uint32x4_t a, int32_t b, mve_pred16_t p) { return vqrshlq_m_n_u32(a, b, p); }
1331[[gnu::always_inline]] nce uint32x4_t shift_left_saturate(uint32x4_t a, int32_t b) { return vqshlq_r_u32(a, b); }
1332[[gnu::always_inline]] nce uint32x4_t shift_left_saturate(uint32x4_t a, int32_t b, mve_pred16_t p) { return vqshlq_m_r_u32(a, b, p); }
1333[[gnu::always_inline]] nce uint32x4_t shift_left(uint32x4_t a, int32_t b) { return vshlq_r_u32(a, b); }
1334[[gnu::always_inline]] nce uint32x4_t shift_left(uint32x4_t a, int32_t b, mve_pred16_t p) { return vshlq_m_r_u32(a, b, p); }
1335[[gnu::always_inline]] nce mve_pred16_t predicate_not(mve_pred16_t a) { return vpnot(a); }
1336template <> [[gnu::always_inline]] nce int8x16_t create(uint64_t a, uint64_t b) { return vcreateq_s8(a, b); }
1337template <> [[gnu::always_inline]] nce int16x8_t create(uint64_t a, uint64_t b) { return vcreateq_s16(a, b); }
1338template <> [[gnu::always_inline]] nce int32x4_t create(uint64_t a, uint64_t b) { return vcreateq_s32(a, b); }
1339template <> [[gnu::always_inline]] nce int64x2_t create(uint64_t a, uint64_t b) { return vcreateq_s64(a, b); }
1340template <> [[gnu::always_inline]] nce uint8x16_t create(uint64_t a, uint64_t b) { return vcreateq_u8(a, b); }
1341template <> [[gnu::always_inline]] nce uint16x8_t create(uint64_t a, uint64_t b) { return vcreateq_u16(a, b); }
1342template <> [[gnu::always_inline]] nce uint32x4_t create(uint64_t a, uint64_t b) { return vcreateq_u32(a, b); }
1343template <> [[gnu::always_inline]] nce uint64x2_t create(uint64_t a, uint64_t b) { return vcreateq_u64(a, b); }
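// --- Usage sketch (illustrative, not part of the header) ---
// Building a vector from two 64-bit halves with the create<> specializations above. With VCREATE
// the first argument supplies lanes 0-1 (low word first) and the second lanes 2-3. Helper name
// is hypothetical.
inline uint32x4_t make_lane_indices() {
  return mve::create<uint32x4_t>(0x0000000100000000ull,    // lanes {0, 1}
                                 0x0000000300000002ull);   // lanes {2, 3}
}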
1344template <int imm>[[gnu::always_inline]] nce uint8x16_t decrement_duplicate(uint32_t a) { return vddupq_n_u8(a, imm); }
1345template <int imm>[[gnu::always_inline]] nce uint16x8_t decrement_duplicate(uint32_t a) { return vddupq_n_u16(a, imm); }
1346template <int imm>[[gnu::always_inline]] nce uint32x4_t decrement_duplicate(uint32_t a) { return vddupq_n_u32(a, imm); }
1347template <int imm>[[gnu::always_inline]] nce uint8x16_t decrement_duplicate(uint32_t *a) { return vddupq_wb_u8(a, imm); }
1348template <int imm>[[gnu::always_inline]] nce uint16x8_t decrement_duplicate(uint32_t *a) { return vddupq_wb_u16(a, imm); }
1349template <int imm>[[gnu::always_inline]] nce uint32x4_t decrement_duplicate(uint32_t *a) { return vddupq_wb_u32(a, imm); }
1350template <int imm>[[gnu::always_inline]] nce uint8x16_t decrement_duplicate(uint32_t a, mve_pred16_t p) { return vddupq_x_n_u8(a, imm, p); }
1351template <int imm>[[gnu::always_inline]] nce uint16x8_t decrement_duplicate(uint32_t a, mve_pred16_t p) { return vddupq_x_n_u16(a, imm, p); }
1352template <int imm>[[gnu::always_inline]] nce uint32x4_t decrement_duplicate(uint32_t a, mve_pred16_t p) { return vddupq_x_n_u32(a, imm, p); }
1353template <int imm>[[gnu::always_inline]] nce uint8x16_t decrement_duplicate(uint32_t *a, mve_pred16_t p) { return vddupq_x_wb_u8(a, imm, p); }
1354template <int imm>[[gnu::always_inline]] nce uint16x8_t decrement_duplicate(uint32_t *a, mve_pred16_t p) { return vddupq_x_wb_u16(a, imm, p); }
1355template <int imm>[[gnu::always_inline]] nce uint32x4_t decrement_duplicate(uint32_t *a, mve_pred16_t p) { return vddupq_x_wb_u32(a, imm, p); }
1356template <int imm>[[gnu::always_inline]] nce uint8x16_t decrement_wrap_duplicate(uint32_t a, uint32_t b) { return vdwdupq_n_u8(a, b, imm); }
1357template <int imm>[[gnu::always_inline]] nce uint16x8_t decrement_wrap_duplicate(uint32_t a, uint32_t b) { return vdwdupq_n_u16(a, b, imm); }
1358template <int imm>[[gnu::always_inline]] nce uint32x4_t decrement_wrap_duplicate(uint32_t a, uint32_t b) { return vdwdupq_n_u32(a, b, imm); }
1359template <int imm>[[gnu::always_inline]] nce uint8x16_t decrement_wrap_duplicate(uint32_t *a, uint32_t b) { return vdwdupq_wb_u8(a, b, imm); }
1360template <int imm>[[gnu::always_inline]] nce uint16x8_t decrement_wrap_duplicate(uint32_t *a, uint32_t b) { return vdwdupq_wb_u16(a, b, imm); }
1361template <int imm>[[gnu::always_inline]] nce uint32x4_t decrement_wrap_duplicate(uint32_t *a, uint32_t b) { return vdwdupq_wb_u32(a, b, imm); }
1362template <int imm>[[gnu::always_inline]] nce uint8x16_t decrement_wrap_duplicate(uint32_t a, uint32_t b, mve_pred16_t p) { return vdwdupq_x_n_u8(a, b, imm, p); }
1363template <int imm>[[gnu::always_inline]] nce uint16x8_t decrement_wrap_duplicate(uint32_t a, uint32_t b, mve_pred16_t p) { return vdwdupq_x_n_u16(a, b, imm, p); }
1364template <int imm>[[gnu::always_inline]] nce uint32x4_t decrement_wrap_duplicate(uint32_t a, uint32_t b, mve_pred16_t p) { return vdwdupq_x_n_u32(a, b, imm, p); }
1365template <int imm>[[gnu::always_inline]] nce uint8x16_t decrement_wrap_duplicate(uint32_t *a, uint32_t b, mve_pred16_t p) { return vdwdupq_x_wb_u8(a, b, imm, p); }
1366template <int imm>[[gnu::always_inline]] nce uint16x8_t decrement_wrap_duplicate(uint32_t *a, uint32_t b, mve_pred16_t p) { return vdwdupq_x_wb_u16(a, b, imm, p); }
1367template <int imm>[[gnu::always_inline]] nce uint32x4_t decrement_wrap_duplicate(uint32_t *a, uint32_t b, mve_pred16_t p) { return vdwdupq_x_wb_u32(a, b, imm, p); }
1368template <int imm>[[gnu::always_inline]] nce uint8x16_t increment_duplicate(uint32_t a) { return vidupq_n_u8(a, imm); }
1369template <int imm>[[gnu::always_inline]] nce uint16x8_t increment_duplicate(uint32_t a) { return vidupq_n_u16(a, imm); }
1370template <int imm>[[gnu::always_inline]] nce uint32x4_t increment_duplicate(uint32_t a) { return vidupq_n_u32(a, imm); }
1371template <int imm>[[gnu::always_inline]] nce uint8x16_t increment_duplicate(uint32_t *a) { return vidupq_wb_u8(a, imm); }
1372template <int imm>[[gnu::always_inline]] nce uint16x8_t increment_duplicate(uint32_t *a) { return vidupq_wb_u16(a, imm); }
1373template <int imm>[[gnu::always_inline]] nce uint32x4_t increment_duplicate(uint32_t *a) { return vidupq_wb_u32(a, imm); }
1374template <int imm>[[gnu::always_inline]] nce uint8x16_t increment_duplicate(uint32_t a, mve_pred16_t p) { return vidupq_x_n_u8(a, imm, p); }
1375template <int imm>[[gnu::always_inline]] nce uint16x8_t increment_duplicate(uint32_t a, mve_pred16_t p) { return vidupq_x_n_u16(a, imm, p); }
1376template <int imm>[[gnu::always_inline]] nce uint32x4_t increment_duplicate(uint32_t a, mve_pred16_t p) { return vidupq_x_n_u32(a, imm, p); }
1377template <int imm>[[gnu::always_inline]] nce uint8x16_t increment_duplicate(uint32_t *a, mve_pred16_t p) { return vidupq_x_wb_u8(a, imm, p); }
1378template <int imm>[[gnu::always_inline]] nce uint16x8_t increment_duplicate(uint32_t *a, mve_pred16_t p) { return vidupq_x_wb_u16(a, imm, p); }
1379template <int imm>[[gnu::always_inline]] nce uint32x4_t increment_duplicate(uint32_t *a, mve_pred16_t p) { return vidupq_x_wb_u32(a, imm, p); }
1380template <int imm>[[gnu::always_inline]] nce uint8x16_t increment_wrap_duplicate(uint32_t a, uint32_t b) { return viwdupq_n_u8(a, b, imm); }
1381template <int imm>[[gnu::always_inline]] nce uint16x8_t increment_wrap_duplicate(uint32_t a, uint32_t b) { return viwdupq_n_u16(a, b, imm); }
1382template <int imm>[[gnu::always_inline]] nce uint32x4_t increment_wrap_duplicate(uint32_t a, uint32_t b) { return viwdupq_n_u32(a, b, imm); }
1383template <int imm>[[gnu::always_inline]] nce uint8x16_t increment_wrap_duplicate(uint32_t *a, uint32_t b) { return viwdupq_wb_u8(a, b, imm); }
1384template <int imm>[[gnu::always_inline]] nce uint16x8_t increment_wrap_duplicate(uint32_t *a, uint32_t b) { return viwdupq_wb_u16(a, b, imm); }
1385template <int imm>[[gnu::always_inline]] nce uint32x4_t increment_wrap_duplicate(uint32_t *a, uint32_t b) { return viwdupq_wb_u32(a, b, imm); }
1386template <int imm>[[gnu::always_inline]] nce uint8x16_t increment_wrap_duplicate(uint32_t a, uint32_t b, mve_pred16_t p) { return viwdupq_x_n_u8(a, b, imm, p); }
1387template <int imm>[[gnu::always_inline]] nce uint16x8_t increment_wrap_duplicate(uint32_t a, uint32_t b, mve_pred16_t p) { return viwdupq_x_n_u16(a, b, imm, p); }
1388template <int imm>[[gnu::always_inline]] nce uint32x4_t increment_wrap_duplicate(uint32_t a, uint32_t b, mve_pred16_t p) { return viwdupq_x_n_u32(a, b, imm, p); }
1389template <int imm>[[gnu::always_inline]] nce uint8x16_t increment_wrap_duplicate(uint32_t *a, uint32_t b, mve_pred16_t p) { return viwdupq_x_wb_u8(a, b, imm, p); }
1390template <int imm>[[gnu::always_inline]] nce uint16x8_t increment_wrap_duplicate(uint32_t *a, uint32_t b, mve_pred16_t p) { return viwdupq_x_wb_u16(a, b, imm, p); }
1391template <int imm>[[gnu::always_inline]] nce uint32x4_t increment_wrap_duplicate(uint32_t *a, uint32_t b, mve_pred16_t p) { return viwdupq_x_wb_u32(a, b, imm, p); }
1392[[gnu::always_inline]] nce int8x16_t duplicate(int8_t a, mve_pred16_t p) { return vdupq_x_n_s8(a, p); }
1393[[gnu::always_inline]] nce int16x8_t duplicate(int16_t a, mve_pred16_t p) { return vdupq_x_n_s16(a, p); }
1394[[gnu::always_inline]] nce int32x4_t duplicate(int32_t a, mve_pred16_t p) { return vdupq_x_n_s32(a, p); }
1395[[gnu::always_inline]] nce uint8x16_t duplicate(uint8_t a, mve_pred16_t p) { return vdupq_x_n_u8(a, p); }
1396[[gnu::always_inline]] nce uint16x8_t duplicate(uint16_t a, mve_pred16_t p) { return vdupq_x_n_u16(a, p); }
1397[[gnu::always_inline]] nce uint32x4_t duplicate(uint32_t a, mve_pred16_t p) { return vdupq_x_n_u32(a, p); }
1398template <> [[gnu::always_inline]] nce int8x16_t uninitialized() { return vuninitializedq_s8(); }
1399template <> [[gnu::always_inline]] nce int16x8_t uninitialized() { return vuninitializedq_s16(); }
1400template <> [[gnu::always_inline]] nce int32x4_t uninitialized() { return vuninitializedq_s32(); }
1401template <> [[gnu::always_inline]] nce int64x2_t uninitialized() { return vuninitializedq_s64(); }
1402template <> [[gnu::always_inline]] nce uint8x16_t uninitialized() { return vuninitializedq_u8(); }
1403template <> [[gnu::always_inline]] nce uint16x8_t uninitialized() { return vuninitializedq_u16(); }
1404template <> [[gnu::always_inline]] nce uint32x4_t uninitialized() { return vuninitializedq_u32(); }
1405template <> [[gnu::always_inline]] nce uint64x2_t uninitialized() { return vuninitializedq_u64(); }
1406[[gnu::always_inline]] nce int64x2_t uninitialized(int64x2_t t) { return vuninitializedq(t); }
1407[[gnu::always_inline]] nce int8_t minimum_across_vector(int8_t a, int8x16_t b) { return vminvq_s8(a, b); }
1408[[gnu::always_inline]] nce int16_t minimum_across_vector(int16_t a, int16x8_t b) { return vminvq_s16(a, b); }
1409[[gnu::always_inline]] nce int32_t minimum_across_vector(int32_t a, int32x4_t b) { return vminvq_s32(a, b); }
1410[[gnu::always_inline]] nce uint8_t minimum_across_vector(uint8_t a, uint8x16_t b) { return vminvq_u8(a, b); }
1411[[gnu::always_inline]] nce uint16_t minimum_across_vector(uint16_t a, uint16x8_t b) { return vminvq_u16(a, b); }
1412[[gnu::always_inline]] nce uint32_t minimum_across_vector(uint32_t a, uint32x4_t b) { return vminvq_u32(a, b); }
1413[[gnu::always_inline]] nce int8_t minimum_across_vector(int8_t a, int8x16_t b, mve_pred16_t p) { return vminvq_p_s8(a, b, p); }
1414[[gnu::always_inline]] nce int16_t minimum_across_vector(int16_t a, int16x8_t b, mve_pred16_t p) { return vminvq_p_s16(a, b, p); }
1415[[gnu::always_inline]] nce int32_t minimum_across_vector(int32_t a, int32x4_t b, mve_pred16_t p) { return vminvq_p_s32(a, b, p); }
1416[[gnu::always_inline]] nce uint8_t minimum_across_vector(uint8_t a, uint8x16_t b, mve_pred16_t p) { return vminvq_p_u8(a, b, p); }
1417[[gnu::always_inline]] nce uint16_t minimum_across_vector(uint16_t a, uint16x8_t b, mve_pred16_t p) { return vminvq_p_u16(a, b, p); }
1418[[gnu::always_inline]] nce uint32_t minimum_across_vector(uint32_t a, uint32x4_t b, mve_pred16_t p) { return vminvq_p_u32(a, b, p); }
1419[[gnu::always_inline]] nce uint8_t minimum_absolute_across_vector(uint8_t a, int8x16_t b) { return vminavq_s8(a, b); }
1420[[gnu::always_inline]] nce uint16_t minimum_absolute_across_vector(uint16_t a, int16x8_t b) { return vminavq_s16(a, b); }
1421[[gnu::always_inline]] nce uint32_t minimum_absolute_across_vector(uint32_t a, int32x4_t b) { return vminavq_s32(a, b); }
1422[[gnu::always_inline]] nce uint8_t minimum_absolute_across_vector(uint8_t a, int8x16_t b, mve_pred16_t p) { return vminavq_p_s8(a, b, p); }
1423[[gnu::always_inline]] nce uint16_t minimum_absolute_across_vector(uint16_t a, int16x8_t b, mve_pred16_t p) { return vminavq_p_s16(a, b, p); }
1424[[gnu::always_inline]] nce uint32_t minimum_absolute_across_vector(uint32_t a, int32x4_t b, mve_pred16_t p) { return vminavq_p_s32(a, b, p); }
1425[[gnu::always_inline]] nce int8_t maximum_across_vector(int8_t a, int8x16_t b) { return vmaxvq_s8(a, b); }
1426[[gnu::always_inline]] nce int16_t maximum_across_vector(int16_t a, int16x8_t b) { return vmaxvq_s16(a, b); }
1427[[gnu::always_inline]] nce int32_t maximum_across_vector(int32_t a, int32x4_t b) { return vmaxvq_s32(a, b); }
1428[[gnu::always_inline]] nce uint8_t maximum_across_vector(uint8_t a, uint8x16_t b) { return vmaxvq_u8(a, b); }
1429[[gnu::always_inline]] nce uint16_t maximum_across_vector(uint16_t a, uint16x8_t b) { return vmaxvq_u16(a, b); }
1430[[gnu::always_inline]] nce uint32_t maximum_across_vector(uint32_t a, uint32x4_t b) { return vmaxvq_u32(a, b); }
1431[[gnu::always_inline]] nce int8_t maximum_across_vector(int8_t a, int8x16_t b, mve_pred16_t p) { return vmaxvq_p_s8(a, b, p); }
1432[[gnu::always_inline]] nce int16_t maximum_across_vector(int16_t a, int16x8_t b, mve_pred16_t p) { return vmaxvq_p_s16(a, b, p); }
1433[[gnu::always_inline]] nce int32_t maximum_across_vector(int32_t a, int32x4_t b, mve_pred16_t p) { return vmaxvq_p_s32(a, b, p); }
1434[[gnu::always_inline]] nce uint8_t maximum_across_vector(uint8_t a, uint8x16_t b, mve_pred16_t p) { return vmaxvq_p_u8(a, b, p); }
1435[[gnu::always_inline]] nce uint16_t maximum_across_vector(uint16_t a, uint16x8_t b, mve_pred16_t p) { return vmaxvq_p_u16(a, b, p); }
1436[[gnu::always_inline]] nce uint32_t maximum_across_vector(uint32_t a, uint32x4_t b, mve_pred16_t p) { return vmaxvq_p_u32(a, b, p); }
1437[[gnu::always_inline]] nce uint8_t maximum_absolute_across_vector(uint8_t a, int8x16_t b) { return vmaxavq_s8(a, b); }
1438[[gnu::always_inline]] nce uint16_t maximum_absolute_across_vector(uint16_t a, int16x8_t b) { return vmaxavq_s16(a, b); }
1439[[gnu::always_inline]] nce uint32_t maximum_absolute_across_vector(uint32_t a, int32x4_t b) { return vmaxavq_s32(a, b); }
1440[[gnu::always_inline]] nce uint8_t maximum_absolute_across_vector(uint8_t a, int8x16_t b, mve_pred16_t p) { return vmaxavq_p_s8(a, b, p); }
1441[[gnu::always_inline]] nce uint16_t maximum_absolute_across_vector(uint16_t a, int16x8_t b, mve_pred16_t p) { return vmaxavq_p_s16(a, b, p); }
1442[[gnu::always_inline]] nce uint32_t maximum_absolute_across_vector(uint32_t a, int32x4_t b, mve_pred16_t p) { return vmaxavq_p_s32(a, b, p); }
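// --- Usage sketch (illustrative, not part of the header) ---
// Horizontal maximum: maximum_across_vector wraps VMAXV, folding the running scalar `a` with
// every lane of `b`. Seeding with 0 is safe for unsigned data. Helper name is hypothetical.
inline uint32_t max_lane(uint32x4_t v) {
  uint32_t seed = 0;
  return mve::maximum_across_vector(seed, v);
}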
1443[[gnu::always_inline]] nce uint32_t absolute_subtract_add(uint32_t a, int8x16_t b, int8x16_t c) { return vabavq_s8(a, b, c); }
1444[[gnu::always_inline]] nce uint32_t absolute_subtract_add(uint32_t a, int16x8_t b, int16x8_t c) { return vabavq_s16(a, b, c); }
1445[[gnu::always_inline]] nce uint32_t absolute_subtract_add(uint32_t a, int32x4_t b, int32x4_t c) { return vabavq_s32(a, b, c); }
1446[[gnu::always_inline]] nce uint32_t absolute_subtract_add(uint32_t a, uint8x16_t b, uint8x16_t c) { return vabavq_u8(a, b, c); }
1447[[gnu::always_inline]] nce uint32_t absolute_subtract_add(uint32_t a, uint16x8_t b, uint16x8_t c) { return vabavq_u16(a, b, c); }
1448[[gnu::always_inline]] nce uint32_t absolute_subtract_add(uint32_t a, uint32x4_t b, uint32x4_t c) { return vabavq_u32(a, b, c); }
1449[[gnu::always_inline]] nce uint32_t absolute_subtract_add(uint32_t a, int8x16_t b, int8x16_t c, mve_pred16_t p) { return vabavq_p_s8(a, b, c, p); }
1450[[gnu::always_inline]] nce uint32_t absolute_subtract_add(uint32_t a, int16x8_t b, int16x8_t c, mve_pred16_t p) { return vabavq_p_s16(a, b, c, p); }
1451[[gnu::always_inline]] nce uint32_t absolute_subtract_add(uint32_t a, int32x4_t b, int32x4_t c, mve_pred16_t p) { return vabavq_p_s32(a, b, c, p); }
1452[[gnu::always_inline]] nce uint32_t absolute_subtract_add(uint32_t a, uint8x16_t b, uint8x16_t c, mve_pred16_t p) { return vabavq_p_u8(a, b, c, p); }
1453[[gnu::always_inline]] nce uint32_t absolute_subtract_add(uint32_t a, uint16x8_t b, uint16x8_t c, mve_pred16_t p) { return vabavq_p_u16(a, b, c, p); }
1454[[gnu::always_inline]] nce uint32_t absolute_subtract_add(uint32_t a, uint32x4_t b, uint32x4_t c, mve_pred16_t p) { return vabavq_p_u32(a, b, c, p); }
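// --- Usage sketch (illustrative, not part of the header) ---
// Sum of absolute differences over two 16-byte blocks, a common block-matching metric.
// absolute_subtract_add wraps VABAV: it returns a + the sum over i of |b[i] - c[i]|. Helper
// name is hypothetical.
inline uint32_t sad16(uint8x16_t a, uint8x16_t b) {
  return mve::absolute_subtract_add(0u, a, b);
}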
1455[[gnu::always_inline]] nce int64_t reduce_add_long(int64_t a, int32x4_t b) { return vaddlvaq_s32(a, b); }
1456[[gnu::always_inline]] nce uint64_t reduce_add_long(uint64_t a, uint32x4_t b) { return vaddlvaq_u32(a, b); }
1457[[gnu::always_inline]] nce int64_t reduce_add_long(int64_t a, int32x4_t b, mve_pred16_t p) { return vaddlvaq_p_s32(a, b, p); }
1458[[gnu::always_inline]] nce uint64_t reduce_add_long(uint64_t a, uint32x4_t b, mve_pred16_t p) { return vaddlvaq_p_u32(a, b, p); }
1459[[gnu::always_inline]] nce int32_t reduce_add(int32_t a, int8x16_t b) { return vaddvaq_s8(a, b); }
1460[[gnu::always_inline]] nce int32_t reduce_add(int32_t a, int16x8_t b) { return vaddvaq_s16(a, b); }
1461[[gnu::always_inline]] nce int32_t reduce_add(int32_t a, int32x4_t b) { return vaddvaq_s32(a, b); }
1462[[gnu::always_inline]] nce uint32_t reduce_add(uint32_t a, uint8x16_t b) { return vaddvaq_u8(a, b); }
1463[[gnu::always_inline]] nce uint32_t reduce_add(uint32_t a, uint16x8_t b) { return vaddvaq_u16(a, b); }
1464[[gnu::always_inline]] nce uint32_t reduce_add(uint32_t a, uint32x4_t b) { return vaddvaq_u32(a, b); }
1465[[gnu::always_inline]] nce int32_t reduce_add(int32_t a, int8x16_t b, mve_pred16_t p) { return vaddvaq_p_s8(a, b, p); }
1466[[gnu::always_inline]] nce int32_t reduce_add(int32_t a, int16x8_t b, mve_pred16_t p) { return vaddvaq_p_s16(a, b, p); }
1467[[gnu::always_inline]] nce int32_t reduce_add(int32_t a, int32x4_t b, mve_pred16_t p) { return vaddvaq_p_s32(a, b, p); }
1468[[gnu::always_inline]] nce uint32_t reduce_add(uint32_t a, uint8x16_t b, mve_pred16_t p) { return vaddvaq_p_u8(a, b, p); }
1469[[gnu::always_inline]] nce uint32_t reduce_add(uint32_t a, uint16x8_t b, mve_pred16_t p) { return vaddvaq_p_u16(a, b, p); }
1470[[gnu::always_inline]] nce uint32_t reduce_add(uint32_t a, uint32x4_t b, mve_pred16_t p) { return vaddvaq_p_u32(a, b, p); }
1471[[gnu::always_inline]] nce int64x2_t multiply_long_bottom(int64x2_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p) { return vmullbq_int_m_s32(inactive, a, b, p); }
1472[[gnu::always_inline]] nce int64x2_t multiply_long_top(int64x2_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p) { return vmulltq_int_m_s32(inactive, a, b, p); }
1473[[gnu::always_inline]] nce int64x2_t multiply_double_saturate_long_bottom(int64x2_t inactive, int32x4_t a, int32_t b, mve_pred16_t p) { return vqdmullbq_m_n_s32(inactive, a, b, p); }
1474[[gnu::always_inline]] nce int64x2_t multiply_double_saturate_long_bottom(int64x2_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p) { return vqdmullbq_m_s32(inactive, a, b, p); }
1475[[gnu::always_inline]] nce int64x2_t multiply_double_saturate_long_top(int64x2_t inactive, int32x4_t a, int32_t b, mve_pred16_t p) { return vqdmulltq_m_n_s32(inactive, a, b, p); }
1476[[gnu::always_inline]] nce int64x2_t multiply_double_saturate_long_top(int64x2_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p) { return vqdmulltq_m_s32(inactive, a, b, p); }
1477[[gnu::always_inline]] nce int32_t multiply_add_dual_reduce_add_accumulate(int32_t add, int8x16_t m1, int8x16_t m2) { return vmladavaq_s8(add, m1, m2); }
1478[[gnu::always_inline]] nce int32_t multiply_add_dual_reduce_add_accumulate(int32_t add, int16x8_t m1, int16x8_t m2) { return vmladavaq_s16(add, m1, m2); }
1479[[gnu::always_inline]] nce int32_t multiply_add_dual_reduce_add_accumulate(int32_t add, int32x4_t m1, int32x4_t m2) { return vmladavaq_s32(add, m1, m2); }
1480[[gnu::always_inline]] nce uint32_t multiply_add_dual_reduce_add_accumulate(uint32_t add, uint8x16_t m1, uint8x16_t m2) { return vmladavaq_u8(add, m1, m2); }
1481[[gnu::always_inline]] nce uint32_t multiply_add_dual_reduce_add_accumulate(uint32_t add, uint16x8_t m1, uint16x8_t m2) { return vmladavaq_u16(add, m1, m2); }
1482[[gnu::always_inline]] nce uint32_t multiply_add_dual_reduce_add_accumulate(uint32_t add, uint32x4_t m1, uint32x4_t m2) { return vmladavaq_u32(add, m1, m2); }
1483[[gnu::always_inline]] nce int32_t multiply_add_dual_reduce_add_accumulate(int32_t add, int8x16_t m1, int8x16_t m2, mve_pred16_t p) { return vmladavaq_p_s8(add, m1, m2, p); }
1484[[gnu::always_inline]] nce int32_t multiply_add_dual_reduce_add_accumulate(int32_t add, int16x8_t m1, int16x8_t m2, mve_pred16_t p) { return vmladavaq_p_s16(add, m1, m2, p); }
1485[[gnu::always_inline]] nce int32_t multiply_add_dual_reduce_add_accumulate(int32_t add, int32x4_t m1, int32x4_t m2, mve_pred16_t p) { return vmladavaq_p_s32(add, m1, m2, p); }
1486[[gnu::always_inline]] nce uint32_t multiply_add_dual_reduce_add_accumulate(uint32_t add, uint8x16_t m1, uint8x16_t m2, mve_pred16_t p) { return vmladavaq_p_u8(add, m1, m2, p); }
1487[[gnu::always_inline]] nce uint32_t multiply_add_dual_reduce_add_accumulate(uint32_t add, uint16x8_t m1, uint16x8_t m2, mve_pred16_t p) { return vmladavaq_p_u16(add, m1, m2, p); }
1488[[gnu::always_inline]] nce uint32_t multiply_add_dual_reduce_add_accumulate(uint32_t add, uint32x4_t m1, uint32x4_t m2, mve_pred16_t p) { return vmladavaq_p_u32(add, m1, m2, p); }
1489[[gnu::always_inline]] nce int32_t multiply_add_dual_reduce_add_accumulate_exchange_pairs(int32_t add, int8x16_t m1, int8x16_t m2) { return vmladavaxq_s8(add, m1, m2); }
1490[[gnu::always_inline]] nce int32_t multiply_add_dual_reduce_add_accumulate_exchange_pairs(int32_t add, int16x8_t m1, int16x8_t m2) { return vmladavaxq_s16(add, m1, m2); }
1491[[gnu::always_inline]] nce int32_t multiply_add_dual_reduce_add_accumulate_exchange_pairs(int32_t add, int32x4_t m1, int32x4_t m2) { return vmladavaxq_s32(add, m1, m2); }
1492[[gnu::always_inline]] nce int32_t multiply_add_dual_reduce_add_accumulate_exchange_pairs(int32_t add, int8x16_t m1, int8x16_t m2, mve_pred16_t p) { return vmladavaxq_p_s8(add, m1, m2, p); }
1493[[gnu::always_inline]] nce int32_t multiply_add_dual_reduce_add_accumulate_exchange_pairs(int32_t add, int16x8_t m1, int16x8_t m2, mve_pred16_t p) { return vmladavaxq_p_s16(add, m1, m2, p); }
1494[[gnu::always_inline]] nce int32_t multiply_add_dual_reduce_add_accumulate_exchange_pairs(int32_t add, int32x4_t m1, int32x4_t m2, mve_pred16_t p) { return vmladavaxq_p_s32(add, m1, m2, p); }
1495[[gnu::always_inline]] nce int64_t multiply_add_long_dual_reduce_add_accumulate(int64_t add, int16x8_t m1, int16x8_t m2) { return vmlaldavaq_s16(add, m1, m2); }
1496[[gnu::always_inline]] nce int64_t multiply_add_long_dual_reduce_add_accumulate(int64_t add, int32x4_t m1, int32x4_t m2) { return vmlaldavaq_s32(add, m1, m2); }
1497[[gnu::always_inline]] nce uint64_t multiply_add_long_dual_reduce_add_accumulate(uint64_t add, uint16x8_t m1, uint16x8_t m2) { return vmlaldavaq_u16(add, m1, m2); }
1498[[gnu::always_inline]] nce uint64_t multiply_add_long_dual_reduce_add_accumulate(uint64_t add, uint32x4_t m1, uint32x4_t m2) { return vmlaldavaq_u32(add, m1, m2); }
1499[[gnu::always_inline]] nce int64_t multiply_add_long_dual_reduce_add_accumulate(int64_t add, int16x8_t m1, int16x8_t m2, mve_pred16_t p) { return vmlaldavaq_p_s16(add, m1, m2, p); }
1500[[gnu::always_inline]] nce int64_t multiply_add_long_dual_reduce_add_accumulate(int64_t add, int32x4_t m1, int32x4_t m2, mve_pred16_t p) { return vmlaldavaq_p_s32(add, m1, m2, p); }
1501[[gnu::always_inline]] nce uint64_t multiply_add_long_dual_reduce_add_accumulate(uint64_t add, uint16x8_t m1, uint16x8_t m2, mve_pred16_t p) { return vmlaldavaq_p_u16(add, m1, m2, p); }
1502[[gnu::always_inline]] nce uint64_t multiply_add_long_dual_reduce_add_accumulate(uint64_t add, uint32x4_t m1, uint32x4_t m2, mve_pred16_t p) { return vmlaldavaq_p_u32(add, m1, m2, p); }
1503[[gnu::always_inline]] nce int64_t multiply_add_long_dual_reduce_add_accumulate_exchange_pairs(int64_t add, int16x8_t m1, int16x8_t m2) { return vmlaldavaxq_s16(add, m1, m2); }
1504[[gnu::always_inline]] nce int64_t multiply_add_long_dual_reduce_add_accumulate_exchange_pairs(int64_t add, int32x4_t m1, int32x4_t m2) { return vmlaldavaxq_s32(add, m1, m2); }
1505[[gnu::always_inline]] nce int64_t multiply_add_long_dual_reduce_add_accumulate_exchange_pairs(int64_t add, int16x8_t m1, int16x8_t m2, mve_pred16_t p) { return vmlaldavaxq_p_s16(add, m1, m2, p); }
1506[[gnu::always_inline]] nce int64_t multiply_add_long_dual_reduce_add_accumulate_exchange_pairs(int64_t add, int32x4_t m1, int32x4_t m2, mve_pred16_t p) { return vmlaldavaxq_p_s32(add, m1, m2, p); }
1507[[gnu::always_inline]] nce int32_t multiply_subtract_dual_reduce_add_accumulate(int32_t a, int8x16_t b, int8x16_t c) { return vmlsdavaq_s8(a, b, c); }
1508[[gnu::always_inline]] nce int32_t multiply_subtract_dual_reduce_add_accumulate(int32_t a, int16x8_t b, int16x8_t c) { return vmlsdavaq_s16(a, b, c); }
1509[[gnu::always_inline]] nce int32_t multiply_subtract_dual_reduce_add_accumulate(int32_t a, int32x4_t b, int32x4_t c) { return vmlsdavaq_s32(a, b, c); }
1510[[gnu::always_inline]] nce int32_t multiply_subtract_dual_reduce_add_accumulate(int32_t a, int8x16_t b, int8x16_t c, mve_pred16_t p) { return vmlsdavaq_p_s8(a, b, c, p); }
1511[[gnu::always_inline]] nce int32_t multiply_subtract_dual_reduce_add_accumulate(int32_t a, int16x8_t b, int16x8_t c, mve_pred16_t p) { return vmlsdavaq_p_s16(a, b, c, p); }
1512[[gnu::always_inline]] nce int32_t multiply_subtract_dual_reduce_add_accumulate(int32_t a, int32x4_t b, int32x4_t c, mve_pred16_t p) { return vmlsdavaq_p_s32(a, b, c, p); }
1513[[gnu::always_inline]] nce int32_t multiply_subtract_dual_reduce_add_accumulate_exchange_pairs(int32_t a, int8x16_t b, int8x16_t c) { return vmlsdavaxq_s8(a, b, c); }
1514[[gnu::always_inline]] nce int32_t multiply_subtract_dual_reduce_add_accumulate_exchange_pairs(int32_t a, int16x8_t b, int16x8_t c) { return vmlsdavaxq_s16(a, b, c); }
1515[[gnu::always_inline]] nce int32_t multiply_subtract_dual_reduce_add_accumulate_exchange_pairs(int32_t a, int32x4_t b, int32x4_t c) { return vmlsdavaxq_s32(a, b, c); }
1516[[gnu::always_inline]] nce int32_t multiply_subtract_dual_reduce_add_accumulate_exchange_pairs(int32_t a, int8x16_t b, int8x16_t c, mve_pred16_t p) { return vmlsdavaxq_p_s8(a, b, c, p); }
1517[[gnu::always_inline]] nce int32_t multiply_subtract_dual_reduce_add_accumulate_exchange_pairs(int32_t a, int16x8_t b, int16x8_t c, mve_pred16_t p) { return vmlsdavaxq_p_s16(a, b, c, p); }
1518[[gnu::always_inline]] nce int32_t multiply_subtract_dual_reduce_add_accumulate_exchange_pairs(int32_t a, int32x4_t b, int32x4_t c, mve_pred16_t p) { return vmlsdavaxq_p_s32(a, b, c, p); }
1519[[gnu::always_inline]] nce int64_t multiply_subtract_long_dual_reduce_add_accumulate(int64_t a, int16x8_t b, int16x8_t c) { return vmlsldavaq_s16(a, b, c); }
1520[[gnu::always_inline]] nce int64_t multiply_subtract_long_dual_reduce_add_accumulate(int64_t a, int32x4_t b, int32x4_t c) { return vmlsldavaq_s32(a, b, c); }
1521[[gnu::always_inline]] nce int64_t multiply_subtract_long_dual_reduce_add_accumulate(int64_t a, int16x8_t b, int16x8_t c, mve_pred16_t p) { return vmlsldavaq_p_s16(a, b, c, p); }
1522[[gnu::always_inline]] nce int64_t multiply_subtract_long_dual_reduce_add_accumulate(int64_t a, int32x4_t b, int32x4_t c, mve_pred16_t p) { return vmlsldavaq_p_s32(a, b, c, p); }
1523[[gnu::always_inline]] nce int64_t multiply_subtract_long_dual_reduce_add_accumulate_exchange_pairs(int64_t a, int16x8_t b, int16x8_t c) { return vmlsldavaxq_s16(a, b, c); }
1524[[gnu::always_inline]] nce int64_t multiply_subtract_long_dual_reduce_add_accumulate_exchange_pairs(int64_t a, int32x4_t b, int32x4_t c) { return vmlsldavaxq_s32(a, b, c); }
1525[[gnu::always_inline]] nce int64_t multiply_subtract_long_dual_reduce_add_accumulate_exchange_pairs(int64_t a, int16x8_t b, int16x8_t c, mve_pred16_t p) { return vmlsldavaxq_p_s16(a, b, c, p); }
1526[[gnu::always_inline]] nce int64_t multiply_subtract_long_dual_reduce_add_accumulate_exchange_pairs(int64_t a, int32x4_t b, int32x4_t c, mve_pred16_t p) { return vmlsldavaxq_p_s32(a, b, c, p); }
1527[[gnu::always_inline]] nce int64_t multiply_add_long_round_dual_reduce_add_high_accumulate(int64_t a, int32x4_t b, int32x4_t c) { return vrmlaldavhaq_s32(a, b, c); }
1528[[gnu::always_inline]] nce uint64_t multiply_add_long_round_dual_reduce_add_high_accumulate(uint64_t a, uint32x4_t b, uint32x4_t c) { return vrmlaldavhaq_u32(a, b, c); }
1529[[gnu::always_inline]] nce int64_t multiply_add_long_round_dual_reduce_add_high_accumulate(int64_t a, int32x4_t b, int32x4_t c, mve_pred16_t p) { return vrmlaldavhaq_p_s32(a, b, c, p); }
1530[[gnu::always_inline]] nce uint64_t multiply_add_long_round_dual_reduce_add_high_accumulate(uint64_t a, uint32x4_t b, uint32x4_t c, mve_pred16_t p) { return vrmlaldavhaq_p_u32(a, b, c, p); }
1531[[gnu::always_inline]] nce int64_t multiply_add_long_round_dual_reduce_add_high_accumulate_exchange_pairs(int64_t a, int32x4_t b, int32x4_t c) { return vrmlaldavhaxq_s32(a, b, c); }
1532[[gnu::always_inline]] nce int64_t multiply_add_long_round_dual_reduce_add_high_accumulate_exchange_pairs(int64_t a, int32x4_t b, int32x4_t c, mve_pred16_t p) { return vrmlaldavhaxq_p_s32(a, b, c, p); }
1533[[gnu::always_inline]] nce int64_t multiply_subtract_long_round_dual_reduce_add_high_accumulate(int64_t a, int32x4_t b, int32x4_t c) { return vrmlsldavhaq_s32(a, b, c); }
1534[[gnu::always_inline]] nce int64_t multiply_subtract_long_round_dual_reduce_add_high_accumulate(int64_t a, int32x4_t b, int32x4_t c, mve_pred16_t p) { return vrmlsldavhaq_p_s32(a, b, c, p); }
1535[[gnu::always_inline]] nce int64_t multiply_subtract_long_round_dual_reduce_add_high_accumulate_exchange_pairs(int64_t a, int32x4_t b, int32x4_t c) { return vrmlsldavhaxq_s32(a, b, c); }
1536[[gnu::always_inline]] nce int64_t multiply_subtract_long_round_dual_reduce_add_high_accumulate_exchange_pairs(int64_t a, int32x4_t b, int32x4_t c, mve_pred16_t p) { return vrmlsldavhaxq_p_s32(a, b, c, p); }
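// --- Usage sketch (illustrative, not part of the header) ---
// Long dot product of two int16 buffers (length a multiple of 8) using the 64-bit accumulating
// MLALDAVA form above. vld1q_s16 comes straight from <arm_mve.h>; a predicated tail could use
// the _p overload together with vctp16q. Helper name is hypothetical.
inline int64_t dot_s16(const int16_t *a, const int16_t *b, uint32_t n) {
  int64_t acc = 0;
  for (uint32_t i = 0; i + 8 <= n; i += 8) {
    int16x8_t va = vld1q_s16(a + i);
    int16x8_t vb = vld1q_s16(b + i);
    acc = mve::multiply_add_long_dual_reduce_add_accumulate(acc, va, vb);   // acc += sum of va[j] * vb[j]
  }
  return acc;
}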
1537[[gnu::always_inline]] nce int16x8_t bitwise_not(const int16_t imm) { return vmvnq_n_s16(imm); }
1538[[gnu::always_inline]] nce int32x4_t bitwise_not(const int32_t imm) { return vmvnq_n_s32(imm); }
1539[[gnu::always_inline]] nce uint16x8_t bitwise_not(const uint16_t imm) { return vmvnq_n_u16(imm); }
1540[[gnu::always_inline]] nce uint32x4_t bitwise_not(const uint32_t imm) { return vmvnq_n_u32(imm); }
1541[[gnu::always_inline]] nce int16x8_t bitwise_not(const int16_t imm, mve_pred16_t p) { return vmvnq_x_n_s16(imm, p); }
1542[[gnu::always_inline]] nce int32x4_t bitwise_not(const int32_t imm, mve_pred16_t p) { return vmvnq_x_n_s32(imm, p); }
1543[[gnu::always_inline]] nce uint16x8_t bitwise_not(const uint16_t imm, mve_pred16_t p) { return vmvnq_x_n_u16(imm, p); }
1544[[gnu::always_inline]] nce uint32x4_t bitwise_not(const uint32_t imm, mve_pred16_t p) { return vmvnq_x_n_u32(imm, p); }
1545[[gnu::always_inline]] inline int8x16x2_t load2(int8_t const *addr) { return vld2q_s8(addr); }
1546[[gnu::always_inline]] inline int16x8x2_t load2(int16_t const *addr) { return vld2q_s16(addr); }
1547[[gnu::always_inline]] inline int32x4x2_t load2(int32_t const *addr) { return vld2q_s32(addr); }
1548[[gnu::always_inline]] inline uint8x16x2_t load2(uint8_t const *addr) { return vld2q_u8(addr); }
1549[[gnu::always_inline]] inline uint16x8x2_t load2(uint16_t const *addr) { return vld2q_u16(addr); }
1550[[gnu::always_inline]] inline uint32x4x2_t load2(uint32_t const *addr) { return vld2q_u32(addr); }
1551[[gnu::always_inline]] inline int8x16x4_t load4(int8_t const *addr) { return vld4q_s8(addr); }
1552[[gnu::always_inline]] inline int16x8x4_t load4(int16_t const *addr) { return vld4q_s16(addr); }
1553[[gnu::always_inline]] inline int32x4x4_t load4(int32_t const *addr) { return vld4q_s32(addr); }
1554[[gnu::always_inline]] inline uint8x16x4_t load4(uint8_t const *addr) { return vld4q_u8(addr); }
1555[[gnu::always_inline]] inline uint16x8x4_t load4(uint16_t const *addr) { return vld4q_u16(addr); }
1556[[gnu::always_inline]] inline uint32x4x4_t load4(uint32_t const *addr) { return vld4q_u32(addr); }
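// --- Usage sketch (illustrative, not part of the header) ---
// De-interleaving load: VLD2 reads 16 interleaved int16 values and splits them into two vectors,
// even-indexed elements in .val[0] and odd-indexed elements in .val[1], for example separating
// the left and right channels of interleaved stereo samples. Helper name is hypothetical.
inline void split_stereo(const int16_t *interleaved, int16x8_t *left, int16x8_t *right) {
  int16x8x2_t lr = mve::load2(interleaved);
  *left  = lr.val[0];
  *right = lr.val[1];
}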
1557[[gnu::always_inline]] inline int8x16_t load1(int8_t const *base, mve_pred16_t p) { return vld1q_z_s8(base, p); }
1558[[gnu::always_inline]] inline int16x8_t load1(int16_t const *base, mve_pred16_t p) { return vld1q_z_s16(base, p); }
1559[[gnu::always_inline]] inline int32x4_t load1(int32_t const *base, mve_pred16_t p) { return vld1q_z_s32(base, p); }
1560[[gnu::always_inline]] inline uint8x16_t load1(uint8_t const *base, mve_pred16_t p) { return vld1q_z_u8(base, p); }
1561[[gnu::always_inline]] inline uint16x8_t load1(uint16_t const *base, mve_pred16_t p) { return vld1q_z_u16(base, p); }
1562[[gnu::always_inline]] inline uint32x4_t load1(uint32_t const *base, mve_pred16_t p) { return vld1q_z_u32(base, p); }
1563template <> [[gnu::always_inline]] nce int8x16_t load_byte(int8_t const *base) { return vldrbq_s8(base); }
1564template <> [[gnu::always_inline]] nce int16x8_t load_byte(int8_t const *base) { return vldrbq_s16(base); }
1565template <> [[gnu::always_inline]] nce int32x4_t load_byte(int8_t const *base) { return vldrbq_s32(base); }
1566template <> [[gnu::always_inline]] nce uint8x16_t load_byte(uint8_t const *base) { return vldrbq_u8(base); }
1567template <> [[gnu::always_inline]] nce uint16x8_t load_byte(uint8_t const *base) { return vldrbq_u16(base); }
1568template <> [[gnu::always_inline]] nce uint32x4_t load_byte(uint8_t const *base) { return vldrbq_u32(base); }
1569template <> [[gnu::always_inline]] nce int8x16_t load_byte(int8_t const *base, mve_pred16_t p) { return vldrbq_z_s8(base, p); }
1570template <> [[gnu::always_inline]] nce int16x8_t load_byte(int8_t const *base, mve_pred16_t p) { return vldrbq_z_s16(base, p); }
1571template <> [[gnu::always_inline]] nce int32x4_t load_byte(int8_t const *base, mve_pred16_t p) { return vldrbq_z_s32(base, p); }
1572template <> [[gnu::always_inline]] nce uint8x16_t load_byte(uint8_t const *base, mve_pred16_t p) { return vldrbq_z_u8(base, p); }
1573template <> [[gnu::always_inline]] nce uint16x8_t load_byte(uint8_t const *base, mve_pred16_t p) { return vldrbq_z_u16(base, p); }
1574template <> [[gnu::always_inline]] nce uint32x4_t load_byte(uint8_t const *base, mve_pred16_t p) { return vldrbq_z_u32(base, p); }
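// --- Usage sketch (illustrative, not part of the header) ---
// Widening load: selecting a wider destination type makes VLDRB extend each loaded byte, so four
// uint8_t values arrive zero-extended as one uint32x4_t ready for 32-bit arithmetic (the signed
// specializations sign-extend instead). Helper name is hypothetical.
inline uint32x4_t load4_bytes_widened(const uint8_t *src) {
  return mve::load_byte<uint32x4_t>(src);   // loads src[0..3]
}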
1575template <> [[gnu::always_inline]] nce int16x8_t load_halfword(int16_t const *base) { return vldrhq_s16(base); }
1576template <> [[gnu::always_inline]] nce int32x4_t load_halfword(int16_t const *base) { return vldrhq_s32(base); }
1577template <> [[gnu::always_inline]] nce uint16x8_t load_halfword(uint16_t const *base) { return vldrhq_u16(base); }
1578template <> [[gnu::always_inline]] nce uint32x4_t load_halfword(uint16_t const *base) { return vldrhq_u32(base); }
1579template <> [[gnu::always_inline]] nce int16x8_t load_halfword(int16_t const *base, mve_pred16_t p) { return vldrhq_z_s16(base, p); }
1580template <> [[gnu::always_inline]] nce int32x4_t load_halfword(int16_t const *base, mve_pred16_t p) { return vldrhq_z_s32(base, p); }
1581template <> [[gnu::always_inline]] nce uint16x8_t load_halfword(uint16_t const *base, mve_pred16_t p) { return vldrhq_z_u16(base, p); }
1582template <> [[gnu::always_inline]] nce uint32x4_t load_halfword(uint16_t const *base, mve_pred16_t p) { return vldrhq_z_u32(base, p); }
1583[[gnu::always_inline]] nce int32x4_t load_word(int32_t const *base) { return vldrwq_s32(base); }
1584[[gnu::always_inline]] nce uint32x4_t load_word(uint32_t const *base) { return vldrwq_u32(base); }
1585[[gnu::always_inline]] nce int32x4_t load_word(int32_t const *base, mve_pred16_t p) { return vldrwq_z_s32(base, p); }
1586[[gnu::always_inline]] nce uint32x4_t load_word(uint32_t const *base, mve_pred16_t p) { return vldrwq_z_u32(base, p); }
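// --- Usage sketch (illustrative, not part of the header) ---
// Summing a buffer of arbitrary length with tail predication. vctp32q (from <arm_mve.h>) builds
// a predicate covering min(n - i, 4) lanes, the zeroing load leaves inactive lanes at zero, and
// the predicated reduce_add only accumulates the active lanes. Helper name is hypothetical.
inline uint32_t sum_u32(const uint32_t *data, uint32_t n) {
  uint32_t acc = 0;
  for (uint32_t i = 0; i < n; i += 4) {
    mve_pred16_t p = vctp32q(n - i);             // all-true except for the final partial vector
    uint32x4_t v = mve::load_word(data + i, p);  // predicated load, inactive lanes read as zero
    acc = mve::reduce_add(acc, v, p);            // acc += active lanes of v
  }
  return acc;
}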
[[gnu::always_inline]] nce int16x8_t load_halfword_gather_offset(int16_t const *base, uint16x8_t offset) { return vldrhq_gather_offset_s16(base, offset); }
[[gnu::always_inline]] nce int32x4_t load_halfword_gather_offset(int16_t const *base, uint32x4_t offset) { return vldrhq_gather_offset_s32(base, offset); }
[[gnu::always_inline]] nce uint16x8_t load_halfword_gather_offset(uint16_t const *base, uint16x8_t offset) { return vldrhq_gather_offset_u16(base, offset); }
[[gnu::always_inline]] nce uint32x4_t load_halfword_gather_offset(uint16_t const *base, uint32x4_t offset) { return vldrhq_gather_offset_u32(base, offset); }
[[gnu::always_inline]] nce int16x8_t load_halfword_gather_offset(int16_t const *base, uint16x8_t offset, mve_pred16_t p) { return vldrhq_gather_offset_z_s16(base, offset, p); }
[[gnu::always_inline]] nce int32x4_t load_halfword_gather_offset(int16_t const *base, uint32x4_t offset, mve_pred16_t p) { return vldrhq_gather_offset_z_s32(base, offset, p); }
[[gnu::always_inline]] nce uint16x8_t load_halfword_gather_offset(uint16_t const *base, uint16x8_t offset, mve_pred16_t p) { return vldrhq_gather_offset_z_u16(base, offset, p); }
[[gnu::always_inline]] nce uint32x4_t load_halfword_gather_offset(uint16_t const *base, uint32x4_t offset, mve_pred16_t p) { return vldrhq_gather_offset_z_u32(base, offset, p); }
[[gnu::always_inline]] nce int16x8_t load_halfword_gather_shifted_offset(int16_t const *base, uint16x8_t offset) { return vldrhq_gather_shifted_offset_s16(base, offset); }
[[gnu::always_inline]] nce int32x4_t load_halfword_gather_shifted_offset(int16_t const *base, uint32x4_t offset) { return vldrhq_gather_shifted_offset_s32(base, offset); }
[[gnu::always_inline]] nce uint16x8_t load_halfword_gather_shifted_offset(uint16_t const *base, uint16x8_t offset) { return vldrhq_gather_shifted_offset_u16(base, offset); }
[[gnu::always_inline]] nce uint32x4_t load_halfword_gather_shifted_offset(uint16_t const *base, uint32x4_t offset) { return vldrhq_gather_shifted_offset_u32(base, offset); }
[[gnu::always_inline]] nce int16x8_t load_halfword_gather_shifted_offset(int16_t const *base, uint16x8_t offset, mve_pred16_t p) { return vldrhq_gather_shifted_offset_z_s16(base, offset, p); }
[[gnu::always_inline]] nce int32x4_t load_halfword_gather_shifted_offset(int16_t const *base, uint32x4_t offset, mve_pred16_t p) { return vldrhq_gather_shifted_offset_z_s32(base, offset, p); }
[[gnu::always_inline]] nce uint16x8_t load_halfword_gather_shifted_offset(uint16_t const *base, uint16x8_t offset, mve_pred16_t p) { return vldrhq_gather_shifted_offset_z_u16(base, offset, p); }
[[gnu::always_inline]] nce uint32x4_t load_halfword_gather_shifted_offset(uint16_t const *base, uint32x4_t offset, mve_pred16_t p) { return vldrhq_gather_shifted_offset_z_u32(base, offset, p); }
[[gnu::always_inline]] nce int8x16_t load_byte_gather_offset(int8_t const *base, uint8x16_t offset) { return vldrbq_gather_offset_s8(base, offset); }
[[gnu::always_inline]] nce int16x8_t load_byte_gather_offset(int8_t const *base, uint16x8_t offset) { return vldrbq_gather_offset_s16(base, offset); }
[[gnu::always_inline]] nce int32x4_t load_byte_gather_offset(int8_t const *base, uint32x4_t offset) { return vldrbq_gather_offset_s32(base, offset); }
[[gnu::always_inline]] nce uint8x16_t load_byte_gather_offset(uint8_t const *base, uint8x16_t offset) { return vldrbq_gather_offset_u8(base, offset); }
[[gnu::always_inline]] nce uint16x8_t load_byte_gather_offset(uint8_t const *base, uint16x8_t offset) { return vldrbq_gather_offset_u16(base, offset); }
[[gnu::always_inline]] nce uint32x4_t load_byte_gather_offset(uint8_t const *base, uint32x4_t offset) { return vldrbq_gather_offset_u32(base, offset); }
[[gnu::always_inline]] nce int8x16_t load_byte_gather_offset(int8_t const *base, uint8x16_t offset, mve_pred16_t p) { return vldrbq_gather_offset_z_s8(base, offset, p); }
[[gnu::always_inline]] nce int16x8_t load_byte_gather_offset(int8_t const *base, uint16x8_t offset, mve_pred16_t p) { return vldrbq_gather_offset_z_s16(base, offset, p); }
[[gnu::always_inline]] nce int32x4_t load_byte_gather_offset(int8_t const *base, uint32x4_t offset, mve_pred16_t p) { return vldrbq_gather_offset_z_s32(base, offset, p); }
[[gnu::always_inline]] nce uint8x16_t load_byte_gather_offset(uint8_t const *base, uint8x16_t offset, mve_pred16_t p) { return vldrbq_gather_offset_z_u8(base, offset, p); }
[[gnu::always_inline]] nce uint16x8_t load_byte_gather_offset(uint8_t const *base, uint16x8_t offset, mve_pred16_t p) { return vldrbq_gather_offset_z_u16(base, offset, p); }
[[gnu::always_inline]] nce uint32x4_t load_byte_gather_offset(uint8_t const *base, uint32x4_t offset, mve_pred16_t p) { return vldrbq_gather_offset_z_u32(base, offset, p); }
[[gnu::always_inline]] nce int32x4_t load_word_gather_offset(int32_t const *base, uint32x4_t offset) { return vldrwq_gather_offset_s32(base, offset); }
[[gnu::always_inline]] nce uint32x4_t load_word_gather_offset(uint32_t const *base, uint32x4_t offset) { return vldrwq_gather_offset_u32(base, offset); }
[[gnu::always_inline]] nce int32x4_t load_word_gather_offset(int32_t const *base, uint32x4_t offset, mve_pred16_t p) { return vldrwq_gather_offset_z_s32(base, offset, p); }
[[gnu::always_inline]] nce uint32x4_t load_word_gather_offset(uint32_t const *base, uint32x4_t offset, mve_pred16_t p) { return vldrwq_gather_offset_z_u32(base, offset, p); }
[[gnu::always_inline]] nce int32x4_t load_word_gather_shifted_offset(int32_t const *base, uint32x4_t offset) { return vldrwq_gather_shifted_offset_s32(base, offset); }
[[gnu::always_inline]] nce uint32x4_t load_word_gather_shifted_offset(uint32_t const *base, uint32x4_t offset) { return vldrwq_gather_shifted_offset_u32(base, offset); }
[[gnu::always_inline]] nce int32x4_t load_word_gather_shifted_offset(int32_t const *base, uint32x4_t offset, mve_pred16_t p) { return vldrwq_gather_shifted_offset_z_s32(base, offset, p); }
[[gnu::always_inline]] nce uint32x4_t load_word_gather_shifted_offset(uint32_t const *base, uint32x4_t offset, mve_pred16_t p) { return vldrwq_gather_shifted_offset_z_u32(base, offset, p); }
[[gnu::always_inline]] nce int64x2_t load_doubleword_gather_offset(int64_t const *base, uint64x2_t offset) { return vldrdq_gather_offset_s64(base, offset); }
[[gnu::always_inline]] nce uint64x2_t load_doubleword_gather_offset(uint64_t const *base, uint64x2_t offset) { return vldrdq_gather_offset_u64(base, offset); }
[[gnu::always_inline]] nce int64x2_t load_doubleword_gather_offset(int64_t const *base, uint64x2_t offset, mve_pred16_t p) { return vldrdq_gather_offset_z_s64(base, offset, p); }
[[gnu::always_inline]] nce uint64x2_t load_doubleword_gather_offset(uint64_t const *base, uint64x2_t offset, mve_pred16_t p) { return vldrdq_gather_offset_z_u64(base, offset, p); }
[[gnu::always_inline]] nce int64x2_t load_doubleword_gather_shifted_offset(int64_t const *base, uint64x2_t offset) { return vldrdq_gather_shifted_offset_s64(base, offset); }
[[gnu::always_inline]] nce uint64x2_t load_doubleword_gather_shifted_offset(uint64_t const *base, uint64x2_t offset) { return vldrdq_gather_shifted_offset_u64(base, offset); }
[[gnu::always_inline]] nce int64x2_t load_doubleword_gather_shifted_offset(int64_t const *base, uint64x2_t offset, mve_pred16_t p) { return vldrdq_gather_shifted_offset_z_s64(base, offset, p); }
[[gnu::always_inline]] nce uint64x2_t load_doubleword_gather_shifted_offset(uint64_t const *base, uint64x2_t offset, mve_pred16_t p) { return vldrdq_gather_shifted_offset_z_u64(base, offset, p); }
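// Interleaving structure stores: store2/store4 (vst2q/vst4q) write two or four
// registers to memory with their lanes interleaved.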
[[gnu::always_inline]] inline void store2(int8_t *addr, int8x16x2_t value) { return vst2q_s8(addr, value); }
[[gnu::always_inline]] inline void store2(int16_t *addr, int16x8x2_t value) { return vst2q_s16(addr, value); }
[[gnu::always_inline]] inline void store2(int32_t *addr, int32x4x2_t value) { return vst2q_s32(addr, value); }
[[gnu::always_inline]] inline void store2(uint8_t *addr, uint8x16x2_t value) { return vst2q_u8(addr, value); }
[[gnu::always_inline]] inline void store2(uint16_t *addr, uint16x8x2_t value) { return vst2q_u16(addr, value); }
[[gnu::always_inline]] inline void store2(uint32_t *addr, uint32x4x2_t value) { return vst2q_u32(addr, value); }
[[gnu::always_inline]] inline void store4(int8_t *addr, int8x16x4_t value) { return vst4q_s8(addr, value); }
[[gnu::always_inline]] inline void store4(int16_t *addr, int16x8x4_t value) { return vst4q_s16(addr, value); }
[[gnu::always_inline]] inline void store4(int32_t *addr, int32x4x4_t value) { return vst4q_s32(addr, value); }
[[gnu::always_inline]] inline void store4(uint8_t *addr, uint8x16x4_t value) { return vst4q_u8(addr, value); }
[[gnu::always_inline]] inline void store4(uint16_t *addr, uint16x8x4_t value) { return vst4q_u16(addr, value); }
[[gnu::always_inline]] inline void store4(uint32_t *addr, uint32x4x4_t value) { return vst4q_u32(addr, value); }
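// Predicated contiguous stores (vst1q_p): lanes whose predicate bit is clear are
// not written to memory.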
[[gnu::always_inline]] inline void store1(int8_t *base, int8x16_t value, mve_pred16_t p) { return vst1q_p_s8(base, value, p); }
[[gnu::always_inline]] inline void store1(int16_t *base, int16x8_t value, mve_pred16_t p) { return vst1q_p_s16(base, value, p); }
[[gnu::always_inline]] inline void store1(int32_t *base, int32x4_t value, mve_pred16_t p) { return vst1q_p_s32(base, value, p); }
[[gnu::always_inline]] inline void store1(uint8_t *base, uint8x16_t value, mve_pred16_t p) { return vst1q_p_u8(base, value, p); }
[[gnu::always_inline]] inline void store1(uint16_t *base, uint16x8_t value, mve_pred16_t p) { return vst1q_p_u16(base, value, p); }
[[gnu::always_inline]] inline void store1(uint32_t *base, uint32x4_t value, mve_pred16_t p) { return vst1q_p_u32(base, value, p); }
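// Contiguous stores by destination width: the overloads that take a vector wider
// than the pointee truncate each lane to the destination element size (for
// example, vstrbq_s16 writes the low byte of every 16-bit lane).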
[[gnu::always_inline]] nce void store_byte(int8_t *base, int8x16_t value) { return vstrbq_s8(base, value); }
[[gnu::always_inline]] nce void store_byte(int8_t *base, int16x8_t value) { return vstrbq_s16(base, value); }
[[gnu::always_inline]] nce void store_byte(int8_t *base, int32x4_t value) { return vstrbq_s32(base, value); }
[[gnu::always_inline]] nce void store_byte(uint8_t *base, uint8x16_t value) { return vstrbq_u8(base, value); }
[[gnu::always_inline]] nce void store_byte(uint8_t *base, uint16x8_t value) { return vstrbq_u16(base, value); }
[[gnu::always_inline]] nce void store_byte(uint8_t *base, uint32x4_t value) { return vstrbq_u32(base, value); }
[[gnu::always_inline]] nce void store_byte(int8_t *base, int8x16_t value, mve_pred16_t p) { return vstrbq_p_s8(base, value, p); }
[[gnu::always_inline]] nce void store_byte(int8_t *base, int16x8_t value, mve_pred16_t p) { return vstrbq_p_s16(base, value, p); }
[[gnu::always_inline]] nce void store_byte(int8_t *base, int32x4_t value, mve_pred16_t p) { return vstrbq_p_s32(base, value, p); }
[[gnu::always_inline]] nce void store_byte(uint8_t *base, uint8x16_t value, mve_pred16_t p) { return vstrbq_p_u8(base, value, p); }
[[gnu::always_inline]] nce void store_byte(uint8_t *base, uint16x8_t value, mve_pred16_t p) { return vstrbq_p_u16(base, value, p); }
[[gnu::always_inline]] nce void store_byte(uint8_t *base, uint32x4_t value, mve_pred16_t p) { return vstrbq_p_u32(base, value, p); }
[[gnu::always_inline]] nce void store_halfword(int16_t *base, int16x8_t value) { return vstrhq_s16(base, value); }
[[gnu::always_inline]] nce void store_halfword(int16_t *base, int32x4_t value) { return vstrhq_s32(base, value); }
[[gnu::always_inline]] nce void store_halfword(uint16_t *base, uint16x8_t value) { return vstrhq_u16(base, value); }
[[gnu::always_inline]] nce void store_halfword(uint16_t *base, uint32x4_t value) { return vstrhq_u32(base, value); }
[[gnu::always_inline]] nce void store_halfword(int16_t *base, int16x8_t value, mve_pred16_t p) { return vstrhq_p_s16(base, value, p); }
[[gnu::always_inline]] nce void store_halfword(int16_t *base, int32x4_t value, mve_pred16_t p) { return vstrhq_p_s32(base, value, p); }
[[gnu::always_inline]] nce void store_halfword(uint16_t *base, uint16x8_t value, mve_pred16_t p) { return vstrhq_p_u16(base, value, p); }
[[gnu::always_inline]] nce void store_halfword(uint16_t *base, uint32x4_t value, mve_pred16_t p) { return vstrhq_p_u32(base, value, p); }
[[gnu::always_inline]] nce void store_word(int32_t *base, int32x4_t value) { return vstrwq_s32(base, value); }
[[gnu::always_inline]] nce void store_word(uint32_t *base, uint32x4_t value) { return vstrwq_u32(base, value); }
[[gnu::always_inline]] nce void store_word(int32_t *base, int32x4_t value, mve_pred16_t p) { return vstrwq_p_s32(base, value, p); }
[[gnu::always_inline]] nce void store_word(uint32_t *base, uint32x4_t value, mve_pred16_t p) { return vstrwq_p_u32(base, value, p); }
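// Scatter stores: lane i is written to base + offset[i] bytes; the
// *_scatter_shifted_offset variants scale each offset by the element size, and
// the _p forms skip lanes whose predicate bit is clear.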
[[gnu::always_inline]] nce void store_byte_scatter_offset(int8_t *base, uint8x16_t offset, int8x16_t value) { return vstrbq_scatter_offset_s8(base, offset, value); }
[[gnu::always_inline]] nce void store_byte_scatter_offset(int8_t *base, uint16x8_t offset, int16x8_t value) { return vstrbq_scatter_offset_s16(base, offset, value); }
[[gnu::always_inline]] nce void store_byte_scatter_offset(int8_t *base, uint32x4_t offset, int32x4_t value) { return vstrbq_scatter_offset_s32(base, offset, value); }
[[gnu::always_inline]] nce void store_byte_scatter_offset(uint8_t *base, uint8x16_t offset, uint8x16_t value) { return vstrbq_scatter_offset_u8(base, offset, value); }
[[gnu::always_inline]] nce void store_byte_scatter_offset(uint8_t *base, uint16x8_t offset, uint16x8_t value) { return vstrbq_scatter_offset_u16(base, offset, value); }
[[gnu::always_inline]] nce void store_byte_scatter_offset(uint8_t *base, uint32x4_t offset, uint32x4_t value) { return vstrbq_scatter_offset_u32(base, offset, value); }
[[gnu::always_inline]] nce void store_byte_scatter_offset(int8_t *base, uint8x16_t offset, int8x16_t value, mve_pred16_t p) { return vstrbq_scatter_offset_p_s8(base, offset, value, p); }
[[gnu::always_inline]] nce void store_byte_scatter_offset(int8_t *base, uint16x8_t offset, int16x8_t value, mve_pred16_t p) { return vstrbq_scatter_offset_p_s16(base, offset, value, p); }
[[gnu::always_inline]] nce void store_byte_scatter_offset(int8_t *base, uint32x4_t offset, int32x4_t value, mve_pred16_t p) { return vstrbq_scatter_offset_p_s32(base, offset, value, p); }
[[gnu::always_inline]] nce void store_byte_scatter_offset(uint8_t *base, uint8x16_t offset, uint8x16_t value, mve_pred16_t p) { return vstrbq_scatter_offset_p_u8(base, offset, value, p); }
[[gnu::always_inline]] nce void store_byte_scatter_offset(uint8_t *base, uint16x8_t offset, uint16x8_t value, mve_pred16_t p) { return vstrbq_scatter_offset_p_u16(base, offset, value, p); }
[[gnu::always_inline]] nce void store_byte_scatter_offset(uint8_t *base, uint32x4_t offset, uint32x4_t value, mve_pred16_t p) { return vstrbq_scatter_offset_p_u32(base, offset, value, p); }
[[gnu::always_inline]] nce void store_halfword_scatter_offset(int16_t *base, uint16x8_t offset, int16x8_t value) { return vstrhq_scatter_offset_s16(base, offset, value); }
[[gnu::always_inline]] nce void store_halfword_scatter_offset(int16_t *base, uint32x4_t offset, int32x4_t value) { return vstrhq_scatter_offset_s32(base, offset, value); }
[[gnu::always_inline]] nce void store_halfword_scatter_offset(uint16_t *base, uint16x8_t offset, uint16x8_t value) { return vstrhq_scatter_offset_u16(base, offset, value); }
[[gnu::always_inline]] nce void store_halfword_scatter_offset(uint16_t *base, uint32x4_t offset, uint32x4_t value) { return vstrhq_scatter_offset_u32(base, offset, value); }
[[gnu::always_inline]] nce void store_halfword_scatter_offset(int16_t *base, uint16x8_t offset, int16x8_t value, mve_pred16_t p) { return vstrhq_scatter_offset_p_s16(base, offset, value, p); }
[[gnu::always_inline]] nce void store_halfword_scatter_offset(int16_t *base, uint32x4_t offset, int32x4_t value, mve_pred16_t p) { return vstrhq_scatter_offset_p_s32(base, offset, value, p); }
[[gnu::always_inline]] nce void store_halfword_scatter_offset(uint16_t *base, uint16x8_t offset, uint16x8_t value, mve_pred16_t p) { return vstrhq_scatter_offset_p_u16(base, offset, value, p); }
[[gnu::always_inline]] nce void store_halfword_scatter_offset(uint16_t *base, uint32x4_t offset, uint32x4_t value, mve_pred16_t p) { return vstrhq_scatter_offset_p_u32(base, offset, value, p); }
[[gnu::always_inline]] nce void store_halfword_scatter_shifted_offset(int16_t *base, uint16x8_t offset, int16x8_t value) { return vstrhq_scatter_shifted_offset_s16(base, offset, value); }
[[gnu::always_inline]] nce void store_halfword_scatter_shifted_offset(int16_t *base, uint32x4_t offset, int32x4_t value) { return vstrhq_scatter_shifted_offset_s32(base, offset, value); }
[[gnu::always_inline]] nce void store_halfword_scatter_shifted_offset(uint16_t *base, uint16x8_t offset, uint16x8_t value) { return vstrhq_scatter_shifted_offset_u16(base, offset, value); }
[[gnu::always_inline]] nce void store_halfword_scatter_shifted_offset(uint16_t *base, uint32x4_t offset, uint32x4_t value) { return vstrhq_scatter_shifted_offset_u32(base, offset, value); }
[[gnu::always_inline]] nce void store_halfword_scatter_shifted_offset(int16_t *base, uint16x8_t offset, int16x8_t value, mve_pred16_t p) { return vstrhq_scatter_shifted_offset_p_s16(base, offset, value, p); }
[[gnu::always_inline]] nce void store_halfword_scatter_shifted_offset(int16_t *base, uint32x4_t offset, int32x4_t value, mve_pred16_t p) { return vstrhq_scatter_shifted_offset_p_s32(base, offset, value, p); }
[[gnu::always_inline]] nce void store_halfword_scatter_shifted_offset(uint16_t *base, uint16x8_t offset, uint16x8_t value, mve_pred16_t p) { return vstrhq_scatter_shifted_offset_p_u16(base, offset, value, p); }
[[gnu::always_inline]] nce void store_halfword_scatter_shifted_offset(uint16_t *base, uint32x4_t offset, uint32x4_t value, mve_pred16_t p) { return vstrhq_scatter_shifted_offset_p_u32(base, offset, value, p); }
[[gnu::always_inline]] nce void store_word_scatter_offset(int32_t *base, uint32x4_t offset, int32x4_t value) { return vstrwq_scatter_offset_s32(base, offset, value); }
[[gnu::always_inline]] nce void store_word_scatter_offset(uint32_t *base, uint32x4_t offset, uint32x4_t value) { return vstrwq_scatter_offset_u32(base, offset, value); }
[[gnu::always_inline]] nce void store_word_scatter_offset(int32_t *base, uint32x4_t offset, int32x4_t value, mve_pred16_t p) { return vstrwq_scatter_offset_p_s32(base, offset, value, p); }
[[gnu::always_inline]] nce void store_word_scatter_offset(uint32_t *base, uint32x4_t offset, uint32x4_t value, mve_pred16_t p) { return vstrwq_scatter_offset_p_u32(base, offset, value, p); }
[[gnu::always_inline]] nce void store_word_scatter_shifted_offset(int32_t *base, uint32x4_t offset, int32x4_t value) { return vstrwq_scatter_shifted_offset_s32(base, offset, value); }
[[gnu::always_inline]] nce void store_word_scatter_shifted_offset(uint32_t *base, uint32x4_t offset, uint32x4_t value) { return vstrwq_scatter_shifted_offset_u32(base, offset, value); }
[[gnu::always_inline]] nce void store_word_scatter_shifted_offset(int32_t *base, uint32x4_t offset, int32x4_t value, mve_pred16_t p) { return vstrwq_scatter_shifted_offset_p_s32(base, offset, value, p); }
[[gnu::always_inline]] nce void store_word_scatter_shifted_offset(uint32_t *base, uint32x4_t offset, uint32x4_t value, mve_pred16_t p) { return vstrwq_scatter_shifted_offset_p_u32(base, offset, value, p); }
[[gnu::always_inline]] nce void store_doubleword_scatter_offset(int64_t *base, uint64x2_t offset, int64x2_t value) { return vstrdq_scatter_offset_s64(base, offset, value); }
[[gnu::always_inline]] nce void store_doubleword_scatter_offset(uint64_t *base, uint64x2_t offset, uint64x2_t value) { return vstrdq_scatter_offset_u64(base, offset, value); }
[[gnu::always_inline]] nce void store_doubleword_scatter_offset(int64_t *base, uint64x2_t offset, int64x2_t value, mve_pred16_t p) { return vstrdq_scatter_offset_p_s64(base, offset, value, p); }
[[gnu::always_inline]] nce void store_doubleword_scatter_offset(uint64_t *base, uint64x2_t offset, uint64x2_t value, mve_pred16_t p) { return vstrdq_scatter_offset_p_u64(base, offset, value, p); }
[[gnu::always_inline]] nce void store_doubleword_scatter_shifted_offset(int64_t *base, uint64x2_t offset, int64x2_t value) { return vstrdq_scatter_shifted_offset_s64(base, offset, value); }
[[gnu::always_inline]] nce void store_doubleword_scatter_shifted_offset(uint64_t *base, uint64x2_t offset, uint64x2_t value) { return vstrdq_scatter_shifted_offset_u64(base, offset, value); }
[[gnu::always_inline]] nce void store_doubleword_scatter_shifted_offset(int64_t *base, uint64x2_t offset, int64x2_t value, mve_pred16_t p) { return vstrdq_scatter_shifted_offset_p_s64(base, offset, value, p); }
[[gnu::always_inline]] nce void store_doubleword_scatter_shifted_offset(uint64_t *base, uint64x2_t offset, uint64x2_t value, mve_pred16_t p) { return vstrdq_scatter_shifted_offset_p_u64(base, offset, value, p); }
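// Scalar saturating/rounding shifts on 32-bit and 64-bit general-purpose values
// (UQRSHLL, SQSHL, ...). The immediate-shift forms take the shift amount as a
// template parameter because the underlying intrinsics require a compile-time
// constant.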
[[gnu::always_inline]] nce uint64_t shift_left_round_saturate_long_unsigned(uint64_t value, int32_t shift) { return uqrshll(value, shift); }
[[gnu::always_inline]] nce uint64_t shift_left_round_saturate_long_saturate48_unsigned(uint64_t value, int32_t shift) { return uqrshll_sat48(value, shift); }
template <int shift>[[gnu::always_inline]] nce uint64_t shift_left_long_saturate_unsigned(uint64_t value) { return uqshll(value, shift); }
[[gnu::always_inline]] nce uint32_t shift_left_round_saturate_unsigned(uint32_t value, int32_t shift) { return uqrshl(value, shift); }
template <int shift>[[gnu::always_inline]] nce int64_t shift_left_long_saturate_signed(int64_t value) { return sqshll(value, shift); }
template <int shift>[[gnu::always_inline]] nce uint32_t shift_left_saturate_unsigned(uint32_t value) { return uqshl(value, shift); }
template <int shift>[[gnu::always_inline]] nce int32_t shift_left_saturate_signed(int32_t value) { return sqshl(value, shift); }
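// Predication helpers: vpselq selects between two vectors lane-by-lane under a
// predicate, and vctp8q/vctp16q/vctp32q/vctp64q build a tail predicate whose
// first `a` lanes (clamped to the vector length) are active.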
[[gnu::always_inline]] nce int64x2_t predicate_select(int64x2_t a, int64x2_t b, mve_pred16_t p) { return vpselq_s64(a, b, p); }
[[gnu::always_inline]] nce mve_pred16_t create_tail_predicate8(uint32_t a) { return vctp8q(a); }
[[gnu::always_inline]] nce mve_pred16_t create_tail_predicate16(uint32_t a) { return vctp16q(a); }
[[gnu::always_inline]] nce mve_pred16_t create_tail_predicate32(uint32_t a) { return vctp32q(a); }
[[gnu::always_inline]] nce mve_pred16_t create_tail_predicate64(uint32_t a) { return vctp64q(a); }
[[gnu::always_inline]] nce mve_pred16_t create_tail_predicate8(uint32_t a, mve_pred16_t p) { return vctp8q_m(a, p); }
[[gnu::always_inline]] nce mve_pred16_t create_tail_predicate16(uint32_t a, mve_pred16_t p) { return vctp16q_m(a, p); }
[[gnu::always_inline]] nce mve_pred16_t create_tail_predicate32(uint32_t a, mve_pred16_t p) { return vctp32q_m(a, p); }
[[gnu::always_inline]] nce mve_pred16_t create_tail_predicate64(uint32_t a, mve_pred16_t p) { return vctp64q_m(a, p); }
[[gnu::always_inline]] nce uint64_t left_shift_long(uint64_t value, int32_t shift) { return lsll(value, shift); }
[[gnu::always_inline]] nce int64_t shift_right_long_arithmetic(int64_t value, int32_t shift) { return asrl(value, shift); }
[[gnu::always_inline]] nce int64_t shift_right_saturate_round_long_signed(int64_t value, int32_t shift) { return sqrshrl(value, shift); }
[[gnu::always_inline]] nce int64_t shift_right_saturate_round_long_saturate48_signed(int64_t value, int32_t shift) { return sqrshrl_sat48(value, shift); }
[[gnu::always_inline]] nce int32_t shift_right_saturate_round_signed(int32_t value, int32_t shift) { return sqrshr(value, shift); }
template <int shift>[[gnu::always_inline]] nce uint64_t shift_right_round_long_unsigned(uint64_t value) { return urshrl(value, shift); }
template <int shift>[[gnu::always_inline]] nce int64_t shift_right_round_long_signed(int64_t value) { return srshrl(value, shift); }
template <int shift>[[gnu::always_inline]] nce uint32_t shift_right_round_unsigned(uint32_t value) { return urshr(value, shift); }
template <int shift>[[gnu::always_inline]] nce int32_t shift_right_round_signed(int32_t value) { return srshr(value, shift); }
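// Usage sketch (illustrative only, not part of this header): a tail-predicated
// copy of `n` 32-bit words using the wrappers above; the helper name and its
// parameters are hypothetical.
//
//   void copy_u32(uint32_t *dst, uint32_t const *src, uint32_t n) {
//     for (uint32_t i = 0; i < n; i += 4) {
//       mve_pred16_t p = mve::create_tail_predicate32(n - i);  // active lanes: min(n - i, 4)
//       uint32x4_t v = mve::load_word(src + i, p);             // vldrwq_z_u32: inactive lanes read as 0
//       mve::store_word(dst + i, v, p);                        // vstrwq_p_u32: inactive lanes not written
//     }
//   }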
// clang-format on
} // namespace mve
#endif
#undef nce