Argon 0.1.0
Loading...
Searching...
No Matches
float.hpp
1#pragma once
2#include <arm_mve.h>
3#include "neon_float.hpp"
4#ifdef __cplusplus
5#ifdef __clang__
6#define nce constexpr
7#else
8#define nce inline
9#endif
10
11namespace mve {
12// clang-format off
// Primary templates whose result type T must be named explicitly by the caller.
// Only the declarations are visible here: `create` takes two 64-bit words and
// `uninitialized` takes no arguments.
13template <typename T> nce T create(uint64_t a, uint64_t b);
14template <typename T> nce T uninitialized();
// Primary templates for the predicated rounding conversions that take only the
// source vector `a` and predicate `p`. T is the result vector type; the
// float16x8_t forms are explicitly specialised further down in this header.
15template <typename T> nce T convert_round_to_nearest_with_ties_away_from_zero(float16x8_t a, mve_pred16_t p);
16template <typename T> nce T convert_round_to_nearest_with_ties_away_from_zero(float32x4_t a, mve_pred16_t p);
17template <typename T> nce T convert_round_to_nearest_with_ties_to_even(float16x8_t a, mve_pred16_t p);
18template <typename T> nce T convert_round_to_nearest_with_ties_to_even(float32x4_t a, mve_pred16_t p);
19template <typename T> nce T convert_round_toward_positive_infinity(float16x8_t a, mve_pred16_t p);
20template <typename T> nce T convert_round_toward_positive_infinity(float32x4_t a, mve_pred16_t p);
21template <typename T> nce T convert_round_toward_negative_infinity(float16x8_t a, mve_pred16_t p);
22template <typename T> nce T convert_round_toward_negative_infinity(float32x4_t a, mve_pred16_t p);
// Primary templates for `convert`, one per (argument list) shape. T is the
// result vector type; the `template <>` definitions later in this header
// specialise these declarations.
// Forms taking (inactive, a, p): integer -> float.
23template <typename T> nce T convert(float16x8_t inactive, int16x8_t a, mve_pred16_t p);
24template <typename T> nce T convert(float16x8_t inactive, uint16x8_t a, mve_pred16_t p);
25template <typename T> nce T convert(float32x4_t inactive, int32x4_t a, mve_pred16_t p);
26template <typename T> nce T convert(float32x4_t inactive, uint32x4_t a, mve_pred16_t p);
// Forms taking (a, p) with an integer source.
27template <typename T> nce T convert(uint16x8_t a, mve_pred16_t p);
28template <typename T> nce T convert(int16x8_t a, mve_pred16_t p);
29template <typename T> nce T convert(int32x4_t a, mve_pred16_t p);
30template <typename T> nce T convert(uint32x4_t a, mve_pred16_t p);
// Forms taking (inactive, a, p): float -> integer.
31template <typename T> nce T convert(int16x8_t inactive, float16x8_t a, mve_pred16_t p);
32template <typename T> nce T convert(int32x4_t inactive, float32x4_t a, mve_pred16_t p);
33template <typename T> nce T convert(uint16x8_t inactive, float16x8_t a, mve_pred16_t p);
34template <typename T> nce T convert(uint32x4_t inactive, float32x4_t a, mve_pred16_t p);
// Forms taking (a, p) with a float source.
35template <typename T> nce T convert(float16x8_t a, mve_pred16_t p);
36template <typename T> nce T convert(float32x4_t a, mve_pred16_t p);
// float16x8_t -> uint16x8_t conversions with an explicit `inactive` vector.
// Each overload forwards (inactive, a, p) unchanged to vcvt{a,n,p,m}q_m_u16_f16.
// NOTE(review): how `inactive` and `p` affect individual lanes is defined by the
// intrinsic, not by these wrappers — see the ACLE MVE reference.
37[[gnu::always_inline]] nce uint16x8_t convert_round_to_nearest_with_ties_away_from_zero(uint16x8_t inactive, float16x8_t a, mve_pred16_t p) { return vcvtaq_m_u16_f16(inactive, a, p); }
38[[gnu::always_inline]] nce uint16x8_t convert_round_to_nearest_with_ties_to_even(uint16x8_t inactive, float16x8_t a, mve_pred16_t p) { return vcvtnq_m_u16_f16(inactive, a, p); }
39[[gnu::always_inline]] nce uint16x8_t convert_round_toward_positive_infinity(uint16x8_t inactive, float16x8_t a, mve_pred16_t p) { return vcvtpq_m_u16_f16(inactive, a, p); }
40[[gnu::always_inline]] nce uint16x8_t convert_round_toward_negative_infinity(uint16x8_t inactive, float16x8_t a, mve_pred16_t p) { return vcvtmq_m_u16_f16(inactive, a, p); }
// uint16x8_t <-> float16x8_t `convert` definitions.
// The `template <>` lines specialise the primary `template <typename T>`
// declarations; the `template <int imm6>` lines are separate overloads that pass
// `imm6` as the immediate argument of the `_n` intrinsic.
// NOTE(review): the meaning of imm6 (presumably a fixed-point fraction-bit
// count) comes from the intrinsic — confirm against ACLE.
// float16 -> uint16 with `inactive`: vcvtq_m_u16_f16 / vcvtq_m_n_u16_f16.
41template <> [[gnu::always_inline]] nce uint16x8_t convert(uint16x8_t inactive, float16x8_t a, mve_pred16_t p) { return vcvtq_m_u16_f16(inactive, a, p); }
42template <int imm6>[[gnu::always_inline]] nce uint16x8_t convert(uint16x8_t inactive, float16x8_t a, mve_pred16_t p) { return vcvtq_m_n_u16_f16(inactive, a, imm6, p); }
// uint16 -> float16 without `inactive`: vcvtq_x_f16_u16 / vcvtq_x_n_f16_u16.
43template <> [[gnu::always_inline]] nce float16x8_t convert(uint16x8_t a, mve_pred16_t p) { return vcvtq_x_f16_u16(a, p); }
44template <int imm6>[[gnu::always_inline]] nce float16x8_t convert(uint16x8_t a, mve_pred16_t p) { return vcvtq_x_n_f16_u16(a, imm6, p); }
// float16x8_t -> int16x8_t conversions with an explicit `inactive` vector.
// Each overload forwards (inactive, a, p) unchanged to vcvt{a,n,p,m}q_m_s16_f16.
45[[gnu::always_inline]] nce int16x8_t convert_round_to_nearest_with_ties_away_from_zero(int16x8_t inactive, float16x8_t a, mve_pred16_t p) { return vcvtaq_m_s16_f16(inactive, a, p); }
46[[gnu::always_inline]] nce int16x8_t convert_round_to_nearest_with_ties_to_even(int16x8_t inactive, float16x8_t a, mve_pred16_t p) { return vcvtnq_m_s16_f16(inactive, a, p); }
47[[gnu::always_inline]] nce int16x8_t convert_round_toward_positive_infinity(int16x8_t inactive, float16x8_t a, mve_pred16_t p) { return vcvtpq_m_s16_f16(inactive, a, p); }
48[[gnu::always_inline]] nce int16x8_t convert_round_toward_negative_infinity(int16x8_t inactive, float16x8_t a, mve_pred16_t p) { return vcvtmq_m_s16_f16(inactive, a, p); }
// int16x8_t <-> float16x8_t `convert` definitions.
// `template <>` lines specialise the primary `template <typename T>`
// declarations; `template <int imm6>` lines are separate overloads that pass
// `imm6` as the immediate argument of the `_n` intrinsic.
// float16 -> int16 with `inactive`: vcvtq_m_s16_f16 / vcvtq_m_n_s16_f16.
49template <> [[gnu::always_inline]] nce int16x8_t convert(int16x8_t inactive, float16x8_t a, mve_pred16_t p) { return vcvtq_m_s16_f16(inactive, a, p); }
50template <int imm6>[[gnu::always_inline]] nce int16x8_t convert(int16x8_t inactive, float16x8_t a, mve_pred16_t p) { return vcvtq_m_n_s16_f16(inactive, a, imm6, p); }
// int16 -> float16 without `inactive`: vcvtq_x_f16_s16 / vcvtq_x_n_f16_s16.
51template <> [[gnu::always_inline]] nce float16x8_t convert(int16x8_t a, mve_pred16_t p) { return vcvtq_x_f16_s16(a, p); }
52template <int imm6>[[gnu::always_inline]] nce float16x8_t convert(int16x8_t a, mve_pred16_t p) { return vcvtq_x_n_f16_s16(a, imm6, p); }
// float32x4_t -> int32x4_t conversions with an explicit `inactive` vector.
// Each overload forwards (inactive, a, p) unchanged to vcvt{a,n,p,m}q_m_s32_f32.
53[[gnu::always_inline]] nce int32x4_t convert_round_to_nearest_with_ties_away_from_zero(int32x4_t inactive, float32x4_t a, mve_pred16_t p) { return vcvtaq_m_s32_f32(inactive, a, p); }
54[[gnu::always_inline]] nce int32x4_t convert_round_to_nearest_with_ties_to_even(int32x4_t inactive, float32x4_t a, mve_pred16_t p) { return vcvtnq_m_s32_f32(inactive, a, p); }
55[[gnu::always_inline]] nce int32x4_t convert_round_toward_positive_infinity(int32x4_t inactive, float32x4_t a, mve_pred16_t p) { return vcvtpq_m_s32_f32(inactive, a, p); }
56[[gnu::always_inline]] nce int32x4_t convert_round_toward_negative_infinity(int32x4_t inactive, float32x4_t a, mve_pred16_t p) { return vcvtmq_m_s32_f32(inactive, a, p); }
// int32x4_t <-> float32x4_t `convert` definitions.
// `template <>` lines specialise the primary `template <typename T>`
// declarations; `template <int imm6>` lines are separate overloads that pass
// `imm6` as the immediate argument of the `_n` intrinsic.
// float32 -> int32 with `inactive`: vcvtq_m_s32_f32 / vcvtq_m_n_s32_f32.
57template <> [[gnu::always_inline]] nce int32x4_t convert(int32x4_t inactive, float32x4_t a, mve_pred16_t p) { return vcvtq_m_s32_f32(inactive, a, p); }
58template <int imm6>[[gnu::always_inline]] nce int32x4_t convert(int32x4_t inactive, float32x4_t a, mve_pred16_t p) { return vcvtq_m_n_s32_f32(inactive, a, imm6, p); }
// int32 -> float32 without `inactive`: vcvtq_x_f32_s32 / vcvtq_x_n_f32_s32.
59template <> [[gnu::always_inline]] nce float32x4_t convert(int32x4_t a, mve_pred16_t p) { return vcvtq_x_f32_s32(a, p); }
60template <int imm6>[[gnu::always_inline]] nce float32x4_t convert(int32x4_t a, mve_pred16_t p) { return vcvtq_x_n_f32_s32(a, imm6, p); }
// float32x4_t gather loads. The `offset` template argument is passed to the
// intrinsic as its immediate offset operand.
//   (addr)      -> vldrwq_gather_base_f32
//   (addr, p)   -> vldrwq_gather_base_z_f32
//   (*addr)     -> vldrwq_gather_base_wb_f32
// NOTE(review): the pointer form presumably lets the intrinsic write the
// updated addresses back through `addr` — confirm against ACLE.
61template <int offset>[[gnu::always_inline]] nce float32x4_t load_word_gather_base(uint32x4_t addr) { return vldrwq_gather_base_f32(addr, offset); }
62template <int offset>[[gnu::always_inline]] nce float32x4_t load_word_gather_base(uint32x4_t addr, mve_pred16_t p) { return vldrwq_gather_base_z_f32(addr, offset, p); }
63template <int offset>[[gnu::always_inline]] nce float32x4_t load_word_gather_base(uint32x4_t *addr) { return vldrwq_gather_base_wb_f32(addr, offset); }
// float32x4_t scatter stores. The `offset` template argument is passed to the
// intrinsic as its immediate offset operand; nothing is returned.
//   (addr, value)       -> vstrwq_scatter_base_f32
//   (addr, value, p)    -> vstrwq_scatter_base_p_f32
//   (*addr, value)      -> vstrwq_scatter_base_wb_f32
//   (*addr, value, p)   -> vstrwq_scatter_base_wb_p_f32
// NOTE(review): the pointer forms presumably let the intrinsic write the
// updated addresses back through `addr` — confirm against ACLE.
64template <int offset>[[gnu::always_inline]] nce void store_word_scatter_base(uint32x4_t addr, float32x4_t value) { return vstrwq_scatter_base_f32(addr, offset, value); }
65template <int offset>[[gnu::always_inline]] nce void store_word_scatter_base(uint32x4_t addr, float32x4_t value, mve_pred16_t p) { return vstrwq_scatter_base_p_f32(addr, offset, value, p); }
66template <int offset>[[gnu::always_inline]] nce void store_word_scatter_base(uint32x4_t *addr, float32x4_t value) { return vstrwq_scatter_base_wb_f32(addr, offset, value); }
67template <int offset>[[gnu::always_inline]] nce void store_word_scatter_base(uint32x4_t *addr, float32x4_t value, mve_pred16_t p) { return vstrwq_scatter_base_wb_p_f32(addr, offset, value, p); }
// float32x4_t -> uint32x4_t conversions with an explicit `inactive` vector.
// Each overload forwards (inactive, a, p) unchanged to vcvt{a,n,p,m}q_m_u32_f32.
68[[gnu::always_inline]] nce uint32x4_t convert_round_to_nearest_with_ties_away_from_zero(uint32x4_t inactive, float32x4_t a, mve_pred16_t p) { return vcvtaq_m_u32_f32(inactive, a, p); }
69[[gnu::always_inline]] nce uint32x4_t convert_round_to_nearest_with_ties_to_even(uint32x4_t inactive, float32x4_t a, mve_pred16_t p) { return vcvtnq_m_u32_f32(inactive, a, p); }
70[[gnu::always_inline]] nce uint32x4_t convert_round_toward_positive_infinity(uint32x4_t inactive, float32x4_t a, mve_pred16_t p) { return vcvtpq_m_u32_f32(inactive, a, p); }
71[[gnu::always_inline]] nce uint32x4_t convert_round_toward_negative_infinity(uint32x4_t inactive, float32x4_t a, mve_pred16_t p) { return vcvtmq_m_u32_f32(inactive, a, p); }
// float32x4_t -> uint32x4_t `convert` with `inactive`.
// The `template <>` form specialises the primary declaration and calls
// vcvtq_m_u32_f32; the `template <int imm6>` overload passes `imm6` as the
// immediate argument of vcvtq_m_n_u32_f32.
72template <> [[gnu::always_inline]] nce uint32x4_t convert(uint32x4_t inactive, float32x4_t a, mve_pred16_t p) { return vcvtq_m_u32_f32(inactive, a, p); }
73template <int imm6>[[gnu::always_inline]] nce uint32x4_t convert(uint32x4_t inactive, float32x4_t a, mve_pred16_t p) { return vcvtq_m_n_u32_f32(inactive, a, imm6, p); }
// Predicated pointer form of the float32x4_t gather load: forwards
// (addr, offset, p) to vldrwq_gather_base_wb_z_f32, with `offset` supplied as a
// template argument.
74template <int offset>[[gnu::always_inline]] nce float32x4_t load_word_gather_base(uint32x4_t *addr, mve_pred16_t p) { return vldrwq_gather_base_wb_z_f32(addr, offset, p); }
// uint32x4_t -> float32x4_t `convert` without `inactive`.
// The `template <>` form calls vcvtq_x_f32_u32; the `template <int imm6>`
// overload passes `imm6` as the immediate argument of vcvtq_x_n_f32_u32.
75template <> [[gnu::always_inline]] nce float32x4_t convert(uint32x4_t a, mve_pred16_t p) { return vcvtq_x_f32_u32(a, p); }
76template <int imm6>[[gnu::always_inline]] nce float32x4_t convert(uint32x4_t a, mve_pred16_t p) { return vcvtq_x_n_f32_u32(a, imm6, p); }
// float16x8_t element reversal with an explicit `inactive` vector; forwards
// (inactive, a, p) to vrev32q_m_f16 / vrev64q_m_f16 respectively.
77[[gnu::always_inline]] nce float16x8_t reverse_32bit(float16x8_t inactive, float16x8_t a, mve_pred16_t p) { return vrev32q_m_f16(inactive, a, p); }
78[[gnu::always_inline]] nce float16x8_t reverse_64bit(float16x8_t inactive, float16x8_t a, mve_pred16_t p) { return vrev64q_m_f16(inactive, a, p); }
// float16x8_t vector-vs-vector comparisons. Each returns the mve_pred16_t
// produced by the intrinsic. The two-argument overloads call vcmp<cc>q_f16; the
// overloads taking `p` call vcmp<cc>q_m_f16 with `p` as the last argument.
// The predicated compare_less_than overload is defined a few lines further down.
79[[gnu::always_inline]] nce mve_pred16_t compare_equal(float16x8_t a, float16x8_t b) { return vcmpeqq_f16(a, b); }
80[[gnu::always_inline]] nce mve_pred16_t compare_equal(float16x8_t a, float16x8_t b, mve_pred16_t p) { return vcmpeqq_m_f16(a, b, p); }
81[[gnu::always_inline]] nce mve_pred16_t compare_not_equal(float16x8_t a, float16x8_t b) { return vcmpneq_f16(a, b); }
82[[gnu::always_inline]] nce mve_pred16_t compare_not_equal(float16x8_t a, float16x8_t b, mve_pred16_t p) { return vcmpneq_m_f16(a, b, p); }
83[[gnu::always_inline]] nce mve_pred16_t compare_greater_than_or_equal(float16x8_t a, float16x8_t b) { return vcmpgeq_f16(a, b); }
84[[gnu::always_inline]] nce mve_pred16_t compare_greater_than_or_equal(float16x8_t a, float16x8_t b, mve_pred16_t p) { return vcmpgeq_m_f16(a, b, p); }
85[[gnu::always_inline]] nce mve_pred16_t compare_greater_than(float16x8_t a, float16x8_t b) { return vcmpgtq_f16(a, b); }
86[[gnu::always_inline]] nce mve_pred16_t compare_greater_than(float16x8_t a, float16x8_t b, mve_pred16_t p) { return vcmpgtq_m_f16(a, b, p); }
87[[gnu::always_inline]] nce mve_pred16_t compare_less_than_or_equal(float16x8_t a, float16x8_t b) { return vcmpleq_f16(a, b); }
88[[gnu::always_inline]] nce mve_pred16_t compare_less_than_or_equal(float16x8_t a, float16x8_t b, mve_pred16_t p) { return vcmpleq_m_f16(a, b, p); }
89[[gnu::always_inline]] nce mve_pred16_t compare_less_than(float16x8_t a, float16x8_t b) { return vcmpltq_f16(a, b); }
// float16x8_t min / max with an explicit `inactive` vector; forwards
// (inactive, a, b, p) to vminnmq_m_f16 / vmaxnmq_m_f16.
90[[gnu::always_inline]] nce float16x8_t min(float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) { return vminnmq_m_f16(inactive, a, b, p); }
91[[gnu::always_inline]] nce float16x8_t max(float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) { return vmaxnmq_m_f16(inactive, a, b, p); }
// Predicated float16x8_t less-than comparison; forwards (a, b, p) to
// vcmpltq_m_f16 and returns the resulting predicate.
92[[gnu::always_inline]] nce mve_pred16_t compare_less_than(float16x8_t a, float16x8_t b, mve_pred16_t p) { return vcmpltq_m_f16(a, b, p); }
// Predicated float16x8_t min without `inactive`: forwards to vminnmq_x_f16.
93[[gnu::always_inline]] nce float16x8_t min(float16x8_t a, float16x8_t b, mve_pred16_t p) { return vminnmq_x_f16(a, b, p); }
// Wrappers over vminnmaq_f16 / vminnmaq_m_f16.
// NOTE(review): the vmaxnmaq_* counterpart in this header is exposed as
// `max_add`, so the two names are not parallel — confirm the intended naming
// against the ACLE description of VMINNMA/VMAXNMA.
94[[gnu::always_inline]] nce float16x8_t min_reduce_add(float16x8_t a, float16x8_t b) { return vminnmaq_f16(a, b); }
95[[gnu::always_inline]] nce float16x8_t min_reduce_add(float16x8_t a, float16x8_t b, mve_pred16_t p) { return vminnmaq_m_f16(a, b, p); }
// Predicated float16x8_t max without `inactive`: forwards to vmaxnmq_x_f16.
96[[gnu::always_inline]] nce float16x8_t max(float16x8_t a, float16x8_t b, mve_pred16_t p) { return vmaxnmq_x_f16(a, b, p); }
// Wrappers over vmaxnmaq_f16 / vmaxnmaq_m_f16.
// NOTE(review): the vminnmaq_* counterpart in this header is exposed as
// `min_reduce_add`, so the two names are not parallel — confirm intended naming.
97[[gnu::always_inline]] nce float16x8_t max_add(float16x8_t a, float16x8_t b) { return vmaxnmaq_f16(a, b); }
98[[gnu::always_inline]] nce float16x8_t max_add(float16x8_t a, float16x8_t b, mve_pred16_t p) { return vmaxnmaq_m_f16(a, b, p); }
// Predicated float16x8_t element reversal without `inactive`; forwards (a, p)
// to vrev32q_x_f16 / vrev64q_x_f16 respectively.
99[[gnu::always_inline]] nce float16x8_t reverse_32bit(float16x8_t a, mve_pred16_t p) { return vrev32q_x_f16(a, p); }
100[[gnu::always_inline]] nce float16x8_t reverse_64bit(float16x8_t a, mve_pred16_t p) { return vrev64q_x_f16(a, p); }
// Forwards (inactive, a, p) to vdupq_m_n_f16, where `a` is a scalar float16_t.
101[[gnu::always_inline]] nce float16x8_t duplicate(float16x8_t inactive, float16_t a, mve_pred16_t p) { return vdupq_m_n_f16(inactive, a, p); }
// Non-template overload: passes `t` to vuninitializedq and returns its result.
102[[gnu::always_inline]] nce float16x8_t uninitialized(float16x8_t t) { return vuninitializedq(t); }
// float16x8_t vector-vector arithmetic with an explicit `inactive` vector.
// Each forwards (inactive, a, b, p) to the `_m_f16` intrinsic:
// vabdq, vaddq, vmulq, vsubq.
103[[gnu::always_inline]] nce float16x8_t subtract_absolute(float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) { return vabdq_m_f16(inactive, a, b, p); }
104[[gnu::always_inline]] nce float16x8_t add(float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) { return vaddq_m_f16(inactive, a, b, p); }
105[[gnu::always_inline]] nce float16x8_t multiply(float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) { return vmulq_m_f16(inactive, a, b, p); }
106[[gnu::always_inline]] nce float16x8_t subtract(float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) { return vsubq_m_f16(inactive, a, b, p); }
// Predicated float16x8_t arithmetic. The (a, b, p) overloads call the `_x_f16`
// intrinsics (vabdq, vaddq, vmulq, vsubq); `abs` with `inactive` calls
// vabsq_m_f16.
107[[gnu::always_inline]] nce float16x8_t subtract_absolute(float16x8_t a, float16x8_t b, mve_pred16_t p) { return vabdq_x_f16(a, b, p); }
108[[gnu::always_inline]] nce float16x8_t abs(float16x8_t inactive, float16x8_t a, mve_pred16_t p) { return vabsq_m_f16(inactive, a, p); }
109[[gnu::always_inline]] nce float16x8_t add(float16x8_t a, float16x8_t b, mve_pred16_t p) { return vaddq_x_f16(a, b, p); }
110[[gnu::always_inline]] nce float16x8_t multiply(float16x8_t a, float16x8_t b, mve_pred16_t p) { return vmulq_x_f16(a, b, p); }
111[[gnu::always_inline]] nce float16x8_t subtract(float16x8_t a, float16x8_t b, mve_pred16_t p) { return vsubq_x_f16(a, b, p); }
// float16x8_t arithmetic with a scalar float16_t operand.
// add / multiply / subtract with `inactive` call the `_m_n_f16` intrinsics.
112[[gnu::always_inline]] nce float16x8_t add(float16x8_t inactive, float16x8_t a, float16_t b, mve_pred16_t p) { return vaddq_m_n_f16(inactive, a, b, p); }
113[[gnu::always_inline]] nce float16x8_t multiply(float16x8_t inactive, float16x8_t a, float16_t b, mve_pred16_t p) { return vmulq_m_n_f16(inactive, a, b, p); }
// Forwards (m1, m2, add) to vfmasq_n_f16 / vfmasq_m_n_f16; `add` is the scalar.
114[[gnu::always_inline]] nce float16x8_t multiply_add_scalar_fused(float16x8_t m1, float16x8_t m2, float16_t add) { return vfmasq_n_f16(m1, m2, add); }
115[[gnu::always_inline]] nce float16x8_t multiply_add_scalar_fused(float16x8_t m1, float16x8_t m2, float16_t add, mve_pred16_t p) { return vfmasq_m_n_f16(m1, m2, add, p); }
116[[gnu::always_inline]] nce float16x8_t subtract(float16x8_t inactive, float16x8_t a, float16_t b, mve_pred16_t p) { return vsubq_m_n_f16(inactive, a, b, p); }
// Predicated abs without `inactive`: forwards (a, p) to vabsq_x_f16.
117[[gnu::always_inline]] nce float16x8_t abs(float16x8_t a, mve_pred16_t p) { return vabsq_x_f16(a, p); }
// float16x8_t (vector) op float16_t (scalar) without `inactive`.
// Unpredicated overloads call v{add,sub}q_n_f16; overloads taking `p` call
// v{add,mul,sub}q_x_n_f16. No unpredicated scalar `multiply` is defined in this
// group.
118[[gnu::always_inline]] nce float16x8_t add(float16x8_t a, float16_t b) { return vaddq_n_f16(a, b); }
119[[gnu::always_inline]] nce float16x8_t add(float16x8_t a, float16_t b, mve_pred16_t p) { return vaddq_x_n_f16(a, b, p); }
120[[gnu::always_inline]] nce float16x8_t multiply(float16x8_t a, float16_t b, mve_pred16_t p) { return vmulq_x_n_f16(a, b, p); }
121[[gnu::always_inline]] nce float16x8_t subtract(float16x8_t a, float16_t b) { return vsubq_n_f16(a, b); }
122[[gnu::always_inline]] nce float16x8_t subtract(float16x8_t a, float16_t b, mve_pred16_t p) { return vsubq_x_n_f16(a, b, p); }
// float16x8_t `round` overloads: (a) -> vrndq_f16, (inactive, a, p) ->
// vrndq_m_f16, (a, p) -> vrndq_x_f16.
// NOTE(review): the rounding mode applied is whatever VRINT (no suffix) defines
// — confirm against ACLE before relying on it.
123[[gnu::always_inline]] nce float16x8_t round(float16x8_t a) { return vrndq_f16(a); }
124[[gnu::always_inline]] nce float16x8_t round(float16x8_t inactive, float16x8_t a, mve_pred16_t p) { return vrndq_m_f16(inactive, a, p); }
125[[gnu::always_inline]] nce float16x8_t round(float16x8_t a, mve_pred16_t p) { return vrndq_x_f16(a, p); }
// float16x8_t directed-rounding wrappers. Intrinsic families used:
//   round_to_nearest_with_ties_to_even -> vrndnq[_m|_x]_f16
//   round_toward_negative_infinity     -> vrndmq[_m|_x]_f16
//   round_toward_positive_infinity     -> vrndpq[_m]_f16
// (a) is unpredicated, (inactive, a, p) uses the `_m` form, (a, p) the `_x` form.
126[[gnu::always_inline]] nce float16x8_t round_to_nearest_with_ties_to_even(float16x8_t a) { return vrndnq_f16(a); }
127[[gnu::always_inline]] nce float16x8_t round_to_nearest_with_ties_to_even(float16x8_t inactive, float16x8_t a, mve_pred16_t p) { return vrndnq_m_f16(inactive, a, p); }
128[[gnu::always_inline]] nce float16x8_t round_toward_negative_infinity(float16x8_t inactive, float16x8_t a, mve_pred16_t p) { return vrndmq_m_f16(inactive, a, p); }
129[[gnu::always_inline]] nce float16x8_t round_toward_positive_infinity(float16x8_t inactive, float16x8_t a, mve_pred16_t p) { return vrndpq_m_f16(inactive, a, p); }
130[[gnu::always_inline]] nce float16x8_t round_to_nearest_with_ties_to_even(float16x8_t a, mve_pred16_t p) { return vrndnq_x_f16(a, p); }
131[[gnu::always_inline]] nce float16x8_t round_toward_negative_infinity(float16x8_t a) { return vrndmq_f16(a); }
132[[gnu::always_inline]] nce float16x8_t round_toward_negative_infinity(float16x8_t a, mve_pred16_t p) { return vrndmq_x_f16(a, p); }
133[[gnu::always_inline]] nce float16x8_t round_toward_positive_infinity(float16x8_t a) { return vrndpq_f16(a); }
// float16x8_t bitwise AND with an explicit `inactive` vector; forwards
// (inactive, a, b, p) to vandq_m_f16.
134[[gnu::always_inline]] nce float16x8_t bitwise_and(float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) { return vandq_m_f16(inactive, a, b, p); }
// Remaining float16x8_t rounding wrappers in this run:
//   round_toward_positive_infinity(a, p)          -> vrndpq_x_f16
//   round_to_nearest_with_ties_away_from_zero     -> vrndaq[_m|_x]_f16
//   round_inexact(a)                              -> vrndxq_f16
135[[gnu::always_inline]] nce float16x8_t round_toward_positive_infinity(float16x8_t a, mve_pred16_t p) { return vrndpq_x_f16(a, p); }
136[[gnu::always_inline]] nce float16x8_t round_to_nearest_with_ties_away_from_zero(float16x8_t a) { return vrndaq_f16(a); }
137[[gnu::always_inline]] nce float16x8_t round_to_nearest_with_ties_away_from_zero(float16x8_t inactive, float16x8_t a, mve_pred16_t p) { return vrndaq_m_f16(inactive, a, p); }
138[[gnu::always_inline]] nce float16x8_t round_to_nearest_with_ties_away_from_zero(float16x8_t a, mve_pred16_t p) { return vrndaq_x_f16(a, p); }
139[[gnu::always_inline]] nce float16x8_t round_inexact(float16x8_t a) { return vrndxq_f16(a); }
// float32x4_t -> float16x8_t narrowing wrappers. `a` (float16x8_t) and `b`
// (float32x4_t) are forwarded in that order to vcvtbq_f16_f32,
// vcvtbq_m_f16_f32 (with `p`) and vcvttq_f16_f32.
// NOTE(review): presumably `a` supplies the half-width lanes that are not
// overwritten — confirm against ACLE.
140[[gnu::always_inline]] nce float16x8_t convert_bottom(float16x8_t a, float32x4_t b) { return vcvtbq_f16_f32(a, b); }
141[[gnu::always_inline]] nce float16x8_t convert_bottom(float16x8_t a, float32x4_t b, mve_pred16_t p) { return vcvtbq_m_f16_f32(a, b, p); }
142[[gnu::always_inline]] nce float16x8_t convert_top(float16x8_t a, float32x4_t b) { return vcvttq_f16_f32(a, b); }
// Explicit specialisations of the (float16x8_t a, mve_pred16_t p) rounding
// conversion templates. The return type selects T (int16x8_t or uint16x8_t);
// each forwards (a, p) to vcvt{a,n,p,m}q_x_{s16,u16}_f16.
143template <> [[gnu::always_inline]] nce int16x8_t convert_round_to_nearest_with_ties_away_from_zero(float16x8_t a, mve_pred16_t p) { return vcvtaq_x_s16_f16(a, p); }
144template <> [[gnu::always_inline]] nce uint16x8_t convert_round_to_nearest_with_ties_away_from_zero(float16x8_t a, mve_pred16_t p) { return vcvtaq_x_u16_f16(a, p); }
145template <> [[gnu::always_inline]] nce int16x8_t convert_round_to_nearest_with_ties_to_even(float16x8_t a, mve_pred16_t p) { return vcvtnq_x_s16_f16(a, p); }
146template <> [[gnu::always_inline]] nce uint16x8_t convert_round_to_nearest_with_ties_to_even(float16x8_t a, mve_pred16_t p) { return vcvtnq_x_u16_f16(a, p); }
147template <> [[gnu::always_inline]] nce int16x8_t convert_round_toward_positive_infinity(float16x8_t a, mve_pred16_t p) { return vcvtpq_x_s16_f16(a, p); }
148template <> [[gnu::always_inline]] nce uint16x8_t convert_round_toward_positive_infinity(float16x8_t a, mve_pred16_t p) { return vcvtpq_x_u16_f16(a, p); }
149template <> [[gnu::always_inline]] nce int16x8_t convert_round_toward_negative_infinity(float16x8_t a, mve_pred16_t p) { return vcvtmq_x_s16_f16(a, p); }
150template <> [[gnu::always_inline]] nce uint16x8_t convert_round_toward_negative_infinity(float16x8_t a, mve_pred16_t p) { return vcvtmq_x_u16_f16(a, p); }
// float16x8_t -> float32x4_t widening wrappers: (a) -> vcvtbq_f32_f16,
// (a, p) -> vcvtbq_x_f32_f16, and convert_top(a) -> vcvttq_f32_f16.
151[[gnu::always_inline]] nce float32x4_t convert_bottom(float16x8_t a) { return vcvtbq_f32_f16(a); }
152[[gnu::always_inline]] nce float32x4_t convert_bottom(float16x8_t a, mve_pred16_t p) { return vcvtbq_x_f32_f16(a, p); }
153[[gnu::always_inline]] nce float32x4_t convert_top(float16x8_t a) { return vcvttq_f32_f16(a); }
// uint16x8_t / int16x8_t -> float16x8_t `convert` with an explicit `inactive`.
// `template <>` forms specialise the primary declarations and call
// vcvtq_m_f16_{u16,s16}; `template <int imm6>` overloads pass `imm6` as the
// immediate argument of vcvtq_m_n_f16_{u16,s16}.
154template <> [[gnu::always_inline]] nce float16x8_t convert(float16x8_t inactive, uint16x8_t a, mve_pred16_t p) { return vcvtq_m_f16_u16(inactive, a, p); }
155template <int imm6>[[gnu::always_inline]] nce float16x8_t convert(float16x8_t inactive, uint16x8_t a, mve_pred16_t p) { return vcvtq_m_n_f16_u16(inactive, a, imm6, p); }
156template <> [[gnu::always_inline]] nce float16x8_t convert(float16x8_t inactive, int16x8_t a, mve_pred16_t p) { return vcvtq_m_f16_s16(inactive, a, p); }
157template <int imm6>[[gnu::always_inline]] nce float16x8_t convert(float16x8_t inactive, int16x8_t a, mve_pred16_t p) { return vcvtq_m_n_f16_s16(inactive, a, imm6, p); }
// float16x8_t bitwise operations with an explicit `inactive` vector; each
// forwards (inactive, a, b, p) to vbicq_m_f16, veorq_m_f16 or vornq_m_f16.
158[[gnu::always_inline]] nce float16x8_t bitwise_clear(float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) { return vbicq_m_f16(inactive, a, b, p); }
159[[gnu::always_inline]] nce float16x8_t bitwise_xor(float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) { return veorq_m_f16(inactive, a, b, p); }
160[[gnu::always_inline]] nce float16x8_t bitwise_or_not(float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) { return vornq_m_f16(inactive, a, b, p); }
// float16x8_t complex add (rot90 variant) with an explicit `inactive` vector;
// forwards (inactive, a, b, p) to vcaddq_rot90_m_f16.
161[[gnu::always_inline]] nce float16x8_t complex_add_rotate_90(float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) { return vcaddq_rot90_m_f16(inactive, a, b, p); }
// float16x8_t bitwise wrappers:
//   bitwise_xor(a, b, p)              -> veorq_x_f16
//   bitwise_or_not(a, b) / (a, b, p)  -> vornq_f16 / vornq_x_f16
//   bitwise_or(a, b)                  -> vorrq_f16
//   bitwise_or(inactive, a, b, p)     -> vorrq_m_f16
162[[gnu::always_inline]] nce float16x8_t bitwise_xor(float16x8_t a, float16x8_t b, mve_pred16_t p) { return veorq_x_f16(a, b, p); }
163[[gnu::always_inline]] nce float16x8_t bitwise_or_not(float16x8_t a, float16x8_t b) { return vornq_f16(a, b); }
164[[gnu::always_inline]] nce float16x8_t bitwise_or_not(float16x8_t a, float16x8_t b, mve_pred16_t p) { return vornq_x_f16(a, b, p); }
165[[gnu::always_inline]] nce float16x8_t bitwise_or(float16x8_t a, float16x8_t b) { return vorrq_f16(a, b); }
166[[gnu::always_inline]] nce float16x8_t bitwise_or(float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) { return vorrq_m_f16(inactive, a, b, p); }
// float16x8_t complex add (rot270 variant) with an explicit `inactive` vector;
// forwards (inactive, a, b, p) to vcaddq_rot270_m_f16.
167[[gnu::always_inline]] nce float16x8_t complex_add_rotate_270(float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) { return vcaddq_rot270_m_f16(inactive, a, b, p); }
// Predicated float16x8_t complex multiply-accumulate wrappers. Each forwards
// (a, b, c, p) unchanged to vcmlaq[_rot90|_rot180|_rot270]_m_f16.
// NOTE(review): which operand is the accumulator is defined by the intrinsic
// (presumably `a`) — confirm against ACLE.
168[[gnu::always_inline]] nce float16x8_t complex_multiply_add(float16x8_t a, float16x8_t b, float16x8_t c, mve_pred16_t p) { return vcmlaq_m_f16(a, b, c, p); }
169[[gnu::always_inline]] nce float16x8_t complex_multiply_add_rotate_90(float16x8_t a, float16x8_t b, float16x8_t c, mve_pred16_t p) { return vcmlaq_rot90_m_f16(a, b, c, p); }
170[[gnu::always_inline]] nce float16x8_t complex_multiply_add_rotate_180(float16x8_t a, float16x8_t b, float16x8_t c, mve_pred16_t p) { return vcmlaq_rot180_m_f16(a, b, c, p); }
171[[gnu::always_inline]] nce float16x8_t complex_multiply_add_rotate_270(float16x8_t a, float16x8_t b, float16x8_t c, mve_pred16_t p) { return vcmlaq_rot270_m_f16(a, b, c, p); }
// float16x8_t complex multiply with an explicit `inactive` vector; each forwards
// (inactive, a, b, p) to vcmulq[_rot90|_rot180]_m_f16. The rot270 `inactive`
// overload is defined further down in this header.
172[[gnu::always_inline]] nce float16x8_t multiply_complex(float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) { return vcmulq_m_f16(inactive, a, b, p); }
173[[gnu::always_inline]] nce float16x8_t multiply_complex_rotate_90(float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) { return vcmulq_rot90_m_f16(inactive, a, b, p); }
174[[gnu::always_inline]] nce float16x8_t multiply_complex_rotate_180(float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) { return vcmulq_rot180_m_f16(inactive, a, b, p); }
// Assorted predicated float16x8_t wrappers:
//   round_inexact(inactive, a, p) -> vrndxq_m_f16
//   bitwise_clear(a, b, p)        -> vbicq_x_f16
//   negate(inactive, a, p)        -> vnegq_m_f16
//   bitwise_and(a, b, p)          -> vandq_x_f16
//   bitwise_or(a, b, p)           -> vorrq_x_f16
175[[gnu::always_inline]] nce float16x8_t round_inexact(float16x8_t inactive, float16x8_t a, mve_pred16_t p) { return vrndxq_m_f16(inactive, a, p); }
176[[gnu::always_inline]] nce float16x8_t bitwise_clear(float16x8_t a, float16x8_t b, mve_pred16_t p) { return vbicq_x_f16(a, b, p); }
177[[gnu::always_inline]] nce float16x8_t negate(float16x8_t inactive, float16x8_t a, mve_pred16_t p) { return vnegq_m_f16(inactive, a, p); }
178[[gnu::always_inline]] nce float16x8_t bitwise_and(float16x8_t a, float16x8_t b, mve_pred16_t p) { return vandq_x_f16(a, b, p); }
179[[gnu::always_inline]] nce float16x8_t bitwise_or(float16x8_t a, float16x8_t b, mve_pred16_t p) { return vorrq_x_f16(a, b, p); }
// Predicated float16x8_t complex add without `inactive`; forwards (a, b, p) to
// vcaddq_rot90_x_f16 / vcaddq_rot270_x_f16.
180[[gnu::always_inline]] nce float16x8_t complex_add_rotate_90(float16x8_t a, float16x8_t b, mve_pred16_t p) { return vcaddq_rot90_x_f16(a, b, p); }
181[[gnu::always_inline]] nce float16x8_t complex_add_rotate_270(float16x8_t a, float16x8_t b, mve_pred16_t p) { return vcaddq_rot270_x_f16(a, b, p); }
// float16x8_t complex multiply wrappers for the four rotations.
//   (a, b)               -> vcmulq[_rotN]_f16
//   (inactive, a, b, p)  -> vcmulq_rot270_m_f16 (only the rot270 form is here)
//   (a, b, p)            -> vcmulq[_rotN]_x_f16
182[[gnu::always_inline]] nce float16x8_t multiply_complex(float16x8_t a, float16x8_t b) { return vcmulq_f16(a, b); }
183[[gnu::always_inline]] nce float16x8_t multiply_complex_rotate_90(float16x8_t a, float16x8_t b) { return vcmulq_rot90_f16(a, b); }
184[[gnu::always_inline]] nce float16x8_t multiply_complex_rotate_180(float16x8_t a, float16x8_t b) { return vcmulq_rot180_f16(a, b); }
185[[gnu::always_inline]] nce float16x8_t multiply_complex_rotate_270(float16x8_t a, float16x8_t b) { return vcmulq_rot270_f16(a, b); }
186[[gnu::always_inline]] nce float16x8_t multiply_complex_rotate_270(float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p) { return vcmulq_rot270_m_f16(inactive, a, b, p); }
187[[gnu::always_inline]] nce float16x8_t multiply_complex(float16x8_t a, float16x8_t b, mve_pred16_t p) { return vcmulq_x_f16(a, b, p); }
188[[gnu::always_inline]] nce float16x8_t multiply_complex_rotate_90(float16x8_t a, float16x8_t b, mve_pred16_t p) { return vcmulq_rot90_x_f16(a, b, p); }
189[[gnu::always_inline]] nce float16x8_t multiply_complex_rotate_180(float16x8_t a, float16x8_t b, mve_pred16_t p) { return vcmulq_rot180_x_f16(a, b, p); }
190[[gnu::always_inline]] nce float16x8_t multiply_complex_rotate_270(float16x8_t a, float16x8_t b, mve_pred16_t p) { return vcmulq_rot270_x_f16(a, b, p); }
// Assorted float16x8_t wrappers, each a direct pass-through of its arguments:
//   predicate_select(a, b, p)                   -> vpselq_f16
//   bit_reverse_shift_right(inactive, a, b, p)  -> vbrsrq_m_n_f16 (b is int32_t)
//   convert_top(a, b, p)                        -> vcvttq_m_f16_f32
//   round_inexact(a, p)                         -> vrndxq_x_f16
//   negate(a, p)                                -> vnegq_x_f16
//   convert_top(a, p) returning float32x4_t     -> vcvttq_x_f32_f16
191[[gnu::always_inline]] nce float16x8_t predicate_select(float16x8_t a, float16x8_t b, mve_pred16_t p) { return vpselq_f16(a, b, p); }
192[[gnu::always_inline]] nce float16x8_t bit_reverse_shift_right(float16x8_t inactive, float16x8_t a, int32_t b, mve_pred16_t p) { return vbrsrq_m_n_f16(inactive, a, b, p); }
193[[gnu::always_inline]] nce float16x8_t convert_top(float16x8_t a, float32x4_t b, mve_pred16_t p) { return vcvttq_m_f16_f32(a, b, p); }
194[[gnu::always_inline]] nce float16x8_t round_inexact(float16x8_t a, mve_pred16_t p) { return vrndxq_x_f16(a, p); }
195[[gnu::always_inline]] nce float16x8_t negate(float16x8_t a, mve_pred16_t p) { return vnegq_x_f16(a, p); }
196[[gnu::always_inline]] nce float32x4_t convert_top(float16x8_t a, mve_pred16_t p) { return vcvttq_x_f32_f16(a, p); }
// Explicit specialisations of `template <typename T> T convert(float16x8_t,
// mve_pred16_t)`; the return type selects T. They forward (a, p) to
// vcvtq_x_s16_f16 and vcvtq_x_u16_f16 respectively.
197template <> [[gnu::always_inline]] nce int16x8_t convert(float16x8_t a, mve_pred16_t p) { return vcvtq_x_s16_f16(a, p); }
198template <> [[gnu::always_inline]] nce uint16x8_t convert(float16x8_t a, mve_pred16_t p) { return vcvtq_x_u16_f16(a, p); }
// float16x8_t -> int16x8_t / uint16x8_t `convert` overloads that pass `imm6` as
// the immediate argument of vcvtq_x_n_s16_f16 / vcvtq_x_n_u16_f16.
// NOTE(review): these two templates have identical template-parameter and
// function-parameter lists and differ only in return type, so a plain call such
// as convert<1>(a, p) looks ambiguous between them — confirm, and consider a
// way to select the signed/unsigned result explicitly.
199template <int imm6>[[gnu::always_inline]] nce int16x8_t convert(float16x8_t a, mve_pred16_t p) { return vcvtq_x_n_s16_f16(a, imm6, p); }
200template <int imm6>[[gnu::always_inline]] nce uint16x8_t convert(float16x8_t a, mve_pred16_t p) { return vcvtq_x_n_u16_f16(a, imm6, p); }
// float16x8_t vector-vs-scalar comparisons (`b` is a float16_t). Each returns
// the mve_pred16_t produced by the intrinsic. Two-argument overloads call
// vcmp<cc>q_n_f16; overloads taking `p` call vcmp<cc>q_m_n_f16.
201[[gnu::always_inline]] nce mve_pred16_t compare_equal(float16x8_t a, float16_t b) { return vcmpeqq_n_f16(a, b); }
202[[gnu::always_inline]] nce mve_pred16_t compare_equal(float16x8_t a, float16_t b, mve_pred16_t p) { return vcmpeqq_m_n_f16(a, b, p); }
203[[gnu::always_inline]] nce mve_pred16_t compare_not_equal(float16x8_t a, float16_t b) { return vcmpneq_n_f16(a, b); }
204[[gnu::always_inline]] nce mve_pred16_t compare_not_equal(float16x8_t a, float16_t b, mve_pred16_t p) { return vcmpneq_m_n_f16(a, b, p); }
205[[gnu::always_inline]] nce mve_pred16_t compare_greater_than_or_equal(float16x8_t a, float16_t b) { return vcmpgeq_n_f16(a, b); }
206[[gnu::always_inline]] nce mve_pred16_t compare_greater_than_or_equal(float16x8_t a, float16_t b, mve_pred16_t p) { return vcmpgeq_m_n_f16(a, b, p); }
207[[gnu::always_inline]] nce mve_pred16_t compare_greater_than(float16x8_t a, float16_t b) { return vcmpgtq_n_f16(a, b); }
208[[gnu::always_inline]] nce mve_pred16_t compare_greater_than(float16x8_t a, float16_t b, mve_pred16_t p) { return vcmpgtq_m_n_f16(a, b, p); }
209[[gnu::always_inline]] nce mve_pred16_t compare_less_than_or_equal(float16x8_t a, float16_t b) { return vcmpleq_n_f16(a, b); }
210[[gnu::always_inline]] nce mve_pred16_t compare_less_than_or_equal(float16x8_t a, float16_t b, mve_pred16_t p) { return vcmpleq_m_n_f16(a, b, p); }
211[[gnu::always_inline]] nce mve_pred16_t compare_less_than(float16x8_t a, float16_t b) { return vcmpltq_n_f16(a, b); }
212[[gnu::always_inline]] nce mve_pred16_t compare_less_than(float16x8_t a, float16_t b, mve_pred16_t p) { return vcmpltq_m_n_f16(a, b, p); }
// float16x8_t bit-reverse-shift-right wrappers with an int32_t operand `b`:
// (a, b) -> vbrsrq_n_f16, (a, b, p) -> vbrsrq_x_n_f16.
213[[gnu::always_inline]] nce float16x8_t bit_reverse_shift_right(float16x8_t a, int32_t b) { return vbrsrq_n_f16(a, b); }
214[[gnu::always_inline]] nce float16x8_t bit_reverse_shift_right(float16x8_t a, int32_t b, mve_pred16_t p) { return vbrsrq_x_n_f16(a, b, p); }
// int32x4_t / uint32x4_t -> float32x4_t `convert` with an explicit `inactive`.
// `template <>` forms specialise the primary declarations and call
// vcvtq_m_f32_{s32,u32}; `template <int imm6>` overloads pass `imm6` as the
// immediate argument of vcvtq_m_n_f32_{s32,u32}.
215template <> [[gnu::always_inline]] nce float32x4_t convert(float32x4_t inactive, int32x4_t a, mve_pred16_t p) { return vcvtq_m_f32_s32(inactive, a, p); }
216template <int imm6>[[gnu::always_inline]] nce float32x4_t convert(float32x4_t inactive, int32x4_t a, mve_pred16_t p) { return vcvtq_m_n_f32_s32(inactive, a, imm6, p); }
217template <> [[gnu::always_inline]] nce float32x4_t convert(float32x4_t inactive, uint32x4_t a, mve_pred16_t p) { return vcvtq_m_f32_u32(inactive, a, p); }
218template <int imm6>[[gnu::always_inline]] nce float32x4_t convert(float32x4_t inactive, uint32x4_t a, mve_pred16_t p) { return vcvtq_m_n_f32_u32(inactive, a, imm6, p); }
// float16x8_t -> float32x4_t widening with an explicit `inactive` vector;
// forwards (inactive, a, p) to vcvtbq_m_f32_f16 / vcvttq_m_f32_f16.
219[[gnu::always_inline]] nce float32x4_t convert_bottom(float32x4_t inactive, float16x8_t a, mve_pred16_t p) { return vcvtbq_m_f32_f16(inactive, a, p); }
220[[gnu::always_inline]] nce float32x4_t convert_top(float32x4_t inactive, float16x8_t a, mve_pred16_t p) { return vcvttq_m_f32_f16(inactive, a, p); }
// Assorted float32x4_t wrappers, each a direct pass-through of its arguments:
//   reverse_64bit(inactive, a, p) -> vrev64q_m_f32
//   compare_equal(a, b)           -> vcmpeqq_f32 (returns mve_pred16_t)
//   reverse_64bit(a, p)           -> vrev64q_x_f32
//   duplicate(inactive, a, p)     -> vdupq_m_n_f32 (a is a scalar float32_t)
//   uninitialized(t)              -> vuninitializedq
221[[gnu::always_inline]] nce float32x4_t reverse_64bit(float32x4_t inactive, float32x4_t a, mve_pred16_t p) { return vrev64q_m_f32(inactive, a, p); }
222[[gnu::always_inline]] nce mve_pred16_t compare_equal(float32x4_t a, float32x4_t b) { return vcmpeqq_f32(a, b); }
223[[gnu::always_inline]] nce float32x4_t reverse_64bit(float32x4_t a, mve_pred16_t p) { return vrev64q_x_f32(a, p); }
224[[gnu::always_inline]] nce float32x4_t duplicate(float32x4_t inactive, float32_t a, mve_pred16_t p) { return vdupq_m_n_f32(inactive, a, p); }
225[[gnu::always_inline]] nce float32x4_t uninitialized(float32x4_t t) { return vuninitializedq(t); }
// float32x4_t vector-vs-vector comparisons. Each returns the mve_pred16_t
// produced by the intrinsic. Two-argument overloads call vcmp<cc>q_f32;
// overloads taking `p` call vcmp<cc>q_m_f32. The predicated greater-than and
// less-than overloads are defined a few lines further down.
226[[gnu::always_inline]] nce mve_pred16_t compare_equal(float32x4_t a, float32x4_t b, mve_pred16_t p) { return vcmpeqq_m_f32(a, b, p); }
227[[gnu::always_inline]] nce mve_pred16_t compare_not_equal(float32x4_t a, float32x4_t b) { return vcmpneq_f32(a, b); }
228[[gnu::always_inline]] nce mve_pred16_t compare_not_equal(float32x4_t a, float32x4_t b, mve_pred16_t p) { return vcmpneq_m_f32(a, b, p); }
229[[gnu::always_inline]] nce mve_pred16_t compare_greater_than_or_equal(float32x4_t a, float32x4_t b) { return vcmpgeq_f32(a, b); }
230[[gnu::always_inline]] nce mve_pred16_t compare_greater_than_or_equal(float32x4_t a, float32x4_t b, mve_pred16_t p) { return vcmpgeq_m_f32(a, b, p); }
231[[gnu::always_inline]] nce mve_pred16_t compare_greater_than(float32x4_t a, float32x4_t b) { return vcmpgtq_f32(a, b); }
232[[gnu::always_inline]] nce mve_pred16_t compare_less_than_or_equal(float32x4_t a, float32x4_t b) { return vcmpleq_f32(a, b); }
233[[gnu::always_inline]] nce mve_pred16_t compare_less_than_or_equal(float32x4_t a, float32x4_t b, mve_pred16_t p) { return vcmpleq_m_f32(a, b, p); }
234[[gnu::always_inline]] nce mve_pred16_t compare_less_than(float32x4_t a, float32x4_t b) { return vcmpltq_f32(a, b); }
// float32x4_t min / max wrappers, interleaved with two predicated compares.
//   min / max (inactive, a, b, p) -> vminnmq_m_f32 / vmaxnmq_m_f32
//   min / max (a, b, p)           -> vminnmq_x_f32 / vmaxnmq_x_f32
//   compare_less_than / compare_greater_than (a, b, p)
//                                 -> vcmpltq_m_f32 / vcmpgtq_m_f32
//   min_reduce_add                -> vminnmaq_f32 / vminnmaq_m_f32
//   max_add                       -> vmaxnmaq_f32
// NOTE(review): `min_reduce_add` and `max_add` wrap the matching
// vminnmaq / vmaxnmaq intrinsics but are not named in parallel — confirm the
// intended naming against the ACLE description of VMINNMA/VMAXNMA.
235[[gnu::always_inline]] nce float32x4_t min(float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) { return vminnmq_m_f32(inactive, a, b, p); }
236[[gnu::always_inline]] nce mve_pred16_t compare_less_than(float32x4_t a, float32x4_t b, mve_pred16_t p) { return vcmpltq_m_f32(a, b, p); }
237[[gnu::always_inline]] nce float32x4_t min(float32x4_t a, float32x4_t b, mve_pred16_t p) { return vminnmq_x_f32(a, b, p); }
238[[gnu::always_inline]] nce float32x4_t min_reduce_add(float32x4_t a, float32x4_t b) { return vminnmaq_f32(a, b); }
239[[gnu::always_inline]] nce float32x4_t max(float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) { return vmaxnmq_m_f32(inactive, a, b, p); }
240[[gnu::always_inline]] nce mve_pred16_t compare_greater_than(float32x4_t a, float32x4_t b, mve_pred16_t p) { return vcmpgtq_m_f32(a, b, p); }
241[[gnu::always_inline]] nce float32x4_t min_reduce_add(float32x4_t a, float32x4_t b, mve_pred16_t p) { return vminnmaq_m_f32(a, b, p); }
242[[gnu::always_inline]] nce float32x4_t max(float32x4_t a, float32x4_t b, mve_pred16_t p) { return vmaxnmq_x_f32(a, b, p); }
243[[gnu::always_inline]] nce float32x4_t max_add(float32x4_t a, float32x4_t b) { return vmaxnmaq_f32(a, b); }
244[[gnu::always_inline]] nce float32x4_t subtract_absolute(float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) { return vabdq_m_f32(inactive, a, b, p); }
245[[gnu::always_inline]] nce float32x4_t add(float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) { return vaddq_m_f32(inactive, a, b, p); }
246[[gnu::always_inline]] nce float32x4_t multiply(float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) { return vmulq_m_f32(inactive, a, b, p); }
247[[gnu::always_inline]] nce float32x4_t subtract(float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) { return vsubq_m_f32(inactive, a, b, p); }
248[[gnu::always_inline]] nce float32x4_t bitwise_clear(float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) { return vbicq_m_f32(inactive, a, b, p); }
249[[gnu::always_inline]] nce float32x4_t bitwise_and(float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) { return vandq_m_f32(inactive, a, b, p); }
250[[gnu::always_inline]] nce float32x4_t bitwise_xor(float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) { return veorq_m_f32(inactive, a, b, p); }
251[[gnu::always_inline]] nce float32x4_t bitwise_or_not(float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) { return vornq_m_f32(inactive, a, b, p); }
252[[gnu::always_inline]] nce float32x4_t complex_add_rotate_90(float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) { return vcaddq_rot90_m_f32(inactive, a, b, p); }
253[[gnu::always_inline]] nce float32x4_t bitwise_xor(float32x4_t a, float32x4_t b, mve_pred16_t p) { return veorq_x_f32(a, b, p); }
254[[gnu::always_inline]] nce float32x4_t bitwise_or_not(float32x4_t a, float32x4_t b) { return vornq_f32(a, b); }
255[[gnu::always_inline]] nce float32x4_t bitwise_or_not(float32x4_t a, float32x4_t b, mve_pred16_t p) { return vornq_x_f32(a, b, p); }
256[[gnu::always_inline]] nce float32x4_t bitwise_or(float32x4_t a, float32x4_t b) { return vorrq_f32(a, b); }
257[[gnu::always_inline]] nce float32x4_t bitwise_or(float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) { return vorrq_m_f32(inactive, a, b, p); }
258[[gnu::always_inline]] nce float32x4_t complex_add_rotate_270(float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) { return vcaddq_rot270_m_f32(inactive, a, b, p); }
259[[gnu::always_inline]] nce float32x4_t complex_multiply_add(float32x4_t a, float32x4_t b, float32x4_t c, mve_pred16_t p) { return vcmlaq_m_f32(a, b, c, p); }
260[[gnu::always_inline]] nce float32x4_t complex_multiply_add_rotate_90(float32x4_t a, float32x4_t b, float32x4_t c, mve_pred16_t p) { return vcmlaq_rot90_m_f32(a, b, c, p); }
261[[gnu::always_inline]] nce float32x4_t complex_multiply_add_rotate_180(float32x4_t a, float32x4_t b, float32x4_t c, mve_pred16_t p) { return vcmlaq_rot180_m_f32(a, b, c, p); }
262[[gnu::always_inline]] nce float32x4_t complex_multiply_add_rotate_270(float32x4_t a, float32x4_t b, float32x4_t c, mve_pred16_t p) { return vcmlaq_rot270_m_f32(a, b, c, p); }
263[[gnu::always_inline]] nce float32x4_t multiply_complex(float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) { return vcmulq_m_f32(inactive, a, b, p); }
264[[gnu::always_inline]] nce float32x4_t multiply_complex_rotate_90(float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) { return vcmulq_rot90_m_f32(inactive, a, b, p); }
265[[gnu::always_inline]] nce float32x4_t multiply_complex_rotate_180(float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) { return vcmulq_rot180_m_f32(inactive, a, b, p); }
266[[gnu::always_inline]] nce float32x4_t max_add(float32x4_t a, float32x4_t b, mve_pred16_t p) { return vmaxnmaq_m_f32(a, b, p); }
267[[gnu::always_inline]] nce float32x4_t subtract_absolute(float32x4_t a, float32x4_t b, mve_pred16_t p) { return vabdq_x_f32(a, b, p); }
268[[gnu::always_inline]] nce float32x4_t abs(float32x4_t inactive, float32x4_t a, mve_pred16_t p) { return vabsq_m_f32(inactive, a, p); }
269[[gnu::always_inline]] nce float32x4_t add(float32x4_t a, float32x4_t b, mve_pred16_t p) { return vaddq_x_f32(a, b, p); }
270[[gnu::always_inline]] nce float32x4_t multiply(float32x4_t a, float32x4_t b, mve_pred16_t p) { return vmulq_x_f32(a, b, p); }
271[[gnu::always_inline]] nce float32x4_t subtract(float32x4_t a, float32x4_t b, mve_pred16_t p) { return vsubq_x_f32(a, b, p); }
272[[gnu::always_inline]] nce float32x4_t round(float32x4_t inactive, float32x4_t a, mve_pred16_t p) { return vrndq_m_f32(inactive, a, p); }
273[[gnu::always_inline]] nce float32x4_t round_to_nearest_with_ties_to_even(float32x4_t inactive, float32x4_t a, mve_pred16_t p) { return vrndnq_m_f32(inactive, a, p); }
274[[gnu::always_inline]] nce float32x4_t round_toward_negative_infinity(float32x4_t inactive, float32x4_t a, mve_pred16_t p) { return vrndmq_m_f32(inactive, a, p); }
275[[gnu::always_inline]] nce float32x4_t round_toward_positive_infinity(float32x4_t inactive, float32x4_t a, mve_pred16_t p) { return vrndpq_m_f32(inactive, a, p); }
276[[gnu::always_inline]] nce float32x4_t round_to_nearest_with_ties_away_from_zero(float32x4_t inactive, float32x4_t a, mve_pred16_t p) { return vrndaq_m_f32(inactive, a, p); }
277[[gnu::always_inline]] nce float32x4_t round_inexact(float32x4_t inactive, float32x4_t a, mve_pred16_t p) { return vrndxq_m_f32(inactive, a, p); }
278[[gnu::always_inline]] nce float32x4_t bitwise_clear(float32x4_t a, float32x4_t b, mve_pred16_t p) { return vbicq_x_f32(a, b, p); }
279[[gnu::always_inline]] nce float32x4_t negate(float32x4_t inactive, float32x4_t a, mve_pred16_t p) { return vnegq_m_f32(inactive, a, p); }
280[[gnu::always_inline]] nce float32x4_t bitwise_and(float32x4_t a, float32x4_t b, mve_pred16_t p) { return vandq_x_f32(a, b, p); }
281[[gnu::always_inline]] nce float32x4_t bitwise_or(float32x4_t a, float32x4_t b, mve_pred16_t p) { return vorrq_x_f32(a, b, p); }
282[[gnu::always_inline]] nce float32x4_t complex_add_rotate_90(float32x4_t a, float32x4_t b, mve_pred16_t p) { return vcaddq_rot90_x_f32(a, b, p); }
283[[gnu::always_inline]] nce float32x4_t complex_add_rotate_270(float32x4_t a, float32x4_t b, mve_pred16_t p) { return vcaddq_rot270_x_f32(a, b, p); }
284[[gnu::always_inline]] nce float32x4_t multiply_complex(float32x4_t a, float32x4_t b) { return vcmulq_f32(a, b); }
285[[gnu::always_inline]] nce float32x4_t multiply_complex_rotate_90(float32x4_t a, float32x4_t b) { return vcmulq_rot90_f32(a, b); }
286[[gnu::always_inline]] nce float32x4_t multiply_complex_rotate_180(float32x4_t a, float32x4_t b) { return vcmulq_rot180_f32(a, b); }
287[[gnu::always_inline]] nce float32x4_t multiply_complex_rotate_270(float32x4_t a, float32x4_t b) { return vcmulq_rot270_f32(a, b); }
288[[gnu::always_inline]] nce float32x4_t multiply_complex_rotate_270(float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p) { return vcmulq_rot270_m_f32(inactive, a, b, p); }
289[[gnu::always_inline]] nce float32x4_t multiply_complex(float32x4_t a, float32x4_t b, mve_pred16_t p) { return vcmulq_x_f32(a, b, p); }
290[[gnu::always_inline]] nce float32x4_t multiply_complex_rotate_90(float32x4_t a, float32x4_t b, mve_pred16_t p) { return vcmulq_rot90_x_f32(a, b, p); }
291[[gnu::always_inline]] nce float32x4_t multiply_complex_rotate_180(float32x4_t a, float32x4_t b, mve_pred16_t p) { return vcmulq_rot180_x_f32(a, b, p); }
292[[gnu::always_inline]] nce float32x4_t multiply_complex_rotate_270(float32x4_t a, float32x4_t b, mve_pred16_t p) { return vcmulq_rot270_x_f32(a, b, p); }
293[[gnu::always_inline]] nce float32x4_t predicate_select(float32x4_t a, float32x4_t b, mve_pred16_t p) { return vpselq_f32(a, b, p); }
294[[gnu::always_inline]] nce float32x4_t add(float32x4_t inactive, float32x4_t a, float32_t b, mve_pred16_t p) { return vaddq_m_n_f32(inactive, a, b, p); }
295[[gnu::always_inline]] nce float32x4_t multiply(float32x4_t inactive, float32x4_t a, float32_t b, mve_pred16_t p) { return vmulq_m_n_f32(inactive, a, b, p); }
296[[gnu::always_inline]] nce float32x4_t multiply_add_scalar_fused(float32x4_t m1, float32x4_t m2, float32_t add) { return vfmasq_n_f32(m1, m2, add); }
297[[gnu::always_inline]] nce float32x4_t multiply_add_scalar_fused(float32x4_t m1, float32x4_t m2, float32_t add, mve_pred16_t p) { return vfmasq_m_n_f32(m1, m2, add, p); }
298[[gnu::always_inline]] nce float32x4_t subtract(float32x4_t inactive, float32x4_t a, float32_t b, mve_pred16_t p) { return vsubq_m_n_f32(inactive, a, b, p); }
299[[gnu::always_inline]] nce float32x4_t bit_reverse_shift_right(float32x4_t inactive, float32x4_t a, int32_t b, mve_pred16_t p) { return vbrsrq_m_n_f32(inactive, a, b, p); }
300[[gnu::always_inline]] nce float32x4_t abs(float32x4_t a, mve_pred16_t p) { return vabsq_x_f32(a, p); }
301[[gnu::always_inline]] nce float32x4_t round(float32x4_t a, mve_pred16_t p) { return vrndq_x_f32(a, p); }
302[[gnu::always_inline]] nce float32x4_t round_to_nearest_with_ties_to_even(float32x4_t a, mve_pred16_t p) { return vrndnq_x_f32(a, p); }
303[[gnu::always_inline]] nce float32x4_t round_toward_negative_infinity(float32x4_t a, mve_pred16_t p) { return vrndmq_x_f32(a, p); }
304[[gnu::always_inline]] nce float32x4_t round_toward_positive_infinity(float32x4_t a, mve_pred16_t p) { return vrndpq_x_f32(a, p); }
305[[gnu::always_inline]] nce float32x4_t round_to_nearest_with_ties_away_from_zero(float32x4_t a, mve_pred16_t p) { return vrndaq_x_f32(a, p); }
306[[gnu::always_inline]] nce float32x4_t round_inexact(float32x4_t a, mve_pred16_t p) { return vrndxq_x_f32(a, p); }
307[[gnu::always_inline]] nce float32x4_t negate(float32x4_t a, mve_pred16_t p) { return vnegq_x_f32(a, p); }
308template <> [[gnu::always_inline]] nce int32x4_t convert_round_to_nearest_with_ties_away_from_zero(float32x4_t a, mve_pred16_t p) { return vcvtaq_x_s32_f32(a, p); }
309template <> [[gnu::always_inline]] nce uint32x4_t convert_round_to_nearest_with_ties_away_from_zero(float32x4_t a, mve_pred16_t p) { return vcvtaq_x_u32_f32(a, p); }
310template <> [[gnu::always_inline]] nce int32x4_t convert_round_to_nearest_with_ties_to_even(float32x4_t a, mve_pred16_t p) { return vcvtnq_x_s32_f32(a, p); }
311template <> [[gnu::always_inline]] nce uint32x4_t convert_round_to_nearest_with_ties_to_even(float32x4_t a, mve_pred16_t p) { return vcvtnq_x_u32_f32(a, p); }
312template <> [[gnu::always_inline]] nce int32x4_t convert_round_toward_positive_infinity(float32x4_t a, mve_pred16_t p) { return vcvtpq_x_s32_f32(a, p); }
313template <> [[gnu::always_inline]] nce uint32x4_t convert_round_toward_positive_infinity(float32x4_t a, mve_pred16_t p) { return vcvtpq_x_u32_f32(a, p); }
314template <> [[gnu::always_inline]] nce int32x4_t convert_round_toward_negative_infinity(float32x4_t a, mve_pred16_t p) { return vcvtmq_x_s32_f32(a, p); }
315template <> [[gnu::always_inline]] nce uint32x4_t convert_round_toward_negative_infinity(float32x4_t a, mve_pred16_t p) { return vcvtmq_x_u32_f32(a, p); }
316template <> [[gnu::always_inline]] nce int32x4_t convert(float32x4_t a, mve_pred16_t p) { return vcvtq_x_s32_f32(a, p); }
317template <> [[gnu::always_inline]] nce uint32x4_t convert(float32x4_t a, mve_pred16_t p) { return vcvtq_x_u32_f32(a, p); }
318template <int imm6>[[gnu::always_inline]] nce int32x4_t convert(float32x4_t a, mve_pred16_t p) { return vcvtq_x_n_s32_f32(a, imm6, p); }
319template <int imm6>[[gnu::always_inline]] nce uint32x4_t convert(float32x4_t a, mve_pred16_t p) { return vcvtq_x_n_u32_f32(a, imm6, p); }
320[[gnu::always_inline]] nce mve_pred16_t compare_equal(float32x4_t a, float32_t b) { return vcmpeqq_n_f32(a, b); }
321[[gnu::always_inline]] nce mve_pred16_t compare_equal(float32x4_t a, float32_t b, mve_pred16_t p) { return vcmpeqq_m_n_f32(a, b, p); }
322[[gnu::always_inline]] nce mve_pred16_t compare_not_equal(float32x4_t a, float32_t b) { return vcmpneq_n_f32(a, b); }
323[[gnu::always_inline]] nce mve_pred16_t compare_not_equal(float32x4_t a, float32_t b, mve_pred16_t p) { return vcmpneq_m_n_f32(a, b, p); }
324[[gnu::always_inline]] nce mve_pred16_t compare_greater_than_or_equal(float32x4_t a, float32_t b) { return vcmpgeq_n_f32(a, b); }
325[[gnu::always_inline]] nce mve_pred16_t compare_greater_than_or_equal(float32x4_t a, float32_t b, mve_pred16_t p) { return vcmpgeq_m_n_f32(a, b, p); }
326[[gnu::always_inline]] nce mve_pred16_t compare_greater_than(float32x4_t a, float32_t b) { return vcmpgtq_n_f32(a, b); }
327[[gnu::always_inline]] nce mve_pred16_t compare_greater_than(float32x4_t a, float32_t b, mve_pred16_t p) { return vcmpgtq_m_n_f32(a, b, p); }
328[[gnu::always_inline]] nce mve_pred16_t compare_less_than_or_equal(float32x4_t a, float32_t b) { return vcmpleq_n_f32(a, b); }
329[[gnu::always_inline]] nce mve_pred16_t compare_less_than_or_equal(float32x4_t a, float32_t b, mve_pred16_t p) { return vcmpleq_m_n_f32(a, b, p); }
330[[gnu::always_inline]] nce mve_pred16_t compare_less_than(float32x4_t a, float32_t b) { return vcmpltq_n_f32(a, b); }
331[[gnu::always_inline]] nce mve_pred16_t compare_less_than(float32x4_t a, float32_t b, mve_pred16_t p) { return vcmpltq_m_n_f32(a, b, p); }
332[[gnu::always_inline]] nce float32x4_t add(float32x4_t a, float32_t b) { return vaddq_n_f32(a, b); }
333[[gnu::always_inline]] nce float32x4_t add(float32x4_t a, float32_t b, mve_pred16_t p) { return vaddq_x_n_f32(a, b, p); }
334[[gnu::always_inline]] nce float32x4_t multiply(float32x4_t a, float32_t b, mve_pred16_t p) { return vmulq_x_n_f32(a, b, p); }
335[[gnu::always_inline]] nce float32x4_t subtract(float32x4_t a, float32_t b) { return vsubq_n_f32(a, b); }
336[[gnu::always_inline]] nce float32x4_t subtract(float32x4_t a, float32_t b, mve_pred16_t p) { return vsubq_x_n_f32(a, b, p); }
337[[gnu::always_inline]] nce float32x4_t bit_reverse_shift_right(float32x4_t a, int32_t b) { return vbrsrq_n_f32(a, b); }
338[[gnu::always_inline]] nce float32x4_t bit_reverse_shift_right(float32x4_t a, int32_t b, mve_pred16_t p) { return vbrsrq_x_n_f32(a, b, p); }
339template <> [[gnu::always_inline]] nce float16x8_t create(uint64_t a, uint64_t b) { return vcreateq_f16(a, b); }
340template <> [[gnu::always_inline]] nce float32x4_t create(uint64_t a, uint64_t b) { return vcreateq_f32(a, b); }
341[[gnu::always_inline]] nce float16x8_t duplicate(float16_t a, mve_pred16_t p) { return vdupq_x_n_f16(a, p); }
342[[gnu::always_inline]] nce float32x4_t duplicate(float32_t a, mve_pred16_t p) { return vdupq_x_n_f32(a, p); }
343template <> [[gnu::always_inline]] nce float16x8_t uninitialized() { return vuninitializedq_f16(); }
344template <> [[gnu::always_inline]] nce float32x4_t uninitialized() { return vuninitializedq_f32(); }
345[[gnu::always_inline]] nce float16_t min_reduce_min(float16_t a, float16x8_t b) { return vminnmvq_f16(a, b); }
346[[gnu::always_inline]] nce float32_t min_reduce_min(float32_t a, float32x4_t b) { return vminnmvq_f32(a, b); }
347[[gnu::always_inline]] nce float16_t min_reduce_min(float16_t a, float16x8_t b, mve_pred16_t p) { return vminnmvq_p_f16(a, b, p); }
348[[gnu::always_inline]] nce float32_t min_reduce_min(float32_t a, float32x4_t b, mve_pred16_t p) { return vminnmvq_p_f32(a, b, p); }
349[[gnu::always_inline]] nce float16_t min_reduce_min_add(float16_t a, float16x8_t b) { return vminnmavq_f16(a, b); }
350[[gnu::always_inline]] nce float32_t min_reduce_min_add(float32_t a, float32x4_t b) { return vminnmavq_f32(a, b); }
351[[gnu::always_inline]] nce float16_t min_reduce_min_add(float16_t a, float16x8_t b, mve_pred16_t p) { return vminnmavq_p_f16(a, b, p); }
352[[gnu::always_inline]] nce float32_t min_reduce_min_add(float32_t a, float32x4_t b, mve_pred16_t p) { return vminnmavq_p_f32(a, b, p); }
353[[gnu::always_inline]] nce float16_t max_reduce_max(float16_t a, float16x8_t b) { return vmaxnmvq_f16(a, b); }
354[[gnu::always_inline]] nce float32_t max_reduce_max(float32_t a, float32x4_t b) { return vmaxnmvq_f32(a, b); }
355[[gnu::always_inline]] nce float16_t max_reduce_max(float16_t a, float16x8_t b, mve_pred16_t p) { return vmaxnmvq_p_f16(a, b, p); }
356[[gnu::always_inline]] nce float32_t max_reduce_max(float32_t a, float32x4_t b, mve_pred16_t p) { return vmaxnmvq_p_f32(a, b, p); }
357[[gnu::always_inline]] nce float16_t max_reduce_max_add(float16_t a, float16x8_t b) { return vmaxnmavq_f16(a, b); }
358[[gnu::always_inline]] nce float32_t max_reduce_max_add(float32_t a, float32x4_t b) { return vmaxnmavq_f32(a, b); }
359[[gnu::always_inline]] nce float16_t max_reduce_max_add(float16_t a, float16x8_t b, mve_pred16_t p) { return vmaxnmavq_p_f16(a, b, p); }
360[[gnu::always_inline]] nce float32_t max_reduce_max_add(float32_t a, float32x4_t b, mve_pred16_t p) { return vmaxnmavq_p_f32(a, b, p); }
361[[gnu::always_inline]] inline float16x8x2_t load2(float16_t const *addr) { return vld2q_f16(addr); }
362[[gnu::always_inline]] inline float32x4x2_t load2(float32_t const *addr) { return vld2q_f32(addr); }
363[[gnu::always_inline]] inline float16x8x4_t load4(float16_t const *addr) { return vld4q_f16(addr); }
364[[gnu::always_inline]] inline float32x4x4_t load4(float32_t const *addr) { return vld4q_f32(addr); }
365[[gnu::always_inline]] inline float16x8_t load1(float16_t const *base, mve_pred16_t p) { return vld1q_z_f16(base, p); }
366[[gnu::always_inline]] inline float32x4_t load1(float32_t const *base, mve_pred16_t p) { return vld1q_z_f32(base, p); }
367[[gnu::always_inline]] nce float16x8_t load_halfword(float16_t const *base) { return vldrhq_f16(base); }
368[[gnu::always_inline]] nce float16x8_t load_halfword(float16_t const *base, mve_pred16_t p) { return vldrhq_z_f16(base, p); }
369[[gnu::always_inline]] nce float32x4_t load_word(float32_t const *base) { return vldrwq_f32(base); }
370[[gnu::always_inline]] nce float32x4_t load_word(float32_t const *base, mve_pred16_t p) { return vldrwq_z_f32(base, p); }
371[[gnu::always_inline]] nce float16x8_t load_halfword_gather_offset(float16_t const *base, uint16x8_t offset) { return vldrhq_gather_offset_f16(base, offset); }
372[[gnu::always_inline]] nce float16x8_t load_halfword_gather_offset(float16_t const *base, uint16x8_t offset, mve_pred16_t p) { return vldrhq_gather_offset_z_f16(base, offset, p); }
373[[gnu::always_inline]] nce float16x8_t load_halfword_gather_shifted_offset(float16_t const *base, uint16x8_t offset) { return vldrhq_gather_shifted_offset_f16(base, offset); }
374[[gnu::always_inline]] nce float16x8_t load_halfword_gather_shifted_offset(float16_t const *base, uint16x8_t offset, mve_pred16_t p) { return vldrhq_gather_shifted_offset_z_f16(base, offset, p); }
375[[gnu::always_inline]] nce float32x4_t load_word_gather_offset(float32_t const *base, uint32x4_t offset) { return vldrwq_gather_offset_f32(base, offset); }
376[[gnu::always_inline]] nce float32x4_t load_word_gather_offset(float32_t const *base, uint32x4_t offset, mve_pred16_t p) { return vldrwq_gather_offset_z_f32(base, offset, p); }
377[[gnu::always_inline]] nce float32x4_t load_word_gather_shifted_offset(float32_t const *base, uint32x4_t offset) { return vldrwq_gather_shifted_offset_f32(base, offset); }
378[[gnu::always_inline]] nce float32x4_t load_word_gather_shifted_offset(float32_t const *base, uint32x4_t offset, mve_pred16_t p) { return vldrwq_gather_shifted_offset_z_f32(base, offset, p); }
379[[gnu::always_inline]] inline void store2(float16_t *addr, float16x8x2_t value) { return vst2q_f16(addr, value); }
380[[gnu::always_inline]] inline void store2(float32_t *addr, float32x4x2_t value) { return vst2q_f32(addr, value); }
381[[gnu::always_inline]] inline void store4(float16_t *addr, float16x8x4_t value) { return vst4q_f16(addr, value); }
382[[gnu::always_inline]] inline void store4(float32_t *addr, float32x4x4_t value) { return vst4q_f32(addr, value); }
383[[gnu::always_inline]] inline void store1(float16_t *base, float16x8_t value, mve_pred16_t p) { return vst1q_p_f16(base, value, p); }
384[[gnu::always_inline]] inline void store1(float32_t *base, float32x4_t value, mve_pred16_t p) { return vst1q_p_f32(base, value, p); }
385[[gnu::always_inline]] nce void store_halfword(float16_t *base, float16x8_t value) { return vstrhq_f16(base, value); }
386[[gnu::always_inline]] nce void store_halfword(float16_t *base, float16x8_t value, mve_pred16_t p) { return vstrhq_p_f16(base, value, p); }
387[[gnu::always_inline]] nce void store_word(float32_t *base, float32x4_t value) { return vstrwq_f32(base, value); }
388[[gnu::always_inline]] nce void store_word(float32_t *base, float32x4_t value, mve_pred16_t p) { return vstrwq_p_f32(base, value, p); }
389[[gnu::always_inline]] nce void store_halfword_scatter_offset(float16_t *base, uint16x8_t offset, float16x8_t value) { return vstrhq_scatter_offset_f16(base, offset, value); }
390[[gnu::always_inline]] nce void store_halfword_scatter_offset(float16_t *base, uint16x8_t offset, float16x8_t value, mve_pred16_t p) { return vstrhq_scatter_offset_p_f16(base, offset, value, p); }
391[[gnu::always_inline]] nce void store_halfword_scatter_shifted_offset(float16_t *base, uint16x8_t offset, float16x8_t value) { return vstrhq_scatter_shifted_offset_f16(base, offset, value); }
392[[gnu::always_inline]] nce void store_halfword_scatter_shifted_offset(float16_t *base, uint16x8_t offset, float16x8_t value, mve_pred16_t p) { return vstrhq_scatter_shifted_offset_p_f16(base, offset, value, p); }
393[[gnu::always_inline]] nce void store_word_scatter_offset(float32_t *base, uint32x4_t offset, float32x4_t value) { return vstrwq_scatter_offset_f32(base, offset, value); }
394[[gnu::always_inline]] nce void store_word_scatter_offset(float32_t *base, uint32x4_t offset, float32x4_t value, mve_pred16_t p) { return vstrwq_scatter_offset_p_f32(base, offset, value, p); }
395[[gnu::always_inline]] nce void store_word_scatter_shifted_offset(float32_t *base, uint32x4_t offset, float32x4_t value) { return vstrwq_scatter_shifted_offset_f32(base, offset, value); }
396[[gnu::always_inline]] nce void store_word_scatter_shifted_offset(float32_t *base, uint32x4_t offset, float32x4_t value, mve_pred16_t p) { return vstrwq_scatter_shifted_offset_p_f32(base, offset, value, p); }
397// clang-format on
398} // namespace mve
399#endif
400#undef nce