1#ifdef __ARM_FEATURE_FP8
2template <
typename T> nce T reinterpret(mfloat8x8_t a);
3template <
typename T> nce T reinterpret(mfloat8x16_t a);
4template <
typename T> nce T duplicate(mfloat8_t value);
5template <
typename T> nce T move(mfloat8_t value);
6template <
typename T> nce T duplicate(mfloat8x8_t vec);
7template <
typename T> nce T duplicate(mfloat8x16_t vec);
8template <
typename T> nce T get(mfloat8x16_t a);
9template <
typename T> nce T load1(mfloat8_t
const *ptr);
10template <
typename T> nce T load1_duplicate(mfloat8_t
const *ptr);
11template <
typename T> nce T load2(mfloat8_t
const *ptr);
12template <
typename T> nce T load4(mfloat8_t
const *ptr);
13template <
typename T> nce T load2_duplicate(mfloat8_t
const *ptr);
14template <
typename T> nce T load3_duplicate(mfloat8_t
const *ptr);
15template <
typename T> nce T load4_duplicate(mfloat8_t
const *ptr);
16template <
typename T> nce T load1_x2(mfloat8_t
const *ptr);
17template <
typename T> nce T load1_x3(mfloat8_t
const *ptr);
18template <
typename T> nce T load1_x4(mfloat8_t
const *ptr);
19template <
typename T> nce T store1(mfloat8_t *ptr, mfloat8x8_t val);
20template <
typename T> nce T store1(mfloat8_t *ptr, mfloat8x16_t val);
21template <
typename T> nce T store2(mfloat8_t *ptr, mfloat8x8x2_t val);
22template <
typename T> nce T store2(mfloat8_t *ptr, mfloat8x16x2_t val);
23template <
typename T> nce T store3(mfloat8_t *ptr, mfloat8x8x3_t val);
24template <
typename T> nce T store3(mfloat8_t *ptr, mfloat8x16x3_t val);
25template <
typename T> nce T store4(mfloat8_t *ptr, mfloat8x8x4_t val);
26template <
typename T> nce T store4(mfloat8_t *ptr, mfloat8x16x4_t val);
28[[gnu::always_inline]] nce float16x4_t scale_exponent(float16x4_t vn, int16x4_t vm) {
return vscale_f16(vn, vm); }
29[[gnu::always_inline]] nce float16x8_t scale_exponent(float16x8_t vn, int16x8_t vm) {
return vscaleq_f16(vn, vm); }
30[[gnu::always_inline]] nce float32x2_t scale_exponent(float32x2_t vn, int32x2_t vm) {
return vscale_f32(vn, vm); }
31[[gnu::always_inline]] nce float32x4_t scale_exponent(float32x4_t vn, int32x4_t vm) {
return vscaleq_f32(vn, vm); }
32[[gnu::always_inline]] nce float64x2_t scale_exponent(float64x2_t vn, int64x2_t vm) {
return vscaleq_f64(vn, vm); }
34[[gnu::always_inline]]
inline void store1_x4(int8_t *ptr, int8x8x4_t val) {
return vst1_mf8_x4(ptr, val); }
35[[gnu::always_inline]]
inline void store1_x4(int8_t *ptr, int8x16x4_t val) {
return vst1q_mf8_x4(ptr, val); }
36template <> [[gnu::always_inline]] nce mfloat8x8_t reinterpret(uint8x8_t a) {
return vreinterpret_mf8_u8(a); }
37template <> [[gnu::always_inline]] nce mfloat8x16_t reinterpret(uint8x16_t a) {
return vreinterpretq_mf8_u8(a); }
38template <> [[gnu::always_inline]] nce mfloat8x8_t reinterpret(int8x8_t a) {
return vreinterpret_mf8_s8(a); }
39template <> [[gnu::always_inline]] nce mfloat8x16_t reinterpret(int8x16_t a) {
return vreinterpretq_mf8_s8(a); }
40template <> [[gnu::always_inline]] nce mfloat8x8_t reinterpret(uint16x4_t a) {
return vreinterpret_mf8_u16(a); }
41template <> [[gnu::always_inline]] nce mfloat8x16_t reinterpret(uint16x8_t a) {
return vreinterpretq_mf8_u16(a); }
42template <> [[gnu::always_inline]] nce mfloat8x8_t reinterpret(int16x4_t a) {
return vreinterpret_mf8_s16(a); }
43template <> [[gnu::always_inline]] nce mfloat8x16_t reinterpret(int16x8_t a) {
return vreinterpretq_mf8_s16(a); }
44template <> [[gnu::always_inline]] nce mfloat8x8_t reinterpret(int32x2_t a) {
return vreinterpret_mf8_s32(a); }
45template <> [[gnu::always_inline]] nce mfloat8x16_t reinterpret(int32x4_t a) {
return vreinterpretq_mf8_s32(a); }
46template <> [[gnu::always_inline]] nce mfloat8x8_t reinterpret(uint64x1_t a) {
return vreinterpret_mf8_u64(a); }
47[[gnu::always_inline]] nce mfloat8x16_t reinterpret(uint64x2_t a) {
return vreinterpretq_mf8_u64(a); }
48template <> [[gnu::always_inline]] nce mfloat8x8_t reinterpret(uint32x2_t a) {
return vreinterpret_mf8_u32(a); }
49template <> [[gnu::always_inline]] nce mfloat8x16_t reinterpret(uint32x4_t a) {
return vreinterpretq_mf8_u32(a); }
50template <> [[gnu::always_inline]] nce mfloat8x8_t reinterpret(float16x4_t a) {
return vreinterpret_mf8_f16(a); }
51template <> [[gnu::always_inline]] nce mfloat8x16_t reinterpret(float16x8_t a) {
return vreinterpretq_mf8_f16(a); }
52template <> [[gnu::always_inline]] nce mfloat8x8_t reinterpret(float32x2_t a) {
return vreinterpret_mf8_f32(a); }
53template <> [[gnu::always_inline]] nce mfloat8x16_t reinterpret(float32x4_t a) {
return vreinterpretq_mf8_f32(a); }
54template <> [[gnu::always_inline]] nce mfloat8x8_t reinterpret(float64x1_t a) {
return vreinterpret_mf8_f64(a); }
55template <> [[gnu::always_inline]] nce mfloat8x16_t reinterpret(float64x2_t a) {
return vreinterpretq_mf8_f64(a); }
56template <> [[gnu::always_inline]] nce mfloat8x8_t reinterpret(poly16x4_t a) {
return vreinterpret_mf8_p16(a); }
57template <> [[gnu::always_inline]] nce int8x8_t reinterpret(mfloat8x8_t a) {
return vreinterpret_s8_mf8(a); }
58template <> [[gnu::always_inline]] nce int16x4_t reinterpret(mfloat8x8_t a) {
return vreinterpret_s16_mf8(a); }
59template <> [[gnu::always_inline]] nce int32x2_t reinterpret(mfloat8x8_t a) {
return vreinterpret_s32_mf8(a); }
60template <> [[gnu::always_inline]] nce float32x2_t reinterpret(mfloat8x8_t a) {
return vreinterpret_f32_mf8(a); }
61template <> [[gnu::always_inline]] nce uint8x8_t reinterpret(mfloat8x8_t a) {
return vreinterpret_u8_mf8(a); }
62template <> [[gnu::always_inline]] nce uint16x4_t reinterpret(mfloat8x8_t a) {
return vreinterpret_u16_mf8(a); }
63template <> [[gnu::always_inline]] nce uint32x2_t reinterpret(mfloat8x8_t a) {
return vreinterpret_u32_mf8(a); }
64template <> [[gnu::always_inline]] nce poly16x4_t reinterpret(mfloat8x8_t a) {
return vreinterpret_p16_mf8(a); }
65template <> [[gnu::always_inline]] nce uint64x1_t reinterpret(mfloat8x8_t a) {
return vreinterpret_u64_mf8(a); }
66template <> [[gnu::always_inline]] nce int64x1_t reinterpret(mfloat8x8_t a) {
return vreinterpret_s64_mf8(a); }
67template <> [[gnu::always_inline]] nce float64x1_t reinterpret(mfloat8x8_t a) {
return vreinterpret_f64_mf8(a); }
68template <> [[gnu::always_inline]] nce poly64x1_t reinterpret(mfloat8x8_t a) {
return vreinterpret_p64_mf8(a); }
69template <> [[gnu::always_inline]] nce float16x4_t reinterpret(mfloat8x8_t a) {
return vreinterpret_f16_mf8(a); }
70template <> [[gnu::always_inline]] nce mfloat8x8_t reinterpret(int64x1_t a) {
return vreinterpret_mf8_s64(a); }
71template <> [[gnu::always_inline]] nce int8x16_t reinterpret(mfloat8x16_t a) {
return vreinterpretq_s8_mf8(a); }
72template <> [[gnu::always_inline]] nce int16x8_t reinterpret(mfloat8x16_t a) {
return vreinterpretq_s16_mf8(a); }
73template <> [[gnu::always_inline]] nce int32x4_t reinterpret(mfloat8x16_t a) {
return vreinterpretq_s32_mf8(a); }
74template <> [[gnu::always_inline]] nce float32x4_t reinterpret(mfloat8x16_t a) {
return vreinterpretq_f32_mf8(a); }
75template <> [[gnu::always_inline]] nce uint8x16_t reinterpret(mfloat8x16_t a) {
return vreinterpretq_u8_mf8(a); }
76template <> [[gnu::always_inline]] nce uint16x8_t reinterpret(mfloat8x16_t a) {
return vreinterpretq_u16_mf8(a); }
77template <> [[gnu::always_inline]] nce uint32x4_t reinterpret(mfloat8x16_t a) {
return vreinterpretq_u32_mf8(a); }
78template <> [[gnu::always_inline]] nce poly16x8_t reinterpret(mfloat8x16_t a) {
return vreinterpretq_p16_mf8(a); }
79template <> [[gnu::always_inline]] nce uint64x2_t reinterpret(mfloat8x16_t a) {
return vreinterpretq_u64_mf8(a); }
80template <> [[gnu::always_inline]] nce int64x2_t reinterpret(mfloat8x16_t a) {
return vreinterpretq_s64_mf8(a); }
81template <> [[gnu::always_inline]] nce float64x2_t reinterpret(mfloat8x16_t a) {
return vreinterpretq_f64_mf8(a); }
82template <> [[gnu::always_inline]] nce poly64x2_t reinterpret(mfloat8x16_t a) {
return vreinterpretq_p64_mf8(a); }
83template <> [[gnu::always_inline]] nce poly128_t reinterpret(mfloat8x16_t a) {
return vreinterpretq_p128_mf8(a); }
84template <> [[gnu::always_inline]] nce float16x8_t reinterpret(mfloat8x16_t a) {
return vreinterpretq_f16_mf8(a); }
85template <> [[gnu::always_inline]] nce mfloat8x16_t reinterpret(poly16x8_t a) {
return vreinterpretq_mf8_p16(a); }
86template <> [[gnu::always_inline]] nce mfloat8x16_t reinterpret(int64x2_t a) {
return vreinterpretq_mf8_s64(a); }
87template <> [[gnu::always_inline]] nce mfloat8x16_t reinterpret(poly64x2_t a) {
return vreinterpretq_mf8_p64(a); }
88template <> [[gnu::always_inline]] nce mfloat8x16_t reinterpret(poly128_t a) {
return vreinterpretq_mf8_p128(a); }
89template <
int lane1,
int lane2>[[gnu::always_inline]] nce mfloat8x8_t copy_lane(mfloat8x8_t a, mfloat8x8_t b) {
return vcopy_lane_mf8(a, lane1, b, lane2); }
90template <
int lane1,
int lane2>[[gnu::always_inline]] nce mfloat8x16_t copy_lane(mfloat8x16_t a, mfloat8x8_t b) {
return vcopyq_lane_mf8(a, lane1, b, lane2); }
91template <
int lane1,
int lane2>[[gnu::always_inline]] nce mfloat8x8_t copy_lane(mfloat8x8_t a, mfloat8x16_t b) {
return vcopy_laneq_mf8(a, lane1, b, lane2); }
92template <
int lane1,
int lane2>[[gnu::always_inline]] nce mfloat8x16_t copy_lane(mfloat8x16_t a, mfloat8x16_t b) {
return vcopyq_laneq_mf8(a, lane1, b, lane2); }
93template <> [[gnu::always_inline]] nce mfloat8x8_t create(uint64_t a) {
return vcreate_mf8(a); }
94template <> [[gnu::always_inline]] nce mfloat8x8_t duplicate(mfloat8_t value) {
return vdup_n_mf8(value); }
95template <> [[gnu::always_inline]] nce mfloat8x16_t duplicate(mfloat8_t value) {
return vdupq_n_mf8(value); }
96template <> [[gnu::always_inline]] nce mfloat8x8_t move(mfloat8_t value) {
return vmov_n_mf8(value); }
97template <> [[gnu::always_inline]] nce mfloat8x16_t move(mfloat8_t value) {
return vmovq_n_mf8(value); }
98template <
int lane>[[gnu::always_inline]] nce mfloat8x8_t duplicate_lane(mfloat8x8_t vec) {
return vdup_lane_mf8(vec, lane); }
99template <
int lane>[[gnu::always_inline]] nce mfloat8x16_t duplicate_lane(mfloat8x8_t vec) {
return vdupq_lane_mf8(vec, lane); }
100template <
int lane>[[gnu::always_inline]] nce mfloat8x8_t duplicate_lane(mfloat8x16_t vec) {
return vdup_laneq_mf8(vec, lane); }
101template <
int lane>[[gnu::always_inline]] nce mfloat8x16_t duplicate_lane(mfloat8x16_t vec) {
return vdupq_laneq_mf8(vec, lane); }
102[[gnu::always_inline]] nce mfloat8x16_t combine(mfloat8x8_t low, mfloat8x8_t high) {
return vcombine_mf8(low, high); }
103template <> [[gnu::always_inline]] nce mfloat8x8_t get_high(mfloat8x16_t a) {
return vget_high_mf8(a); }
104template <> [[gnu::always_inline]] nce mfloat8x8_t get_low(mfloat8x16_t a) {
return vget_low_mf8(a); }
105template <
int lane>[[gnu::always_inline]] nce mfloat8_t duplicate_lane(mfloat8x8_t vec) {
return vdupb_lane_mf8(vec, lane); }
106template <
int lane>[[gnu::always_inline]] nce mfloat8_t duplicate_lane(mfloat8x16_t vec) {
return vdupb_laneq_mf8(vec, lane); }
107template <
int n>[[gnu::always_inline]] nce mfloat8x8_t extract(mfloat8x8_t a, mfloat8x8_t b) {
return vext_mf8(a, b, n); }
108template <
int n>[[gnu::always_inline]] nce mfloat8x16_t extract(mfloat8x16_t a, mfloat8x16_t b) {
return vextq_mf8(a, b, n); }
109[[gnu::always_inline]] nce mfloat8x8_t reverse_64bit(mfloat8x8_t vec) {
return vrev64_mf8(vec); }
110[[gnu::always_inline]] nce mfloat8x16_t reverse_64bit(mfloat8x16_t vec) {
return vrev64q_mf8(vec); }
111[[gnu::always_inline]] nce mfloat8x8_t reverse_32bit(mfloat8x8_t vec) {
return vrev32_mf8(vec); }
112[[gnu::always_inline]] nce mfloat8x16_t reverse_32bit(mfloat8x16_t vec) {
return vrev32q_mf8(vec); }
113[[gnu::always_inline]] nce mfloat8x8_t reverse_16bit(mfloat8x8_t vec) {
return vrev16_mf8(vec); }
114[[gnu::always_inline]] nce mfloat8x16_t reverse_16bit(mfloat8x16_t vec) {
return vrev16q_mf8(vec); }
115[[gnu::always_inline]] nce mfloat8x8_t zip1(mfloat8x8_t a, mfloat8x8_t b) {
return vzip1_mf8(a, b); }
116[[gnu::always_inline]] nce mfloat8x16_t zip1(mfloat8x16_t a, mfloat8x16_t b) {
return vzip1q_mf8(a, b); }
117[[gnu::always_inline]] nce mfloat8x8_t zip2(mfloat8x8_t a, mfloat8x8_t b) {
return vzip2_mf8(a, b); }
118[[gnu::always_inline]] nce mfloat8x16_t zip2(mfloat8x16_t a, mfloat8x16_t b) {
return vzip2q_mf8(a, b); }
119[[gnu::always_inline]] nce mfloat8x8x2_t zip(mfloat8x8_t a, mfloat8x8_t b) {
return vzip_mf8(a, b); }
120[[gnu::always_inline]] nce mfloat8x16x2_t zip(mfloat8x16_t a, mfloat8x16_t b) {
return vzipq_mf8(a, b); }
121[[gnu::always_inline]] nce mfloat8x8_t unzip1(mfloat8x8_t a, mfloat8x8_t b) {
return vuzp1_mf8(a, b); }
122[[gnu::always_inline]] nce mfloat8x16_t unzip1(mfloat8x16_t a, mfloat8x16_t b) {
return vuzp1q_mf8(a, b); }
123[[gnu::always_inline]] nce mfloat8x8_t unzip2(mfloat8x8_t a, mfloat8x8_t b) {
return vuzp2_mf8(a, b); }
124[[gnu::always_inline]] nce mfloat8x16_t unzip2(mfloat8x16_t a, mfloat8x16_t b) {
return vuzp2q_mf8(a, b); }
125[[gnu::always_inline]] nce mfloat8x8x2_t unzip(mfloat8x8_t a, mfloat8x8_t b) {
return vuzp_mf8(a, b); }
126[[gnu::always_inline]] nce mfloat8x16x2_t unzip(mfloat8x16_t a, mfloat8x16_t b) {
return vuzpq_mf8(a, b); }
127[[gnu::always_inline]] nce mfloat8x8_t transpose_step_1(mfloat8x8_t a, mfloat8x8_t b) {
return vtrn1_mf8(a, b); }
128[[gnu::always_inline]] nce mfloat8x16_t transpose_step_1(mfloat8x16_t a, mfloat8x16_t b) {
return vtrn1q_mf8(a, b); }
129[[gnu::always_inline]] nce mfloat8x8_t transpose_step_2(mfloat8x8_t a, mfloat8x8_t b) {
return vtrn2_mf8(a, b); }
130[[gnu::always_inline]] nce mfloat8x16_t transpose_step_2(mfloat8x16_t a, mfloat8x16_t b) {
return vtrn2q_mf8(a, b); }
131[[gnu::always_inline]] nce mfloat8x8x2_t transpose(mfloat8x8_t a, mfloat8x8_t b) {
return vtrn_mf8(a, b); }
132[[gnu::always_inline]] nce mfloat8x16x2_t transpose(mfloat8x16_t a, mfloat8x16_t b) {
return vtrnq_mf8(a, b); }
133template <
int lane>[[gnu::always_inline]] nce mfloat8x8_t set_lane(mfloat8_t a, mfloat8x8_t v) {
return vset_lane_mf8(a, v, lane); }
134template <
int lane>[[gnu::always_inline]] nce mfloat8x16_t set_lane(mfloat8_t a, mfloat8x16_t v) {
return vsetq_lane_mf8(a, v, lane); }
135template <> [[gnu::always_inline]]
inline mfloat8x8_t load1(mfloat8_t
const *ptr) {
return vld1_mf8(ptr); }
136template <> [[gnu::always_inline]]
inline mfloat8x16_t load1(mfloat8_t
const *ptr) {
return vld1q_mf8(ptr); }
137template <
int lane>[[gnu::always_inline]] nce mfloat8x8_t load1_lane(mfloat8_t
const *ptr, mfloat8x8_t src) {
return vld1_lane_mf8(ptr, src, lane); }
138template <
int lane>[[gnu::always_inline]] nce mfloat8x16_t load1_lane(mfloat8_t
const *ptr, mfloat8x16_t src) {
return vld1q_lane_mf8(ptr, src, lane); }
139template <> [[gnu::always_inline]]
inline mfloat8x8_t load1_duplicate(mfloat8_t
const *ptr) {
return vld1_dup_mf8(ptr); }
140template <> [[gnu::always_inline]]
inline mfloat8x16_t load1_duplicate(mfloat8_t
const *ptr) {
return vld1q_dup_mf8(ptr); }
141template <> [[gnu::always_inline]]
inline mfloat8x8x2_t load2(mfloat8_t
const *ptr) {
return vld2_mf8(ptr); }
142template <> [[gnu::always_inline]]
inline mfloat8x16x2_t load2(mfloat8_t
const *ptr) {
return vld2q_mf8(ptr); }
143template <> [[gnu::always_inline]]
inline mfloat8x8x3_t load3(int8_t
const *ptr) {
return vld3_mf8(ptr); }
144template <> [[gnu::always_inline]]
inline mfloat8x16x3_t load3(int8_t
const *ptr) {
return vld3q_mf8(ptr); }
145template <> [[gnu::always_inline]]
inline mfloat8x8x4_t load4(mfloat8_t
const *ptr) {
return vld4_mf8(ptr); }
146template <> [[gnu::always_inline]]
inline mfloat8x16x4_t load4(mfloat8_t
const *ptr) {
return vld4q_mf8(ptr); }
147template <> [[gnu::always_inline]]
inline mfloat8x8x2_t load2_duplicate(mfloat8_t
const *ptr) {
return vld2_dup_mf8(ptr); }
148template <> [[gnu::always_inline]]
inline mfloat8x16x2_t load2_duplicate(mfloat8_t
const *ptr) {
return vld2q_dup_mf8(ptr); }
149template <> [[gnu::always_inline]]
inline mfloat8x8x3_t load3_duplicate(mfloat8_t
const *ptr) {
return vld3_dup_mf8(ptr); }
150template <> [[gnu::always_inline]]
inline mfloat8x16x3_t load3_duplicate(mfloat8_t
const *ptr) {
return vld3q_dup_mf8(ptr); }
151template <> [[gnu::always_inline]]
inline mfloat8x8x4_t load4_duplicate(mfloat8_t
const *ptr) {
return vld4_dup_mf8(ptr); }
152template <> [[gnu::always_inline]]
inline mfloat8x16x4_t load4_duplicate(mfloat8_t
const *ptr) {
return vld4q_dup_mf8(ptr); }
153template <
int lane>[[gnu::always_inline]] nce mfloat8x8x2_t load2_lane(mfloat8_t
const *ptr, mfloat8x8x2_t src) {
return vld2_lane_mf8(ptr, src, lane); }
154template <
int lane>[[gnu::always_inline]] nce mfloat8x16x2_t load2_lane(mfloat8_t
const *ptr, mfloat8x16x2_t src) {
return vld2q_lane_mf8(ptr, src, lane); }
155template <
int lane>[[gnu::always_inline]] nce mfloat8x8x3_t load3_lane(mfloat8_t
const *ptr, mfloat8x8x3_t src) {
return vld3_lane_mf8(ptr, src, lane); }
156template <
int lane>[[gnu::always_inline]] nce mfloat8x16x3_t load3_lane(mfloat8_t
const *ptr, mfloat8x16x3_t src) {
return vld3q_lane_mf8(ptr, src, lane); }
157template <
int lane>[[gnu::always_inline]] nce mfloat8x8x4_t load4_lane(mfloat8_t
const *ptr, mfloat8x8x4_t src) {
return vld4_lane_mf8(ptr, src, lane); }
158template <
int lane>[[gnu::always_inline]] nce mfloat8x16x4_t load4_lane(mfloat8_t
const *ptr, mfloat8x16x4_t src) {
return vld4q_lane_mf8(ptr, src, lane); }
159template <> [[gnu::always_inline]]
inline mfloat8x8x2_t load1_x2(mfloat8_t
const *ptr) {
return vld1_mf8_x2(ptr); }
160template <> [[gnu::always_inline]]
inline mfloat8x16x2_t load1_x2(mfloat8_t
const *ptr) {
return vld1q_mf8_x2(ptr); }
161template <> [[gnu::always_inline]]
inline mfloat8x8x3_t load1_x3(mfloat8_t
const *ptr) {
return vld1_mf8_x3(ptr); }
162template <> [[gnu::always_inline]]
inline mfloat8x16x3_t load1_x3(mfloat8_t
const *ptr) {
return vld1q_mf8_x3(ptr); }
163template <> [[gnu::always_inline]]
inline mfloat8x8x4_t load1_x4(mfloat8_t
const *ptr) {
return vld1_mf8_x4(ptr); }
164template <> [[gnu::always_inline]]
inline mfloat8x16x4_t load1_x4(mfloat8_t
const *ptr) {
return vld1q_mf8_x4(ptr); }
165template <> [[gnu::always_inline]]
inline void store1(mfloat8_t *ptr, mfloat8x8_t val) {
return vst1_mf8(ptr, val); }
166template <> [[gnu::always_inline]]
inline void store1(mfloat8_t *ptr, mfloat8x16_t val) {
return vst1q_mf8(ptr, val); }
167template <
int lane>[[gnu::always_inline]] nce
void store1_lane(mfloat8_t *ptr, mfloat8x8_t val) {
return vst1_lane_mf8(ptr, val, lane); }
168template <
int lane>[[gnu::always_inline]] nce
void store1_lane(mfloat8_t *ptr, mfloat8x16_t val) {
return vst1q_lane_mf8(ptr, val, lane); }
169template <> [[gnu::always_inline]]
inline void store2(mfloat8_t *ptr, mfloat8x8x2_t val) {
return vst2_mf8(ptr, val); }
170template <> [[gnu::always_inline]]
inline void store2(mfloat8_t *ptr, mfloat8x16x2_t val) {
return vst2q_mf8(ptr, val); }
171template <> [[gnu::always_inline]]
inline void store3(mfloat8_t *ptr, mfloat8x8x3_t val) {
return vst3_mf8(ptr, val); }
172template <> [[gnu::always_inline]]
inline void store3(mfloat8_t *ptr, mfloat8x16x3_t val) {
return vst3q_mf8(ptr, val); }
173template <> [[gnu::always_inline]]
inline void store4(mfloat8_t *ptr, mfloat8x8x4_t val) {
return vst4_mf8(ptr, val); }
174template <> [[gnu::always_inline]]
inline void store4(mfloat8_t *ptr, mfloat8x16x4_t val) {
return vst4q_mf8(ptr, val); }
175template <
int lane>[[gnu::always_inline]] nce
void store2_lane(mfloat8_t *ptr, mfloat8x8x2_t val) {
return vst2_lane_mf8(ptr, val, lane); }
176template <
int lane>[[gnu::always_inline]] nce
void store3_lane(mfloat8_t *ptr, mfloat8x8x3_t val) {
return vst3_lane_mf8(ptr, val, lane); }
177template <
int lane>[[gnu::always_inline]] nce
void store4_lane(mfloat8_t *ptr, mfloat8x8x4_t val) {
return vst4_lane_mf8(ptr, val, lane); }
178template <
int lane>[[gnu::always_inline]] nce
void store2_lane(mfloat8_t *ptr, mfloat8x16x2_t val) {
return vst2q_lane_mf8(ptr, val, lane); }
179template <
int lane>[[gnu::always_inline]] nce
void store3_lane(mfloat8_t *ptr, mfloat8x16x3_t val) {
return vst3q_lane_mf8(ptr, val, lane); }
180template <
int lane>[[gnu::always_inline]] nce
void store4_lane(mfloat8_t *ptr, mfloat8x16x4_t val) {
return vst4q_lane_mf8(ptr, val, lane); }
181[[gnu::always_inline]]
inline void store1_x2(mfloat8_t *ptr, mfloat8x8x2_t val) {
return vst1_mf8_x2(ptr, val); }
182[[gnu::always_inline]]
inline void store1_x2(mfloat8_t *ptr, mfloat8x16x2_t val) {
return vst1q_mf8_x2(ptr, val); }
183[[gnu::always_inline]]
inline void store1_x3(mfloat8_t *ptr, mfloat8x8x3_t val) {
return vst1_mf8_x3(ptr, val); }
184[[gnu::always_inline]]
inline void store1_x3(mfloat8_t *ptr, mfloat8x16x3_t val) {
return vst1q_mf8_x3(ptr, val); }
185[[gnu::always_inline]] nce mfloat8x8_t table_lookup1(mfloat8x8_t a, uint8x8_t idx) {
return vtbl1_mf8(a, idx); }
186[[gnu::always_inline]] nce mfloat8x8_t table_extend1(mfloat8x8_t a, mfloat8x8_t b, uint8x8_t idx) {
return vtbx1_mf8(a, b, idx); }
187[[gnu::always_inline]] nce mfloat8x8_t table_lookup2(mfloat8x8x2_t a, uint8x8_t idx) {
return vtbl2_mf8(a, idx); }
188[[gnu::always_inline]] nce mfloat8x8_t table_lookup3(mfloat8x8x3_t a, uint8x8_t idx) {
return vtbl3_mf8(a, idx); }
189[[gnu::always_inline]] nce mfloat8x8_t table_lookup4(mfloat8x8x4_t a, uint8x8_t idx) {
return vtbl4_mf8(a, idx); }
190[[gnu::always_inline]] nce mfloat8x8_t table_lookup1_saturate(mfloat8x16_t t, uint8x8_t idx) {
return vqtbl1_mf8(t, idx); }
191[[gnu::always_inline]] nce mfloat8x16_t table_lookup1_saturate(mfloat8x16_t t, uint8x16_t idx) {
return vqtbl1q_mf8(t, idx); }
192[[gnu::always_inline]] nce mfloat8x8_t table_lookup2_saturate(mfloat8x16x2_t t, uint8x8_t idx) {
return vqtbl2_mf8(t, idx); }
193[[gnu::always_inline]] nce mfloat8x16_t table_lookup2_saturate(mfloat8x16x2_t t, uint8x16_t idx) {
return vqtbl2q_mf8(t, idx); }
194[[gnu::always_inline]] nce mfloat8x8_t table_lookup3_saturate(mfloat8x16x3_t t, uint8x8_t idx) {
return vqtbl3_mf8(t, idx); }
195[[gnu::always_inline]] nce mfloat8x16_t table_lookup3_saturate(mfloat8x16x3_t t, uint8x16_t idx) {
return vqtbl3q_mf8(t, idx); }
196[[gnu::always_inline]] nce mfloat8x8_t table_lookup4_saturate(mfloat8x16x4_t t, uint8x8_t idx) {
return vqtbl4_mf8(t, idx); }
197[[gnu::always_inline]] nce mfloat8x16_t table_lookup4_saturate(mfloat8x16x4_t t, uint8x16_t idx) {
return vqtbl4q_mf8(t, idx); }
198[[gnu::always_inline]] nce mfloat8x8_t table_extend2(mfloat8x8_t a, mfloat8x8x2_t b, uint8x8_t idx) {
return vtbx2_mf8(a, b, idx); }
199[[gnu::always_inline]] nce mfloat8x8_t table_extend3(mfloat8x8_t a, mfloat8x8x3_t b, uint8x8_t idx) {
return vtbx3_mf8(a, b, idx); }
200[[gnu::always_inline]] nce mfloat8x8_t table_extend4(mfloat8x8_t a, mfloat8x8x4_t b, uint8x8_t idx) {
return vtbx4_mf8(a, b, idx); }
201[[gnu::always_inline]] nce mfloat8x8_t table_extend1_saturate(mfloat8x8_t a, mfloat8x16_t t, uint8x8_t idx) {
return vqtbx1_mf8(a, t, idx); }
202[[gnu::always_inline]] nce mfloat8x16_t table_extend1_saturate(mfloat8x16_t a, mfloat8x16_t t, uint8x16_t idx) {
return vqtbx1q_mf8(a, t, idx); }
203[[gnu::always_inline]] nce mfloat8x8_t table_extend2_saturate(mfloat8x8_t a, mfloat8x16x2_t t, uint8x8_t idx) {
return vqtbx2_mf8(a, t, idx); }
204[[gnu::always_inline]] nce mfloat8x16_t table_extend2_saturate(mfloat8x16_t a, mfloat8x16x2_t t, uint8x16_t idx) {
return vqtbx2q_mf8(a, t, idx); }
205[[gnu::always_inline]] nce mfloat8x8_t table_extend3_saturate(mfloat8x8_t a, mfloat8x16x3_t t, uint8x8_t idx) {
return vqtbx3_mf8(a, t, idx); }
206[[gnu::always_inline]] nce mfloat8x16_t table_extend3_saturate(mfloat8x16_t a, mfloat8x16x3_t t, uint8x16_t idx) {
return vqtbx3q_mf8(a, t, idx); }
207[[gnu::always_inline]] nce mfloat8x8_t table_extend4_saturate(mfloat8x8_t a, mfloat8x16x4_t t, uint8x8_t idx) {
return vqtbx4_mf8(a, t, idx); }
208[[gnu::always_inline]] nce mfloat8x16_t table_extend4_saturate(mfloat8x16_t a, mfloat8x16x4_t t, uint8x16_t idx) {
return vqtbx4q_mf8(a, t, idx); }
209template <> [[gnu::always_inline]] nce bfloat16x4_t reinterpret(mfloat8x8_t a) {
return vreinterpret_bf16_mf8(a); }
210template <> [[gnu::always_inline]] nce bfloat16x8_t reinterpret(mfloat8x16_t a) {
return vreinterpretq_bf16_mf8(a); }
211template <> [[gnu::always_inline]] nce mfloat8x8_t reinterpret(bfloat16x4_t a) {
return vreinterpret_mf8_bf16(a); }
212template <> [[gnu::always_inline]] nce mfloat8x16_t reinterpret(bfloat16x8_t a) {
return vreinterpretq_mf8_bf16(a); }