Argon 0.1.0
Loading...
Searching...
No Matches
vfpv4_float.hpp
1#pragma once
2#include "vfpv3_float.hpp"
3
// `simd` is a temporary alias for the namespace the wrappers in this header
// are placed in: `mve` when the compiler defines __ARM_FEATURE_MVE, `neon`
// in every other case. It is #undef'd again at the bottom of the header.
#if defined(__ARM_FEATURE_MVE)
#  define simd mve
#else
#  define simd neon
#endif
9
10#ifdef __cplusplus
11
// `nce` is the function specifier placed on every wrapper in this header:
// `constexpr` when compiling with clang, plain `inline` for other compilers.
// NOTE(review): presumably only clang accepts the intrinsics inside
// constexpr functions -- confirm before changing. #undef'd at end of header.
#if defined(__clang__)
#  define nce constexpr
#else
#  define nce inline
#endif
17
18namespace simd {
19// clang-format off
20template <typename T> nce T reinterpret(uint64x2_t a);
21template <typename T> nce T reinterpret(float16x8_t a);
22template <typename T> nce T reinterpret(uint8x16_t a);
23template <typename T> nce T reinterpret(int8x16_t a);
24template <typename T> nce T reinterpret(uint16x8_t a);
25template <typename T> nce T reinterpret(int16x8_t a);
26template <typename T> nce T reinterpret(int32x4_t a);
27template <typename T> nce T reinterpret(uint32x4_t a);
28template <typename T> nce T reinterpret(float32x4_t a);
29template <typename T> nce T reinterpret(int64x2_t a);
30
31template <> [[gnu::always_inline]] nce float16x8_t reinterpret(uint64x2_t a) { return vreinterpretq_f16_u64(a); }
32template <> [[gnu::always_inline]] nce int8x16_t reinterpret(float16x8_t a) { return vreinterpretq_s8_f16(a); }
33template <> [[gnu::always_inline]] nce int16x8_t reinterpret(float16x8_t a) { return vreinterpretq_s16_f16(a); }
34template <> [[gnu::always_inline]] nce int32x4_t reinterpret(float16x8_t a) { return vreinterpretq_s32_f16(a); }
35template <> [[gnu::always_inline]] nce float32x4_t reinterpret(float16x8_t a) { return vreinterpretq_f32_f16(a); }
36template <> [[gnu::always_inline]] nce uint8x16_t reinterpret(float16x8_t a) { return vreinterpretq_u8_f16(a); }
37template <> [[gnu::always_inline]] nce uint16x8_t reinterpret(float16x8_t a) { return vreinterpretq_u16_f16(a); }
38template <> [[gnu::always_inline]] nce uint32x4_t reinterpret(float16x8_t a) { return vreinterpretq_u32_f16(a); }
39template <> [[gnu::always_inline]] nce uint64x2_t reinterpret(float16x8_t a) { return vreinterpretq_u64_f16(a); }
40template <> [[gnu::always_inline]] nce int64x2_t reinterpret(float16x8_t a) { return vreinterpretq_s64_f16(a); }
41template <> [[gnu::always_inline]] nce float16x8_t reinterpret(uint8x16_t a) { return vreinterpretq_f16_u8(a); }
42template <> [[gnu::always_inline]] nce float16x8_t reinterpret(int8x16_t a) { return vreinterpretq_f16_s8(a); }
43template <> [[gnu::always_inline]] nce float16x8_t reinterpret(uint16x8_t a) { return vreinterpretq_f16_u16(a); }
44template <> [[gnu::always_inline]] nce float16x8_t reinterpret(int16x8_t a) { return vreinterpretq_f16_s16(a); }
45template <> [[gnu::always_inline]] nce float16x8_t reinterpret(int32x4_t a) { return vreinterpretq_f16_s32(a); }
46template <> [[gnu::always_inline]] nce float16x8_t reinterpret(uint32x4_t a) { return vreinterpretq_f16_u32(a); }
47template <> [[gnu::always_inline]] nce float16x8_t reinterpret(float32x4_t a) { return vreinterpretq_f16_f32(a); }
48template <> [[gnu::always_inline]] nce float16x8_t reinterpret(int64x2_t a) { return vreinterpretq_f16_s64(a); }
49template <int lane>[[gnu::always_inline]] nce float16_t get_lane(float16x8_t v) { return vgetq_lane_f16(v, lane); }
50template <int lane>[[gnu::always_inline]] nce float16x8_t set_lane(float16_t a, float16x8_t v) { return vsetq_lane_f16(a, v, lane); }
51// clang-format on
52} // namespace simd
53#endif // __cplusplus
54#undef nce
55#undef simd