28template <simd::simd_concept simd_t>
34template <simd::simd_concept simd_t>
35constexpr void store_avx512(
void * mem_addr, simd_t
const & simd_vec);
40template <simd::simd_concept simd_t>
46template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
52template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
58template <u
int8_t index, simd::simd_concept simd_t>
64template <u
int8_t index, simd::simd_concept simd_t>
70template <u
int8_t index, simd::simd_concept simd_t>
84template <simd::simd_concept simd_t>
87 return reinterpret_cast<simd_t
>(_mm512_loadu_si512(mem_addr));
90template <simd::simd_concept simd_t>
91constexpr void store_avx512(
void * mem_addr, simd_t
const & simd_vec)
93 _mm512_storeu_si512(mem_addr,
reinterpret_cast<__m512i
const &
>(simd_vec));
96# if defined(__AVX512BW__)
97template <simd::simd_concept simd_t>
115 for (
int i = 0; i < 32; ++i)
117 tmp1[i] = _mm512_unpacklo_epi8(
reinterpret_cast<__m512i
const &
>(matrix[2 * i]),
118 reinterpret_cast<__m512i
const &
>(matrix[2 * i + 1]));
119 tmp1[i + 32] = _mm512_unpackhi_epi8(
reinterpret_cast<__m512i
const &
>(matrix[2 * i]),
120 reinterpret_cast<__m512i
const &
>(matrix[2 * i + 1]));
125 for (
int i = 0; i < 32; ++i)
127 tmp2[i] = _mm512_unpacklo_epi16(tmp1[2 * i], tmp1[2 * i + 1]);
128 tmp2[i + 32] = _mm512_unpackhi_epi16(tmp1[2 * i], tmp1[2 * i + 1]);
131 for (
int i = 0; i < 32; ++i)
133 tmp1[i] = _mm512_unpacklo_epi32(tmp2[2 * i], tmp2[2 * i + 1]);
134 tmp1[i + 32] = _mm512_unpackhi_epi32(tmp2[2 * i], tmp2[2 * i + 1]);
137 for (
int i = 0; i < 32; ++i)
139 tmp2[i] = _mm512_unpacklo_epi64(tmp1[2 * i], tmp1[2 * i + 1]);
140 tmp2[i + 32] = _mm512_unpackhi_epi64(tmp1[2 * i], tmp1[2 * i + 1]);
145 auto _mm512_unpacklo_epi128 = [](__m512i
const & a, __m512i
const & b)
148 return _mm512_permutex2var_epi64(a,
reinterpret_cast<__m512i
const &
>(*lo_mask.data()), b);
151 auto _mm521_unpackhi_epi128 = [](__m512i
const & a, __m512i
const & b)
154 return _mm512_permutex2var_epi64(a,
reinterpret_cast<__m512i
const &
>(*hi_mask.data()), b);
157 for (
int i = 0; i < 32; ++i)
159 tmp1[i] = _mm512_unpacklo_epi128(tmp2[2 * i], tmp2[2 * i + 1]);
160 tmp1[i + 32] = _mm521_unpackhi_epi128(tmp2[2 * i], tmp2[2 * i + 1]);
164 auto _mm512_unpacklo_epi256 = [](__m512i
const & a, __m512i
const & b)
166 return _mm512_shuffle_i64x2(a, b, 0b0100'0100);
169 auto _mm521_unpackhi_epi256 = [](__m512i
const & a, __m512i
const & b)
171 return _mm512_shuffle_i64x2(a, b, 0b1110'1110);
179 0, 16, 8, 24, 4, 20, 12, 28, 2, 18, 10, 26, 6, 22, 14, 30,
181 1, 17, 9, 25, 5, 21, 13, 29, 3, 19, 11, 27, 7, 23, 15, 31,
183 32, 48, 40, 56, 36, 52, 44, 60, 34, 50, 42, 58, 38, 54, 46, 62,
185 33, 49, 41, 57, 37, 53, 45, 61, 35, 51, 43, 59, 39, 55, 47, 63};
188 for (
int i = 0; i < 32; ++i)
190 int const idx = i * 2;
191 matrix[reverse_idx_mask[idx]] =
reinterpret_cast<simd_t
>(_mm512_unpacklo_epi256(tmp1[idx], tmp1[idx + 1]));
192 matrix[reverse_idx_mask[idx + 1]] =
reinterpret_cast<simd_t
>(_mm521_unpackhi_epi256(tmp1[idx], tmp1[idx + 1]));
197template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
200 __m512i
const & tmp =
reinterpret_cast<__m512i
const &
>(src);
201 if constexpr (simd_traits<source_simd_t>::length == 64)
203 if constexpr (simd_traits<target_simd_t>::length == 32)
204 return reinterpret_cast<target_simd_t
>(_mm512_cvtepi8_epi16(_mm512_castsi512_si256(tmp)));
205 if constexpr (simd_traits<target_simd_t>::length == 16)
206 return reinterpret_cast<target_simd_t
>(_mm512_cvtepi8_epi32(_mm512_castsi512_si128(tmp)));
207 if constexpr (simd_traits<target_simd_t>::length == 8)
208 return reinterpret_cast<target_simd_t
>(_mm512_cvtepi8_epi64(_mm512_castsi512_si128(tmp)));
210 else if constexpr (simd_traits<source_simd_t>::length == 32)
212 if constexpr (simd_traits<target_simd_t>::length == 16)
213 return reinterpret_cast<target_simd_t
>(_mm512_cvtepi16_epi32(_mm512_castsi512_si256(tmp)));
214 if constexpr (simd_traits<target_simd_t>::length == 8)
215 return reinterpret_cast<target_simd_t
>(_mm512_cvtepi16_epi64(_mm512_castsi512_si128(tmp)));
219 static_assert(simd_traits<source_simd_t>::length == 16,
"Expected 32 bit scalar type.");
220 return reinterpret_cast<target_simd_t
>(_mm512_cvtepi32_epi64(_mm512_castsi512_si256(tmp)));
224template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
227 __m512i
const & tmp =
reinterpret_cast<__m512i
const &
>(src);
228 if constexpr (simd_traits<source_simd_t>::length == 64)
230 if constexpr (simd_traits<target_simd_t>::length == 32)
231 return reinterpret_cast<target_simd_t
>(_mm512_cvtepu8_epi16(_mm512_castsi512_si256(tmp)));
232 if constexpr (simd_traits<target_simd_t>::length == 16)
233 return reinterpret_cast<target_simd_t
>(_mm512_cvtepu8_epi32(_mm512_castsi512_si128(tmp)));
234 if constexpr (simd_traits<target_simd_t>::length == 8)
235 return reinterpret_cast<target_simd_t
>(_mm512_cvtepu8_epi64(_mm512_castsi512_si128(tmp)));
237 else if constexpr (simd_traits<source_simd_t>::length == 32)
239 if constexpr (simd_traits<target_simd_t>::length == 16)
240 return reinterpret_cast<target_simd_t
>(_mm512_cvtepu16_epi32(_mm512_castsi512_si256(tmp)));
241 if constexpr (simd_traits<target_simd_t>::length == 8)
242 return reinterpret_cast<target_simd_t
>(_mm512_cvtepu16_epi64(_mm512_castsi512_si128(tmp)));
246 static_assert(simd_traits<source_simd_t>::length == 16,
"Expected 32 bit scalar type.");
247 return reinterpret_cast<target_simd_t
>(_mm512_cvtepu32_epi64(_mm512_castsi512_si256(tmp)));
251template <u
int8_t index, simd::simd_concept simd_t>
254 return reinterpret_cast<simd_t
>(
255 _mm512_castsi256_si512(_mm512_extracti64x4_epi64(
reinterpret_cast<__m512i
const &
>(src), index)));
258# if defined(__AVX512DQ__)
259template <u
int8_t index, simd::simd_concept simd_t>
262 return reinterpret_cast<simd_t
>(
263 _mm512_castsi128_si512(_mm512_extracti64x2_epi64(
reinterpret_cast<__m512i
const &
>(src), index)));
266template <u
int8_t index, simd::simd_concept simd_t>
269 __m512i tmp =
reinterpret_cast<__m512i
const &
>(src);
272 if constexpr (index % 2 == 1)
273 tmp = _mm512_shuffle_epi32(tmp,
static_cast<_MM_PERM_ENUM
>(0b0100'1110));
275 return reinterpret_cast<simd_t
>(_mm512_castsi128_si512(_mm512_extracti64x2_epi64(tmp, index / 2)));
Provides seqan3::detail::builtin_simd, seqan3::detail::is_builtin_simd and seqan3::simd::simd_traits<...
Provides intrinsics include for builtin simd.
Defines the requirements of a matrix (e.g. score matrices, trace matrices).
Definition matrix_concept.hpp:52
The internal SeqAn3 namespace.
Definition aligned_sequence_concept.hpp:26
void transpose_matrix_avx512(std::array< simd_t, simd_traits< simd_t >::length > &matrix)
Transposes the given simd vector matrix.
constexpr simd_t extract_half_avx512(simd_t const &src)
Extracts one half of the given simd vector and stores the result in the lower half of the target vect...
constexpr simd_t load_avx512(void const *mem_addr)
Load simd_t size bits of integral data from memory.
constexpr target_simd_t upcast_signed_avx512(source_simd_t const &src)
Upcasts the given vector into the target vector using signed extension of packed values.
constexpr void store_avx512(void *mem_addr, simd_t const &simd_vec)
Store simd_t size bits of integral data into memory.
constexpr simd_t extract_quarter_avx512(simd_t const &src)
Extracts one quarter of the given simd vector and stores it in the lower quarter of the target vector...
constexpr target_simd_t upcast_unsigned_avx512(source_simd_t const &src)
Upcasts the given vector into the target vector using unsigned extension of packed values.
constexpr simd_t extract_eighth_avx512(simd_t const &src)
Extracts one eighth of the given simd vector and stores it in the lower eighth of the target vector.
Provides seqan3::simd::simd_traits.
seqan3::simd::simd_traits is the trait class that provides uniform interface to the properties of sim...
Definition simd_traits.hpp:38
Provides seqan3::simd::simd_concept.