26namespace seqan3::detail
31template <simd::simd_concept simd_t>
32constexpr simd_t load_avx2(
void const * mem_addr);
37template <simd::simd_concept simd_t>
38constexpr void store_avx2(
void * mem_addr, simd_t
const & simd_vec);
43template <simd::simd_concept simd_t>
44inline void transpose_matrix_avx2(
std::array<simd_t, simd_traits<simd_t>::length> & matrix);
49template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
50constexpr target_simd_t upcast_signed_avx2(source_simd_t
const & src);
55template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
56constexpr target_simd_t upcast_unsigned_avx2(source_simd_t
const & src);
61template <u
int8_t index, simd::simd_concept simd_t>
62constexpr simd_t extract_half_avx2(simd_t
const & src);
67template <u
int8_t index, simd::simd_concept simd_t>
68constexpr simd_t extract_quarter_avx2(simd_t
const & src);
73template <u
int8_t index, simd::simd_concept simd_t>
74constexpr simd_t extract_eighth_avx2(simd_t
const & src);
84namespace seqan3::detail
87template <simd::simd_concept simd_t>
88constexpr simd_t load_avx2(
void const * mem_addr)
90 return reinterpret_cast<simd_t
>(_mm256_loadu_si256(
reinterpret_cast<__m256i
const *
>(mem_addr)));
93template <simd::simd_concept simd_t>
94constexpr void store_avx2(
void * mem_addr, simd_t
const & simd_vec)
96 _mm256_storeu_si256(
reinterpret_cast<__m256i *
>(mem_addr),
reinterpret_cast<__m256i
const &
>(simd_vec));
99template <simd::simd_concept simd_t>
100inline void transpose_matrix_avx2(
std::array<simd_t, simd_traits<simd_t>::length> & matrix)
103 auto _mm256_unpacklo_epi128 = [](__m256i
const & a, __m256i
const & b)
105 return _mm256_permute2x128_si256(a, b, 0x20);
108 auto _mm256_unpackhi_epi128 = [](__m256i
const & a, __m256i
const & b)
110 return _mm256_permute2x128_si256(a, b, 0x31);
114 static const uint8_t bit_rev[] = {0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15,
115 16, 24, 20, 28, 18, 26, 22, 30, 17, 25, 21, 29, 19, 27, 23, 31};
119 for (
int i = 0; i < 16; ++i)
121 tmp1[i] = _mm256_unpacklo_epi8(
reinterpret_cast<__m256i
const &
>(matrix[2 * i]),
122 reinterpret_cast<__m256i
const &
>(matrix[2 * i + 1]));
123 tmp1[i + 16] = _mm256_unpackhi_epi8(
reinterpret_cast<__m256i
const &
>(matrix[2 * i]),
124 reinterpret_cast<__m256i
const &
>(matrix[2 * i + 1]));
127 for (
int i = 0; i < 16; ++i)
129 tmp2[i] = _mm256_unpacklo_epi16(tmp1[2 * i], tmp1[2 * i + 1]);
130 tmp2[i + 16] = _mm256_unpackhi_epi16(tmp1[2 * i], tmp1[2 * i + 1]);
132 for (
int i = 0; i < 16; ++i)
134 tmp1[i] = _mm256_unpacklo_epi32(tmp2[2 * i], tmp2[2 * i + 1]);
135 tmp1[i + 16] = _mm256_unpackhi_epi32(tmp2[2 * i], tmp2[2 * i + 1]);
137 for (
int i = 0; i < 16; ++i)
139 tmp2[i] = _mm256_unpacklo_epi64(tmp1[2 * i], tmp1[2 * i + 1]);
140 tmp2[i + 16] = _mm256_unpackhi_epi64(tmp1[2 * i], tmp1[2 * i + 1]);
142 for (
int i = 0; i < 16; ++i)
144 matrix[bit_rev[i]] =
reinterpret_cast<simd_t
>(_mm256_unpacklo_epi128(tmp2[2 * i], tmp2[2 * i + 1]));
145 matrix[bit_rev[i + 16]] =
reinterpret_cast<simd_t
>(_mm256_unpackhi_epi128(tmp2[2 * i], tmp2[2 * i + 1]));
149template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
150constexpr target_simd_t upcast_signed_avx2(source_simd_t
const & src)
152 __m128i
const & tmp = _mm256_castsi256_si128(
reinterpret_cast<__m256i
const &
>(src));
153 if constexpr (simd_traits<source_simd_t>::length == 32)
155 if constexpr (simd_traits<target_simd_t>::length == 16)
156 return reinterpret_cast<target_simd_t
>(_mm256_cvtepi8_epi16(tmp));
157 if constexpr (simd_traits<target_simd_t>::length == 8)
158 return reinterpret_cast<target_simd_t
>(_mm256_cvtepi8_epi32(tmp));
159 if constexpr (simd_traits<target_simd_t>::length == 4)
160 return reinterpret_cast<target_simd_t
>(_mm256_cvtepi8_epi64(tmp));
162 else if constexpr (simd_traits<source_simd_t>::length == 16)
164 if constexpr (simd_traits<target_simd_t>::length == 8)
165 return reinterpret_cast<target_simd_t
>(_mm256_cvtepi16_epi32(tmp));
166 if constexpr (simd_traits<target_simd_t>::length == 4)
167 return reinterpret_cast<target_simd_t
>(_mm256_cvtepi16_epi64(tmp));
171 static_assert(simd_traits<source_simd_t>::length == 8,
"Expected 32 bit scalar type.");
172 return reinterpret_cast<target_simd_t
>(_mm256_cvtepi32_epi64(tmp));
176template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
177constexpr target_simd_t upcast_unsigned_avx2(source_simd_t
const & src)
179 __m128i
const & tmp = _mm256_castsi256_si128(
reinterpret_cast<__m256i
const &
>(src));
180 if constexpr (simd_traits<source_simd_t>::length == 32)
182 if constexpr (simd_traits<target_simd_t>::length == 16)
183 return reinterpret_cast<target_simd_t
>(_mm256_cvtepu8_epi16(tmp));
184 if constexpr (simd_traits<target_simd_t>::length == 8)
185 return reinterpret_cast<target_simd_t
>(_mm256_cvtepu8_epi32(tmp));
186 if constexpr (simd_traits<target_simd_t>::length == 4)
187 return reinterpret_cast<target_simd_t
>(_mm256_cvtepu8_epi64(tmp));
189 else if constexpr (simd_traits<source_simd_t>::length == 16)
191 if constexpr (simd_traits<target_simd_t>::length == 8)
192 return reinterpret_cast<target_simd_t
>(_mm256_cvtepu16_epi32(tmp));
193 if constexpr (simd_traits<target_simd_t>::length == 4)
194 return reinterpret_cast<target_simd_t
>(_mm256_cvtepu16_epi64(tmp));
198 static_assert(simd_traits<source_simd_t>::length == 8,
"Expected 32 bit scalar type.");
199 return reinterpret_cast<target_simd_t
>(_mm256_cvtepu32_epi64(tmp));
203template <u
int8_t index, simd::simd_concept simd_t>
204constexpr simd_t extract_half_avx2(simd_t
const & src)
206 return reinterpret_cast<simd_t
>(
207 _mm256_castsi128_si256(_mm256_extracti128_si256(
reinterpret_cast<__m256i
const &
>(src), index)));
210template <u
int8_t index, simd::simd_concept simd_t>
211constexpr simd_t extract_quarter_avx2(simd_t
const & src)
213 return reinterpret_cast<simd_t
>(_mm256_castsi128_si256(
214 _mm_cvtsi64x_si128(_mm256_extract_epi64(
reinterpret_cast<__m256i
const &
>(src), index))));
217template <u
int8_t index, simd::simd_concept simd_t>
218constexpr simd_t extract_eighth_avx2(simd_t
const & src)
220 return reinterpret_cast<simd_t
>(
221 _mm256_castsi128_si256(_mm_cvtsi32_si128(_mm256_extract_epi32(
reinterpret_cast<__m256i
const &
>(src), index))));
Provides seqan3::detail::builtin_simd, seqan3::detail::is_builtin_simd and seqan3::simd::simd_traits<...
Provides the intrinsics include for builtin simd.
Provides seqan3::simd::simd_traits.
Provides seqan3::simd::simd_concept.