26 namespace seqan3::detail
31 template <simd::simd_concept simd_t>
32 constexpr simd_t load_avx2(
void const * mem_addr);
37 template <simd::simd_concept simd_t>
38 constexpr
void store_avx2(
void * mem_addr, simd_t
const & simd_vec);
43 template <simd::simd_concept simd_t>
44 inline void transpose_matrix_avx2(
std::array<simd_t, simd_traits<simd_t>::length> & matrix);
49 template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
50 constexpr target_simd_t upcast_signed_avx2(source_simd_t
const & src);
55 template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
56 constexpr target_simd_t upcast_unsigned_avx2(source_simd_t
const & src);
61 template <u
int8_t index, simd::simd_concept simd_t>
62 constexpr simd_t extract_half_avx2(simd_t
const & src);
67 template <u
int8_t index, simd::simd_concept simd_t>
68 constexpr simd_t extract_quarter_avx2(simd_t
const & src);
73 template <u
int8_t index, simd::simd_concept simd_t>
74 constexpr simd_t extract_eighth_avx2(simd_t
const & src);
84 namespace seqan3::detail
87 template <simd::simd_concept simd_t>
88 constexpr simd_t load_avx2(
void const * mem_addr)
90 return reinterpret_cast<simd_t
>(_mm256_loadu_si256(
reinterpret_cast<__m256i
const *
>(mem_addr)));
93 template <simd::simd_concept simd_t>
94 constexpr
void store_avx2(
void * mem_addr, simd_t
const & simd_vec)
96 _mm256_storeu_si256(
reinterpret_cast<__m256i *
>(mem_addr),
reinterpret_cast<__m256i
const &
>(simd_vec));
99 template <simd::simd_concept simd_t>
100 inline void transpose_matrix_avx2(
std::array<simd_t, simd_traits<simd_t>::length> & matrix)
103 auto _mm256_unpacklo_epi128 = [] (__m256i
const & a, __m256i
const & b)
105 return _mm256_permute2x128_si256(a, b, 0x20);
108 auto _mm256_unpackhi_epi128 = [] (__m256i
const & a, __m256i
const & b)
110 return _mm256_permute2x128_si256(a, b, 0x31);
114 static const uint8_t bit_rev[] = { 0, 8, 4,12, 2,10, 6,14, 1, 9, 5,13, 3,11, 7,15,
115 16,24,20,28,18,26,22,30,17,25,21,29,19,27,23,31};
119 for (
int i = 0; i < 16; ++i)
121 tmp1[i] = _mm256_unpacklo_epi8(
122 reinterpret_cast<const __m256i &
>(matrix[2*i]),
123 reinterpret_cast<const __m256i &
>(matrix[2*i+1])
125 tmp1[i+16] = _mm256_unpackhi_epi8(
126 reinterpret_cast<const __m256i &
>(matrix[2*i]),
127 reinterpret_cast<const __m256i &
>(matrix[2*i+1])
131 for (
int i = 0; i < 16; ++i)
133 tmp2[i] = _mm256_unpacklo_epi16(tmp1[2*i], tmp1[2*i+1]);
134 tmp2[i+16] = _mm256_unpackhi_epi16(tmp1[2*i], tmp1[2*i+1]);
136 for (
int i = 0; i < 16; ++i)
138 tmp1[i] = _mm256_unpacklo_epi32(tmp2[2*i], tmp2[2*i+1]);
139 tmp1[i+16] = _mm256_unpackhi_epi32(tmp2[2*i], tmp2[2*i+1]);
141 for (
int i = 0; i < 16; ++i)
143 tmp2[i] = _mm256_unpacklo_epi64(tmp1[2*i], tmp1[2*i+1]);
144 tmp2[i+16] = _mm256_unpackhi_epi64(tmp1[2*i], tmp1[2*i+1]);
146 for (
int i = 0; i < 16; ++i)
148 matrix[bit_rev[i]] =
reinterpret_cast<simd_t
>(_mm256_unpacklo_epi128(tmp2[2*i],tmp2[2*i+1]));
149 matrix[bit_rev[i+16]] =
reinterpret_cast<simd_t
>(_mm256_unpackhi_epi128(tmp2[2*i],tmp2[2*i+1]));
153 template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
154 constexpr target_simd_t upcast_signed_avx2(source_simd_t
const & src)
156 __m128i
const & tmp = _mm256_castsi256_si128(
reinterpret_cast<__m256i
const &
>(src));
157 if constexpr (simd_traits<source_simd_t>::length == 32)
159 if constexpr (simd_traits<target_simd_t>::length == 16)
160 return reinterpret_cast<target_simd_t>(_mm256_cvtepi8_epi16(tmp));
161 if constexpr (simd_traits<target_simd_t>::length == 8)
162 return reinterpret_cast<target_simd_t>(_mm256_cvtepi8_epi32(tmp));
163 if constexpr (simd_traits<target_simd_t>::length == 4)
164 return reinterpret_cast<target_simd_t>(_mm256_cvtepi8_epi64(tmp));
166 else if constexpr (simd_traits<source_simd_t>::length == 16)
168 if constexpr (simd_traits<target_simd_t>::length == 8)
169 return reinterpret_cast<target_simd_t>(_mm256_cvtepi16_epi32(tmp));
170 if constexpr (simd_traits<target_simd_t>::length == 4)
171 return reinterpret_cast<target_simd_t>(_mm256_cvtepi16_epi64(tmp));
175 static_assert(simd_traits<source_simd_t>::length == 8,
"Expected 32 bit scalar type.");
176 return reinterpret_cast<target_simd_t
>(_mm256_cvtepi32_epi64(tmp));
180 template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
181 constexpr target_simd_t upcast_unsigned_avx2(source_simd_t
const & src)
183 __m128i
const & tmp = _mm256_castsi256_si128(
reinterpret_cast<__m256i
const &
>(src));
184 if constexpr (simd_traits<source_simd_t>::length == 32)
186 if constexpr (simd_traits<target_simd_t>::length == 16)
187 return reinterpret_cast<target_simd_t>(_mm256_cvtepu8_epi16(tmp));
188 if constexpr (simd_traits<target_simd_t>::length == 8)
189 return reinterpret_cast<target_simd_t>(_mm256_cvtepu8_epi32(tmp));
190 if constexpr (simd_traits<target_simd_t>::length == 4)
191 return reinterpret_cast<target_simd_t>(_mm256_cvtepu8_epi64(tmp));
193 else if constexpr (simd_traits<source_simd_t>::length == 16)
195 if constexpr (simd_traits<target_simd_t>::length == 8)
196 return reinterpret_cast<target_simd_t>(_mm256_cvtepu16_epi32(tmp));
197 if constexpr (simd_traits<target_simd_t>::length == 4)
198 return reinterpret_cast<target_simd_t>(_mm256_cvtepu16_epi64(tmp));
202 static_assert(simd_traits<source_simd_t>::length == 8,
"Expected 32 bit scalar type.");
203 return reinterpret_cast<target_simd_t
>(_mm256_cvtepu32_epi64(tmp));
207 template <u
int8_t index, simd::simd_concept simd_t>
208 constexpr simd_t extract_half_avx2(simd_t
const & src)
210 return reinterpret_cast<simd_t
>(_mm256_castsi128_si256(
211 _mm256_extracti128_si256(
reinterpret_cast<__m256i
const &
>(src), index)));
214 template <u
int8_t index, simd::simd_concept simd_t>
215 constexpr simd_t extract_quarter_avx2(simd_t
const & src)
217 return reinterpret_cast<simd_t
>(_mm256_castsi128_si256(
218 _mm_cvtsi64x_si128(_mm256_extract_epi64(
reinterpret_cast<__m256i
const &
>(src), index))));
221 template <u
int8_t index, simd::simd_concept simd_t>
222 constexpr simd_t extract_eighth_avx2(simd_t
const & src)
224 return reinterpret_cast<simd_t
>(_mm256_castsi128_si256(
225 _mm_cvtsi32_si128(_mm256_extract_epi32(
reinterpret_cast<__m256i
const &
>(src), index))));
Provides seqan3::simd::simd_concept.
Provides seqan3::detail::builtin_simd, seqan3::detail::is_builtin_simd and seqan3::simd::simd_traits<builtin_simd_t>.
Provides intrinsics include for builtin simd.
Provides seqan3::simd::simd_traits.