26namespace seqan3::detail
31template <simd::simd_concept simd_t>
32constexpr simd_t load_sse4(
void const * mem_addr);
37template <simd::simd_concept simd_t>
38constexpr void store_sse4(
void * mem_addr, simd_t
const & simd_vec);
43template <simd::simd_concept simd_t>
44inline void transpose_matrix_sse4(
std::array<simd_t, simd_traits<simd_t>::length> & matrix);
49template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
50constexpr target_simd_t upcast_signed_sse4(source_simd_t
const & src);
55template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
56constexpr target_simd_t upcast_unsigned_sse4(source_simd_t
const & src);
61template <u
int8_t index, simd::simd_concept simd_t>
62constexpr simd_t extract_half_sse4(simd_t
const & src);
67template <u
int8_t index, simd::simd_concept simd_t>
68constexpr simd_t extract_quarter_sse4(simd_t
const & src);
73template <u
int8_t index, simd::simd_concept simd_t>
74constexpr simd_t extract_eighth_sse4(simd_t
const & src);
84namespace seqan3::detail
87template <simd::simd_concept simd_t>
88constexpr simd_t load_sse4(
void const * mem_addr)
90 return reinterpret_cast<simd_t
>(_mm_loadu_si128(
reinterpret_cast<__m128i
const *
>(mem_addr)));
93template <simd::simd_concept simd_t>
94constexpr void store_sse4(
void * mem_addr, simd_t
const & simd_vec)
96 _mm_storeu_si128(
reinterpret_cast<__m128i *
>(mem_addr),
reinterpret_cast<__m128i
const &
>(simd_vec));
99template <simd::simd_concept simd_t>
100inline void transpose_matrix_sse4(
std::array<simd_t, simd_traits<simd_t>::length> & matrix)
102 static_assert(simd_traits<simd_t>::length == simd_traits<simd_t>::max_length,
"Expects byte scalar type.");
103 static_assert(is_native_builtin_simd_v<simd_t>,
"The passed simd vector is not a native SSE4 simd vector type.");
104 static_assert(is_builtin_simd_v<simd_t>,
"The passed simd vector is not a builtin vector type.");
108 constexpr std::array<char, 16> bit_reverse{0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15};
118 for (
int i = 0; i < 8; ++i)
120 tmp1[i] = _mm_unpacklo_epi8(
reinterpret_cast<__m128i &
>(matrix[2 * i]),
121 reinterpret_cast<__m128i &
>(matrix[2 * i + 1]));
122 tmp1[i + 8] = _mm_unpackhi_epi8(
reinterpret_cast<__m128i &
>(matrix[2 * i]),
123 reinterpret_cast<__m128i &
>(matrix[2 * i + 1]));
133 for (
int i = 0; i < 8; ++i)
135 tmp2[i] = _mm_unpacklo_epi16(tmp1[2 * i], tmp1[2 * i + 1]);
136 tmp2[i + 8] = _mm_unpackhi_epi16(tmp1[2 * i], tmp1[2 * i + 1]);
150 for (
int i = 0; i < 8; ++i)
152 tmp1[i] = _mm_unpacklo_epi32(tmp2[2 * i], tmp2[2 * i + 1]);
153 tmp1[i + 8] = _mm_unpackhi_epi32(tmp2[2 * i], tmp2[2 * i + 1]);
160 for (
int i = 0; i < 8; ++i)
162 matrix[bit_reverse[i]] =
reinterpret_cast<simd_t
>(_mm_unpacklo_epi64(tmp1[2 * i], tmp1[2 * i + 1]));
163 matrix[bit_reverse[i + 8]] =
reinterpret_cast<simd_t
>(_mm_unpackhi_epi64(tmp1[2 * i], tmp1[2 * i + 1]));
167template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
168constexpr target_simd_t upcast_signed_sse4(source_simd_t
const & src)
170 if constexpr (simd_traits<source_simd_t>::length == 16)
172 if constexpr (simd_traits<target_simd_t>::length == 8)
173 return reinterpret_cast<target_simd_t
>(_mm_cvtepi8_epi16(
reinterpret_cast<__m128i
const &
>(src)));
174 if constexpr (simd_traits<target_simd_t>::length == 4)
175 return reinterpret_cast<target_simd_t
>(_mm_cvtepi8_epi32(
reinterpret_cast<__m128i
const &
>(src)));
176 if constexpr (simd_traits<target_simd_t>::length == 2)
177 return reinterpret_cast<target_simd_t
>(_mm_cvtepi8_epi64(
reinterpret_cast<__m128i
const &
>(src)));
179 else if constexpr (simd_traits<source_simd_t>::length == 8)
181 if constexpr (simd_traits<target_simd_t>::length == 4)
182 return reinterpret_cast<target_simd_t
>(_mm_cvtepi16_epi32(
reinterpret_cast<__m128i
const &
>(src)));
183 if constexpr (simd_traits<target_simd_t>::length == 2)
184 return reinterpret_cast<target_simd_t
>(_mm_cvtepi16_epi64(
reinterpret_cast<__m128i
const &
>(src)));
188 static_assert(simd_traits<source_simd_t>::length == 4,
"Expected 32 bit scalar type.");
189 return reinterpret_cast<target_simd_t
>(_mm_cvtepi32_epi64(
reinterpret_cast<__m128i
const &
>(src)));
193template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
194constexpr target_simd_t upcast_unsigned_sse4(source_simd_t
const & src)
196 if constexpr (simd_traits<source_simd_t>::length == 16)
198 if constexpr (simd_traits<target_simd_t>::length == 8)
199 return reinterpret_cast<target_simd_t
>(_mm_cvtepu8_epi16(
reinterpret_cast<__m128i
const &
>(src)));
200 if constexpr (simd_traits<target_simd_t>::length == 4)
201 return reinterpret_cast<target_simd_t
>(_mm_cvtepu8_epi32(
reinterpret_cast<__m128i
const &
>(src)));
202 if constexpr (simd_traits<target_simd_t>::length == 2)
203 return reinterpret_cast<target_simd_t
>(_mm_cvtepu8_epi64(
reinterpret_cast<__m128i
const &
>(src)));
205 else if constexpr (simd_traits<source_simd_t>::length == 8)
207 if constexpr (simd_traits<target_simd_t>::length == 4)
208 return reinterpret_cast<target_simd_t
>(_mm_cvtepu16_epi32(
reinterpret_cast<__m128i
const &
>(src)));
209 if constexpr (simd_traits<target_simd_t>::length == 2)
210 return reinterpret_cast<target_simd_t
>(_mm_cvtepu16_epi64(
reinterpret_cast<__m128i
const &
>(src)));
214 static_assert(simd_traits<source_simd_t>::length == 4,
"Expected 32 bit scalar type.");
215 return reinterpret_cast<target_simd_t
>(_mm_cvtepu32_epi64(
reinterpret_cast<__m128i
const &
>(src)));
219template <u
int8_t index, simd::simd_concept simd_t>
220constexpr simd_t extract_half_sse4(simd_t
const & src)
222 return reinterpret_cast<simd_t
>(_mm_srli_si128(
reinterpret_cast<__m128i
const &
>(src), (index) << 3));
225template <u
int8_t index, simd::simd_concept simd_t>
226constexpr simd_t extract_quarter_sse4(simd_t
const & src)
228 return reinterpret_cast<simd_t
>(_mm_srli_si128(
reinterpret_cast<__m128i
const &
>(src), index << 2));
231template <u
int8_t index, simd::simd_concept simd_t>
232constexpr simd_t extract_eighth_sse4(simd_t
const & src)
234 return reinterpret_cast<simd_t
>(_mm_srli_si128(
reinterpret_cast<__m128i
const &
>(src), index << 1));
// Headers referenced by this file:
// - Provides seqan3::detail::builtin_simd, seqan3::detail::is_builtin_simd and seqan3::simd::simd_traits<...>.
// - Provides intrinsics include for builtin simd.
// - Provides seqan3::simd::simd_traits.
// - Provides seqan3::simd::simd_concept.