28template <simd::simd_concept simd_t>
// Forward declaration of store_sse4(mem_addr, simd_vec); the body appears
// further down in this listing. The leading "34"/"35" are line numbers of the
// original file that the extraction fused onto the code.
34template <simd::simd_concept simd_t>
35constexpr void store_sse4(
void * mem_addr, simd_t
const & simd_vec);
40template <simd::simd_concept simd_t>
46template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
52template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
58template <u
int8_t index, simd::simd_concept simd_t>
64template <u
int8_t index, simd::simd_concept simd_t>
70template <u
int8_t index, simd::simd_concept simd_t>
// load_sse4: reads one __m128i (128 bits) from mem_addr with _mm_loadu_si128
// and returns it reinterpreted as simd_t.
//   mem_addr - untyped source address; it is cast to `__m128i const *` before the load.
//   returns  - the loaded bits viewed as simd_t.
// NOTE(review): the opening/closing brace lines of the body are missing from
// this listing (original lines 86 and 88 are skipped).
84template <simd::simd_concept simd_t>
85constexpr simd_t
load_sse4(
void const * mem_addr)
87 return reinterpret_cast<simd_t
>(_mm_loadu_si128(
reinterpret_cast<__m128i
const *
>(mem_addr)));
// store_sse4: writes simd_vec, viewed as one __m128i (128 bits), to mem_addr
// with _mm_storeu_si128.
//   mem_addr - untyped destination address; it is cast to `__m128i *` before the store.
//   simd_vec - vector to store; it is reinterpreted as `__m128i const &`.
// NOTE(review): the opening/closing brace lines of the body are missing from
// this listing (original lines 92 and 94 are skipped).
90template <simd::simd_concept simd_t>
91constexpr void store_sse4(
void * mem_addr, simd_t
const & simd_vec)
93 _mm_storeu_si128(
reinterpret_cast<__m128i *
>(mem_addr),
reinterpret_cast<__m128i
const &
>(simd_vec));
// Body of the matrix transpose (the tooltip text at the end of this listing
// names it transpose_matrix_sse4, taking
// std::array<simd_t, simd_traits<simd_t>::length> & matrix).
// NOTE(review): the signature line, the brace lines and the declarations of
// tmp1/tmp2 are not present in this listing; only the statements below survive.
//
// Outline of what the visible statements do:
//   1. Compile-time checks on simd_t.
//   2. Four interleave passes over 16 rows (8-, 16-, 32-, then 64-bit units),
//      alternating between the temporaries tmp1 and tmp2.
//   3. The last pass writes back into `matrix`, using bit_reverse to choose
//      the destination row.
96template <simd::simd_concept simd_t>
// The vector must have as many elements as the maximum length reported by simd_traits.
99 static_assert(simd_traits<simd_t>::length == simd_traits<simd_t>::max_length,
"Expects byte scalar type.");
100 static_assert(is_native_builtin_simd_v<simd_t>,
"The passed simd vector is not a native SSE4 simd vector type.");
101 static_assert(is_builtin_simd_v<simd_t>,
"The passed simd vector is not a builtin vector type.");
// Lookup table: bit_reverse[k] is k with its lowest four bits reversed
// (e.g. 1 = 0b0001 -> 8 = 0b1000, 3 = 0b0011 -> 12 = 0b1100).
105 constexpr std::array<char, 16> bit_reverse{0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15};
// Pass 1: interleave the bytes of each pair of adjacent rows (2*i, 2*i + 1).
// Low-half results go to tmp1[0..7], high-half results to tmp1[8..15].
115 for (
int i = 0; i < 8; ++i)
117 tmp1[i] = _mm_unpacklo_epi8(
reinterpret_cast<__m128i &
>(matrix[2 * i]),
118 reinterpret_cast<__m128i &
>(matrix[2 * i + 1]));
119 tmp1[i + 8] = _mm_unpackhi_epi8(
reinterpret_cast<__m128i &
>(matrix[2 * i]),
120 reinterpret_cast<__m128i &
>(matrix[2 * i + 1]));
// Pass 2: same pairing scheme on tmp1, interleaving 16-bit units into tmp2.
130 for (
int i = 0; i < 8; ++i)
132 tmp2[i] = _mm_unpacklo_epi16(tmp1[2 * i], tmp1[2 * i + 1]);
133 tmp2[i + 8] = _mm_unpackhi_epi16(tmp1[2 * i], tmp1[2 * i + 1]);
// Pass 3: interleave 32-bit units of tmp2 back into tmp1 (tmp1 is reused).
147 for (
int i = 0; i < 8; ++i)
149 tmp1[i] = _mm_unpacklo_epi32(tmp2[2 * i], tmp2[2 * i + 1]);
150 tmp1[i + 8] = _mm_unpackhi_epi32(tmp2[2 * i], tmp2[2 * i + 1]);
// Pass 4: interleave 64-bit units and store the results in `matrix`; the
// destination row index is taken from bit_reverse rather than used directly.
157 for (
int i = 0; i < 8; ++i)
159 matrix[bit_reverse[i]] =
reinterpret_cast<simd_t
>(_mm_unpacklo_epi64(tmp1[2 * i], tmp1[2 * i + 1]));
160 matrix[bit_reverse[i + 8]] =
reinterpret_cast<simd_t
>(_mm_unpackhi_epi64(tmp1[2 * i], tmp1[2 * i + 1]));
// Body of the signed upcast (the tooltip text at the end of this listing names
// it upcast_signed_sse4(source_simd_t const & src) returning target_simd_t).
// NOTE(review): the signature line, brace lines and the `else` that presumably
// precedes the final static_assert are not present in this listing.
//
// Picks a sign-extending _mm_cvtepiX_epiY conversion at compile time from the
// element counts of the source and target vector types:
//   source length 16 -> target length 8 / 4 / 2 : cvtepi8_epi16 / _epi32 / _epi64
//   source length  8 -> target length 4 / 2     : cvtepi16_epi32 / _epi64
//   otherwise (asserted to be length 4)         : cvtepi32_epi64
// src is reinterpreted as `__m128i const &` for the intrinsic and the result is
// reinterpreted as target_simd_t.
164template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
// Source has 16 elements.
167 if constexpr (simd_traits<source_simd_t>::length == 16)
169 if constexpr (simd_traits<target_simd_t>::length == 8)
170 return reinterpret_cast<target_simd_t
>(_mm_cvtepi8_epi16(
reinterpret_cast<__m128i
const &
>(src)));
171 if constexpr (simd_traits<target_simd_t>::length == 4)
172 return reinterpret_cast<target_simd_t
>(_mm_cvtepi8_epi32(
reinterpret_cast<__m128i
const &
>(src)));
173 if constexpr (simd_traits<target_simd_t>::length == 2)
174 return reinterpret_cast<target_simd_t
>(_mm_cvtepi8_epi64(
reinterpret_cast<__m128i
const &
>(src)));
// Source has 8 elements.
176 else if constexpr (simd_traits<source_simd_t>::length == 8)
178 if constexpr (simd_traits<target_simd_t>::length == 4)
179 return reinterpret_cast<target_simd_t
>(_mm_cvtepi16_epi32(
reinterpret_cast<__m128i
const &
>(src)));
180 if constexpr (simd_traits<target_simd_t>::length == 2)
181 return reinterpret_cast<target_simd_t
>(_mm_cvtepi16_epi64(
reinterpret_cast<__m128i
const &
>(src)));
// Remaining case: the source must have 4 elements; only the 64-bit target
// conversion is used here.
185 static_assert(simd_traits<source_simd_t>::length == 4,
"Expected 32 bit scalar type.");
186 return reinterpret_cast<target_simd_t
>(_mm_cvtepi32_epi64(
reinterpret_cast<__m128i
const &
>(src)));
// Body of the unsigned upcast (the tooltip text at the end of this listing
// names it upcast_unsigned_sse4(source_simd_t const & src) returning target_simd_t).
// NOTE(review): the signature line, brace lines and the `else` that presumably
// precedes the final static_assert are not present in this listing.
//
// Same compile-time dispatch as the signed variant, but using the
// zero-extending _mm_cvtepuX_epiY conversions:
//   source length 16 -> target length 8 / 4 / 2 : cvtepu8_epi16 / _epi32 / _epi64
//   source length  8 -> target length 4 / 2     : cvtepu16_epi32 / _epi64
//   otherwise (asserted to be length 4)         : cvtepu32_epi64
// src is reinterpreted as `__m128i const &` for the intrinsic and the result is
// reinterpreted as target_simd_t.
190template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
// Source has 16 elements.
193 if constexpr (simd_traits<source_simd_t>::length == 16)
195 if constexpr (simd_traits<target_simd_t>::length == 8)
196 return reinterpret_cast<target_simd_t
>(_mm_cvtepu8_epi16(
reinterpret_cast<__m128i
const &
>(src)));
197 if constexpr (simd_traits<target_simd_t>::length == 4)
198 return reinterpret_cast<target_simd_t
>(_mm_cvtepu8_epi32(
reinterpret_cast<__m128i
const &
>(src)));
199 if constexpr (simd_traits<target_simd_t>::length == 2)
200 return reinterpret_cast<target_simd_t
>(_mm_cvtepu8_epi64(
reinterpret_cast<__m128i
const &
>(src)));
// Source has 8 elements.
202 else if constexpr (simd_traits<source_simd_t>::length == 8)
204 if constexpr (simd_traits<target_simd_t>::length == 4)
205 return reinterpret_cast<target_simd_t
>(_mm_cvtepu16_epi32(
reinterpret_cast<__m128i
const &
>(src)));
206 if constexpr (simd_traits<target_simd_t>::length == 2)
207 return reinterpret_cast<target_simd_t
>(_mm_cvtepu16_epi64(
reinterpret_cast<__m128i
const &
>(src)));
// Remaining case: the source must have 4 elements; only the 64-bit target
// conversion is used here.
211 static_assert(simd_traits<source_simd_t>::length == 4,
"Expected 32 bit scalar type.");
212 return reinterpret_cast<target_simd_t
>(_mm_cvtepu32_epi64(
reinterpret_cast<__m128i
const &
>(src)));
// Shifts src right by (index << 3) = index * 8 bytes with _mm_srli_si128 and
// returns the result as simd_t, so the index-th 8-byte half of the 16-byte
// vector ends up in the lowest bytes (vacated upper bytes are zero-filled by
// the intrinsic).
// NOTE(review): the signature line is missing from this listing; the 8-byte
// stride matches the extract_half_sse4(simd_t const & src) tooltip below.
// "u" / "int8_t" is presumably the single token `uint8_t`, split by extraction.
216template <u
int8_t index, simd::simd_concept simd_t>
219 return reinterpret_cast<simd_t
>(_mm_srli_si128(
reinterpret_cast<__m128i
const &
>(src), (index) << 3));
// Shifts src right by (index << 2) = index * 4 bytes with _mm_srli_si128 and
// returns the result as simd_t, so the index-th 4-byte quarter of the 16-byte
// vector ends up in the lowest bytes (vacated upper bytes are zero-filled by
// the intrinsic).
// NOTE(review): the signature line is missing from this listing; the 4-byte
// stride matches the extract_quarter_sse4(simd_t const & src) tooltip below.
// "u" / "int8_t" is presumably the single token `uint8_t`, split by extraction.
222template <u
int8_t index, simd::simd_concept simd_t>
225 return reinterpret_cast<simd_t
>(_mm_srli_si128(
reinterpret_cast<__m128i
const &
>(src), index << 2));
// Shifts src right by (index << 1) = index * 2 bytes with _mm_srli_si128 and
// returns the result as simd_t, so the index-th 2-byte eighth of the 16-byte
// vector ends up in the lowest bytes (vacated upper bytes are zero-filled by
// the intrinsic).
// NOTE(review): the signature line is missing from this listing; the 2-byte
// stride matches the extract_eighth_sse4(simd_t const & src) tooltip below.
// "u" / "int8_t" is presumably the single token `uint8_t`, split by extraction.
228template <u
int8_t index, simd::simd_concept simd_t>
231 return reinterpret_cast<simd_t
>(_mm_srli_si128(
reinterpret_cast<__m128i
const &
>(src), index << 1));
Provides seqan3::detail::builtin_simd, seqan3::detail::is_builtin_simd and seqan3::simd::simd_traits<...
Provides intrinsics include for builtin simd.
Defines the requirements of a matrix (e.g. score matrices, trace matrices).
Definition matrix_concept.hpp:58
The internal SeqAn3 namespace.
Definition aligned_sequence_concept.hpp:26
void transpose_matrix_sse4(std::array< simd_t, simd_traits< simd_t >::length > &matrix)
Transposes the given simd vector matrix.
constexpr simd_t extract_eighth_sse4(simd_t const &src)
Extracts one eighth of the given simd vector and stores it in the lower eighth of the target vector.
constexpr simd_t extract_half_sse4(simd_t const &src)
Extracts one half of the given simd vector and stores the result in the lower half of the target vector.
constexpr target_simd_t upcast_signed_sse4(source_simd_t const &src)
Upcasts the given vector into the target vector using signed extension of packed values.
constexpr void store_sse4(void *mem_addr, simd_t const &simd_vec)
Store simd_t size bits of integral data into memory.
constexpr target_simd_t upcast_unsigned_sse4(source_simd_t const &src)
Upcasts the given vector into the target vector using unsigned extension of packed values.
constexpr simd_t extract_quarter_sse4(simd_t const &src)
Extracts one quarter of the given simd vector and stores it in the lower quarter of the target vector.
constexpr simd_t load_sse4(void const *mem_addr)
Load simd_t size bits of integral data from memory.
Provides seqan3::simd::simd_traits.
seqan3::simd::simd_traits is the trait class that provides uniform interface to the properties of sim...
Definition simd_traits.hpp:38
Provides seqan3::simd::simd_concept.