namespace seqan3::detail
{

//!\brief Loads a 128-bit simd vector from the given (possibly unaligned) memory address; SSE4 implementation.
template <simd::simd_concept simd_t>
constexpr simd_t load_sse4(void const * mem_addr);

//!\brief Transposes the given 16x16 byte matrix in place; SSE4 implementation.
template <simd::simd_concept simd_t>
inline void transpose_matrix_sse4(std::array<simd_t, simd_traits<simd_t>::length> & matrix);

//!\brief Upcasts (sign extends) the lower elements of the source vector to the scalar type of the target vector; SSE4 implementation.
template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
constexpr target_simd_t upcast_signed_sse4(source_simd_t const & src);

//!\brief Upcasts (zero extends) the lower elements of the source vector to the scalar type of the target vector; SSE4 implementation.
template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
constexpr target_simd_t upcast_unsigned_sse4(source_simd_t const & src);

//!\brief Extracts the `index`-th halve (64 bit) of the given simd vector; SSE4 implementation.
template <uint8_t index, simd::simd_concept simd_t>
constexpr simd_t extract_halve_sse4(simd_t const & src);

//!\brief Extracts the `index`-th quarter (32 bit) of the given simd vector; SSE4 implementation.
template <uint8_t index, simd::simd_concept simd_t>
constexpr simd_t extract_quarter_sse4(simd_t const & src);

//!\brief Extracts the `index`-th eighth (16 bit) of the given simd vector; SSE4 implementation.
template <uint8_t index, simd::simd_concept simd_t>
constexpr simd_t extract_eighth_sse4(simd_t const & src);

} // namespace seqan3::detail
// Implementations of the algorithms declared above.
namespace seqan3::detail
{

template <simd::simd_concept simd_t>
constexpr simd_t load_sse4(void const * mem_addr)
{
    // Unaligned 128-bit load, reinterpreted as the requested simd type.
    return reinterpret_cast<simd_t>(_mm_loadu_si128(reinterpret_cast<__m128i const *>(mem_addr)));
}
template <simd::simd_concept simd_t>
inline void transpose_matrix_sse4(std::array<simd_t, simd_traits<simd_t>::length> & matrix)
{
    static_assert(simd_traits<simd_t>::length == simd_traits<simd_t>::max_length, "Expects byte scalar type.");
    static_assert(is_native_builtin_simd_v<simd_t>, "The passed simd vector is not a native SSE4 simd vector type.");
    static_assert(is_builtin_simd_v<simd_t>, "The passed simd vector is not a builtin vector type.");

    // Look-up table that bit-reverses the indices 0..15. The unpack rounds below produce the
    // transposed rows in bit-reversed order, so this table maps each row to its final position.
    constexpr std::array<char, 16> bit_reverse{0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15};

    // Round 1: interleave adjacent row pairs byte-wise.
    __m128i tmp1[16];
    for (int i = 0; i < 8; ++i)
    {
        tmp1[i] = _mm_unpacklo_epi8(reinterpret_cast<__m128i &>(matrix[2 * i]),
                                    reinterpret_cast<__m128i &>(matrix[2 * i + 1]));
        tmp1[i + 8] = _mm_unpackhi_epi8(reinterpret_cast<__m128i &>(matrix[2 * i]),
                                        reinterpret_cast<__m128i &>(matrix[2 * i + 1]));
    }

    // Round 2: interleave the intermediate rows 16-bit-wise.
    __m128i tmp2[16];
    for (int i = 0; i < 8; ++i)
    {
        tmp2[i] = _mm_unpacklo_epi16(tmp1[2 * i], tmp1[2 * i + 1]);
        tmp2[i + 8] = _mm_unpackhi_epi16(tmp1[2 * i], tmp1[2 * i + 1]);
    }

    // Round 3: interleave 32-bit-wise.
    for (int i = 0; i < 8; ++i)
    {
        tmp1[i] = _mm_unpacklo_epi32(tmp2[2 * i], tmp2[2 * i + 1]);
        tmp1[i + 8] = _mm_unpackhi_epi32(tmp2[2 * i], tmp2[2 * i + 1]);
    }

    // Round 4: interleave 64-bit-wise and store each row at its bit-reversed target index.
    for (int i = 0; i < 8; ++i)
    {
        matrix[bit_reverse[i]] = reinterpret_cast<simd_t>(_mm_unpacklo_epi64(tmp1[2 * i], tmp1[2 * i + 1]));
        matrix[bit_reverse[i + 8]] = reinterpret_cast<simd_t>(_mm_unpackhi_epi64(tmp1[2 * i], tmp1[2 * i + 1]));
    }
}
template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
constexpr target_simd_t upcast_signed_sse4(source_simd_t const & src)
{
    if constexpr (simd_traits<source_simd_t>::length == 16) // cast from int8_t ...
    {
        if constexpr (simd_traits<target_simd_t>::length == 8) // ... to int16_t
            return reinterpret_cast<target_simd_t>(_mm_cvtepi8_epi16(reinterpret_cast<__m128i const &>(src)));
        if constexpr (simd_traits<target_simd_t>::length == 4) // ... to int32_t
            return reinterpret_cast<target_simd_t>(_mm_cvtepi8_epi32(reinterpret_cast<__m128i const &>(src)));
        if constexpr (simd_traits<target_simd_t>::length == 2) // ... to int64_t
            return reinterpret_cast<target_simd_t>(_mm_cvtepi8_epi64(reinterpret_cast<__m128i const &>(src)));
    }
    else if constexpr (simd_traits<source_simd_t>::length == 8) // cast from int16_t ...
    {
        if constexpr (simd_traits<target_simd_t>::length == 4) // ... to int32_t
            return reinterpret_cast<target_simd_t>(_mm_cvtepi16_epi32(reinterpret_cast<__m128i const &>(src)));
        if constexpr (simd_traits<target_simd_t>::length == 2) // ... to int64_t
            return reinterpret_cast<target_simd_t>(_mm_cvtepi16_epi64(reinterpret_cast<__m128i const &>(src)));
    }
    else // cast from int32_t to int64_t
    {
        static_assert(simd_traits<source_simd_t>::length == 4, "Expected 32 bit scalar type.");
        return reinterpret_cast<target_simd_t>(_mm_cvtepi32_epi64(reinterpret_cast<__m128i const &>(src)));
    }
}
template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
constexpr target_simd_t upcast_unsigned_sse4(source_simd_t const & src)
{
    if constexpr (simd_traits<source_simd_t>::length == 16) // cast from uint8_t ...
    {
        if constexpr (simd_traits<target_simd_t>::length == 8) // ... to uint16_t
            return reinterpret_cast<target_simd_t>(_mm_cvtepu8_epi16(reinterpret_cast<__m128i const &>(src)));
        if constexpr (simd_traits<target_simd_t>::length == 4) // ... to uint32_t
            return reinterpret_cast<target_simd_t>(_mm_cvtepu8_epi32(reinterpret_cast<__m128i const &>(src)));
        if constexpr (simd_traits<target_simd_t>::length == 2) // ... to uint64_t
            return reinterpret_cast<target_simd_t>(_mm_cvtepu8_epi64(reinterpret_cast<__m128i const &>(src)));
    }
    else if constexpr (simd_traits<source_simd_t>::length == 8) // cast from uint16_t ...
    {
        if constexpr (simd_traits<target_simd_t>::length == 4) // ... to uint32_t
            return reinterpret_cast<target_simd_t>(_mm_cvtepu16_epi32(reinterpret_cast<__m128i const &>(src)));
        if constexpr (simd_traits<target_simd_t>::length == 2) // ... to uint64_t
            return reinterpret_cast<target_simd_t>(_mm_cvtepu16_epi64(reinterpret_cast<__m128i const &>(src)));
    }
    else // cast from uint32_t to uint64_t
    {
        static_assert(simd_traits<source_simd_t>::length == 4, "Expected 32 bit scalar type.");
        return reinterpret_cast<target_simd_t>(_mm_cvtepu32_epi64(reinterpret_cast<__m128i const &>(src)));
    }
}
template <uint8_t index, simd::simd_concept simd_t>
constexpr simd_t extract_halve_sse4(simd_t const & src)
{
    // Shift the vector right by `index * 8` bytes so that the selected 64-bit halve
    // ends up in the lower bits of the result.
    return reinterpret_cast<simd_t>(_mm_srli_si128(reinterpret_cast<__m128i const &>(src), index << 3));
}

template <uint8_t index, simd::simd_concept simd_t>
constexpr simd_t extract_quarter_sse4(simd_t const & src)
{
    // Shift the vector right by `index * 4` bytes so that the selected 32-bit quarter
    // ends up in the lower bits of the result.
    return reinterpret_cast<simd_t>(_mm_srli_si128(reinterpret_cast<__m128i const &>(src), index << 2));
}

template <uint8_t index, simd::simd_concept simd_t>
constexpr simd_t extract_eighth_sse4(simd_t const & src)
{
    // Shift the vector right by `index * 2` bytes so that the selected 16-bit eighth
    // ends up in the lower bits of the result.
    return reinterpret_cast<simd_t>(_mm_srli_si128(reinterpret_cast<__m128i const &>(src), index << 1));
}

} // namespace seqan3::detail
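
// ----------------------------------------------------------------------------
// Illustrative usage sketch, not part of the library interface. The names
// example::demo, int8x16_t and int16x8_t are made up for this sketch. It
// assumes that a GCC/Clang builtin vector over int8_t models
// simd::simd_concept (seqan3's builtin simd types are defined this way), that
// <array> and <cstdint> are included, and that the translation unit is
// compiled with SSE4 enabled (e.g. -msse4.1).
// ----------------------------------------------------------------------------

namespace example
{

using int8x16_t = int8_t __attribute__((vector_size(16)));   // 16 lanes of int8_t
using int16x8_t = int16_t __attribute__((vector_size(16)));  //  8 lanes of int16_t

inline void demo(std::array<int8_t, 16 * 16> const & buffer)
{
    // Load 16 unaligned rows of 16 bytes each.
    std::array<int8x16_t, 16> matrix{};
    for (int row = 0; row < 16; ++row)
        matrix[row] = seqan3::detail::load_sse4<int8x16_t>(buffer.data() + 16 * row);

    // Transpose the 16x16 byte matrix in place: matrix[r][c] now holds buffer[c * 16 + r].
    seqan3::detail::transpose_matrix_sse4(matrix);

    // Sign-extend the lower 8 lanes of the first row to 16-bit integers.
    int16x8_t const widened = seqan3::detail::upcast_signed_sse4<int16x8_t>(matrix[0]);

    // Move the upper 64-bit halve of the first row into the lower bits of the result.
    int8x16_t const upper = seqan3::detail::extract_halve_sse4<1>(matrix[0]);

    (void)widened;
    (void)upper;
}

} // namespace example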