SeqAn3 3.4.0-rc.1
The Modern C++ library for sequence analysis.
Loading...
Searching...
No Matches
algorithm.hpp
Go to the documentation of this file.
1// SPDX-FileCopyrightText: 2006-2024 Knut Reinert & Freie Universität Berlin
2// SPDX-FileCopyrightText: 2016-2024 Knut Reinert & MPI für molekulare Genetik
3// SPDX-License-Identifier: BSD-3-Clause
4
10#pragma once
11
12#include <array>
13#include <cassert>
14#include <concepts>
15#include <utility>
16
23
24namespace seqan3::detail
25{
26
29template <simd::simd_concept simd_t, size_t... I>
30constexpr simd_t fill_impl(typename simd_traits<simd_t>::scalar_type const scalar, std::index_sequence<I...>) noexcept
31{
32 return simd_t{((void)I, scalar)...};
33}
34
// Helper function for seqan3::simd::iota.
// Brace-initialises a simd_t with one element per index in the pack I; element I holds
// offset + I converted to scalar_t.
// NOTE(review): the signature line (listing line 38) is absent from this extract, so `offset`
// is not declared in the visible text.
37template <simd::simd_concept simd_t, typename scalar_t, scalar_t... I>
39{
40 return simd_t{static_cast<scalar_t>(offset + I)...};
41}
42
57template <size_t divisor, simd_concept simd_t>
58constexpr simd_t extract_impl(simd_t const & src, uint8_t const mask)
59{
60 simd_t dst{};
61 constexpr size_t chunk = simd_traits<simd_t>::length / divisor;
62 size_t offset = chunk * mask;
63 for (size_t i = 0; i < chunk; ++i)
64 dst[i] = src[i + offset];
65
66 return dst;
67}
68
// Upcasts the given vector into the target vector using signed extension of packed values.
// Dispatches at compile time on simd_traits<source_simd_t>::max_length to the SSE4 (16),
// AVX2 (32) or AVX512 (64) helper and returns that helper's result.
77template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
78constexpr target_simd_t upcast_signed(source_simd_t const & src)
79{
// NOTE(review): the opening of this static_assert (listing line 80) is absent from this extract;
// only its message is visible.
81 "Target vector has different byte size.");
82
83 if constexpr (simd_traits<source_simd_t>::max_length == 16) // SSE4
84 return upcast_signed_sse4<target_simd_t>(src);
85 else if constexpr (simd_traits<source_simd_t>::max_length == 32) // AVX2
86 return upcast_signed_avx2<target_simd_t>(src);
87 else if constexpr (simd_traits<source_simd_t>::max_length == 64) // AVX512
88 return upcast_signed_avx512<target_simd_t>(src);
89 else
// NOTE(review): this branch is only instantiated when max_length is none of 16, 32 or 64.
// The condition `<= 32` then still holds for smaller values, in which case the assertion passes
// and the function has no return statement - confirm whether an unconditional failure is intended.
90 static_assert(simd_traits<source_simd_t>::max_length <= 32, "simd type is not supported.");
91}
92
// Upcasts the given vector into the target vector using unsigned extension of packed values.
// Dispatches at compile time on simd_traits<source_simd_t>::max_length to the SSE4 (16),
// AVX2 (32) or AVX512 (64) helper and returns that helper's result.
101template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
102constexpr target_simd_t upcast_unsigned(source_simd_t const & src)
103{
// NOTE(review): the opening of this static_assert (listing line 104) is absent from this extract;
// only its message is visible.
105 "Target vector has different byte size.");
106
107 if constexpr (simd_traits<source_simd_t>::max_length == 16) // SSE4
108 return upcast_unsigned_sse4<target_simd_t>(src);
109 else if constexpr (simd_traits<source_simd_t>::max_length == 32) // AVX2
110 return upcast_unsigned_avx2<target_simd_t>(src);
111 else if constexpr (simd_traits<source_simd_t>::max_length == 64) // AVX512
112 return upcast_unsigned_avx512<target_simd_t>(src);
113 else
// NOTE(review): this branch is only instantiated when max_length is none of 16, 32 or 64.
// The condition `<= 32` then still holds for smaller values, in which case the assertion passes
// and the function has no return statement - confirm whether an unconditional failure is intended.
114 static_assert(simd_traits<source_simd_t>::max_length <= 32, "simd type is not supported.");
115}
116
139template <uint8_t index, simd::simd_concept simd_t>
140constexpr simd_t extract_half(simd_t const & src)
141{
142 static_assert(index < 2, "The index must be in the range of [0, 1]");
143
144 return detail::extract_impl<2>(src, index);
145}
146
148template <uint8_t index, simd::simd_concept simd_t>
149 requires detail::is_builtin_simd_v<simd_t> && detail::is_native_builtin_simd_v<simd_t>
150constexpr simd_t extract_half(simd_t const & src)
151{
152 static_assert(index < 2, "The index must be in the range of [0, 1]");
153
154 if constexpr (simd_traits<simd_t>::length < 2) // In case there are less elements available return unchanged value.
155 return src;
156 else if constexpr (simd_traits<simd_t>::max_length == 16) // SSE4
157 return detail::extract_half_sse4<index>(src);
158 else if constexpr (simd_traits<simd_t>::max_length == 32) // AVX2
159 return detail::extract_half_avx2<index>(src);
160 else if constexpr (simd_traits<simd_t>::max_length == 64) // AVX512
161 return detail::extract_half_avx512<index>(src);
162 else // Anything else
163 return detail::extract_impl<2>(src, index);
164}
166
189template <uint8_t index, simd::simd_concept simd_t>
190constexpr simd_t extract_quarter(simd_t const & src)
191{
192 static_assert(index < 4, "The index must be in the range of [0, 1, 2, 3]");
193
194 return detail::extract_impl<4>(src, index);
195}
196
// Overload of extract_quarter for simd types that satisfy both detail::is_builtin_simd_v and
// detail::is_native_builtin_simd_v.
// Vectors with fewer than four elements are returned unchanged; otherwise the call is forwarded
// to the helper matching simd_traits<simd_t>::max_length.
198template <uint8_t index, simd::simd_concept simd_t>
199 requires detail::is_builtin_simd_v<simd_t> && detail::is_native_builtin_simd_v<simd_t>
200constexpr simd_t extract_quarter(simd_t const & src)
201{
202 static_assert(index < 4, "The index must be in the range of [0, 1, 2, 3]");
203
204 if constexpr (simd_traits<simd_t>::length < 4) // In case there are less elements available return unchanged value.
205 return src;
206 else if constexpr (simd_traits<simd_t>::max_length == 16) // SSE4
207 return detail::extract_quarter_sse4<index>(src);
208 else if constexpr (simd_traits<simd_t>::max_length == 32) // AVX2
209 return detail::extract_quarter_avx2<index>(src);
// The AVX512 branch only exists when __AVX512DQ__ is defined; without it a max_length of 64
// falls through to the generic extract_impl below.
210#if defined(__AVX512DQ__)
211 else if constexpr (simd_traits<simd_t>::max_length == 64) // AVX512
212 return detail::extract_quarter_avx512<index>(src);
213#endif // defined(__AVX512DQ__)
214 else // Anything else
215 return detail::extract_impl<4>(src, index);
216}
218
241template <uint8_t index, simd::simd_concept simd_t>
242constexpr simd_t extract_eighth(simd_t const & src)
243{
244 return detail::extract_impl<8>(src, index);
245}
246
// Overload of extract_eighth for simd types that satisfy both detail::is_builtin_simd_v and
// detail::is_native_builtin_simd_v.
// Vectors with fewer than eight elements are returned unchanged; otherwise the call is forwarded
// to the helper matching simd_traits<simd_t>::max_length.
248template <uint8_t index, simd::simd_concept simd_t>
249 requires detail::is_builtin_simd_v<simd_t> && detail::is_native_builtin_simd_v<simd_t>
250constexpr simd_t extract_eighth(simd_t const & src)
251{
252 static_assert(index < 8, "The index must be in the range of [0, 1, 2, 3, 4, 5, 6, 7]");
253
254 if constexpr (simd_traits<simd_t>::length < 8) // In case there are less elements available return unchanged value.
255 return src;
256 else if constexpr (simd_traits<simd_t>::max_length == 16) // SSE4
257 return detail::extract_eighth_sse4<index>(src);
258 else if constexpr (simd_traits<simd_t>::max_length == 32) // AVX2
259 return detail::extract_eighth_avx2<index>(src);
// The AVX512 branch only exists when __AVX512DQ__ is defined; without it a max_length of 64
// falls through to the generic extract_impl below.
260#if defined(__AVX512DQ__)
261 else if constexpr (simd_traits<simd_t>::max_length == 64) // AVX512
262 return detail::extract_eighth_avx512<index>(src);
263#endif // defined(__AVX512DQ__)
264 else // Anything else
265 return detail::extract_impl<8>(src, index);
266}
268
// Generic, element-wise transposition of a square matrix stored as an array of simd vectors.
// The array holds simd_traits<simd_t>::length vectors, so matrix.size() is used as the bound
// for both the row and the column loop.
270template <simd::simd_concept simd_t>
271constexpr void transpose(std::array<simd_t, simd_traits<simd_t>::length> & matrix)
272{
// NOTE(review): the declaration of `tmp` (listing line 273) is absent from this extract;
// presumably it is a second array of the same type as `matrix` - confirm against the header.
274
275 for (size_t i = 0; i < matrix.size(); ++i)
276 for (size_t j = 0; j < matrix.size(); ++j)
277 tmp[j][i] = matrix[i][j];
278
// Exchange the transposed copy with the caller's matrix.
279 std::swap(tmp, matrix);
280}
282} // namespace seqan3::detail
283
284namespace seqan3
285{
286
287inline namespace simd
288{
289
299template <simd::simd_concept simd_t>
300constexpr simd_t fill(typename simd_traits<simd_t>::scalar_type const scalar) noexcept
301{
302 constexpr size_t length = simd_traits<simd_t>::length;
303 return detail::fill_impl<simd_t>(scalar, std::make_index_sequence<length>{});
304}
305
315template <simd::simd_concept simd_t>
316constexpr simd_t iota(typename simd_traits<simd_t>::scalar_type const offset)
317{
318 constexpr size_t length = simd_traits<simd_t>::length;
319 using scalar_type = typename simd_traits<simd_t>::scalar_type;
320 return detail::iota_impl<simd_t>(offset, std::make_integer_sequence<scalar_type, length>{});
321}
322
332template <simd::simd_concept simd_t>
333constexpr simd_t load(void const * mem_addr)
334{
335 assert(mem_addr != nullptr);
336 simd_t tmp{};
337
338 for (size_t i = 0; i < simd_traits<simd_t>::length; ++i)
339 tmp[i] = *(static_cast<typename simd_traits<simd_t>::scalar_type const *>(mem_addr) + i);
340
341 return tmp;
342}
343
// Overload of load for simd types that satisfy both detail::is_builtin_simd_v and
// detail::is_native_builtin_simd_v.
// Forwards to the load helper matching simd_traits<simd_t>::max_length (16, 32 or 64).
// mem_addr must not be null (checked with assert).
345template <simd::simd_concept simd_t>
346 requires detail::is_builtin_simd_v<simd_t> && detail::is_native_builtin_simd_v<simd_t>
347constexpr simd_t load(void const * mem_addr)
348{
349 assert(mem_addr != nullptr);
350
351 if constexpr (simd_traits<simd_t>::max_length == 16)
352 return detail::load_sse4<simd_t>(mem_addr);
353 else if constexpr (simd_traits<simd_t>::max_length == 32)
354 return detail::load_avx2<simd_t>(mem_addr);
355 else if constexpr (simd_traits<simd_t>::max_length == 64)
356 return detail::load_avx512<simd_t>(mem_addr);
357 else
// NOTE(review): the opening of the fallback statement (listing line 358) is absent from this
// extract; only its message is visible.
359 "Unsupported simd type.");
360}
362
373template <simd::simd_concept simd_t>
374constexpr void store(void * mem_addr, simd_t const & simd_vec)
375{
376 assert(mem_addr != nullptr);
377 using scalar_t = typename simd_traits<simd_t>::scalar_type;
378
379 for (size_t i = 0; i < simd_traits<simd_t>::length; ++i)
380 *(static_cast<scalar_t *>(mem_addr) + i) = simd_vec[i];
381}
382
// Overload of store for simd types that satisfy both detail::is_builtin_simd_v and
// detail::is_native_builtin_simd_v.
// Forwards to the store helper matching simd_traits<simd_t>::max_length (16, 32 or 64).
// mem_addr must not be null (checked with assert).
384template <simd::simd_concept simd_t>
385 requires detail::is_builtin_simd_v<simd_t> && detail::is_native_builtin_simd_v<simd_t>
386constexpr void store(void * mem_addr, simd_t const & simd_vec)
387{
388 assert(mem_addr != nullptr);
389
390 if constexpr (simd_traits<simd_t>::max_length == 16)
391 detail::store_sse4<simd_t>(mem_addr, simd_vec);
392 else if constexpr (simd_traits<simd_t>::max_length == 32)
393 detail::store_avx2<simd_t>(mem_addr, simd_vec);
394 else if constexpr (simd_traits<simd_t>::max_length == 64)
395 detail::store_avx512<simd_t>(mem_addr, simd_vec);
396 else
// NOTE(review): the opening of the fallback statement (listing line 397) is absent from this
// extract; only its message is visible.
398 "Unsupported simd type.");
399}
401
// Transposes the given simd vector matrix.
// Generic version: forwards to detail::transpose, which modifies `matrix` in place.
419template <simd::simd_concept simd_t>
420constexpr void transpose(std::array<simd_t, simd_traits<simd_t>::length> & matrix)
421{
422 detail::transpose(matrix);
423}
424
// Overload of transpose for simd types that satisfy both detail::is_builtin_simd_v and
// detail::is_native_builtin_simd_v.
// Unlike the other dispatchers in this file, the branches test simd_traits<simd_t>::length
// (the number of packed values), not max_length.
// NOTE(review): the statements of the three instruction-set branches (listing lines 433, 435, 438)
// are absent from this extract; presumably they call the transpose_matrix_sse4 / _avx2 / _avx512
// helpers - confirm against the header.
426// Implementation for seqan builtin simd.
427template <simd::simd_concept simd_t>
428 requires detail::is_builtin_simd_v<simd_t> && detail::is_native_builtin_simd_v<simd_t>
430constexpr void transpose(std::array<simd_t, simd_traits<simd_t>::length> & matrix)
431{
432 if constexpr (simd_traits<simd_t>::length == 16) // SSE4 implementation
434 else if constexpr (simd_traits<simd_t>::length == 32) // AVX2 implementation
436#if defined(__AVX512BW__) // Requires byte-word extension of AVX512 instruction set.
437 else if constexpr (simd_traits<simd_t>::length == 64) // AVX512 implementation
439#endif // defined(__AVX512BW__)
440 else
// Every other length uses the generic element-wise implementation.
441 detail::transpose(matrix);
442}
444
// Upcasts the given vector into the target vector.
// Generic version: converts the first simd_traits<target_simd_t>::length elements of `src`
// one by one to the target's scalar type with static_cast.
453template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
454constexpr target_simd_t upcast(source_simd_t const & src)
455{
456 static_assert(
// NOTE(review): the asserted condition (listing line 457) is absent from this extract.
// The loop below reads src[i] for every i below the target length, which needs the source to
// have at least as many elements as the target; the message states the opposite direction
// ("target ... greater or equal") - confirm which one is intended.
458 "The length of the target simd type must be greater or equal than the length of the source simd type.");
459
460 target_simd_t tmp{};
461 for (unsigned i = 0; i < simd_traits<target_simd_t>::length; ++i)
462 tmp[i] = static_cast<typename simd_traits<target_simd_t>::scalar_type>(src[i]);
463
464 return tmp;
465}
466
// Overload of upcast used when both types satisfy detail::is_builtin_simd_v and the source type
// also satisfies detail::is_native_builtin_simd_v.
// Three cases are handled: a plain reinterpret_cast, detail::upcast_signed for signed source
// scalars, and detail::upcast_unsigned for unsigned source scalars.
468template <simd::simd_concept target_simd_t, simd::simd_concept source_simd_t>
469 requires detail::is_builtin_simd_v<target_simd_t> && detail::is_builtin_simd_v<source_simd_t>
470 && detail::is_native_builtin_simd_v<source_simd_t>
471constexpr target_simd_t upcast(source_simd_t const & src)
472{
473 static_assert(
// NOTE(review): the asserted condition (listing line 474) is absent from this extract.
475 "The length of the target simd type must be greater or equal than the length of the source simd type.");
476
// NOTE(review): the condition opening the first branch (listing line 477) and the start of the
// static_assert inside it (listing line 479) are absent from this extract.
478 {
480 "Target vector has a different byte size.");
481 return reinterpret_cast<target_simd_t>(src); // Same packing so we do not cast.
482 }
483 else if constexpr (std::signed_integral<typename simd_traits<source_simd_t>::scalar_type>)
484 {
485 return detail::upcast_signed<target_simd_t>(src);
486 }
487 else
488 {
// Any scalar type that is not a signed integer must be an unsigned integer; anything else is
// rejected at compile time.
489 static_assert(std::unsigned_integral<typename simd_traits<source_simd_t>::scalar_type>,
490 "Expected unsigned scalar type.");
491 return detail::upcast_unsigned<target_simd_t>(src);
492 }
493}
495
496} // namespace simd
497
498} // namespace seqan3
Provides seqan3::detail::builtin_simd, seqan3::detail::is_builtin_simd and seqan3::simd::simd_traits<...
Implementation of a masked alphabet to be used for tuple composites.
Definition mask.hpp:35
@ offset
Sequence (seqan3::field::seq) relative start position (0-based), unsigned value.
constexpr void transpose(std::array< simd_t, simd_traits< simd_t >::length > &matrix)
Transposes the given simd vector matrix.
Definition algorithm.hpp:420
constexpr simd_t extract_impl(simd_t const &src, uint8_t const mask)
Helper function to extract a part of the given simd vector.
Definition algorithm.hpp:58
constexpr target_simd_t upcast(source_simd_t const &src)
Upcasts the given vector into the target vector using sign extension of packed values.
Definition algorithm.hpp:454
constexpr simd_t iota(typename simd_traits< simd_t >::scalar_type const offset)
Fills a seqan3::simd::simd_type vector with the scalar values offset, offset+1, offset+2,...
Definition algorithm.hpp:316
constexpr simd_t extract_eighth(simd_t const &src)
Extracts one eighth of the given simd vector and stores it in the lower eighth of the target vector.
Definition algorithm.hpp:242
constexpr simd_t iota_impl(scalar_t const offset, std::integer_sequence< scalar_t, I... >)
Helper function for seqan3::simd::iota.
Definition algorithm.hpp:38
constexpr simd_t fill_impl(typename simd_traits< simd_t >::scalar_type const scalar, std::index_sequence< I... >) noexcept
Helper function for seqan3::simd::fill.
Definition algorithm.hpp:30
constexpr simd_t extract_quarter(simd_t const &src)
Extracts one quarter of the given simd vector and stores it in the lower quarter of the target vector...
Definition algorithm.hpp:190
constexpr void store(void *mem_addr, simd_t const &simd_vec)
Store simd_t size bits of integral data into memory.
Definition algorithm.hpp:374
constexpr simd_t load(void const *mem_addr)
Load simd_t size bits of integral data from memory.
Definition algorithm.hpp:333
constexpr target_simd_t upcast_unsigned(source_simd_t const &src)
Upcasts the given vector into the target vector using unsigned extension of packed values.
Definition algorithm.hpp:102
constexpr simd_t fill(typename simd_traits< simd_t >::scalar_type const scalar) noexcept
Fills a seqan3::simd::simd_type vector with a scalar value.
Definition algorithm.hpp:300
constexpr simd_t extract_half(simd_t const &src)
Extracts one half of the given simd vector and stores the result in the lower half of the target vect...
Definition algorithm.hpp:140
constexpr target_simd_t upcast_signed(source_simd_t const &src)
Upcasts the given vector into the target vector using signed extension of packed values.
Definition algorithm.hpp:78
The internal SeqAn3 namespace.
Definition aligned_sequence_concept.hpp:26
void transpose_matrix_avx512(std::array< simd_t, simd_traits< simd_t >::length > &matrix)
Transposes the given simd vector matrix.
void transpose_matrix_sse4(std::array< simd_t, simd_traits< simd_t >::length > &matrix)
Transposes the given simd vector matrix.
void transpose_matrix_avx2(std::array< simd_t, simd_traits< simd_t >::length > &matrix)
Transposes the given simd vector matrix.
The main SeqAn3 namespace.
Definition aligned_sequence_concept.hpp:26
Provides specific algorithm implementations for AVX2 instruction set.
Provides specific algorithm implementations for AVX512 instruction set.
Provides specific algorithm implementations for SSE4 instruction set.
Provides seqan3::simd::simd_traits.
seqan3::simd::simd_traits is the trait class that provides uniform interface to the properties of sim...
Definition simd_traits.hpp:38
static constexpr auto length
The number of packed values in a simd vector (is not defined if simd_t does not model seqan3::simd::s...
Definition simd_traits.hpp:46
static constexpr auto max_length
The maximum number of packable values in a simd vector, if the underlying type would be [u]int8_t (is...
Definition simd_traits.hpp:51
T swap(T... args)
Provides seqan3::simd::simd_concept.
Hide me