SeqAn3 3.2.0
The Modern C++ library for sequence analysis.
to_simd.hpp
Go to the documentation of this file.
1// -----------------------------------------------------------------------------------------------------
2// Copyright (c) 2006-2022, Knut Reinert & Freie Universität Berlin
3// Copyright (c) 2016-2022, Knut Reinert & MPI für molekulare Genetik
4// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6// -----------------------------------------------------------------------------------------------------
7
13#pragma once
14
15#include <algorithm>
16#include <iterator>
17#include <ranges>
18
29
30namespace seqan3::detail
31{
32
58template <std::ranges::view urng_t, simd::simd_concept simd_t>
59class view_to_simd : public std::ranges::view_interface<view_to_simd<urng_t, simd_t>>
60{
61private:
62 static_assert(std::ranges::forward_range<urng_t>, "The underlying range must model forward_range.");
63 static_assert(std::ranges::input_range<std::ranges::range_value_t<urng_t>>,
64 "Expects the value type of the underlying range to be an input_range.");
65 static_assert(std::default_initializable<std::ranges::iterator_t<std::ranges::range_value_t<urng_t>>>,
66 "Expects the inner range iterator to be default initializable.");
67 static_assert(std::default_initializable<std::ranges::sentinel_t<std::ranges::range_value_t<urng_t>>>,
68 "Expects the inner range sentinel to be default initializable.");
70 "Expects semi-alphabet as value type of the inner range.");
71
75 using inner_range_type = std::ranges::range_value_t<urng_t>;
77 using scalar_type = typename simd_traits<simd_t>::scalar_type;
79 using max_simd_type = simd_type_t<uint8_t, simd_traits<simd_t>::max_length>;
81
86 static constexpr bool fast_load =
87 std::ranges::contiguous_range<inner_range_type>
88 && std::sized_sentinel_for<std::ranges::iterator_t<inner_range_type>, std::ranges::sentinel_t<inner_range_type>>
89 && sizeof(alphabet_rank_t<std::ranges::range_value_t<inner_range_type>>) == 1;
90
92 static constexpr uint8_t chunk_size = simd_traits<simd_t>::length;
94 static constexpr uint8_t chunks_per_load = simd_traits<simd_t>::max_length / chunk_size;
96 static constexpr uint8_t total_chunks = fast_load ? (chunks_per_load * chunks_per_load) : 1;
98 static constexpr auto alphabet_size = seqan3::alphabet_size<std::ranges::range_value_t<inner_range_type>>;
100
101 // Forward declare class' iterator type. See definition below.
102 struct iterator_type;
103
104public:
108 constexpr view_to_simd()
109 requires std::default_initializable<urng_t>
110 = default;
111 constexpr view_to_simd(view_to_simd const &) = default;
112 constexpr view_to_simd(view_to_simd &&) = default;
113 constexpr view_to_simd & operator=(view_to_simd const &) = default;
114 constexpr view_to_simd & operator=(view_to_simd &&) = default;
115 ~view_to_simd() = default;
116
121 constexpr view_to_simd(urng_t urng, scalar_type const padding_value = alphabet_size) :
122 urng{std::move(urng)},
123 padding_simd_vector{simd::fill<simd_t>(padding_value)},
124 padding_value{padding_value}
125 {
126 // Check if the size is less or equal the simd size.
127 if (std::ranges::distance(urng) > chunk_size)
128 throw std::invalid_argument{"The size of the underlying range must be less than or equal to the size of "
129 "the given simd type!"};
130 }
131
133 template <typename other_urng_t>
134 requires (!std::same_as<std::remove_cvref_t<other_urng_t>, view_to_simd>)
135 && (!std::same_as<other_urng_t, urng_t>) && std::ranges::viewable_range<other_urng_t>
136 constexpr view_to_simd(other_urng_t && urng, scalar_type const padding_value = alphabet_size) :
137 view_to_simd{views::type_reduce(std::forward<other_urng_t>(urng)), padding_value}
138 {}
140
145 constexpr iterator_type begin() noexcept
146 {
147 return {*this};
148 }
149
151 constexpr void begin() const noexcept = delete;
152
154 constexpr std::default_sentinel_t end() noexcept
155 {
156 return std::default_sentinel;
157 }
158
160 constexpr void end() const noexcept = delete;
162
164 constexpr bool empty() const noexcept
165 requires std::ranges::forward_range<inner_range_type>
166 {
167 return std::ranges::all_of(urng,
168 [](auto & rng)
169 {
170 return std::ranges::empty(rng);
171 });
172 }
173
180 constexpr size_t size() const noexcept
181 requires std::ranges::sized_range<inner_range_type>
182 {
183 auto it = std::ranges::max_element(urng,
184 [](auto & lhs, auto & rhs)
185 {
186 return std::ranges::size(lhs) < std::ranges::size(rhs);
187 });
188
189 return (it != std::ranges::end(urng)) ? (std::ranges::size(*it) + chunk_size - 1) / chunk_size : 0;
190 }
191
192private:
193 urng_t urng{};
194 std::array<chunk_type, total_chunks> cached_simd_chunks{};
195 simd_t padding_simd_vector{};
196 scalar_type padding_value{};
197};
198
206template <std::ranges::view urng_t, simd::simd_concept simd_t>
207class view_to_simd<urng_t, simd_t>::iterator_type
208{
209public:
214 using value_type = reference;
215 using pointer = void;
216 using difference_type = ptrdiff_t;
217 using iterator_category = std::input_iterator_tag;
218 using iterator_concept = iterator_category;
220
224 constexpr iterator_type() = default;
225 constexpr iterator_type(iterator_type const &) = default;
226 constexpr iterator_type(iterator_type &&) = default;
227 constexpr iterator_type & operator=(iterator_type const &) = default;
228 constexpr iterator_type & operator=(iterator_type &&) = default;
229 ~iterator_type() = default;
230
239 constexpr iterator_type(view_to_simd & this_view) : this_view{&this_view}, current_chunk_pos{0}
240 {
241 // Initialise the iterator of the sub ranges.
242 size_t seq_id = 0;
243 for (auto it = std::ranges::begin(this_view.urng); it != std::ranges::end(this_view.urng); ++it, ++seq_id)
244 {
245 cached_iter[seq_id] = std::ranges::begin(*it);
246 cached_sentinel[seq_id] = std::ranges::end(*it);
247 }
248
249 // The batch is empty and by default the constructed iterator is pointing to the end.
250 if (seq_id == 0)
251 return;
252
253 // The batch is not empty but might not be full either.
254 // If a slot is supposed to be empty, it will be initialised with the iterator of the first sequence set to the
255 // end emulating an empty sequence.
256 auto sentinel_it = std::ranges::next(cached_iter[0], cached_sentinel[0]);
257 for (; seq_id < chunk_size; ++seq_id)
258 {
259 cached_iter[seq_id] = sentinel_it;
260 cached_sentinel[seq_id] = cached_sentinel[0];
261 }
262
263 // Check if this is the final chunk already.
264 final_chunk = all_iterators_reached_sentinel();
265
266 // Fetch the next available input characters from the sequences and transform them into simd vectors.
267 underflow();
268 }
270
275 constexpr reference operator*() const noexcept
276 {
277 assert(this_view != nullptr);
278 return std::span{this_view->cached_simd_chunks[current_chunk_pos].begin(),
279 (current_chunk_pos == final_chunk_pos) ? final_chunk_size : chunk_size};
280 }
282
287 constexpr iterator_type & operator++(/*pre-increment*/)
288 {
289 if constexpr (fast_load)
290 { // Check if cached chunks have been already consumed and we need to fetch the next chunks.
291 if (current_chunk_pos == final_chunk_pos)
292 {
293 underflow();
294 current_chunk_pos = 0;
295 }
296 else
297 {
298 ++current_chunk_pos;
299 }
300 }
301 else // In case fast load is not available only one chunk is filled at a time.
302 {
303 underflow();
304 }
305
306 return *this;
307 }
308
310 constexpr value_type operator++(int /*post-increment*/)
311 {
312 value_type tmp = this->operator*();
313 ++(*this);
314 return tmp;
315 }
317
322 constexpr bool operator==(std::default_sentinel_t const &) const noexcept
323 {
324 return at_end;
325 }
326
328 friend constexpr bool operator==(std::default_sentinel_t const &, iterator_type const & rhs) noexcept
329 {
330 return rhs.at_end;
331 }
332
334 constexpr bool operator!=(std::default_sentinel_t const &) const noexcept
335 {
336 return !at_end;
337 }
338
340 friend constexpr bool operator!=(std::default_sentinel_t const &, iterator_type const & rhs) noexcept
341 {
342 return !rhs.at_end;
343 }
345
346private:
357 auto unpack(max_simd_type const & row) const
358 {
359 if constexpr (chunk_size == simd_traits<max_simd_type>::length / 2) // upcast into 2 vectors.
360 {
361 return std::array{simd::upcast<simd_t>(extract_half<0>(row)), // 1. half
362 simd::upcast<simd_t>(extract_half<1>(row))}; // 2. half
363 }
364 else if constexpr (chunk_size == simd_traits<max_simd_type>::length / 4) // upcast into 4 vectors.
365 {
366 return std::array{simd::upcast<simd_t>(extract_quarter<0>(row)), // 1. quarter
367 simd::upcast<simd_t>(extract_quarter<1>(row)), // 2. quarter
368 simd::upcast<simd_t>(extract_quarter<2>(row)), // 3. quarter
369 simd::upcast<simd_t>(extract_quarter<3>(row))}; // 4. quarter
370 }
371 else if constexpr (chunk_size == simd_traits<max_simd_type>::length / 8) // upcast into 8 vectors.
372 {
373 return std::array{simd::upcast<simd_t>(extract_eighth<0>(row)), // 1. eighth
374 simd::upcast<simd_t>(extract_eighth<1>(row)), // 2. eighth
375 simd::upcast<simd_t>(extract_eighth<2>(row)), // 3. eighth
376 simd::upcast<simd_t>(extract_eighth<3>(row)), // 4. eighth
377 simd::upcast<simd_t>(extract_eighth<4>(row)), // 5. eighth
378 simd::upcast<simd_t>(extract_eighth<5>(row)), // 6. eighth
379 simd::upcast<simd_t>(extract_eighth<6>(row)), // 7. eighth
380 simd::upcast<simd_t>(extract_eighth<7>(row))}; // 8. eighth
381 }
382 else
383 {
384 return std::array{simd::upcast<simd_t>(row)};
385 }
386 }
387
398 constexpr void split_into_sub_matrices(std::array<max_simd_type, simd_traits<max_simd_type>::length> matrix) const
399 {
400 auto apply_padding = [this](simd_t const vec)
401 {
402 return (vec == simd::fill<simd_t>(static_cast<uint8_t>(~0))) ? this_view->padding_simd_vector : vec;
403 };
404
405 // Iterate over the rows of the matrix
406 for (uint8_t row = 0; row < static_cast<uint8_t>(matrix.size()); ++row)
407 {
408 // split a row into multiple chunks of size `chunk_size`
409 auto chunked_row = unpack(matrix[row]);
410
411 if constexpr (chunked_row.size() == 1)
412 {
413 this_view->cached_simd_chunks[0][row] = apply_padding(std::move(chunked_row[0]));
414 }
415 else // Parse the tuple elements and store them in the cached simd chunks.
416 {
417 static_assert(chunked_row.size() == chunks_per_load, "Expected chunks_per_load many simd vectors.");
418
419 for (uint8_t chunk = 0; chunk < chunks_per_load; ++chunk) // store chunks in respective cached entries.
420 {
421 size_t idx = chunk * chunks_per_load + row / chunk_size;
422 this_view->cached_simd_chunks[idx][row % chunk_size] = apply_padding(std::move(chunked_row[chunk]));
423 }
424 }
425 }
426 }
427
431 constexpr bool all_iterators_reached_sentinel() const noexcept
432 {
433 using std::get;
434
435 return std::ranges::all_of(views::zip(cached_iter, cached_sentinel),
436 [](auto && iterator_sentinel_pair)
437 {
438 return get<0>(iterator_sentinel_pair) == get<1>(iterator_sentinel_pair);
439 });
440 }
441
452 constexpr simd_t convert_single_column() noexcept
453 {
454 simd_t simd_column{};
455 for (size_t idx = 0u; idx < chunk_size; ++idx)
456 {
457 if (cached_iter[idx] == cached_sentinel[idx])
458 {
459 simd_column[idx] = this_view->padding_value;
460 }
461 else
462 {
463 simd_column[idx] = static_cast<scalar_type>(seqan3::to_rank(*cached_iter[idx]));
464 ++cached_iter[idx];
465 }
466 };
467 return simd_column;
468 }
469
480 template <typename array_t>
481 constexpr void update_final_chunk_position(array_t const & iterators_before_update) noexcept
482 {
483 size_t max_distance = 0;
484 for (auto && [it, sent] : views::zip(iterators_before_update, cached_sentinel))
485 max_distance = std::max<size_t>(std::ranges::distance(it, sent), max_distance);
486
487 assert(max_distance > 0);
488 assert(max_distance <= (total_chunks * chunk_size));
489
490 --max_distance;
491 final_chunk_pos = max_distance / chunk_size;
492 // first we should be able to check the chunk position.
493 final_chunk_size = (max_distance % chunk_size) + 1;
494 }
495
497 constexpr void underflow()
498 requires fast_load
499 {
500 at_end = final_chunk;
501 if (at_end) // reached end of stream.
502 return;
503 // For the efficient load we assume at most one byte sized alphabets.
504 // Hence we can load `simd_traits<simd_t>::max_length` length many elements at once.
505 // Depending on the packing of `simd_t` we can prefetch blocks and store them in the `cached_simd_chunks`.
506 // E.g. assume `simd_t` with length 8 on SSE4 with max length 16.
507 // To fill the 16x16 matrix we need four 8x8 matrices.
508 // Thus, for the 8 sequences we need to load two times 16 consecutive bytes to fill the matrix, i.e. two loads
509 // see figure below.
510 //
511 // 0 1 ... 7 | 8 9 ... 15
512 // 0 [a00, a01, ..., a07]|[a08, a09, ..., a15] // first load of seq a reads 16 characters
513 // 1 [b00, b01, ..., b07]|[b08, b09, ..., b15] // first load of seq b reads 16 characters
514 // ... | ...
515 // 7 [g00, g01, ..., g07]|[g08, g09, ..., g15] // first load of seq g reads 16 characters
516 // ----------------------------------------
517 // 8 [a16, a17, ..., a23]|[a24, a25, ..., a31] // second load of seq a reads next 16 characters
518 // 9 [b16, b17, ..., b23]|[b24, b25, ..., b31] // second load of seq b reads next 16 characters
519 // ... | ...
520 // 15 [g16, g17, ..., g23]|[g24, g25, ..., g31] // second load of seq g reads next 16 characters
521 //
522 // This quadratic byte matrix can be transposed efficiently with simd instructions.
523 // If the target simd scalar type is bigger we can apply the same mechanism but have then 16 4x4 matrices
524 // (32 bit) or 256 2x2 matrices (64 bit).
525
526 constexpr int8_t max_size = simd_traits<simd_t>::max_length;
528 decltype(cached_iter) iterators_before_update{cached_iter}; // Keep track of iterators before the update.
529 // Iterate over each sequence.
530 for (uint8_t sequence_pos = 0; sequence_pos < chunk_size; ++sequence_pos)
531 { // Iterate over each block depending on the packing of the target simd vector.
532 for (uint8_t chunk_pos = 0; chunk_pos < chunks_per_load; ++chunk_pos)
533 {
534 uint8_t pos = chunk_pos * chunk_size + sequence_pos; // matrix entry to fill
535 if (cached_sentinel[sequence_pos] - cached_iter[sequence_pos] >= max_size)
536 { // Not in final block, thus load directly from memory.
537 matrix[pos] = simd::load<max_simd_type>(std::addressof(*cached_iter[sequence_pos]));
538 std::advance(cached_iter[sequence_pos], max_size);
539 }
540 else // Loads the final block byte wise in order to not load from uninitialised memory.
541 {
542 matrix[pos] = simd::fill<max_simd_type>(~0);
543 auto & sequence_it = cached_iter[sequence_pos];
544 for (int8_t idx = 0; sequence_it != cached_sentinel[sequence_pos]; ++sequence_it, ++idx)
545 matrix[pos][idx] = seqan3::to_rank(*sequence_it);
546 }
547 }
548 }
549
550 // Handle final chunk which might not end at an offset which is not a multiple of `chunk_size`.
551 final_chunk = all_iterators_reached_sentinel();
552
553 if (final_chunk)
554 update_final_chunk_position(iterators_before_update);
555
556 simd::transpose(matrix);
557 split_into_sub_matrices(std::move(matrix));
558 }
559
561 constexpr void underflow()
562 requires (!fast_load)
563 {
564 at_end = final_chunk;
565 if (at_end) // reached end of stream.
566 return;
567
568 decltype(cached_iter) iterators_before_update{cached_iter}; // Keep track of iterators before the update.
569 for (size_t i = 0; i < chunk_size; ++i)
570 this_view->cached_simd_chunks[0][i] = convert_single_column();
571
572 final_chunk = all_iterators_reached_sentinel();
573
574 if (final_chunk)
575 update_final_chunk_position(iterators_before_update);
576 }
577
581 std::array<std::ranges::sentinel_t<inner_range_type>, chunk_size> cached_sentinel{};
583 view_to_simd * this_view{nullptr};
585 uint8_t final_chunk_size{chunk_size};
587 uint8_t final_chunk_pos{total_chunks - 1};
589 uint8_t current_chunk_pos{0};
591 bool final_chunk{true};
593 bool at_end{true};
594};
595
596// ============================================================================
597// to_simd_fn (adaptor definition)
598// ============================================================================
599
608template <simd::simd_concept simd_t>
609struct to_simd_fn
610{
612 using padding_t = typename simd_traits<simd_t>::scalar_type;
613
617 constexpr auto operator()(padding_t const padding_value) const noexcept
618 {
619 return detail::adaptor_from_functor{*this, padding_value};
620 }
621
623 constexpr auto operator()() const noexcept
624 {
625 return detail::adaptor_from_functor{*this};
626 }
627
633 template <std::ranges::range urng_t>
634 constexpr auto operator()(urng_t && urange, padding_t const padding_value) const noexcept
635 {
636 static_assert(std::ranges::forward_range<urng_t>,
637 "The underlying range in views::to_simd must model std::ranges::forward_range.");
638 static_assert(std::ranges::viewable_range<urng_t>,
639 "The underlying range in views::to_simd must model std::ranges::viewable_range.");
640 static_assert(std::ranges::input_range<std::ranges::range_value_t<urng_t>>,
641 "The value type of the underlying range must model std::ranges::input_range.");
643 "The value type of the inner ranges must model seqan3::semialphabet.");
644
645 return view_to_simd<type_reduce_t<urng_t>, simd_t>{std::forward<urng_t>(urange), padding_value};
646 }
647
652 template <std::ranges::range urng_t>
653 constexpr auto operator()(urng_t && urange) const noexcept
654 {
655 static_assert(std::ranges::forward_range<urng_t>,
656 "The underlying range in views::to_simd must model std::ranges::forward_range.");
657 static_assert(std::ranges::viewable_range<urng_t>,
658 "The underlying range in views::to_simd must model std::ranges::viewable_range.");
659 static_assert(std::ranges::input_range<std::ranges::range_value_t<urng_t>>,
660 "The value type of the underlying range must model std::ranges::input_range.");
662 "The value type of the inner ranges must model seqan3::semialphabet.");
663
664 return view_to_simd<type_reduce_t<urng_t>, simd_t>{std::forward<urng_t>(urange)};
665 }
666
668 template <std::ranges::range urng_t>
669 constexpr friend auto operator|(urng_t && urange, to_simd_fn const & me)
670 {
671 return me(std::forward<urng_t>(urange));
672 }
673};
674
675} // namespace seqan3::detail
676
677namespace seqan3::views
678{
679
781template <simd::simd_concept simd_t>
782inline constexpr auto to_simd = detail::to_simd_fn<simd_t>{};
783
784} // namespace seqan3::views
Provides seqan3::detail::adaptor_from_functor.
T addressof(T... args)
T advance(T... args)
Provides algorithms to modify seqan3::simd::simd_type.
T all_of(T... args)
Core alphabet concept and free function/type trait wrappers.
T begin(T... args)
Provides various transformation traits used by the range module.
T end(T... args)
T forward(T... args)
constexpr auto alphabet_size
A type trait that holds the size of a (semi-)alphabet.
Definition: concept.hpp:849
constexpr auto to_rank
Return the rank representation of a (semi-)alphabet object.
Definition: concept.hpp:155
auto operator|(validator1_type &&vali1, validator2_type &&vali2)
Enables the chaining of validators.
Definition: validators.hpp:1124
constexpr size_t size
The size of a type pack.
Definition: traits.hpp:146
constexpr auto chunk
Divide a range in chunks.
Definition: chunk.hpp:835
constexpr auto zip
A view adaptor that takes several views and returns tuple-like values from every i-th element of each...
Definition: zip.hpp:573
constexpr auto type_reduce
A view adaptor that behaves like std::views::all, but type erases certain ranges.
Definition: type_reduce.hpp:150
The basis for seqan3::alphabet, but requires only rank interface (not char).
T max_element(T... args)
The SeqAn namespace for views.
Definition: char_strictly_to.hpp:22
constexpr auto to_simd
A view that transforms a range of ranges into chunks of seqan3::simd vectors.
Definition: to_simd.hpp:782
constexpr auto const & get(configuration< configs_t... > const &config) noexcept
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: configuration.hpp:415
SeqAn specific customisations in the standard namespace.
T operator!=(T... args)
Provides seqan3::simd::simd_type.
Provides seqan3::simd::simd_traits.
Provides type traits for working with templates.
Provides seqan3::views::type_reduce.
Provides seqan3::simd::simd_concept.
Provides seqan3::views::zip.