SeqAn3  3.0.1
The Modern C++ library for sequence analysis.
view_to_simd.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2020, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2020, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
13 #pragma once
14 
25 #include <seqan3/std/algorithm>
26 #include <seqan3/std/iterator>
27 #include <seqan3/std/ranges>
28 
30 
31 namespace seqan3::detail
32 {
33 
59 template <std::ranges::view urng_t, simd::simd_concept simd_t>
60 class view_to_simd : public std::ranges::view_interface<view_to_simd<urng_t, simd_t>>
61 {
62 private:
63 
64  static_assert(std::ranges::forward_range<urng_t>,
65  "The underlying range must model forward_range.");
66  static_assert(std::ranges::input_range<value_type_t<urng_t>>,
67  "Expects the value type of the underlying range to be an input_range.");
68  static_assert(std::default_constructible<value_type_t<urng_t>>,
69  "Expects the inner range to be default constructible.");
70  static_assert(semialphabet<value_type_t<value_type_t<urng_t>>>,
71  "Expects semi-alphabet as value type of the inner range.");
72 
76  using inner_range_type = value_type_t<urng_t>;
78  using scalar_type = typename simd_traits<simd_t>::scalar_type;
79  using max_simd_type = simd_type_t<uint8_t, simd_traits<simd_t>::max_length>;
82 
86  static constexpr bool fast_load = std::ranges::contiguous_range<inner_range_type> &&
88  std::sized_sentinel_for<std::ranges::iterator_t<inner_range_type>,
89  std::ranges::sentinel_t<inner_range_type>> &&
90  sizeof(alphabet_rank_t<value_type_t<inner_range_type>>) == 1;
91 
93  static constexpr uint8_t chunk_size = simd_traits<simd_t>::length;
95  static constexpr uint8_t chunks_per_load = simd_traits<simd_t>::max_length / chunk_size;
97  static constexpr uint8_t total_chunks = fast_load ? (chunks_per_load * chunks_per_load) : 1;
99  static constexpr auto alphabet_size = alphabet_size<value_type_t<inner_range_type>>;
101 
102  // Forward declare class' iterator type. See definition below.
103  struct iterator_type;
104 
105 public:
106 
110  constexpr view_to_simd() = default;
111  constexpr view_to_simd(view_to_simd const &) = default;
112  constexpr view_to_simd(view_to_simd &&) = default;
113  constexpr view_to_simd & operator=(view_to_simd const &) = default;
114  constexpr view_to_simd & operator=(view_to_simd &&) = default;
115  ~view_to_simd() = default;
116 
121  constexpr view_to_simd(urng_t urng, scalar_type const padding_value = alphabet_size) :
122  urng{std::move(urng)},
123  padding_simd_vector{simd::fill<simd_t>(padding_value)},
124  padding_value{padding_value}
125  {
126  // Check if the size is less or equal the simd size.
127  if (std::ranges::distance(urng) > chunk_size)
128  throw std::invalid_argument{"The size of the underlying range must be less than or equal to the size of "
129  "the given simd type!"};
130  }
131 
133  template <typename other_urng_t>
135  requires !std::same_as<remove_cvref_t<other_urng_t>, view_to_simd> &&
137  std::ranges::viewable_range<other_urng_t>
139  constexpr view_to_simd(other_urng_t && urng, scalar_type const padding_value = alphabet_size) :
140  view_to_simd{views::type_reduce(std::forward<other_urng_t>(urng)), padding_value}
141  {}
143 
147  constexpr iterator_type begin() noexcept
149  {
150  return {*this};
151  }
152 
154  constexpr void begin() const noexcept = delete;
156  constexpr void cbegin() const noexcept = delete;
157 
159  constexpr std::ranges::default_sentinel_t end() noexcept
160  {
161  return std::ranges::default_sentinel;
162  }
163 
165  constexpr void end() const noexcept = delete;
167  constexpr void cend() const noexcept = delete;
169 
176  constexpr size_t size() const noexcept
178  requires std::ranges::sized_range<inner_range_type>
180  {
181  auto it = std::ranges::max_element(urng, [] (auto & lhs, auto & rhs)
182  {
183  return std::ranges::size(lhs) < std::ranges::size(rhs);
184  });
185 
186  return (it != std::ranges::end(urng)) ? (std::ranges::size(*it) + chunk_size - 1) / chunk_size : 0;
187  }
188 
189 private:
190 
191  urng_t urng{};
192  value_type_t<urng_t> empty_inner_range{};
193  std::array<chunk_type, total_chunks> cached_simd_chunks{};
194  simd_t padding_simd_vector{};
195  scalar_type padding_value{};
196 };
197 
205 template <std::ranges::view urng_t, simd::simd_concept simd_t>
206 class view_to_simd<urng_t, simd_t>::iterator_type
207 {
208 public:
213  using value_type = chunk_type;
214  using pointer = void;
215  using difference_type = ptrdiff_t;
216  using iterator_category = std::input_iterator_tag;
217  using iterator_concept = iterator_category;
218 
223  constexpr iterator_type() = default;
224  constexpr iterator_type(iterator_type const &) = default;
225  constexpr iterator_type(iterator_type &&) = default;
226  constexpr iterator_type & operator=(iterator_type const &) = default;
227  constexpr iterator_type & operator=(iterator_type &&) = default;
228  ~iterator_type() = default;
229 
238  constexpr iterator_type(view_to_simd & this_view) : this_view{&this_view}, current_chunk_pos{0}
239  {
240  // Initialise the iterator of the sub ranges.
241  size_t seq_id = 0;
242  for (auto it = std::ranges::begin(this_view.urng); it != std::ranges::end(this_view.urng); ++it, ++seq_id)
243  {
244  cached_iter[seq_id] = std::ranges::begin(*it);
245  cached_sentinel[seq_id] = std::ranges::end(*it);
246  }
247 
248  // If the batch is not full, i.e. less than chunk_size many sequences, then fill them up with dummy sequences.
249  for (; seq_id < chunk_size; ++seq_id)
250  {
251  cached_iter[seq_id] = std::ranges::begin(this_view.empty_inner_range);
252  cached_sentinel[seq_id] = std::ranges::end(this_view.empty_inner_range);
253  }
254  // Check if this is the final chunk already.
255  final_chunk = all_iterators_reached_sentinel();
256 
257  // Fetch the next available input characters from the sequences and transform them into simd vectors.
258  underflow();
259  }
261 
265  constexpr reference operator*() const noexcept
267  {
268  assert(this_view != nullptr);
269  return std::span{this_view->cached_simd_chunks[current_chunk_pos].begin(),
270  (current_chunk_pos == final_chunk_pos) ? final_chunk_size : chunk_size};
271  }
273 
277  constexpr iterator_type & operator++(/*pre-increment*/)
279  {
280  if constexpr (fast_load)
281  { // Check if cached chunks have been already consumed and we need to fetch the next chunks.
282  if (current_chunk_pos == final_chunk_pos)
283  {
284  underflow();
285  current_chunk_pos = 0;
286  }
287  else
288  {
289  ++current_chunk_pos;
290  }
291  }
292  else // In case fast load is not available only one chunk is filled at a time.
293  {
294  underflow();
295  }
296 
297  return *this;
298  }
299 
301  constexpr value_type operator++(int /*post-increment*/)
302  {
303  value_type tmp = this->operator*();
304  ++(*this);
305  return tmp;
306  }
308 
312  constexpr bool operator==(std::ranges::default_sentinel_t const &) const noexcept
314  {
315  return at_end;
316  }
317 
319  friend constexpr bool operator==(std::ranges::default_sentinel_t const &, iterator_type const & rhs) noexcept
320  {
321  return rhs.at_end;
322  }
323 
325  constexpr bool operator!=(std::ranges::default_sentinel_t const &) const noexcept
326  {
327  return !at_end;
328  }
329 
331  friend constexpr bool operator!=(std::ranges::default_sentinel_t const &, iterator_type const & rhs) noexcept
332  {
333  return !rhs.at_end;
334  }
336 
337 private:
348  auto unpack(max_simd_type const & row) const
349  {
350  if constexpr (chunk_size == simd_traits<max_simd_type>::length / 2) // upcast into 2 vectors.
351  {
352  return std::array{simd::upcast<simd_t>(extract_halve<0>(row)), // 1. half
353  simd::upcast<simd_t>(extract_halve<1>(row))}; // 2. half
354  }
355  else if constexpr (chunk_size == simd_traits<max_simd_type>::length / 4) // upcast into 4 vectors.
356  {
357  return std::array{simd::upcast<simd_t>(extract_quarter<0>(row)), // 1. quarter
358  simd::upcast<simd_t>(extract_quarter<1>(row)), // 2. quarter
359  simd::upcast<simd_t>(extract_quarter<2>(row)), // 3. quarter
360  simd::upcast<simd_t>(extract_quarter<3>(row))}; // 4. quarter
361  }
362  else if constexpr (chunk_size == simd_traits<max_simd_type>::length / 8) // upcast into 8 vectors.
363  {
364  return std::array{simd::upcast<simd_t>(extract_eighth<0>(row)), // 1. eighth
365  simd::upcast<simd_t>(extract_eighth<1>(row)), // 2. eighth
366  simd::upcast<simd_t>(extract_eighth<2>(row)), // 3. eighth
367  simd::upcast<simd_t>(extract_eighth<3>(row)), // 4. eighth
368  simd::upcast<simd_t>(extract_eighth<4>(row)), // 5. eighth
369  simd::upcast<simd_t>(extract_eighth<5>(row)), // 6. eighth
370  simd::upcast<simd_t>(extract_eighth<6>(row)), // 7. eighth
371  simd::upcast<simd_t>(extract_eighth<7>(row))}; // 8. eighth
372  }
373  else
374  {
375  return std::array{simd::upcast<simd_t>(row)};
376  }
377  }
378 
389  constexpr void split_into_sub_matrices(std::array<max_simd_type, simd_traits<max_simd_type>::length> matrix) const
390  {
391  auto apply_padding = [this] (simd_t const vec)
392  {
393  return (vec == simd::fill<simd_t>(static_cast<uint8_t>(~0))) ? this_view->padding_simd_vector : vec;
394  };
395 
396  // Iterate over the rows of the matrix
397  for (uint8_t row = 0; row < static_cast<uint8_t>(matrix.size()); ++row)
398  {
399  // split a row into multiple chunks of size `chunk_size`
400  auto chunked_row = unpack(matrix[row]);
401 
402  if constexpr (chunked_row.size() == 1)
403  {
404  this_view->cached_simd_chunks[0][row] = apply_padding(std::move(chunked_row[0]));
405  }
406  else // Parse the tuple elements and store them in the cached simd chunks.
407  {
408  static_assert(chunked_row.size() == chunks_per_load, "Expected chunks_per_load many simd vectors.");
409 
410  for (uint8_t chunk = 0; chunk < chunks_per_load; ++chunk) // store chunks in respective cached entries.
411  {
412  size_t idx = chunk * chunks_per_load + row / chunk_size;
413  this_view->cached_simd_chunks[idx][row % chunk_size] = apply_padding(std::move(chunked_row[chunk]));
414  }
415  }
416  }
417  }
418 
422  constexpr bool all_iterators_reached_sentinel() const noexcept
423  {
424  using std::get;
425 
426  return std::ranges::all_of(views::zip(cached_iter, cached_sentinel), [] (auto && iterator_sentinel_pair)
427  {
428  return get<0>(iterator_sentinel_pair) == get<1>(iterator_sentinel_pair);
429  });
430  }
431 
442  constexpr simd_t convert_single_column()
443  noexcept
444  {
445  simd_t simd_column{};
446  for (size_t idx = 0u; idx < chunk_size; ++idx)
447  {
448  if (cached_iter[idx] == cached_sentinel[idx])
449  {
450  simd_column[idx] = this_view->padding_value;
451  }
452  else
453  {
454  simd_column[idx] = static_cast<scalar_type>(seqan3::to_rank(*cached_iter[idx]));
455  ++cached_iter[idx];
456  }
457  };
458  return simd_column;
459  }
460 
471  template <typename array_t>
472  constexpr void update_final_chunk_position(array_t const & iterators_before_update) noexcept
473  {
474  size_t max_distance = 0;
475  for (auto && [it, sent] : views::zip(iterators_before_update, cached_sentinel))
476  max_distance = std::max<size_t>(std::ranges::distance(it, sent), max_distance);
477 
478  assert(max_distance > 0);
479  assert(max_distance <= (total_chunks * chunk_size));
480 
481  --max_distance;
482  final_chunk_pos = max_distance / chunk_size;
483  // first we should be able to check the chunk position.
484  final_chunk_size = (max_distance % chunk_size) + 1;
485  }
486 
488  constexpr void underflow()
490  requires fast_load
492  {
493  at_end = final_chunk;
494  if (at_end) // reached end of stream.
495  return;
496  // For the efficient load we assume at most one byte sized alphabets.
497  // Hence we can load `simd_traits<simd_t>::max_length` length many elements at once.
498  // Depending on the packing of `simd_t` we can prefetch blocks and store them in the `cached_simd_chunks`.
499  // E.g. assume `simd_t` with length 8 on SSE4 with max length 16.
500  // To fill the 16x16 matrix we need four 8x8 matrices.
501  // Thus, for the 8 sequences we need to load two times 16 consecutive bytes to fill the matrix, i.e. two loads
502  // see figure below.
503  //
504  // 0 1 ... 7 | 8 9 ... 15
505  // 0 [a00, a01, ..., a07]|[a08, a09, ..., a15] // first load of seq a reads 16 characters
506  // 1 [b00, b01, ..., b07]|[b08, b09, ..., b15] // first load of seq b reads 16 characters
507  // ... | ...
508  // 7 [g00, g01, ..., g07]|[g08, g09, ..., g15] // first load of seq g reads 16 characters
509  // ----------------------------------------
510  // 8 [a16, a17, ..., a23]|[a24, a25, ..., a31] // second load of seq a reads next 16 characters
511  // 9 [b16, b17, ..., b23]|[b24, b25, ..., b31] // second load of seq b reads next 16 characters
512  // ... | ...
513  // 15 [g16, g17, ..., g23]|[g24, g25, ..., g31] // second load of seq g reads next 16 characters
514  //
515  // This quadratic byte matrix can be transposed efficiently with simd instructions.
516  // If the target simd scalar type is bigger we can apply the same mechanism but have then 16 4x4 matrices
517  // (32 bit) or 256 2x2 matrices (64 bit).
518 
519  constexpr int8_t max_size = simd_traits<simd_t>::max_length;
521  decltype(cached_iter) iterators_before_update{cached_iter}; // Keep track of iterators before the update.
522  // Iterate over each sequence.
523  for (uint8_t sequence_pos = 0; sequence_pos < chunk_size; ++sequence_pos)
524  { // Iterate over each block depending on the packing of the target simd vector.
525  for (uint8_t chunk_pos = 0; chunk_pos < chunks_per_load; ++chunk_pos)
526  {
527  uint8_t pos = chunk_pos * chunk_size + sequence_pos; // matrix entry to fill
528  if (cached_sentinel[sequence_pos] - cached_iter[sequence_pos] >= max_size)
529  { // Not in final block, thus load directly from memory.
530  matrix[pos] = simd::load<max_simd_type>(std::addressof(*cached_iter[sequence_pos]));
531  std::advance(cached_iter[sequence_pos], max_size);
532  }
533  else // Loads the final block byte wise in order to not load from uninitialised memory.
534  {
535  matrix[pos] = simd::fill<max_simd_type>(~0);
536  auto & sequence_it = cached_iter[sequence_pos];
537  for (int8_t idx = 0; sequence_it != cached_sentinel[sequence_pos]; ++sequence_it, ++idx)
538  matrix[pos][idx] = seqan3::to_rank(*sequence_it);
539  }
540  }
541  }
542 
543  // Handle final chunk which might not end at an offset which is not a multiple of `chunk_size`.
544  final_chunk = all_iterators_reached_sentinel();
545 
546  if (final_chunk)
547  update_final_chunk_position(iterators_before_update);
548 
549  simd::transpose(matrix);
550  split_into_sub_matrices(std::move(matrix));
551  }
552 
554  constexpr void underflow()
556  requires !fast_load
558  {
559  at_end = final_chunk;
560  if (at_end) // reached end of stream.
561  return;
562 
563  decltype(cached_iter) iterators_before_update{cached_iter}; // Keep track of iterators before the update.
564  for (size_t i = 0; i < chunk_size; ++i)
565  this_view->cached_simd_chunks[0][i] = convert_single_column();
566 
567  final_chunk = all_iterators_reached_sentinel();
568 
569  if (final_chunk)
570  update_final_chunk_position(iterators_before_update);
571  }
572 
574  std::array<std::ranges::iterator_t<inner_range_type>, chunk_size> cached_iter{};
576  std::array<std::ranges::sentinel_t<inner_range_type>, chunk_size> cached_sentinel{};
578  view_to_simd * this_view{nullptr};
580  uint8_t final_chunk_size{chunk_size};
582  uint8_t final_chunk_pos{total_chunks - 1};
584  uint8_t current_chunk_pos{0};
586  bool final_chunk{false};
588  bool at_end{false};
589 };
590 
591 // ============================================================================
592 // to_simd_fn (adaptor definition)
593 // ============================================================================
594 
603 template <simd::simd_concept simd_t>
604 struct to_simd_fn
605 {
607  using padding_t = typename simd_traits<simd_t>::scalar_type;
608 
612  constexpr auto operator()(padding_t const padding_value) const noexcept
613  {
614  return detail::adaptor_from_functor{*this, padding_value};
615  }
616 
618  constexpr auto operator()() const noexcept
619  {
620  return detail::adaptor_from_functor{*this};
621  }
622 
628  template <std::ranges::range urng_t>
629  constexpr auto operator()(urng_t && urange, padding_t const padding_value) const noexcept
630  {
631  static_assert(std::ranges::forward_range<urng_t>,
632  "The underlying range in views::to_simd must model std::ranges::forward_range.");
633  static_assert(std::ranges::viewable_range<urng_t>,
634  "The underlying range in views::to_simd must model std::ranges::viewable_range.");
635  static_assert(std::ranges::input_range<value_type_t<urng_t>>,
636  "The value type of the underlying range must model std::ranges::input_range.");
637  static_assert(semialphabet<value_type_t<value_type_t<urng_t>>>,
638  "The value type of the inner ranges must model seqan3::semialphabet.");
639 
640  return view_to_simd<type_reduce_view<urng_t>, simd_t>{std::forward<urng_t>(urange), padding_value};
641  }
642 
647  template <std::ranges::range urng_t>
648  constexpr auto operator()(urng_t && urange) const noexcept
649  {
650  static_assert(std::ranges::forward_range<urng_t>,
651  "The underlying range in views::to_simd must model std::ranges::forward_range.");
652  static_assert(std::ranges::viewable_range<urng_t>,
653  "The underlying range in views::to_simd must model std::ranges::viewable_range.");
654  static_assert(std::ranges::input_range<value_type_t<urng_t>>,
655  "The value type of the underlying range must model std::ranges::input_range.");
656  static_assert(semialphabet<value_type_t<value_type_t<urng_t>>>,
657  "The value type of the inner ranges must model seqan3::semialphabet.");
658 
659  return view_to_simd<type_reduce_view<urng_t>, simd_t>{std::forward<urng_t>(urange)};
660  }
661 
663  template <std::ranges::range urng_t>
664  constexpr friend auto operator|(urng_t && urange, to_simd_fn const & me)
665  {
666  return me(std::forward<urng_t>(urange));
667  }
668 };
669 
670 } // namespace seqan3::detail
671 
672 namespace seqan3::views
673 {
674 
776 template <simd::simd_concept simd_t>
777 inline constexpr auto to_simd = detail::to_simd_fn<simd_t>{};
778 
779 } // namespace seqan3::views
debug_stream.hpp
Provides seqan3::debug_stream and related types.
zip.hpp
Provides seqan3::views::zip.
seqan3::views
The SeqAn namespace for views.
Definition: view_to_simd.hpp:672
std::span
pack_algorithm.hpp
Provides algorithms for meta programming, parameter packs and seqan3::type_list.
std::rel_ops::operator!=
T operator!=(T... args)
iterator
Provides C++20 additions to the <iterator> header.
std::input_iterator_tag
seqan3::views::move
const auto move
A view that turns lvalue-references into rvalue-references.
Definition: move.hpp:68
template_inspection.hpp
Provides seqan3::type_list and auxiliary type traits.
seqan3::to_rank
constexpr auto to_rank
Return the rank representation of a (semi-)alphabet object.
Definition: concept.hpp:142
seqan3::views::type_reduce
constexpr auto type_reduce
A view adaptor that behaves like std::views::all, but type erases certain ranges.
Definition: type_reduce.hpp:158
algorithm
Adaptations of algorithms from the Ranges TS.
seqan3::operator|
auto operator|(validator1_type &&vali1, validator2_type &&vali2)
Enables the chaining of validators.
Definition: validators.hpp:1023
concept.hpp
Provides seqan3::simd::simd_concept.
default_constructible
The std::default_constructible concept provides a shorthand for the common case when the question is ...
same_as
The concept std::same_as<T, U> is satisfied if and only if T and U denote the same type.
simd_algorithm.hpp
Provides algorithms to modify seqan3::simd::simd_type.
seqan3::views::get
const auto get
A view calling std::get on each element in a range.
Definition: get.hpp:65
simd.hpp
Provides seqan3::simd::simd_type.
std::addressof
T addressof(T... args)
simd_traits.hpp
Provides seqan3::simd::simd_traits.
type_reduce.hpp
Provides seqan3::views::type_reduce.
range.hpp
Provides various transformation traits used by the range module.
seqan3::value_type_t
typename value_type< t >::type value_type_t
Shortcut for seqan3::value_type (transformation_trait shortcut).
Definition: pre.hpp:48
std::array
std::invalid_argument
seqan3::pack_traits::size
constexpr size_t size
The size of a type pack.
Definition: traits.hpp:116
std::advance
T advance(T... args)
ranges
Adaptations of concepts from the Ranges TS.
std::begin
T begin(T... args)
std
SeqAn specific customisations in the standard namespace.
std::end
T end(T... args)
seqan3::views::to_simd
constexpr auto to_simd
A view that transforms a range of ranges into chunks of seqan3::simd vectors.
Definition: view_to_simd.hpp:777
semialphabet
The basis for seqan3::alphabet, but requires only rank interface (not char).
detail.hpp
Auxiliary header for the views submodule .
seqan3::alphabet_size
constexpr auto alphabet_size
A type trait that holds the size of a (semi-)alphabet.
Definition: concept.hpp:706