SeqAn3  3.0.2
The Modern C++ library for sequence analysis.
kmer_hash.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2020, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2020, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
13 #pragma once
14 
16 #include <seqan3/core/math.hpp>
17 #include <seqan3/range/hash.hpp>
19 
20 namespace seqan3::detail
21 {
22 // ---------------------------------------------------------------------------------------------------------------------
23 // kmer_hash_view class
24 // ---------------------------------------------------------------------------------------------------------------------
25 
38 template <std::ranges::view urng_t>
39 class kmer_hash_view : public std::ranges::view_interface<kmer_hash_view<urng_t>>
40 {
41 private:
42  static_assert(std::ranges::forward_range<urng_t>, "The kmer_hash_view only works on forward_ranges");
43  static_assert(semialphabet<std::ranges::range_reference_t<urng_t>>,
44  "The reference type of the underlying range must model seqan3::semialphabet.");
45 
47  urng_t urange;
48 
50  shape shape_;
51 
52  template <typename rng_t>
53  class basic_iterator;
54 
55 public:
59  kmer_hash_view() = default;
60  kmer_hash_view(kmer_hash_view const & rhs) = default;
61  kmer_hash_view(kmer_hash_view && rhs) = default;
62  kmer_hash_view & operator=(kmer_hash_view const & rhs) = default;
63  kmer_hash_view & operator=(kmer_hash_view && rhs) = default;
64  ~kmer_hash_view() = default;
65 
70  kmer_hash_view(urng_t urange_, shape const & s_) : urange{std::move(urange_)}, shape_{s_}
71  {
72  if (shape_.count() > (64 / std::log2(alphabet_size<std::ranges::range_reference_t<urng_t>>)))
73  {
74  throw std::invalid_argument{"The chosen shape/alphabet combination is not valid. "
75  "The alphabet or shape size must be reduced."};
76  }
77  }
78 
83  template <typename rng_t>
85  requires (!std::same_as<std::remove_cvref_t<rng_t>, kmer_hash_view>) &&
86  std::ranges::viewable_range<rng_t> &&
87  std::constructible_from<urng_t, std::ranges::ref_view<std::remove_reference_t<rng_t>>>
89  kmer_hash_view(rng_t && urange_, shape const & s_) :
90  urange{std::views::all(std::forward<rng_t>(urange_))}, shape_{s_}
91  {
92  if (shape_.count() > (64 / std::log2(alphabet_size<std::ranges::range_reference_t<urng_t>>)))
93  {
94  throw std::invalid_argument{"The chosen shape/alphabet combination is not valid. "
95  "The alphabet or shape size must be reduced."};
96  }
97  }
99 
116  auto begin() noexcept
117  {
118  return basic_iterator<urng_t>{std::ranges::begin(urange), std::ranges::end(urange), shape_};
119  }
120 
122  auto begin() const noexcept
124  requires const_iterable_range<urng_t>
126  {
127  return basic_iterator<urng_t const>{std::ranges::cbegin(urange), std::ranges::cend(urange), shape_};
128  }
129 
145  auto end() noexcept
146  {
147  // Assigning the end iterator to the text_right iterator of the basic_iterator only works for common ranges.
148  if constexpr (std::ranges::common_range<urng_t>)
149  return basic_iterator<urng_t>{std::ranges::begin(urange), std::ranges::end(urange), shape_, true};
150  else
151  return std::ranges::end(urange);
152  }
153 
155  auto end() const noexcept
157  requires const_iterable_range<urng_t>
159  {
160  // Assigning the end iterator to the text_right iterator of the basic_iterator only works for common ranges.
161  if constexpr (std::ranges::common_range<urng_t const>)
162  return basic_iterator<urng_t const>{std::ranges::cbegin(urange), std::ranges::cend(urange), shape_, true};
163  else
164  return std::ranges::cend(urange);
165  }
167 
171  auto size()
173  requires std::ranges::sized_range<urng_t>
175  {
176  using size_type = std::ranges::range_size_t<urng_t>;
177  return std::max<size_type>(std::ranges::size(urange) + 1, shape_.size()) - shape_.size();
178  }
179 
181  auto size() const
183  requires std::ranges::sized_range<urng_t const>
185  {
186  using size_type = std::ranges::range_size_t<urng_t const>;
187  return std::max<size_type>(std::ranges::size(urange) + 1, shape_.size()) - shape_.size();
188  }
189 };
190 
216 template <std::ranges::view urng_t>
217 template <typename rng_t>
218 class kmer_hash_view<urng_t>::basic_iterator
219 {
220 private:
222  using it_t = std::ranges::iterator_t<rng_t>;
224  using sentinel_t = std::ranges::sentinel_t<rng_t>;
225 
226  template <typename urng2_t>
227  friend class basic_iterator;
228 
229 public:
233  using difference_type = typename std::iter_difference_t<it_t>;
236  using value_type = size_t;
238  using pointer = void;
240  using reference = value_type;
242  using iterator_category = detail::iterator_category_tag_t<it_t>;
244  using iterator_concept = std::conditional_t<std::contiguous_iterator<it_t>,
246  detail::iterator_concept_tag_t<it_t>>;
248 
252  constexpr basic_iterator() = default;
253  constexpr basic_iterator(basic_iterator const &) = default;
254  constexpr basic_iterator(basic_iterator &&) = default;
255  constexpr basic_iterator & operator=(basic_iterator const &) = default;
256  constexpr basic_iterator & operator=(basic_iterator &&) = default;
257  ~basic_iterator() = default;
258 
260  template <typename urng2_t>
262  requires std::same_as<std::remove_const_t<urng_t>, urng2_t>
264  basic_iterator(basic_iterator<urng2_t> it) :
265  hash_value{std::move(it.hash_value)},
266  roll_factor{std::move(it.roll_factor)},
267  shape_{std::move(it.shape_)},
268  text_left{std::move(it.text_left)},
269  text_right{std::move(it.text_right)}
270  {}
271 
283  basic_iterator(it_t it_start, sentinel_t it_end, shape s_) :
284  shape_{s_}, text_left{it_start}, text_right{std::ranges::next(text_left, shape_.size() - 1, it_end)}
285  {
286  assert(std::ranges::size(shape_) > 0);
287 
288  // shape size = 3
289  // Text: 1 2 3 4 5 6 7 8 9
290  // text_left: ^
291  // text_right: ^
292  // distance(text_left, text_right) = 2
293  if (shape_.size() <= std::ranges::distance(text_left, text_right) + 1)
294  {
295  roll_factor = pow(sigma, static_cast<size_t>(std::ranges::size(shape_) - 1));
296  hash_full();
297  }
298  }
299 
323  basic_iterator(it_t it_start, sentinel_t it_end, shape s_, bool SEQAN3_DOXYGEN_ONLY(is_end)) : shape_{s_}
324  {
325  assert(std::ranges::size(shape_) > 0);
326 
327  auto urange_size = std::ranges::distance(it_start, it_end);
328  auto step = (shape_.size() + 1 > urange_size) ? 0 : urange_size - shape_.size() + 1;
329  text_left = std::ranges::next(it_start, step, it_end);
330  text_right = it_end;
331 
332  // shape size = 3
333  // Text: 1 2 3 4 5 6 7 8 9
334  // text_left: ^
335  // text_right: ^
336  // distance(text_left, text_right) = 2
337  if (shape_.size() <= std::ranges::distance(text_left, text_right) + 1)
338  {
339  roll_factor = pow(sigma, static_cast<size_t>(std::ranges::size(shape_) - 1));
340  hash_full();
341  }
342 
343  text_right = it_end;
344  }
346 
350 
352  friend bool operator==(basic_iterator const & lhs, sentinel_t const & rhs) noexcept
353  {
354  return lhs.text_right == rhs;
355  }
356 
358  friend bool operator==(sentinel_t const & lhs, basic_iterator const & rhs) noexcept
359  {
360  return lhs == rhs.text_right;
361  }
362 
364  friend bool operator==(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
365  {
366  return std::tie(lhs.text_right, lhs.shape_) == std::tie(rhs.text_right, rhs.shape_);
367  }
368 
370  friend bool operator!=(basic_iterator const & lhs, sentinel_t const & rhs) noexcept
371  {
372  return !(lhs == rhs);
373  }
374 
376  friend bool operator!=(sentinel_t const & lhs, basic_iterator const & rhs) noexcept
377  {
378  return !(lhs == rhs);
379  }
380 
382  friend bool operator!=(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
383  {
384  return !(lhs == rhs);
385  }
386 
388  friend bool operator<(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
389  {
390  return (lhs.shape_ <= rhs.shape_) && (lhs.text_right < rhs.text_right);
391  }
392 
394  friend bool operator>(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
395  {
396  return (lhs.shape_ >= rhs.shape_) && (lhs.text_right > rhs.text_right);
397  }
398 
400  friend bool operator<=(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
401  {
402  return (lhs.shape_ <= rhs.shape_) && (lhs.text_right <= rhs.text_right);
403  }
404 
406  friend bool operator>=(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
407  {
408  return (lhs.shape_ >= rhs.shape_) && (lhs.text_right >= rhs.text_right);
409  }
410 
412 
414  basic_iterator & operator++() noexcept
415  {
416  hash_forward();
417  return *this;
418  }
419 
421  basic_iterator operator++(int) noexcept
422  {
423  basic_iterator tmp{*this};
424  hash_forward();
425  return tmp;
426  }
427 
431  basic_iterator & operator--() noexcept
433  requires std::bidirectional_iterator<it_t>
435  {
436  hash_backward();
437  return *this;
438  }
439 
443  basic_iterator operator--(int) noexcept
445  requires std::bidirectional_iterator<it_t>
447  {
448  basic_iterator tmp{*this};
449  hash_backward();
450  return tmp;
451  }
452 
456  basic_iterator & operator+=(difference_type const skip) noexcept
458  requires std::random_access_iterator<it_t>
460  {
461  hash_forward(skip);
462  return *this;
463  }
464 
468  basic_iterator operator+(difference_type const skip) const noexcept
470  requires std::random_access_iterator<it_t>
472  {
473  basic_iterator tmp{*this};
474  return tmp += skip;
475  }
476 
480  friend basic_iterator operator+(difference_type const skip, basic_iterator const & it) noexcept
482  requires std::random_access_iterator<it_t>
484  {
485  return it + skip;
486  }
487 
491  basic_iterator & operator-=(difference_type const skip) noexcept
493  requires std::random_access_iterator<it_t>
495  {
496  hash_backward(skip);
497  return *this;
498  }
499 
504  basic_iterator operator-(difference_type const skip) const noexcept
506  requires std::random_access_iterator<it_t>
508  {
509  basic_iterator tmp{*this};
510  return tmp -= skip;
511  }
512 
516  friend basic_iterator operator-(difference_type const skip, basic_iterator const & it) noexcept
518  requires std::random_access_iterator<it_t>
520  {
521  return it - skip;
522  }
523 
528  friend difference_type operator-(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
530  requires std::random_access_iterator<it_t>
532  {
533  return static_cast<difference_type>(lhs.text_right - rhs.text_right);
534  }
535 
539  friend difference_type operator-(sentinel_t const & lhs, basic_iterator const & rhs) noexcept
541  requires std::sized_sentinel_for<sentinel_t, it_t>
543  {
544  return static_cast<difference_type>(lhs - rhs.text_right);
545  }
546 
550  friend difference_type operator-(basic_iterator const & lhs, sentinel_t const & rhs) noexcept
552  requires std::sized_sentinel_for<it_t, sentinel_t>
554  {
555  return static_cast<difference_type>(lhs.text_right - rhs);
556  }
557 
561  reference operator[](difference_type const n) const
563  requires std::random_access_iterator<it_t>
565  {
566  return *(*this + n);
567  }
568 
570  value_type operator*() const noexcept
571  {
572  return hash_value + to_rank(*text_right);
573  }
574 
575 private:
577  using alphabet_t = std::iter_value_t<it_t>;
578 
580  static constexpr auto const sigma{alphabet_size<alphabet_t>};
581 
583  size_t hash_value{0};
584 
586  size_t roll_factor{0};
587 
589  shape shape_;
590 
592  it_t text_left;
593 
595  it_t text_right;
596 
598  void hash_forward()
599  {
600  if (shape_.all())
601  {
602  hash_roll_forward();
603  }
604  else
605  {
606  std::ranges::advance(text_left, 1);
607  hash_full();
608  }
609  }
610 
615  void hash_forward(difference_type const skip)
617  requires std::random_access_iterator<it_t>
619  {
620  std::ranges::advance(text_left, skip);
621  hash_full();
622  }
623 
627  void hash_backward()
629  requires std::bidirectional_iterator<it_t>
631  {
632  if (shape_.all())
633  {
634  hash_roll_backward();
635  }
636  else
637  {
638  std::ranges::advance(text_left, -1);
639  hash_full();
640  }
641  }
642 
647  void hash_backward(difference_type const skip)
648  {
649  std::ranges::advance(text_left, -skip);
650  hash_full();
651  }
652 
654  void hash_full()
655  {
656  text_right = text_left;
657  hash_value = 0;
658 
659  for (size_t i{0}; i < shape_.size() - 1u; ++i)
660  {
661  hash_value += shape_[i] * to_rank(*text_right);
662  hash_value *= shape_[i] ? sigma : 1;
663  std::ranges::advance(text_right, 1);
664  }
665 
666  }
667 
669  void hash_roll_forward()
670  {
671  hash_value -= to_rank(*(text_left)) * roll_factor;
672  hash_value += to_rank(*(text_right));
673  hash_value *= sigma;
674 
675  std::ranges::advance(text_left, 1);
676  std::ranges::advance(text_right, 1);
677  }
678 
682  void hash_roll_backward()
684  requires std::bidirectional_iterator<it_t>
686  {
687  std::ranges::advance(text_left, -1);
688  std::ranges::advance(text_right, -1);
689 
690  hash_value /= sigma;
691  hash_value -= to_rank(*(text_right));
692  hash_value += to_rank(*(text_left)) * roll_factor;
693  }
694 };
695 
697 template <std::ranges::viewable_range rng_t>
698 kmer_hash_view(rng_t &&, shape const & shape_) -> kmer_hash_view<std::views::all_t<rng_t>>;
699 
700 // ---------------------------------------------------------------------------------------------------------------------
701 // kmer_hash_fn (adaptor definition)
702 // ---------------------------------------------------------------------------------------------------------------------
703 
706 struct kmer_hash_fn
707 {
709  constexpr auto operator()(shape const & shape_) const
710  {
711  return adaptor_from_functor{*this, shape_};
712  }
713 
721  template <std::ranges::range urng_t>
722  constexpr auto operator()(urng_t && urange, shape const & shape_) const
723  {
724  static_assert(std::ranges::viewable_range<urng_t>,
725  "The range parameter to views::kmer_hash cannot be a temporary of a non-view range.");
726  static_assert(std::ranges::forward_range<urng_t>,
727  "The range parameter to views::kmer_hash must model std::ranges::forward_range.");
728  static_assert(semialphabet<std::ranges::range_reference_t<urng_t>>,
729  "The range parameter to views::kmer_hash must be over elements of seqan3::semialphabet.");
730 
731  return kmer_hash_view{std::forward<urng_t>(urange), shape_};
732  }
733 };
735 
736 } // namespace seqan3::detail
737 
738 namespace seqan3::views
739 {
740 
788 inline constexpr auto kmer_hash = detail::kmer_hash_fn{};
789 
791 
792 } // namespace seqan3::views
seqan3::views
The SeqAn namespace for views.
Definition: view_iota_simd.hpp:218
seqan3::views::kmer_hash
constexpr auto kmer_hash
Computes hash values for each position of a range via a given shape.
Definition: kmer_hash.hpp:788
std::rel_ops::operator!=
T operator!=(T... args)
shape.hpp
Provides seqan3::shape.
std::random_access_iterator_tag
seqan3::pow
base_t pow(base_t base, exp_t exp)
Computes the value of base raised to the power exp.
Definition: math.hpp:122
seqan3::to_rank
constexpr auto to_rank
Return the rank representation of a (semi-)alphabet object.
Definition: concept.hpp:143
std::tie
T tie(T... args)
seqan3::views::move
auto const move
A view that turns lvalue-references into rvalue-references.
Definition: move.hpp:68
const_iterable_range
Specifies requirements of an input range type for which the const version of that type satisfies the ...
std::iter_difference_t
std::invalid_argument
seqan3::pack_traits::size
constexpr size_t size
The size of a type pack.
Definition: traits.hpp:116
std::begin
T begin(T... args)
std
SeqAn specific customisations in the standard namespace.
hash.hpp
Provides overloads for std::hash.
std::remove_cvref_t
std::end
T end(T... args)
std::conditional_t
math.hpp
Provides math related functionality.
semialphabet
The basis for seqan3::alphabet, but requires only rank interface (not char).
seqan3::alphabet_size
constexpr auto alphabet_size
A type trait that holds the size of a (semi-)alphabet.
Definition: concept.hpp:707
concept.hpp
Core alphabet concept and free function/type trait wrappers.