SeqAn3  3.0.3
The Modern C++ library for sequence analysis.
kmer_hash.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2021, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2021, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
13 #pragma once
14 
18 #include <seqan3/utility/math.hpp>
19 
20 namespace seqan3::detail
21 {
22 // ---------------------------------------------------------------------------------------------------------------------
23 // kmer_hash_view class
24 // ---------------------------------------------------------------------------------------------------------------------
25 
38 template <std::ranges::view urng_t>
39 class kmer_hash_view : public std::ranges::view_interface<kmer_hash_view<urng_t>>
40 {
41 private:
42  static_assert(std::ranges::forward_range<urng_t>, "The kmer_hash_view only works on forward_ranges");
43  static_assert(semialphabet<std::ranges::range_reference_t<urng_t>>,
44  "The reference type of the underlying range must model seqan3::semialphabet.");
45 
47  urng_t urange;
48 
50  shape shape_;
51 
52  template <bool const_range>
53  class basic_iterator;
54 
55 public:
59  kmer_hash_view() = default;
60  kmer_hash_view(kmer_hash_view const & rhs) = default;
61  kmer_hash_view(kmer_hash_view && rhs) = default;
62  kmer_hash_view & operator=(kmer_hash_view const & rhs) = default;
63  kmer_hash_view & operator=(kmer_hash_view && rhs) = default;
64  ~kmer_hash_view() = default;
65 
70  kmer_hash_view(urng_t urange_, shape const & s_) : urange{std::move(urange_)}, shape_{s_}
71  {
72  if (shape_.count() > (64 / std::log2(alphabet_size<std::ranges::range_reference_t<urng_t>>)))
73  {
74  throw std::invalid_argument{"The chosen shape/alphabet combination is not valid. "
75  "The alphabet or shape size must be reduced."};
76  }
77  }
78 
83  template <typename rng_t>
85  requires (!std::same_as<std::remove_cvref_t<rng_t>, kmer_hash_view>) &&
86  std::ranges::viewable_range<rng_t> &&
87  std::constructible_from<urng_t, std::ranges::ref_view<std::remove_reference_t<rng_t>>>
89  kmer_hash_view(rng_t && urange_, shape const & s_) :
90  urange{std::views::all(std::forward<rng_t>(urange_))}, shape_{s_}
91  {
92  if (shape_.count() > (64 / std::log2(alphabet_size<std::ranges::range_reference_t<urng_t>>)))
93  {
94  throw std::invalid_argument{"The chosen shape/alphabet combination is not valid. "
95  "The alphabet or shape size must be reduced."};
96  }
97  }
99 
116  auto begin() noexcept
117  {
118  return basic_iterator<false>{std::ranges::begin(urange), std::ranges::end(urange), shape_};
119  }
120 
122  auto begin() const noexcept
124  requires const_iterable_range<urng_t>
126  {
127  return basic_iterator<true>{std::ranges::cbegin(urange), std::ranges::cend(urange), shape_};
128  }
129 
145  auto end() noexcept
146  {
147  // Assigning the end iterator to the text_right iterator of the basic_iterator only works for common ranges.
148  if constexpr (std::ranges::common_range<urng_t>)
149  return basic_iterator<false>{std::ranges::begin(urange), std::ranges::end(urange), shape_, true};
150  else
151  return std::ranges::end(urange);
152  }
153 
155  auto end() const noexcept
157  requires const_iterable_range<urng_t>
159  {
160  // Assigning the end iterator to the text_right iterator of the basic_iterator only works for common ranges.
161  if constexpr (std::ranges::common_range<urng_t const>)
162  return basic_iterator<true>{std::ranges::cbegin(urange), std::ranges::cend(urange), shape_, true};
163  else
164  return std::ranges::cend(urange);
165  }
167 
171  auto size()
173  requires std::ranges::sized_range<urng_t>
175  {
176  using size_type = std::ranges::range_size_t<urng_t>;
177  return std::max<size_type>(std::ranges::size(urange) + 1, shape_.size()) - shape_.size();
178  }
179 
181  auto size() const
183  requires std::ranges::sized_range<urng_t const>
185  {
186  using size_type = std::ranges::range_size_t<urng_t const>;
187  return std::max<size_type>(std::ranges::size(urange) + 1, shape_.size()) - shape_.size();
188  }
189 };
190 
218 template <std::ranges::view urng_t>
219 template <bool const_range>
220 class kmer_hash_view<urng_t>::basic_iterator
221  : public maybe_iterator_category<maybe_const_iterator_t<const_range, urng_t>>
222 {
223 private:
225  using it_t = maybe_const_iterator_t<const_range, urng_t>;
227  using sentinel_t = maybe_const_sentinel_t<const_range, urng_t>;
228 
229  template <bool other_const_range>
230  friend class basic_iterator;
231 
232 public:
237  using difference_type = typename std::iter_difference_t<it_t>;
239  using value_type = size_t;
241  using pointer = void;
243  using reference = value_type;
245  using iterator_concept = std::conditional_t<std::contiguous_iterator<it_t>,
247  detail::iterator_concept_tag_t<it_t>>;
249 
253  constexpr basic_iterator() = default;
254  constexpr basic_iterator(basic_iterator const &) = default;
255  constexpr basic_iterator(basic_iterator &&) = default;
256  constexpr basic_iterator & operator=(basic_iterator const &) = default;
257  constexpr basic_iterator & operator=(basic_iterator &&) = default;
258  ~basic_iterator() = default;
259 
261  constexpr basic_iterator(basic_iterator<!const_range> const & it) noexcept
263  requires const_range
265  : hash_value{std::move(it.hash_value)},
266  roll_factor{std::move(it.roll_factor)},
267  shape_{std::move(it.shape_)},
268  text_left{std::move(it.text_left)},
269  text_right{std::move(it.text_right)}
270  {}
271 
283  basic_iterator(it_t it_start, sentinel_t it_end, shape s_) :
284  shape_{s_}, text_left{it_start}, text_right{std::ranges::next(text_left, shape_.size() - 1, it_end)}
285  {
286  assert(std::ranges::size(shape_) > 0);
287 
288  // shape size = 3
289  // Text: 1 2 3 4 5 6 7 8 9
290  // text_left: ^
291  // text_right: ^
292  // distance(text_left, text_right) = 2
293  if (shape_.size() <= std::ranges::distance(text_left, text_right) + 1)
294  {
295  roll_factor = pow(sigma, static_cast<size_t>(std::ranges::size(shape_) - 1));
296  hash_full();
297  }
298  }
299 
323  basic_iterator(it_t it_start, sentinel_t it_end, shape s_, bool SEQAN3_DOXYGEN_ONLY(is_end)) : shape_{s_}
324  {
325  assert(std::ranges::size(shape_) > 0);
326 
327  auto urange_size = std::ranges::distance(it_start, it_end);
328  auto step = (shape_.size() > urange_size + 1) ? 0 : urange_size - shape_.size() + 1;
329  text_left = std::ranges::next(it_start, step, it_end);
330 
331  // shape size = 3
332  // Text: 1 2 3 4 5 6 7 8 9
333  // text_left: ^
334  // text_right: ^
335  // distance(text_left, text_right) = 2
336  if (shape_.size() <= std::ranges::distance(text_left, it_end) + 1)
337  {
338  roll_factor = pow(sigma, static_cast<size_t>(std::ranges::size(shape_) - 1));
339  hash_full();
340  }
341 
342  text_right = it_end;
343  }
345 
349 
351  friend bool operator==(basic_iterator const & lhs, sentinel_t const & rhs) noexcept
352  {
353  return lhs.text_right == rhs;
354  }
355 
357  friend bool operator==(sentinel_t const & lhs, basic_iterator const & rhs) noexcept
358  {
359  return lhs == rhs.text_right;
360  }
361 
363  friend bool operator==(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
364  {
365  return std::tie(lhs.text_right, lhs.shape_) == std::tie(rhs.text_right, rhs.shape_);
366  }
367 
369  friend bool operator!=(basic_iterator const & lhs, sentinel_t const & rhs) noexcept
370  {
371  return !(lhs == rhs);
372  }
373 
375  friend bool operator!=(sentinel_t const & lhs, basic_iterator const & rhs) noexcept
376  {
377  return !(lhs == rhs);
378  }
379 
381  friend bool operator!=(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
382  {
383  return !(lhs == rhs);
384  }
385 
387  friend bool operator<(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
388  {
389  return (lhs.shape_ <= rhs.shape_) && (lhs.text_right < rhs.text_right);
390  }
391 
393  friend bool operator>(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
394  {
395  return (lhs.shape_ >= rhs.shape_) && (lhs.text_right > rhs.text_right);
396  }
397 
399  friend bool operator<=(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
400  {
401  return (lhs.shape_ <= rhs.shape_) && (lhs.text_right <= rhs.text_right);
402  }
403 
405  friend bool operator>=(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
406  {
407  return (lhs.shape_ >= rhs.shape_) && (lhs.text_right >= rhs.text_right);
408  }
409 
411 
413  basic_iterator & operator++() noexcept
414  {
415  hash_forward();
416  return *this;
417  }
418 
420  basic_iterator operator++(int) noexcept
421  {
422  basic_iterator tmp{*this};
423  hash_forward();
424  return tmp;
425  }
426 
430  basic_iterator & operator--() noexcept
432  requires std::bidirectional_iterator<it_t>
434  {
435  hash_backward();
436  return *this;
437  }
438 
442  basic_iterator operator--(int) noexcept
444  requires std::bidirectional_iterator<it_t>
446  {
447  basic_iterator tmp{*this};
448  hash_backward();
449  return tmp;
450  }
451 
455  basic_iterator & operator+=(difference_type const skip) noexcept
457  requires std::random_access_iterator<it_t>
459  {
460  hash_forward(skip);
461  return *this;
462  }
463 
467  basic_iterator operator+(difference_type const skip) const noexcept
469  requires std::random_access_iterator<it_t>
471  {
472  basic_iterator tmp{*this};
473  return tmp += skip;
474  }
475 
479  friend basic_iterator operator+(difference_type const skip, basic_iterator const & it) noexcept
481  requires std::random_access_iterator<it_t>
483  {
484  return it + skip;
485  }
486 
490  basic_iterator & operator-=(difference_type const skip) noexcept
492  requires std::random_access_iterator<it_t>
494  {
495  hash_backward(skip);
496  return *this;
497  }
498 
503  basic_iterator operator-(difference_type const skip) const noexcept
505  requires std::random_access_iterator<it_t>
507  {
508  basic_iterator tmp{*this};
509  return tmp -= skip;
510  }
511 
515  friend basic_iterator operator-(difference_type const skip, basic_iterator const & it) noexcept
517  requires std::random_access_iterator<it_t>
519  {
520  return it - skip;
521  }
522 
527  friend difference_type operator-(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
529  requires std::random_access_iterator<it_t>
531  {
532  return static_cast<difference_type>(lhs.text_right - rhs.text_right);
533  }
534 
538  friend difference_type operator-(sentinel_t const & lhs, basic_iterator const & rhs) noexcept
540  requires std::sized_sentinel_for<sentinel_t, it_t>
542  {
543  return static_cast<difference_type>(lhs - rhs.text_right);
544  }
545 
549  friend difference_type operator-(basic_iterator const & lhs, sentinel_t const & rhs) noexcept
551  requires std::sized_sentinel_for<it_t, sentinel_t>
553  {
554  return static_cast<difference_type>(lhs.text_right - rhs);
555  }
556 
560  reference operator[](difference_type const n) const
562  requires std::random_access_iterator<it_t>
564  {
565  return *(*this + n);
566  }
567 
569  value_type operator*() const noexcept
570  {
571  return hash_value + to_rank(*text_right);
572  }
573 
574 private:
576  using alphabet_t = std::iter_value_t<it_t>;
577 
579  static constexpr auto const sigma{alphabet_size<alphabet_t>};
580 
582  size_t hash_value{0};
583 
585  size_t roll_factor{0};
586 
588  shape shape_;
589 
591  it_t text_left;
592 
594  it_t text_right;
595 
597  void hash_forward()
598  {
599  if (shape_.all())
600  {
601  hash_roll_forward();
602  }
603  else
604  {
605  std::ranges::advance(text_left, 1);
606  hash_full();
607  }
608  }
609 
614  void hash_forward(difference_type const skip)
616  requires std::random_access_iterator<it_t>
618  {
619  std::ranges::advance(text_left, skip);
620  hash_full();
621  }
622 
626  void hash_backward()
628  requires std::bidirectional_iterator<it_t>
630  {
631  if (shape_.all())
632  {
633  hash_roll_backward();
634  }
635  else
636  {
637  std::ranges::advance(text_left, -1);
638  hash_full();
639  }
640  }
641 
646  void hash_backward(difference_type const skip)
647  {
648  std::ranges::advance(text_left, -skip);
649  hash_full();
650  }
651 
653  void hash_full()
654  {
655  text_right = text_left;
656  hash_value = 0;
657 
658  for (size_t i{0}; i < shape_.size() - 1u; ++i)
659  {
660  hash_value += shape_[i] * to_rank(*text_right);
661  hash_value *= shape_[i] ? sigma : 1;
662  std::ranges::advance(text_right, 1);
663  }
664 
665  }
666 
668  void hash_roll_forward()
669  {
670  hash_value -= to_rank(*(text_left)) * roll_factor;
671  hash_value += to_rank(*(text_right));
672  hash_value *= sigma;
673 
674  std::ranges::advance(text_left, 1);
675  std::ranges::advance(text_right, 1);
676  }
677 
681  void hash_roll_backward()
683  requires std::bidirectional_iterator<it_t>
685  {
686  std::ranges::advance(text_left, -1);
687  std::ranges::advance(text_right, -1);
688 
689  hash_value /= sigma;
690  hash_value -= to_rank(*(text_right));
691  hash_value += to_rank(*(text_left)) * roll_factor;
692  }
693 };
694 
696 template <std::ranges::viewable_range rng_t>
697 kmer_hash_view(rng_t &&, shape const & shape_) -> kmer_hash_view<std::views::all_t<rng_t>>;
698 
699 // ---------------------------------------------------------------------------------------------------------------------
700 // kmer_hash_fn (adaptor definition)
701 // ---------------------------------------------------------------------------------------------------------------------
702 
705 struct kmer_hash_fn
706 {
708  constexpr auto operator()(shape const & shape_) const
709  {
710  return adaptor_from_functor{*this, shape_};
711  }
712 
720  template <std::ranges::range urng_t>
721  constexpr auto operator()(urng_t && urange, shape const & shape_) const
722  {
723  static_assert(std::ranges::viewable_range<urng_t>,
724  "The range parameter to views::kmer_hash cannot be a temporary of a non-view range.");
725  static_assert(std::ranges::forward_range<urng_t>,
726  "The range parameter to views::kmer_hash must model std::ranges::forward_range.");
727  static_assert(semialphabet<std::ranges::range_reference_t<urng_t>>,
728  "The range parameter to views::kmer_hash must be over elements of seqan3::semialphabet.");
729 
730  return kmer_hash_view{std::forward<urng_t>(urange), shape_};
731  }
732 };
734 
735 } // namespace seqan3::detail
736 
737 namespace seqan3::views
738 {
739 
789 inline constexpr auto kmer_hash = detail::kmer_hash_fn{};
790 
792 
793 } // namespace seqan3::views
Core alphabet concept and free function/type trait wrappers.
Provides overloads for std::hash.
T begin(T... args)
T end(T... args)
T forward(T... args)
constexpr auto alphabet_size
A type trait that holds the size of a (semi-)alphabet.
Definition: concept.hpp:858
constexpr auto to_rank
Return the rank representation of a (semi-)alphabet object.
Definition: concept.hpp:155
base_t pow(base_t base, exp_t exp)
Computes the value of base raised to the power exp.
Definition: math.hpp:124
constexpr size_t size
The size of a type pack.
Definition: traits.hpp:151
constexpr auto kmer_hash
Computes hash values for each position of a range via a given shape.
Definition: kmer_hash.hpp:789
auto const move
A view that turns lvalue-references into rvalue-references.
Definition: move.hpp:74
Specifies requirements of an input range type for which the const version of that type satisfies the ...
The basis for seqan3::alphabet, but requires only rank interface (not char).
The SeqAn namespace for views.
Definition: char_to.hpp:22
SeqAn specific customisations in the standard namespace.
T next(T... args)
T operator!=(T... args)
Provides seqan3::shape.
T tie(T... args)
Provides math related functionality.