SeqAn3  3.0.1
The Modern C++ library for sequence analysis.
kmer_hash.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2020, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2020, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
13 #pragma once
14 
15 #include <cmath>
16 
18 #include <seqan3/range/hash.hpp>
20 
21 namespace seqan3::detail
22 {
23 
24 // ---------------------------------------------------------------------------------------------------------------------
25 // kmer_hash_view class
26 // ---------------------------------------------------------------------------------------------------------------------
27 
40 template <std::ranges::view urng_t>
41 class kmer_hash_view : public std::ranges::view_interface<kmer_hash_view<urng_t>>
42 {
43 private:
44  static_assert(std::ranges::forward_range<urng_t const>, "The kmer_hash_view only works on forward_ranges");
45  static_assert(semialphabet<reference_t<urng_t>>, "The reference type of the underlying range must model "
46  "seqan3::semialphabet.");
47 
49  urng_t urange;
50 
52  shape shape_;
53 
79  template <typename rng_t>
80  class shape_iterator
81  {
82  private:
84  using it_t = std::ranges::iterator_t<rng_t>;
86  using sentinel_t = std::ranges::sentinel_t<rng_t>;
87 
88  public:
92  using difference_type = typename it_t::difference_type;
95  using value_type = size_t;
97  using pointer = void;
99  using reference = value_type;
101  using iterator_category = std::input_iterator_tag;
103  using iterator_concept = std::conditional_t<std::contiguous_iterator<it_t>,
105  iterator_tag_t<it_t>>;
107 
111  constexpr shape_iterator() = default;
112  constexpr shape_iterator(shape_iterator const &) = default;
113  constexpr shape_iterator(shape_iterator &&) = default;
114  constexpr shape_iterator & operator=(shape_iterator const &) = default;
115  constexpr shape_iterator & operator=(shape_iterator &&) = default;
116  ~shape_iterator() = default;
117 
128  shape_iterator(it_t it_start, shape s_) :
129  shape_{s_}, text_left{it_start}, text_right{it_start}
130  {
131  assert(std::ranges::size(shape_) > 0);
132 
133  roll_factor = std::pow(sigma, std::ranges::size(shape_) - 1);
134 
135  hash_full();
136  }
138 
142 
144  friend bool operator==(shape_iterator const & lhs, sentinel_t const & rhs) noexcept
145  {
146  return lhs.text_right == rhs;
147  }
148 
150  friend bool operator==(sentinel_t const & lhs, shape_iterator const & rhs) noexcept
151  {
152  return lhs == rhs.text_right;
153  }
154 
156  friend bool operator==(shape_iterator const & lhs, shape_iterator const & rhs) noexcept
157  {
158  return std::tie(lhs.text_right, lhs.shape_) == std::tie(rhs.text_right, rhs.shape_);
159  }
160 
162  friend bool operator!=(shape_iterator const & lhs, sentinel_t const & rhs) noexcept
163  {
164  return !(lhs == rhs);
165  }
166 
168  friend bool operator!=(sentinel_t const & lhs, shape_iterator const & rhs) noexcept
169  {
170  return !(lhs == rhs);
171  }
172 
174  friend bool operator!=(shape_iterator const & lhs, shape_iterator const & rhs) noexcept
175  {
176  return !(lhs == rhs);
177  }
178 
180  friend bool operator<(shape_iterator const & lhs, shape_iterator const & rhs) noexcept
181  {
182  return (lhs.shape_ <= rhs.shape_) && (lhs.text_right < rhs.text_right);
183  }
184 
186  friend bool operator>(shape_iterator const & lhs, shape_iterator const & rhs) noexcept
187  {
188  return (lhs.shape_ >= rhs.shape_) && (lhs.text_right > rhs.text_right);
189  }
190 
192  friend bool operator<=(shape_iterator const & lhs, shape_iterator const & rhs) noexcept
193  {
194  return (lhs.shape_ <= rhs.shape_) && (lhs.text_right <= rhs.text_right);
195  }
196 
198  friend bool operator>=(shape_iterator const & lhs, shape_iterator const & rhs) noexcept
199  {
200  return (lhs.shape_ >= rhs.shape_) && (lhs.text_right >= rhs.text_right);
201  }
202 
204 
206  shape_iterator & operator++() noexcept
207  {
208  hash_forward();
209  return *this;
210  }
211 
213  shape_iterator operator++(int) noexcept
214  {
215  shape_iterator tmp{*this};
216  hash_forward();
217  return tmp;
218  }
219 
223  shape_iterator & operator--() noexcept
225  requires std::bidirectional_iterator<it_t>
227  {
228  hash_backward();
229  return *this;
230  }
231 
235  shape_iterator operator--(int) noexcept
237  requires std::bidirectional_iterator<it_t>
239  {
240  shape_iterator tmp{*this};
241  hash_backward();
242  return tmp;
243  }
244 
248  shape_iterator & operator+=(difference_type const skip) noexcept
250  requires std::random_access_iterator<it_t>
252  {
253  hash_forward(skip);
254  return *this;
255  }
256 
260  shape_iterator operator+(difference_type const skip) const noexcept
262  requires std::random_access_iterator<it_t>
264  {
265  shape_iterator tmp{*this};
266  return tmp += skip;
267  }
268 
272  friend shape_iterator operator+(difference_type const skip, shape_iterator const & it) noexcept
274  requires std::random_access_iterator<it_t>
276  {
277  return it + skip;
278  }
279 
283  shape_iterator & operator-=(difference_type const skip) noexcept
285  requires std::random_access_iterator<it_t>
287  {
288  hash_backward(skip);
289  return *this;
290  }
291 
296  shape_iterator operator-(difference_type const skip) const noexcept
298  requires std::random_access_iterator<it_t>
300  {
301  shape_iterator tmp{*this};
302  return tmp -= skip;
303  }
304 
308  friend shape_iterator operator-(difference_type const skip, shape_iterator const & it) noexcept
310  requires std::random_access_iterator<it_t>
312  {
313  return it - skip;
314  }
315 
320  difference_type operator-(shape_iterator const & lhs) const noexcept
322  requires std::random_access_iterator<it_t>
324  {
325  return static_cast<difference_type>(text_right - lhs.text_right);
326  }
327 
331  friend difference_type operator-(sentinel_t const & lhs, shape_iterator const & rhs) noexcept
333  requires std::sized_sentinel_for<sentinel_t, it_t>
335  {
336  return static_cast<difference_type>(lhs - rhs.text_right);
337  }
338 
342  friend difference_type operator-(shape_iterator const & lhs, sentinel_t const & rhs) noexcept
344  requires std::sized_sentinel_for<it_t, sentinel_t>
346  {
347  return static_cast<difference_type>(lhs.text_right - rhs);
348  }
349 
353  value_type operator[](difference_type const n)
355  requires std::random_access_iterator<it_t>
357  {
358  text_left += n;
359  hash_full();
360  return operator*();
361  }
362 
364  value_type operator*() const noexcept
365  {
366  return hash_value + to_rank(*text_right);
367  }
368 
369  private:
371  using alphabet_t = value_type_t<it_t>;
372 
374  static constexpr auto const sigma{alphabet_size<alphabet_t>};
375 
377  size_t hash_value{0};
378 
380  size_t roll_factor{0};
381 
383  shape shape_;
384 
386  it_t text_left;
387 
389  it_t text_right;
390 
392  void hash_forward()
393  {
394  if (shape_.all())
395  {
396  hash_roll_forward();
397  }
398  else
399  {
400  std::ranges::advance(text_left, 1);
401  hash_full();
402  }
403  }
404 
409  void hash_forward(difference_type const skip)
411  requires std::random_access_iterator<it_t>
413  {
414  std::ranges::advance(text_left, skip);
415  hash_full();
416  }
417 
421  void hash_backward()
423  requires std::bidirectional_iterator<it_t>
425  {
426  if (shape_.all())
427  {
428  hash_roll_backward();
429  }
430  else
431  {
432  std::ranges::advance(text_left, -1);
433  hash_full();
434  }
435  }
436 
441  void hash_backward(difference_type const skip)
442  {
443  std::ranges::advance(text_left, -skip);
444  hash_full();
445  }
446 
448  void hash_full()
449  {
450  text_right = text_left;
451  hash_value = 0;
452 
453  for (size_t i{0}; i < shape_.size() - 1u; ++i)
454  {
455  hash_value += shape_[i] * to_rank(*text_right);
456  hash_value *= shape_[i] ? sigma : 1;
457  std::ranges::advance(text_right, 1);
458  }
459  }
460 
462  void hash_roll_forward()
463  {
464  hash_value -= to_rank(*(text_left)) * roll_factor;
465  hash_value += to_rank(*(text_right));
466  hash_value *= sigma;
467 
468  std::ranges::advance(text_left, 1);
469  std::ranges::advance(text_right, 1);
470  }
471 
475  void hash_roll_backward()
477  requires std::bidirectional_iterator<it_t>
479  {
480  std::ranges::advance(text_left, -1);
481  std::ranges::advance(text_right, -1);
482 
483  hash_value /= sigma;
484  hash_value -= to_rank(*(text_right));
485  hash_value += to_rank(*(text_left)) * roll_factor;
486  }
487  };
488 
489 public:
493  kmer_hash_view() = default;
494  kmer_hash_view(kmer_hash_view const & rhs) = default;
495  kmer_hash_view(kmer_hash_view && rhs) = default;
496  kmer_hash_view & operator=(kmer_hash_view const & rhs) = default;
497  kmer_hash_view & operator=(kmer_hash_view && rhs) = default;
498  ~kmer_hash_view() = default;
499 
504  kmer_hash_view(urng_t urange_, shape const & s_) : urange{std::move(urange_)}, shape_{s_}
505  {
506  if (shape_.size() > (64 / std::log2(alphabet_size<reference_t<urng_t>>)))
507  {
508  throw std::invalid_argument{"The chosen shape/alphabet combination is not valid. "
509  "The alphabet or shape size must be reduced."};
510  }
511  }
512 
517  template <typename rng_t>
519  requires !std::same_as<remove_cvref_t<rng_t>, kmer_hash_view> &&
520  std::ranges::viewable_range<rng_t> &&
523  kmer_hash_view(rng_t && urange_, shape const & s_) :
524  urange{std::views::all(std::forward<rng_t>(urange_))}, shape_{s_}
525  {
526  if (shape_.size() > (64 / std::log2(alphabet_size<reference_t<urng_t>>)))
527  {
528  throw std::invalid_argument{"The chosen shape/alphabet combination is not valid. "
529  "The alphabet or shape size must be reduced."};
530  }
531  }
533 
550  auto begin() noexcept
551  {
552  return shape_iterator<urng_t>{std::ranges::begin(urange), shape_};
553  }
554 
556  auto begin() const noexcept
558  requires const_iterable_range<urng_t>
560  {
561  return shape_iterator<urng_t const>{std::ranges::begin(urange), shape_};
562  }
563 
565  auto cbegin() const noexcept
567  requires const_iterable_range<urng_t>
569  {
570  return begin();
571  }
572 
588  auto end() noexcept
589  {
590  return std::ranges::end(urange);
591  }
592 
594  auto end() const noexcept
596  requires const_iterable_range<urng_t>
598  {
599  return std::ranges::end(urange);
600  }
601 
603  auto cend() const noexcept
605  requires const_iterable_range<urng_t>
607  {
608  return end();
609  }
611 };
612 
614 template <std::ranges::viewable_range rng_t>
615 kmer_hash_view(rng_t &&, shape const & shape_) -> kmer_hash_view<std::ranges::all_view<rng_t>>;
616 
617 // ---------------------------------------------------------------------------------------------------------------------
618 // kmer_hash_fn (adaptor definition)
619 // ---------------------------------------------------------------------------------------------------------------------
620 
623 struct kmer_hash_fn
624 {
626  constexpr auto operator()(shape const & shape_) const
627  {
628  return adaptor_from_functor{*this, shape_};
629  }
630 
638  template <std::ranges::range urng_t>
639  constexpr auto operator()(urng_t && urange, shape const & shape_) const
640  {
641  static_assert(std::ranges::viewable_range<urng_t>,
642  "The range parameter to views::kmer_hash cannot be a temporary of a non-view range.");
643  static_assert(std::ranges::forward_range<urng_t>,
644  "The range parameter to views::kmer_hash must model std::ranges::forward_range.");
645  static_assert(semialphabet<reference_t<urng_t>>,
646  "The range parameter to views::kmer_hash must be over elements of seqan3::semialphabet.");
647 
648  return kmer_hash_view{std::forward<urng_t>(urange), shape_};
649  }
650 };
652 
653 } // namespace seqan3::detail
654 
655 namespace seqan3::views
656 {
657 
705 inline constexpr auto kmer_hash = detail::kmer_hash_fn{};
706 
708 
709 } // namespace seqan3::views
seqan3::views
The SeqAn namespace for views.
Definition: view_to_simd.hpp:672
seqan3::views::kmer_hash
constexpr auto kmer_hash
Computes hash values for each position of a range via a given shape.
Definition: kmer_hash.hpp:705
constructible_from
The std::constructible_from concept specifies that a variable of type T can be initialized with the g...
std::rel_ops::operator!=
T operator!=(T... args)
shape.hpp
Provides seqan3::shape.
std::input_iterator_tag
seqan3::views::move
const auto move
A view that turns lvalue-references into rvalue-references.
Definition: move.hpp:68
seqan3::to_rank
constexpr auto to_rank
Return the rank representation of a (semi-)alphabet object.
Definition: concept.hpp:142
cmath
std::tie
T tie(T... args)
same_as
The concept std::same_as<T, U> is satisfied if and only if T and U denote the same type.
const_iterable_range
Specifies requirements of an input range type for which the const version of that type satisfies the ...
std::invalid_argument
seqan3::search_cfg::all
constexpr detail::search_mode_all all
Configuration element to receive all hits within the error bounds.
Definition: mode.hpp:43
seqan3::pack_traits::size
constexpr size_t size
The size of a type pack.
Definition: traits.hpp:116
std::begin
T begin(T... args)
std
SeqAn specific customisations in the standard namespace.
hash.hpp
Provides overloads for std::hash.
std::end
T end(T... args)
std::conditional_t
semialphabet
The basis for seqan3::alphabet, but requires only rank interface (not char).
seqan3::alphabet_size
constexpr auto alphabet_size
A type trait that holds the size of a (semi-)alphabet.
Definition: concept.hpp:706
concept.hpp
Core alphabet concept and free function/type trait wrappers.
std::pow
T pow(T... args)