SeqAn3 3.1.0
The Modern C++ library for sequence analysis.
kmer_hash.hpp
Go to the documentation of this file.
1// -----------------------------------------------------------------------------------------------------
2// Copyright (c) 2006-2021, Knut Reinert & Freie Universität Berlin
3// Copyright (c) 2016-2021, Knut Reinert & MPI für molekulare Genetik
4// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6// -----------------------------------------------------------------------------------------------------
7
13#pragma once
14
19
20namespace seqan3::detail
21{
22// ---------------------------------------------------------------------------------------------------------------------
23// kmer_hash_view class
24// ---------------------------------------------------------------------------------------------------------------------
25
38template <std::ranges::view urng_t>
39class kmer_hash_view : public std::ranges::view_interface<kmer_hash_view<urng_t>>
40{
41private:
42 static_assert(std::ranges::forward_range<urng_t>, "The kmer_hash_view only works on forward_ranges");
44 "The reference type of the underlying range must model seqan3::semialphabet.");
45
47 urng_t urange;
48
50 shape shape_;
51
52 template <bool const_range>
53 class basic_iterator;
54
55public:
59 kmer_hash_view() = default;
60 kmer_hash_view(kmer_hash_view const & rhs) = default;
61 kmer_hash_view(kmer_hash_view && rhs) = default;
62 kmer_hash_view & operator=(kmer_hash_view const & rhs) = default;
63 kmer_hash_view & operator=(kmer_hash_view && rhs) = default;
64 ~kmer_hash_view() = default;
65
70 kmer_hash_view(urng_t urange_, shape const & s_) : urange{std::move(urange_)}, shape_{s_}
71 {
72 if (shape_.count() > (64 / std::log2(alphabet_size<std::ranges::range_reference_t<urng_t>>)))
73 {
74 throw std::invalid_argument{"The chosen shape/alphabet combination is not valid. "
75 "The alphabet or shape size must be reduced."};
76 }
77 }
78
83 template <typename rng_t>
85 requires (!std::same_as<std::remove_cvref_t<rng_t>, kmer_hash_view>) &&
86 std::ranges::viewable_range<rng_t> &&
87 std::constructible_from<urng_t, std::ranges::ref_view<std::remove_reference_t<rng_t>>>
89 kmer_hash_view(rng_t && urange_, shape const & s_) :
90 urange{std::views::all(std::forward<rng_t>(urange_))}, shape_{s_}
91 {
92 if (shape_.count() > (64 / std::log2(alphabet_size<std::ranges::range_reference_t<urng_t>>)))
93 {
94 throw std::invalid_argument{"The chosen shape/alphabet combination is not valid. "
95 "The alphabet or shape size must be reduced."};
96 }
97 }
99
116 auto begin() noexcept
117 {
118 return basic_iterator<false>{std::ranges::begin(urange), std::ranges::end(urange), shape_};
119 }
120
122 auto begin() const noexcept
124 requires const_iterable_range<urng_t>
126 {
127 return basic_iterator<true>{std::ranges::cbegin(urange), std::ranges::cend(urange), shape_};
128 }
129
145 auto end() noexcept
146 {
147 // Assigning the end iterator to the text_right iterator of the basic_iterator only works for common ranges.
148 if constexpr (std::ranges::common_range<urng_t>)
149 return basic_iterator<false>{std::ranges::begin(urange), std::ranges::end(urange), shape_, true};
150 else
151 return std::ranges::end(urange);
152 }
153
155 auto end() const noexcept
157 requires const_iterable_range<urng_t>
159 {
160 // Assigning the end iterator to the text_right iterator of the basic_iterator only works for common ranges.
161 if constexpr (std::ranges::common_range<urng_t const>)
162 return basic_iterator<true>{std::ranges::cbegin(urange), std::ranges::cend(urange), shape_, true};
163 else
164 return std::ranges::cend(urange);
165 }
167
171 auto size()
173 requires std::ranges::sized_range<urng_t>
175 {
176 using size_type = std::ranges::range_size_t<urng_t>;
177 return std::max<size_type>(std::ranges::size(urange) + 1, shape_.size()) - shape_.size();
178 }
179
181 auto size() const
183 requires std::ranges::sized_range<urng_t const>
185 {
186 using size_type = std::ranges::range_size_t<urng_t const>;
187 return std::max<size_type>(std::ranges::size(urange) + 1, shape_.size()) - shape_.size();
188 }
189};
190
218template <std::ranges::view urng_t>
219template <bool const_range>
220class kmer_hash_view<urng_t>::basic_iterator
221 : public maybe_iterator_category<maybe_const_iterator_t<const_range, urng_t>>
222{
223private:
225 using it_t = maybe_const_iterator_t<const_range, urng_t>;
227 using sentinel_t = maybe_const_sentinel_t<const_range, urng_t>;
228
229 template <bool other_const_range>
230 friend class basic_iterator;
231
232public:
237 using difference_type = typename std::iter_difference_t<it_t>;
239 using value_type = size_t;
241 using pointer = void;
243 using reference = value_type;
247 detail::iterator_concept_tag_t<it_t>>;
249
253 constexpr basic_iterator() = default;
254 constexpr basic_iterator(basic_iterator const &) = default;
255 constexpr basic_iterator(basic_iterator &&) = default;
256 constexpr basic_iterator & operator=(basic_iterator const &) = default;
257 constexpr basic_iterator & operator=(basic_iterator &&) = default;
258 ~basic_iterator() = default;
259
261 constexpr basic_iterator(basic_iterator<!const_range> const & it) noexcept
263 requires const_range
265 : hash_value{std::move(it.hash_value)},
266 roll_factor{std::move(it.roll_factor)},
267 shape_{std::move(it.shape_)},
268 text_left{std::move(it.text_left)},
269 text_right{std::move(it.text_right)}
270 {}
271
283 basic_iterator(it_t it_start, sentinel_t it_end, shape s_) :
284 shape_{s_}, text_left{it_start}, text_right{std::ranges::next(text_left, shape_.size() - 1, it_end)}
285 {
286 assert(std::ranges::size(shape_) > 0);
287
288 // shape size = 3
289 // Text: 1 2 3 4 5 6 7 8 9
290 // text_left: ^
291 // text_right: ^
292 // distance(text_left, text_right) = 2
293 if (shape_.size() <= std::ranges::distance(text_left, text_right) + 1)
294 {
295 roll_factor = pow(sigma, static_cast<size_t>(std::ranges::size(shape_) - 1));
296 hash_full();
297 }
298 }
299
323 basic_iterator(it_t it_start, sentinel_t it_end, shape s_, bool SEQAN3_DOXYGEN_ONLY(is_end)) : shape_{s_}
324 {
325 assert(std::ranges::size(shape_) > 0);
326
327 auto urange_size = std::ranges::distance(it_start, it_end);
328 auto step = (shape_.size() > urange_size + 1) ? 0 : urange_size - shape_.size() + 1;
329 text_left = std::ranges::next(it_start, step, it_end);
330
331 // shape size = 3
332 // Text: 1 2 3 4 5 6 7 8 9
333 // text_left: ^
334 // text_right: ^
335 // distance(text_left, text_right) = 2
336 if (shape_.size() <= std::ranges::distance(text_left, it_end) + 1)
337 {
338 roll_factor = pow(sigma, static_cast<size_t>(std::ranges::size(shape_) - 1));
339 hash_full();
340 }
341
342 text_right = it_end;
343 }
345
349
351 friend bool operator==(basic_iterator const & lhs, sentinel_t const & rhs) noexcept
352 {
353 return lhs.text_right == rhs;
354 }
355
357 friend bool operator==(sentinel_t const & lhs, basic_iterator const & rhs) noexcept
358 {
359 return lhs == rhs.text_right;
360 }
361
363 friend bool operator==(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
364 {
365 return std::tie(lhs.text_right, lhs.shape_) == std::tie(rhs.text_right, rhs.shape_);
366 }
367
369 friend bool operator!=(basic_iterator const & lhs, sentinel_t const & rhs) noexcept
370 {
371 return !(lhs == rhs);
372 }
373
375 friend bool operator!=(sentinel_t const & lhs, basic_iterator const & rhs) noexcept
376 {
377 return !(lhs == rhs);
378 }
379
381 friend bool operator!=(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
382 {
383 return !(lhs == rhs);
384 }
385
387 friend bool operator<(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
388 {
389 return (lhs.shape_ <= rhs.shape_) && (lhs.text_right < rhs.text_right);
390 }
391
393 friend bool operator>(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
394 {
395 return (lhs.shape_ >= rhs.shape_) && (lhs.text_right > rhs.text_right);
396 }
397
399 friend bool operator<=(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
400 {
401 return (lhs.shape_ <= rhs.shape_) && (lhs.text_right <= rhs.text_right);
402 }
403
405 friend bool operator>=(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
406 {
407 return (lhs.shape_ >= rhs.shape_) && (lhs.text_right >= rhs.text_right);
408 }
409
411
413 basic_iterator & operator++() noexcept
414 {
415 hash_forward();
416 return *this;
417 }
418
420 basic_iterator operator++(int) noexcept
421 {
422 basic_iterator tmp{*this};
423 hash_forward();
424 return tmp;
425 }
426
430 basic_iterator & operator--() noexcept
432 requires std::bidirectional_iterator<it_t>
434 {
435 hash_backward();
436 return *this;
437 }
438
442 basic_iterator operator--(int) noexcept
444 requires std::bidirectional_iterator<it_t>
446 {
447 basic_iterator tmp{*this};
448 hash_backward();
449 return tmp;
450 }
451
455 basic_iterator & operator+=(difference_type const skip) noexcept
457 requires std::random_access_iterator<it_t>
459 {
460 hash_forward(skip);
461 return *this;
462 }
463
467 basic_iterator operator+(difference_type const skip) const noexcept
469 requires std::random_access_iterator<it_t>
471 {
472 basic_iterator tmp{*this};
473 return tmp += skip;
474 }
475
479 friend basic_iterator operator+(difference_type const skip, basic_iterator const & it) noexcept
481 requires std::random_access_iterator<it_t>
483 {
484 return it + skip;
485 }
486
490 basic_iterator & operator-=(difference_type const skip) noexcept
492 requires std::random_access_iterator<it_t>
494 {
495 hash_backward(skip);
496 return *this;
497 }
498
503 basic_iterator operator-(difference_type const skip) const noexcept
505 requires std::random_access_iterator<it_t>
507 {
508 basic_iterator tmp{*this};
509 return tmp -= skip;
510 }
511
515 friend basic_iterator operator-(difference_type const skip, basic_iterator const & it) noexcept
517 requires std::random_access_iterator<it_t>
519 {
520 return it - skip;
521 }
522
527 friend difference_type operator-(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
529 requires std::random_access_iterator<it_t>
531 {
532 return static_cast<difference_type>(lhs.text_right - rhs.text_right);
533 }
534
538 friend difference_type operator-(sentinel_t const & lhs, basic_iterator const & rhs) noexcept
540 requires std::sized_sentinel_for<sentinel_t, it_t>
542 {
543 return static_cast<difference_type>(lhs - rhs.text_right);
544 }
545
549 friend difference_type operator-(basic_iterator const & lhs, sentinel_t const & rhs) noexcept
551 requires std::sized_sentinel_for<it_t, sentinel_t>
553 {
554 return static_cast<difference_type>(lhs.text_right - rhs);
555 }
556
560 reference operator[](difference_type const n) const
562 requires std::random_access_iterator<it_t>
564 {
565 return *(*this + n);
566 }
567
569 value_type operator*() const noexcept
570 {
571 return hash_value + to_rank(*text_right);
572 }
573
574private:
576 using alphabet_t = std::iter_value_t<it_t>;
577
579 static constexpr auto const sigma{alphabet_size<alphabet_t>};
580
582 size_t hash_value{0};
583
585 size_t roll_factor{0};
586
588 shape shape_;
589
591 it_t text_left;
592
594 it_t text_right;
595
597 void hash_forward()
598 {
599 if (shape_.all())
600 {
601 hash_roll_forward();
602 }
603 else
604 {
605 std::ranges::advance(text_left, 1);
606 hash_full();
607 }
608 }
609
614 void hash_forward(difference_type const skip)
616 requires std::random_access_iterator<it_t>
618 {
619 std::ranges::advance(text_left, skip);
620 hash_full();
621 }
622
626 void hash_backward()
628 requires std::bidirectional_iterator<it_t>
630 {
631 if (shape_.all())
632 {
633 hash_roll_backward();
634 }
635 else
636 {
637 std::ranges::advance(text_left, -1);
638 hash_full();
639 }
640 }
641
646 void hash_backward(difference_type const skip)
647 {
648 std::ranges::advance(text_left, -skip);
649 hash_full();
650 }
651
653 void hash_full()
654 {
655 text_right = text_left;
656 hash_value = 0;
657
658 for (size_t i{0}; i < shape_.size() - 1u; ++i)
659 {
660 hash_value += shape_[i] * to_rank(*text_right);
661 hash_value *= shape_[i] ? sigma : 1;
662 std::ranges::advance(text_right, 1);
663 }
664
665 }
666
668 void hash_roll_forward()
669 {
670 hash_value -= to_rank(*(text_left)) * roll_factor;
671 hash_value += to_rank(*(text_right));
672 hash_value *= sigma;
673
674 std::ranges::advance(text_left, 1);
675 std::ranges::advance(text_right, 1);
676 }
677
681 void hash_roll_backward()
683 requires std::bidirectional_iterator<it_t>
685 {
686 std::ranges::advance(text_left, -1);
687 std::ranges::advance(text_right, -1);
688
689 hash_value /= sigma;
690 hash_value -= to_rank(*(text_right));
691 hash_value += to_rank(*(text_left)) * roll_factor;
692 }
693};
694
696template <std::ranges::viewable_range rng_t>
697kmer_hash_view(rng_t &&, shape const & shape_) -> kmer_hash_view<std::views::all_t<rng_t>>;
698
699// ---------------------------------------------------------------------------------------------------------------------
700// kmer_hash_fn (adaptor definition)
701// ---------------------------------------------------------------------------------------------------------------------
702
706struct kmer_hash_fn
707{
709 constexpr auto operator()(shape const & shape_) const
710 {
711 return adaptor_from_functor{*this, shape_};
712 }
713
721 template <std::ranges::range urng_t>
722 constexpr auto operator()(urng_t && urange, shape const & shape_) const
723 {
724 static_assert(std::ranges::viewable_range<urng_t>,
725 "The range parameter to views::kmer_hash cannot be a temporary of a non-view range.");
726 static_assert(std::ranges::forward_range<urng_t>,
727 "The range parameter to views::kmer_hash must model std::ranges::forward_range.");
729 "The range parameter to views::kmer_hash must be over elements of seqan3::semialphabet.");
730
731 return kmer_hash_view{std::forward<urng_t>(urange), shape_};
732 }
733};
735
736} // namespace seqan3::detail
737
738namespace seqan3::views
739{
785inline constexpr auto kmer_hash = detail::kmer_hash_fn{};
786
787} // namespace seqan3::views
Core alphabet concept and free function/type trait wrappers.
T begin(T... args)
T end(T... args)
constexpr auto alphabet_size
A type trait that holds the size of a (semi-)alphabet.
Definition: concept.hpp:861
constexpr auto to_rank
Return the rank representation of a (semi-)alphabet object.
Definition: concept.hpp:155
constexpr auto kmer_hash
Computes hash values for each position of a range via a given shape.
Definition: kmer_hash.hpp:785
constexpr size_t size
The size of a type pack.
Definition: traits.hpp:151
base_t pow(base_t base, exp_t exp)
Computes the value of base raised to the power exp.
Definition: math.hpp:124
Specifies requirements of an input range type for which the const version of that type satisfies the ...
The basis for seqan3::alphabet, but requires only rank interface (not char).
Provides math related functionality.
The SeqAn namespace for views.
Definition: char_to.hpp:22
SeqAn specific customisations in the standard namespace.
T next(T... args)
T operator!=(T... args)
Provides overloads for std::hash.
Provides seqan3::shape.
T tie(T... args)