SeqAn3 3.4.0-rc.1
The Modern C++ library for sequence analysis.
Loading...
Searching...
No Matches
kmer_hash.hpp
Go to the documentation of this file.
1// SPDX-FileCopyrightText: 2006-2024 Knut Reinert & Freie Universität Berlin
2// SPDX-FileCopyrightText: 2016-2024 Knut Reinert & MPI für molekulare Genetik
3// SPDX-License-Identifier: BSD-3-Clause
4
10#pragma once
11
16
17namespace seqan3::detail
18{
19// ---------------------------------------------------------------------------------------------------------------------
20// kmer_hash_view class
21// ---------------------------------------------------------------------------------------------------------------------
22
35template <std::ranges::view urng_t>
36class kmer_hash_view : public std::ranges::view_interface<kmer_hash_view<urng_t>>
37{
38private:
39 static_assert(std::ranges::forward_range<urng_t>, "The kmer_hash_view only works on forward_ranges");
41 "The reference type of the underlying range must model seqan3::semialphabet.");
42
44 urng_t urange;
45
48
49 template <bool const_range>
50 class basic_iterator;
51
53 static inline int max_shape_count = 64 / std::log2(alphabet_size<std::ranges::range_reference_t<urng_t>>);
54
56 [[noreturn]] void shape_too_long_error() const
57 {
58 std::string message{"The shape is too long for the given alphabet.\n"};
59 message += "Alphabet: ";
60 // Note: Since we want the alphabet type name, std::ranges::range_value_t is the better choice.
61 // For seqan3::bitpacked_sequence<seqan3::dna4>:
62 // reference_t: seqan3::bitpacked_sequence<seqan3::dna4>::reference_proxy_type
63 // value_t: seqan3::dna4
64 message += detail::type_name_as_string<std::remove_cvref_t<std::ranges::range_value_t<urng_t>>>;
65 message += "\nMaximum shape count: ";
67 message += "\nGiven shape count: ";
68 message += std::to_string(shape_.count());
69 throw std::invalid_argument{message};
70 }
71
73 inline void validate_shape() const
74 {
77 }
78
79public:
84 requires std::default_initializable<urng_t>
85 = default;
86 kmer_hash_view(kmer_hash_view const & rhs) = default;
87 kmer_hash_view(kmer_hash_view && rhs) = default;
88 kmer_hash_view & operator=(kmer_hash_view const & rhs) = default;
89 kmer_hash_view & operator=(kmer_hash_view && rhs) = default;
90 ~kmer_hash_view() = default;
91
96 explicit kmer_hash_view(urng_t urange_, shape const & s_) : urange{std::move(urange_)}, shape_{s_}
97 {
99 }
100
105 template <typename rng_t>
106 requires (!std::same_as<std::remove_cvref_t<rng_t>, kmer_hash_view>) && std::ranges::viewable_range<rng_t>
107 && std::constructible_from<urng_t, std::ranges::ref_view<std::remove_reference_t<rng_t>>>
108 explicit kmer_hash_view(rng_t && urange_, shape const & s_) :
109 urange{std::views::all(std::forward<rng_t>(urange_))},
110 shape_{s_}
111 {
113 }
115
132 auto begin() noexcept
133 {
134 return basic_iterator<false>{std::ranges::begin(urange), std::ranges::end(urange), shape_};
135 }
136
138 auto begin() const noexcept
139 requires const_iterable_range<urng_t>
140 {
141 return basic_iterator<true>{std::ranges::begin(urange), std::ranges::end(urange), shape_};
142 }
143
159 auto end() noexcept
160 {
161 // Assigning the end iterator to the text_right iterator of the basic_iterator only works for common ranges.
162 if constexpr (std::ranges::common_range<urng_t>)
163 return basic_iterator<false>{std::ranges::begin(urange), std::ranges::end(urange), shape_, true};
164 else
165 return std::ranges::end(urange);
166 }
167
169 auto end() const noexcept
170 requires const_iterable_range<urng_t>
171 {
172 // Assigning the end iterator to the text_right iterator of the basic_iterator only works for common ranges.
173 if constexpr (std::ranges::common_range<urng_t const>)
174 return basic_iterator<true>{std::ranges::begin(urange), std::ranges::end(urange), shape_, true};
175 else
176 return std::ranges::cend(urange);
177 }
179
183 auto size()
184 requires std::ranges::sized_range<urng_t>
185 {
186 using size_type = std::ranges::range_size_t<urng_t>;
187 return std::max<size_type>(std::ranges::size(urange) + 1, shape_.size()) - shape_.size();
188 }
189
191 auto size() const
192 requires std::ranges::sized_range<urng_t const>
193 {
194 using size_type = std::ranges::range_size_t<urng_t const>;
195 return std::max<size_type>(std::ranges::size(urange) + 1, shape_.size()) - shape_.size();
196 }
197};
198
226template <std::ranges::view urng_t>
227template <bool const_range>
229 public maybe_iterator_category<maybe_const_iterator_t<const_range, urng_t>>
230{
231private:
236
237 template <bool other_const_range>
238 friend class basic_iterator;
239
240public:
247 using value_type = size_t;
249 using pointer = void;
257
261 constexpr basic_iterator() = default;
262 constexpr basic_iterator(basic_iterator const &) = default;
263 constexpr basic_iterator(basic_iterator &&) = default;
264 constexpr basic_iterator & operator=(basic_iterator const &) = default;
265 constexpr basic_iterator & operator=(basic_iterator &&) = default;
266 ~basic_iterator() = default;
267
269 constexpr basic_iterator(basic_iterator<!const_range> const & it) noexcept
270 requires const_range
271 :
272 hash_value{std::move(it.hash_value)},
273 roll_factor{std::move(it.roll_factor)},
274 shape_{std::move(it.shape_)},
275 text_left{std::move(it.text_left)},
276 text_right{std::move(it.text_right)}
277 {}
278
290 basic_iterator(it_t it_start, sentinel_t it_end, shape s_) :
291 shape_{s_},
292 text_left{it_start},
293 text_right{std::ranges::next(text_left, shape_.size() - 1, it_end)}
294 {
295 assert(std::ranges::size(shape_) > 0);
296
297 // shape size = 3
298 // Text: 1 2 3 4 5 6 7 8 9
299 // text_left: ^
300 // text_right: ^
301 // distance(text_left, text_right) = 2
302 if (shape_.size() <= std::ranges::distance(text_left, text_right) + 1)
303 {
304 roll_factor = pow(sigma, static_cast<size_t>(std::ranges::size(shape_) - 1));
305 hash_full();
306 }
307 }
308
332 basic_iterator(it_t it_start, sentinel_t it_end, shape s_, bool SEQAN3_DOXYGEN_ONLY(is_end)) : shape_{s_}
333 {
334 assert(std::ranges::size(shape_) > 0);
335
336 auto urange_size = std::ranges::distance(it_start, it_end);
337 auto step = (shape_.size() > urange_size + 1) ? 0 : urange_size - shape_.size() + 1;
338 text_left = std::ranges::next(it_start, step, it_end);
339
340 // shape size = 3
341 // Text: 1 2 3 4 5 6 7 8 9
342 // text_left: ^
343 // text_right: ^
344 // distance(text_left, text_right) = 2
345 if (shape_.size() <= std::ranges::distance(text_left, it_end) + 1)
346 {
347 roll_factor = pow(sigma, static_cast<size_t>(std::ranges::size(shape_) - 1));
348 hash_full();
349 }
350
351 text_right = it_end;
352 }
354
358
360 friend bool operator==(basic_iterator const & lhs, sentinel_t const & rhs) noexcept
361 {
362 return lhs.text_right == rhs;
363 }
364
366 friend bool operator==(sentinel_t const & lhs, basic_iterator const & rhs) noexcept
367 {
368 return lhs == rhs.text_right;
369 }
370
372 friend bool operator==(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
373 {
374 return std::tie(lhs.text_right, lhs.shape_) == std::tie(rhs.text_right, rhs.shape_);
375 }
376
378 friend bool operator!=(basic_iterator const & lhs, sentinel_t const & rhs) noexcept
379 {
380 return !(lhs == rhs);
381 }
382
384 friend bool operator!=(sentinel_t const & lhs, basic_iterator const & rhs) noexcept
385 {
386 return !(lhs == rhs);
387 }
388
390 friend bool operator!=(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
391 {
392 return !(lhs == rhs);
393 }
394
396 friend bool operator<(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
397 {
398 return (lhs.shape_ <= rhs.shape_) && (lhs.text_right < rhs.text_right);
399 }
400
402 friend bool operator>(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
403 {
404 return (lhs.shape_ >= rhs.shape_) && (lhs.text_right > rhs.text_right);
405 }
406
408 friend bool operator<=(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
409 {
410 return (lhs.shape_ <= rhs.shape_) && (lhs.text_right <= rhs.text_right);
411 }
412
414 friend bool operator>=(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
415 {
416 return (lhs.shape_ >= rhs.shape_) && (lhs.text_right >= rhs.text_right);
417 }
418
420
423 {
424 hash_forward();
425 return *this;
426 }
427
430 {
431 basic_iterator tmp{*this};
432 hash_forward();
433 return tmp;
434 }
435
440 requires std::bidirectional_iterator<it_t>
441 {
442 hash_backward();
443 return *this;
444 }
445
450 requires std::bidirectional_iterator<it_t>
451 {
452 basic_iterator tmp{*this};
453 hash_backward();
454 return tmp;
455 }
456
461 requires std::random_access_iterator<it_t>
462 {
463 hash_forward(skip);
464 return *this;
465 }
466
470 basic_iterator operator+(difference_type const skip) const noexcept
471 requires std::random_access_iterator<it_t>
472 {
473 basic_iterator tmp{*this};
474 return tmp += skip;
475 }
476
480 friend basic_iterator operator+(difference_type const skip, basic_iterator const & it) noexcept
481 requires std::random_access_iterator<it_t>
482 {
483 return it + skip;
484 }
485
490 requires std::random_access_iterator<it_t>
491 {
492 hash_backward(skip);
493 return *this;
494 }
495
500 basic_iterator operator-(difference_type const skip) const noexcept
501 requires std::random_access_iterator<it_t>
502 {
503 basic_iterator tmp{*this};
504 return tmp -= skip;
505 }
506
510 friend basic_iterator operator-(difference_type const skip, basic_iterator const & it) noexcept
511 requires std::random_access_iterator<it_t>
512 {
513 return it - skip;
514 }
515
520 friend difference_type operator-(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
521 requires std::sized_sentinel_for<it_t, it_t>
522 {
523 return static_cast<difference_type>(lhs.text_right - rhs.text_right);
524 }
525
529 friend difference_type operator-(sentinel_t const & lhs, basic_iterator const & rhs) noexcept
530 requires std::sized_sentinel_for<sentinel_t, it_t>
531 {
532 return static_cast<difference_type>(lhs - rhs.text_right);
533 }
534
538 friend difference_type operator-(basic_iterator const & lhs, sentinel_t const & rhs) noexcept
539 requires std::sized_sentinel_for<it_t, sentinel_t>
540 {
541 return static_cast<difference_type>(lhs.text_right - rhs);
542 }
543
548 requires std::random_access_iterator<it_t>
549 {
550 return *(*this + n);
551 }
552
554 value_type operator*() const noexcept
555 {
556 return hash_value + to_rank(*text_right);
557 }
558
559private:
562
564 static constexpr auto const sigma{alphabet_size<alphabet_t>};
565
567 size_t hash_value{0};
568
570 size_t roll_factor{0};
571
574
577
580
583 {
584 if (shape_.all())
585 {
586 hash_roll_forward();
587 }
588 else
589 {
590 std::ranges::advance(text_left, 1);
591 hash_full();
592 }
593 }
594
600 requires std::random_access_iterator<it_t>
601 {
602 std::ranges::advance(text_left, skip);
603 hash_full();
604 }
605
610 requires std::bidirectional_iterator<it_t>
611 {
612 if (shape_.all())
613 {
614 hash_roll_backward();
615 }
616 else
617 {
618 std::ranges::advance(text_left, -1);
619 hash_full();
620 }
621 }
622
628 {
629 std::ranges::advance(text_left, -skip);
630 hash_full();
631 }
632
635 {
636 text_right = text_left;
637 hash_value = 0;
638
639 for (size_t i{0}; i < shape_.size() - 1u; ++i)
640 {
641 hash_value += shape_[i] * to_rank(*text_right);
642 hash_value *= shape_[i] ? sigma : 1;
643 std::ranges::advance(text_right, 1);
644 }
645 }
646
649 {
650 hash_value -= to_rank(*(text_left)) * roll_factor;
651 hash_value += to_rank(*(text_right));
652 hash_value *= sigma;
653
654 std::ranges::advance(text_left, 1);
655 std::ranges::advance(text_right, 1);
656 }
657
662 requires std::bidirectional_iterator<it_t>
663 {
664 std::ranges::advance(text_left, -1);
665 std::ranges::advance(text_right, -1);
666
667 hash_value /= sigma;
668 hash_value -= to_rank(*(text_right));
669 hash_value += to_rank(*(text_left)) * roll_factor;
670 }
671};
672
674template <std::ranges::viewable_range rng_t>
676
677// ---------------------------------------------------------------------------------------------------------------------
678// kmer_hash_fn (adaptor definition)
679// ---------------------------------------------------------------------------------------------------------------------
680
685{
687 constexpr auto operator()(shape const & shape_) const
688 {
689 return adaptor_from_functor{*this, shape_};
690 }
691
699 template <std::ranges::range urng_t>
700 constexpr auto operator()(urng_t && urange, shape const & shape_) const
701 {
702 static_assert(std::ranges::viewable_range<urng_t>,
703 "The range parameter to views::kmer_hash cannot be a temporary of a non-view range.");
704 static_assert(std::ranges::forward_range<urng_t>,
705 "The range parameter to views::kmer_hash must model std::ranges::forward_range.");
707 "The range parameter to views::kmer_hash must be over elements of seqan3::semialphabet.");
708
709 return kmer_hash_view{std::forward<urng_t>(urange), shape_};
710 }
711};
713
714} // namespace seqan3::detail
715
716namespace seqan3::views
717{
766inline constexpr auto kmer_hash = detail::kmer_hash_fn{};
767
768} // namespace seqan3::views
Core alphabet concept and free function/type trait wrappers.
T begin(T... args)
Template for range adaptor closure objects that store arguments and wrap a proto-adaptor.
Definition adaptor_from_functor.hpp:54
Iterator for calculating hash values via a given seqan3::shape.
Definition kmer_hash.hpp:230
friend bool operator<(basic_iterator const &lhs, basic_iterator const &rhs) noexcept
Compare to another basic_iterator.
Definition kmer_hash.hpp:396
basic_iterator operator-(difference_type const skip) const noexcept
Return decremented copy of this iterator.
Definition kmer_hash.hpp:500
basic_iterator & operator-=(difference_type const skip) noexcept
Decrement iterator by skip.
Definition kmer_hash.hpp:489
constexpr basic_iterator(basic_iterator const &)=default
Defaulted.
size_t value_type
Value type of this iterator.
Definition kmer_hash.hpp:247
void hash_backward()
Decrements iterator by 1.
Definition kmer_hash.hpp:609
friend bool operator==(sentinel_t const &lhs, basic_iterator const &rhs) noexcept
Compare to iterator on text.
Definition kmer_hash.hpp:366
basic_iterator & operator++() noexcept
Pre-increment.
Definition kmer_hash.hpp:422
shape shape_
The shape to use.
Definition kmer_hash.hpp:573
basic_iterator(it_t it_start, sentinel_t it_end, shape s_, bool is_end)
Construct from a given iterator on the text and a seqan3::shape. /param[in] it_start Iterator pointin...
Definition kmer_hash.hpp:332
friend bool operator>=(basic_iterator const &lhs, basic_iterator const &rhs) noexcept
Compare to another basic_iterator.
Definition kmer_hash.hpp:414
basic_iterator(it_t it_start, sentinel_t it_end, shape s_)
Construct from a given iterator on the text and a seqan3::shape. /param[in] it_start Iterator pointin...
Definition kmer_hash.hpp:290
constexpr basic_iterator(basic_iterator<!const_range > const &it) noexcept
Allow iterator on a const range to be constructible from an iterator over a non-const range.
Definition kmer_hash.hpp:269
maybe_const_sentinel_t< const_range, urng_t > sentinel_t
The sentinel type of the underlying range.
Definition kmer_hash.hpp:235
friend bool operator<=(basic_iterator const &lhs, basic_iterator const &rhs) noexcept
Compare to another basic_iterator.
Definition kmer_hash.hpp:408
friend bool operator!=(basic_iterator const &lhs, sentinel_t const &rhs) noexcept
Compare to iterator on text.
Definition kmer_hash.hpp:378
friend bool operator==(basic_iterator const &lhs, sentinel_t const &rhs) noexcept
Compare to iterator on text.
Definition kmer_hash.hpp:360
value_type operator*() const noexcept
Return the hash value.
Definition kmer_hash.hpp:554
friend bool operator>(basic_iterator const &lhs, basic_iterator const &rhs) noexcept
Compare to another basic_iterator.
Definition kmer_hash.hpp:402
value_type reference
Reference to value_type.
Definition kmer_hash.hpp:251
friend difference_type operator-(basic_iterator const &lhs, basic_iterator const &rhs) noexcept
Return offset between two iterator's positions.
Definition kmer_hash.hpp:520
void pointer
The pointer type.
Definition kmer_hash.hpp:249
basic_iterator operator--(int) noexcept
Post-decrement.
Definition kmer_hash.hpp:449
reference operator[](difference_type const n) const
Move the iterator by a given offset and return the corresponding hash value.
Definition kmer_hash.hpp:547
void hash_forward()
Increments iterator by 1.
Definition kmer_hash.hpp:582
constexpr basic_iterator(basic_iterator &&)=default
Defaulted.
friend basic_iterator operator-(difference_type const skip, basic_iterator const &it) noexcept
Non-member operator- delegates to non-friend operator-.
Definition kmer_hash.hpp:510
basic_iterator & operator+=(difference_type const skip) noexcept
Forward this iterator.
Definition kmer_hash.hpp:460
it_t text_left
Iterator to the leftmost position of the k-mer.
Definition kmer_hash.hpp:576
friend bool operator!=(sentinel_t const &lhs, basic_iterator const &rhs) noexcept
Compare to iterator on text.
Definition kmer_hash.hpp:384
friend bool operator==(basic_iterator const &lhs, basic_iterator const &rhs) noexcept
Compare to another basic_iterator.
Definition kmer_hash.hpp:372
typename std::iter_difference_t< it_t > difference_type
Type for distances between iterators.
Definition kmer_hash.hpp:245
void hash_backward(difference_type const skip)
Decrements iterator by skip.
Definition kmer_hash.hpp:627
void hash_full()
Calculates a hash value by explicitly looking at each position.
Definition kmer_hash.hpp:634
basic_iterator operator+(difference_type const skip) const noexcept
Forward copy of this iterator.
Definition kmer_hash.hpp:470
friend basic_iterator operator+(difference_type const skip, basic_iterator const &it) noexcept
Non-member operator+ delegates to non-friend operator+.
Definition kmer_hash.hpp:480
void hash_roll_forward()
Calculates the next hash value via rolling hash.
Definition kmer_hash.hpp:648
friend difference_type operator-(basic_iterator const &lhs, sentinel_t const &rhs) noexcept
Return offset this and remote sentinel's position.
Definition kmer_hash.hpp:538
friend difference_type operator-(sentinel_t const &lhs, basic_iterator const &rhs) noexcept
Return offset between remote sentinel's position and this.
Definition kmer_hash.hpp:529
constexpr basic_iterator()=default
Defaulted.
basic_iterator & operator--() noexcept
Pre-decrement.
Definition kmer_hash.hpp:439
it_t text_right
Iterator to the rightmost position of the k-mer.
Definition kmer_hash.hpp:579
constexpr basic_iterator & operator=(basic_iterator &&)=default
Defaulted.
maybe_const_iterator_t< const_range, urng_t > it_t
The iterator type of the underlying range.
Definition kmer_hash.hpp:233
void hash_roll_backward()
Calculates the previous hash value via rolling hash.
Definition kmer_hash.hpp:661
void hash_forward(difference_type const skip)
Increments iterator by skip.
Definition kmer_hash.hpp:599
friend bool operator!=(basic_iterator const &lhs, basic_iterator const &rhs) noexcept
Compare to another basic_iterator.
Definition kmer_hash.hpp:390
basic_iterator operator++(int) noexcept
Post-increment.
Definition kmer_hash.hpp:429
constexpr basic_iterator & operator=(basic_iterator const &)=default
Defaulted.
The type returned by seqan3::views::kmer_hash.
Definition kmer_hash.hpp:37
auto end() const noexcept
Returns an iterator to the element following the last element of the range.
Definition kmer_hash.hpp:169
auto end() noexcept
Returns an iterator to the element following the last element of the range.
Definition kmer_hash.hpp:159
auto begin() noexcept
Returns an iterator to the first element of the range.
Definition kmer_hash.hpp:132
void shape_too_long_error() const
Throws the exception for validate_shape().
Definition kmer_hash.hpp:56
urng_t urange
The underlying range.
Definition kmer_hash.hpp:44
auto begin() const noexcept
Returns an iterator to the first element of the range.
Definition kmer_hash.hpp:138
kmer_hash_view()=default
Defaulted.
shape shape_
The shape to use.
Definition kmer_hash.hpp:47
void validate_shape() const
Checks that the shape is not too long for the given alphabet.
Definition kmer_hash.hpp:73
kmer_hash_view(rng_t &&urange_, shape const &s_)
Construct from a non-view that can be view-wrapped and a given shape.
Definition kmer_hash.hpp:108
auto size() const
Returns the size of the range, if the underlying range is a std::ranges::sized_range.
Definition kmer_hash.hpp:191
auto size()
Returns the size of the range, if the underlying range is a std::ranges::sized_range.
Definition kmer_hash.hpp:183
static int max_shape_count
The maximum shape count for the given alphabet.
Definition kmer_hash.hpp:53
constexpr size_type size() const noexcept
Returns the number of elements in the container, i.e. std::distance(begin(), end()).
Definition dynamic_bitset.hpp:1230
constexpr size_type count() const noexcept
Returns the number of set bits.
Definition dynamic_bitset.hpp:1034
constexpr bool all() const noexcept
Checks if all bit are set.
Definition dynamic_bitset.hpp:1005
A class that defines which positions of a pattern to hash.
Definition shape.hpp:57
constexpr auto alphabet_size
A type trait that holds the size of a (semi-)alphabet.
Definition alphabet/concept.hpp:846
constexpr auto to_rank
Return the rank representation of a (semi-)alphabet object.
Definition alphabet/concept.hpp:152
std::ranges::sentinel_t< maybe_const_range_t< const_v, range_t > > maybe_const_sentinel_t
Returns the const sentinel of range_t if const_range is true; otherwise the non-const sentinel.
Definition core/range/type_traits.hpp:46
std::ranges::iterator_t< maybe_const_range_t< const_range, range_t > > maybe_const_iterator_t
Returns the const iterator of range_t if const_range is true; otherwise the non-const iterator.
Definition core/range/type_traits.hpp:41
constexpr auto kmer_hash
Computes hash values for each position of a range via a given shape.
Definition kmer_hash.hpp:766
base_t pow(base_t base, exp_t exp)
Computes the value of base raised to the power exp.
Definition math.hpp:119
Specifies requirements of an input range type for which the const version of that type satisfies the ...
The basis for seqan3::alphabet, but requires only rank interface (not char).
T log2(T... args)
Provides math related functionality.
The internal SeqAn3 namespace.
Definition aligned_sequence_concept.hpp:26
The SeqAn namespace for views.
Definition char_strictly_to.hpp:19
SeqAn specific customisations in the standard namespace.
Provides overloads for std::hash.
Provides seqan3::shape.
views::kmer_hash's range adaptor object type (non-closure).
Definition kmer_hash.hpp:685
constexpr auto operator()(shape const &shape_) const
Store the shape and return a range adaptor closure object.
Definition kmer_hash.hpp:687
constexpr auto operator()(urng_t &&urange, shape const &shape_) const
Call the view's constructor with the underlying view and a seqan3::shape as argument.
Definition kmer_hash.hpp:700
Defines iterator_category member if underlying_iterator_t has a valid std::iterator_traits::iterator_...
Definition iterator_traits.hpp:39
T tie(T... args)
T to_string(T... args)
Hide me