SeqAn3 3.2.0
The Modern C++ library for sequence analysis.
kmer_hash.hpp
Go to the documentation of this file.
1// -----------------------------------------------------------------------------------------------------
2// Copyright (c) 2006-2022, Knut Reinert & Freie Universität Berlin
3// Copyright (c) 2016-2022, Knut Reinert & MPI für molekulare Genetik
4// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6// -----------------------------------------------------------------------------------------------------
7
13#pragma once
14
19
20namespace seqan3::detail
21{
22// ---------------------------------------------------------------------------------------------------------------------
23// kmer_hash_view class
24// ---------------------------------------------------------------------------------------------------------------------
25
38template <std::ranges::view urng_t>
39class kmer_hash_view : public std::ranges::view_interface<kmer_hash_view<urng_t>>
40{
41private:
42 static_assert(std::ranges::forward_range<urng_t>, "The kmer_hash_view only works on forward_ranges");
44 "The reference type of the underlying range must model seqan3::semialphabet.");
45
47 urng_t urange;
48
50 shape shape_;
51
52 template <bool const_range>
53 class basic_iterator;
54
55public:
59 kmer_hash_view()
60 requires std::default_initializable<urng_t>
61 = default;
62 kmer_hash_view(kmer_hash_view const & rhs) = default;
63 kmer_hash_view(kmer_hash_view && rhs) = default;
64 kmer_hash_view & operator=(kmer_hash_view const & rhs) = default;
65 kmer_hash_view & operator=(kmer_hash_view && rhs) = default;
66 ~kmer_hash_view() = default;
67
72 kmer_hash_view(urng_t urange_, shape const & s_) : urange{std::move(urange_)}, shape_{s_}
73 {
74 if (shape_.count() > (64 / std::log2(alphabet_size<std::ranges::range_reference_t<urng_t>>)))
75 {
76 throw std::invalid_argument{"The chosen shape/alphabet combination is not valid. "
77 "The alphabet or shape size must be reduced."};
78 }
79 }
80
85 template <typename rng_t>
86 requires (!std::same_as<std::remove_cvref_t<rng_t>, kmer_hash_view>) && std::ranges::viewable_range<rng_t>
87 && std::constructible_from<urng_t, std::ranges::ref_view<std::remove_reference_t<rng_t>>>
88 kmer_hash_view(rng_t && urange_, shape const & s_) :
89 urange{std::views::all(std::forward<rng_t>(urange_))},
90 shape_{s_}
91 {
92 if (shape_.count() > (64 / std::log2(alphabet_size<std::ranges::range_reference_t<urng_t>>)))
93 {
94 throw std::invalid_argument{"The chosen shape/alphabet combination is not valid. "
95 "The alphabet or shape size must be reduced."};
96 }
97 }
99
116 auto begin() noexcept
117 {
118 return basic_iterator<false>{std::ranges::begin(urange), std::ranges::end(urange), shape_};
119 }
120
122 auto begin() const noexcept
123 requires const_iterable_range<urng_t>
124 {
125 return basic_iterator<true>{std::ranges::cbegin(urange), std::ranges::cend(urange), shape_};
126 }
127
143 auto end() noexcept
144 {
145 // Assigning the end iterator to the text_right iterator of the basic_iterator only works for common ranges.
146 if constexpr (std::ranges::common_range<urng_t>)
147 return basic_iterator<false>{std::ranges::begin(urange), std::ranges::end(urange), shape_, true};
148 else
149 return std::ranges::end(urange);
150 }
151
153 auto end() const noexcept
154 requires const_iterable_range<urng_t>
155 {
156 // Assigning the end iterator to the text_right iterator of the basic_iterator only works for common ranges.
157 if constexpr (std::ranges::common_range<urng_t const>)
158 return basic_iterator<true>{std::ranges::cbegin(urange), std::ranges::cend(urange), shape_, true};
159 else
160 return std::ranges::cend(urange);
161 }
163
167 auto size()
168 requires std::ranges::sized_range<urng_t>
169 {
170 using size_type = std::ranges::range_size_t<urng_t>;
171 return std::max<size_type>(std::ranges::size(urange) + 1, shape_.size()) - shape_.size();
172 }
173
175 auto size() const
176 requires std::ranges::sized_range<urng_t const>
177 {
178 using size_type = std::ranges::range_size_t<urng_t const>;
179 return std::max<size_type>(std::ranges::size(urange) + 1, shape_.size()) - shape_.size();
180 }
181};
182
210template <std::ranges::view urng_t>
211template <bool const_range>
212class kmer_hash_view<urng_t>::basic_iterator :
213 public maybe_iterator_category<maybe_const_iterator_t<const_range, urng_t>>
214{
215private:
217 using it_t = maybe_const_iterator_t<const_range, urng_t>;
219 using sentinel_t = maybe_const_sentinel_t<const_range, urng_t>;
220
221 template <bool other_const_range>
222 friend class basic_iterator;
223
224public:
229 using difference_type = typename std::iter_difference_t<it_t>;
231 using value_type = size_t;
233 using pointer = void;
235 using reference = value_type;
239 detail::iterator_concept_tag_t<it_t>>;
241
245 constexpr basic_iterator() = default;
246 constexpr basic_iterator(basic_iterator const &) = default;
247 constexpr basic_iterator(basic_iterator &&) = default;
248 constexpr basic_iterator & operator=(basic_iterator const &) = default;
249 constexpr basic_iterator & operator=(basic_iterator &&) = default;
250 ~basic_iterator() = default;
251
253 constexpr basic_iterator(basic_iterator<!const_range> const & it) noexcept
254 requires const_range
255 :
256 hash_value{std::move(it.hash_value)},
257 roll_factor{std::move(it.roll_factor)},
258 shape_{std::move(it.shape_)},
259 text_left{std::move(it.text_left)},
260 text_right{std::move(it.text_right)}
261 {}
262
274 basic_iterator(it_t it_start, sentinel_t it_end, shape s_) :
275 shape_{s_},
276 text_left{it_start},
277 text_right{std::ranges::next(text_left, shape_.size() - 1, it_end)}
278 {
279 assert(std::ranges::size(shape_) > 0);
280
281 // shape size = 3
282 // Text: 1 2 3 4 5 6 7 8 9
283 // text_left: ^
284 // text_right: ^
285 // distance(text_left, text_right) = 2
286 if (shape_.size() <= std::ranges::distance(text_left, text_right) + 1)
287 {
288 roll_factor = pow(sigma, static_cast<size_t>(std::ranges::size(shape_) - 1));
289 hash_full();
290 }
291 }
292
316 basic_iterator(it_t it_start, sentinel_t it_end, shape s_, bool SEQAN3_DOXYGEN_ONLY(is_end)) : shape_{s_}
317 {
318 assert(std::ranges::size(shape_) > 0);
319
320 auto urange_size = std::ranges::distance(it_start, it_end);
321 auto step = (shape_.size() > urange_size + 1) ? 0 : urange_size - shape_.size() + 1;
322 text_left = std::ranges::next(it_start, step, it_end);
323
324 // shape size = 3
325 // Text: 1 2 3 4 5 6 7 8 9
326 // text_left: ^
327 // text_right: ^
328 // distance(text_left, text_right) = 2
329 if (shape_.size() <= std::ranges::distance(text_left, it_end) + 1)
330 {
331 roll_factor = pow(sigma, static_cast<size_t>(std::ranges::size(shape_) - 1));
332 hash_full();
333 }
334
335 text_right = it_end;
336 }
338
342
344 friend bool operator==(basic_iterator const & lhs, sentinel_t const & rhs) noexcept
345 {
346 return lhs.text_right == rhs;
347 }
348
350 friend bool operator==(sentinel_t const & lhs, basic_iterator const & rhs) noexcept
351 {
352 return lhs == rhs.text_right;
353 }
354
356 friend bool operator==(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
357 {
358 return std::tie(lhs.text_right, lhs.shape_) == std::tie(rhs.text_right, rhs.shape_);
359 }
360
362 friend bool operator!=(basic_iterator const & lhs, sentinel_t const & rhs) noexcept
363 {
364 return !(lhs == rhs);
365 }
366
368 friend bool operator!=(sentinel_t const & lhs, basic_iterator const & rhs) noexcept
369 {
370 return !(lhs == rhs);
371 }
372
374 friend bool operator!=(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
375 {
376 return !(lhs == rhs);
377 }
378
380 friend bool operator<(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
381 {
382 return (lhs.shape_ <= rhs.shape_) && (lhs.text_right < rhs.text_right);
383 }
384
386 friend bool operator>(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
387 {
388 return (lhs.shape_ >= rhs.shape_) && (lhs.text_right > rhs.text_right);
389 }
390
392 friend bool operator<=(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
393 {
394 return (lhs.shape_ <= rhs.shape_) && (lhs.text_right <= rhs.text_right);
395 }
396
398 friend bool operator>=(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
399 {
400 return (lhs.shape_ >= rhs.shape_) && (lhs.text_right >= rhs.text_right);
401 }
402
404
406 basic_iterator & operator++() noexcept
407 {
408 hash_forward();
409 return *this;
410 }
411
413 basic_iterator operator++(int) noexcept
414 {
415 basic_iterator tmp{*this};
416 hash_forward();
417 return tmp;
418 }
419
423 basic_iterator & operator--() noexcept
424 requires std::bidirectional_iterator<it_t>
425 {
426 hash_backward();
427 return *this;
428 }
429
433 basic_iterator operator--(int) noexcept
434 requires std::bidirectional_iterator<it_t>
435 {
436 basic_iterator tmp{*this};
437 hash_backward();
438 return tmp;
439 }
440
444 basic_iterator & operator+=(difference_type const skip) noexcept
445 requires std::random_access_iterator<it_t>
446 {
447 hash_forward(skip);
448 return *this;
449 }
450
454 basic_iterator operator+(difference_type const skip) const noexcept
455 requires std::random_access_iterator<it_t>
456 {
457 basic_iterator tmp{*this};
458 return tmp += skip;
459 }
460
464 friend basic_iterator operator+(difference_type const skip, basic_iterator const & it) noexcept
465 requires std::random_access_iterator<it_t>
466 {
467 return it + skip;
468 }
469
473 basic_iterator & operator-=(difference_type const skip) noexcept
474 requires std::random_access_iterator<it_t>
475 {
476 hash_backward(skip);
477 return *this;
478 }
479
484 basic_iterator operator-(difference_type const skip) const noexcept
485 requires std::random_access_iterator<it_t>
486 {
487 basic_iterator tmp{*this};
488 return tmp -= skip;
489 }
490
494 friend basic_iterator operator-(difference_type const skip, basic_iterator const & it) noexcept
495 requires std::random_access_iterator<it_t>
496 {
497 return it - skip;
498 }
499
504 friend difference_type operator-(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
505 requires std::sized_sentinel_for<it_t, it_t>
506 {
507 return static_cast<difference_type>(lhs.text_right - rhs.text_right);
508 }
509
513 friend difference_type operator-(sentinel_t const & lhs, basic_iterator const & rhs) noexcept
514 requires std::sized_sentinel_for<sentinel_t, it_t>
515 {
516 return static_cast<difference_type>(lhs - rhs.text_right);
517 }
518
522 friend difference_type operator-(basic_iterator const & lhs, sentinel_t const & rhs) noexcept
523 requires std::sized_sentinel_for<it_t, sentinel_t>
524 {
525 return static_cast<difference_type>(lhs.text_right - rhs);
526 }
527
531 reference operator[](difference_type const n) const
532 requires std::random_access_iterator<it_t>
533 {
534 return *(*this + n);
535 }
536
538 value_type operator*() const noexcept
539 {
540 return hash_value + to_rank(*text_right);
541 }
542
543private:
545 using alphabet_t = std::iter_value_t<it_t>;
546
548 static constexpr auto const sigma{alphabet_size<alphabet_t>};
549
551 size_t hash_value{0};
552
554 size_t roll_factor{0};
555
557 shape shape_;
558
560 it_t text_left;
561
563 it_t text_right;
564
566 void hash_forward()
567 {
568 if (shape_.all())
569 {
570 hash_roll_forward();
571 }
572 else
573 {
574 std::ranges::advance(text_left, 1);
575 hash_full();
576 }
577 }
578
583 void hash_forward(difference_type const skip)
584 requires std::random_access_iterator<it_t>
585 {
586 std::ranges::advance(text_left, skip);
587 hash_full();
588 }
589
593 void hash_backward()
594 requires std::bidirectional_iterator<it_t>
595 {
596 if (shape_.all())
597 {
598 hash_roll_backward();
599 }
600 else
601 {
602 std::ranges::advance(text_left, -1);
603 hash_full();
604 }
605 }
606
611 void hash_backward(difference_type const skip)
612 {
613 std::ranges::advance(text_left, -skip);
614 hash_full();
615 }
616
618 void hash_full()
619 {
620 text_right = text_left;
621 hash_value = 0;
622
623 for (size_t i{0}; i < shape_.size() - 1u; ++i)
624 {
625 hash_value += shape_[i] * to_rank(*text_right);
626 hash_value *= shape_[i] ? sigma : 1;
627 std::ranges::advance(text_right, 1);
628 }
629 }
630
632 void hash_roll_forward()
633 {
634 hash_value -= to_rank(*(text_left)) * roll_factor;
635 hash_value += to_rank(*(text_right));
636 hash_value *= sigma;
637
638 std::ranges::advance(text_left, 1);
639 std::ranges::advance(text_right, 1);
640 }
641
645 void hash_roll_backward()
646 requires std::bidirectional_iterator<it_t>
647 {
648 std::ranges::advance(text_left, -1);
649 std::ranges::advance(text_right, -1);
650
651 hash_value /= sigma;
652 hash_value -= to_rank(*(text_right));
653 hash_value += to_rank(*(text_left)) * roll_factor;
654 }
655};
656
658template <std::ranges::viewable_range rng_t>
659kmer_hash_view(rng_t &&, shape const & shape_) -> kmer_hash_view<std::views::all_t<rng_t>>;
660
661// ---------------------------------------------------------------------------------------------------------------------
662// kmer_hash_fn (adaptor definition)
663// ---------------------------------------------------------------------------------------------------------------------
664
668struct kmer_hash_fn
669{
671 constexpr auto operator()(shape const & shape_) const
672 {
673 return adaptor_from_functor{*this, shape_};
674 }
675
683 template <std::ranges::range urng_t>
684 constexpr auto operator()(urng_t && urange, shape const & shape_) const
685 {
686 static_assert(std::ranges::viewable_range<urng_t>,
687 "The range parameter to views::kmer_hash cannot be a temporary of a non-view range.");
688 static_assert(std::ranges::forward_range<urng_t>,
689 "The range parameter to views::kmer_hash must model std::ranges::forward_range.");
691 "The range parameter to views::kmer_hash must be over elements of seqan3::semialphabet.");
692
693 return kmer_hash_view{std::forward<urng_t>(urange), shape_};
694 }
695};
697
698} // namespace seqan3::detail
699
700namespace seqan3::views
701{
750inline constexpr auto kmer_hash = detail::kmer_hash_fn{};
751
752} // namespace seqan3::views
Core alphabet concept and free function/type trait wrappers.
T begin(T... args)
T end(T... args)
constexpr auto alphabet_size
A type trait that holds the size of a (semi-)alphabet.
Definition: concept.hpp:849
constexpr auto to_rank
Return the rank representation of a (semi-)alphabet object.
Definition: concept.hpp:155
constexpr auto kmer_hash
Computes hash values for each position of a range via a given shape.
Definition: kmer_hash.hpp:750
constexpr size_t size
The size of a type pack.
Definition: traits.hpp:146
base_t pow(base_t base, exp_t exp)
Computes the value of base raised to the power exp.
Definition: math.hpp:122
Specifies requirements of an input range type for which the const version of that type satisfies the ...
The basis for seqan3::alphabet, but requires only rank interface (not char).
T log2(T... args)
Provides math related functionality.
The SeqAn namespace for views.
Definition: char_strictly_to.hpp:22
SeqAn specific customisations in the standard namespace.
T next(T... args)
T operator!=(T... args)
Provides overloads for std::hash.
Provides seqan3::shape.
T tie(T... args)