SeqAn3 3.4.0-rc.1
The Modern C++ library for sequence analysis.
Loading...
Searching...
No Matches
kmer_hash.hpp
Go to the documentation of this file.
1// SPDX-FileCopyrightText: 2006-2024 Knut Reinert & Freie Universität Berlin
2// SPDX-FileCopyrightText: 2016-2024 Knut Reinert & MPI für molekulare Genetik
3// SPDX-License-Identifier: BSD-3-Clause
4
10#pragma once
11
16
17namespace seqan3::detail
18{
19// ---------------------------------------------------------------------------------------------------------------------
20// kmer_hash_view class
21// ---------------------------------------------------------------------------------------------------------------------
22
35template <std::ranges::view urng_t>
36class kmer_hash_view : public std::ranges::view_interface<kmer_hash_view<urng_t>>
37{
38private:
39 static_assert(std::ranges::forward_range<urng_t>, "The kmer_hash_view only works on forward_ranges");
41 "The reference type of the underlying range must model seqan3::semialphabet.");
42
44 urng_t urange;
45
47 shape shape_;
48
49 template <bool const_range>
50 class basic_iterator;
51
53 static inline int max_shape_count = 64 / std::log2(alphabet_size<std::ranges::range_reference_t<urng_t>>);
54
56 [[noreturn]] void shape_too_long_error() const
57 {
58 std::string message{"The shape is too long for the given alphabet.\n"};
59 message += "Alphabet: ";
60 // Note: Since we want the alphabet type name, std::ranges::range_value_t is the better choice.
61 // For seqan3::bitpacked_sequence<seqan3::dna4>:
62 // reference_t: seqan3::bitpacked_sequence<seqan3::dna4>::reference_proxy_type
63 // value_t: seqan3::dna4
64 message += detail::type_name_as_string<std::remove_cvref_t<std::ranges::range_value_t<urng_t>>>;
65 message += "\nMaximum shape count: ";
66 message += std::to_string(max_shape_count);
67 message += "\nGiven shape count: ";
68 message += std::to_string(shape_.count());
69 throw std::invalid_argument{message};
70 }
71
73 inline void validate_shape() const
74 {
75 if (shape_.count() > max_shape_count)
76 shape_too_long_error();
77 }
78
79public:
83 kmer_hash_view()
84 requires std::default_initializable<urng_t>
85 = default;
86 kmer_hash_view(kmer_hash_view const & rhs) = default;
87 kmer_hash_view(kmer_hash_view && rhs) = default;
88 kmer_hash_view & operator=(kmer_hash_view const & rhs) = default;
89 kmer_hash_view & operator=(kmer_hash_view && rhs) = default;
90 ~kmer_hash_view() = default;
91
96 explicit kmer_hash_view(urng_t urange_, shape const & s_) : urange{std::move(urange_)}, shape_{s_}
97 {
98 validate_shape();
99 }
100
105 template <typename rng_t>
106 requires (!std::same_as<std::remove_cvref_t<rng_t>, kmer_hash_view>) && std::ranges::viewable_range<rng_t>
107 && std::constructible_from<urng_t, std::ranges::ref_view<std::remove_reference_t<rng_t>>>
108 explicit kmer_hash_view(rng_t && urange_, shape const & s_) :
109 urange{std::views::all(std::forward<rng_t>(urange_))},
110 shape_{s_}
111 {
112 validate_shape();
113 }
115
132 auto begin() noexcept
133 {
134 return basic_iterator<false>{std::ranges::begin(urange), std::ranges::end(urange), shape_};
135 }
136
138 auto begin() const noexcept
139 requires const_iterable_range<urng_t>
140 {
141 return basic_iterator<true>{std::ranges::begin(urange), std::ranges::end(urange), shape_};
142 }
143
159 auto end() noexcept
160 {
161 // Assigning the end iterator to the text_right iterator of the basic_iterator only works for common ranges.
162 if constexpr (std::ranges::common_range<urng_t>)
163 return basic_iterator<false>{std::ranges::begin(urange), std::ranges::end(urange), shape_, true};
164 else
165 return std::ranges::end(urange);
166 }
167
169 auto end() const noexcept
170 requires const_iterable_range<urng_t>
171 {
172 // Assigning the end iterator to the text_right iterator of the basic_iterator only works for common ranges.
173 if constexpr (std::ranges::common_range<urng_t const>)
174 return basic_iterator<true>{std::ranges::begin(urange), std::ranges::end(urange), shape_, true};
175 else
176 return std::ranges::cend(urange);
177 }
179
183 auto size()
184 requires std::ranges::sized_range<urng_t>
185 {
186 using size_type = std::ranges::range_size_t<urng_t>;
187 return std::max<size_type>(std::ranges::size(urange) + 1, shape_.size()) - shape_.size();
188 }
189
191 auto size() const
192 requires std::ranges::sized_range<urng_t const>
193 {
194 using size_type = std::ranges::range_size_t<urng_t const>;
195 return std::max<size_type>(std::ranges::size(urange) + 1, shape_.size()) - shape_.size();
196 }
197};
198
226template <std::ranges::view urng_t>
227template <bool const_range>
228class kmer_hash_view<urng_t>::basic_iterator :
229 public maybe_iterator_category<maybe_const_iterator_t<const_range, urng_t>>
230{
231private:
233 using it_t = maybe_const_iterator_t<const_range, urng_t>;
235 using sentinel_t = maybe_const_sentinel_t<const_range, urng_t>;
236
237 template <bool other_const_range>
238 friend class basic_iterator;
239
240public:
245 using difference_type = typename std::iter_difference_t<it_t>;
247 using value_type = size_t;
249 using pointer = void;
251 using reference = value_type;
255 detail::iterator_concept_tag_t<it_t>>;
257
261 constexpr basic_iterator() = default;
262 constexpr basic_iterator(basic_iterator const &) = default;
263 constexpr basic_iterator(basic_iterator &&) = default;
264 constexpr basic_iterator & operator=(basic_iterator const &) = default;
265 constexpr basic_iterator & operator=(basic_iterator &&) = default;
266 ~basic_iterator() = default;
267
269 constexpr basic_iterator(basic_iterator<!const_range> const & it) noexcept
270 requires const_range
271 :
272 hash_value{std::move(it.hash_value)},
273 roll_factor{std::move(it.roll_factor)},
274 shape_{std::move(it.shape_)},
275 text_left{std::move(it.text_left)},
276 text_right{std::move(it.text_right)}
277 {}
278
290 basic_iterator(it_t it_start, sentinel_t it_end, shape s_) :
291 shape_{s_},
292 text_left{it_start},
293 text_right{std::ranges::next(text_left, shape_.size() - 1, it_end)}
294 {
295 assert(std::ranges::size(shape_) > 0);
296
297 // shape size = 3
298 // Text: 1 2 3 4 5 6 7 8 9
299 // text_left: ^
300 // text_right: ^
301 // distance(text_left, text_right) = 2
302 if (shape_.size() <= std::ranges::distance(text_left, text_right) + 1)
303 {
304 roll_factor = pow(sigma, static_cast<size_t>(std::ranges::size(shape_) - 1));
305 hash_full();
306 }
307 }
308
332 basic_iterator(it_t it_start, sentinel_t it_end, shape s_, bool SEQAN3_DOXYGEN_ONLY(is_end)) : shape_{s_}
333 {
334 assert(std::ranges::size(shape_) > 0);
335
336 auto urange_size = std::ranges::distance(it_start, it_end);
337 auto step = (shape_.size() > urange_size + 1) ? 0 : urange_size - shape_.size() + 1;
338 text_left = std::ranges::next(it_start, step, it_end);
339
340 // shape size = 3
341 // Text: 1 2 3 4 5 6 7 8 9
342 // text_left: ^
343 // text_right: ^
344 // distance(text_left, text_right) = 2
345 if (shape_.size() <= std::ranges::distance(text_left, it_end) + 1)
346 {
347 roll_factor = pow(sigma, static_cast<size_t>(std::ranges::size(shape_) - 1));
348 hash_full();
349 }
350
351 text_right = it_end;
352 }
354
358
360 friend bool operator==(basic_iterator const & lhs, sentinel_t const & rhs) noexcept
361 {
362 return lhs.text_right == rhs;
363 }
364
366 friend bool operator==(sentinel_t const & lhs, basic_iterator const & rhs) noexcept
367 {
368 return lhs == rhs.text_right;
369 }
370
372 friend bool operator==(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
373 {
374 return std::tie(lhs.text_right, lhs.shape_) == std::tie(rhs.text_right, rhs.shape_);
375 }
376
378 friend bool operator!=(basic_iterator const & lhs, sentinel_t const & rhs) noexcept
379 {
380 return !(lhs == rhs);
381 }
382
384 friend bool operator!=(sentinel_t const & lhs, basic_iterator const & rhs) noexcept
385 {
386 return !(lhs == rhs);
387 }
388
390 friend bool operator!=(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
391 {
392 return !(lhs == rhs);
393 }
394
396 friend bool operator<(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
397 {
398 return (lhs.shape_ <= rhs.shape_) && (lhs.text_right < rhs.text_right);
399 }
400
402 friend bool operator>(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
403 {
404 return (lhs.shape_ >= rhs.shape_) && (lhs.text_right > rhs.text_right);
405 }
406
408 friend bool operator<=(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
409 {
410 return (lhs.shape_ <= rhs.shape_) && (lhs.text_right <= rhs.text_right);
411 }
412
414 friend bool operator>=(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
415 {
416 return (lhs.shape_ >= rhs.shape_) && (lhs.text_right >= rhs.text_right);
417 }
418
420
422 basic_iterator & operator++() noexcept
423 {
424 hash_forward();
425 return *this;
426 }
427
429 basic_iterator operator++(int) noexcept
430 {
431 basic_iterator tmp{*this};
432 hash_forward();
433 return tmp;
434 }
435
439 basic_iterator & operator--() noexcept
440 requires std::bidirectional_iterator<it_t>
441 {
442 hash_backward();
443 return *this;
444 }
445
449 basic_iterator operator--(int) noexcept
450 requires std::bidirectional_iterator<it_t>
451 {
452 basic_iterator tmp{*this};
453 hash_backward();
454 return tmp;
455 }
456
460 basic_iterator & operator+=(difference_type const skip) noexcept
461 requires std::random_access_iterator<it_t>
462 {
463 hash_forward(skip);
464 return *this;
465 }
466
470 basic_iterator operator+(difference_type const skip) const noexcept
471 requires std::random_access_iterator<it_t>
472 {
473 basic_iterator tmp{*this};
474 return tmp += skip;
475 }
476
480 friend basic_iterator operator+(difference_type const skip, basic_iterator const & it) noexcept
481 requires std::random_access_iterator<it_t>
482 {
483 return it + skip;
484 }
485
489 basic_iterator & operator-=(difference_type const skip) noexcept
490 requires std::random_access_iterator<it_t>
491 {
492 hash_backward(skip);
493 return *this;
494 }
495
500 basic_iterator operator-(difference_type const skip) const noexcept
501 requires std::random_access_iterator<it_t>
502 {
503 basic_iterator tmp{*this};
504 return tmp -= skip;
505 }
506
510 friend basic_iterator operator-(difference_type const skip, basic_iterator const & it) noexcept
511 requires std::random_access_iterator<it_t>
512 {
513 return it - skip;
514 }
515
520 friend difference_type operator-(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
521 requires std::sized_sentinel_for<it_t, it_t>
522 {
523 return static_cast<difference_type>(lhs.text_right - rhs.text_right);
524 }
525
529 friend difference_type operator-(sentinel_t const & lhs, basic_iterator const & rhs) noexcept
530 requires std::sized_sentinel_for<sentinel_t, it_t>
531 {
532 return static_cast<difference_type>(lhs - rhs.text_right);
533 }
534
538 friend difference_type operator-(basic_iterator const & lhs, sentinel_t const & rhs) noexcept
539 requires std::sized_sentinel_for<it_t, sentinel_t>
540 {
541 return static_cast<difference_type>(lhs.text_right - rhs);
542 }
543
547 reference operator[](difference_type const n) const
548 requires std::random_access_iterator<it_t>
549 {
550 return *(*this + n);
551 }
552
554 value_type operator*() const noexcept
555 {
556 return hash_value + to_rank(*text_right);
557 }
558
559private:
561 using alphabet_t = std::iter_value_t<it_t>;
562
564 static constexpr auto const sigma{alphabet_size<alphabet_t>};
565
567 size_t hash_value{0};
568
570 size_t roll_factor{0};
571
573 shape shape_;
574
576 it_t text_left;
577
579 it_t text_right;
580
582 void hash_forward()
583 {
584 if (shape_.all())
585 {
586 hash_roll_forward();
587 }
588 else
589 {
590 std::ranges::advance(text_left, 1);
591 hash_full();
592 }
593 }
594
599 void hash_forward(difference_type const skip)
600 requires std::random_access_iterator<it_t>
601 {
602 std::ranges::advance(text_left, skip);
603 hash_full();
604 }
605
609 void hash_backward()
610 requires std::bidirectional_iterator<it_t>
611 {
612 if (shape_.all())
613 {
614 hash_roll_backward();
615 }
616 else
617 {
618 std::ranges::advance(text_left, -1);
619 hash_full();
620 }
621 }
622
627 void hash_backward(difference_type const skip)
628 {
629 std::ranges::advance(text_left, -skip);
630 hash_full();
631 }
632
634 void hash_full()
635 {
636 text_right = text_left;
637 hash_value = 0;
638
639 for (size_t i{0}; i < shape_.size() - 1u; ++i)
640 {
641 hash_value += shape_[i] * to_rank(*text_right);
642 hash_value *= shape_[i] ? sigma : 1;
643 std::ranges::advance(text_right, 1);
644 }
645 }
646
648 void hash_roll_forward()
649 {
650 hash_value -= to_rank(*(text_left)) * roll_factor;
651 hash_value += to_rank(*(text_right));
652 hash_value *= sigma;
653
654 std::ranges::advance(text_left, 1);
655 std::ranges::advance(text_right, 1);
656 }
657
661 void hash_roll_backward()
662 requires std::bidirectional_iterator<it_t>
663 {
664 std::ranges::advance(text_left, -1);
665 std::ranges::advance(text_right, -1);
666
667 hash_value /= sigma;
668 hash_value -= to_rank(*(text_right));
669 hash_value += to_rank(*(text_left)) * roll_factor;
670 }
671};
672
674template <std::ranges::viewable_range rng_t>
675kmer_hash_view(rng_t &&, shape const & shape_) -> kmer_hash_view<std::views::all_t<rng_t>>;
676
677// ---------------------------------------------------------------------------------------------------------------------
678// kmer_hash_fn (adaptor definition)
679// ---------------------------------------------------------------------------------------------------------------------
680
684struct kmer_hash_fn
685{
687 constexpr auto operator()(shape const & shape_) const
688 {
689 return adaptor_from_functor{*this, shape_};
690 }
691
699 template <std::ranges::range urng_t>
700 constexpr auto operator()(urng_t && urange, shape const & shape_) const
701 {
702 static_assert(std::ranges::viewable_range<urng_t>,
703 "The range parameter to views::kmer_hash cannot be a temporary of a non-view range.");
704 static_assert(std::ranges::forward_range<urng_t>,
705 "The range parameter to views::kmer_hash must model std::ranges::forward_range.");
707 "The range parameter to views::kmer_hash must be over elements of seqan3::semialphabet.");
708
709 return kmer_hash_view{std::forward<urng_t>(urange), shape_};
710 }
711};
713
714} // namespace seqan3::detail
715
716namespace seqan3::views
717{
766inline constexpr auto kmer_hash = detail::kmer_hash_fn{};
767
768} // namespace seqan3::views
Core alphabet concept and free function/type trait wrappers.
T begin(T... args)
T end(T... args)
constexpr auto alphabet_size
A type trait that holds the size of a (semi-)alphabet.
Definition alphabet/concept.hpp:846
constexpr auto to_rank
Return the rank representation of a (semi-)alphabet object.
Definition alphabet/concept.hpp:152
constexpr auto kmer_hash
Computes hash values for each position of a range via a given shape.
Definition kmer_hash.hpp:766
constexpr size_t size
The size of a type pack.
Definition type_pack/traits.hpp:143
base_t pow(base_t base, exp_t exp)
Computes the value of base raised to the power exp.
Definition math.hpp:119
Specifies requirements of an input range type for which the const version of that type satisfies the ...
The basis for seqan3::alphabet, but requires only rank interface (not char).
T log2(T... args)
Provides math related functionality.
The SeqAn namespace for views.
Definition char_strictly_to.hpp:19
SeqAn specific customisations in the standard namespace.
T next(T... args)
T operator!=(T... args)
Provides overloads for std::hash.
Provides seqan3::shape.
T tie(T... args)
T to_string(T... args)
Hide me