SeqAn3 3.2.0
The Modern C++ library for sequence analysis.
gap_decorator.hpp
Go to the documentation of this file.
1// -----------------------------------------------------------------------------------------------------
2// Copyright (c) 2006-2022, Knut Reinert & Freie Universität Berlin
3// Copyright (c) 2016-2022, Knut Reinert & MPI für molekulare Genetik
4// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6// -----------------------------------------------------------------------------------------------------
7
14#pragma once
15
16#include <algorithm>
17#include <limits>
18#include <ranges>
19#include <set>
20#include <tuple>
21#include <type_traits>
22
28
29namespace seqan3
30{
31
77template <std::ranges::viewable_range inner_type>
78 requires std::ranges::random_access_range<inner_type> && std::ranges::sized_range<inner_type>
79 && (std::is_const_v<std::remove_reference_t<inner_type>> || std::ranges::view<inner_type>)
81{
82private:
83 // Declaration of class's iterator types; for the definition see below.
84 template <bool = true>
85 class basic_iterator;
86
88 using iterator = basic_iterator<true>;
91 using const_iterator = iterator;
92
94 using ungapped_view_type = decltype(views::type_reduce(std::declval<inner_type &&>()));
95
96public:
105
111
118
123 using size_type = std::ranges::range_size_t<inner_type>;
124
129 using difference_type = std::ranges::range_difference_t<inner_type>;
131
136 using unaligned_sequence_type = inner_type;
137
148 gap_decorator() = default;
149 gap_decorator(gap_decorator const &) = default;
150 gap_decorator & operator=(gap_decorator const &) = default;
151 gap_decorator(gap_decorator && rhs) = default;
153 ~gap_decorator() = default;
154
159 template <typename other_range_t>
160 requires (!std::same_as<other_range_t, gap_decorator>)
162 && std::ranges::viewable_range<other_range_t> // at end, otherwise it competes with the move ctor
163 gap_decorator(other_range_t && range) : ungapped_view{views::type_reduce(std::forward<inner_type>(range))}
164 {
165 } // TODO (@smehringer) only works for copyable views. Has to be changed once views are not required to be copyable anymore.
166 // !\}
167
182 {
183 if (anchors.size())
184 return anchors.rbegin()->second + ungapped_view.size();
185
186 return ungapped_view.size();
187 }
188
204 iterator insert_gap(const_iterator const it, size_type const count = 1)
205 {
206 if (!count) // [[unlikely]]
207 return it;
208
209 size_type const pos = it - begin();
210 assert(pos <= size());
211
212 set_iterator_type it_set = anchors.upper_bound(anchor_gap_t{pos, bound_dummy});
213
214 if (it_set == anchors.begin()) // will also catch if anchors is empty since begin() == end()
215 {
216 anchors.emplace_hint(anchors.begin(), anchor_gap_t{pos, count});
217 }
218 else // there are gaps before pos
219 {
220 --it_set;
221 auto gap_len{it_set->second};
222 if (it_set != anchors.begin())
223 gap_len -= (*(std::prev(it_set))).second;
224
225 if (it_set->first + gap_len >= pos) // extend existing gap
226 {
227 anchor_gap_t gap{it_set->first, it_set->second + count};
228 it_set = anchors.erase(it_set);
229 anchors.insert(it_set, gap);
230 }
231 else // insert new gap
232 {
233 anchor_gap_t gap{pos, it_set->second + count};
234 ++it_set;
235 anchors.insert(it_set, gap);
236 }
237 }
238
239 // post-processing: reverse update of succeeding gaps
240 rupdate(pos, count);
241 return iterator{*this, pos};
242 }
243
257 iterator erase_gap(const_iterator const it)
258 {
259 // check if [it, it+gap_len[ covers [first, last[
260 if ((*it) != gap{}) // [[unlikely]]
261 throw gap_erase_failure("The range to be erased does not correspond to a consecutive gap.");
262
263 return erase_gap(it, std::next(it));
264 }
265
281 iterator erase_gap(const_iterator const first, const_iterator const last)
282 {
283 size_type const pos1 = first - begin();
284 size_type const pos2 = last - begin();
285 set_iterator_type it = anchors.upper_bound(anchor_gap_t{pos1, bound_dummy}); // first element greater than pos1
286
287 if (it == anchors.begin())
288 throw gap_erase_failure{"There is no gap to erase in range [" + std::to_string(pos1) + ","
289 + std::to_string(pos2) + "]."};
290
291 --it;
292 size_type const gap_len = gap_length(it);
293
294 // check if [it, it+gap_len[ covers [first, last[
295 if ((it->first + gap_len) < pos2) // [[unlikely]]
296 {
297 throw gap_erase_failure{"The range to be erased does not correspond to a consecutive gap."};
298 }
299 // case 1: complete gap is deleted
300 else if (gap_len == pos2 - pos1)
301 {
302 it = anchors.erase(it);
303 }
304 // case 2: gap to be deleted in tail or larger than 1 (equiv. to shift tail left, i.e. pos remains unchanged)
305 else
306 {
307 anchor_gap_t gap{it->first, it->second - pos2 + pos1};
308 it = anchors.erase(it);
309 it = anchors.insert(it, gap); // amortized constant because of hint
310 ++it; // update node after the current
311 }
312
313 // post-processing: forward update of succeeding gaps
314 update(it, pos2 - pos1);
315
316 return iterator{*this, pos1};
317 }
318
326 template <typename unaligned_sequence_t> // generic template to use forwarding reference
327 requires std::assignable_from<gap_decorator &, unaligned_sequence_t>
328 friend void assign_unaligned(gap_decorator & dec, unaligned_sequence_t && unaligned)
329 {
330 dec = unaligned;
331 }
333
352 const_iterator begin() const noexcept
353 {
354 return iterator{*this};
355 }
356
358 const_iterator cbegin() const noexcept
359 {
360 return const_iterator{*this};
361 }
362
378 const_iterator end() const noexcept
379 {
380 return iterator{*this, size()};
381 }
382
384 const_iterator cend() const noexcept
385 {
386 return const_iterator{*this, size()};
387 }
389
408 {
409 if (i >= size()) // [[unlikely]]
410 throw std::out_of_range{"Trying to access element behind the last in gap_decorator."};
411 return (*this)[i];
412 }
413
416 {
417 if (i >= size()) // [[unlikely]]
418 throw std::out_of_range{"Trying to access element behind the last in gap_decorator."};
419 return (*this)[i];
420 }
421
435 {
436 return *iterator{*this, i};
437 }
439
463 friend bool operator==(gap_decorator const & lhs, gap_decorator const & rhs)
464 {
465 if (lhs.size() == rhs.size() && lhs.anchors == rhs.anchors
466 && std::ranges::equal(lhs.ungapped_view, rhs.ungapped_view))
467 {
468 return true;
469 }
470
471 return false;
472 }
473
478 friend bool operator!=(gap_decorator const & lhs, gap_decorator const & rhs)
479 {
480 return !(lhs == rhs);
481 }
482
487 friend bool operator<(gap_decorator const & lhs, gap_decorator const & rhs)
488 {
489 auto lit = lhs.begin();
490 auto rit = rhs.begin();
491
492 while (lit != lhs.end() && rit != rhs.end() && *lit == *rit)
493 ++lit, ++rit;
494
495 if (rit == rhs.end())
496 return false; // lhs == rhs, or rhs prefix of lhs
497 else if (lit == lhs.end())
498 return true; // lhs prefix of rhs
499
500 return *lit < *rit;
501 }
502
507 friend bool operator<=(gap_decorator const & lhs, gap_decorator const & rhs)
508 {
509 auto lit = lhs.begin();
510 auto rit = rhs.begin();
511
512 while (lit != lhs.end() && rit != rhs.end() && *lit == *rit)
513 ++lit, ++rit;
514
515 if (lit == lhs.end())
516 return true; // lhs == rhs, or lhs prefix of rhs
517 else if (rit == rhs.end())
518 return false; // rhs prefix of lhs
519
520 return *lit < *rit;
521 }
522
527 friend bool operator>(gap_decorator const & lhs, gap_decorator const & rhs)
528 {
529 return !(lhs <= rhs);
530 }
531
536 friend bool operator>=(gap_decorator const & lhs, gap_decorator const & rhs)
537 {
538 return !(lhs < rhs);
539 }
541
542private:
544 using anchor_gap_t = typename std::pair<size_t, size_t>;
545
547 using anchor_set_type = std::set<anchor_gap_t>;
548
550 using set_iterator_type = typename anchor_set_type::iterator;
551
553 static constexpr size_t bound_dummy{std::numeric_limits<size_t>::max()};
554
567 size_type gap_length(set_iterator_type it) const
568 {
569 return (it == anchors.begin()) ? it->second : it->second - (*std::prev(it)).second;
570 }
571
584 void rupdate(size_type const pos, size_type const offset)
585 {
586 for (auto it = std::prev(anchors.end(), 1); it->first > pos;)
587 {
588 anchors.emplace_hint(it, anchor_gap_t{it->first + offset, it->second + offset});
589 anchors.erase(*it--);
590 }
591 }
592
605 void update(set_iterator_type it, size_type const offset)
606 {
607 while (it != anchors.end())
608 {
609 anchor_gap_t gap{it->first - offset, it->second - offset};
610 it = anchors.erase(it);
611 it = anchors.insert(it, gap);
612 ++it;
613 }
614 }
615
617 ungapped_view_type ungapped_view{};
618
620 anchor_set_type anchors{};
621};
622
630template <std::ranges::viewable_range urng_t>
631 requires (!std::ranges::view<std::remove_reference_t<urng_t>>)
633
638template <std::ranges::view urng_t>
641
658template <std::ranges::viewable_range inner_type>
659 requires std::ranges::random_access_range<inner_type> && std::ranges::sized_range<inner_type>
660 && (std::is_const_v<std::remove_reference_t<inner_type>> || std::ranges::view<inner_type>)
661template <bool>
663{
664protected:
666 typename std::add_pointer_t<gap_decorator const> host{nullptr};
668 typename gap_decorator::size_type pos{0u};
670 int64_t ungapped_view_pos{0}; // must be signed because we need this value to be -1 in case of leading gaps.
673 typename gap_decorator::size_type left_gap_end{0};
676 typename gap_decorator::set_iterator_type anchor_set_it{};
678 bool is_at_gap{true};
679
681 void jump(typename gap_decorator::size_type const new_pos)
682 {
683 assert(new_pos <= host->size());
684 pos = new_pos;
685
686 anchor_set_it = host->anchors.upper_bound(anchor_gap_t{pos, host->bound_dummy});
687 ungapped_view_pos = pos;
688
689 if (anchor_set_it != host->anchors.begin())
690 {
691 typename gap_decorator::set_iterator_type prev{std::prev(anchor_set_it)};
692 size_type gap_len{prev->second};
693
694 if (prev != host->anchors.begin())
695 gap_len -= std::prev(prev)->second;
696
697 ungapped_view_pos -= prev->second;
698 left_gap_end = prev->first + gap_len;
699 }
700
701 if (ungapped_view_pos != static_cast<int64_t>(host->ungapped_view.size()) && pos >= left_gap_end
702 && (anchor_set_it == host->anchors.end() || pos < anchor_set_it->first))
703 is_at_gap = false;
704 else
705 is_at_gap = true;
706 }
707
708public:
713 using difference_type = typename gap_decorator::difference_type;
715 using value_type = typename gap_decorator::value_type;
717 using reference = typename gap_decorator::const_reference;
719 using pointer = value_type *;
721 using iterator_category = std::bidirectional_iterator_tag;
723
727 basic_iterator() = default;
728 basic_iterator(basic_iterator const &) = default;
729 basic_iterator & operator=(basic_iterator const &) = default;
730 basic_iterator(basic_iterator &&) = default;
731 basic_iterator & operator=(basic_iterator &&) = default;
732 ~basic_iterator() = default;
733
735 explicit basic_iterator(gap_decorator const & host_) : host(&host_), anchor_set_it{host_.anchors.begin()}
736 {
737 if (host_.anchors.size() && (*host_.anchors.begin()).first == 0) // there are gaps at the very front
738 {
739 --ungapped_view_pos; // set ungapped_view_pos to -1 so operator++ works without an extra if-branch.
740 left_gap_end = anchor_set_it->second;
741 ++anchor_set_it;
742 }
743 else
744 {
745 is_at_gap = false;
746 }
747 }
748
750 basic_iterator(gap_decorator const & host_, typename gap_decorator::size_type const pos_) : host(&host_)
751 {
752 jump(pos_); // random access to pos
753 }
755
760 basic_iterator & operator++()
761 {
762 assert(host); // host is set
763 ++pos;
764
765 if (pos < left_gap_end) // we stay within the preceding gap stretch
766 return *this;
767
768 if (anchor_set_it == host->anchors.end() || pos < anchor_set_it->first)
769 { // proceed within the view since we are right of the previous gap but didn't arrive at the right gap yet
770 ++ungapped_view_pos;
771 if (ungapped_view_pos != static_cast<int64_t>(host->ungapped_view.size()))
772 is_at_gap = false;
773 }
774 else
775 { // we arrived at the right gap and have to update the variables. ungapped_view_pos remains unchanged.
776 left_gap_end = anchor_set_it->first + anchor_set_it->second
777 - ((anchor_set_it != host->anchors.begin()) ? (std::prev(anchor_set_it))->second : 0);
778 ++anchor_set_it;
779 is_at_gap = true;
780
781 if (left_gap_end == host->size()) // very last gap
782 ++ungapped_view_pos;
783 }
784
785 return *this;
786 }
787
789 basic_iterator operator++(int)
790 {
791 basic_iterator cpy{*this};
792 ++(*this);
793 return cpy;
794 }
795
797 basic_iterator & operator+=(difference_type const skip)
798 {
799 this->jump(this->pos + skip);
800 return *this;
801 }
802
804 basic_iterator operator+(difference_type const skip) const
805 {
806 return basic_iterator{*(this->host), this->pos + skip};
807 }
808
810 friend basic_iterator operator+(difference_type const skip, basic_iterator const & it)
811 {
812 return it + skip;
813 }
814
816 basic_iterator & operator--()
817 {
818 assert(host); // host is set
819 --pos;
820
821 if (pos < left_gap_end)
822 { // there was no gap before but we arrive at the left gap and have to update the variables.
823 (anchor_set_it != host->anchors.begin()) ? --anchor_set_it : anchor_set_it;
824
825 if (anchor_set_it != host->anchors.begin())
826 {
827 auto prev = std::prev(anchor_set_it);
828 left_gap_end =
829 prev->first + prev->second - ((prev != host->anchors.begin()) ? std::prev(prev)->second : 0);
830 }
831 else // [[unlikely]]
832 {
833 left_gap_end = 0;
834 }
835 is_at_gap = true;
836 }
837 else if (anchor_set_it == host->anchors.end() || pos < anchor_set_it->first)
838 { // we are neither at the left nor right gap
839 --ungapped_view_pos;
840 is_at_gap = false;
841 }
842 // else -> no op (we are still within the right gap stretch)
843
844 return *this;
845 }
846
848 basic_iterator operator--(int)
849 {
850 basic_iterator cpy{*this};
851 --(*this);
852 return cpy;
853 }
854
856 basic_iterator & operator-=(difference_type const skip)
857 {
858 this->jump(this->pos - skip);
859 return *this;
860 }
861
863 basic_iterator operator-(difference_type const skip) const
864 {
865 return basic_iterator{*(this->host), this->pos - skip};
866 }
867
869 friend basic_iterator operator-(difference_type const skip, basic_iterator const & it)
870 {
871 return it - skip;
872 }
873
875 difference_type operator-(basic_iterator const lhs) const noexcept
876 {
877 return static_cast<difference_type>(this->pos - lhs.pos);
878 }
880
885 reference operator*() const
886 {
887 return (is_at_gap) ? reference{gap{}} : reference{host->ungapped_view[ungapped_view_pos]};
888 }
889
891 reference operator[](difference_type const n) const
892 {
893 return *(*this + n);
894 }
896
903 friend bool operator==(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
904 {
905 return lhs.pos == rhs.pos;
906 }
907
909 friend bool operator!=(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
910 {
911 return lhs.pos != rhs.pos;
912 }
913
915 friend bool operator<(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
916 {
917 return lhs.pos < rhs.pos;
918 }
919
921 friend bool operator>(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
922 {
923 return lhs.pos > rhs.pos;
924 }
925
927 friend bool operator<=(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
928 {
929 return lhs.pos <= rhs.pos;
930 }
931
933 friend bool operator>=(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
934 {
935 return lhs.pos >= rhs.pos;
936 }
938};
939
940} // namespace seqan3
Includes customized exception types for the alignment module .
Core alphabet concept and free function/type trait wrappers.
T begin(T... args)
A combined alphabet that can hold values of either of its alternatives..
Definition: alphabet_variant.hpp:120
A gap decorator allows the annotation of sequences with gap symbols while leaving the underlying sequ...
Definition: gap_decorator.hpp:81
reference at(size_type const i)
Return the i-th element as a reference.
Definition: gap_decorator.hpp:407
friend bool operator==(gap_decorator const &lhs, gap_decorator const &rhs)
Checks whether lhs is equal to rhs.
Definition: gap_decorator.hpp:463
friend void assign_unaligned(gap_decorator &dec, unaligned_sequence_t &&unaligned)
Assigns a new sequence of type seqan3::gap_decorator::unaligned_sequence_type to the decorator.
Definition: gap_decorator.hpp:328
gap_decorator & operator=(gap_decorator const &)=default
Defaulted.
gap_decorator(gap_decorator &&rhs)=default
Defaulted.
const_reference at(size_type const i) const
Return the i-th element as a reference.
Definition: gap_decorator.hpp:415
std::ranges::range_difference_t< inner_type > difference_type
The difference type of the underlying sequence.
Definition: gap_decorator.hpp:129
const_iterator cend() const noexcept
Returns an iterator pointing behind the last element of the decorator.
Definition: gap_decorator.hpp:384
const_iterator end() const noexcept
Returns an iterator pointing behind the last element of the decorator.
Definition: gap_decorator.hpp:378
inner_type unaligned_sequence_type
The underlying ungapped range type.
Definition: gap_decorator.hpp:136
reference operator[](size_type const i) const
Return the i-th element as a reference.
Definition: gap_decorator.hpp:434
friend bool operator!=(gap_decorator const &lhs, gap_decorator const &rhs)
Checks whether lhs is not equal to rhs.
Definition: gap_decorator.hpp:478
iterator erase_gap(const_iterator const it)
Erase one gap symbol at the indicated iterator postion.
Definition: gap_decorator.hpp:257
gap_decorator(other_range_t &&range)
Construct with the ungapped range type.
Definition: gap_decorator.hpp:163
const_iterator begin() const noexcept
Returns an iterator to the first element of the container.
Definition: gap_decorator.hpp:352
iterator insert_gap(const_iterator const it, size_type const count=1)
Insert a gap of length count at the aligned sequence iterator position.
Definition: gap_decorator.hpp:204
~gap_decorator()=default
Defaulted.
iterator erase_gap(const_iterator const first, const_iterator const last)
Erase gap symbols at the iterator postions [first, last[.
Definition: gap_decorator.hpp:281
gap_decorator(gap_decorator const &)=default
Defaulted.
friend bool operator<=(gap_decorator const &lhs, gap_decorator const &rhs)
Checks whether lhs is less than or equal to rhs.
Definition: gap_decorator.hpp:507
size_type size() const
Returns the total length of the aligned sequence.
Definition: gap_decorator.hpp:181
reference const_reference
const_reference type equals reference type equals value type because the underlying sequence must not...
Definition: gap_decorator.hpp:117
gap_decorator & operator=(gap_decorator &&rhs)=default
Defaulted.
gapped< std::ranges::range_value_t< inner_type > > value_type
The variant type of the alphabet type and gap symbol type (see seqan3::gapped).
Definition: gap_decorator.hpp:104
friend bool operator>(gap_decorator const &lhs, gap_decorator const &rhs)
Checks whether lhs is greater than rhs.
Definition: gap_decorator.hpp:527
friend bool operator>=(gap_decorator const &lhs, gap_decorator const &rhs)
Checks whether lhs is greater than or equal to rhs.
Definition: gap_decorator.hpp:536
gap_decorator()=default
Default constructor.
friend bool operator<(gap_decorator const &lhs, gap_decorator const &rhs)
Checks whether lhs is less than rhs.
Definition: gap_decorator.hpp:487
std::ranges::range_size_t< inner_type > size_type
The size_type of the underlying sequence.
Definition: gap_decorator.hpp:123
const_iterator cbegin() const noexcept
Returns an iterator to the first element of the container.
Definition: gap_decorator.hpp:358
Thrown in function seqan3::erase_gap, if a position does not contain a gap.
Definition: exception.hpp:24
The alphabet of a gap character '-'.
Definition: gap.hpp:39
T equal(T... args)
Provides seqan3::gap.
Provides seqan3::gapped.
@ offset
Sequence (seqan3::field::seq) relative start position (0-based), unsigned value.
constexpr ptrdiff_t count
Count the occurrences of a type in a pack.
Definition: traits.hpp:164
constexpr size_t size
The size of a type pack.
Definition: traits.hpp:146
constexpr auto type_reduce
A view adaptor that behaves like std::views::all, but type erases certain ranges.
Definition: type_reduce.hpp:150
T max(T... args)
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:29
gap_decorator(urng_t &&range) -> gap_decorator< std::remove_reference_t< urng_t > const & >
Ranges (not views!) always deduce to const & range_type since they are access-only anyway.
T next(T... args)
T prev(T... args)
T size(T... args)
T to_string(T... args)
Provides seqan3::views::type_reduce.