SeqAn3 3.4.0-rc.1
The Modern C++ library for sequence analysis.
Loading...
Searching...
No Matches
gap_decorator.hpp
Go to the documentation of this file.
1// SPDX-FileCopyrightText: 2006-2024 Knut Reinert & Freie Universität Berlin
2// SPDX-FileCopyrightText: 2016-2024 Knut Reinert & MPI für molekulare Genetik
3// SPDX-License-Identifier: BSD-3-Clause
4
11#pragma once
12
13#include <algorithm>
14#include <limits>
15#include <ranges>
16#include <set>
17#include <tuple>
18#include <type_traits>
19
25
26namespace seqan3
27{
28
74template <std::ranges::viewable_range inner_type>
75 requires std::ranges::random_access_range<inner_type> && std::ranges::sized_range<inner_type>
76 && (std::is_const_v<std::remove_reference_t<inner_type>> || std::ranges::view<inner_type>)
78{
79private:
80 // Declaration of class's iterator types; for the definition see below.
81 template <bool = true>
82 class basic_iterator;
83
85 using iterator = basic_iterator<true>;
88 using const_iterator = iterator;
89
91 using ungapped_view_type = decltype(views::type_reduce(std::declval<inner_type &&>()));
92
93public:
102
108
115
120 using size_type = std::ranges::range_size_t<inner_type>;
121
126 using difference_type = std::ranges::range_difference_t<inner_type>;
128
133 using unaligned_sequence_type = inner_type;
134
145 gap_decorator() = default;
146 gap_decorator(gap_decorator const &) = default;
147 gap_decorator & operator=(gap_decorator const &) = default;
148 gap_decorator(gap_decorator && rhs) = default;
150 ~gap_decorator() = default;
151
156 template <typename other_range_t>
157 requires (!std::same_as<other_range_t, gap_decorator>)
159 && std::ranges::viewable_range<other_range_t> // at end, otherwise it competes with the move ctor
160 gap_decorator(other_range_t && range) : ungapped_view{views::type_reduce(std::forward<inner_type>(range))}
161 {
162 } // TODO (@smehringer) only works for copyable views. Has to be changed once views are not required to be copyable anymore.
163 // !\}
164
179 {
180 if (anchors.size())
181 return anchors.rbegin()->second + ungapped_view.size();
182
183 return ungapped_view.size();
184 }
185
201 iterator insert_gap(const_iterator const it, size_type const count = 1)
202 {
203 if (!count) // [[unlikely]]
204 return it;
205
206 size_type const pos = it - begin();
207 assert(pos <= size());
208
209 set_iterator_type it_set = anchors.upper_bound(anchor_gap_t{pos, bound_dummy});
210
211 if (it_set == anchors.begin()) // will also catch if anchors is empty since begin() == end()
212 {
213 anchors.emplace_hint(anchors.begin(), anchor_gap_t{pos, count});
214 }
215 else // there are gaps before pos
216 {
217 --it_set;
218 auto gap_len{it_set->second};
219 if (it_set != anchors.begin())
220 gap_len -= (*(std::prev(it_set))).second;
221
222 if (it_set->first + gap_len >= pos) // extend existing gap
223 {
224 anchor_gap_t gap{it_set->first, it_set->second + count};
225 it_set = anchors.erase(it_set);
226 anchors.insert(it_set, gap);
227 }
228 else // insert new gap
229 {
230 anchor_gap_t gap{pos, it_set->second + count};
231 ++it_set;
232 anchors.insert(it_set, gap);
233 }
234 }
235
236 // post-processing: reverse update of succeeding gaps
237 rupdate(pos, count);
238 return iterator{*this, pos};
239 }
240
254 iterator erase_gap(const_iterator const it)
255 {
256 // check if [it, it+gap_len[ covers [first, last[
257 if ((*it) != gap{}) // [[unlikely]]
258 throw gap_erase_failure("The range to be erased does not correspond to a consecutive gap.");
259
260 return erase_gap(it, std::next(it));
261 }
262
278 iterator erase_gap(const_iterator const first, const_iterator const last)
279 {
280 size_type const pos1 = first - begin();
281 size_type const pos2 = last - begin();
282 set_iterator_type it = anchors.upper_bound(anchor_gap_t{pos1, bound_dummy}); // first element greater than pos1
283
284 if (it == anchors.begin())
285 throw gap_erase_failure{"There is no gap to erase in range [" + std::to_string(pos1) + ","
286 + std::to_string(pos2) + "]."};
287
288 --it;
289 size_type const gap_len = gap_length(it);
290
291 // check if [it, it+gap_len[ covers [first, last[
292 if ((it->first + gap_len) < pos2) // [[unlikely]]
293 {
294 throw gap_erase_failure{"The range to be erased does not correspond to a consecutive gap."};
295 }
296 // case 1: complete gap is deleted
297 else if (gap_len == pos2 - pos1)
298 {
299 it = anchors.erase(it);
300 }
301 // case 2: gap to be deleted in tail or larger than 1 (equiv. to shift tail left, i.e. pos remains unchanged)
302 else
303 {
304 anchor_gap_t gap{it->first, it->second - pos2 + pos1};
305 it = anchors.erase(it);
306 it = anchors.insert(it, gap); // amortized constant because of hint
307 ++it; // update node after the current
308 }
309
310 // post-processing: forward update of succeeding gaps
311 update(it, pos2 - pos1);
312
313 return iterator{*this, pos1};
314 }
315
323 template <typename unaligned_sequence_t> // generic template to use forwarding reference
324 requires std::assignable_from<gap_decorator &, unaligned_sequence_t>
325 friend void assign_unaligned(gap_decorator & dec, unaligned_sequence_t && unaligned)
326 {
327 dec = unaligned;
328 }
330
349 const_iterator begin() const noexcept
350 {
351 return iterator{*this};
352 }
353
355 const_iterator cbegin() const noexcept
356 {
357 return const_iterator{*this};
358 }
359
375 const_iterator end() const noexcept
376 {
377 return iterator{*this, size()};
378 }
379
381 const_iterator cend() const noexcept
382 {
383 return const_iterator{*this, size()};
384 }
386
405 {
406 if (i >= size()) // [[unlikely]]
407 throw std::out_of_range{"Trying to access element behind the last in gap_decorator."};
408 return (*this)[i];
409 }
410
413 {
414 if (i >= size()) // [[unlikely]]
415 throw std::out_of_range{"Trying to access element behind the last in gap_decorator."};
416 return (*this)[i];
417 }
418
432 {
433 return *iterator{*this, i};
434 }
436
460 friend bool operator==(gap_decorator const & lhs, gap_decorator const & rhs)
461 {
462 if (lhs.size() == rhs.size() && lhs.anchors == rhs.anchors
463 && std::ranges::equal(lhs.ungapped_view, rhs.ungapped_view))
464 {
465 return true;
466 }
467
468 return false;
469 }
470
475 friend bool operator!=(gap_decorator const & lhs, gap_decorator const & rhs)
476 {
477 return !(lhs == rhs);
478 }
479
484 friend bool operator<(gap_decorator const & lhs, gap_decorator const & rhs)
485 {
486 auto lit = lhs.begin();
487 auto rit = rhs.begin();
488
489 while (lit != lhs.end() && rit != rhs.end() && *lit == *rit)
490 ++lit, ++rit;
491
492 if (rit == rhs.end())
493 return false; // lhs == rhs, or rhs prefix of lhs
494 else if (lit == lhs.end())
495 return true; // lhs prefix of rhs
496
497 return *lit < *rit;
498 }
499
504 friend bool operator<=(gap_decorator const & lhs, gap_decorator const & rhs)
505 {
506 auto lit = lhs.begin();
507 auto rit = rhs.begin();
508
509 while (lit != lhs.end() && rit != rhs.end() && *lit == *rit)
510 ++lit, ++rit;
511
512 if (lit == lhs.end())
513 return true; // lhs == rhs, or lhs prefix of rhs
514 else if (rit == rhs.end())
515 return false; // rhs prefix of lhs
516
517 return *lit < *rit;
518 }
519
524 friend bool operator>(gap_decorator const & lhs, gap_decorator const & rhs)
525 {
526 return !(lhs <= rhs);
527 }
528
533 friend bool operator>=(gap_decorator const & lhs, gap_decorator const & rhs)
534 {
535 return !(lhs < rhs);
536 }
538
539private:
541 using anchor_gap_t = typename std::pair<size_t, size_t>;
542
544 using anchor_set_type = std::set<anchor_gap_t>;
545
547 using set_iterator_type = typename anchor_set_type::iterator;
548
550 static constexpr size_t bound_dummy{std::numeric_limits<size_t>::max()};
551
564 size_type gap_length(set_iterator_type it) const
565 {
566 return (it == anchors.begin()) ? it->second : it->second - (*std::prev(it)).second;
567 }
568
581 void rupdate(size_type const pos, size_type const offset)
582 {
583 for (auto it = std::prev(anchors.end(), 1); it->first > pos;)
584 {
585 anchors.emplace_hint(it, anchor_gap_t{it->first + offset, it->second + offset});
586 anchors.erase(*it--);
587 }
588 }
589
602 void update(set_iterator_type it, size_type const offset)
603 {
604 while (it != anchors.end())
605 {
606 anchor_gap_t gap{it->first - offset, it->second - offset};
607 it = anchors.erase(it);
608 it = anchors.insert(it, gap);
609 ++it;
610 }
611 }
612
614 ungapped_view_type ungapped_view{};
615
617 anchor_set_type anchors{};
618};
619
627template <std::ranges::viewable_range urng_t>
628 requires (!std::ranges::view<std::remove_reference_t<urng_t>>)
630
635template <std::ranges::view urng_t>
638
655template <std::ranges::viewable_range inner_type>
656 requires std::ranges::random_access_range<inner_type> && std::ranges::sized_range<inner_type>
657 && (std::is_const_v<std::remove_reference_t<inner_type>> || std::ranges::view<inner_type>)
658template <bool>
660{
661protected:
663 typename std::add_pointer_t<gap_decorator const> host{nullptr};
665 typename gap_decorator::size_type pos{0u};
667 int64_t ungapped_view_pos{0}; // must be signed because we need this value to be -1 in case of leading gaps.
670 typename gap_decorator::size_type left_gap_end{0};
673 typename gap_decorator::set_iterator_type anchor_set_it{};
675 bool is_at_gap{true};
676
678 void jump(typename gap_decorator::size_type const new_pos)
679 {
680 assert(new_pos <= host->size());
681 pos = new_pos;
682
683 anchor_set_it = host->anchors.upper_bound(anchor_gap_t{pos, host->bound_dummy});
684 ungapped_view_pos = pos;
685
686 if (anchor_set_it != host->anchors.begin())
687 {
688 typename gap_decorator::set_iterator_type prev{std::prev(anchor_set_it)};
689 size_type gap_len{prev->second};
690
691 if (prev != host->anchors.begin())
692 gap_len -= std::prev(prev)->second;
693
694 ungapped_view_pos -= prev->second;
695 left_gap_end = prev->first + gap_len;
696 }
697
698 if (ungapped_view_pos != static_cast<int64_t>(host->ungapped_view.size()) && pos >= left_gap_end
699 && (anchor_set_it == host->anchors.end() || pos < anchor_set_it->first))
700 is_at_gap = false;
701 else
702 is_at_gap = true;
703 }
704
705public:
710 using difference_type = typename gap_decorator::difference_type;
712 using value_type = typename gap_decorator::value_type;
714 using reference = typename gap_decorator::const_reference;
716 using pointer = value_type *;
718 using iterator_category = std::bidirectional_iterator_tag;
720
724 basic_iterator() = default;
725 basic_iterator(basic_iterator const &) = default;
726 basic_iterator & operator=(basic_iterator const &) = default;
727 basic_iterator(basic_iterator &&) = default;
728 basic_iterator & operator=(basic_iterator &&) = default;
729 ~basic_iterator() = default;
730
732 explicit basic_iterator(gap_decorator const & host_) : host(&host_), anchor_set_it{host_.anchors.begin()}
733 {
734 if (host_.anchors.size() && (*host_.anchors.begin()).first == 0) // there are gaps at the very front
735 {
736 --ungapped_view_pos; // set ungapped_view_pos to -1 so operator++ works without an extra if-branch.
737 left_gap_end = anchor_set_it->second;
738 ++anchor_set_it;
739 }
740 else
741 {
742 is_at_gap = false;
743 }
744 }
745
747 basic_iterator(gap_decorator const & host_, typename gap_decorator::size_type const pos_) : host(&host_)
748 {
749 jump(pos_); // random access to pos
750 }
752
757 basic_iterator & operator++()
758 {
759 assert(host); // host is set
760 ++pos;
761
762 if (pos < left_gap_end) // we stay within the preceding gap stretch
763 return *this;
764
765 if (anchor_set_it == host->anchors.end() || pos < anchor_set_it->first)
766 { // proceed within the view since we are right of the previous gap but didn't arrive at the right gap yet
767 ++ungapped_view_pos;
768 if (ungapped_view_pos != static_cast<int64_t>(host->ungapped_view.size()))
769 is_at_gap = false;
770 }
771 else
772 { // we arrived at the right gap and have to update the variables. ungapped_view_pos remains unchanged.
773 left_gap_end = anchor_set_it->first + anchor_set_it->second
774 - ((anchor_set_it != host->anchors.begin()) ? (std::prev(anchor_set_it))->second : 0);
775 ++anchor_set_it;
776 is_at_gap = true;
777
778 if (left_gap_end == host->size()) // very last gap
779 ++ungapped_view_pos;
780 }
781
782 return *this;
783 }
784
786 basic_iterator operator++(int)
787 {
788 basic_iterator cpy{*this};
789 ++(*this);
790 return cpy;
791 }
792
794 basic_iterator & operator+=(difference_type const skip)
795 {
796 this->jump(this->pos + skip);
797 return *this;
798 }
799
801 basic_iterator operator+(difference_type const skip) const
802 {
803 return basic_iterator{*(this->host), this->pos + skip};
804 }
805
807 friend basic_iterator operator+(difference_type const skip, basic_iterator const & it)
808 {
809 return it + skip;
810 }
811
813 basic_iterator & operator--()
814 {
815 assert(host); // host is set
816 --pos;
817
818 if (pos < left_gap_end)
819 { // there was no gap before but we arrive at the left gap and have to update the variables.
820 (anchor_set_it != host->anchors.begin()) ? --anchor_set_it : anchor_set_it;
821
822 if (anchor_set_it != host->anchors.begin())
823 {
824 auto prev = std::prev(anchor_set_it);
825 left_gap_end =
826 prev->first + prev->second - ((prev != host->anchors.begin()) ? std::prev(prev)->second : 0);
827 }
828 else // [[unlikely]]
829 {
830 left_gap_end = 0;
831 }
832 is_at_gap = true;
833 }
834 else if (anchor_set_it == host->anchors.end() || pos < anchor_set_it->first)
835 { // we are neither at the left nor right gap
836 --ungapped_view_pos;
837 is_at_gap = false;
838 }
839 // else -> no op (we are still within the right gap stretch)
840
841 return *this;
842 }
843
845 basic_iterator operator--(int)
846 {
847 basic_iterator cpy{*this};
848 --(*this);
849 return cpy;
850 }
851
853 basic_iterator & operator-=(difference_type const skip)
854 {
855 this->jump(this->pos - skip);
856 return *this;
857 }
858
860 basic_iterator operator-(difference_type const skip) const
861 {
862 return basic_iterator{*(this->host), this->pos - skip};
863 }
864
866 friend basic_iterator operator-(difference_type const skip, basic_iterator const & it)
867 {
868 return it - skip;
869 }
870
872 difference_type operator-(basic_iterator const lhs) const noexcept
873 {
874 return static_cast<difference_type>(this->pos - lhs.pos);
875 }
877
882 reference operator*() const
883 {
884 return (is_at_gap) ? reference{gap{}} : reference{host->ungapped_view[ungapped_view_pos]};
885 }
886
888 reference operator[](difference_type const n) const
889 {
890 return *(*this + n);
891 }
893
900 friend bool operator==(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
901 {
902 return lhs.pos == rhs.pos;
903 }
904
906 friend bool operator!=(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
907 {
908 return lhs.pos != rhs.pos;
909 }
910
912 friend bool operator<(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
913 {
914 return lhs.pos < rhs.pos;
915 }
916
918 friend bool operator>(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
919 {
920 return lhs.pos > rhs.pos;
921 }
922
924 friend bool operator<=(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
925 {
926 return lhs.pos <= rhs.pos;
927 }
928
930 friend bool operator>=(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
931 {
932 return lhs.pos >= rhs.pos;
933 }
935};
936
937} // namespace seqan3
Includes customized exception types for the alignment module .
Core alphabet concept and free function/type trait wrappers.
T begin(T... args)
A combined alphabet that can hold values of either of its alternatives..
Definition alphabet_variant.hpp:129
A gap decorator allows the annotation of sequences with gap symbols while leaving the underlying sequ...
Definition gap_decorator.hpp:78
reference at(size_type const i)
Return the i-th element as a reference.
Definition gap_decorator.hpp:404
friend bool operator==(gap_decorator const &lhs, gap_decorator const &rhs)
Checks whether lhs is equal to rhs.
Definition gap_decorator.hpp:460
friend void assign_unaligned(gap_decorator &dec, unaligned_sequence_t &&unaligned)
Assigns a new sequence of type seqan3::gap_decorator::unaligned_sequence_type to the decorator.
Definition gap_decorator.hpp:325
gap_decorator & operator=(gap_decorator const &)=default
Defaulted.
gap_decorator(gap_decorator &&rhs)=default
Defaulted.
const_reference at(size_type const i) const
Return the i-th element as a reference.
Definition gap_decorator.hpp:412
std::ranges::range_difference_t< inner_type > difference_type
The difference type of the underlying sequence.
Definition gap_decorator.hpp:126
const_iterator cend() const noexcept
Returns an iterator pointing behind the last element of the decorator.
Definition gap_decorator.hpp:381
const_iterator end() const noexcept
Returns an iterator pointing behind the last element of the decorator.
Definition gap_decorator.hpp:375
inner_type unaligned_sequence_type
The underlying ungapped range type.
Definition gap_decorator.hpp:133
reference operator[](size_type const i) const
Return the i-th element as a reference.
Definition gap_decorator.hpp:431
friend bool operator!=(gap_decorator const &lhs, gap_decorator const &rhs)
Checks whether lhs is not equal to rhs.
Definition gap_decorator.hpp:475
iterator erase_gap(const_iterator const it)
Erase one gap symbol at the indicated iterator postion.
Definition gap_decorator.hpp:254
gap_decorator(other_range_t &&range)
Construct with the ungapped range type.
Definition gap_decorator.hpp:160
const_iterator begin() const noexcept
Returns an iterator to the first element of the container.
Definition gap_decorator.hpp:349
iterator insert_gap(const_iterator const it, size_type const count=1)
Insert a gap of length count at the aligned sequence iterator position.
Definition gap_decorator.hpp:201
~gap_decorator()=default
Defaulted.
iterator erase_gap(const_iterator const first, const_iterator const last)
Erase gap symbols at the iterator postions [first, last[.
Definition gap_decorator.hpp:278
gap_decorator(gap_decorator const &)=default
Defaulted.
friend bool operator<=(gap_decorator const &lhs, gap_decorator const &rhs)
Checks whether lhs is less than or equal to rhs.
Definition gap_decorator.hpp:504
size_type size() const
Returns the total length of the aligned sequence.
Definition gap_decorator.hpp:178
reference const_reference
const_reference type equals reference type equals value type because the underlying sequence must not...
Definition gap_decorator.hpp:114
gap_decorator & operator=(gap_decorator &&rhs)=default
Defaulted.
gapped< std::ranges::range_value_t< inner_type > > value_type
The variant type of the alphabet type and gap symbol type (see seqan3::gapped).
Definition gap_decorator.hpp:101
friend bool operator>(gap_decorator const &lhs, gap_decorator const &rhs)
Checks whether lhs is greater than rhs.
Definition gap_decorator.hpp:524
friend bool operator>=(gap_decorator const &lhs, gap_decorator const &rhs)
Checks whether lhs is greater than or equal to rhs.
Definition gap_decorator.hpp:533
gap_decorator()=default
Default constructor.
friend bool operator<(gap_decorator const &lhs, gap_decorator const &rhs)
Checks whether lhs is less than rhs.
Definition gap_decorator.hpp:484
std::ranges::range_size_t< inner_type > size_type
The size_type of the underlying sequence.
Definition gap_decorator.hpp:120
const_iterator cbegin() const noexcept
Returns an iterator to the first element of the container.
Definition gap_decorator.hpp:355
Thrown in function seqan3::erase_gap, if a position does not contain a gap.
Definition alignment/exception.hpp:21
The alphabet of a gap character '-'.
Definition gap.hpp:36
T emplace_hint(T... args)
T equal(T... args)
Provides seqan3::gap.
Provides seqan3::gapped.
@ offset
Sequence (seqan3::field::seq) relative start position (0-based), unsigned value.
T max(T... args)
The main SeqAn3 namespace.
Definition aligned_sequence_concept.hpp:26
T next(T... args)
T prev(T... args)
T size(T... args)
T to_string(T... args)
Provides seqan3::views::type_reduce.
Hide me