SeqAn3  3.0.0
The Modern C++ library for sequence analysis.
gap_decorator.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2019, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2019, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
14 #pragma once
15 
16 #include <limits>
17 #include <set>
18 #include <tuple>
19 #include <type_traits>
20 
28 #include <seqan3/std/algorithm>
29 #include <seqan3/std/ranges>
30 
31 namespace seqan3
32 {
33 
78 template <std::ranges::ViewableRange inner_type>
81  (std::is_const_v<std::remove_reference_t<inner_type>> || std::ranges::View<inner_type>)
84 {
85 private:
94  class gap_decorator_iterator
95  {
96  private:
98  typename std::add_pointer_t<gap_decorator const> host{nullptr};
100  typename gap_decorator::size_type pos{0u};
102  int64_t ungapped_view_pos{0}; // must be signed because we need this value to be -1 in case of leading gaps.
105  typename gap_decorator::size_type left_gap_end{0};
108  typename gap_decorator::set_iterator_type anchor_set_it{};
110  bool is_at_gap{true};
111 
113  void jump(typename gap_decorator::size_type const new_pos)
114  {
115  assert(new_pos <= host->size());
116  pos = new_pos;
117 
118  anchor_set_it = host->anchors.upper_bound(anchor_gap_t{pos, host->bound_dummy});
119  ungapped_view_pos = pos;
120 
121  if (anchor_set_it != host->anchors.begin())
122  {
123  typename gap_decorator::set_iterator_type prev{std::prev(anchor_set_it)};
124  size_type gap_len{prev->second};
125 
126  if (prev != host->anchors.begin())
127  gap_len -= std::prev(prev)->second;
128 
129  ungapped_view_pos -= prev->second;
130  left_gap_end = prev->first + gap_len;
131  }
132 
133  if (ungapped_view_pos != static_cast<int64_t>(host->ungapped_view.size()) &&
134  pos >= left_gap_end && (anchor_set_it == host->anchors.end() || pos < anchor_set_it->first))
135  is_at_gap = false;
136  else
137  is_at_gap = true;
138  }
139 
140  public:
148  using value_type = typename gap_decorator::value_type;
150  using reference = typename gap_decorator::const_reference; // = reference
152  using const_reference = reference;
154  using pointer = value_type *;
156  using iterator_category = std::bidirectional_iterator_tag;
158 
162  constexpr gap_decorator_iterator() = default;
165  constexpr gap_decorator_iterator(gap_decorator_iterator const &) = default;
167  constexpr gap_decorator_iterator & operator=(gap_decorator_iterator const &) = default;
169  constexpr gap_decorator_iterator (gap_decorator_iterator &&) = default;
171  constexpr gap_decorator_iterator & operator=(gap_decorator_iterator &&) = default;
173  ~gap_decorator_iterator() = default;
174 
176  explicit constexpr gap_decorator_iterator(gap_decorator const & host_) noexcept :
177  host(&host_), anchor_set_it{host_.anchors.begin()}
178  {
179  if (host_.anchors.size() && (*host_.anchors.begin()).first == 0) // there are gaps at the very front
180  {
181  --ungapped_view_pos; // set ungapped_view_pos to -1 so operator++ works without an extra if-branch.
182  left_gap_end = anchor_set_it->second;
183  ++anchor_set_it;
184  }
185  else
186  {
187  is_at_gap = false;
188  }
189  }
190 
192  constexpr gap_decorator_iterator(gap_decorator const & host_,
193  typename gap_decorator::size_type const pos_) noexcept :
194  host(&host_)
195  {
196  jump(pos_); // random access to pos
197  }
199 
203  constexpr gap_decorator_iterator & operator++() noexcept
205  {
206  assert(host); // host is set
207  ++pos;
208 
209  if (pos < left_gap_end)
210  { // we stay within the preceding gap stretch
211  // no op but must precede other cases
212  }
213  else if (anchor_set_it == host->anchors.end() || pos < anchor_set_it->first)
214  { // proceed within the view since we are right of the previous gap but didn't arrive at the right gap yet
215  ++ungapped_view_pos;
216  if (ungapped_view_pos != static_cast<int64_t>(host->ungapped_view.size()))
217  is_at_gap = false;
218  }
219  else
220  { // we arrived at the right gap and have to update the variables. ungapped_view_pos remains unchanged.
221  left_gap_end = anchor_set_it->first + anchor_set_it->second -
222  ((anchor_set_it != host->anchors.begin()) ? (std::prev(anchor_set_it))->second : 0);
223  ++anchor_set_it;
224  is_at_gap = true;
225  }
226 
227  return *this;
228  }
229 
231  constexpr gap_decorator_iterator & operator--() noexcept
232  {
233  assert(host); // host is set
234  --pos;
235 
236  if (pos < left_gap_end)
237  { // there was no gap before but we arrive at the left gap and have to update the variables.
238  (anchor_set_it != host->anchors.begin()) ? --anchor_set_it : anchor_set_it;
239 
240  if (anchor_set_it != host->anchors.begin())
241  {
242  auto prev = std::prev(anchor_set_it);
243  left_gap_end = prev->first + prev->second -
244  ((prev != host->anchors.begin()) ? std::prev(prev)->second : 0);
245  }
246  else // [[unlikely]]
247  {
248  left_gap_end = 0;
249  }
250  is_at_gap = true;
251  }
252  else if (anchor_set_it == host->anchors.end() || pos < anchor_set_it->first)
253  { // we are neither at the left nor right gap
254  --ungapped_view_pos;
255  is_at_gap = false;
256  }
257  // else -> no op (we are still within the right gap stretch)
258 
259  return *this;
260  }
261 
263  constexpr gap_decorator_iterator operator++(int) noexcept
264  {
265  gap_decorator_iterator cpy{*this};
266  ++(*this);
267  return cpy;
268  }
269 
271  constexpr gap_decorator_iterator operator--(int) noexcept
272  {
273  gap_decorator_iterator cpy{*this};
274  --(*this);
275  return cpy;
276  }
278 
282  constexpr reference operator*() const noexcept
284  {
285  return (is_at_gap) ? static_cast<reference>(gap{})
286  : static_cast<reference>(host->ungapped_view[ungapped_view_pos]);
287  }
289 
295  constexpr friend bool operator==(gap_decorator_iterator const & lhs,
297  gap_decorator_iterator const & rhs)
298  {
299  return lhs.pos == rhs.pos;
300  }
301 
303  constexpr friend bool operator!=(gap_decorator_iterator const & lhs,
304  gap_decorator_iterator const & rhs)
305  {
306  return lhs.pos != rhs.pos;
307  }
308 
310  constexpr friend bool operator<(gap_decorator_iterator const & lhs,
311  gap_decorator_iterator const & rhs)
312  {
313  return lhs.pos < rhs.pos;
314  }
315 
317  constexpr friend bool operator>(gap_decorator_iterator const & lhs,
318  gap_decorator_iterator const & rhs)
319  {
320  return lhs.pos > rhs.pos;
321  }
322 
324  constexpr friend bool operator<=(gap_decorator_iterator const & lhs,
325  gap_decorator_iterator const & rhs)
326  {
327  return lhs.pos <= rhs.pos;
328  }
329 
331  constexpr friend bool operator>=(gap_decorator_iterator const & lhs,
332  gap_decorator_iterator const & rhs)
333  {
334  return lhs.pos >= rhs.pos;
335  }
337  };
338 
339 public:
355  using iterator = gap_decorator_iterator;
360 
362  using unaligned_seq_type = inner_type;
363 
367  constexpr gap_decorator() = default;
371  constexpr gap_decorator(gap_decorator const &) = default;
373  constexpr gap_decorator & operator=(gap_decorator const &) = default;
375  constexpr gap_decorator(gap_decorator && rhs) = default;
377  constexpr gap_decorator & operator=(gap_decorator && rhs) = default;
379  ~gap_decorator() = default;
380 
382  template <typename other_range_t>
386  std::ranges::ViewableRange<other_range_t> // at end, otherwise it competes with the move ctor
388  gap_decorator(other_range_t && range) : ungapped_view{view::all(std::forward<inner_type>(range))}
389  {} // TODO (@smehringer) only works for copyable views. Has to be changed once views are not required to be copyable anymore.
390  // !\}
391 
403  size_type size() const noexcept
404  {
405  if (anchors.size())
406  return anchors.rbegin()->second + ungapped_view.size();
407 
408  return ungapped_view.size();
409  }
410 
424  iterator insert_gap(iterator const it, size_type const count = 1)
425  {
426  assert(ungapped_view.size());
427 
428  if (!count) // [[unlikely]]
429  return it;
430 
431  size_type const pos = std::distance(begin(), it);
432  assert(pos <= size());
433 
434  set_iterator_type it_set = anchors.upper_bound(anchor_gap_t{pos, bound_dummy});
435 
436  if (it_set == anchors.begin()) // will also catch if anchors is empty since begin() == end()
437  {
438  anchors.emplace_hint(anchors.begin(), anchor_gap_t{pos, count});
439  }
440  else // there are gaps before pos
441  {
442  --it_set;
443  auto gap_len{it_set->second};
444  if (it_set != anchors.begin())
445  gap_len -= (*(std::prev(it_set))).second;
446 
447  if (it_set->first + gap_len >= pos) // extend existing gap
448  {
449  anchor_gap_t gap{it_set->first, it_set->second + count};
450  it_set = anchors.erase(it_set);
451  anchors.insert(it_set, gap);
452  }
453  else // insert new gap
454  {
455  anchor_gap_t gap{pos, it_set->second + count};
456  ++it_set;
457  anchors.insert(it_set, gap);
458  }
459  }
460 
461  // post-processing: reverse update of succeeding gaps
462  rupdate(pos, count);
463  return iterator{*this, pos};
464  }
465 
478  {
479  assert(ungapped_view.size());
480 
481  // check if [it, it+gap_len[ covers [first, last[
482  if ((*it) != gap{}) // [[unlikely]]
483  throw gap_erase_failure("The range to be erased does not correspond to a consecutive gap.");
484 
485  auto end_it = std::next(it);
486  return erase_gap(it, end_it);
487  }
488 
502  iterator erase_gap(iterator const first, iterator const last)
503  {
504  assert(ungapped_view.size());
505 
506  size_type const pos1 = std::distance(begin(), first);
507  size_type const pos2 = std::distance(begin(), last);
508  set_iterator_type it = anchors.upper_bound(anchor_gap_t{pos1, bound_dummy}); // first element greater than pos1
509 
510  if (it == anchors.begin())
511  throw gap_erase_failure{"There is no gap to erase in range [" + std::to_string(pos1) + "," +
512  std::to_string(pos2) + "]."};
513 
514  --it;
515  size_type const gap_len = gap_length(it);
516 
517  // check if [it, it+gap_len[ covers [first, last[
518  if ((it->first + gap_len) < pos2) // [[unlikely]]
519  {
520  throw gap_erase_failure{"The range to be erased does not correspond to a consecutive gap."};
521  }
522  // case 1: complete gap is deleted
523  else if (gap_len == pos2 - pos1)
524  {
525  it = anchors.erase(it);
526  }
527  // case 2: gap to be deleted in tail or larger than 1 (equiv. to shift tail left, i.e. pos remains unchanged)
528  else
529  {
530  anchor_gap_t gap{it->first, it->second - pos2 + pos1};
531  it = anchors.erase(it);
532  it = anchors.insert(it, gap); // amortized constant because of hint
533  ++it; // update node after the current
534  }
535 
536  // post-processing: forward update of succeeding gaps
537  update(it, pos2 - pos1);
538 
539  return iterator{*this, pos1};
540  }
541 
546  template <typename unaligned_seq_t> // generic template to use forwarding reference
550  friend void assign_unaligned(gap_decorator & dec, unaligned_seq_t && unaligned)
551  {
552  dec = unaligned;
553  }
555 
572  iterator begin() const noexcept
573  {
574  return iterator{*this};
575  }
576 
578  const_iterator cbegin() const noexcept
579  {
580  return const_iterator{*this};
581  }
582 
596  iterator end() const noexcept
597  {
598  return iterator{*this, size()};
599  }
600 
602  const_iterator cend() const noexcept
603  {
604  return const_iterator{*this, size()};
605  }
607 
624  {
625  if (i >= size()) // [[unlikely]]
626  throw std::out_of_range{"Trying to access element behind the last in gap_decorator."};
627  return (*this)[i];
628  }
629 
631  const_reference at(size_type const i) const
632  {
633  if (i >= size()) // [[unlikely]]
634  throw std::out_of_range{"Trying to access element behind the last in gap_decorator."};
635  return (*this)[i];
636  }
637 
649  constexpr reference operator[](size_type const i) const noexcept
650  {
651  return *iterator{*this, i};
652  }
654 
674  friend bool operator==(gap_decorator const & lhs, gap_decorator const & rhs) noexcept
676  {
677  if (lhs.size() == rhs.size() &&
678  lhs.anchors == rhs.anchors &&
679  std::ranges::equal(lhs.ungapped_view, rhs.ungapped_view))
680  {
681  return true;
682  }
683 
684  return false;
685  }
686 
688  friend bool operator!=(gap_decorator const & lhs, gap_decorator const & rhs) noexcept
689  {
690  return !(lhs == rhs);
691  }
692 
694  friend bool operator<(gap_decorator const & lhs, gap_decorator const & rhs) noexcept
695  {
696  auto lit = lhs.begin();
697  auto rit = rhs.begin();
698 
699  while (lit != lhs.end() && rit != rhs.end() && *lit == *rit)
700  ++lit, ++rit;
701 
702  if (rit == rhs.end())
703  return false; // lhs == rhs, or rhs prefix of lhs
704  else if (lit == lhs.end())
705  return true; // lhs prefix of rhs
706 
707  return *lit < *rit;
708  }
709 
711  friend bool operator<=(gap_decorator const & lhs, gap_decorator const & rhs) noexcept
712  {
713  auto lit = lhs.begin();
714  auto rit = rhs.begin();
715 
716  while (lit != lhs.end() && rit != rhs.end() && *lit == *rit)
717  ++lit, ++rit;
718 
719  if (lit == lhs.end())
720  return true; // lhs == rhs, or lhs prefix of rhs
721  else if (rit == rhs.end())
722  return false; // rhs prefix of lhs
723 
724  return *lit < *rit;
725  }
726 
728  friend bool operator>(gap_decorator const & lhs, gap_decorator const & rhs) noexcept
729  {
730  return !(lhs <= rhs);
731  }
732 
734  friend bool operator>=(gap_decorator const & lhs, gap_decorator const & rhs) noexcept
735  {
736  return !(lhs < rhs);
737  }
739 
740 private:
742  using anchor_gap_t = typename std::pair<size_t, size_t>;
743 
745  using anchor_set_type = std::set<anchor_gap_t>;
746 
748  using set_iterator_type = typename anchor_set_type::iterator;
749 
751  constexpr static size_t bound_dummy{std::numeric_limits<size_t>::max()};
752 
765  constexpr size_type gap_length(set_iterator_type it) const noexcept
766  {
767  return (it == anchors.begin()) ? it->second : it->second - (*std::prev(it)).second;
768  }
769 
782  void rupdate(size_type const pos, size_type const offset)
783  {
784  for (auto it = std::prev(anchors.end(), 1); it->first > pos;)
785  {
786  anchors.emplace_hint(it, anchor_gap_t{it->first + offset, it->second + offset});
787  anchors.erase(*it--);
788  }
789  }
790 
803  void update(set_iterator_type it, size_type const offset)
804  {
805  while (it != anchors.end())
806  {
807  anchor_gap_t gap{it->first - offset, it->second - offset};
808  it = anchors.erase(it);
809  it = anchors.insert(it, gap);
810  ++it;
811  }
812  }
813 
815  decltype(view::all(std::declval<inner_type &&>())) ungapped_view{};
816 
818  anchor_set_type anchors{};
819 };
820 
824 template <std::ranges::ViewableRange urng_t>
829 gap_decorator(urng_t && range) -> gap_decorator<std::remove_reference_t<urng_t> const &>;
830 
832 template <std::ranges::View urng_t>
833 gap_decorator(urng_t range) -> gap_decorator<urng_t>;
835 
836 } // namespace seqan
837 
838 namespace seqan3::detail
839 {
840 
842 template <typename type>
843 constexpr int enable_view<seqan3::gap_decorator<type>> = 0;
844 
845 template <typename type>
846 constexpr int enable_view<seqan3::gap_decorator<type> const> = 0;
847 
848 } // namespace seqan3::detail
::ranges::equal equal
Alias for ranges::equal. Determines if two sets of elements are the same.
Definition: algorithm:54
A combined alphabet that can hold values of either of its alternatives.
Definition: alphabet_variant.hpp:205
Specifies the requirements of a Range type that is either a std::ranges::View or an lvalue-reference...
T distance(T... args)
::ranges::prev prev
Alias for ranges::prev. Returns the nth predecessor of the given iterator.
Definition: iterator:326
The alphabet of a gap character &#39;-&#39;.
Definition: gap.hpp:36
T to_string(T... args)
Specifies requirements of a Range type for which begin returns a type that models std::RandomAccessIt...
difference_type_t< inner_type > difference_type
The difference type of the underlying sequence.
Definition: gap_decorator.hpp:353
size_type size() const noexcept
Returns the total length of the aligned sequence.
Definition: gap_decorator.hpp:403
friend bool operator!=(gap_decorator const &lhs, gap_decorator const &rhs) noexcept
Checks whether lhs is not equal to rhs.
Definition: gap_decorator.hpp:688
constexpr auto all
A view adaptor that behaves like std::view:all, but type erases contiguous ranges.
Definition: view_all.hpp:160
Provides the seqan3::detail::random_access_iterator class.
reference at(size_type const i)
Return the i-th element as a reference.
Definition: gap_decorator.hpp:623
::ranges::size size
Alias for ranges::size. Obtains the size of a range whose size can be calculated in constant time...
Definition: ranges:189
The main SeqAn3 namespace.
typename difference_type< t >::type difference_type_t
Shortcut for seqan3::difference_type (TransformationTrait shortcut).
Definition: pre.hpp:166
friend void assign_unaligned(gap_decorator &dec, unaligned_seq_t &&unaligned)
Assigns a new sequence of type seqan3::gap_decorator::unaligned_seq_type to the decorator.
Definition: gap_decorator.hpp:550
T prev(T... args)
const_iterator cend() const noexcept
Returns an iterator to the element following the last element of the decorator.
Definition: gap_decorator.hpp:602
Specifies the requirements of a Range type that knows its size in constant time with the size functio...
const_iterator cbegin() const noexcept
Returns an iterator to the first element of the container.
Definition: gap_decorator.hpp:578
iterator insert_gap(iterator const it, size_type const count=1)
Insert a gap of length count at the aligned sequence iterator position.
Definition: gap_decorator.hpp:424
friend bool operator<(gap_decorator const &lhs, gap_decorator const &rhs) noexcept
Checks whether lhs is less than rhs.
Definition: gap_decorator.hpp:694
Provides seqan3::gapped.
size_type_t< inner_type > size_type
The size_type of the underlying sequence.
Definition: gap_decorator.hpp:351
T next(T... args)
reference const_reference
const_reference type equals reference type equals value type because the underlying sequence must not...
Definition: gap_decorator.hpp:349
Adaptations of concepts from the Ranges TS.
iterator end() const noexcept
Returns an iterator to the element following the last element of the decorator.
Definition: gap_decorator.hpp:596
friend bool operator>(gap_decorator const &lhs, gap_decorator const &rhs) noexcept
Checks whether lhs is greater than rhs.
Definition: gap_decorator.hpp:728
::ranges::begin begin
Alias for ranges::begin. Returns an iterator to the beginning of a range.
Definition: ranges:174
Provides seqan3::view::all.
Provides seqan3::gap.
iterator erase_gap(iterator const it)
Erase one gap symbol at the indicated iterator postion.
Definition: gap_decorator.hpp:477
Specifies the requirements of a Range type that has constant time copy, move and assignment operators...
friend bool operator<=(gap_decorator const &lhs, gap_decorator const &rhs) noexcept
Checks whether lhs is less than or equal to rhs.
Definition: gap_decorator.hpp:711
T max(T... args)
inner_type unaligned_seq_type
The underlying ungapped range type.
Definition: gap_decorator.hpp:362
Thrown in function seqan3::erase_gap, if a position does not contain a gap.
Definition: exception.hpp:23
typename size_type< t >::type size_type_t
Shortcut for seqan3::size_type (TransformationTrait shortcut).
Definition: pre.hpp:195
friend bool operator>=(gap_decorator const &lhs, gap_decorator const &rhs) noexcept
Checks whether lhs is greater than or equal to rhs.
Definition: gap_decorator.hpp:734
constexpr reference operator[](size_type const i) const noexcept
Return the i-th element as a reference.
Definition: gap_decorator.hpp:649
Definition: aligned_sequence_concept.hpp:35
T size(T... args)
iterator erase_gap(iterator const first, iterator const last)
Erase gap symbols at the iterator postions [first, last[.
Definition: gap_decorator.hpp:502
The std::Constructible concept specifies that a variable of type T can be initialized with the given ...
T begin(T... args)
aligned_seq_t::iterator erase_gap(aligned_seq_t &aligned_seq, typename aligned_seq_t::const_iterator pos_it)
An implementation of seqan3::AlignedSequence::erase_gap for sequence containers.
Definition: aligned_sequence_concept.hpp:278
Adaptations of algorithms from the Ranges TS.
Core alphabet concept and free function/type trait wrappers.
Includes customized exception types for the alignment module .
gap_decorator_iterator iterator
The iterator type of this container (a bidirectional iterator).
Definition: gap_decorator.hpp:355
const_reference at(size_type const i) const
Return the i-th element as a reference.
Definition: gap_decorator.hpp:631
The concept std::Same<T, U> is satisfied if and only if T and U denote the same type.
Adaptations of concepts from the standard library.
iterator const_iterator
The const_iterator equals the iterator type. Since no references are ever returned and thus the under...
Definition: gap_decorator.hpp:358
gapped< value_type_t< inner_type > > value_type
The variant type of the alphabet type and gap symbol type (see seqan3::gapped).
Definition: gap_decorator.hpp:344
iterator begin() const noexcept
Returns an iterator to the first element of the container.
Definition: gap_decorator.hpp:572
gap_decorator(urng_t &&range) -> gap_decorator< std::remove_reference_t< urng_t > const &>
Ranges (not views!) always deduce to const & range_type since they are access-only anyway...
A gap decorator allows the annotation of sequences with gap symbols while leaving the underlying sequ...
Definition: gap_decorator.hpp:83
gap_decorator(other_range_t &&range)
Construct with the ungapped range type.
Definition: gap_decorator.hpp:388