SeqAn3  3.0.3
The Modern C++ library for sequence analysis.
bitpacked_sequence.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2021, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2021, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
13 #pragma once
14 
15 #include <seqan3/std/concepts>
16 #include <seqan3/std/iterator>
17 #include <seqan3/std/ranges>
18 #include <type_traits>
19 
20 #include <sdsl/int_vector.hpp>
21 
27 #include <seqan3/utility/math.hpp>
29 
30 namespace seqan3
31 {
32 
63 template <writable_semialphabet alphabet_type>
65  requires std::regular<alphabet_type>
68 {
69 private:
71  static constexpr size_t bits_per_letter = detail::ceil_log2(alphabet_size<alphabet_type>);
72 
73  static_assert(bits_per_letter <= 64, "alphabet must be representable in at most 64bit.");
74 
76  using data_type = sdsl::int_vector<bits_per_letter>;
77 
79  data_type data;
80 
82  class reference_proxy_type : public alphabet_proxy<reference_proxy_type, alphabet_type>
83  {
84  private:
88  friend base_t;
89 
91  std::ranges::range_reference_t<data_type> internal_proxy;
92 
94  constexpr void on_update() noexcept
95  {
96  internal_proxy = static_cast<base_t &>(*this).to_rank();
97  }
98 
99  public:
100  // Import from base:
101  using base_t::operator=;
102 
107  reference_proxy_type() = delete;
108  constexpr reference_proxy_type(reference_proxy_type const &) noexcept = default;
109  constexpr reference_proxy_type(reference_proxy_type &&) noexcept = default;
110  constexpr reference_proxy_type & operator=(reference_proxy_type const &) noexcept = default;
111  constexpr reference_proxy_type & operator=(reference_proxy_type &&) noexcept = default;
112  ~reference_proxy_type() noexcept = default;
113 
115  reference_proxy_type(std::ranges::range_reference_t<data_type> const & internal) noexcept :
116  internal_proxy{internal}
117  {
118  static_cast<base_t &>(*this).assign_rank(internal);
119  }
121  };
122 
125  //NOTE(h-2): it is entirely unclear to me why we need this
126  template <typename t>
127  requires std::is_same_v<std::ranges::range_value_t<std::remove_cvref_t<t>>, alphabet_type>
128  static constexpr bool has_same_value_type_v = true;
130 
131 public:
139  using value_type = alphabet_type;
145  using reference = reference_proxy_type;
150  using const_reference = alphabet_type;
155  using iterator = detail::random_access_iterator<bitpacked_sequence>;
160  using const_iterator = detail::random_access_iterator<bitpacked_sequence const>;
165  using difference_type = std::ranges::range_difference_t<data_type>;
170  using size_type = std::ranges::range_size_t<data_type>;
172 
174  // this signals to range-v3 that something is a container :|
175  using allocator_type = void;
177 
181  bitpacked_sequence() = default;
182  constexpr bitpacked_sequence(bitpacked_sequence const &) = default;
183  constexpr bitpacked_sequence(bitpacked_sequence &&) = default;
184  constexpr bitpacked_sequence & operator=(bitpacked_sequence const &) = default;
185  constexpr bitpacked_sequence & operator=(bitpacked_sequence &&) = default;
186  ~bitpacked_sequence() = default;
187 
203  template <typename other_range_t>
205  requires (!std::same_as<bitpacked_sequence, std::remove_cvref_t<other_range_t>>) &&
206  std::ranges::input_range<other_range_t> &&
207  has_same_value_type_v<other_range_t>
209  explicit bitpacked_sequence(other_range_t && range) :
210  bitpacked_sequence{std::ranges::begin(range), std::ranges::end(range)}
211  {}
212 
228  data(count, to_rank(value))
229  {}
230 
248  template <std::forward_iterator begin_iterator_type, typename end_iterator_type>
249  bitpacked_sequence(begin_iterator_type begin_it, end_iterator_type end_it)
251  requires std::sentinel_for<end_iterator_type, begin_iterator_type> &&
252  std::common_reference_with<std::iter_value_t<begin_iterator_type>, value_type>
254  {
255  insert(cend(), begin_it, end_it);
256  }
257 
272  bitpacked_sequence(std::begin(ilist), std::end(ilist))
273  {}
274 
289  {
290  assign(std::begin(ilist), std::end(ilist));
291  return *this;
292  }
293 
309  template <std::ranges::input_range other_range_t>
310  void assign(other_range_t && range)
312  requires std::common_reference_with<std::ranges::range_value_t<other_range_t>, value_type>
314  {
315  bitpacked_sequence rhs{std::forward<other_range_t>(range)};
316  swap(rhs);
317  }
318 
333  void assign(size_type const count, value_type const value)
334  {
335  bitpacked_sequence rhs{count, value};
336  swap(rhs);
337  }
338 
356  template <std::forward_iterator begin_iterator_type, typename end_iterator_type>
357  void assign(begin_iterator_type begin_it, end_iterator_type end_it)
359  requires std::sentinel_for<end_iterator_type, begin_iterator_type> &&
360  std::common_reference_with<std::iter_value_t<begin_iterator_type>, value_type>
362  {
363  bitpacked_sequence rhs{begin_it, end_it};
364  swap(rhs);
365  }
366 
381  {
382  assign(std::begin(ilist), std::end(ilist));
383  }
384 
386 
405  iterator begin() noexcept
406  {
407  return iterator{*this};
408  }
409 
411  const_iterator begin() const noexcept
412  {
413  return const_iterator{*this};
414  }
415 
417  const_iterator cbegin() const noexcept
418  {
419  return const_iterator{*this};
420  }
421 
437  iterator end() noexcept
438  {
439  return iterator{*this, size()};
440  }
441 
443  const_iterator end() const noexcept
444  {
445  return const_iterator{*this, size()};
446  }
447 
449  const_iterator cend() const noexcept
450  {
451  return const_iterator{*this, size()};
452  }
454 
474  {
475  if (i >= size()) // [[unlikely]]
476  {
477  throw std::out_of_range{"Trying to access element behind the last in bitpacked_sequence."};
478  }
479  return (*this)[i];
480  }
481 
483  const_reference at(size_type const i) const
484  {
485  if (i >= size()) // [[unlikely]]
486  {
487  throw std::out_of_range{"Trying to access element behind the last in bitpacked_sequence."};
488  }
489  return (*this)[i];
490  }
491 
509  reference operator[](size_type const i) noexcept
510  {
511  assert(i < size());
512  return data[i];
513  }
514 
516  const_reference operator[](size_type const i) const noexcept
517  {
518  assert(i < size());
519  return assign_rank_to(data[i], const_reference{});
520  }
521 
537  reference front() noexcept
538  {
539  assert(size() > 0);
540  return (*this)[0];
541  }
542 
544  const_reference front() const noexcept
545  {
546  assert(size() > 0);
547  return (*this)[0];
548  }
549 
565  reference back() noexcept
566  {
567  assert(size() > 0);
568  return (*this)[size()-1];
569  }
570 
572  const_reference back() const noexcept
573  {
574  assert(size() > 0);
575  return (*this)[size()-1];
576  }
577 
587  constexpr data_type & raw_data() noexcept
588  {
589  return data;
590  }
591 
593  constexpr data_type const & raw_data() const noexcept
594  {
595  return data;
596  }
598 
615  bool empty() const noexcept
616  {
617  return size() == 0;
618  }
619 
633  size_type size() const noexcept
634  {
635  return data.size();
636  }
637 
654  size_type max_size() const noexcept
655  {
656  return data.max_size();
657  }
658 
676  size_type capacity() const noexcept
677  {
678  return data.capacity();
679  }
680 
701  void reserve(size_type const new_cap)
702  {
703  data.reserve(new_cap);
704  }
705 
724  {
725  data.shrink_to_fit();
726  }
728 
745  void clear() noexcept
746  {
747  data.clear();
748  }
749 
771  {
772  return insert(pos, 1, value);
773  }
774 
797  {
798  auto const pos_as_num = std::distance(cbegin(), pos); // we want to insert BEFORE this position
799 
800  data.insert(data.begin() + pos_as_num, count, to_rank(value));
801 
802  return begin() + pos_as_num;
803  }
804 
831  template <std::forward_iterator begin_iterator_type, typename end_iterator_type>
832  iterator insert(const_iterator pos, begin_iterator_type begin_it, end_iterator_type end_it)
834  requires std::sentinel_for<end_iterator_type, begin_iterator_type> &&
835  std::common_reference_with<std::iter_value_t<begin_iterator_type>, value_type>
837  {
838  auto const pos_as_num = std::distance(cbegin(), pos);
839 
840  auto v = std::ranges::subrange<begin_iterator_type, end_iterator_type>{begin_it, end_it}
841  | views::convert<value_type>
842  | views::to_rank;
843  data.insert(data.begin() + pos_as_num, std::ranges::begin(v), std::ranges::end(v));
844 
845  return begin() + pos_as_num;
846  }
847 
869  {
870  return insert(pos, ilist.begin(), ilist.end());
871  }
872 
895  {
896  if (begin_it >= end_it) // [[unlikely]]
897  return begin() + std::distance(cbegin(), end_it);
898 
899  auto const begin_it_pos = std::distance(cbegin(), begin_it);
900  auto const end_it_pos = std::distance(cbegin(), end_it);
901 
902  data.erase(data.cbegin() + begin_it_pos,
903  data.cbegin() + end_it_pos);
904 
905  return begin() + begin_it_pos;
906  }
907 
930  {
931  return erase(pos, pos + 1);
932  }
933 
951  void push_back(value_type const value)
952  {
953  data.push_back(to_rank(value));
954  }
955 
974  void pop_back()
975  {
976  assert(size() > 0);
977  data.pop_back();
978  }
979 
1008  void resize(size_type const count)
1009  {
1010  assert(count < max_size());
1011  data.resize(count);
1012  }
1013 
1018  void resize(size_type const count, value_type const value)
1019  {
1020  assert(count < max_size());
1021  data.resize(count, to_rank(value));
1022  }
1023 
1037  constexpr void swap(bitpacked_sequence & rhs) noexcept
1038  {
1039  std::swap(data, rhs.data);
1040  }
1041 
1043  constexpr void swap(bitpacked_sequence && rhs) noexcept
1044  {
1045  std::swap(data, rhs.data);
1046  }
1047 
1062  friend constexpr void swap(bitpacked_sequence & lhs, bitpacked_sequence & rhs) noexcept
1063  {
1064  std::swap(lhs, rhs);
1065  }
1066 
1068  friend constexpr void swap(bitpacked_sequence && lhs, bitpacked_sequence && rhs) noexcept
1069  {
1070  std::swap(lhs, rhs);
1071  }
1073 
1082  constexpr bool operator==(bitpacked_sequence const & rhs) const noexcept
1083  {
1084  return data == rhs.data;
1085  }
1086 
1091  constexpr bool operator!=(bitpacked_sequence const & rhs) const noexcept
1092  {
1093  return data != rhs.data;
1094  }
1095 
1100  constexpr bool operator<(bitpacked_sequence const & rhs) const noexcept
1101  {
1102  return data < rhs.data;
1103  }
1104 
1109  constexpr bool operator>(bitpacked_sequence const & rhs) const noexcept
1110  {
1111  return data > rhs.data;
1112  }
1113 
1118  constexpr bool operator<=(bitpacked_sequence const & rhs) const noexcept
1119  {
1120  return data <= rhs.data;
1121  }
1122 
1127  constexpr bool operator>=(bitpacked_sequence const & rhs) const noexcept
1128  {
1129  return data >= rhs.data;
1130  }
1132 
1140  template <cereal_archive archive_t>
1141  void CEREAL_SERIALIZE_FUNCTION_NAME(archive_t & archive)
1142  {
1143  archive(data);
1144  }
1146 };
1147 
1148 #ifdef SEQAN3_DEPRECATED_310
1150 #if SEQAN3_DOXYGEN_ONLY(1)0
1151 template <writable_semialphabet alphabet_type>
1152 struct bitcompressed_vector : public bitpacked_sequence<alphabet_type>
1153 {};
1154 #endif // SEQAN3_DOXYGEN_ONLY(1)0
1155 
1157 template <writable_semialphabet alphabet_type>
1158  requires std::regular<alphabet_type>
1161 #endif // SEQAN3_DEPRECATED_310
1162 
1163 } // namespace seqan3
Provides seqan3::views::to_char.
Provides seqan3::views::to_rank.
Provides seqan3::alphabet_proxy.
T begin(T... args)
Adaptions of concepts from the Cereal library.
A CRTP-base that eases the definition of proxy types returned in place of regular alphabets.
Definition: alphabet_proxy.hpp:66
constexpr auto to_rank() const noexcept
Returns the rank.
Definition: alphabet_proxy.hpp:220
A space-optimised version of std::vector that compresses multiple letters into a single byte.
Definition: bitpacked_sequence.hpp:68
void assign(size_type const count, value_type const value)
Assign with count times value.
Definition: bitpacked_sequence.hpp:333
alphabet_type value_type
Equals the alphabet_type.
Definition: bitpacked_sequence.hpp:139
std::ranges::range_difference_t< data_type > difference_type
A signed integer type (usually std::ptrdiff_t)
Definition: bitpacked_sequence.hpp:165
constexpr bitpacked_sequence(bitpacked_sequence &&)=default
Defaulted.
iterator erase(const_iterator pos)
Removes specified elements from the container.
Definition: bitpacked_sequence.hpp:929
reference_proxy_type reference
A proxy type (models seqan3::writable_semialphabet) that enables assignment, think of it as value_typ...
Definition: bitpacked_sequence.hpp:145
constexpr bool operator>(bitpacked_sequence const &rhs) const noexcept
Checks whether *this is greater than rhs.
Definition: bitpacked_sequence.hpp:1109
size_type max_size() const noexcept
Returns the maximum number of elements the container is able to hold due to system or library impleme...
Definition: bitpacked_sequence.hpp:654
void assign(begin_iterator_type begin_it, end_iterator_type end_it)
Assign from pair of iterators.
Definition: bitpacked_sequence.hpp:357
bool empty() const noexcept
Checks whether the container is empty.
Definition: bitpacked_sequence.hpp:615
reference operator[](size_type const i) noexcept
Return the i-th element.
Definition: bitpacked_sequence.hpp:509
const_reference back() const noexcept
Return the last element.
Definition: bitpacked_sequence.hpp:572
const_iterator cbegin() const noexcept
Returns an iterator to the first element of the container.
Definition: bitpacked_sequence.hpp:417
iterator erase(const_iterator begin_it, const_iterator end_it)
Removes specified elements from the container.
Definition: bitpacked_sequence.hpp:894
iterator insert(const_iterator pos, value_type const value)
Inserts value before position in the container.
Definition: bitpacked_sequence.hpp:770
~bitpacked_sequence()=default
Defaulted.
void resize(size_type const count, value_type const value)
Resizes the container to contain count elements.
Definition: bitpacked_sequence.hpp:1018
void clear() noexcept
Removes all elements from the container.
Definition: bitpacked_sequence.hpp:745
constexpr bool operator<=(bitpacked_sequence const &rhs) const noexcept
Checks whether *this is less than or equal to rhs.
Definition: bitpacked_sequence.hpp:1118
const_iterator end() const noexcept
Returns an iterator to the element following the last element of the container.
Definition: bitpacked_sequence.hpp:443
constexpr bool operator>=(bitpacked_sequence const &rhs) const noexcept
Checks whether *this is greater than or equal to rhs.
Definition: bitpacked_sequence.hpp:1127
constexpr bitpacked_sequence(bitpacked_sequence const &)=default
Defaulted.
void push_back(value_type const value)
Appends the given element value to the end of the container.
Definition: bitpacked_sequence.hpp:951
const_reference at(size_type const i) const
Return the i-th element.
Definition: bitpacked_sequence.hpp:483
constexpr bool operator!=(bitpacked_sequence const &rhs) const noexcept
Checks whether *this is not equal to rhs.
Definition: bitpacked_sequence.hpp:1091
iterator end() noexcept
Returns an iterator to the element following the last element of the container.
Definition: bitpacked_sequence.hpp:437
void assign(std::initializer_list< value_type > ilist)
Assign from std::initializer_list.
Definition: bitpacked_sequence.hpp:380
void reserve(size_type const new_cap)
Increase the capacity to a value that's greater or equal to new_cap.
Definition: bitpacked_sequence.hpp:701
bitpacked_sequence(begin_iterator_type begin_it, end_iterator_type end_it)
Construct from pair of iterators.
Definition: bitpacked_sequence.hpp:249
const_iterator begin() const noexcept
Returns an iterator to the first element of the container.
Definition: bitpacked_sequence.hpp:411
detail::random_access_iterator< bitpacked_sequence const > const_iterator
The const_iterator type of this container (a random access iterator).
Definition: bitpacked_sequence.hpp:160
bitpacked_sequence(std::initializer_list< value_type > ilist)
Construct from std::initializer_list.
Definition: bitpacked_sequence.hpp:271
constexpr bitpacked_sequence & operator=(bitpacked_sequence const &)=default
Defaulted.
void shrink_to_fit()
Requests the removal of unused capacity.
Definition: bitpacked_sequence.hpp:723
const_reference operator[](size_type const i) const noexcept
Return the i-th element.
Definition: bitpacked_sequence.hpp:516
iterator begin() noexcept
Returns an iterator to the first element of the container.
Definition: bitpacked_sequence.hpp:405
std::ranges::range_size_t< data_type > size_type
An unsigned integer type (usually std::size_t)
Definition: bitpacked_sequence.hpp:170
size_type capacity() const noexcept
Returns the number of elements that the container has currently allocated space for.
Definition: bitpacked_sequence.hpp:676
bitpacked_sequence(other_range_t &&range)
Construct from a different range.
Definition: bitpacked_sequence.hpp:209
detail::random_access_iterator< bitpacked_sequence > iterator
The iterator type of this container (a random access iterator).
Definition: bitpacked_sequence.hpp:155
alphabet_type const_reference
Equals the alphabet_type / value_type.
Definition: bitpacked_sequence.hpp:150
iterator insert(const_iterator pos, std::initializer_list< value_type > const &ilist)
Inserts elements from initializer list before position in the container.
Definition: bitpacked_sequence.hpp:868
reference at(size_type const i)
Return the i-th element.
Definition: bitpacked_sequence.hpp:473
iterator insert(const_iterator pos, begin_iterator_type begin_it, end_iterator_type end_it)
Inserts elements from range [begin_it, end_it) before position in the container.
Definition: bitpacked_sequence.hpp:832
constexpr friend void swap(bitpacked_sequence &&lhs, bitpacked_sequence &&rhs) noexcept
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: bitpacked_sequence.hpp:1068
constexpr friend void swap(bitpacked_sequence &lhs, bitpacked_sequence &rhs) noexcept
Swap contents with another instance.
Definition: bitpacked_sequence.hpp:1062
const_reference front() const noexcept
Return the first element. Calling front on an empty container is undefined.
Definition: bitpacked_sequence.hpp:544
constexpr bool operator==(bitpacked_sequence const &rhs) const noexcept
Checks whether *this is equal to rhs.
Definition: bitpacked_sequence.hpp:1082
constexpr data_type & raw_data() noexcept
Provides direct, unsafe access to underlying data structures.
Definition: bitpacked_sequence.hpp:587
const_iterator cend() const noexcept
Returns an iterator to the element following the last element of the container.
Definition: bitpacked_sequence.hpp:449
bitpacked_sequence(size_type const count, value_type const value)
Construct with count times value.
Definition: bitpacked_sequence.hpp:227
void resize(size_type const count)
Resizes the container to contain count elements.
Definition: bitpacked_sequence.hpp:1008
void pop_back()
Removes the last element of the container.
Definition: bitpacked_sequence.hpp:974
reference back() noexcept
Return the last element.
Definition: bitpacked_sequence.hpp:565
iterator insert(const_iterator pos, size_type const count, value_type const value)
Inserts count copies of value before position in the container.
Definition: bitpacked_sequence.hpp:796
bitpacked_sequence()=default
Defaulted.
constexpr void swap(bitpacked_sequence &&rhs) noexcept
Swap contents with another instance.
Definition: bitpacked_sequence.hpp:1043
constexpr data_type const & raw_data() const noexcept
Provides direct, unsafe access to underlying data structures.
Definition: bitpacked_sequence.hpp:593
void assign(other_range_t &&range)
Assign from a different range.
Definition: bitpacked_sequence.hpp:310
constexpr bool operator<(bitpacked_sequence const &rhs) const noexcept
Checks whether *this is less than rhs.
Definition: bitpacked_sequence.hpp:1100
constexpr bitpacked_sequence & operator=(bitpacked_sequence &&)=default
Defaulted.
constexpr void swap(bitpacked_sequence &rhs) noexcept
Swap contents with another instance.
Definition: bitpacked_sequence.hpp:1037
size_type size() const noexcept
Returns the number of elements in the container, i.e. std::distance(begin(), end()).
Definition: bitpacked_sequence.hpp:633
bitpacked_sequence & operator=(std::initializer_list< value_type > ilist)
Assign from std::initializer_list.
Definition: bitpacked_sequence.hpp:288
reference front() noexcept
Return the first element. Calling front on an empty container is undefined.
Definition: bitpacked_sequence.hpp:537
The Concepts library.
Provides the seqan3::detail::random_access_iterator class.
T distance(T... args)
T end(T... args)
constexpr auto assign_rank_to
Assign a rank to an alphabet object.
Definition: concept.hpp:291
constexpr auto to_rank
Return the rank representation of a (semi-)alphabet object.
Definition: concept.hpp:155
constexpr ptrdiff_t count
Count the occurrences of a type in a pack.
Definition: traits.hpp:169
auto const to_rank
A view that calls seqan3::to_rank() on each element in the input range.
Definition: to_rank.hpp:71
Refines seqan3::alphabet and adds assignability.
Provides C++20 additions to the <iterator> header.
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:29
SeqAn specific customisations in the standard namespace.
#define SEQAN3_DEPRECATED_310
Deprecation message for SeqAn 3.1.0 release.
Definition: platform.hpp:203
Adaptations of concepts from the Ranges TS.
Definition: bitpacked_sequence.hpp:1153
T swap(T... args)
Provides math related functionality.
Provides seqan3::views::convert.