SeqAn3 3.4.2-rc.1
The Modern C++ library for sequence analysis.
Loading...
Searching...
No Matches
bitpacked_sequence.hpp
Go to the documentation of this file.
1// SPDX-FileCopyrightText: 2006-2026 Knut Reinert & Freie Universität Berlin
2// SPDX-FileCopyrightText: 2016-2026 Knut Reinert & MPI für molekulare Genetik
3// SPDX-License-Identifier: BSD-3-Clause
4
10#pragma once
11
12#include <concepts>
13#include <iterator>
14#include <ranges>
15#include <type_traits>
16
20#include <seqan3/contrib/sdsl-lite.hpp>
25
26namespace seqan3
27{
28
59template <writable_semialphabet alphabet_type>
60 requires std::regular<alphabet_type>
62{
63private:
65 static constexpr size_t bits_per_letter = detail::ceil_log2(alphabet_size<alphabet_type>);
66
67 static_assert(bits_per_letter <= 64, "alphabet must be representable in at most 64bit.");
68
70 using data_type = seqan3::contrib::sdsl::int_vector<bits_per_letter>;
71
73 data_type data;
74
76 class reference_proxy_type : public alphabet_proxy<reference_proxy_type, alphabet_type>
77 {
78 private:
82 friend base_t;
83
85 std::ranges::range_reference_t<data_type> internal_proxy;
86
88 constexpr void on_update() noexcept
89 {
90 // Alternative patch in the SDSL.
91 // The problem is that, in the Fedora build type, there is a warning about accessing lo_set[65],
92 // which is out of bounds (size is 64).
93 // However, during runtime, lo_set[65] is never actually accessed (bogus warning).
94 // Making `len > 64` undefined behaviour resolves the warning.
95 // Note that the SDSL uses an older C++ standard and has not `std::unreachable()` available.
96 // Also, `__builtin_unreachable()` is only valid for GCC and Clang. The SDSL also supports MSVC,
97 // where there is an equivalent but different builtin for that.
98 // The diff is for the amalgamated seqan3/contrib/sdsl-lite.hpp; but the patch should be applied upstream.
99 // ```diff
100 // diff --git a/include/seqan3/contrib/sdsl-lite.hpp b/include/seqan3/contrib/sdsl-lite.hpp
101 // index a82da4ac2..b59628a56 100644
102 // --- a/include/seqan3/contrib/sdsl-lite.hpp
103 // +++ b/include/seqan3/contrib/sdsl-lite.hpp
104 // @@ -510,6 +510,8 @@ constexpr uint32_t bits_impl<T>::sel11(uint64_t x, uint32_t i, uint32_t c)
105 // template <typename T>
106 // constexpr void bits_impl<T>::write_int(uint64_t * word, uint64_t x, uint8_t offset, const uint8_t len)
107 // {
108 // + if (len > 64)
109 // + __builtin_unreachable();
110 // x &= bits_impl<T>::lo_set[len];
111 // if (offset + len < 64)
112 // {
113 // ```
115 internal_proxy = base_t::to_rank();
117 }
118
119 public:
120 // Import from base:
121 using base_t::operator=;
122
127 reference_proxy_type() = delete;
128 constexpr reference_proxy_type(reference_proxy_type const &) noexcept = default;
129 constexpr reference_proxy_type(reference_proxy_type &&) noexcept = default;
130 constexpr reference_proxy_type & operator=(reference_proxy_type const &) noexcept = default;
131 constexpr reference_proxy_type & operator=(reference_proxy_type &&) noexcept = default;
132 ~reference_proxy_type() noexcept = default;
133
135 reference_proxy_type(std::ranges::range_reference_t<data_type> const internal) noexcept :
136 internal_proxy{internal}
137 {
138 // Call alphabet_base's assign_rank to prevent calling on_update() during construction
139 // which is not necessary, because internal_proxy is already correctly initialised!
141 }
143 };
144
147 //NOTE(h-2): it is entirely unclear to me why we need this
148 template <typename t>
149 requires std::is_same_v<std::ranges::range_value_t<std::remove_cvref_t<t>>, alphabet_type>
150 static constexpr bool has_same_value_type_v = true;
152
153public:
161 using value_type = alphabet_type;
167 using reference = reference_proxy_type;
172 using const_reference = alphabet_type;
177 using iterator = detail::random_access_iterator<bitpacked_sequence>;
182 using const_iterator = detail::random_access_iterator<bitpacked_sequence const>;
187 using difference_type = std::ranges::range_difference_t<data_type>;
192 using size_type = std::ranges::range_size_t<data_type>;
194
198 bitpacked_sequence() = default;
199 constexpr bitpacked_sequence(bitpacked_sequence const &) = default;
200 constexpr bitpacked_sequence(bitpacked_sequence &&) = default;
201 constexpr bitpacked_sequence & operator=(bitpacked_sequence const &) = default;
204
220 template <typename other_range_t>
221 requires (!std::same_as<bitpacked_sequence, std::remove_cvref_t<other_range_t>>)
222 && std::ranges::input_range<other_range_t> && has_same_value_type_v<other_range_t>
223 explicit bitpacked_sequence(other_range_t && range) :
224 bitpacked_sequence{std::ranges::begin(range), std::ranges::end(range)}
225 {}
226
241 bitpacked_sequence(size_type const count, value_type const value) : data(count, to_rank(value))
242 {}
243
261 template <std::forward_iterator begin_iterator_type, typename end_iterator_type>
262 bitpacked_sequence(begin_iterator_type begin_it, end_iterator_type end_it)
263 requires std::sentinel_for<end_iterator_type, begin_iterator_type>
264 && std::common_reference_with<std::iter_value_t<begin_iterator_type>, value_type>
265 {
266 insert(cend(), begin_it, end_it);
267 }
268
284
299 {
300 assign(std::begin(ilist), std::end(ilist));
301 return *this;
302 }
303
319 template <std::ranges::input_range other_range_t>
320 void assign(other_range_t && range)
321 requires std::common_reference_with<std::ranges::range_value_t<other_range_t>, value_type>
322 {
323 bitpacked_sequence rhs{std::forward<other_range_t>(range)};
324 swap(rhs);
325 }
326
341 void assign(size_type const count, value_type const value)
342 {
343 bitpacked_sequence rhs{count, value};
344 swap(rhs);
345 }
346
364 template <std::forward_iterator begin_iterator_type, typename end_iterator_type>
365 void assign(begin_iterator_type begin_it, end_iterator_type end_it)
366 requires std::sentinel_for<end_iterator_type, begin_iterator_type>
367 && std::common_reference_with<std::iter_value_t<begin_iterator_type>, value_type>
368 {
369 bitpacked_sequence rhs{begin_it, end_it};
370 swap(rhs);
371 }
372
387 {
388 assign(std::begin(ilist), std::end(ilist));
389 }
390
392
411 iterator begin() noexcept
412 {
413 return iterator{*this};
414 }
415
417 const_iterator begin() const noexcept
418 {
419 return const_iterator{*this};
420 }
421
423 const_iterator cbegin() const noexcept
424 {
425 return const_iterator{*this};
426 }
427
443 iterator end() noexcept
444 {
445 return iterator{*this, size()};
446 }
447
449 const_iterator end() const noexcept
450 {
451 return const_iterator{*this, size()};
452 }
453
455 const_iterator cend() const noexcept
456 {
457 return const_iterator{*this, size()};
458 }
460
480 {
481 if (i >= size()) // [[unlikely]]
482 {
483 throw std::out_of_range{"Trying to access element behind the last in bitpacked_sequence."};
484 }
485 return (*this)[i];
486 }
487
490 {
491 if (i >= size()) // [[unlikely]]
492 {
493 throw std::out_of_range{"Trying to access element behind the last in bitpacked_sequence."};
494 }
495 return (*this)[i];
496 }
497
516 {
517 assert(i < size());
518 return data[i];
519 }
520
522 const_reference operator[](size_type const i) const noexcept
523 {
524 assert(i < size());
525 return assign_rank_to(data[i], const_reference{});
526 }
527
543 reference front() noexcept
544 {
545 assert(size() > 0);
546 return (*this)[0];
547 }
548
550 const_reference front() const noexcept
551 {
552 assert(size() > 0);
553 return (*this)[0];
554 }
555
571 reference back() noexcept
572 {
573 assert(size() > 0);
574 return (*this)[size() - 1];
575 }
576
578 const_reference back() const noexcept
579 {
580 assert(size() > 0);
581 return (*this)[size() - 1];
582 }
583
593 constexpr data_type & raw_data() noexcept
594 {
595 return data;
596 }
597
599 constexpr data_type const & raw_data() const noexcept
600 {
601 return data;
602 }
604
621 bool empty() const noexcept
622 {
623 return size() == 0;
624 }
625
639 size_type size() const noexcept
640 {
641 return data.size();
642 }
643
660 size_type max_size() const noexcept
661 {
662 return data.max_size();
663 }
664
678 size_type capacity() const noexcept
679 {
680 return data.capacity();
681 }
682
703 void reserve(size_type const new_cap)
704 {
705 data.reserve(new_cap);
706 }
707
726 {
727 data.shrink_to_fit();
728 }
730
746 void clear() noexcept
747 {
748 data.clear();
749 }
750
772 {
773 return insert(pos, 1, value);
774 }
775
797 iterator insert(const_iterator pos, size_type const count, value_type const value)
798 {
799 auto const pos_as_num = std::distance(cbegin(), pos); // we want to insert BEFORE this position
800
801 data.insert(data.begin() + pos_as_num, count, to_rank(value));
802
803 return begin() + pos_as_num;
804 }
805
832 template <std::forward_iterator begin_iterator_type, typename end_iterator_type>
833 iterator insert(const_iterator pos, begin_iterator_type begin_it, end_iterator_type end_it)
834 requires std::sentinel_for<end_iterator_type, begin_iterator_type>
835 && std::common_reference_with<std::iter_value_t<begin_iterator_type>, value_type>
836 {
837 auto const pos_as_num = std::distance(cbegin(), pos);
838
839 auto v = std::ranges::subrange<begin_iterator_type, end_iterator_type>{begin_it, end_it}
840 | views::convert<value_type> | views::to_rank;
841 data.insert(data.begin() + pos_as_num, std::ranges::begin(v), std::ranges::end(v));
842
843 return begin() + pos_as_num;
844 }
845
867 {
868 return insert(pos, ilist.begin(), ilist.end());
869 }
870
893 {
894 if (begin_it >= end_it) // [[unlikely]]
895 return begin() + std::distance(cbegin(), end_it);
896
897 auto const begin_it_pos = std::distance(cbegin(), begin_it);
898 auto const end_it_pos = std::distance(cbegin(), end_it);
899
900 data.erase(data.cbegin() + begin_it_pos, data.cbegin() + end_it_pos);
901
902 return begin() + begin_it_pos;
903 }
904
927 {
928 return erase(pos, pos + 1);
929 }
930
948 void push_back(value_type const value)
949 {
950 data.push_back(to_rank(value));
951 }
952
971 void pop_back()
972 {
973 assert(size() > 0);
974 data.pop_back();
975 }
976
1005 void resize(size_type const count)
1006 {
1007 assert(count < max_size());
1008 data.resize(count);
1009 }
1010
1015 void resize(size_type const count, value_type const value)
1016 {
1017 assert(count < max_size());
1018 data.resize(count, to_rank(value));
1019 }
1020
1034 constexpr void swap(bitpacked_sequence & rhs) noexcept
1035 {
1036 std::swap(data, rhs.data);
1037 }
1038
1040 constexpr void swap(bitpacked_sequence && rhs) noexcept
1041 {
1042 std::swap(data, rhs.data);
1043 }
1044
1059 friend constexpr void swap(bitpacked_sequence & lhs, bitpacked_sequence & rhs) noexcept
1060 {
1061 std::swap(lhs, rhs);
1062 }
1063
1065 friend constexpr void swap(bitpacked_sequence && lhs, bitpacked_sequence && rhs) noexcept
1066 {
1067 std::swap(lhs, rhs);
1068 }
1070
1079 constexpr bool operator==(bitpacked_sequence const & rhs) const noexcept
1080 {
1081 return data == rhs.data;
1082 }
1083
1088 constexpr bool operator!=(bitpacked_sequence const & rhs) const noexcept
1089 {
1090 return data != rhs.data;
1091 }
1092
1097 constexpr bool operator<(bitpacked_sequence const & rhs) const noexcept
1098 {
1099 return data < rhs.data;
1100 }
1101
1106 constexpr bool operator>(bitpacked_sequence const & rhs) const noexcept
1107 {
1108 return data > rhs.data;
1109 }
1110
1115 constexpr bool operator<=(bitpacked_sequence const & rhs) const noexcept
1116 {
1117 return data <= rhs.data;
1118 }
1119
1124 constexpr bool operator>=(bitpacked_sequence const & rhs) const noexcept
1125 {
1126 return data >= rhs.data;
1127 }
1129
1137 template <cereal_archive archive_t>
1138 void CEREAL_SERIALIZE_FUNCTION_NAME(archive_t & archive)
1139 {
1140 archive(data);
1141 }
1143};
1144
1145} // namespace seqan3
Provides seqan3::alphabet_proxy.
T begin(T... args)
Adaptions of concepts from the Cereal library.
A CRTP-base that eases the definition of proxy types returned in place of regular alphabets.
Definition alphabet_proxy.hpp:62
constexpr reference_proxy_type & assign_rank(alphabet_rank_t< alphabet_type > const r) noexcept
Assigns a rank.
Definition alphabet_proxy.hpp:142
constexpr auto to_rank() const noexcept
Returns the rank.
Definition alphabet_proxy.hpp:204
A space-optimised version of std::vector that compresses multiple letters into a single byte.
Definition bitpacked_sequence.hpp:62
void assign(size_type const count, value_type const value)
Assign with count times value.
Definition bitpacked_sequence.hpp:341
alphabet_type value_type
Equals the alphabet_type.
Definition bitpacked_sequence.hpp:161
std::ranges::range_difference_t< data_type > difference_type
A signed integer type (usually std::ptrdiff_t)
Definition bitpacked_sequence.hpp:187
iterator insert(const_iterator pos, begin_iterator_type begin_it, end_iterator_type end_it)
Inserts elements from range [begin_it, end_it) before position in the container.
Definition bitpacked_sequence.hpp:833
constexpr bitpacked_sequence(bitpacked_sequence &&)=default
Defaulted.
iterator erase(const_iterator pos)
Removes specified elements from the container.
Definition bitpacked_sequence.hpp:926
reference_proxy_type reference
A proxy type (models seqan3::writable_semialphabet) that enables assignment, think of it as value_typ...
Definition bitpacked_sequence.hpp:167
constexpr bool operator>(bitpacked_sequence const &rhs) const noexcept
Checks whether *this is greater than rhs.
Definition bitpacked_sequence.hpp:1106
size_type max_size() const noexcept
Returns the maximum number of elements the container is able to hold due to system or library impleme...
Definition bitpacked_sequence.hpp:660
bool empty() const noexcept
Checks whether the container is empty.
Definition bitpacked_sequence.hpp:621
reference operator[](size_type const i) noexcept
Return the i-th element.
Definition bitpacked_sequence.hpp:515
const_reference back() const noexcept
Return the last element.
Definition bitpacked_sequence.hpp:578
const_iterator cbegin() const noexcept
Returns an iterator to the first element of the container.
Definition bitpacked_sequence.hpp:423
iterator erase(const_iterator begin_it, const_iterator end_it)
Removes specified elements from the container.
Definition bitpacked_sequence.hpp:892
iterator insert(const_iterator pos, value_type const value)
Inserts value before position in the container.
Definition bitpacked_sequence.hpp:771
~bitpacked_sequence()=default
Defaulted.
void resize(size_type const count, value_type const value)
Resizes the container to contain count elements.
Definition bitpacked_sequence.hpp:1015
void clear() noexcept
Removes all elements from the container.
Definition bitpacked_sequence.hpp:746
constexpr bool operator<=(bitpacked_sequence const &rhs) const noexcept
Checks whether *this is less than or equal to rhs.
Definition bitpacked_sequence.hpp:1115
const_iterator end() const noexcept
Returns an iterator to the element following the last element of the container.
Definition bitpacked_sequence.hpp:449
constexpr bool operator>=(bitpacked_sequence const &rhs) const noexcept
Checks whether *this is greater than or equal to rhs.
Definition bitpacked_sequence.hpp:1124
constexpr bitpacked_sequence(bitpacked_sequence const &)=default
Defaulted.
void push_back(value_type const value)
Appends the given element value to the end of the container.
Definition bitpacked_sequence.hpp:948
const_reference at(size_type const i) const
Return the i-th element.
Definition bitpacked_sequence.hpp:489
constexpr bool operator!=(bitpacked_sequence const &rhs) const noexcept
Checks whether *this is not equal to rhs.
Definition bitpacked_sequence.hpp:1088
iterator end() noexcept
Returns an iterator to the element following the last element of the container.
Definition bitpacked_sequence.hpp:443
void assign(std::initializer_list< value_type > ilist)
Assign from std::initializer_list.
Definition bitpacked_sequence.hpp:386
void reserve(size_type const new_cap)
Increase the capacity to a value that's greater or equal to new_cap.
Definition bitpacked_sequence.hpp:703
const_iterator begin() const noexcept
Returns an iterator to the first element of the container.
Definition bitpacked_sequence.hpp:417
detail::random_access_iterator< bitpacked_sequence const > const_iterator
The const_iterator type of this container (a random access iterator).
Definition bitpacked_sequence.hpp:182
bitpacked_sequence(std::initializer_list< value_type > ilist)
Construct from std::initializer_list.
Definition bitpacked_sequence.hpp:282
void shrink_to_fit()
Requests the removal of unused capacity.
Definition bitpacked_sequence.hpp:725
const_reference operator[](size_type const i) const noexcept
Return the i-th element.
Definition bitpacked_sequence.hpp:522
iterator begin() noexcept
Returns an iterator to the first element of the container.
Definition bitpacked_sequence.hpp:411
std::ranges::range_size_t< data_type > size_type
An unsigned integer type (usually std::size_t)
Definition bitpacked_sequence.hpp:192
size_type capacity() const noexcept
Returns the number of elements that the container has currently allocated space for.
Definition bitpacked_sequence.hpp:678
void assign(other_range_t &&range)
Assign from a different range.
Definition bitpacked_sequence.hpp:320
detail::random_access_iterator< bitpacked_sequence > iterator
The iterator type of this container (a random access iterator).
Definition bitpacked_sequence.hpp:177
alphabet_type const_reference
Equals the alphabet_type / value_type.
Definition bitpacked_sequence.hpp:172
iterator insert(const_iterator pos, std::initializer_list< value_type > const &ilist)
Inserts elements from initializer list before position in the container.
Definition bitpacked_sequence.hpp:866
reference at(size_type const i)
Return the i-th element.
Definition bitpacked_sequence.hpp:479
bitpacked_sequence(begin_iterator_type begin_it, end_iterator_type end_it)
Construct from pair of iterators.
Definition bitpacked_sequence.hpp:262
friend constexpr void swap(bitpacked_sequence &lhs, bitpacked_sequence &rhs) noexcept
Swap contents with another instance.
Definition bitpacked_sequence.hpp:1059
const_reference front() const noexcept
Return the first element. Calling front on an empty container is undefined.
Definition bitpacked_sequence.hpp:550
constexpr bitpacked_sequence & operator=(bitpacked_sequence const &)=default
Defaulted.
constexpr data_type const & raw_data() const noexcept
Provides direct, unsafe access to underlying data structures.
Definition bitpacked_sequence.hpp:599
constexpr bool operator==(bitpacked_sequence const &rhs) const noexcept
Checks whether *this is equal to rhs.
Definition bitpacked_sequence.hpp:1079
friend constexpr void swap(bitpacked_sequence &&lhs, bitpacked_sequence &&rhs) noexcept
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition bitpacked_sequence.hpp:1065
const_iterator cend() const noexcept
Returns an iterator to the element following the last element of the container.
Definition bitpacked_sequence.hpp:455
bitpacked_sequence(size_type const count, value_type const value)
Construct with count times value.
Definition bitpacked_sequence.hpp:241
bitpacked_sequence & operator=(std::initializer_list< value_type > ilist)
Assign from std::initializer_list.
Definition bitpacked_sequence.hpp:298
void resize(size_type const count)
Resizes the container to contain count elements.
Definition bitpacked_sequence.hpp:1005
void pop_back()
Removes the last element of the container.
Definition bitpacked_sequence.hpp:971
reference back() noexcept
Return the last element.
Definition bitpacked_sequence.hpp:571
iterator insert(const_iterator pos, size_type const count, value_type const value)
Inserts count copies of value before position in the container.
Definition bitpacked_sequence.hpp:797
bitpacked_sequence()=default
Defaulted.
constexpr void swap(bitpacked_sequence &&rhs) noexcept
Swap contents with another instance.
Definition bitpacked_sequence.hpp:1040
bitpacked_sequence(other_range_t &&range)
Construct from a different range.
Definition bitpacked_sequence.hpp:223
constexpr data_type & raw_data() noexcept
Provides direct, unsafe access to underlying data structures.
Definition bitpacked_sequence.hpp:593
constexpr bool operator<(bitpacked_sequence const &rhs) const noexcept
Checks whether *this is less than rhs.
Definition bitpacked_sequence.hpp:1097
constexpr void swap(bitpacked_sequence &rhs) noexcept
Swap contents with another instance.
Definition bitpacked_sequence.hpp:1034
size_type size() const noexcept
Returns the number of elements in the container, i.e. std::distance(begin(), end()).
Definition bitpacked_sequence.hpp:639
reference front() noexcept
Return the first element. Calling front on an empty container is undefined.
Definition bitpacked_sequence.hpp:543
constexpr bitpacked_sequence & operator=(bitpacked_sequence &&)=default
Defaulted.
void assign(begin_iterator_type begin_it, end_iterator_type end_it)
Assign from pair of iterators.
Definition bitpacked_sequence.hpp:365
T distance(T... args)
T end(T... args)
auto const to_rank
A view that calls seqan3::to_rank() on each element in the input range.
Definition to_rank.hpp:63
decltype(seqan3::to_rank(std::declval< semi_alphabet_type >())) alphabet_rank_t
The rank_type of the semi-alphabet; defined as the return type of seqan3::to_rank....
Definition alphabet/concept.hpp:164
constexpr auto assign_rank_to
Assign a rank to an alphabet object.
Definition alphabet/concept.hpp:288
constexpr auto to_rank
Return the rank representation of a (semi-)alphabet object.
Definition alphabet/concept.hpp:152
Refines seqan3::alphabet and adds assignability.
Provides math related functionality.
The main SeqAn3 namespace.
Definition aligned_sequence_concept.hpp:26
SeqAn specific customisations in the standard namespace.
#define SEQAN3_WORKAROUND_GCC_BOGUS_MEMCPY_START(...)
Denotes the start of a block where diagnostics are ignored.
Definition platform.hpp:268
#define SEQAN3_WORKAROUND_GCC_BOGUS_MEMCPY_STOP
Denotes the end of a block where diagnostics are ignored.
Definition platform.hpp:277
Provides the seqan3::detail::random_access_iterator class.
T swap(T... args)
Provides seqan3::views::to_char.
Provides seqan3::views::to_rank.
Provides seqan3::views::convert.
Hide me