15 #include <sdsl/suffix_trees.hpp> 33 template <
typename index_t>
34 class fm_index_cursor;
36 template <
typename index_t>
37 class bi_fm_index_cursor;
76 sdsl::csa_wt<sdsl::wt_blcd<sdsl::bit_vector,
77 sdsl::rank_support_v<>,
78 sdsl::select_support_scan<>,
79 sdsl::select_support_scan<0>>,
82 sdsl::sa_order_sa_sampling<>,
127 template <
bool is_collection = false, detail::SdslIndex sdsl_index_type_ = default_sdsl_index_type>
163 using size_type =
typename sdsl_index_type::size_type;
169 template <
typename bi_fm_index_t>
172 template <
typename fm_index_t>
175 template <
typename fm_index_t>
176 friend class detail::fm_index_cursor_node;
196 template <std::ranges::Range text_t>
222 template <std::ranges::Range text_t>
230 static_assert(dimension_v<text_t> == 1,
"The input cannot be a text collection.");
236 constexpr
auto cexpr_sigma = alphabet_size<innermost_value_type_t<text_t>>;
243 sdsl::int_vector<8> tmp_text(text.size());
249 if constexpr (cexpr_sigma == 256)
253 "character alphabets the last one/two values are reserved" 254 "(single sequence/collection).");
261 sdsl::construct_im(
index, tmp_text, 0);
270 template <std::ranges::Range text_t>
278 "The elements of the text collection must model BidirectionalRange.");
280 static_assert(dimension_v<text_t> == 2,
"The input must be a text collection.");
289 bool all_empty =
true;
291 for (
auto && t : text)
297 text_size += 1 + t.size();
303 constexpr
auto cexpr_sigma = alphabet_size<innermost_value_type_t<text_t>>;
307 sdsl::bit_vector pos(text_size, 0);
308 size_t prefix_sum{0};
310 for (
auto && t : text)
313 prefix_sum += t.size() + 1;
320 sdsl::int_vector<8> tmp_text(text_size - 1);
322 constexpr uint8_t delimiter = cexpr_sigma >= 255 ? 255 : cexpr_sigma + 1;
330 if constexpr (cexpr_sigma >= 255)
334 " for full character alphabets the last one/" 335 "two values are reserved (single sequence/" 354 sdsl::construct_im(
index, tmp_text, 0);
419 return !(*
this == rhs);
448 template <CerealArchive archive_t>
449 void CEREAL_SERIALIZE_FUNCTION_NAME(archive_t & archive)
469 template <std::ranges::Range text_t>
471 fm_index(text_t &&) -> fm_index<dimension_v<text_t> != 1>;
fm_index(text_t &&) -> fm_index< dimension_v< text_t > !=1 >
Deduces the dimensions of the text.
sdsl::csa_wt< sdsl::wt_blcd< sdsl::bit_vector, sdsl::rank_support_v<>, sdsl::select_support_scan<>, sdsl::select_support_scan< 0 > >, 16, 10000000, sdsl::sa_order_sa_sampling<>, sdsl::isa_sampling<>, sdsl::plain_byte_alphabet > sdsl_wt_index_type
The FM Index Configuration using a Wavelet Tree.
Definition: fm_index.hpp:84
Provides the concepts for seqan3::fm_index and seqan3::bi_fm_index and its traits and cursors...
size_type size() const noexcept
Returns the length of the indexed text including sentinel characters.
Definition: fm_index.hpp:368
Provides various shortcuts for common std::ranges functions.
constexpr auto reverse
A range adaptor that presents the underlying range in reverse order.
Definition: ranges:721
size_t sigma
The alphabet size of the text.
Definition: fm_index.hpp:132
bool empty() const noexcept
Checks whether the index is empty.
Definition: fm_index.hpp:384
Provides an alphabet mapping that implements an identity map (i.e. each character is mapped to its ra...
sdsl_index_type index
Underlying index from the SDSL.
Definition: fm_index.hpp:150
The main SeqAn3 namespace.
sdsl::select_support_sd< 1 > text_begin_ss
Select support for text_begin.
Definition: fm_index.hpp:155
constexpr auto join
Flattens a View of ranges into a View.
Definition: ranges:683
sdsl::rank_support_sd< 1 > text_begin_rs
Rank support for text_begin.
Definition: fm_index.hpp:157
bool operator==(fm_index const &rhs) const noexcept
Compares two indices.
Definition: fm_index.hpp:400
Provides seqan3::view::to_rank.
Specifies requirements of a Range type for which begin returns a type that models std::BidirectionalI...
static constexpr bool is_collection_
Indicates whether index is built over a collection.
Definition: fm_index.hpp:134
void construct(text_t &&text)
Constructs the index given a range. The range cannot be an rvalue (i.e. a temporary object) and has t...
Definition: fm_index.hpp:223
bool operator!=(fm_index const &rhs) const noexcept
Compares two indices.
Definition: fm_index.hpp:417
sdsl_index_type_ sdsl_index_type
The type of the underlying SDSL index.
Definition: fm_index.hpp:140
Adaptations of concepts from the Ranges TS.
fm_index()=default
Default constructor.
typename sdsl_index_type::alphabet_type::sigma_type sdsl_sigma_type
The type of the alphabet size of the underlying SDSL index.
Definition: fm_index.hpp:146
typename innermost_value_type< t >::type innermost_value_type_t
Shortcut for seqan3::innermost_value_type (TransformationTrait shortcut).
Definition: range.hpp:191
::ranges::begin begin
Alias for ranges::begin. Returns an iterator to the beginning of a range.
Definition: ranges:174
::ranges::copy copy
Alias for ranges::copy. Copies a range of elements to a new location.
Definition: algorithm:44
fm_index(text_t &&text)
Constructor that immediately constructs the index given a range. The range cannot be empty...
Definition: fm_index.hpp:197
fm_index & operator=(fm_index const &)=default
Copy assignment.
sdsl_wt_index_type default_sdsl_index_type
The default FM Index Configuration.
Definition: fm_index.hpp:90
The SeqAn Bidirectional FM Index Cursor.
Definition: bi_fm_index_cursor.hpp:54
Byte alphabet that does no mapping of char_type to comp_char_type and vice versa. ...
Definition: csa_alphabet_strategy.hpp:35
cursor_type begin() const noexcept
Returns a seqan3::fm_index_cursor on the index that can be used for searching.
Definition: fm_index.hpp:436
Adaptations of algorithms from the Ranges TS.
typename sdsl_index_type::size_type size_type
Type for representing positions in the indexed text.
Definition: fm_index.hpp:164
sdsl::sd_vector text_begin
Bitvector storing begin positions for collections.
Definition: fm_index.hpp:153
~fm_index()=default
Destructor.
Provides various transformation traits used by the range module.
typename reference< t >::type reference_t
Shortcut for seqan3::reference (TransformationTrait shortcut).
Definition: pre.hpp:77
Provides the seqan3::fm_index_cursor for searching in the unidirectional seqan3::fm_index.
Provides the internal representation of a node of the seqan3::fm_index_cursor.
constexpr auto alphabet_size
A type trait that holds the size of a (semi-)alphabet.
Definition: concept.hpp:678
typename sdsl_index_type::alphabet_type::char_type sdsl_char_type
The type of the reduced alphabet type. (The reduced alphabet might be smaller than the original alpha...
Definition: fm_index.hpp:144
::ranges::end end
Alias for ranges::end. Returns an iterator to the end of a range.
Definition: ranges:179
constexpr auto transform
A range adaptor that takes a invocable and returns a view of the elements with the invocable applied...
Definition: ranges:911
The SeqAn FM Index Cursor.
Definition: fm_index_cursor.hpp:64
auto const to_rank
A view that calls seqan3::to_rank() on each element in the input range.
Definition: to_rank.hpp:68
The SeqAn FM Index.
Definition: fm_index.hpp:128
This header includes C++17 filesystem support and imports it into namespace seqan3::filesystem (indep...