19#include <sdsl/suffix_trees.hpp>
27namespace seqan3::detail
32struct fm_index_validator
48 template <semialphabet alphabet_t, text_layout text_layout_mode_, std::ranges::range text_t>
49 static void validate(text_t && text)
53 static_assert(std::ranges::bidirectional_range<text_t>,
"The text must model bidirectional_range.");
54 static_assert(std::convertible_to<range_innermost_value_t<text_t>, alphabet_t>,
55 "The alphabet of the text collection must be convertible to the alphabet of the index.");
56 static_assert(range_dimension_v<text_t> == 1,
"The input cannot be a text collection.");
58 if (std::ranges::empty(text))
63 static_assert(std::ranges::bidirectional_range<text_t>,
64 "The text collection must model bidirectional_range.");
65 static_assert(std::ranges::bidirectional_range<std::ranges::range_reference_t<text_t>>,
66 "The elements of the text collection must model bidirectional_range.");
67 static_assert(std::convertible_to<range_innermost_value_t<text_t>, alphabet_t>,
68 "The alphabet of the text collection must be convertible to the alphabet of the index.");
69 static_assert(range_dimension_v<text_t> == 2,
"The input must be a text collection.");
71 if (std::ranges::empty(text))
74 static_assert(alphabet_size<range_innermost_value_t<text_t>> <= 256,
"The alphabet is too big.");
84template <
typename index_t>
87template <
typename index_t>
88class bi_fm_index_cursor;
92template <semialphabet alphabet_t, text_layout text_layout_mode_, detail::sdsl_index sdsl_index_type_>
93class reverse_fm_index;
130 sdsl::csa_wt<sdsl::wt_blcd<sdsl::bit_vector,
131 sdsl::rank_support_v<>,
132 sdsl::select_support_scan<>,
133 sdsl::select_support_scan<0>>,
136 sdsl::sa_order_sa_sampling<>,
137 sdsl::isa_sampling<>,
138 sdsl::plain_byte_alphabet>;
195 using sdsl_index_type = sdsl_index_type_;
199 using sdsl_char_type =
typename sdsl_index_type::alphabet_type::char_type;
201 using sdsl_sigma_type =
typename sdsl_index_type::alphabet_type::sigma_type;
204 friend class detail::reverse_fm_index<alphabet_t, text_layout_mode_, sdsl_index_type_>;
207 sdsl_index_type index;
210 sdsl::sd_vector<> text_begin;
212 sdsl::select_support_sd<1> text_begin_ss;
214 sdsl::rank_support_sd<1> text_begin_rs;
217 template <
typename output_it_t,
typename sequence_t>
218 static output_it_t copy_sequence_ranks_shifted_by_one(output_it_t output_it, sequence_t &&
sequence)
220 constexpr size_t sigma = alphabet_size<alphabet_t>;
223 constexpr auto warn_if_rank_out_of_range = [](uint8_t
const rank)
225 if (rank >= max_sigma - 1)
227 "character alphabets the last one/two values are reserved"
228 "(single sequence/collection).");
233 [&warn_if_rank_out_of_range](
auto const & chr)
236 if constexpr (sigma >= max_sigma)
237 warn_if_rank_out_of_range(rank);
262 template <std::ranges::range text_t>
264 void construct(text_t && text)
266 detail::fm_index_validator::validate<alphabet_t, text_layout_mode_>(text);
273 sdsl::int_vector<8> tmp_text(std::ranges::distance(text));
276 copy_sequence_ranks_shifted_by_one(
std::ranges::begin(tmp_text), text | std::views::reverse);
278 sdsl::construct_im(index, tmp_text, 0);
287 template <std::ranges::range text_t>
289 void construct(text_t && text,
bool reverse =
false)
291 detail::fm_index_validator::validate<alphabet_t, text_layout_mode_>(text);
295 for (
auto && t : text)
296 text_sizes.
push_back(std::ranges::distance(t));
298 size_t const number_of_texts{text_sizes.
size()};
303 if (number_of_texts == text_size)
306 constexpr auto sigma = alphabet_size<alphabet_t>;
311 sdsl::sd_vector_builder builder(text_size, number_of_texts);
312 size_t prefix_sum{0};
314 for (
auto &&
size : text_sizes)
316 builder.set(prefix_sum);
317 prefix_sum +=
size + 1;
320 text_begin = sdsl::sd_vector<>(builder);
321 text_begin_ss = sdsl::select_support_sd<1>(&text_begin);
322 text_begin_rs = sdsl::rank_support_sd<1>(&text_begin);
325 sdsl::int_vector<8> tmp_text(text_size - (number_of_texts > 1));
327 constexpr uint8_t delimiter = sigma >= 255 ? 255 : sigma + 1;
329 auto copy_join_with = [](
auto output_it,
auto &&
collection)
333 auto const collection_sentinel = std::ranges::end(
collection);
334 if (collection_it == collection_sentinel)
337 output_it = copy_sequence_ranks_shifted_by_one(output_it, *collection_it);
340 for (; collection_it != collection_sentinel; ++collection_it)
342 *output_it = delimiter;
344 output_it = copy_sequence_ranks_shifted_by_one(output_it, *collection_it);
354 if (number_of_texts == 1)
355 tmp_text.back() = delimiter;
364 if (number_of_texts == 1)
367 tmp_text.back() = delimiter;
375 sdsl::construct_im(index, tmp_text, 0);
393 template <
typename bi_fm_index_t>
396 template <
typename fm_index_t>
399 template <
typename fm_index_t>
400 friend class detail::fm_index_cursor_node;
410 text_begin{rhs.text_begin},
411 text_begin_ss{rhs.text_begin_ss},
412 text_begin_rs{rhs.text_begin_rs}
414 text_begin_ss.set_vector(&text_begin);
415 text_begin_rs.set_vector(&text_begin);
420 index{
std::move(rhs.index)},
421 text_begin{
std::move(rhs.text_begin)},
422 text_begin_ss{
std::move(rhs.text_begin_ss)},
423 text_begin_rs{
std::move(rhs.text_begin_rs)}
425 text_begin_ss.set_vector(&text_begin);
426 text_begin_rs.set_vector(&text_begin);
432 index = std::move(rhs.index);
433 text_begin = std::move(rhs.text_begin);
434 text_begin_ss = std::move(rhs.text_begin_ss);
435 text_begin_rs = std::move(rhs.text_begin_rs);
437 text_begin_ss.set_vector(&text_begin);
438 text_begin_rs.set_vector(&text_begin);
453 template <std::ranges::b
idirectional_range text_t>
456 construct(std::forward<text_t>(text));
506 return (index == rhs.index) && (text_begin == rhs.text_begin);
522 return !(*
this == rhs);
551 template <cereal_archive archive_t>
552 void CEREAL_SERIALIZE_FUNCTION_NAME(archive_t & archive)
556 archive(text_begin_ss);
557 text_begin_ss.set_vector(&text_begin);
558 archive(text_begin_rs);
559 text_begin_rs.set_vector(&text_begin);
561 auto sigma = alphabet_size<alphabet_t>;
563 if (sigma != alphabet_size<alphabet_t>)
566 +
" but it is being read into an fm_index with an alphabet of size "
575 + (tmp ?
"text collection" :
"single text")
576 +
" but it is being read into an fm_index expecting a "
587template <std::ranges::range text_t>
592namespace seqan3::detail
611class reverse_fm_index :
public fm_index<alphabet_t, text_layout_mode, sdsl_index_type>
615 template <std::ranges::range text_t>
616 void construct_(text_t && text)
620 auto reverse_text = text | std::views::reverse;
621 this->construct(reverse_text);
625 auto reverse_text = text | views::deep{std::views::reverse} | std::views::reverse;
626 this->construct(reverse_text,
true);
634 template <std::ranges::b
idirectional_range text_t>
635 explicit reverse_fm_index(text_t && text)
637 construct_(std::forward<text_t>(text));
The SeqAn Bidirectional FM Index Cursor.
Definition: bi_fm_index_cursor.hpp:55
The SeqAn FM Index Cursor.
Definition: fm_index_cursor.hpp:87
The SeqAn FM Index.
Definition: fm_index.hpp:189
fm_index & operator=(fm_index rhs)
When copy/move assigning, also update internal data structures.
Definition: fm_index.hpp:430
fm_index()=default
Defaulted.
static constexpr text_layout text_layout_mode
Indicates whether index is built over a collection.
Definition: fm_index.hpp:380
cursor_type cursor() const noexcept
Returns a seqan3::fm_index_cursor on the index that can be used for searching. .
Definition: fm_index.hpp:539
size_type size() const noexcept
Returns the length of the indexed text including sentinel characters.
Definition: fm_index.hpp:471
bool operator!=(fm_index const &rhs) const noexcept
Compares two indices.
Definition: fm_index.hpp:520
bool empty() const noexcept
Checks whether the index is empty.
Definition: fm_index.hpp:487
bool operator==(fm_index const &rhs) const noexcept
Compares two indices.
Definition: fm_index.hpp:503
~fm_index()=default
Defaulted.
alphabet_t alphabet_type
The type of the underlying character of the indexed text.
Definition: fm_index.hpp:386
fm_index(fm_index &&rhs)
When move constructing, also update internal data structures.
Definition: fm_index.hpp:419
typename sdsl_index_type::size_type size_type
Type for representing positions in the indexed text.
Definition: fm_index.hpp:388
fm_index(text_t &&text)
Constructor that immediately constructs the index given a range. The range cannot be empty.
Definition: fm_index.hpp:454
fm_index(fm_index const &rhs)
When copy constructing, also update internal data structures.
Definition: fm_index.hpp:408
Provides various transformation traits used by the range module.
Provides the internal representation of a node of the seqan3::fm_index_cursor.
Provides the seqan3::fm_index_cursor for searching in the unidirectional seqan3::fm_index.
constexpr auto to_rank
Return the rank representation of a (semi-)alphabet object.
Definition: concept.hpp:155
text_layout
The possible text layouts (single, collection) the seqan3::fm_index and seqan3::bi_fm_index can suppo...
Definition: concept.hpp:91
sdsl_wt_index_type default_sdsl_index_type
The default FM Index Configuration.
Definition: fm_index.hpp:145
sdsl::csa_wt< sdsl::wt_blcd< sdsl::bit_vector, sdsl::rank_support_v<>, sdsl::select_support_scan<>, sdsl::select_support_scan< 0 > >, 16, 10 '000 '000, sdsl::sa_order_sa_sampling<>, sdsl::isa_sampling<>, sdsl::plain_byte_alphabet > sdsl_wt_index_type
The FM Index Configuration using a Wavelet Tree.
Definition: fm_index.hpp:138
@ single
The text is a single range.
Definition: concept.hpp:93
@ collection
The text is a range of ranges.
Definition: concept.hpp:95
The basis for seqan3::alphabet, but requires only rank interface (not char).
The generic concept for a (biological) sequence.
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:29
fm_index(text_t &&) -> fm_index< range_innermost_value_t< text_t >, text_layout
Deduces the alphabet and dimensions of the text.
Definition: fm_index.hpp:588
SeqAn specific customisations in the standard namespace.
Provides the concept for seqan3::detail::sdsl_index.
Provides seqan3::views::to_rank.