17#include <seqan3/contrib/sdsl-lite.hpp>
23namespace seqan3::detail
28struct fm_index_validator
44 template <semialphabet alphabet_t, text_layout text_layout_mode_, std::ranges::range text_t>
45 static void validate(text_t && text)
49 static_assert(std::ranges::bidirectional_range<text_t>,
"The text must model bidirectional_range.");
50 static_assert(std::convertible_to<range_innermost_value_t<text_t>, alphabet_t>,
51 "The alphabet of the text collection must be convertible to the alphabet of the index.");
52 static_assert(range_dimension_v<text_t> == 1,
"The input cannot be a text collection.");
54 if (std::ranges::empty(text))
59 static_assert(std::ranges::bidirectional_range<text_t>,
60 "The text collection must model bidirectional_range.");
61 static_assert(std::ranges::bidirectional_range<std::ranges::range_reference_t<text_t>>,
62 "The elements of the text collection must model bidirectional_range.");
63 static_assert(std::convertible_to<range_innermost_value_t<text_t>, alphabet_t>,
64 "The alphabet of the text collection must be convertible to the alphabet of the index.");
65 static_assert(range_dimension_v<text_t> == 2,
"The input must be a text collection.");
67 if (std::ranges::empty(text))
70 static_assert(alphabet_size<range_innermost_value_t<text_t>> <= 256,
"The alphabet is too big.");
80template <
typename index_t>
83template <
typename index_t>
84class bi_fm_index_cursor;
88template <semialphabet alphabet_t, text_layout text_layout_mode_, detail::sdsl_index sdsl_index_type_>
89class reverse_fm_index;
126 seqan3::contrib::sdsl::wt_blcd<seqan3::contrib::sdsl::bit_vector,
127 seqan3::contrib::sdsl::rank_support_v<>,
128 seqan3::contrib::sdsl::select_support_scan<>,
129 seqan3::contrib::sdsl::select_support_scan<0>>,
132 seqan3::contrib::sdsl::sa_order_sa_sampling<>,
133 seqan3::contrib::sdsl::isa_sampling<>,
134 seqan3::contrib::sdsl::plain_byte_alphabet>;
191 using sdsl_index_type = sdsl_index_type_;
195 using sdsl_char_type =
typename sdsl_index_type::alphabet_type::char_type;
197 using sdsl_sigma_type =
typename sdsl_index_type::alphabet_type::sigma_type;
200 friend class detail::reverse_fm_index<alphabet_t,
text_layout_mode_, sdsl_index_type_>;
203 sdsl_index_type index;
206 seqan3::contrib::sdsl::sd_vector<> text_begin;
208 seqan3::contrib::sdsl::select_support_sd<1> text_begin_ss;
210 seqan3::contrib::sdsl::rank_support_sd<1> text_begin_rs;
213 template <
typename output_it_t,
typename sequence_t>
223 "character alphabets the last one/two values are reserved"
224 "(single sequence/collection).");
260 template <std::ranges::range text_t>
264 detail::fm_index_validator::validate<alphabet_t, text_layout_mode_>(
text);
271 seqan3::contrib::sdsl::int_vector<8>
tmp_text(std::ranges::distance(
text));
276 seqan3::contrib::sdsl::construct_im(index,
tmp_text, 0);
285 template <std::ranges::range text_t>
287 void construct(
text_t &&
text,
bool reverse =
false)
289 detail::fm_index_validator::validate<alphabet_t, text_layout_mode_>(
text);
293 for (
auto && t :
text)
294 text_sizes.push_back(std::ranges::distance(t));
318 text_begin = seqan3::contrib::sdsl::sd_vector<>(
builder);
319 text_begin_ss = seqan3::contrib::sdsl::select_support_sd<1>(&text_begin);
320 text_begin_rs = seqan3::contrib::sdsl::rank_support_sd<1>(&text_begin);
325 constexpr uint8_t
delimiter = sigma >= 255 ? 255 : sigma + 1;
373 seqan3::contrib::sdsl::construct_im(index,
tmp_text, 0);
391 template <
typename bi_fm_index_t>
394 template <
typename fm_index_t>
397 template <
typename fm_index_t>
398 friend struct detail::fm_index_cursor_node;
408 text_begin{
rhs.text_begin},
409 text_begin_ss{
rhs.text_begin_ss},
410 text_begin_rs{
rhs.text_begin_rs}
412 text_begin_ss.set_vector(&text_begin);
413 text_begin_rs.set_vector(&text_begin);
418 index{
std::move(
rhs.index)},
419 text_begin{
std::move(
rhs.text_begin)},
420 text_begin_ss{
std::move(
rhs.text_begin_ss)},
421 text_begin_rs{
std::move(
rhs.text_begin_rs)}
423 text_begin_ss.set_vector(&text_begin);
424 text_begin_rs.set_vector(&text_begin);
430 index = std::move(
rhs.index);
431 text_begin = std::move(
rhs.text_begin);
432 text_begin_ss = std::move(
rhs.text_begin_ss);
433 text_begin_rs = std::move(
rhs.text_begin_rs);
435 text_begin_ss.set_vector(&text_begin);
436 text_begin_rs.set_vector(&text_begin);
451 template <std::ranges::b
idirectional_range text_t>
454 construct(std::forward<text_t>(
text));
504 return (index ==
rhs.index) && (text_begin ==
rhs.text_begin);
520 return !(*
this ==
rhs);
549 template <cereal_archive archive_t>
555 text_begin_ss.set_vector(&text_begin);
557 text_begin_rs.set_vector(&text_begin);
564 +
" but it is being read into an fm_index with an alphabet of size "
573 + (tmp ?
"text collection" :
"single text")
574 +
" but it is being read into an fm_index expecting a "
585template <std::ranges::range text_t>
590namespace seqan3::detail
609class reverse_fm_index :
public fm_index<alphabet_t, text_layout_mode, sdsl_index_type>
613 template <std::ranges::range text_t>
614 void construct_(text_t && text)
618 auto reverse_text = text | std::views::reverse;
619 this->construct(reverse_text);
623 auto reverse_text = text | views::deep{std::views::reverse} | std::views::reverse;
624 this->construct(reverse_text,
true);
632 template <std::ranges::b
idirectional_range text_t>
633 explicit reverse_fm_index(text_t && text)
635 construct_(std::forward<text_t>(text));
The SeqAn Bidirectional FM Index Cursor.
Definition bi_fm_index_cursor.hpp:51
A "pretty printer" for most SeqAn data structures and related types.
Definition debug_stream_type.hpp:79
The SeqAn FM Index Cursor.
Definition fm_index_cursor.hpp:83
The SeqAn FM Index.
Definition fm_index.hpp:185
fm_index & operator=(fm_index rhs)
When copy/move assigning, also update internal data structures.
Definition fm_index.hpp:428
fm_index()=default
Defaulted.
static constexpr text_layout text_layout_mode
Indicates whether index is built over a collection.
Definition fm_index.hpp:378
cursor_type cursor() const noexcept
Returns a seqan3::fm_index_cursor on the index that can be used for searching. .
Definition fm_index.hpp:537
size_type size() const noexcept
Returns the length of the indexed text including sentinel characters.
Definition fm_index.hpp:469
bool operator!=(fm_index const &rhs) const noexcept
Compares two indices.
Definition fm_index.hpp:518
bool empty() const noexcept
Checks whether the index is empty.
Definition fm_index.hpp:485
bool operator==(fm_index const &rhs) const noexcept
Compares two indices.
Definition fm_index.hpp:501
~fm_index()=default
Defaulted.
alphabet_t alphabet_type
The type of the underlying character of the indexed text.
Definition fm_index.hpp:384
fm_index(fm_index &&rhs)
When move constructing, also update internal data structures.
Definition fm_index.hpp:417
typename sdsl_index_type::size_type size_type
Type for representing positions in the indexed text.
Definition fm_index.hpp:386
fm_index(text_t &&text)
Constructor that immediately constructs the index given a range. The range cannot be empty.
Definition fm_index.hpp:452
fm_index(fm_index const &rhs)
When copy constructing, also update internal data structures.
Definition fm_index.hpp:406
Provides various transformation traits used by the range module.
Provides the internal representation of a node of the seqan3::fm_index_cursor.
Provides the seqan3::fm_index_cursor for searching in the unidirectional seqan3::fm_index.
constexpr auto to_rank
Return the rank representation of a (semi-)alphabet object.
Definition alphabet/concept.hpp:152
text_layout
The possible text layouts (single, collection) the seqan3::fm_index and seqan3::bi_fm_index can suppo...
Definition search/fm_index/concept.hpp:69
sdsl_wt_index_type default_sdsl_index_type
The default FM Index Configuration.
Definition fm_index.hpp:141
seqan3::contrib::sdsl::csa_wt< seqan3::contrib::sdsl::wt_blcd< seqan3::contrib::sdsl::bit_vector, seqan3::contrib::sdsl::rank_support_v<>, seqan3::contrib::sdsl::select_support_scan<>, seqan3::contrib::sdsl::select_support_scan< 0 > >, 16, 10 '000 '000, seqan3::contrib::sdsl::sa_order_sa_sampling<>, seqan3::contrib::sdsl::isa_sampling<>, seqan3::contrib::sdsl::plain_byte_alphabet > sdsl_wt_index_type
The FM Index Configuration using a Wavelet Tree.
Definition fm_index.hpp:134
@ single
The text is a single range.
Definition search/fm_index/concept.hpp:71
@ collection
The text is a range of ranges.
Definition search/fm_index/concept.hpp:73
The basis for seqan3::alphabet, but requires only rank interface (not char).
The generic concept for a (biological) sequence.
The main SeqAn3 namespace.
Definition aligned_sequence_concept.hpp:26
fm_index(text_t &&) -> fm_index< range_innermost_value_t< text_t >, text_layout
Deduces the alphabet and dimensions of the text.
Definition fm_index.hpp:586
SeqAn specific customisations in the standard namespace.
Provides the concept for seqan3::detail::sdsl_index.
Provides seqan3::views::to_rank.