15 #include <sdsl/suffix_trees.hpp>
28 namespace seqan3::detail
33 struct fm_index_validator
49 template <semialphabet alphabet_t, text_layout text_layout_mode_, std::ranges::range text_t>
50 static void validate(text_t && text)
54 static_assert(std::ranges::bidirectional_range<text_t>,
"The text must model bidirectional_range.");
55 static_assert(std::convertible_to<range_innermost_value_t<text_t>, alphabet_t>,
56 "The alphabet of the text collection must be convertible to the alphabet of the index.");
57 static_assert(range_dimension_v<text_t> == 1,
"The input cannot be a text collection.");
59 if (std::ranges::empty(text))
64 static_assert(std::ranges::bidirectional_range<text_t>,
65 "The text collection must model bidirectional_range.");
66 static_assert(std::ranges::bidirectional_range<std::ranges::range_reference_t<text_t>>,
67 "The elements of the text collection must model bidirectional_range.");
68 static_assert(std::convertible_to<range_innermost_value_t<text_t>, alphabet_t>,
69 "The alphabet of the text collection must be convertible to the alphabet of the index.");
70 static_assert(range_dimension_v<text_t> == 2,
"The input must be a text collection.");
72 if (std::ranges::empty(text))
75 static_assert(
alphabet_size<range_innermost_value_t<text_t>> <= 256,
"The alphabet is too big.");
85 template <
typename index_t>
86 class fm_index_cursor;
88 template <
typename index_t>
89 class bi_fm_index_cursor;
95 detail::sdsl_index sdsl_index_type_>
96 class reverse_fm_index;
136 sdsl::csa_wt<sdsl::wt_blcd<sdsl::bit_vector,
137 sdsl::rank_support_v<>,
138 sdsl::select_support_scan<>,
139 sdsl::select_support_scan<0>>,
142 sdsl::sa_order_sa_sampling<>,
143 sdsl::isa_sampling<>,
144 sdsl::plain_byte_alphabet>;
199 using sdsl_index_type = sdsl_index_type_;
204 using sdsl_char_type =
typename sdsl_index_type::alphabet_type::char_type;
206 using sdsl_sigma_type =
typename sdsl_index_type::alphabet_type::sigma_type;
209 friend class detail::reverse_fm_index<alphabet_t, text_layout_mode_, sdsl_index_type_>;
212 sdsl_index_type index;
215 sdsl::sd_vector<> text_begin;
217 sdsl::select_support_sd<1> text_begin_ss;
219 sdsl::rank_support_sd<1> text_begin_rs;
240 template <std::ranges::range text_t>
244 void construct(text_t && text)
246 detail::fm_index_validator::validate<alphabet_t, text_layout_mode_>(text);
248 constexpr
auto sigma = alphabet_size<alphabet_t>;
255 sdsl::int_vector<8> tmp_text(std::ranges::distance(text));
261 if constexpr (sigma == 256)
265 "character alphabets the last one/two values are reserved"
266 "(single sequence/collection).");
270 | std::views::reverse,
271 std::ranges::begin(tmp_text));
273 sdsl::construct_im(index, tmp_text, 0);
282 template <std::ranges::range text_t>
286 void construct(text_t && text,
bool reverse =
false)
288 detail::fm_index_validator::validate<alphabet_t, text_layout_mode_>(text);
292 for (
auto && t : text)
293 text_sizes.
push_back(std::ranges::distance(t));
295 size_t const number_of_texts{text_sizes.
size()};
300 if (number_of_texts == text_size)
303 constexpr
auto sigma = alphabet_size<alphabet_t>;
308 sdsl::sd_vector_builder builder(text_size, number_of_texts);
309 size_t prefix_sum{0};
311 for (
auto &&
size : text_sizes)
313 builder.set(prefix_sum);
314 prefix_sum +=
size + 1;
317 text_begin = sdsl::sd_vector<>(builder);
318 text_begin_ss = sdsl::select_support_sd<1>(&text_begin);
319 text_begin_rs = sdsl::rank_support_sd<1>(&text_begin);
322 sdsl::int_vector<8> tmp_text(text_size - (number_of_texts > 1));
324 constexpr uint8_t delimiter = sigma >= 255 ? 255 : sigma + 1;
332 if constexpr (sigma >= 255)
336 " for full character alphabets the last one/"
337 "two values are reserved (single sequence/"
343 | views::join(delimiter),
344 std::ranges::begin(tmp_text));
349 if (number_of_texts == 1)
350 tmp_text.back() = delimiter;
352 std::ranges::reverse(tmp_text);
359 if (number_of_texts == 1)
361 std::ranges::reverse(tmp_text.begin(), tmp_text.end() - 1);
362 tmp_text.back() = delimiter;
366 std::ranges::reverse(tmp_text);
370 sdsl::construct_im(index, tmp_text, 0);
388 template <
typename bi_fm_index_t>
391 template <
typename fm_index_t>
394 template <
typename fm_index_t>
395 friend class detail::fm_index_cursor_node;
404 index{rhs.index}, text_begin{rhs.text_begin}, text_begin_ss{rhs.text_begin_ss}, text_begin_rs{rhs.text_begin_rs}
406 text_begin_ss.set_vector(&text_begin);
407 text_begin_rs.set_vector(&text_begin);
413 text_begin_rs{
std::move(rhs.text_begin_rs)}
415 text_begin_ss.set_vector(&text_begin);
416 text_begin_rs.set_vector(&text_begin);
424 text_begin_ss =
std::move(rhs.text_begin_ss);
425 text_begin_rs =
std::move(rhs.text_begin_rs);
427 text_begin_ss.set_vector(&text_begin);
428 text_begin_rs.set_vector(&text_begin);
443 template <std::ranges::b
idirectional_range text_t>
446 construct(std::forward<text_t>(text));
496 return (index == rhs.index) && (text_begin == rhs.text_begin);
512 return !(*
this == rhs);
541 template <cereal_archive archive_t>
542 void CEREAL_SERIALIZE_FUNCTION_NAME(archive_t & archive)
546 archive(text_begin_ss);
547 text_begin_ss.set_vector(&text_begin);
548 archive(text_begin_rs);
549 text_begin_rs.set_vector(&text_begin);
551 auto sigma = alphabet_size<alphabet_t>;
553 if (sigma != alphabet_size<alphabet_t>)
556 " but it is being read into an fm_index with an alphabet of size " +
565 (tmp ?
"text collection" :
"single text") +
566 " but it is being read into an fm_index expecting a " +
577 template <std::ranges::range text_t>
585 namespace seqan3::detail
604 class reverse_fm_index :
public fm_index<alphabet_t, text_layout_mode, sdsl_index_type>
608 template <std::ranges::range text_t>
609 void construct_(text_t && text)
613 auto reverse_text = text | std::views::reverse;
614 this->construct(reverse_text);
618 auto reverse_text = text | views::deep{std::views::reverse} | std::views::reverse;
619 this->construct(reverse_text,
true);
627 template <std::ranges::b
idirectional_range text_t>
628 explicit reverse_fm_index(text_t && text)
630 construct_(std::forward<text_t>(text));