17 #include <sdsl/suffix_trees.hpp>
55 template <
typename index_t>
65 using size_type =
typename index_type::size_type;
74 index_type::text_layout_mode,
75 typename index_type::sdsl_index_type>>;
80 using sdsl_char_type =
typename index_type::sdsl_char_type;
82 using sdsl_index_type =
typename index_t::sdsl_index_type;
84 using sdsl_sigma_type =
typename index_type::sdsl_sigma_type;
86 using index_alphabet_type =
typename index_t::alphabet_type;
109 sdsl_sigma_type sigma;
126 sdsl_char_type _last_char;
137 bool fwd_cursor_last_used =
false;
148 template <
typename csa_t>
150 requires (std::same_as<csa_t, typename index_type::sdsl_index_type> ||
151 std::same_as<csa_t, typename index_type::rev_sdsl_index_type>)
153 bool bidirectional_search(csa_t
const & csa, sdsl_char_type
const c,
157 assert((l_fwd <= r_fwd) && (r_fwd < csa.size()));
158 assert(r_fwd + 1 >= l_fwd);
159 assert(r_bwd + 1 - l_bwd == r_fwd + 1 - l_fwd);
161 size_type _l_fwd, _r_fwd, _l_bwd, _r_bwd;
164 if constexpr(!std::same_as<index_alphabet_type, sdsl::plain_byte_alphabet>)
166 cc = csa.char2comp[c];
167 if (cc == 0 && c > 0)
172 if (r_fwd + 1 - l_fwd == csa.size())
176 _r_fwd = csa.C[cc + 1] - 1;
182 auto const r_s_b = csa.wavelet_tree.lex_count(l_fwd, r_fwd + 1, c);
183 size_type const rank_l = std::get<0>(r_s_b);
184 size_type const s = std::get<1>(r_s_b), b = std::get<2>(r_s_b);
185 size_type const rank_r = r_fwd - l_fwd - s - b + rank_l;
186 _l_fwd = c_begin + rank_l;
187 _r_fwd = c_begin + rank_r;
192 if (_r_fwd >= _l_fwd)
198 assert(r_fwd + 1 >= l_fwd);
199 assert(r_bwd + 1 - l_bwd == r_fwd + 1 - l_fwd);
206 template <
typename csa_t>
208 requires (std::same_as<csa_t, typename index_type::sdsl_index_type> ||
209 std::same_as<csa_t, typename index_type::rev_sdsl_index_type>)
211 bool bidirectional_search_cycle(csa_t
const & csa, sdsl_char_type
const c,
216 assert((l_parent <= r_parent) && (r_parent < csa.size()));
219 if constexpr(std::same_as<index_alphabet_type, sdsl::plain_byte_alphabet>)
222 c_begin = csa.C[csa.char2comp[c]];
224 auto const r_s_b = csa.wavelet_tree.lex_count(l_parent, r_parent + 1, c);
226 b = std::get<2>(r_s_b),
227 rank_l = std::get<0>(r_s_b),
228 rank_r = r_parent - l_parent - s - b + rank_l;
230 size_type const _l_fwd = c_begin + rank_l;
231 size_type const _r_fwd = c_begin + rank_r;
233 size_type const _r_bwd = r_bwd + 1 + rank_r - rank_l;
235 if (_r_fwd >= _l_fwd)
241 assert(r_fwd + 1 >= l_fwd);
242 assert(r_bwd + 1 - l_bwd == r_fwd + 1 - l_fwd);
265 fwd_lb(0), fwd_rb(_index.
size() - 1),
266 rev_lb(0), rev_rb(_index.
size() - 1),
267 sigma(_index.fwd_fm.index.sigma - index_t::text_layout_mode),
286 assert(index !=
nullptr);
288 assert(!(fwd_lb == rhs.fwd_lb && fwd_rb == rhs.fwd_rb && depth == rhs.depth) ||
290 (parent_lb == rhs.parent_lb && parent_rb == rhs.parent_rb && _last_char == rhs._last_char));
292 return std::tie(fwd_lb, fwd_rb, depth) ==
std::tie(rhs.fwd_lb, rhs.fwd_rb, rhs.depth);
309 assert(index !=
nullptr);
311 return !(*
this == rhs);
334 fwd_cursor_last_used =
true;
337 assert(index !=
nullptr);
339 size_type new_parent_lb = fwd_lb, new_parent_rb = fwd_rb;
341 sdsl_char_type c = 1;
343 !bidirectional_search(index->fwd_fm.index, index->fwd_fm.index.comp2char[c],
344 fwd_lb, fwd_rb, rev_lb, rev_rb))
351 parent_lb = new_parent_lb;
352 parent_rb = new_parent_rb;
382 fwd_cursor_last_used =
false;
385 assert(index !=
nullptr);
387 size_type new_parent_lb = rev_lb, new_parent_rb = rev_rb;
389 sdsl_char_type c = 1;
391 !bidirectional_search(index->rev_fm.index, index->rev_fm.index.comp2char[c],
392 rev_lb, rev_rb, fwd_lb, fwd_rb))
399 parent_lb = new_parent_lb;
400 parent_rb = new_parent_rb;
423 template <
typename char_t>
425 requires std::convertible_to<char_t, index_alphabet_type>
430 fwd_cursor_last_used =
true;
433 assert(index !=
nullptr);
439 size_type new_parent_lb = fwd_lb, new_parent_rb = fwd_rb;
441 auto c_char =
seqan3::to_rank(
static_cast<index_alphabet_type
>(c)) + 1;
442 if (bidirectional_search(index->fwd_fm.index, c_char, fwd_lb, fwd_rb, rev_lb, rev_rb))
444 parent_lb = new_parent_lb;
445 parent_rb = new_parent_rb;
456 template <
typename char_type>
458 requires seqan3::detail::is_char_adaptation_v<char_type>
478 template <
typename char_t>
480 requires std::convertible_to<char_t, index_alphabet_type>
485 fwd_cursor_last_used =
false;
488 assert(index !=
nullptr);
494 size_type new_parent_lb = rev_lb, new_parent_rb = rev_rb;
496 auto c_char =
seqan3::to_rank(
static_cast<index_alphabet_type
>(c)) + 1;
497 if (bidirectional_search(index->rev_fm.index, c_char, rev_lb, rev_rb, fwd_lb, fwd_rb))
499 parent_lb = new_parent_lb;
500 parent_rb = new_parent_rb;
511 template <
typename char_type>
513 requires seqan3::detail::is_char_adaptation_v<char_type>
536 template <std::ranges::range seq_t>
539 static_assert(std::ranges::forward_range<seq_t>,
"The query must model forward_range.");
541 "The alphabet of the sequence must be convertible to the alphabet of the index.");
543 assert(index !=
nullptr);
545 auto first = std::ranges::begin(
seq);
546 auto last = std::ranges::end(
seq);
549 fwd_cursor_last_used = (first != last);
552 size_type _fwd_lb = fwd_lb, _fwd_rb = fwd_rb, _rev_lb = rev_lb, _rev_rb = rev_rb;
553 size_type new_parent_lb = parent_lb, new_parent_rb = parent_rb;
554 sdsl_char_type c = _last_char;
557 for (
auto it = first; it != last; ++len, ++it)
566 new_parent_lb = _fwd_lb;
567 new_parent_rb = _fwd_rb;
568 if (!bidirectional_search(index->fwd_fm.index, c, _fwd_lb, _fwd_rb, _rev_lb, _rev_rb))
577 parent_lb = new_parent_lb;
578 parent_rb = new_parent_rb;
606 template <std::ranges::range seq_t>
609 static_assert(std::ranges::bidirectional_range<seq_t>,
"The query must model bidirectional_range.");
611 "The alphabet of the sequence must be convertible to the alphabet of the index.");
612 assert(index !=
nullptr);
614 auto rev_seq = std::views::reverse(
seq);
615 auto first = std::ranges::begin(rev_seq);
616 auto last = std::ranges::end(rev_seq);
620 fwd_cursor_last_used =
false;
623 size_type _fwd_lb = fwd_lb, _fwd_rb = fwd_rb,
624 _rev_lb = rev_lb, _rev_rb = rev_rb;
625 size_type new_parent_lb = parent_lb, new_parent_rb = parent_rb;
626 sdsl_char_type c = _last_char;
629 for (
auto it = first; it != last; ++len, ++it)
638 new_parent_lb = _rev_lb;
639 new_parent_rb = _rev_rb;
640 if (!bidirectional_search(index->rev_fm.index, c, _rev_lb, _rev_rb, _fwd_lb, _fwd_rb))
649 parent_lb = new_parent_lb;
650 parent_rb = new_parent_rb;
687 assert(fwd_cursor_last_used);
692 sdsl_char_type c = _last_char + 1;
695 !bidirectional_search_cycle(index->fwd_fm.index, index->fwd_fm.index.comp2char[c],
696 parent_lb, parent_rb, fwd_lb, fwd_rb, rev_lb, rev_rb))
740 assert(!fwd_cursor_last_used);
745 sdsl_char_type c = _last_char + 1;
747 !bidirectional_search_cycle(index->rev_fm.index, index->rev_fm.index.comp2char[c],
748 parent_lb, parent_rb, rev_lb, rev_rb, fwd_lb, fwd_rb))
783 return index->fwd_fm.index.comp2char[_last_char] - 1;
800 assert(index !=
nullptr);
806 fwd_rb == index->size() - 1));
832 assert(index !=
nullptr);
835 cur.parent_lb = parent_lb;
836 cur.parent_rb = parent_rb;
837 cur.node = {fwd_lb, fwd_rb, depth, _last_char};
840 if (!fwd_cursor_last_used)
867 template <std::ranges::range text_t>
873 static_assert(std::ranges::input_range<text_t>,
"The text must model input_range.");
874 static_assert(range_dimension_v<text_t> == 1,
"The input cannot be a text collection.");
876 "The alphabet types of the given text and index differ.");
877 assert(index !=
nullptr);
879 size_type const query_begin = offset() - index->fwd_fm.index[fwd_lb];
884 template <std::ranges::range text_t>
890 static_assert(std::ranges::input_range<text_t>,
"The text collection must model input_range.");
891 static_assert(range_dimension_v<text_t> == 2,
"The input must be a text collection.");
893 "The alphabet types of the given text and index differ.");
894 assert(index !=
nullptr);
897 size_type const location = offset() - index->fwd_fm.index[fwd_lb];
901 size_type const rank = index->fwd_fm.text_begin_rs.rank(location + 1);
906 size_type const start_location = index->fwd_fm.text_begin_ss.select(rank);
908 size_type const query_begin = location - start_location;
927 assert(index !=
nullptr && (1 + fwd_rb - fwd_lb == 1 + rev_rb - rev_lb));
929 return 1 + fwd_rb - fwd_lb;
948 assert(index !=
nullptr);
954 occ.emplace_back(0, offset() - index->fwd_fm.index[fwd_lb + i]);
965 assert(index !=
nullptr);
971 size_type loc = offset() - index->fwd_fm.index[fwd_lb + i];
972 size_type sequence_rank = index->fwd_fm.text_begin_rs.rank(loc + 1);
973 size_type sequence_position = loc - index->fwd_fm.text_begin_ss.select(sequence_rank);
996 assert(index !=
nullptr);
998 return std::views::iota(fwd_lb, fwd_lb +
count())
1011 assert(index !=
nullptr);
1013 return std::views::iota(fwd_lb, fwd_lb +
count())
1016 return _offset - index->fwd_fm.index[sa_pos];
1020 size_type sequence_rank = index->fwd_fm.text_begin_rs.rank(loc + 1);
1021 size_type sequence_position = loc - index->fwd_fm.text_begin_ss.select(sequence_rank);