18 #include <sdsl/suffix_trees.hpp>
56 template <
typename index_t>
75 index_type::text_layout_mode,
76 typename index_type::sdsl_index_type>>;
81 using sdsl_char_type =
typename index_type::sdsl_char_type;
83 using sdsl_index_type =
typename index_t::sdsl_index_type;
85 using sdsl_sigma_type =
typename index_type::sdsl_sigma_type;
87 using index_alphabet_type =
typename index_t::alphabet_type;
110 sdsl_sigma_type sigma{};
127 sdsl_char_type _last_char{};
138 bool fwd_cursor_last_used =
false;
149 template <
typename csa_t>
151 requires (std::same_as<csa_t, typename index_type::sdsl_index_type> ||
152 std::same_as<csa_t, typename index_type::rev_sdsl_index_type>)
154 bool bidirectional_search(csa_t
const & csa, sdsl_char_type
const c,
158 assert((l_fwd <= r_fwd) && (r_fwd < csa.size()));
159 assert(r_fwd + 1 >= l_fwd);
160 assert(r_bwd + 1 - l_bwd == r_fwd + 1 - l_fwd);
162 size_type _l_fwd, _r_fwd, _l_bwd, _r_bwd;
165 if constexpr(!std::same_as<index_alphabet_type, sdsl::plain_byte_alphabet>)
167 cc = csa.char2comp[c];
168 if (cc == 0 && c > 0)
173 if (r_fwd + 1 - l_fwd == csa.size())
177 _r_fwd = csa.C[cc + 1] - 1;
183 auto const r_s_b = csa.wavelet_tree.lex_count(l_fwd, r_fwd + 1, c);
184 size_type const rank_l = std::get<0>(r_s_b);
185 size_type const s = std::get<1>(r_s_b), b = std::get<2>(r_s_b);
186 size_type const rank_r = r_fwd - l_fwd - s - b + rank_l;
187 _l_fwd = c_begin + rank_l;
188 _r_fwd = c_begin + rank_r;
193 if (_r_fwd >= _l_fwd)
199 assert(r_fwd + 1 >= l_fwd);
200 assert(r_bwd + 1 - l_bwd == r_fwd + 1 - l_fwd);
207 template <
typename csa_t>
209 requires (std::same_as<csa_t, typename index_type::sdsl_index_type> ||
210 std::same_as<csa_t, typename index_type::rev_sdsl_index_type>)
212 bool bidirectional_search_cycle(csa_t
const & csa, sdsl_char_type
const c,
217 assert((l_parent <= r_parent) && (r_parent < csa.size()));
220 if constexpr(std::same_as<index_alphabet_type, sdsl::plain_byte_alphabet>)
223 c_begin = csa.C[csa.char2comp[c]];
225 auto const r_s_b = csa.wavelet_tree.lex_count(l_parent, r_parent + 1, c);
227 b = std::get<2>(r_s_b),
228 rank_l = std::get<0>(r_s_b),
229 rank_r = r_parent - l_parent - s - b + rank_l;
231 size_type const _l_fwd = c_begin + rank_l;
232 size_type const _r_fwd = c_begin + rank_r;
234 size_type const _r_bwd = r_bwd + 1 + rank_r - rank_l;
236 if (_r_fwd >= _l_fwd)
242 assert(r_fwd + 1 >= l_fwd);
243 assert(r_bwd + 1 - l_bwd == r_fwd + 1 - l_fwd);
266 fwd_lb(0), fwd_rb(_index.
size() - 1),
267 rev_lb(0), rev_rb(_index.
size() - 1),
268 sigma(_index.fwd_fm.index.sigma - index_t::text_layout_mode),
287 assert(index !=
nullptr);
289 assert(!(fwd_lb == rhs.fwd_lb && fwd_rb == rhs.fwd_rb && depth == rhs.depth) ||
291 (parent_lb == rhs.parent_lb && parent_rb == rhs.parent_rb && _last_char == rhs._last_char));
293 return std::tie(fwd_lb, fwd_rb, depth) ==
std::tie(rhs.fwd_lb, rhs.fwd_rb, rhs.depth);
310 assert(index !=
nullptr);
312 return !(*
this == rhs);
335 fwd_cursor_last_used =
true;
338 assert(index !=
nullptr);
340 size_type new_parent_lb = fwd_lb, new_parent_rb = fwd_rb;
342 sdsl_char_type c = 1;
344 !bidirectional_search(index->fwd_fm.index, index->fwd_fm.index.comp2char[c],
345 fwd_lb, fwd_rb, rev_lb, rev_rb))
352 parent_lb = new_parent_lb;
353 parent_rb = new_parent_rb;
383 fwd_cursor_last_used =
false;
386 assert(index !=
nullptr);
388 size_type new_parent_lb = rev_lb, new_parent_rb = rev_rb;
390 sdsl_char_type c = 1;
392 !bidirectional_search(index->rev_fm.index, index->rev_fm.index.comp2char[c],
393 rev_lb, rev_rb, fwd_lb, fwd_rb))
400 parent_lb = new_parent_lb;
401 parent_rb = new_parent_rb;
424 template <
typename char_t>
426 requires std::convertible_to<char_t, index_alphabet_type>
431 fwd_cursor_last_used =
true;
434 assert(index !=
nullptr);
440 size_type new_parent_lb = fwd_lb, new_parent_rb = fwd_rb;
442 auto c_char =
seqan3::to_rank(
static_cast<index_alphabet_type
>(c)) + 1;
443 if (bidirectional_search(index->fwd_fm.index, c_char, fwd_lb, fwd_rb, rev_lb, rev_rb))
445 parent_lb = new_parent_lb;
446 parent_rb = new_parent_rb;
457 template <
typename char_type>
459 requires seqan3::detail::is_char_adaptation_v<char_type>
479 template <
typename char_t>
481 requires std::convertible_to<char_t, index_alphabet_type>
486 fwd_cursor_last_used =
false;
489 assert(index !=
nullptr);
495 size_type new_parent_lb = rev_lb, new_parent_rb = rev_rb;
497 auto c_char =
seqan3::to_rank(
static_cast<index_alphabet_type
>(c)) + 1;
498 if (bidirectional_search(index->rev_fm.index, c_char, rev_lb, rev_rb, fwd_lb, fwd_rb))
500 parent_lb = new_parent_lb;
501 parent_rb = new_parent_rb;
512 template <
typename char_type>
514 requires seqan3::detail::is_char_adaptation_v<char_type>
537 template <std::ranges::range seq_t>
540 static_assert(std::ranges::forward_range<seq_t>,
"The query must model forward_range.");
542 "The alphabet of the sequence must be convertible to the alphabet of the index.");
544 assert(index !=
nullptr);
546 auto first = std::ranges::begin(
seq);
547 auto last = std::ranges::end(
seq);
550 fwd_cursor_last_used = (first != last);
553 size_type _fwd_lb = fwd_lb, _fwd_rb = fwd_rb, _rev_lb = rev_lb, _rev_rb = rev_rb;
554 size_type new_parent_lb = parent_lb, new_parent_rb = parent_rb;
555 sdsl_char_type c = _last_char;
558 for (
auto it = first; it != last; ++len, ++it)
567 new_parent_lb = _fwd_lb;
568 new_parent_rb = _fwd_rb;
569 if (!bidirectional_search(index->fwd_fm.index, c, _fwd_lb, _fwd_rb, _rev_lb, _rev_rb))
578 parent_lb = new_parent_lb;
579 parent_rb = new_parent_rb;
607 template <std::ranges::range seq_t>
610 static_assert(std::ranges::bidirectional_range<seq_t>,
"The query must model bidirectional_range.");
612 "The alphabet of the sequence must be convertible to the alphabet of the index.");
613 assert(index !=
nullptr);
615 auto rev_seq = std::views::reverse(
seq);
616 auto first = std::ranges::begin(rev_seq);
617 auto last = std::ranges::end(rev_seq);
621 fwd_cursor_last_used =
false;
624 size_type _fwd_lb = fwd_lb, _fwd_rb = fwd_rb,
625 _rev_lb = rev_lb, _rev_rb = rev_rb;
626 size_type new_parent_lb = parent_lb, new_parent_rb = parent_rb;
627 sdsl_char_type c = _last_char;
630 for (
auto it = first; it != last; ++len, ++it)
639 new_parent_lb = _rev_lb;
640 new_parent_rb = _rev_rb;
641 if (!bidirectional_search(index->rev_fm.index, c, _rev_lb, _rev_rb, _fwd_lb, _fwd_rb))
650 parent_lb = new_parent_lb;
651 parent_rb = new_parent_rb;
688 assert(fwd_cursor_last_used);
693 sdsl_char_type c = _last_char + 1;
696 !bidirectional_search_cycle(index->fwd_fm.index, index->fwd_fm.index.comp2char[c],
697 parent_lb, parent_rb, fwd_lb, fwd_rb, rev_lb, rev_rb))
741 assert(!fwd_cursor_last_used);
746 sdsl_char_type c = _last_char + 1;
748 !bidirectional_search_cycle(index->rev_fm.index, index->rev_fm.index.comp2char[c],
749 parent_lb, parent_rb, rev_lb, rev_rb, fwd_lb, fwd_rb))
784 return index->fwd_fm.index.comp2char[_last_char] - 1;
801 assert(index !=
nullptr);
807 fwd_rb == index->size() - 1));
833 assert(index !=
nullptr);
836 cur.parent_lb = parent_lb;
837 cur.parent_rb = parent_rb;
838 cur.node = {fwd_lb, fwd_rb, depth, _last_char};
841 if (!fwd_cursor_last_used)
868 template <std::ranges::range text_t>
874 static_assert(std::ranges::input_range<text_t>,
"The text must model input_range.");
875 static_assert(range_dimension_v<text_t> == 1,
"The input cannot be a text collection.");
877 "The alphabet types of the given text and index differ.");
878 assert(index !=
nullptr);
880 size_type const query_begin = offset() - index->fwd_fm.index[fwd_lb];
885 template <std::ranges::range text_t>
891 static_assert(std::ranges::input_range<text_t>,
"The text collection must model input_range.");
892 static_assert(range_dimension_v<text_t> == 2,
"The input must be a text collection.");
894 "The alphabet types of the given text and index differ.");
895 assert(index !=
nullptr);
898 size_type const location = offset() - index->fwd_fm.index[fwd_lb];
902 size_type const rank = index->fwd_fm.text_begin_rs.rank(location + 1);
907 size_type const start_location = index->fwd_fm.text_begin_ss.select(rank);
909 size_type const query_begin = location - start_location;
928 assert(index !=
nullptr && (1 + fwd_rb - fwd_lb == 1 + rev_rb - rev_lb));
930 return 1 + fwd_rb - fwd_lb;
949 assert(index !=
nullptr);
955 occ.emplace_back(0, offset() - index->fwd_fm.index[fwd_lb + i]);
966 assert(index !=
nullptr);
972 size_type loc = offset() - index->fwd_fm.index[fwd_lb + i];
973 size_type sequence_rank = index->fwd_fm.text_begin_rs.rank(loc + 1);
974 size_type sequence_position = loc - index->fwd_fm.text_begin_ss.select(sequence_rank);
997 assert(index !=
nullptr);
999 return std::views::iota(fwd_lb, fwd_lb +
count())
1012 assert(index !=
nullptr);
1014 return std::views::iota(fwd_lb, fwd_lb +
count())
1017 auto loc = _offset - index->fwd_fm.index[sa_pos];
1018 size_type sequence_rank = index->fwd_fm.text_begin_rs.rank(loc + 1);
1019 size_type sequence_position = loc - index->fwd_fm.text_begin_ss.select(sequence_rank);
1031 template <cereal_archive archive_t>
1032 void CEREAL_SERIALIZE_FUNCTION_NAME(archive_t & archive)
1041 archive(_last_char);
Core alphabet concept and free function/type trait wrappers.
Provides alphabet adaptations for standard char types.
The SeqAn Bidirectional FM Index Cursor.
Definition: bi_fm_index_cursor.hpp:58
bool extend_right() noexcept
Tries to extend the query by the smallest possible character to the right such that the query is foun...
Definition: bi_fm_index_cursor.hpp:332
size_type count() const noexcept
Counts the number of occurrences of the searched query in the text.
Definition: bi_fm_index_cursor.hpp:926
bool extend_left(seq_t &&seq) noexcept
Tries to extend the query by seq to the left.
Definition: bi_fm_index_cursor.hpp:608
bool operator==(bi_fm_index_cursor const &rhs) const noexcept
Compares two cursors.
Definition: bi_fm_index_cursor.hpp:285
bool extend_left(char_type const *cstring) noexcept
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: bi_fm_index_cursor.hpp:516
auto path_label(text_t &&text) const noexcept
Returns the searched query.
Definition: bi_fm_index_cursor.hpp:869
bool extend_left() noexcept
Tries to extend the query by the smallest possible character to the left such that the query is found...
Definition: bi_fm_index_cursor.hpp:380
locate_result_type locate() const
Locates the occurrences of the searched query in the text.
Definition: bi_fm_index_cursor.hpp:944
std::vector< std::pair< size_type, size_type > > locate() const
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: bi_fm_index_cursor.hpp:961
bool cycle_back() noexcept
Tries to replace the rightmost character of the query by the next lexicographically larger character ...
Definition: bi_fm_index_cursor.hpp:684
bool cycle_front() noexcept
Tries to replace the leftmost character of the query by the next lexicographically larger character s...
Definition: bi_fm_index_cursor.hpp:737
fwd_cursor to_fwd_cursor() const noexcept
Returns a unidirectional seqan3::fm_index_cursor on the original text. path_label() on the returned u...
Definition: bi_fm_index_cursor.hpp:831
size_type query_length() const noexcept
Returns the depth of the cursor node in the implicit suffix tree, i.e. the length of the sequence sea...
Definition: bi_fm_index_cursor.hpp:799
size_type last_rank() noexcept
Outputs the rightmost respectively leftmost rank depending on whether extend_right() or extend_left()...
Definition: bi_fm_index_cursor.hpp:780
index_t index_type
Type of the index.
Definition: bi_fm_index_cursor.hpp:61
typename index_type::size_type size_type
Type for representing positions in the indexed text.
Definition: bi_fm_index_cursor.hpp:67
bool extend_left(char_t const c) noexcept
Tries to extend the query by the character c to the left.
Definition: bi_fm_index_cursor.hpp:483
bool operator!=(bi_fm_index_cursor const &rhs) const noexcept
Compares two cursors.
Definition: bi_fm_index_cursor.hpp:308
bi_fm_index_cursor() noexcept=default
Default constructor. Accessing member functions on a default constructed object is undefined behavior...
bool extend_right(char_type const *cstring) noexcept
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: bi_fm_index_cursor.hpp:461
bool extend_right(seq_t &&seq) noexcept
Tries to extend the query by seq to the right.
Definition: bi_fm_index_cursor.hpp:538
auto lazy_locate() const
Locates the occurrences of the searched query in the text on demand, i.e. a std::ranges::view is retu...
Definition: bi_fm_index_cursor.hpp:992
bool extend_right(char_t const c) noexcept
Tries to extend the query by the character c to the right.
Definition: bi_fm_index_cursor.hpp:428
The SeqAn FM Index Cursor.
Definition: fm_index_cursor.hpp:90
The SeqAn FM Index.
Definition: fm_index.hpp:193
Provides various transformation traits used by the range module.
T emplace_back(T... args)
Provides the unidirectional seqan3::fm_index.
Provides the seqan3::fm_index_cursor for searching in the unidirectional seqan3::fm_index.
constexpr auto to_rank
Return the rank representation of a (semi-)alphabet object.
Definition: concept.hpp:155
typename range_innermost_value< t >::type range_innermost_value_t
Shortcut for seqan3::range_innermost_value (transformation_trait shortcut).
Definition: type_traits.hpp:242
@ seq
The "sequence", usually a range of nucleotides or amino acids.
text_layout
The possible text layouts (single, collection) the seqan3::fm_index and seqan3::bi_fm_index can suppo...
Definition: concept.hpp:81
@ single
The text is a single range.
Definition: concept.hpp:83
@ collection
The text is a range of ranges.
Definition: concept.hpp:85
decltype(detail::transform< trait_t >(list_t{})) transform
Apply a transformation trait to every type in the list and return a seqan3::type_list of the results.
Definition: traits.hpp:471
constexpr size_t size
The size of a type pack.
Definition: traits.hpp:151
constexpr auto slice
A view adaptor that returns a half-open interval on the underlying range.
Definition: slice.hpp:189
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:29
Adaptations of concepts from the Ranges TS.
Exposes the size_type of another type.
Definition: pre.hpp:296
Provides alphabet adaptations for standard uint types.
Provides seqan3::views::slice.