15#include <sdsl/suffix_trees.hpp>
50template <
typename index_t>
69 index_type::text_layout_mode,
70 typename index_type::sdsl_index_type>>;
75 using sdsl_char_type =
typename index_type::sdsl_char_type;
77 using sdsl_index_type =
typename index_t::sdsl_index_type;
79 using sdsl_sigma_type =
typename index_type::sdsl_sigma_type;
81 using index_alphabet_type =
typename index_t::alphabet_type;
104 sdsl_sigma_type sigma{};
121 sdsl_char_type _last_char{};
132 bool fwd_cursor_last_used =
false;
143 template <
typename csa_t>
144 requires (std::same_as<csa_t, typename index_type::sdsl_index_type>
145 || std::same_as<csa_t, typename index_type::rev_sdsl_index_type>)
146 bool bidirectional_search(csa_t
const & csa,
147 sdsl_char_type
const c,
153 assert((l_fwd <= r_fwd) && (r_fwd < csa.size()));
154 assert(r_fwd + 1 >= l_fwd);
155 assert(r_bwd + 1 - l_bwd == r_fwd + 1 - l_fwd);
157 size_type _l_fwd, _r_fwd, _l_bwd, _r_bwd;
160 if constexpr (!std::same_as<index_alphabet_type, sdsl::plain_byte_alphabet>)
162 cc = csa.char2comp[c];
163 if (cc == 0 && c > 0)
168 if (r_fwd + 1 - l_fwd == csa.size())
172 _r_fwd = csa.C[cc + 1] - 1;
178 auto const r_s_b = csa.wavelet_tree.lex_count(l_fwd, r_fwd + 1, c);
179 size_type const rank_l = std::get<0>(r_s_b);
180 size_type const s = std::get<1>(r_s_b), b = std::get<2>(r_s_b);
181 size_type const rank_r = r_fwd - l_fwd - s - b + rank_l;
182 _l_fwd = c_begin + rank_l;
183 _r_fwd = c_begin + rank_r;
188 if (_r_fwd >= _l_fwd)
194 assert(r_fwd + 1 >= l_fwd);
195 assert(r_bwd + 1 - l_bwd == r_fwd + 1 - l_fwd);
202 template <
typename csa_t>
203 requires (std::same_as<csa_t, typename index_type::sdsl_index_type>
204 || std::same_as<csa_t, typename index_type::rev_sdsl_index_type>)
205 bool bidirectional_search_cycle(csa_t
const & csa,
206 sdsl_char_type
const c,
214 assert((l_parent <= r_parent) && (r_parent < csa.size()));
217 if constexpr (std::same_as<index_alphabet_type, sdsl::plain_byte_alphabet>)
220 c_begin = csa.C[csa.char2comp[c]];
222 auto const r_s_b = csa.wavelet_tree.lex_count(l_parent, r_parent + 1, c);
223 size_type const s = std::get<1>(r_s_b), b = std::get<2>(r_s_b), rank_l = std::get<0>(r_s_b),
224 rank_r = r_parent - l_parent - s - b + rank_l;
226 size_type const _l_fwd = c_begin + rank_l;
227 size_type const _r_fwd = c_begin + rank_r;
229 size_type const _r_bwd = r_bwd + 1 + rank_r - rank_l;
231 if (_r_fwd >= _l_fwd)
237 assert(r_fwd + 1 >= l_fwd);
238 assert(r_bwd + 1 - l_bwd == r_fwd + 1 - l_fwd);
262 fwd_rb(
_index.size() - 1),
264 rev_rb(
_index.size() - 1),
265 sigma(
_index.fwd_fm.index.sigma - index_t::text_layout_mode),
286 assert(!(fwd_lb ==
rhs.fwd_lb && fwd_rb ==
rhs.fwd_rb && depth ==
rhs.depth) || (depth == 0)
287 || (parent_lb ==
rhs.parent_lb && parent_rb ==
rhs.parent_rb && _last_char ==
rhs._last_char));
308 return !(*
this ==
rhs);
331 fwd_cursor_last_used =
true;
338 sdsl_sigma_type c = 1;
340 && !bidirectional_search(index->fwd_fm.index,
341 index->fwd_fm.index.comp2char[c],
356 _last_char =
static_cast<sdsl_char_type
>(c);
384 fwd_cursor_last_used =
false;
391 sdsl_sigma_type c = 1;
393 && !bidirectional_search(index->rev_fm.index,
394 index->rev_fm.index.comp2char[c],
409 _last_char =
static_cast<sdsl_char_type
>(c);
430 template <
typename char_t>
431 requires std::convertible_to<char_t, index_alphabet_type>
435 fwd_cursor_last_used =
true;
447 if (bidirectional_search(index->fwd_fm.index,
c_char, fwd_lb, fwd_rb, rev_lb, rev_rb))
461 template <
typename char_type>
462 requires seqan3::detail::is_char_adaptation_v<char_type>
481 template <
typename char_t>
482 requires std::convertible_to<char_t, index_alphabet_type>
486 fwd_cursor_last_used =
false;
498 if (bidirectional_search(index->rev_fm.index,
c_char, rev_lb, rev_rb, fwd_lb, fwd_rb))
512 template <
typename char_type>
513 requires seqan3::detail::is_char_adaptation_v<char_type>
535 template <std::ranges::range seq_t>
538 static_assert(std::ranges::forward_range<seq_t>,
"The query must model forward_range.");
539 static_assert(std::convertible_to<range_innermost_value_t<seq_t>, index_alphabet_type>,
540 "The alphabet of the sequence must be convertible to the alphabet of the index.");
545 auto last = std::ranges::end(
seq);
548 fwd_cursor_last_used = (first != last);
553 sdsl_char_type c = _last_char;
556 for (
auto it = first;
it != last; ++
len, ++
it)
605 template <std::ranges::range seq_t>
608 static_assert(std::ranges::bidirectional_range<seq_t>,
"The query must model bidirectional_range.");
609 static_assert(std::convertible_to<range_innermost_value_t<seq_t>, index_alphabet_type>,
610 "The alphabet of the sequence must be convertible to the alphabet of the index.");
615 auto last = std::ranges::end(
rev_seq);
619 fwd_cursor_last_used =
false;
624 sdsl_char_type c = _last_char;
627 for (
auto it = first;
it != last; ++
len, ++
it)
685 assert(fwd_cursor_last_used);
690 sdsl_sigma_type c = _last_char + 1;
693 && !bidirectional_search_cycle(index->fwd_fm.index,
694 index->fwd_fm.index.comp2char[c],
708 _last_char =
static_cast<sdsl_char_type
>(c);
745 assert(!fwd_cursor_last_used);
750 sdsl_sigma_type c = _last_char + 1;
752 && !bidirectional_search_cycle(index->rev_fm.index,
753 index->rev_fm.index.comp2char[c],
767 _last_char =
static_cast<sdsl_char_type
>(c);
794 return index->fwd_fm.index.comp2char[_last_char] - 1;
813 assert(depth != 0 || (fwd_lb == rev_lb && fwd_rb == rev_rb && fwd_lb == 0 && fwd_rb == index->size() - 1));
842 cur.parent_lb = parent_lb;
843 cur.parent_rb = parent_rb;
844 cur.node = {fwd_lb, fwd_rb, depth, _last_char};
847 if (!fwd_cursor_last_used)
874 template <std::ranges::range text_t>
878 static_assert(std::ranges::input_range<text_t>,
"The text must model input_range.");
880 static_assert(std::same_as<range_innermost_value_t<text_t>, index_alphabet_type>,
881 "The alphabet types of the given text and index differ.");
889 template <std::ranges::range text_t>
893 static_assert(std::ranges::input_range<text_t>,
"The text collection must model input_range.");
895 static_assert(std::same_as<range_innermost_value_t<text_t>, index_alphabet_type>,
896 "The alphabet types of the given text and index differ.");
930 assert(index !=
nullptr && (1 + fwd_rb - fwd_lb == 1 + rev_rb - rev_lb));
932 return 1 + fwd_rb - fwd_lb;
955 occ.emplace_back(0, offset() - index->fwd_fm.index[fwd_lb +
i]);
995 return std::views::iota(fwd_lb, fwd_lb +
count())
996 | std::views::transform(
1007 assert(index !=
nullptr);
1009 return std::views::iota(fwd_lb, fwd_lb +
count())
1010 | std::views::transform(
1027 template <cereal_archive archive_t>
Core alphabet concept and free function/type trait wrappers.
Provides alphabet adaptations for standard char types.
The SeqAn Bidirectional FM Index Cursor.
Definition bi_fm_index_cursor.hpp:52
bool extend_right() noexcept
Tries to extend the query by the smallest possible character to the right such that the query is foun...
Definition bi_fm_index_cursor.hpp:328
size_type count() const noexcept
Counts the number of occurrences of the searched query in the text.
Definition bi_fm_index_cursor.hpp:928
bool extend_left(seq_t &&seq) noexcept
Tries to extend the query by seq to the left.
Definition bi_fm_index_cursor.hpp:606
bool operator==(bi_fm_index_cursor const &rhs) const noexcept
Compares two cursors.
Definition bi_fm_index_cursor.hpp:282
bool extend_left(char_type const *cstring) noexcept
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition bi_fm_index_cursor.hpp:514
auto path_label(text_t &&text) const noexcept
Returns the searched query.
Definition bi_fm_index_cursor.hpp:875
bool extend_left() noexcept
Tries to extend the query by the smallest possible character to the left such that the query is found...
Definition bi_fm_index_cursor.hpp:381
auto path_label(text_t &&text) const noexcept
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition bi_fm_index_cursor.hpp:890
locate_result_type locate() const
Locates the occurrences of the searched query in the text.
Definition bi_fm_index_cursor.hpp:946
bool cycle_back() noexcept
Tries to replace the rightmost character of the query by the next lexicographically larger character ...
Definition bi_fm_index_cursor.hpp:681
bool cycle_front() noexcept
Tries to replace the leftmost character of the query by the next lexicographically larger character s...
Definition bi_fm_index_cursor.hpp:741
auto lazy_locate() const
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition bi_fm_index_cursor.hpp:1004
fwd_cursor to_fwd_cursor() const noexcept
Returns a unidirectional seqan3::fm_index_cursor on the original text. path_label() on the returned u...
Definition bi_fm_index_cursor.hpp:837
size_type query_length() const noexcept
Returns the depth of the cursor node in the implicit suffix tree, i.e. the length of the sequence sea...
Definition bi_fm_index_cursor.hpp:809
bool extend_right(char_type const *cstring) noexcept
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition bi_fm_index_cursor.hpp:463
size_type last_rank() noexcept
Outputs the rightmost respectively leftmost rank depending on whether extend_right() or extend_left()...
Definition bi_fm_index_cursor.hpp:790
index_t index_type
Type of the index.
Definition bi_fm_index_cursor.hpp:55
typename index_type::size_type size_type
Type for representing positions in the indexed text.
Definition bi_fm_index_cursor.hpp:61
bool operator!=(bi_fm_index_cursor const &rhs) const noexcept
Compares two cursors.
Definition bi_fm_index_cursor.hpp:304
std::vector< std::pair< size_type, size_type > > locate() const
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition bi_fm_index_cursor.hpp:961
bi_fm_index_cursor() noexcept=default
Default constructor. Accessing member functions on a default constructed object is undefined behavior...
auto lazy_locate() const
Locates the occurrences of the searched query in the text on demand, i.e. a std::ranges::view is retu...
Definition bi_fm_index_cursor.hpp:990
bool extend_right(seq_t &&seq) noexcept
Tries to extend the query by seq to the right.
Definition bi_fm_index_cursor.hpp:536
bool extend_right(char_t const c) noexcept
Tries to extend the query by the character c to the right.
Definition bi_fm_index_cursor.hpp:432
bool extend_left(char_t const c) noexcept
Tries to extend the query by the character c to the left.
Definition bi_fm_index_cursor.hpp:483
A "pretty printer" for most SeqAn data structures and related types.
Definition debug_stream_type.hpp:79
The SeqAn FM Index Cursor.
Definition fm_index_cursor.hpp:84
The SeqAn FM Index.
Definition fm_index.hpp:186
Provides various transformation traits used by the range module.
Provides the unidirectional seqan3::fm_index.
Provides the seqan3::fm_index_cursor for searching in the unidirectional seqan3::fm_index.
constexpr auto to_rank
Return the rank representation of a (semi-)alphabet object.
Definition alphabet/concept.hpp:152
@ seq
The "sequence", usually a range of nucleotides or amino acids.
text_layout
The possible text layouts (single, collection) the seqan3::fm_index and seqan3::bi_fm_index can suppo...
Definition search/fm_index/concept.hpp:70
@ single
The text is a single range.
Definition search/fm_index/concept.hpp:72
@ collection
The text is a range of ranges.
Definition search/fm_index/concept.hpp:74
constexpr auto slice
A view adaptor that returns a half-open interval on the underlying range.
Definition slice.hpp:137
The main SeqAn3 namespace.
Definition aligned_sequence_concept.hpp:26
Provides seqan3::views::slice.
Provides alphabet adaptations for standard uint types.