17 #include <type_traits>
19 #include <sdsl/suffix_trees.hpp>
53 return lhs.begin_position == rhs.begin_position && lhs.end_position == rhs.end_position;
88 template <
typename index_t>
106 using node_type = detail::fm_index_cursor_node<index_t>;
108 using sdsl_char_type =
typename index_type::sdsl_char_type;
110 using sdsl_index_type =
typename index_t::sdsl_index_type;
112 using sdsl_sigma_type =
typename index_type::sdsl_sigma_type;
114 using index_alphabet_type =
typename index_t::alphabet_type;
130 sdsl_sigma_type sigma{};
132 template <
typename _index_t>
133 friend class bi_fm_index_cursor;
143 bool backward_search(sdsl_index_type
const & csa,
144 sdsl_char_type
const c,
148 assert(l <= r && r < csa.size());
153 if constexpr(!std::same_as<index_alphabet_type, sdsl::plain_byte_alphabet>)
155 cc = csa.char2comp[c];
156 if (cc == 0 && c > 0)
161 if (l == 0 && r + 1 == csa.size())
164 _r = csa.C[cc + 1] - 1;
169 _l = c_begin + csa.bwt.rank(l, c);
170 _r = c_begin + csa.bwt.rank(r + 1, c) - 1;
177 assert(r + 1 - l >= 0);
201 node({0, _index.index.size() - 1, 0, 0}),
202 sigma(_index.index.sigma - index_t::text_layout_mode)
204 assert(_index.index.size() != 0);
222 assert(index !=
nullptr);
223 assert(node != rhs.node || (
query_length() == 0 || (parent_lb == rhs.parent_lb && parent_rb == rhs.parent_rb)));
227 return node == rhs.node;
244 assert(index !=
nullptr);
246 return !(*
this == rhs);
270 assert(index !=
nullptr);
272 sdsl_char_type c = 1;
274 while (c < sigma && !backward_search(index->index, index->index.comp2char[c], _lb, _rb))
283 node = {_lb, _rb, node.depth + 1, c};
302 template <
typename char_t>
304 requires std::convertible_to<char_t, index_alphabet_type>
308 assert(index !=
nullptr);
316 sdsl_char_type c_char =
seqan3::to_rank(
static_cast<index_alphabet_type
>(c)) + 1;
318 if (backward_search(index->index, c_char, _lb, _rb))
322 node = {_lb, _rb, node.depth + 1, c_char};
329 template <
typename char_type>
331 requires detail::is_char_adaptation_v<char_type>
354 template <std::ranges::range seq_t>
357 static_assert(std::ranges::forward_range<seq_t>,
"The query must model forward_range.");
359 "The alphabet of the sequence must be convertible to the alphabet of the index.");
361 assert(index !=
nullptr);
364 size_type new_parent_lb = parent_lb, new_parent_rb = parent_rb;
369 for (
auto it = std::ranges::begin(
seq); it != std::ranges::end(
seq); ++len, ++it)
380 if (!backward_search(index->index, c, _lb, _rb))
384 parent_lb = new_parent_lb;
385 parent_rb = new_parent_rb;
386 node = {_lb, _rb, len + node.depth, c};
420 assert(parent_lb <= parent_rb);
422 sdsl_char_type c = node.last_char + 1;
423 size_type _lb = parent_lb, _rb = parent_rb;
425 while (c < sigma && !backward_search(index->index, index->index.comp2char[c], _lb, _rb))
432 node = {_lb, _rb, node.depth, c};
456 assert(index !=
nullptr &&
query_length() > 0 && parent_lb <= parent_rb);
458 return index->index.comp2char[node.last_char] - 1;
478 assert(index !=
nullptr);
480 return {node.lb, node.rb + 1};
503 assert(index !=
nullptr);
504 assert(node.depth != 0 || (node.lb == 0 && node.rb == index->size() - 1));
526 template <std::ranges::range text_t>
532 static_assert(std::ranges::input_range<text_t>,
"The text must model input_range.");
533 static_assert(range_dimension_v<text_t> == 1,
"The input cannot be a text collection.");
535 "The alphabet types of the given text and index differ.");
536 assert(index !=
nullptr);
538 size_type const query_begin = offset() - index->index[node.lb];
543 template <std::ranges::range text_t>
549 static_assert(std::ranges::input_range<text_t>,
"The text collection must model input_range.");
550 static_assert(range_dimension_v<text_t> == 2,
"The input must be a text collection.");
552 "The alphabet types of the given text and index differ.");
553 assert(index !=
nullptr);
556 size_type const location = offset() - index->index[node.lb];
560 size_type const rank = index->text_begin_rs.rank(location + 1);
565 size_type const start_location = index->text_begin_ss.select(rank);
567 size_type const query_begin = location - start_location;
586 assert(index !=
nullptr);
588 return 1 + node.rb - node.lb;
607 assert(index !=
nullptr);
613 occ.emplace_back(0, offset() - index->index[node.lb + i]);
625 assert(index !=
nullptr);
631 size_type loc = offset() - index->index[node.lb + i];
632 size_type sequence_rank = index->text_begin_rs.rank(loc + 1);
633 size_type sequence_position = loc - index->text_begin_ss.select(sequence_rank);
656 assert(index !=
nullptr);
658 return std::views::iota(node.lb, node.lb +
count())
671 assert(index !=
nullptr);
673 return std::views::iota(node.lb, node.lb +
count())
676 auto loc = _offset - index->index[sa_pos];
677 size_type sequence_rank = index->text_begin_rs.rank(loc + 1);
678 size_type sequence_position = loc - index->text_begin_ss.select(sequence_rank);
690 template <cereal_archive archive_t>
691 void CEREAL_SERIALIZE_FUNCTION_NAME(archive_t & archive)
Core alphabet concept and free function/type trait wrappers.
Provides alphabet adaptations for standard char types.
The SeqAn FM Index Cursor.
Definition: fm_index_cursor.hpp:90
bool extend_right(seq_t &&seq) noexcept
Tries to extend the query by seq to the right.
Definition: fm_index_cursor.hpp:355
locate_result_type locate() const
Locates the occurrences of the searched query in the text.
Definition: fm_index_cursor.hpp:602
bool extend_right(char_t const c) noexcept
Tries to extend the query by the character c to the right.
Definition: fm_index_cursor.hpp:306
auto path_label(text_t &&text) const noexcept
Returns the searched query.
Definition: fm_index_cursor.hpp:527
bool cycle_back() noexcept
Tries to replace the rightmost character of the query by the next lexicographically larger character ...
Definition: fm_index_cursor.hpp:416
bool operator!=(fm_index_cursor const &rhs) const noexcept
Compares two cursors.
Definition: fm_index_cursor.hpp:242
seqan3::suffix_array_interval suffix_array_interval() const noexcept
Returns the half-open suffix array interval.
Definition: fm_index_cursor.hpp:476
bool operator==(fm_index_cursor const &rhs) const noexcept
Compares two cursors.
Definition: fm_index_cursor.hpp:220
size_type count() const noexcept
Counts the number of occurrences of the searched query in the text.
Definition: fm_index_cursor.hpp:584
size_type last_rank() const noexcept
Outputs the rightmost rank.
Definition: fm_index_cursor.hpp:453
bool extend_right() noexcept
Tries to extend the query by the smallest possible character to the right such that the query is foun...
Definition: fm_index_cursor.hpp:266
size_type query_length() const noexcept
Returns the length of the searched query.
Definition: fm_index_cursor.hpp:501
index_t index_type
Type of the index.
Definition: fm_index_cursor.hpp:96
typename index_type::size_type size_type
Type for representing positions in the indexed text.
Definition: fm_index_cursor.hpp:98
auto lazy_locate() const
Locates the occurrences of the searched query in the text on demand, i.e. a std::ranges::view is retu...
Definition: fm_index_cursor.hpp:651
bool extend_right(char_type const *cstring) noexcept
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: fm_index_cursor.hpp:333
fm_index_cursor() noexcept=default
Default constructor. Accessing member functions on a default constructed object is undefined behavior...
Provides various transformation traits used by the range module.
Provides the internal representation of a node of the seqan3::fm_index_cursor.
T emplace_back(T... args)
constexpr auto to_rank
Return the rank representation of a (semi-)alphabet object.
Definition: concept.hpp:155
typename range_innermost_value< t >::type range_innermost_value_t
Shortcut for seqan3::range_innermost_value (transformation_trait shortcut).
Definition: type_traits.hpp:242
@ seq
The "sequence", usually a range of nucleotides or amino acids.
text_layout
The possible text layouts (single, collection) the seqan3::fm_index and seqan3::bi_fm_index can suppo...
Definition: concept.hpp:81
@ single
The text is a single range.
Definition: concept.hpp:83
@ collection
The text is a range of ranges.
Definition: concept.hpp:85
decltype(detail::transform< trait_t >(list_t{})) transform
Apply a transformation trait to every type in the list and return a seqan3::type_list of the results.
Definition: traits.hpp:471
constexpr auto slice
A view adaptor that returns a half-open interval on the underlying range.
Definition: slice.hpp:189
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:29
Adaptations of concepts from the Ranges TS.
Provides the concept for seqan3::detail::sdsl_index.
Exposes the size_type of another type.
Definition: pre.hpp:296
The underlying suffix array interval.
Definition: fm_index_cursor.hpp:37
size_t end_position
The exclusive end position of the interval ("right boundary").
Definition: fm_index_cursor.hpp:41
size_t begin_position
The begin position of the interval ("left boundary").
Definition: fm_index_cursor.hpp:39
friend bool operator==(suffix_array_interval const &lhs, suffix_array_interval const &rhs) noexcept
Test for equality.
Definition: fm_index_cursor.hpp:51
friend bool operator!=(suffix_array_interval const &lhs, suffix_array_interval const &rhs) noexcept
Test for inequality.
Definition: fm_index_cursor.hpp:61
Provides seqan3::views::slice.