16 #include <type_traits> 18 #include <sdsl/suffix_trees.hpp> 20 #include <range/v3/view/slice.hpp> 63 template <
typename index_t>
84 using node_type = detail::fm_index_cursor_node<index_t>;
87 using sdsl_char_type =
typename index_type::sdsl_char_type;
89 using sdsl_sigma_type =
typename index_type::sdsl_sigma_type;
101 sdsl_sigma_type sigma;
103 template <
typename _index_t>
116 template <detail::SdslIndex csa_t>
117 bool backward_search(csa_t
const & csa, sdsl_char_type
const c,
size_type & l,
size_type & r)
const noexcept
119 assert(l <= r && r < csa.size());
126 cc = csa.char2comp[c];
127 if (cc == 0 && c > 0)
132 if (l == 0 && r + 1 == csa.size())
135 _r = csa.C[cc + 1] - 1;
140 _l = c_begin + csa.bwt.rank(l, c);
141 _r = c_begin + csa.bwt.rank(r + 1, c) - 1;
148 assert(r + 1 - l >= 0);
170 fm_index_cursor(index_t const & _index) noexcept : index(&_index), node({0, _index.index.size() - 1, 0, 0}),
171 sigma(_index.index.sigma - index_t::is_collection_)
189 assert(index !=
nullptr);
190 assert(node != rhs.node || (
query_length() == 0 || (parent_lb == rhs.parent_lb && parent_rb == rhs.parent_rb)));
194 return node == rhs.node;
211 assert(index !=
nullptr);
213 return !(*
this == rhs);
237 assert(index !=
nullptr);
239 sdsl_char_type c = 1;
241 while (c < sigma && !backward_search(index->index, index->index.comp2char[c], _lb, _rb))
250 node = {_lb, _rb, node.depth + 1, c};
270 template <Alphabet
char_t>
273 assert(index !=
nullptr);
274 assert(index->sigma == alphabet_size<char_t>);
278 sdsl_char_type c_char =
to_rank(c) + 1;
280 if (backward_search(index->index, c_char, _lb, _rb))
284 node = {_lb, _rb, node.depth + 1, c_char};
306 template <std::ranges::Range seq_t>
310 assert(index !=
nullptr);
314 size_type new_parent_lb = parent_lb, new_parent_rb = parent_rb;
325 if (!backward_search(index->index, c, _lb, _rb))
329 parent_lb = new_parent_lb;
330 parent_rb = new_parent_rb;
331 node = {_lb, _rb, len + node.depth, c};
365 assert(parent_lb <= parent_rb);
367 sdsl_char_type c = node.last_char + 1;
368 size_type _lb = parent_lb, _rb = parent_rb;
370 while (c < sigma && !backward_search(index->index, index->index.comp2char[c], _lb, _rb))
377 node = {_lb, _rb, node.depth, c};
401 assert(index !=
nullptr &&
query_length() > 0 && parent_lb <= parent_rb);
403 return index->index.comp2char[node.last_char] - 1;
422 assert(index !=
nullptr);
423 assert(node.depth != 0 || (node.lb == 0 && node.rb == index->size() - 1));
445 template <std::ranges::Range text_t>
448 requires !index_t::is_collection_
452 static_assert(!(dimension_v<text_t> != 1),
"The input cannot be a text collection.");
453 assert(index !=
nullptr);
456 size_type const query_begin = offset() - index->index[node.lb];
461 template <std::ranges::Range text_t>
464 requires index_t::is_collection_
468 static_assert(!(dimension_v<text_t> != 2),
"The input must be a text collection.");
469 assert(index !=
nullptr);
472 size_type const loc = offset() - index->index[node.lb];
473 size_type const query_begin = loc - index->text_begin_rs.rank(loc + 1) + 1;
490 assert(index !=
nullptr);
492 return 1 + node.rb - node.lb;
508 requires !index_t::is_collection_
511 assert(index !=
nullptr);
516 occ[i] = offset() - index->index[node.lb + i];
524 requires index_t::is_collection_
527 assert(index !=
nullptr);
533 size_type loc = offset() - index->index[node.lb + i];
534 size_type sequence_rank = index->text_begin_rs.rank(loc + 1);
535 size_type sequence_position = loc - index->text_begin_ss.select(sequence_rank);
555 requires !index_t::is_collection_
558 assert(index !=
nullptr);
561 |
std::view::transform([*
this, _offset = offset()] (
auto sa_pos) {
return _offset - index->index[sa_pos]; });
567 requires index_t::is_collection_
570 assert(index !=
nullptr);
573 |
std::view::transform([*
this, _offset = offset()] (
auto sa_pos) {
return _offset - index->index[sa_pos]; })
576 size_type sequence_rank = index->text_begin_rs.rank(loc + 1);
577 size_type sequence_position = loc - index->text_begin_ss.select(sequence_rank);
fm_index_cursor() noexcept=default
Default constructor. Accessing member functions on a default constructed object is undefined behavior...
bool operator==(fm_index_cursor const &rhs) const noexcept
Compares two cursors.
Definition: fm_index_cursor.hpp:187
typename value_type< t >::type value_type_t
Shortcut for seqan3::value_type (TransformationTrait shortcut).
Definition: pre.hpp:48
typename index_type::size_type size_type
Type for representing positions in the indexed text.
Definition: fm_index_cursor.hpp:75
constexpr sequenced_policy seq
Global execution policy object for sequenced execution policy.
Definition: execution.hpp:54
bool extend_right(char_t const c) noexcept
Tries to extend the query by the character c to the right.
Definition: fm_index_cursor.hpp:271
bool operator!=(fm_index_cursor const &rhs) const noexcept
Compares two cursors.
Definition: fm_index_cursor.hpp:209
size_type last_rank() const noexcept
Outputs the rightmost rank.
Definition: fm_index_cursor.hpp:398
constexpr auto to_rank
Return the rank representation of a (semi-)alphabet object.
Definition: concept.hpp:103
std::vector< size_type > locate() const
Locates the occurrences of the searched query in the text.
Definition: fm_index_cursor.hpp:506
size_type count() const noexcept
Counts the number of occurrences of the searched query in the text.
Definition: fm_index_cursor.hpp:488
Provides an alphabet mapping that implements an identity map (i.e. each character is mapped to its ra...
The main SeqAn3 namespace.
constexpr auto join
Flattens a View of ranges into a View.
Definition: ranges:683
constexpr auto slice
A view adaptor that returns a half-open interval on the underlying range.
Definition: slice.hpp:144
index_t index_type
Type of the index.
Definition: fm_index_cursor.hpp:73
Meta-header for the alphabet module.
Adaptations of concepts from the Ranges TS.
typename innermost_value_type< t >::type innermost_value_type_t
Shortcut for seqan3::innermost_value_type (TransformationTrait shortcut).
Definition: range.hpp:191
::ranges::begin begin
Alias for ranges::begin. Returns an iterator to the beginning of a range.
Definition: ranges:174
Exposes the size_type of another type.
Definition: pre.hpp:188
constexpr auto iota
Generates a sequence of elements by repeatedly incrementing an initial value.
Definition: ranges:647
bool extend_right(seq_t &&seq) noexcept
Tries to extend the query by seq to the right.
Definition: fm_index_cursor.hpp:307
auto path_label(text_t &&text) const noexcept
Returns the searched query.
Definition: fm_index_cursor.hpp:446
index_type const * index
Type of the underlying FM index.
Definition: bi_fm_index_cursor.hpp:83
size_type query_length() const noexcept
Returns the length of the searched query.
Definition: fm_index_cursor.hpp:420
The SeqAn Bidirectional FM Index Cursor.
Definition: bi_fm_index_cursor.hpp:54
Provides seqan3::view::slice.
bool extend_right() noexcept
Tries to extend the query by the smallest possible character to the right such that the query is foun...
Definition: fm_index_cursor.hpp:233
std::vector< std::pair< size_type, size_type > > locate() const
Definition: fm_index_cursor.hpp:522
bool cycle_back() noexcept
Tries to replace the rightmost character of the query by the next lexicographically larger character ...
Definition: fm_index_cursor.hpp:361
Provides various transformation traits used by the range module.
Provides the internal representation of a node of the seqan3::fm_index_cursor.
Specifies requirements of a Range type for which begin returns a type that models std::ForwardIterato...
Provides the unidirectional seqan3::fm_index.
The concept std::Same<T, U> is satisfied if and only if T and U denote the same type.
constexpr auto alphabet_size
A type trait that holds the size of a (semi-)alphabet.
Definition: concept.hpp:678
auto lazy_locate() const
Locates the occurrences of the searched query in the text on demand, i.e. a ranges::view is returned ...
Definition: fm_index_cursor.hpp:553
::ranges::end end
Alias for ranges::end. Returns an iterator to the end of a range.
Definition: ranges:179
constexpr auto transform
A range adaptor that takes a invocable and returns a view of the elements with the invocable applied...
Definition: ranges:911
The SeqAn FM Index Cursor.
Definition: fm_index_cursor.hpp:64
T emplace_back(T... args)