SeqAn3 3.4.0-rc.1
The Modern C++ library for sequence analysis.
Loading...
Searching...
No Matches
seqan3::fm_index_cursor< index_t > Class Template Reference

The SeqAn FM Index Cursor. More...

#include <seqan3/search/fm_index/fm_index_cursor.hpp>

+ Inheritance diagram for seqan3::fm_index_cursor< index_t >:

Public Member Functions

Constructors, destructor and assignment
 fm_index_cursor () noexcept=default
 Default constructor. Accessing member functions on a default constructed object is undefined behavior.
 
 fm_index_cursor (fm_index_cursor const &) noexcept=default
 Defaulted.
 
fm_index_cursoroperator= (fm_index_cursor const &) noexcept=default
 Defaulted.
 
 fm_index_cursor (fm_index_cursor &&) noexcept=default
 Defaulted.
 
fm_index_cursoroperator= (fm_index_cursor &&) noexcept=default
 Defaulted.
 
 ~fm_index_cursor ()=default
 Defaulted.
 
 fm_index_cursor (index_t const &_index) noexcept
 Construct from given index.
 
bool operator== (fm_index_cursor const &rhs) const noexcept
 Compares two cursors.
 
bool operator!= (fm_index_cursor const &rhs) const noexcept
 Compares two cursors.
 
bool extend_right () noexcept
 Tries to extend the query by the smallest possible character to the right such that the query is found in the text. Goes down the leftmost (i.e. lexicographically smallest) edge. .
 
template<typename char_t >
requires std::convertible_to<char_t, index_alphabet_type>
bool extend_right (char_t const c) noexcept
 Tries to extend the query by the character c to the right.
 
template<typename char_type >
requires detail::is_char_adaptation_v<char_type>
bool extend_right (char_type const *cstring) noexcept
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 
template<std::ranges::range seq_t>
bool extend_right (seq_t &&seq) noexcept
 Tries to extend the query by seq to the right.
 
bool cycle_back () noexcept
 Tries to replace the rightmost character of the query by the next lexicographically larger character such that the query is found in the text. Moves the cursor to the right sibling of the current suffix tree node. It would be equivalent to going up an edge and going down that edge with the smallest character that is larger than the previous searched character. Calling cycle_back() on an cursor pointing to the root node is undefined behaviour! .
 
size_type last_rank () const noexcept
 Outputs the rightmost rank.
 
seqan3::suffix_array_interval suffix_array_interval () const noexcept
 Returns the half-open suffix array interval.
 
size_type query_length () const noexcept
 Returns the length of the searched query. Returns the depth of the cursor node in the implicit suffix tree. .
 
template<std::ranges::range text_t>
requires (index_t::text_layout_mode == text_layout::single)
auto path_label (text_t &&text) const noexcept
 Returns the searched query.
 
template<std::ranges::range text_t>
requires (index_t::text_layout_mode == text_layout::collection)
auto path_label (text_t &&text) const noexcept
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 
size_type count () const noexcept
 Counts the number of occurrences of the searched query in the text.
 
locate_result_type locate () const
 Locates the occurrences of the searched query in the text.
 
locate_result_type locate () const
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 
auto lazy_locate () const
 Locates the occurrences of the searched query in the text on demand, i.e. a std::ranges::view is returned and every position is located once it is accessed.
 
auto lazy_locate () const
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 
template<cereal_archive archive_t>
void serialize (archive_t &archive)
 Serialisation support function.
 

Private Member Functions

bool backward_search (sdsl_index_type const &csa, sdsl_char_type const c, size_type &l, size_type &r) const noexcept
 Optimized backward search without alphabet mapping.
 
size_type offset () const noexcept
 Helper function to recompute text positions since the indexed text is reversed.
 

Private Attributes

index_type const * index {nullptr}
 Underlying FM index.
 
node_type node {}
 Underlying index from the SDSL.
 
size_type parent_lb {}
 Left suffix array interval of the parent node. Needed for cycle_back().
 
size_type parent_rb {}
 Right suffix array interval of the parent node. Needed for cycle_back().
 
sdsl_sigma_type sigma {}
 Alphabet size of the index without delimiters.
 

Friends

template<typename _index_t >
class bi_fm_index_cursor
 

Member types

using index_type = index_t
 Type of the index.
 
using size_type = typename index_type::size_type
 Type for representing positions in the indexed text.
 
using node_type = detail::fm_index_cursor_node< index_t >
 Type of the representation of a suffix tree node.
 
using sdsl_char_type = typename index_type::sdsl_char_type
 Type of the representation of characters in the underlying SDSL index.
 
using sdsl_index_type = typename index_t::sdsl_index_type
 Type of the SDSL index.
 
using sdsl_sigma_type = typename index_type::sdsl_sigma_type
 Type of the alphabet size in the underlying SDSL index.
 
using index_alphabet_type = typename index_t::alphabet_type
 Alphabet type of the index.
 
using locate_result_value_type = std::pair< size_type, size_type >
 The result value type when calling locate, a pair of reference id and reference position.
 
using locate_result_type = std::vector< locate_result_value_type >
 The result vector type when calling locate.
 

Detailed Description

template<typename index_t>
class seqan3::fm_index_cursor< index_t >

The SeqAn FM Index Cursor.

Template Parameters
index_tThe type of the underlying index. This is normally seqan3::fm_index.

The cursor's interface provides searching a string from left to right in the indexed text. All methods modifying the cursor (e.g. extending by a character with extend_right()) return a bool value whether the operation was successful or not. In case of an unsuccessful operation the cursor remains unmodified, i.e. a cursor can never be in an invalid state except default constructed cursors that are always invalid.

The behaviour is equivalent to a suffix tree with the space and time efficiency of the underlying pure FM index. The cursor traverses the implicit suffix tree beginning at the root node. The implicit suffix tree is not compacted, i.e. going down an edge using extend_right(char) will increase the query by only one character.

The asymptotic running times for using the cursor depend on the SDSL index configuration. To determine the exact running times, you have to additionally look up the running times of the used traits (configuration).

Constructor & Destructor Documentation

◆ fm_index_cursor()

template<typename index_t >
seqan3::fm_index_cursor< index_t >::fm_index_cursor ( )
defaultnoexcept

Default constructor. Accessing member functions on a default constructed object is undefined behavior.

Defaulted.

Member Function Documentation

◆ count()

template<typename index_t >
size_type seqan3::fm_index_cursor< index_t >::count ( ) const
inlinenoexcept

Counts the number of occurrences of the searched query in the text.

Returns
Number of occurrences of the searched query in the text.

Complexity

Constant.

Exceptions

No-throw guarantee.

◆ cycle_back()

template<typename index_t >
bool seqan3::fm_index_cursor< index_t >::cycle_back ( )
inlinenoexcept

Tries to replace the rightmost character of the query by the next lexicographically larger character such that the query is found in the text. Moves the cursor to the right sibling of the current suffix tree node. It would be equivalent to going up an edge and going down that edge with the smallest character that is larger than the previous searched character. Calling cycle_back() on an cursor pointing to the root node is undefined behaviour! .

Returns
true if there exists a query in the text where the rightmost character of the query is lexicographically larger than the current rightmost character of the query.

Example

// SPDX-FileCopyrightText: 2006-2024 Knut Reinert & Freie Universität Berlin
// SPDX-FileCopyrightText: 2016-2024 Knut Reinert & MPI für molekulare Genetik
// SPDX-License-Identifier: CC0-1.0
#include <vector>
int main()
{
using namespace seqan3::literals;
std::vector<seqan3::dna4> genome{"AATAATAAC"_dna4};
seqan3::fm_index index{genome}; // build the index
auto cur = index.cursor(); // create a cursor
// cur.cycle_back(); // cycle_back on begin() is undefined behaviour!
cur.extend_right("AAC"_dna4); // search the sequence "AAC"
seqan3::debug_stream << cur.path_label(genome) << '\n'; // prints "AAC"
seqan3::debug_stream << cur.last_rank() << '\n'; // prints 1
seqan3::debug_stream << cur.query_length() << '\n'; // prints 3
auto [left_bound, right_bound] = cur.suffix_array_interval(); // Get the half-open suffix array interval.
seqan3::debug_stream << '[' << left_bound << ',' << right_bound << ")\n"; // prints "[7,8)"
cur.cycle_back(); // search the sequence "AAT"
seqan3::debug_stream << cur.path_label(genome) << '\n'; // prints "AAT"
seqan3::debug_stream << cur.last_rank() << '\n'; // prints 3
seqan3::debug_stream << cur.query_length() << '\n'; // prints 3
auto interval = cur.suffix_array_interval(); // Get the half-open suffix array interval.
seqan3::debug_stream << '[' << interval.begin_position << ',' << interval.end_position << ")\n"; // prints "[8,10)"
cur.cycle_back(); // "cur" doesn't change because the rightmost char
// is already the largest dna4 char.
seqan3::debug_stream << cur.path_label(genome) << '\n'; // prints "AAT"
seqan3::debug_stream << cur.last_rank() << '\n'; // prints 3
seqan3::debug_stream << cur.query_length() << '\n'; // prints 3
auto && [lb, rb] = cur.suffix_array_interval(); // Get the half-open suffix array interval.
seqan3::debug_stream << '[' << lb << ',' << rb << ")\n"; // prints "[8,10)"
}
bool extend_right() noexcept
Tries to extend the query by the smallest possible character to the right such that the query is foun...
Definition fm_index_cursor.hpp:257
index_type const * index
Underlying FM index.
Definition fm_index_cursor.hpp:116
The SeqAn FM Index.
Definition fm_index.hpp:186
cursor_type cursor() const noexcept
Returns a seqan3::fm_index_cursor on the index that can be used for searching. Cursor is pointing to...
Definition fm_index.hpp:538
Provides seqan3::debug_stream and related types.
Provides seqan3::dna4, container aliases and string literals.
debug_stream_type debug_stream
A global instance of seqan3::debug_stream_type.
Definition debug_stream.hpp:37
The SeqAn namespace for literals.
Meta-header for the Search / FM Index submodule .

Complexity

\(O(\Sigma) * O(T_{BACKWARD\_SEARCH})\)

It scans linearly over the alphabet starting from the rightmost character until it finds the query with a larger rightmost character.

Exceptions

No-throw guarantee.

◆ extend_right() [1/3]

template<typename index_t >
bool seqan3::fm_index_cursor< index_t >::extend_right ( )
inlinenoexcept

Tries to extend the query by the smallest possible character to the right such that the query is found in the text. Goes down the leftmost (i.e. lexicographically smallest) edge. .

Returns
true if the cursor could extend the query successfully.

Complexity

\(O(\Sigma) * O(T_{BACKWARD\_SEARCH})\)

It scans linearly over the alphabet until it finds the smallest character that is represented by an edge.

Exceptions

No-throw guarantee.

◆ extend_right() [2/3]

template<typename index_t >
template<typename char_t >
requires std::convertible_to<char_t, index_alphabet_type>
bool seqan3::fm_index_cursor< index_t >::extend_right ( char_t const  c)
inlinenoexcept

Tries to extend the query by the character c to the right.

Template Parameters
char_tType of the character needs to be convertible to the character type char_type of the index.
Parameters
[in]cCharacter to extend the query with to the right.
Returns
true if the cursor could extend the query successfully.

Complexity

\(O(T_{BACKWARD\_SEARCH})\)

Exceptions

No-throw guarantee.

◆ extend_right() [3/3]

template<typename index_t >
template<std::ranges::range seq_t>
bool seqan3::fm_index_cursor< index_t >::extend_right ( seq_t &&  seq)
inlinenoexcept

Tries to extend the query by seq to the right.

Template Parameters
seq_tThe type of range of the sequence to search; must model std::ranges::forward_range.
Parameters
[in]seqSequence to extend the query with to the right.
Returns
true if the cursor could extend the query successfully.

If extending fails in the middle of the sequence, all previous computations are rewound to restore the cursor's state before calling this method.

Complexity

\(|seq| * O(T_{BACKWARD\_SEARCH})\)

Exceptions

No-throw guarantee.

◆ last_rank()

template<typename index_t >
size_type seqan3::fm_index_cursor< index_t >::last_rank ( ) const
inlinenoexcept

Outputs the rightmost rank.

Returns
Rightmost rank.

Example

// SPDX-FileCopyrightText: 2006-2024 Knut Reinert & Freie Universität Berlin
// SPDX-FileCopyrightText: 2016-2024 Knut Reinert & MPI für molekulare Genetik
// SPDX-License-Identifier: CC0-1.0
#include <vector>
int main()
{
using namespace seqan3::literals;
std::vector<seqan3::dna4> genome{"AATAATAAC"_dna4};
seqan3::fm_index index{genome}; // build the index
auto cur = index.cursor(); // create a cursor
// cur.cycle_back(); // cycle_back on begin() is undefined behaviour!
cur.extend_right("AAC"_dna4); // search the sequence "AAC"
seqan3::debug_stream << cur.path_label(genome) << '\n'; // prints "AAC"
seqan3::debug_stream << cur.last_rank() << '\n'; // prints 1
seqan3::debug_stream << cur.query_length() << '\n'; // prints 3
auto [left_bound, right_bound] = cur.suffix_array_interval(); // Get the half-open suffix array interval.
seqan3::debug_stream << '[' << left_bound << ',' << right_bound << ")\n"; // prints "[7,8)"
cur.cycle_back(); // search the sequence "AAT"
seqan3::debug_stream << cur.path_label(genome) << '\n'; // prints "AAT"
seqan3::debug_stream << cur.last_rank() << '\n'; // prints 3
seqan3::debug_stream << cur.query_length() << '\n'; // prints 3
auto interval = cur.suffix_array_interval(); // Get the half-open suffix array interval.
seqan3::debug_stream << '[' << interval.begin_position << ',' << interval.end_position << ")\n"; // prints "[8,10)"
cur.cycle_back(); // "cur" doesn't change because the rightmost char
// is already the largest dna4 char.
seqan3::debug_stream << cur.path_label(genome) << '\n'; // prints "AAT"
seqan3::debug_stream << cur.last_rank() << '\n'; // prints 3
seqan3::debug_stream << cur.query_length() << '\n'; // prints 3
auto && [lb, rb] = cur.suffix_array_interval(); // Get the half-open suffix array interval.
seqan3::debug_stream << '[' << lb << ',' << rb << ")\n"; // prints "[8,10)"
}

Complexity

Constant.

Exceptions

No-throw guarantee.

◆ lazy_locate()

template<typename index_t >
auto seqan3::fm_index_cursor< index_t >::lazy_locate ( ) const
inline

Locates the occurrences of the searched query in the text on demand, i.e. a std::ranges::view is returned and every position is located once it is accessed.

Returns
Positions in the text.

Complexity

\(count() * O(T_{BACKWARD\_SEARCH} * SAMPLING\_RATE)\)

Exceptions

Strong exception guarantee (no data is modified in case an exception is thrown).

◆ locate()

template<typename index_t >
locate_result_type seqan3::fm_index_cursor< index_t >::locate ( ) const
inline

Locates the occurrences of the searched query in the text.

Returns
Positions in the text.

Complexity

\(count() * O(T_{BACKWARD\_SEARCH} * SAMPLING\_RATE)\)

Exceptions

Strong exception guarantee (no data is modified in case an exception is thrown).

◆ operator!=()

template<typename index_t >
bool seqan3::fm_index_cursor< index_t >::operator!= ( fm_index_cursor< index_t > const &  rhs) const
inlinenoexcept

Compares two cursors.

Parameters
[in]rhsOther cursor to compare it to.
Returns
true if the cursors are not equal, false otherwise.

Complexity

Constant.

Exceptions

No-throw guarantee.

◆ operator==()

template<typename index_t >
bool seqan3::fm_index_cursor< index_t >::operator== ( fm_index_cursor< index_t > const &  rhs) const
inlinenoexcept

Compares two cursors.

Parameters
[in]rhsOther cursor to compare it to.
Returns
true if both cursors are equal, false otherwise.

Complexity

Constant.

Exceptions

No-throw guarantee.

◆ path_label()

template<typename index_t >
template<std::ranges::range text_t>
requires (index_t::text_layout_mode == text_layout::single)
auto seqan3::fm_index_cursor< index_t >::path_label ( text_t &&  text) const
inlinenoexcept

Returns the searched query.

Template Parameters
text_tThe type of the text used to build the index; must model std::ranges::input_range.
Parameters
[in]textText that was used to build the index.
Returns
Searched query.

Returns the concatenation of all edges from the root node to the cursors current node.

Complexity

\(O(SAMPLING\_RATE * T_{BACKWARD\_SEARCH}) + query\_length()\)

Exceptions

No-throw guarantee.

◆ query_length()

template<typename index_t >
size_type seqan3::fm_index_cursor< index_t >::query_length ( ) const
inlinenoexcept

Returns the length of the searched query. Returns the depth of the cursor node in the implicit suffix tree. .

Returns
Length of query.

Example

// SPDX-FileCopyrightText: 2006-2024 Knut Reinert & Freie Universität Berlin
// SPDX-FileCopyrightText: 2016-2024 Knut Reinert & MPI für molekulare Genetik
// SPDX-License-Identifier: CC0-1.0
#include <vector>
int main()
{
using namespace seqan3::literals;
std::vector<seqan3::dna4> genome{"AATAATAAC"_dna4};
seqan3::fm_index index{genome}; // build the index
auto cur = index.cursor(); // create a cursor
// cur.cycle_back(); // cycle_back on begin() is undefined behaviour!
cur.extend_right("AAC"_dna4); // search the sequence "AAC"
seqan3::debug_stream << cur.path_label(genome) << '\n'; // prints "AAC"
seqan3::debug_stream << cur.last_rank() << '\n'; // prints 1
seqan3::debug_stream << cur.query_length() << '\n'; // prints 3
auto [left_bound, right_bound] = cur.suffix_array_interval(); // Get the half-open suffix array interval.
seqan3::debug_stream << '[' << left_bound << ',' << right_bound << ")\n"; // prints "[7,8)"
cur.cycle_back(); // search the sequence "AAT"
seqan3::debug_stream << cur.path_label(genome) << '\n'; // prints "AAT"
seqan3::debug_stream << cur.last_rank() << '\n'; // prints 3
seqan3::debug_stream << cur.query_length() << '\n'; // prints 3
auto interval = cur.suffix_array_interval(); // Get the half-open suffix array interval.
seqan3::debug_stream << '[' << interval.begin_position << ',' << interval.end_position << ")\n"; // prints "[8,10)"
cur.cycle_back(); // "cur" doesn't change because the rightmost char
// is already the largest dna4 char.
seqan3::debug_stream << cur.path_label(genome) << '\n'; // prints "AAT"
seqan3::debug_stream << cur.last_rank() << '\n'; // prints 3
seqan3::debug_stream << cur.query_length() << '\n'; // prints 3
auto && [lb, rb] = cur.suffix_array_interval(); // Get the half-open suffix array interval.
seqan3::debug_stream << '[' << lb << ',' << rb << ")\n"; // prints "[8,10)"
}

Complexity

Constant.

Exceptions

No-throw guarantee.

◆ serialize()

template<typename index_t >
template<cereal_archive archive_t>
void seqan3::fm_index_cursor< index_t >::serialize ( archive_t &  archive)
inline

Serialisation support function.

Template Parameters
archive_tType of archive; must satisfy seqan3::cereal_archive.
Parameters
archiveThe archive being serialised from/to.
Attention
These functions are never called directly, see Serialisation for more details.

◆ suffix_array_interval()

template<typename index_t >
seqan3::suffix_array_interval seqan3::fm_index_cursor< index_t >::suffix_array_interval ( ) const
inlinenoexcept

Returns the half-open suffix array interval.

Returns
A seqan3::suffix_array_interval contains the half-open interval.

Example

// SPDX-FileCopyrightText: 2006-2024 Knut Reinert & Freie Universität Berlin
// SPDX-FileCopyrightText: 2016-2024 Knut Reinert & MPI für molekulare Genetik
// SPDX-License-Identifier: CC0-1.0
#include <vector>
int main()
{
using namespace seqan3::literals;
std::vector<seqan3::dna4> genome{"AATAATAAC"_dna4};
seqan3::fm_index index{genome}; // build the index
auto cur = index.cursor(); // create a cursor
// cur.cycle_back(); // cycle_back on begin() is undefined behaviour!
cur.extend_right("AAC"_dna4); // search the sequence "AAC"
seqan3::debug_stream << cur.path_label(genome) << '\n'; // prints "AAC"
seqan3::debug_stream << cur.last_rank() << '\n'; // prints 1
seqan3::debug_stream << cur.query_length() << '\n'; // prints 3
auto [left_bound, right_bound] = cur.suffix_array_interval(); // Get the half-open suffix array interval.
seqan3::debug_stream << '[' << left_bound << ',' << right_bound << ")\n"; // prints "[7,8)"
cur.cycle_back(); // search the sequence "AAT"
seqan3::debug_stream << cur.path_label(genome) << '\n'; // prints "AAT"
seqan3::debug_stream << cur.last_rank() << '\n'; // prints 3
seqan3::debug_stream << cur.query_length() << '\n'; // prints 3
auto interval = cur.suffix_array_interval(); // Get the half-open suffix array interval.
seqan3::debug_stream << '[' << interval.begin_position << ',' << interval.end_position << ")\n"; // prints "[8,10)"
cur.cycle_back(); // "cur" doesn't change because the rightmost char
// is already the largest dna4 char.
seqan3::debug_stream << cur.path_label(genome) << '\n'; // prints "AAT"
seqan3::debug_stream << cur.last_rank() << '\n'; // prints 3
seqan3::debug_stream << cur.query_length() << '\n'; // prints 3
auto && [lb, rb] = cur.suffix_array_interval(); // Get the half-open suffix array interval.
seqan3::debug_stream << '[' << lb << ',' << rb << ")\n"; // prints "[8,10)"
}

Complexity

Constant.

Exceptions

No-throw guarantee.


The documentation for this class was generated from the following file:
Hide me