 |
SeqAn3
3.0.1
The Modern C++ library for sequence analysis.
|
|
Go to the documentation of this file.
15 #include <sdsl/suffix_trees.hpp>
35 template <
typename index_t>
36 class fm_index_cursor;
38 template <
typename index_t>
39 class bi_fm_index_cursor;
78 sdsl::csa_wt<sdsl::wt_blcd<sdsl::bit_vector,
79 sdsl::rank_support_v<>,
80 sdsl::select_support_scan<>,
81 sdsl::select_support_scan<0>>,
84 sdsl::sa_order_sa_sampling<>,
140 using sdsl_index_type = sdsl_index_type_;
145 using sdsl_char_type =
typename sdsl_index_type::alphabet_type::char_type;
147 using sdsl_sigma_type =
typename sdsl_index_type::alphabet_type::sigma_type;
151 sdsl_index_type index;
154 sdsl::sd_vector<> text_begin;
156 sdsl::select_support_sd<1> text_begin_ss;
158 sdsl::rank_support_sd<1> text_begin_rs;
179 template <std::ranges::range text_t>
183 void construct(text_t && text)
185 static_assert(std::ranges::bidirectional_range<text_t>,
"The text must model bidirectional_range.");
188 "The alphabet of the text collection must be convertible to the alphabet of the index.");
189 static_assert(dimension_v<text_t> == 1,
"The input cannot be a text collection.");
192 if (std::ranges::begin(text) == std::ranges::end(text))
195 constexpr
auto sigma = alphabet_size<alphabet_t>;
202 sdsl::int_vector<8> tmp_text(std::ranges::distance(text));
208 if constexpr (sigma == 256)
212 "character alphabets the last one/two values are reserved"
213 "(single sequence/collection).");
217 | std::views::reverse,
218 std::ranges::begin(tmp_text));
220 sdsl::construct_im(index, tmp_text, 0);
229 template <std::ranges::range text_t>
233 void construct(text_t && text)
235 static_assert(std::ranges::bidirectional_range<text_t>,
"The text collection must model bidirectional_range.");
237 "The elements of the text collection must model bidirectional_range.");
240 "The alphabet of the text collection must be convertible to the alphabet of the index.");
241 static_assert(dimension_v<text_t> == 2,
"The input must be a text collection.");
244 if (std::ranges::begin(text) == std::ranges::end(text))
250 bool all_empty =
true;
252 for (
auto && t : text)
254 if (std::ranges::begin(t) != std::ranges::end(t))
258 text_size += 1 + std::ranges::distance(t);
264 constexpr
auto sigma = alphabet_size<alphabet_t>;
269 sdsl::sd_vector_builder builder(text_size, std::ranges::distance(text));
270 size_t prefix_sum{0};
272 for (
auto && t : text)
274 builder.set(prefix_sum);
275 prefix_sum += std::ranges::distance(t) + 1;
278 text_begin = sdsl::sd_vector<>(builder);
279 text_begin_ss = sdsl::select_support_sd<1>(&text_begin);
280 text_begin_rs = sdsl::rank_support_sd<1>(&text_begin);
283 sdsl::int_vector<8> tmp_text(text_size - (std::ranges::distance(text) > 1));
285 constexpr uint8_t delimiter = sigma >= 255 ? 255 : sigma + 1;
294 if constexpr (sigma >= 255)
298 " for full character alphabets the last one/"
299 "two values are reserved (single sequence/"
305 | views::join(delimiter),
306 std::ranges::begin(tmp_text));
310 if (std::ranges::distance(text) == 1)
311 tmp_text.back() = delimiter;
313 std::ranges::reverse(tmp_text);
315 sdsl::construct_im(index, tmp_text, 0);
333 template <
typename bi_fm_index_t>
336 template <
typename fm_index_t>
339 template <
typename fm_index_t>
340 friend class detail::fm_index_cursor_node;
349 index{rhs.index}, text_begin{rhs.text_begin}, text_begin_ss{rhs.text_begin_ss}, text_begin_rs{rhs.text_begin_rs}
351 text_begin_ss.set_vector(&text_begin);
352 text_begin_rs.set_vector(&text_begin);
358 text_begin_rs{
std::move(rhs.text_begin_rs)}
360 text_begin_ss.set_vector(&text_begin);
361 text_begin_rs.set_vector(&text_begin);
369 text_begin_ss =
std::move(rhs.text_begin_ss);
370 text_begin_rs =
std::move(rhs.text_begin_rs);
372 text_begin_ss.set_vector(&text_begin);
373 text_begin_rs.set_vector(&text_begin);
388 template <std::ranges::range text_t>
391 construct(std::forward<text_t>(text));
441 return (index == rhs.index) && (text_begin == rhs.text_begin);
457 return !(*
this == rhs);
486 template <cereal_archive archive_t>
487 void CEREAL_SERIALIZE_FUNCTION_NAME(archive_t & archive)
491 archive(text_begin_ss);
492 text_begin_ss.set_vector(&text_begin);
493 archive(text_begin_rs);
494 text_begin_rs.set_vector(&text_begin);
496 auto sigma = alphabet_size<alphabet_t>;
498 if (sigma != alphabet_size<alphabet_t>)
501 " but it is being read into an fm_index with an alphabet of size " +
510 (tmp ?
"text collection" :
"single text") +
511 " but it is being read into an fm_index expecting a " +
522 template <std::ranges::range text_t>
The text is a single range.
Definition: concept.hpp:84
Provides various shortcuts for common std::ranges functions.
bool empty() const noexcept
Checks whether the index is empty.
Definition: fm_index.hpp:422
The SeqAn FM Index.
Definition: fm_index.hpp:134
typename reference< t >::type reference_t
Shortcut for seqan3::reference (transformation_trait shortcut).
Definition: pre.hpp:77
fm_index()=default
Defaulted.
Provides the internal representation of a node of the seqan3::fm_index_cursor.
fm_index(fm_index const &rhs)
When copy constructing, also update internal data structures.
Definition: fm_index.hpp:348
const auto move
A view that turns lvalue-references into rvalue-references.
Definition: move.hpp:68
const auto to_rank
A view that calls seqan3::to_rank() on each element in the input range.
Definition: to_rank.hpp:67
This header includes C++17 filesystem support and imports it into namespace seqan3::filesystem (indep...
static constexpr text_layout text_layout_mode
Indicates whether index is built over a collection.
Definition: fm_index.hpp:320
Adaptations of algorithms from the Ranges TS.
fm_index(text_t &&text)
Constructor that immediately constructs the index given a range. The range cannot be empty.
Definition: fm_index.hpp:389
text_layout
The possible text layouts (single, collection) the seqan3::fm_index and seqan3::bi_fm_index can suppo...
Definition: concept.hpp:81
The text is a range of ranges.
Definition: concept.hpp:86
Provides seqan3::views::to.
alphabet_t alphabet_type
The type of the underlying character of the indexed text.
Definition: fm_index.hpp:326
bool operator!=(fm_index const &rhs) const noexcept
Compares two indices.
Definition: fm_index.hpp:455
Provides various transformation traits used by the range module.
Provides the seqan3::fm_index_cursor for searching in the unidirectional seqan3::fm_index.
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:36
typename sdsl_index_type::size_type size_type
Type for representing positions in the indexed text.
Definition: fm_index.hpp:328
The concept std::convertible_to<From, To> specifies that an expression of the type and value category...
size_type size() const noexcept
Returns the length of the indexed text including sentinel characters.
Definition: fm_index.hpp:406
~fm_index()=default
Defaulted.
fm_index(fm_index &&rhs)
When move constructing, also update internal data structures.
Definition: fm_index.hpp:356
sdsl_wt_index_type default_sdsl_index_type
The default FM Index Configuration.
Definition: fm_index.hpp:92
Provides seqan3::views::join.
Adaptations of concepts from the Ranges TS.
Byte alphabet that does no mapping of char_type to comp_char_type and vice versa.
Definition: csa_alphabet_strategy.hpp:37
The SeqAn Bidirectional FM Index Cursor.
Definition: bi_fm_index_cursor.hpp:55
fm_index & operator=(fm_index rhs)
When copy/move assigning, also update internal data structures.
Definition: fm_index.hpp:365
Provides the concepts for seqan3::fm_index and seqan3::bi_fm_index and its traits and cursors.
typename innermost_value_type< t >::type innermost_value_type_t
Shortcut for seqan3::innermost_value_type (transformation_trait shortcut).
Definition: range.hpp:191
The SeqAn FM Index Cursor.
Definition: fm_index_cursor.hpp:59
fm_index(text_t &&) -> fm_index< innermost_value_type_t< text_t >, text_layout
Deduces the alphabet and dimensions of the text.
Definition: fm_index.hpp:524
seqan3::type_list< trait_t< pack_t >... > transform
Apply a transformation trait to every type in the pack and return a seqan3::type_list of the results.
Definition: traits.hpp:307
cursor_type begin() const noexcept
Returns a seqan3::fm_index_cursor on the index that can be used for searching.
Definition: fm_index.hpp:474
The basis for seqan3::alphabet, but requires only rank interface (not char).
Provides an alphabet mapping that implements an identity map (i.e. each character is mapped to its ra...
constexpr auto alphabet_size
A type trait that holds the size of a (semi-)alphabet.
Definition: concept.hpp:706
sdsl::csa_wt< sdsl::wt_blcd< sdsl::bit_vector, sdsl::rank_support_v<>, sdsl::select_support_scan<>, sdsl::select_support_scan< 0 > >, 16, 10000000, sdsl::sa_order_sa_sampling<>, sdsl::isa_sampling<>, sdsl::plain_byte_alphabet > sdsl_wt_index_type
The FM Index Configuration using a Wavelet Tree.
Definition: fm_index.hpp:86
bool operator==(fm_index const &rhs) const noexcept
Compares two indices.
Definition: fm_index.hpp:438
Provides seqan3::views::to_rank.