SeqAn3 3.1.0
The Modern C++ library for sequence analysis.
bi_fm_index.hpp
Go to the documentation of this file.
1// -----------------------------------------------------------------------------------------------------
2// Copyright (c) 2006-2021, Knut Reinert & Freie Universität Berlin
3// Copyright (c) 2016-2021, Knut Reinert & MPI für molekulare Genetik
4// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6// -----------------------------------------------------------------------------------------------------
7
13#pragma once
14
15#include <seqan3/std/filesystem>
16#include <seqan3/std/ranges>
17#include <utility>
18
22
23namespace seqan3
24{
25
57template <semialphabet alphabet_t,
58 text_layout text_layout_mode_,
59 detail::sdsl_index sdsl_index_type_ = default_sdsl_index_type>
61{
62private:
// The SDSL index type of the forward index, taken verbatim from the
// sdsl_index_type_ template parameter.
67 using sdsl_index_type = sdsl_index_type_;
68

// The SDSL index type handed to rev_fm_index_type (detail::reverse_fm_index).
// It is an sdsl::csa_wt that reuses the wavelet tree type and alphabet type of
// sdsl_wt_index_type, with both sampling rates set to ten million.
// NOTE(review): the very sparse sampling presumably reflects that the second
// index is never used to locate text positions - confirm against the cursor code.
70 using rev_sdsl_index_type = sdsl::csa_wt<sdsl_wt_index_type::wavelet_tree_type, // Wavelet tree type
71 10'000'000, // Sampling rate of the suffix array
72 10'000'000, // Sampling rate of the inverse suffix array
73 sdsl::sa_order_sa_sampling<>, // Text or SA based sampling for SA
74 sdsl::isa_sampling<>, // Text or ISA based sampling for ISA
75 sdsl_wt_index_type::alphabet_type>; // How to represent the alphabet
76
// Character type of the alphabet used by the forward SDSL index.
80 using sdsl_char_type = typename sdsl_index_type::alphabet_type::char_type;
81

// Sigma type of the alphabet used by the forward SDSL index.
// NOTE(review): presumably the integer type that holds the alphabet size - confirm in SDSL.
83 using sdsl_sigma_type = typename sdsl_index_type::alphabet_type::sigma_type;
84
87
// Type of the second underlying index: a detail::reverse_fm_index over the same
// alphabet and text layout as the forward index, backed by rev_sdsl_index_type.
89 using rev_fm_index_type = detail::reverse_fm_index<alphabet_t, text_layout_mode_, rev_sdsl_index_type>;
91

// Forward index. size() reports its size and fwd_cursor() is built from it.
93 fm_index_type fwd_fm;
94

// Second index of type rev_fm_index_type, built from the same text in construct().
96 rev_fm_index_type rev_fm;
97
// Builds both underlying indices from the given text.
//
// text: any std::ranges::range; it is first checked by
//       detail::fm_index_validator::validate for this alphabet and text layout.
//
// The text is read three times (validation plus two index constructions), so it
// is passed on as an lvalue each time instead of being forwarded.
117 template <std::ranges::range text_t>
118 void construct(text_t && text)
119 {
120 detail::fm_index_validator::validate<alphabet_t, text_layout_mode_>(text);
121
// Both members are replaced by freshly constructed indices over the same text.
122 fwd_fm = fm_index_type{text};
123 rev_fm = rev_fm_index_type{text};
124 }
125
126public:
// Indicates whether the index is built over a collection; mirrors the
// text_layout_mode_ template parameter.
128 static constexpr text_layout text_layout_mode = text_layout_mode_;
129

// Type for representing positions in the indexed text (the size_type of the
// forward SDSL index).
136 using size_type = typename sdsl_index_type::size_type;
138
146
148

// Every bi_fm_index_cursor specialisation is a friend and may access the
// private members of this class.
149 template <typename bi_fm_index_t>
150 friend class bi_fm_index_cursor;
151
// Special member functions, all compiler-generated.
// NOTE(review): the member reference below also lists a defaulted move
// assignment operator, operator=(bi_fm_index &&), but its line (159) is absent
// from this listing - confirm against the original header.
155 bi_fm_index() = default;
156 bi_fm_index(bi_fm_index const &) = default;
157 bi_fm_index & operator=(bi_fm_index const &) = default;
158 bi_fm_index(bi_fm_index &&) = default;
160 ~bi_fm_index() = default;
161
// Constructor that immediately constructs the index given a range.
// The range cannot be empty.
//
// text: the range to index; it is forwarded to construct(), which validates it
//       and builds both underlying indices.
170 template <std::ranges::range text_t>
171 bi_fm_index(text_t && text)
172 {
173 construct(std::forward<text_t>(text));
174 }
176
// Returns the length of the indexed text including sentinel characters.
// The value is taken from the forward index.
188 size_type size() const noexcept
189 {
190 return fwd_fm.size();
191 }
192
// Checks whether the index is empty, i.e. whether size() is zero.
204 bool empty() const noexcept
205 {
206 return size() == 0;
207 }
208
// Compares two indices. They are equal when both the forward and the second
// underlying index compare equal.
// NOTE(review): std::tie is declared in <tuple>, which is not among the includes
// visible in this listing - presumably it arrives transitively; confirm.
220 bool operator==(bi_fm_index const & rhs) const noexcept
221 {
222 return std::tie(fwd_fm, rev_fm) == std::tie(rhs.fwd_fm, rhs.rev_fm);
223 }
224
// Compares two indices for inequality; the exact negation of operator==.
236 bool operator!=(bi_fm_index const & rhs) const noexcept
237 {
238 return !(*this == rhs);
239 }
240
// Returns a seqan3::bi_fm_index_cursor on the index that can be used for searching.
// The cursor is constructed from a reference to this index.
// NOTE(review): the cursor_type alias is not visible in this listing.
255 cursor_type cursor() const noexcept
256 {
257 return {*this};
258 }
259
// Body of fwd_cursor(): the returned cursor is constructed from the forward
// index only.
// NOTE(review): the signature line (272) is absent from this listing; the
// member reference gives it as: fwd_cursor_type fwd_cursor() const noexcept.
273 {
274 return {fwd_fm};
275 }
276
// Serialisation hook; the function name is supplied by the
// CEREAL_SERIALIZE_FUNCTION_NAME macro.
//
// archive: any type modelling cereal_archive. Both underlying indices are passed
//          to it, forward index first.
// NOTE(review): with cereal a single serialize function normally serves both
// saving and loading - confirm.
284 template <cereal_archive archive_t>
285 void CEREAL_SERIALIZE_FUNCTION_NAME(archive_t & archive)
286 {
287 archive(fwd_fm);
288 archive(rev_fm);
289 }
291};
292
// Deduction guide: deduces the dimensions of the text.
// The alphabet is the innermost value type of the range. The text layout is
// text_layout{false} for a one-dimensional range and text_layout{true} otherwise.
// NOTE(review): presumably false maps to single and true to collection - confirm
// against the text_layout enum.
297template <std::ranges::range text_t>
298bi_fm_index(text_t &&) -> bi_fm_index<range_innermost_value_t<text_t>, text_layout{range_dimension_v<text_t> != 1}>;
300
301} // namespace seqan3
Provides the seqan3::bi_fm_index_cursor for searching in the bidirectional seqan3::bi_fm_index.
The SeqAn Bidirectional FM Index Cursor.
Definition: bi_fm_index_cursor.hpp:55
The SeqAn Bidirectional FM Index.
Definition: bi_fm_index.hpp:61
bi_fm_index(text_t &&text)
Constructor that immediately constructs the index given a range. The range cannot be empty.
Definition: bi_fm_index.hpp:171
bi_fm_index(bi_fm_index &&)=default
Defaulted.
bi_fm_index & operator=(bi_fm_index &&)=default
Defaulted.
bi_fm_index()=default
Defaulted.
fwd_cursor_type fwd_cursor() const noexcept
Returns a unidirectional seqan3::fm_index_cursor on the original text of the bidirectional index that can be used for searching.
Definition: bi_fm_index.hpp:272
bool operator==(bi_fm_index const &rhs) const noexcept
Compares two indices.
Definition: bi_fm_index.hpp:220
bool operator!=(bi_fm_index const &rhs) const noexcept
Compares two indices.
Definition: bi_fm_index.hpp:236
static constexpr text_layout text_layout_mode
Indicates whether index is built over a collection.
Definition: bi_fm_index.hpp:128
cursor_type cursor() const noexcept
Returns a seqan3::bi_fm_index_cursor on the index that can be used for searching.
Definition: bi_fm_index.hpp:255
typename sdsl_index_type::size_type size_type
Type for representing positions in the indexed text.
Definition: bi_fm_index.hpp:136
bool empty() const noexcept
Checks whether the index is empty.
Definition: bi_fm_index.hpp:204
bi_fm_index & operator=(bi_fm_index const &)=default
Defaulted.
bi_fm_index(bi_fm_index const &)=default
Defaulted.
typename fm_index_type::alphabet_type alphabet_type
The type of the underlying character of the indexed text.
Definition: bi_fm_index.hpp:134
size_type size() const noexcept
Returns the length of the indexed text including sentinel characters.
Definition: bi_fm_index.hpp:188
~bi_fm_index()=default
Defaulted.
The SeqAn FM Index Cursor.
Definition: fm_index_cursor.hpp:87
size_type size() const noexcept
Returns the length of the indexed text including sentinel characters.
Definition: fm_index.hpp:459
alphabet_t alphabet_type
The type of the underlying character of the indexed text.
Definition: fm_index.hpp:379
Provides various transformation traits used by the range module.
The <filesystem> header from C++17's standard library.
Provides the unidirectional seqan3::fm_index.
text_layout
The possible text layouts (single, collection) the seqan3::fm_index and seqan3::bi_fm_index can support.
Definition: concept.hpp:72
sdsl_wt_index_type default_sdsl_index_type
The default FM Index Configuration.
Definition: fm_index.hpp:148
The basis for seqan3::alphabet, but requires only rank interface (not char).
The main SeqAn3 namespace.
Definition: cigar_operation_table.hpp:2
bi_fm_index(text_t &&) -> bi_fm_index< range_innermost_value_t< text_t >, text_layout{range_dimension_v< text_t > != 1}>
Deduces the dimensions of the text.
Definition: bi_fm_index.hpp:298
The <ranges> header from C++20's standard library.
T tie(T... args)