SeqAn3  3.0.1
The Modern C++ library for sequence analysis.
bi_fm_index.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2020, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2020, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
13 #pragma once
14 
15 #include <utility>
16 
18 #include <seqan3/std/filesystem>
22 #include <seqan3/std/ranges>
23 
24 namespace seqan3
25 {
26 
61 template <semialphabet alphabet_t,
62  text_layout text_layout_mode_,
63  detail::sdsl_index sdsl_index_type_ = default_sdsl_index_type>
65 {
66 private:
70  using sdsl_index_type = sdsl_index_type_;
72 
74  using rev_sdsl_index_type = sdsl_index_type_;
75 
79  using sdsl_char_type = typename sdsl_index_type::alphabet_type::char_type;
80 
82  using sdsl_sigma_type = typename sdsl_index_type::alphabet_type::sigma_type;
83 
86 
90 
92  fm_index_type fwd_fm;
93 
95  rev_fm_index_type rev_fm;
96 
116  template <std::ranges::range text_t>
118  requires text_layout_mode_ == text_layout::single
120  void construct(text_t && text)
121  {
122  static_assert(std::ranges::bidirectional_range<text_t>, "The text must model bidirectional_range.");
123  static_assert(alphabet_size<innermost_value_type_t<text_t>> <= 256, "The alphabet is too big.");
124  static_assert(std::convertible_to<innermost_value_type_t<text_t>, alphabet_t>,
125  "The alphabet of the text collection must be convertible to the alphabet of the index.");
126  static_assert(dimension_v<text_t> == 1, "The input cannot be a text collection.");
127 
128  // text must not be empty
129  if (std::ranges::begin(text) == std::ranges::end(text))
130  throw std::invalid_argument("The text that is indexed cannot be empty.");
131 
132  auto rev_text = std::views::reverse(text);
133  fwd_fm = fm_index_type{text};
134  rev_fm = fm_index_type{rev_text};
135  }
136 
138  template <std::ranges::range text_t>
140  requires text_layout_mode_ == text_layout::collection
142  void construct(text_t && text)
143  {
144  static_assert(std::ranges::bidirectional_range<text_t>, "The text must model bidirectional_range.");
145  static_assert(std::ranges::bidirectional_range<reference_t<text_t>>,
146  "The elements of the text collection must model bidirectional_range.");
147  static_assert(alphabet_size<innermost_value_type_t<text_t>> <= 256, "The alphabet is too big.");
148  static_assert(std::convertible_to<innermost_value_type_t<text_t>, alphabet_t>,
149  "The alphabet of the text collection must be convertible to the alphabet of the index.");
150  static_assert(dimension_v<text_t> == 2, "The input must be a text collection.");
151 
152  // text must not be empty
153  if (std::ranges::begin(text) == std::ranges::end(text))
154  throw std::invalid_argument("The text that is indexed cannot be empty.");
155 
156  auto rev_text = text | views::deep{std::views::reverse} | std::views::reverse;
157 
158  fwd_fm = fm_index_type{text};
159  rev_fm = fm_index_type{rev_text};
160  }
161 
162 public:
164  static constexpr text_layout text_layout_mode = text_layout_mode_;
165 
172  using size_type = typename sdsl_index_type::size_type;
174 
184 
186 
187  template <typename bi_fm_index_t>
188  friend class bi_fm_index_cursor;
189 
193  bi_fm_index() = default;
194  bi_fm_index(bi_fm_index const &) = default;
195  bi_fm_index & operator=(bi_fm_index const &) = default;
196  bi_fm_index(bi_fm_index &&) = default;
197  bi_fm_index & operator=(bi_fm_index &&) = default;
198  ~bi_fm_index() = default;
199 
208  template <std::ranges::range text_t>
209  bi_fm_index(text_t && text)
210  {
211  construct(std::forward<text_t>(text));
212  }
214 
226  size_type size() const noexcept
227  {
228  return fwd_fm.size();
229  }
230 
242  bool empty() const noexcept
243  {
244  return size() == 0;
245  }
246 
258  bool operator==(bi_fm_index const & rhs) const noexcept
259  {
260  return std::tie(fwd_fm, rev_fm) == std::tie(rhs.fwd_fm, rhs.rev_fm);
261  }
262 
274  bool operator!=(bi_fm_index const & rhs) const noexcept
275  {
276  return !(*this == rhs);
277  }
278 
293  cursor_type begin() const noexcept
294  {
295  return {*this};
296  }
297 
310  fwd_cursor_type fwd_begin() const noexcept
311  {
312  return {fwd_fm};
313  }
314 
329  rev_cursor_type rev_begin() const noexcept
330  {
331  return {rev_fm};
332  }
333 
341  template <cereal_archive archive_t>
342  void CEREAL_SERIALIZE_FUNCTION_NAME(archive_t & archive)
343  {
344  archive(fwd_fm);
345  archive(rev_fm);
346  }
348 };
349 
353 template <std::ranges::range text_t>
355 bi_fm_index(text_t &&) -> bi_fm_index<innermost_value_type_t<text_t>, text_layout{dimension_v<text_t> != 1}>;
357 
359 
360 } // namespace seqan3
seqan3::single
The text is a single range.
Definition: concept.hpp:84
seqan3::bi_fm_index::empty
bool empty() const noexcept
Checks whether the index is empty.
Definition: bi_fm_index.hpp:242
utility
seqan3::fm_index< alphabet_t, text_layout_mode_, sdsl_index_type >
seqan3::bi_fm_index::fwd_begin
fwd_cursor_type fwd_begin() const noexcept
Returns a unidirectional seqan3::fm_index_cursor on the original text of the bidirectional index that...
Definition: bi_fm_index.hpp:310
seqan3::reference_t
typename reference< t >::type reference_t
Shortcut for seqan3::reference (transformation_trait shortcut).
Definition: pre.hpp:77
seqan3::bi_fm_index::~bi_fm_index
~bi_fm_index()=default
Defaulted.
seqan3::bi_fm_index::operator==
bool operator==(bi_fm_index const &rhs) const noexcept
Compares two indices.
Definition: bi_fm_index.hpp:258
seqan3::bi_fm_index::operator=
bi_fm_index & operator=(bi_fm_index const &)=default
Defaulted.
filesystem
This header includes C++17 filesystem support and imports it into namespace seqan3::filesystem (indep...
seqan3::bi_fm_index
The SeqAn Bidirectional FM Index.
Definition: bi_fm_index.hpp:64
std::tie
T tie(T... args)
seqan3::bi_fm_index::alphabet_type
typename fm_index_type::alphabet_type alphabet_type
The type of the underlying character of the indexed text.
Definition: bi_fm_index.hpp:170
seqan3::text_layout
text_layout
The possible text layouts (single, collection) the seqan3::fm_index and seqan3::bi_fm_index can suppo...
Definition: concept.hpp:81
seqan3::collection
The text is a range of ranges.
Definition: concept.hpp:86
seqan3::bi_fm_index::size_type
typename sdsl_index_type::size_type size_type
Type for representing positions in the indexed text.
Definition: bi_fm_index.hpp:172
seqan3::bi_fm_index
bi_fm_index(text_t &&) -> bi_fm_index< innermost_value_type_t< text_t >, text_layout
Deduces the dimensions of the text.
Definition: bi_fm_index.hpp:355
seqan3::bi_fm_index::text_layout_mode
static constexpr text_layout text_layout_mode
Indicates whether index is built over a collection.
Definition: bi_fm_index.hpp:164
seqan3::fm_index< alphabet_t, text_layout_mode_, sdsl_index_type >::alphabet_type
alphabet_t alphabet_type
The type of the underlying character of the indexed text.
Definition: fm_index.hpp:326
range.hpp
Provides various transformation traits used by the range module.
seqan3
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:36
persist.hpp
Provides seqan3::views::persist.
std::invalid_argument
convertible_to
The concept std::convertible_to<From, To> specifies that an expression of the type and value category...
seqan3::fm_index::size
size_type size() const noexcept
Returns the length of the indexed text including sentinel characters.
Definition: fm_index.hpp:406
seqan3::default_sdsl_index_type
sdsl_wt_index_type default_sdsl_index_type
The default FM Index Configuration.
Definition: fm_index.hpp:92
ranges
Adaptations of concepts from the Ranges TS.
seqan3::bi_fm_index::bi_fm_index
bi_fm_index(text_t &&text)
Constructor that immediately constructs the index given a range. The range cannot be empty.
Definition: bi_fm_index.hpp:209
seqan3::bi_fm_index_cursor
The SeqAn Bidirectional FM Index Cursor.
Definition: bi_fm_index_cursor.hpp:55
seqan3::bi_fm_index::begin
cursor_type begin() const noexcept
Returns a seqan3::bi_fm_index_cursor on the index that can be used for searching.
Definition: bi_fm_index.hpp:293
seqan3::innermost_value_type_t
typename innermost_value_type< t >::type innermost_value_type_t
Shortcut for seqan3::innermost_value_type (transformation_trait shortcut).
Definition: range.hpp:191
seqan3::fm_index_cursor
The SeqAn FM Index Cursor.
Definition: fm_index_cursor.hpp:59
bi_fm_index_cursor.hpp
Provides the seqan3::bi_fm_index_cursor for searching in the bidirectional seqan3::bi_fm_index.
seqan3::bi_fm_index::rev_begin
rev_cursor_type rev_begin() const noexcept
Returns a unidirectional seqan3::fm_index_cursor on the reversed text of the bidirectional index that...
Definition: bi_fm_index.hpp:329
seqan3::bi_fm_index::bi_fm_index
bi_fm_index()=default
Defaulted.
seqan3::bi_fm_index::operator!=
bool operator!=(bi_fm_index const &rhs) const noexcept
Compares two indices.
Definition: bi_fm_index.hpp:274
fm_index.hpp
Provides the unidirectional seqan3::fm_index.
semialphabet
The basis for seqan3::alphabet, but requires only rank interface (not char).
seqan3::alphabet_size
constexpr auto alphabet_size
A type trait that holds the size of a (semi-)alphabet.
Definition: concept.hpp:706
seqan3::bi_fm_index::size
size_type size() const noexcept
Returns the length of the indexed text including sentinel characters.
Definition: bi_fm_index.hpp:226