SeqAn3  3.0.0
The Modern C++ library for sequence analysis.
bi_fm_index.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2019, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2019, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
13 #pragma once
14 
15 #include <utility>
16 
18 #include <seqan3/std/filesystem>
22 #include <seqan3/std/ranges>
23 
24 namespace seqan3
25 {
26 
59 template <bool is_collection = false, detail::SdslIndex sdsl_index_type_ = default_sdsl_index_type>
61 {
62 protected:
64  size_t sigma{0};
66  static constexpr bool is_collection_{is_collection};
67 
71  using sdsl_index_type = sdsl_index_type_;
73 
75  using rev_sdsl_index_type = sdsl_index_type_;
76 
80  using sdsl_char_type = typename sdsl_index_type::alphabet_type::char_type;
81 
83  using sdsl_sigma_type = typename sdsl_index_type::alphabet_type::sigma_type;
84 
87 
91 
94 
97 
98 public:
102  using size_type = typename sdsl_index_type::size_type;
105 
116 
117  template <typename fm_index_t>
118  friend class fm_index_cursor;
119 
123  bi_fm_index() = default;
124  bi_fm_index(bi_fm_index const &) = default;
125  bi_fm_index & operator=(bi_fm_index const &) = default;
126  bi_fm_index(bi_fm_index &&) = default;
127  bi_fm_index & operator=(bi_fm_index &&) = default;
128  ~bi_fm_index() = default;
129 
138  template <std::ranges::Range text_t>
139  bi_fm_index(text_t && text)
140  {
141  construct(std::forward<decltype(text)>(text));
142  }
144 
164  template <std::ranges::Range text_t>
166  requires !is_collection_
168  void construct(text_t && text)
169  {
170  static_assert(std::ranges::BidirectionalRange<text_t>, "The text must model BidirectionalRange.");
171  static_assert(alphabet_size<innermost_value_type_t<text_t>> <= 256, "The alphabet is too big.");
172  static_assert(dimension_v<text_t> == 1, "The input cannot be a text collection.");
173 
174  // text must not be empty
175  if (std::ranges::begin(text) == std::ranges::end(text))
176  throw std::invalid_argument("The text that is indexed cannot be empty.");
177 
178  auto rev_text = std::view::reverse(text);
179  fwd_fm.construct(text);
180  rev_fm.construct(rev_text);
181 
182  sigma = fwd_fm.sigma;
183  }
184 
186  template <std::ranges::Range text_t>
188  requires is_collection_
190  void construct(text_t && text)
191  {
192  static_assert(std::ranges::BidirectionalRange<text_t>, "The text must model BidirectionalRange.");
194  "The elements of the text collection must model BidirectionalRange.");
195  static_assert(alphabet_size<innermost_value_type_t<text_t>> <= 256, "The alphabet is too big.");
196  static_assert(dimension_v<text_t> == 2, "The input must be a text collection.");
197 
198  // text must not be empty
199  if (std::ranges::begin(text) == std::ranges::end(text))
200  throw std::invalid_argument("The text that is indexed cannot be empty.");
201 
202  auto rev_text = text | view::deep{std::view::reverse} | std::view::reverse;
203  fwd_fm.construct(text);
204  rev_fm.construct(rev_text);
205 
206  sigma = fwd_fm.sigma;
207  }
208 
220  size_type size() const noexcept
221  {
222  return fwd_fm.size();
223  }
224 
236  bool empty() const noexcept
237  {
238  return size() == 0;
239  }
240 
252  bool operator==(bi_fm_index const & rhs) const noexcept
253  {
254  return std::tie(fwd_fm, rev_fm) == std::tie(rhs.fwd_fm, rhs.rev_fm);
255  }
256 
268  bool operator!=(bi_fm_index const & rhs) const noexcept
269  {
270  return !(*this == rhs);
271  }
272 
287  cursor_type begin() const noexcept
288  {
289  return {*this};
290  }
291 
304  fwd_cursor_type fwd_begin() const noexcept
305  {
306  return {fwd_fm};
307  }
308 
323  rev_cursor_type rev_begin() const noexcept
324  {
325  return {rev_fm};
326  }
327 
335  template <CerealArchive archive_t>
336  void CEREAL_SERIALIZE_FUNCTION_NAME(archive_t & archive)
337  {
338  archive(fwd_fm);
339  archive(rev_fm);
340  }
342 };
343 
347 template <std::ranges::Range text_t>
349 bi_fm_index(text_t &&) -> bi_fm_index<dimension_v<text_t> != 1>;
351 
353 
354 } // namespace seqan3
~bi_fm_index()=default
Destructor.
typename sdsl_index_type::alphabet_type::sigma_type sdsl_sigma_type
The type of the alphabet size of the underlying SDSL index.
Definition: bi_fm_index.hpp:83
bool empty() const noexcept
Checks whether the index is empty.
Definition: bi_fm_index.hpp:236
T tie(T... args)
size_type size() const noexcept
Returns the length of the indexed text including sentinel characters.
Definition: fm_index.hpp:368
typename sdsl_index_type::alphabet_type::char_type sdsl_char_type
The type of the reduced alphabet type. (The reduced alphabet might be smaller than the original alpha...
Definition: bi_fm_index.hpp:80
constexpr auto reverse
A range adaptor that presents the underlying range in reverse order.
Definition: ranges:721
size_t sigma
The alphabet size of the text.
Definition: fm_index.hpp:132
The main SeqAn3 namespace.
cursor_type begin() const noexcept
Returns a seqan3::bi_fm_index_cursor on the index that can be used for searching. ...
Definition: bi_fm_index.hpp:287
bi_fm_index(text_t &&) -> bi_fm_index< dimension_v< text_t > !=1 >
Deduces the dimensions of the text.
size_type size() const noexcept
Returns the length of the indexed text including sentinel characters.
Definition: bi_fm_index.hpp:220
sdsl_index_type_ rev_sdsl_index_type
The type of the underlying SDSL index for the reversed text.
Definition: bi_fm_index.hpp:75
Specifies requirements of a Range type for which begin returns a type that models std::BidirectionalI...
void construct(text_t &&text)
Constructs the index given a range. The range cannot be an rvalue (i.e. a temporary object) and has t...
Definition: fm_index.hpp:223
Adaptations of concepts from the Ranges TS.
typename innermost_value_type< t >::type innermost_value_type_t
Shortcut for seqan3::innermost_value_type (TransformationTrait shortcut).
Definition: range.hpp:191
::ranges::begin begin
Alias for ranges::begin. Returns an iterator to the beginning of a range.
Definition: ranges:174
fm_index_type fwd_fm
Underlying FM index for the original text.
Definition: bi_fm_index.hpp:93
void construct(text_t &&text)
Constructs the index given a range. The range cannot be an rvalue (i.e. a temporary object) and has t...
Definition: bi_fm_index.hpp:168
static constexpr bool is_collection_
Indicates whether index is built over a collection.
Definition: bi_fm_index.hpp:66
fwd_cursor_type fwd_begin() const noexcept
Returns a unidirectional seqan3::fm_index_cursor on the original text of the bidirectional index that...
Definition: bi_fm_index.hpp:304
rev_cursor_type rev_begin() const noexcept
Returns a unidirectional seqan3::fm_index_cursor on the reversed text of the bidirectional index that...
Definition: bi_fm_index.hpp:323
bi_fm_index()=default
Default constructor.
The SeqAn Bidirectional FM Index Cursor.
Definition: bi_fm_index_cursor.hpp:54
rev_fm_index_type rev_fm
Underlying FM index for the reversed text.
Definition: bi_fm_index.hpp:96
sdsl_index_type_ sdsl_index_type
The type of the underlying SDSL index for the original text.
Definition: bi_fm_index.hpp:72
typename sdsl_index_type::size_type size_type
Type for representing positions in the indexed text.
Definition: bi_fm_index.hpp:103
Provides seqan3::view::persist.
size_t sigma
The alphabet size of the text.
Definition: bi_fm_index.hpp:64
Provides various transformation traits used by the range module.
typename reference< t >::type reference_t
Shortcut for seqan3::reference (TransformationTrait shortcut).
Definition: pre.hpp:77
bool operator==(bi_fm_index const &rhs) const noexcept
Compares two indices.
Definition: bi_fm_index.hpp:252
The SeqAn Bidirectional FM Index.
Definition: bi_fm_index.hpp:60
Provides the unidirectional seqan3::fm_index.
bi_fm_index(text_t &&text)
Constructor that immediately constructs the index given a range. The range cannot be empty...
Definition: bi_fm_index.hpp:139
constexpr auto alphabet_size
A type trait that holds the size of a (semi-)alphabet.
Definition: concept.hpp:678
bool operator!=(bi_fm_index const &rhs) const noexcept
Compares two indices.
Definition: bi_fm_index.hpp:268
bi_fm_index & operator=(bi_fm_index const &)=default
Copy assignment.
T forward(T... args)
::ranges::end end
Alias for ranges::end. Returns an iterator to the end of a range.
Definition: ranges:179
The SeqAn FM Index Cursor.
Definition: fm_index_cursor.hpp:64
This header includes C++17 filesystem support and imports it into namespace seqan3::filesystem (indep...
Provides the seqan3::bi_fm_index_cursor for searching in the bidirectional seqan3::bi_fm_index.