SeqAn3  3.0.2
The Modern C++ library for sequence analysis.
unidirectional_search_algorithm.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2020, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2020, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
14 #pragma once
15 
16 #include <seqan3/std/ranges>
17 #include <type_traits>
18 
24 #include <seqan3/range/concept.hpp>
26 
27 namespace seqan3::detail
28 {
29 
31 enum class error_type : uint8_t
32 {
33  deletion,
34  insertion,
35  matchmm,
36  none
37 };
38 
47 template <typename configuration_t, fm_index_specialisation index_t, typename ...policies_t>
48 class unidirectional_search_algorithm : protected policies_t...
49 {
50 private:
52  using traits_t = search_traits<configuration_t>;
54  using search_result_type = typename traits_t::search_result_type;
55 
56  static_assert(!std::same_as<search_result_type, empty_type>, "The search result type was not configured.");
57 
58 public:
62  unidirectional_search_algorithm() = default;
63  unidirectional_search_algorithm(unidirectional_search_algorithm const &) = default;
64  unidirectional_search_algorithm(unidirectional_search_algorithm &&) = default;
65  unidirectional_search_algorithm & operator=(unidirectional_search_algorithm const &) = default;
66  unidirectional_search_algorithm & operator=(unidirectional_search_algorithm &&) = default;
67  ~unidirectional_search_algorithm() = default;
68 
79  unidirectional_search_algorithm(configuration_t const & cfg, index_t const & index) : policies_t{cfg}...
80  {
81  stratum = cfg.get_or(search_cfg::hit_strata{0}).stratum;
82  index_ptr = &index;
83  }
85 
106  template <typename indexed_query_t, typename callback_t>
108  requires (std::tuple_size_v<indexed_query_t> == 2) &&
109  std::ranges::forward_range<std::tuple_element_t<1, indexed_query_t>> &&
110  std::invocable<callback_t, search_result_type>
112  void operator()(indexed_query_t && indexed_query, callback_t && callback)
113  {
114  auto && [query_idx, query] = indexed_query;
115  auto error_state = this->max_error_counts(query); // see policy_max_error
116 
117  // construct internal delegate for collecting hits for later filtering (if necessary)
119  delegate = [&internal_hits] (auto const & it)
120  {
121  internal_hits.push_back(it);
122  };
123 
124  perform_search_by_hit_strategy(internal_hits, query, error_state);
125 
126  this->make_results(std::move(internal_hits), query_idx, callback); // see policy_search_result_builder
127  }
128 
129 private:
131  index_t const * index_ptr{nullptr};
132 
134  std::function<void(typename index_t::cursor_type const &)> delegate;
135 
137  uint8_t stratum{};
138 
139  // forward declaration
140  template <bool abort_on_hit, typename query_t>
141  bool search_trivial(typename index_t::cursor_type cur,
142  query_t & query,
143  typename index_t::cursor_type::size_type const query_pos,
144  search_param const error_left,
145  error_type const prev_error);
146 
153  template <typename query_t>
154  void perform_search_by_hit_strategy(std::vector<typename index_t::cursor_type> & internal_hits,
155  query_t & query,
156  search_param error_state)
157  {
158  if constexpr (!traits_t::search_all_hits)
159  {
160  auto max_total = error_state.total;
161  error_state.total = 0; // start search with less errors
162 
163  while (internal_hits.empty() && error_state.total <= max_total)
164  {
165  // * If you only want the best hit (traits_t::search_single_best_hit), you stop after finding the
166  // first hit, the hit with the least errors (`abort_on_hit` is true).
167  // * If you are in strata mode (traits_t::search_strata_hits), you do the same as with best hits,
168  // but then do the extra step afterwards (`abort_on_hit` is true).
169  // * If you want all best hits (traits_t::search_all_best_hits), you do not stop after the first
170  // hit but continue the current search algorithm/max_error pattern (`abort_on_hit` is true).
171  constexpr bool abort_on_hit = !traits_t::search_all_best_hits;
172  search_trivial<abort_on_hit>(index_ptr->cursor(), query, 0, error_state, error_type::none);
173  ++error_state.total;
174  }
175 
176  if constexpr (traits_t::search_strata_hits)
177  {
178  if (!internal_hits.empty())
179  {
180  internal_hits.clear();
181  error_state.total += stratum - 1;
182  search_trivial<false>(index_ptr->cursor(), query, 0, error_state, error_type::none);
183  }
184  }
185  }
186  else // traits_t::search_all
187  {
188  // If you want to find all hits, you cannot stop once you found any hit (<false>)
189  // since you have to find all paths in the search tree that satisfy the hit condition.
190  search_trivial<false>(index_ptr->cursor(), query, 0, error_state, error_type::none);
191  }
192  }
193 };
194 
213 template <typename configuration_t, typename index_t, typename ...policies_t>
214 template <bool abort_on_hit, typename query_t>
215 inline bool unidirectional_search_algorithm<configuration_t, index_t, policies_t...>::search_trivial(
216  typename index_t::cursor_type cur,
217  query_t & query,
218  typename index_t::cursor_type::size_type const query_pos,
219  search_param const error_left,
220  error_type const prev_error)
221 {
222  // Exact case (end of query sequence or no errors left)
223  if (query_pos == std::ranges::size(query) || error_left.total == 0)
224  {
225  // If not at end of query sequence, try searching the remaining suffix without any errors.
226  if (query_pos == std::ranges::size(query) || cur.extend_right(views::drop(query, query_pos)))
227  {
228  delegate(cur);
229  return true;
230  }
231  }
232  // Approximate case
233  else
234  {
235  // Insertion
236  // Only allow insertions if there is no match and we are not at the beginning of the query.
237  bool const allow_insertion = (cur.query_length() > 0) ? cur.last_rank() != seqan3::to_rank(query[query_pos]) : true;
238 
239  if (allow_insertion && (prev_error != error_type::deletion || error_left.substitution == 0) &&
240  error_left.insertion > 0)
241  {
242  search_param error_left2{error_left};
243  error_left2.insertion--;
244  error_left2.total--;
245 
246  // Always perform a recursive call. Abort recursion if and only if recursive call found a hit and
247  // abort_on_hit is set to true.
248  if (search_trivial<abort_on_hit>(cur,
249  query, query_pos + 1,
250  error_left2,
251  error_type::insertion) &&
252  abort_on_hit)
253  {
254  return true;
255  }
256  }
257 
258  // Do not allow deletions at the beginning of the query sequence
259  if (((query_pos > 0 && error_left.deletion > 0) || error_left.substitution > 0) && cur.extend_right())
260  {
261  do
262  {
263  // Match (when error_left.substitution > 0) and Mismatch
264  if (error_left.substitution > 0)
265  {
266  bool delta = cur.last_rank() != seqan3::to_rank(query[query_pos]);
267  search_param error_left2{error_left};
268  error_left2.total -= delta;
269  error_left2.substitution -= delta;
270 
271  if (search_trivial<abort_on_hit>(cur,
272  query,
273  query_pos + 1,
274  error_left2,
275  error_type::matchmm) &&
276  abort_on_hit)
277  {
278  return true;
279  }
280  }
281 
282  // Deletion (Do not allow deletions at the beginning of the query sequence.)
283  if (query_pos > 0)
284  {
285  // Match (when error_left.substitution == 0)
286  if (error_left.substitution == 0 && cur.last_rank() == seqan3::to_rank(query[query_pos]))
287  {
288  if (search_trivial<abort_on_hit>(cur,
289  query,
290  query_pos + 1,
291  error_left,
292  error_type::matchmm) &&
293  abort_on_hit)
294  {
295  return true;
296  }
297  }
298 
299  // Deletions at the end of the sequence are not allowed. When the algorithm
300  // arrives here, it cannot be at the end of the query and since deletions do not touch the query
301  // (i.e. increase query_pos) it won't be at the end of the query after the deletion.
302  // Do not allow deletions after an insertion.
303  if ((prev_error != error_type::insertion || error_left.substitution == 0) &&
304  error_left.deletion > 0)
305  {
306  search_param error_left2{error_left};
307  error_left2.total--;
308  error_left2.deletion--;
309  // Only search for characters different from the corresponding query character.
310  // (Same character is covered by a match.)
311  if (cur.last_rank() != seqan3::to_rank(query[query_pos]))
312  {
313  if (search_trivial<abort_on_hit>(cur,
314  query,
315  query_pos,
316  error_left2,
317  error_type::deletion) &&
318  abort_on_hit)
319  {
320  return true;
321  }
322  }
323  }
324  }
325  } while (cur.cycle_back());
326  }
327  else
328  {
329  // Match (when error_left.substitution == 0)
330  if (cur.extend_right(query[query_pos]))
331  {
332  if (search_trivial<abort_on_hit>(cur,
333  query,
334  query_pos + 1,
335  error_left,
336  error_type::matchmm) &&
337  abort_on_hit)
338  {
339  return true;
340  }
341  }
342  }
343  }
344 
345  return false;
346 }
348 
349 } // namespace seqan3::detail
drop.hpp
Provides seqan3::views::drop.
std::vector
seqan3::to_rank
constexpr auto to_rank
Return the rank representation of a (semi-)alphabet object.
Definition: concept.hpp:143
std::function
std::vector::clear
T clear(T... args)
concept.hpp
Additional non-standard concepts for ranges.
std::vector::push_back
T push_back(T... args)
seqan3::views::move
auto const move
A view that turns lvalue-references into rvalue-references.
Definition: move.hpp:68
seqan3::pack_traits::size
constexpr size_t size
The size of a type pack.
Definition: traits.hpp:116
ranges
Adaptations of concepts from the Ranges TS.
seqan3::views::drop
constexpr auto drop
A view adaptor that returns all elements after n from the underlying range (or an empty range if the ...
Definition: drop.hpp:169
seqan3::none
@ none
No flag is set.
Definition: debug_stream_type.hpp:30
concept.hpp
Provides the concepts for seqan3::fm_index and seqan3::bi_fm_index and its traits and cursors.
std::vector::empty
T empty(T... args)
search_traits.hpp
Provides seqan3::detail::search_traits.
search_common.hpp
Provides data structures used by different search algorithms.
fm_index_specialisation
Concept for unidirectional FM indices.
concept.hpp
Core alphabet concept and free function/type trait wrappers.
test_accessor.hpp
Forward declares seqan3::detail::test_accessor.