SeqAn3  3.0.1
The Modern C++ library for sequence analysis.
search.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2020, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2020, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
13 #pragma once
14 
21 
22 namespace seqan3::detail
23 {
24 
46 template <typename index_t, typename query_t, typename configuration_t>
47 inline auto search_single(index_t const & index, query_t & query, configuration_t const & cfg)
48 {
49  using search_traits_t = search_traits<configuration_t>;
50 
51  // retrieve error numbers / rates
52  detail::search_param max_error{0, 0, 0, 0};
53  if constexpr (search_traits_t::search_with_max_error)
54  {
55  auto & [total, subs, ins, del] = max_error;
56  std::tie(total, subs, ins, del) = std::apply([](auto ...args){ return std::tuple{args...}; },
57  get<search_cfg::max_error>(cfg).value);
58  }
59  else if constexpr (search_traits_t::search_with_max_error_rate)
60  {
61  // NOTE: Casting doubles rounds towards zero (i.e. floor for positive numbers). Thus, given a rate of
62  // 10% and a read length of 101 the max number of errors is correctly casted from 10.1 errors to 10
63  auto & [total, subs, ins, del] = max_error;
64  std::tie(total, subs, ins, del) = std::apply([& query] (auto && ... args)
65  {
66  return std::tuple{(args * std::ranges::size(query))...};
67  }, get<search_cfg::max_error_rate>(cfg).value);
68  }
69 
70  // TODO: if total not set: max_error.total = max_error.deletion + max_error.substitution + max_error.insertion;
71  // TODO: throw exception when any error number or rate is higher than the total error number/rate
72  // throw std::invalid_argument("The total number of errors is set to zero while there is a positive number"
73  // " of errors for a specific error type.");
74 
75  // construct internal delegate for collecting hits for later filtering (if necessary)
77  auto internal_delegate = [&internal_hits, &max_error] (auto const & it)
78  {
79  internal_hits.push_back(it);
80  };
81 
82  // choose mode
83  if constexpr (search_traits_t::search_best_hits)
84  {
85  detail::search_param max_error2{max_error};
86  max_error2.total = 0;
87  while (internal_hits.empty() && max_error2.total <= max_error.total)
88  {
89  detail::search_algo<true>(index, query, max_error2, internal_delegate);
90  max_error2.total++;
91  }
92  }
93  else if constexpr (search_traits_t::search_all_best_hits)
94  {
95  detail::search_param max_error2{max_error};
96  max_error2.total = 0;
97  while (internal_hits.empty() && max_error2.total <= max_error.total)
98  {
99  detail::search_algo<false>(index, query, max_error2, internal_delegate);
100  max_error2.total++;
101  }
102  }
103  else if constexpr (search_traits_t::search_strata_hits)
104  {
105  detail::search_param max_error2{max_error};
106  max_error2.total = 0;
107  while (internal_hits.empty() && max_error2.total <= max_error.total)
108  {
109  detail::search_algo<true>(index, query, max_error2, internal_delegate);
110  max_error2.total++;
111  }
112  if (!internal_hits.empty())
113  {
114  internal_hits.clear(); // TODO: don't clear when using Optimum Search Schemes with lower error bounds
115  uint8_t const s = get<search_cfg::mode>(cfg).value;
116  max_error2.total += s - 1;
117  detail::search_algo<false>(index, query, max_error2, internal_delegate);
118  }
119  }
120  else // detail::search_mode_all
121  {
122  detail::search_algo<false>(index, query, max_error, internal_delegate);
123  }
124 
125  // TODO: filter hits and only do it when necessary (depending on error types)
126 
127  // output cursors or text_positions
128  if constexpr (search_traits_t::search_return_index_cursor)
129  {
130  return internal_hits;
131  }
132  else
133  {
134  using hit_t = std::conditional_t<index_t::text_layout_mode == text_layout::collection,
136  typename index_t::size_type>;
137  std::vector<hit_t> hits;
138 
139  if constexpr (search_traits_t::search_best_hits)
140  {
141  // only one cursor is reported but it might contain more than one text position
142  if (!internal_hits.empty())
143  {
144  auto text_pos = internal_hits[0].lazy_locate();
145  hits.push_back(text_pos[0]);
146  }
147  }
148  else
149  {
150  for (auto const & cur : internal_hits)
151  {
152  for (auto const & text_pos : cur.locate())
153  hits.push_back(text_pos);
154  std::sort(hits.begin(), hits.end());
155  hits.erase(std::unique(hits.begin(), hits.end()), hits.end());
156  }
157  }
158  return hits;
159  }
160 }
161 
180 template <typename index_t, typename queries_t, typename configuration_t>
181 inline auto search_all(index_t const & index, queries_t && queries, configuration_t const & cfg)
182 {
183  using cfg_t = remove_cvref_t<configuration_t>;
184  // return type: for each query: a vector of text_positions (or cursors)
185  // delegate params: text_position (or cursor). we will withhold all hits of one query anyway to filter
186  // duplicates. more efficient to call delegate once with one vector instead of calling
187  // delegate for each hit separately at once.
188  using text_pos_t = std::conditional_t<index_t::text_layout_mode == text_layout::collection,
190  typename index_t::size_type>;
192  typename index_t::cursor_type,
193  text_pos_t>;
194 
195  if constexpr (std::ranges::forward_range<queries_t> && std::ranges::random_access_range<value_type_t<queries_t>>)
196  {
197  // TODO: if constexpr (contains<search_cfg::id::on_hit>(cfg))
199  hits.reserve(std::distance(queries.begin(), queries.end()));
200  for (auto const query : queries)
201  {
202  hits.push_back(search_single(index, query, cfg));
203  }
204  return hits;
205  }
206  else // std::ranges::random_access_range<queries_t>
207  {
208  // TODO: if constexpr (contains<search_cfg::id::on_hit>(cfg))
209  return search_single(index, queries, cfg);
210  }
211 }
212 
214 
215 } // namespace seqan3::detail
std::apply
T apply(T... args)
search_scheme_algorithm.hpp
Provides the algorithm to search in an index using search schemes.
pre.hpp
Provides various transformation trait base templates and shortcuts.
std::pair
std::vector::reserve
T reserve(T... args)
std::vector
std::distance
T distance(T... args)
std::tuple
std::sort
T sort(T... args)
std::vector::clear
T clear(T... args)
std::tie
T tie(T... args)
std::vector::push_back
T push_back(T... args)
seqan3::collection
The text is a range of ranges.
Definition: concept.hpp:86
std::vector::erase
T erase(T... args)
search_trivial.hpp
Provides an approximate string matching algorithm based on simple backtracking. This should only be u...
seqan3::pack_traits::size
constexpr size_t size
The size of a type pack.
Definition: traits.hpp:116
all.hpp
Meta-header for the search configuration module .
std::vector::begin
T begin(T... args)
concept.hpp
Provides the concepts for seqan3::fm_index and seqan3::bi_fm_index and its traits and cursors.
std::vector::empty
T empty(T... args)
std::unique
T unique(T... args)
std::vector::end
T end(T... args)
search_traits.hpp
Provides seqan3::detail::search_traits.
std::conditional_t