SeqAn3 3.4.0-rc.1
The Modern C++ library for sequence analysis.
Loading...
Searching...
No Matches
unidirectional_search_algorithm.hpp
Go to the documentation of this file.
1// SPDX-FileCopyrightText: 2006-2024 Knut Reinert & Freie Universität Berlin
2// SPDX-FileCopyrightText: 2016-2024 Knut Reinert & MPI für molekulare Genetik
3// SPDX-License-Identifier: BSD-3-Clause
4
11#pragma once
12
13#include <ranges>
14#include <type_traits>
15
21
22namespace seqan3::detail
23{
24
27enum class error_type : uint8_t
28{
29 deletion,
30 insertion,
31 matchmm,
32 none
33};
34
40template <typename configuration_t, typename index_t, typename... policies_t>
41class unidirectional_search_algorithm : protected policies_t...
42{
43private:
45 using traits_t = search_traits<configuration_t>;
47 using search_result_type = typename traits_t::search_result_type;
48
49 static_assert(!std::same_as<search_result_type, empty_type>, "The search result type was not configured.");
50
51public:
55 unidirectional_search_algorithm() = default;
56 unidirectional_search_algorithm(unidirectional_search_algorithm const &) = default;
57 unidirectional_search_algorithm(unidirectional_search_algorithm &&) = default;
58 unidirectional_search_algorithm & operator=(unidirectional_search_algorithm const &) = default;
59 unidirectional_search_algorithm & operator=(unidirectional_search_algorithm &&) = default;
60 ~unidirectional_search_algorithm() = default;
61
72 unidirectional_search_algorithm(configuration_t const & cfg, index_t const & index) : policies_t{cfg}...
73 {
74 stratum = cfg.get_or(search_cfg::hit_strata{0}).stratum;
75 index_ptr = &index;
76 }
78
99 template <typename indexed_query_t, typename callback_t>
100 requires (std::tuple_size_v<indexed_query_t> == 2)
101 && std::ranges::forward_range<std::tuple_element_t<1, indexed_query_t>>
102 && std::invocable<callback_t, search_result_type>
103 void operator()(indexed_query_t && indexed_query, callback_t && callback)
104 {
105 auto && [query_idx, query] = indexed_query;
106 auto error_state = this->max_error_counts(query); // see policy_max_error
107
108 // construct internal delegate for collecting hits for later filtering (if necessary)
110 delegate = [&internal_hits](auto const & it)
111 {
112 internal_hits.push_back(it);
113 };
114
115 perform_search_by_hit_strategy(internal_hits, query, error_state);
116
117 this->make_results(std::move(internal_hits), query_idx, callback); // see policy_search_result_builder
118 }
119
120private:
122 index_t const * index_ptr{nullptr};
123
125 std::function<void(typename index_t::cursor_type const &)> delegate;
126
128 uint8_t stratum{};
129
130 // forward declaration
131 template <bool abort_on_hit, typename query_t>
132 bool search_trivial(typename index_t::cursor_type cur,
133 query_t & query,
134 typename index_t::cursor_type::size_type const query_pos,
135 search_param const error_left,
136 error_type const prev_error);
137
144 template <typename query_t>
145 void perform_search_by_hit_strategy(std::vector<typename index_t::cursor_type> & internal_hits,
146 query_t & query,
147 search_param error_state)
148 {
149 if constexpr (!traits_t::search_all_hits)
150 {
151 auto max_total = error_state.total;
152 error_state.total = 0; // start search with less errors
153
154 while (internal_hits.empty() && error_state.total <= max_total)
155 {
156 // * If you only want the best hit (traits_t::search_single_best_hit), you stop after finding the
157 // first hit, the hit with the least errors (`abort_on_hit` is true).
158 // * If you are in strata mode (traits_t::search_strata_hits), you do the same as with best hits,
159 // but then do the extra step afterwards (`abort_on_hit` is true).
160 // * If you want all best hits (traits_t::search_all_best_hits), you do not stop after the first
161 // hit but continue the current search algorithm/max_error pattern (`abort_on_hit` is true).
162 constexpr bool abort_on_hit = !traits_t::search_all_best_hits;
163 search_trivial<abort_on_hit>(index_ptr->cursor(), query, 0, error_state, error_type::none);
164 ++error_state.total;
165 }
166
167 if constexpr (traits_t::search_strata_hits)
168 {
169 if (!internal_hits.empty())
170 {
171 internal_hits.clear();
172 error_state.total += stratum - 1;
173 search_trivial<false>(index_ptr->cursor(), query, 0, error_state, error_type::none);
174 }
175 }
176 }
177 else // traits_t::search_all
178 {
179 // If you want to find all hits, you cannot stop once you found any hit (<false>)
180 // since you have to find all paths in the search tree that satisfy the hit condition.
181 search_trivial<false>(index_ptr->cursor(), query, 0, error_state, error_type::none);
182 }
183 }
184};
185
205template <typename configuration_t, typename index_t, typename... policies_t>
206template <bool abort_on_hit, typename query_t>
207inline bool unidirectional_search_algorithm<configuration_t, index_t, policies_t...>::search_trivial(
208 typename index_t::cursor_type cur,
209 query_t & query,
210 typename index_t::cursor_type::size_type const query_pos,
211 search_param const error_left,
212 error_type const prev_error)
213{
214 // Exact case (end of query sequence or no errors left)
215 if (query_pos == std::ranges::size(query) || error_left.total == 0)
216 {
217 // If not at end of query sequence, try searching the remaining suffix without any errors.
218 using drop_size_t = std::ranges::range_difference_t<query_t>;
219 if (query_pos == std::ranges::size(query)
220 || cur.extend_right(std::views::drop(query, static_cast<drop_size_t>(query_pos))))
221 {
222 delegate(cur);
223 return true;
224 }
225 }
226 // Approximate case
227 else
228 {
229 // Insertion
230 // Only allow insertions if there is no match and we are not at the beginning of the query.
231 bool const allow_insertion =
232 (cur.query_length() > 0) ? cur.last_rank() != seqan3::to_rank(query[query_pos]) : true;
233
234 if (allow_insertion && (prev_error != error_type::deletion || error_left.substitution == 0)
235 && error_left.insertion > 0)
236 {
237 search_param error_left2{error_left};
238 error_left2.insertion--;
239 error_left2.total--;
240
241 // Always perform a recursive call. Abort recursion if and only if recursive call found a hit and
242 // abort_on_hit is set to true.
243 if (search_trivial<abort_on_hit>(cur, query, query_pos + 1, error_left2, error_type::insertion)
244 && abort_on_hit)
245 {
246 return true;
247 }
248 }
249
250 // Do not allow deletions at the beginning of the query sequence
251 if (((query_pos > 0 && error_left.deletion > 0) || error_left.substitution > 0) && cur.extend_right())
252 {
253 do
254 {
255 // Match (when error_left.substitution > 0) and Mismatch
256 if (error_left.substitution > 0)
257 {
258 bool delta = cur.last_rank() != seqan3::to_rank(query[query_pos]);
259 search_param error_left2{error_left};
260 error_left2.total -= delta;
261 error_left2.substitution -= delta;
262
263 if (search_trivial<abort_on_hit>(cur, query, query_pos + 1, error_left2, error_type::matchmm)
264 && abort_on_hit)
265 {
266 return true;
267 }
268 }
269
270 // Deletion (Do not allow deletions at the beginning of the query sequence.)
271 if (query_pos > 0)
272 {
273 // Match (when error_left.substitution == 0)
274 if (error_left.substitution == 0 && cur.last_rank() == seqan3::to_rank(query[query_pos]))
275 {
276 if (search_trivial<abort_on_hit>(cur, query, query_pos + 1, error_left, error_type::matchmm)
277 && abort_on_hit)
278 {
279 return true;
280 }
281 }
282
283 // Deletions at the end of the sequence are not allowed. When the algorithm
284 // arrives here, it cannot be at the end of the query and since deletions do not touch the query
285 // (i.e. increase query_pos) it won't be at the end of the query after the deletion.
286 // Do not allow deletions after an insertion.
287 if ((prev_error != error_type::insertion || error_left.substitution == 0)
288 && error_left.deletion > 0)
289 {
290 search_param error_left2{error_left};
291 error_left2.total--;
292 error_left2.deletion--;
293 // Only search for characters different from the corresponding query character.
294 // (Same character is covered by a match.)
295 if (cur.last_rank() != seqan3::to_rank(query[query_pos]))
296 {
297 if (search_trivial<abort_on_hit>(cur, query, query_pos, error_left2, error_type::deletion)
298 && abort_on_hit)
299 {
300 return true;
301 }
302 }
303 }
304 }
305 }
306 while (cur.cycle_back());
307 }
308 else
309 {
310 // Match (when error_left.substitution == 0)
311 if (cur.extend_right(query[query_pos]))
312 {
313 if (search_trivial<abort_on_hit>(cur, query, query_pos + 1, error_left, error_type::matchmm)
314 && abort_on_hit)
315 {
316 return true;
317 }
318 }
319 }
320 }
321
322 return false;
323}
324
325} // namespace seqan3::detail
Core alphabet concept and free function/type trait wrappers.
T clear(T... args)
T empty(T... args)
constexpr auto to_rank
Return the rank representation of a (semi-)alphabet object.
Definition alphabet/concept.hpp:152
@ none
No flag is set.
Definition debug_stream_type.hpp:29
T push_back(T... args)
Provides the concept for seqan3::detail::sdsl_index.
Provides data structures used by different search algorithms.
Provides seqan3::detail::search_traits.
Forward declares seqan3::detail::test_accessor.
Hide me