SeqAn3 3.1.0
The Modern C++ library for sequence analysis.
unidirectional_search_algorithm.hpp
Go to the documentation of this file.
1// -----------------------------------------------------------------------------------------------------
2// Copyright (c) 2006-2021, Knut Reinert & Freie Universität Berlin
3// Copyright (c) 2016-2021, Knut Reinert & MPI für molekulare Genetik
4// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6// -----------------------------------------------------------------------------------------------------
7
14#pragma once
15
16#include <seqan3/std/ranges>
17#include <type_traits>
18
24
25namespace seqan3::detail
26{
27
30enum class error_type : uint8_t
31{
32 deletion,
33 insertion,
34 matchmm,
35 none
36};
37
43template <typename configuration_t, typename index_t, typename ...policies_t>
44class unidirectional_search_algorithm : protected policies_t...
45{
46private:
48 using traits_t = search_traits<configuration_t>;
50 using search_result_type = typename traits_t::search_result_type;
51
52 static_assert(!std::same_as<search_result_type, empty_type>, "The search result type was not configured.");
53
54public:
58 unidirectional_search_algorithm() = default;
59 unidirectional_search_algorithm(unidirectional_search_algorithm const &) = default;
60 unidirectional_search_algorithm(unidirectional_search_algorithm &&) = default;
61 unidirectional_search_algorithm & operator=(unidirectional_search_algorithm const &) = default;
62 unidirectional_search_algorithm & operator=(unidirectional_search_algorithm &&) = default;
63 ~unidirectional_search_algorithm() = default;
64
75 unidirectional_search_algorithm(configuration_t const & cfg, index_t const & index) : policies_t{cfg}...
76 {
77 stratum = cfg.get_or(search_cfg::hit_strata{0}).stratum;
78 index_ptr = &index;
79 }
81
102 template <typename indexed_query_t, typename callback_t>
104 requires (std::tuple_size_v<indexed_query_t> == 2) &&
105 std::ranges::forward_range<std::tuple_element_t<1, indexed_query_t>> &&
106 std::invocable<callback_t, search_result_type>
108 void operator()(indexed_query_t && indexed_query, callback_t && callback)
109 {
110 auto && [query_idx, query] = indexed_query;
111 auto error_state = this->max_error_counts(query); // see policy_max_error
112
113 // construct internal delegate for collecting hits for later filtering (if necessary)
115 delegate = [&internal_hits] (auto const & it)
116 {
117 internal_hits.push_back(it);
118 };
119
120 perform_search_by_hit_strategy(internal_hits, query, error_state);
121
122 this->make_results(std::move(internal_hits), query_idx, callback); // see policy_search_result_builder
123 }
124
125private:
127 index_t const * index_ptr{nullptr};
128
130 std::function<void(typename index_t::cursor_type const &)> delegate;
131
133 uint8_t stratum{};
134
135 // forward declaration
136 template <bool abort_on_hit, typename query_t>
137 bool search_trivial(typename index_t::cursor_type cur,
138 query_t & query,
139 typename index_t::cursor_type::size_type const query_pos,
140 search_param const error_left,
141 error_type const prev_error);
142
149 template <typename query_t>
150 void perform_search_by_hit_strategy(std::vector<typename index_t::cursor_type> & internal_hits,
151 query_t & query,
152 search_param error_state)
153 {
154 if constexpr (!traits_t::search_all_hits)
155 {
156 auto max_total = error_state.total;
157 error_state.total = 0; // start search with less errors
158
159 while (internal_hits.empty() && error_state.total <= max_total)
160 {
161 // * If you only want the best hit (traits_t::search_single_best_hit), you stop after finding the
162 // first hit, the hit with the least errors (`abort_on_hit` is true).
163 // * If you are in strata mode (traits_t::search_strata_hits), you do the same as with best hits,
164 // but then do the extra step afterwards (`abort_on_hit` is true).
165 // * If you want all best hits (traits_t::search_all_best_hits), you do not stop after the first
166 // hit but continue the current search algorithm/max_error pattern (`abort_on_hit` is true).
167 constexpr bool abort_on_hit = !traits_t::search_all_best_hits;
168 search_trivial<abort_on_hit>(index_ptr->cursor(), query, 0, error_state, error_type::none);
169 ++error_state.total;
170 }
171
172 if constexpr (traits_t::search_strata_hits)
173 {
174 if (!internal_hits.empty())
175 {
176 internal_hits.clear();
177 error_state.total += stratum - 1;
178 search_trivial<false>(index_ptr->cursor(), query, 0, error_state, error_type::none);
179 }
180 }
181 }
182 else // traits_t::search_all
183 {
184 // If you want to find all hits, you cannot stop once you found any hit (<false>)
185 // since you have to find all paths in the search tree that satisfy the hit condition.
186 search_trivial<false>(index_ptr->cursor(), query, 0, error_state, error_type::none);
187 }
188 }
189};
190
210template <typename configuration_t, typename index_t, typename ...policies_t>
211template <bool abort_on_hit, typename query_t>
212inline bool unidirectional_search_algorithm<configuration_t, index_t, policies_t...>::search_trivial(
213 typename index_t::cursor_type cur,
214 query_t & query,
215 typename index_t::cursor_type::size_type const query_pos,
216 search_param const error_left,
217 error_type const prev_error)
218{
219 // Exact case (end of query sequence or no errors left)
220 if (query_pos == std::ranges::size(query) || error_left.total == 0)
221 {
222 // If not at end of query sequence, try searching the remaining suffix without any errors.
223 using drop_size_t = std::ranges::range_difference_t<query_t>;
224 if (query_pos == std::ranges::size(query) ||
225 cur.extend_right(std::views::drop(query, static_cast<drop_size_t>(query_pos))))
226 {
227 delegate(cur);
228 return true;
229 }
230 }
231 // Approximate case
232 else
233 {
234 // Insertion
235 // Only allow insertions if there is no match and we are not at the beginning of the query.
236 bool const allow_insertion = (cur.query_length() > 0) ? cur.last_rank() != seqan3::to_rank(query[query_pos]) : true;
237
238 if (allow_insertion && (prev_error != error_type::deletion || error_left.substitution == 0) &&
239 error_left.insertion > 0)
240 {
241 search_param error_left2{error_left};
242 error_left2.insertion--;
243 error_left2.total--;
244
245 // Always perform a recursive call. Abort recursion if and only if recursive call found a hit and
246 // abort_on_hit is set to true.
247 if (search_trivial<abort_on_hit>(cur,
248 query, query_pos + 1,
249 error_left2,
250 error_type::insertion) &&
251 abort_on_hit)
252 {
253 return true;
254 }
255 }
256
257 // Do not allow deletions at the beginning of the query sequence
258 if (((query_pos > 0 && error_left.deletion > 0) || error_left.substitution > 0) && cur.extend_right())
259 {
260 do
261 {
262 // Match (when error_left.substitution > 0) and Mismatch
263 if (error_left.substitution > 0)
264 {
265 bool delta = cur.last_rank() != seqan3::to_rank(query[query_pos]);
266 search_param error_left2{error_left};
267 error_left2.total -= delta;
268 error_left2.substitution -= delta;
269
270 if (search_trivial<abort_on_hit>(cur,
271 query,
272 query_pos + 1,
273 error_left2,
274 error_type::matchmm) &&
275 abort_on_hit)
276 {
277 return true;
278 }
279 }
280
281 // Deletion (Do not allow deletions at the beginning of the query sequence.)
282 if (query_pos > 0)
283 {
284 // Match (when error_left.substitution == 0)
285 if (error_left.substitution == 0 && cur.last_rank() == seqan3::to_rank(query[query_pos]))
286 {
287 if (search_trivial<abort_on_hit>(cur,
288 query,
289 query_pos + 1,
290 error_left,
291 error_type::matchmm) &&
292 abort_on_hit)
293 {
294 return true;
295 }
296 }
297
298 // Deletions at the end of the sequence are not allowed. When the algorithm
299 // arrives here, it cannot be at the end of the query and since deletions do not touch the query
300 // (i.e. increase query_pos) it won't be at the end of the query after the deletion.
301 // Do not allow deletions after an insertion.
302 if ((prev_error != error_type::insertion || error_left.substitution == 0) &&
303 error_left.deletion > 0)
304 {
305 search_param error_left2{error_left};
306 error_left2.total--;
307 error_left2.deletion--;
308 // Only search for characters different from the corresponding query character.
309 // (Same character is covered by a match.)
310 if (cur.last_rank() != seqan3::to_rank(query[query_pos]))
311 {
312 if (search_trivial<abort_on_hit>(cur,
313 query,
314 query_pos,
315 error_left2,
316 error_type::deletion) &&
317 abort_on_hit)
318 {
319 return true;
320 }
321 }
322 }
323 }
324 } while (cur.cycle_back());
325 }
326 else
327 {
328 // Match (when error_left.substitution == 0)
329 if (cur.extend_right(query[query_pos]))
330 {
331 if (search_trivial<abort_on_hit>(cur,
332 query,
333 query_pos + 1,
334 error_left,
335 error_type::matchmm) &&
336 abort_on_hit)
337 {
338 return true;
339 }
340 }
341 }
342 }
343
344 return false;
345}
346
347} // namespace seqan3::detail
Core alphabet concept and free function/type trait wrappers.
T clear(T... args)
T empty(T... args)
constexpr auto to_rank
Return the rank representation of a (semi-)alphabet object.
Definition: concept.hpp:155
@ none
No flag is set.
Definition: debug_stream_type.hpp:32
typename decltype(detail::split_after< i >(list_t{}))::second_type drop
Return a seqan3::type_list of the types in the input type list, except the first n.
Definition: traits.hpp:388
constexpr size_t size
The size of a type pack.
Definition: traits.hpp:151
T push_back(T... args)
The <ranges> header from C++20's standard library.
Provides the concept for seqan3::detail::sdsl_index.
Provides data structures used by different search algorithms.
Provides seqan3::detail::search_traits.
Forward declares seqan3::detail::test_accessor.