SeqAn3 3.2.0
The Modern C++ library for sequence analysis.
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
unidirectional_search_algorithm.hpp
Go to the documentation of this file.
1// -----------------------------------------------------------------------------------------------------
2// Copyright (c) 2006-2022, Knut Reinert & Freie Universität Berlin
3// Copyright (c) 2016-2022, Knut Reinert & MPI für molekulare Genetik
4// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6// -----------------------------------------------------------------------------------------------------
7
14#pragma once
15
16#include <ranges>
17#include <type_traits>
18
24
25namespace seqan3::detail
26{
27
30enum class error_type : uint8_t
31{
32 deletion,
33 insertion,
34 matchmm,
35 none
36};
37
43template <typename configuration_t, typename index_t, typename... policies_t>
44class unidirectional_search_algorithm : protected policies_t...
45{
46private:
48 using traits_t = search_traits<configuration_t>;
50 using search_result_type = typename traits_t::search_result_type;
51
52 static_assert(!std::same_as<search_result_type, empty_type>, "The search result type was not configured.");
53
54public:
58 unidirectional_search_algorithm() = default;
59 unidirectional_search_algorithm(unidirectional_search_algorithm const &) = default;
60 unidirectional_search_algorithm(unidirectional_search_algorithm &&) = default;
61 unidirectional_search_algorithm & operator=(unidirectional_search_algorithm const &) = default;
62 unidirectional_search_algorithm & operator=(unidirectional_search_algorithm &&) = default;
63 ~unidirectional_search_algorithm() = default;
64
75 unidirectional_search_algorithm(configuration_t const & cfg, index_t const & index) : policies_t{cfg}...
76 {
77 stratum = cfg.get_or(search_cfg::hit_strata{0}).stratum;
78 index_ptr = &index;
79 }
81
102 template <typename indexed_query_t, typename callback_t>
103 requires (std::tuple_size_v<indexed_query_t> == 2)
104 && std::ranges::forward_range<std::tuple_element_t<1, indexed_query_t>>
105 && std::invocable<callback_t, search_result_type>
106 void operator()(indexed_query_t && indexed_query, callback_t && callback)
107 {
108 auto && [query_idx, query] = indexed_query;
109 auto error_state = this->max_error_counts(query); // see policy_max_error
110
111 // construct internal delegate for collecting hits for later filtering (if necessary)
113 delegate = [&internal_hits](auto const & it)
114 {
115 internal_hits.push_back(it);
116 };
117
118 perform_search_by_hit_strategy(internal_hits, query, error_state);
119
120 this->make_results(std::move(internal_hits), query_idx, callback); // see policy_search_result_builder
121 }
122
123private:
125 index_t const * index_ptr{nullptr};
126
128 std::function<void(typename index_t::cursor_type const &)> delegate;
129
131 uint8_t stratum{};
132
133 // forward declaration
134 template <bool abort_on_hit, typename query_t>
135 bool search_trivial(typename index_t::cursor_type cur,
136 query_t & query,
137 typename index_t::cursor_type::size_type const query_pos,
138 search_param const error_left,
139 error_type const prev_error);
140
147 template <typename query_t>
148 void perform_search_by_hit_strategy(std::vector<typename index_t::cursor_type> & internal_hits,
149 query_t & query,
150 search_param error_state)
151 {
152 if constexpr (!traits_t::search_all_hits)
153 {
154 auto max_total = error_state.total;
155 error_state.total = 0; // start search with less errors
156
157 while (internal_hits.empty() && error_state.total <= max_total)
158 {
159 // * If you only want the best hit (traits_t::search_single_best_hit), you stop after finding the
160 // first hit, the hit with the least errors (`abort_on_hit` is true).
161 // * If you are in strata mode (traits_t::search_strata_hits), you do the same as with best hits,
162 // but then do the extra step afterwards (`abort_on_hit` is true).
163 // * If you want all best hits (traits_t::search_all_best_hits), you do not stop after the first
164 // hit but continue the current search algorithm/max_error pattern (`abort_on_hit` is true).
165 constexpr bool abort_on_hit = !traits_t::search_all_best_hits;
166 search_trivial<abort_on_hit>(index_ptr->cursor(), query, 0, error_state, error_type::none);
167 ++error_state.total;
168 }
169
170 if constexpr (traits_t::search_strata_hits)
171 {
172 if (!internal_hits.empty())
173 {
174 internal_hits.clear();
175 error_state.total += stratum - 1;
176 search_trivial<false>(index_ptr->cursor(), query, 0, error_state, error_type::none);
177 }
178 }
179 }
180 else // traits_t::search_all
181 {
182 // If you want to find all hits, you cannot stop once you found any hit (<false>)
183 // since you have to find all paths in the search tree that satisfy the hit condition.
184 search_trivial<false>(index_ptr->cursor(), query, 0, error_state, error_type::none);
185 }
186 }
187};
188
208template <typename configuration_t, typename index_t, typename... policies_t>
209template <bool abort_on_hit, typename query_t>
210inline bool unidirectional_search_algorithm<configuration_t, index_t, policies_t...>::search_trivial(
211 typename index_t::cursor_type cur,
212 query_t & query,
213 typename index_t::cursor_type::size_type const query_pos,
214 search_param const error_left,
215 error_type const prev_error)
216{
217 // Exact case (end of query sequence or no errors left)
218 if (query_pos == std::ranges::size(query) || error_left.total == 0)
219 {
220 // If not at end of query sequence, try searching the remaining suffix without any errors.
221 using drop_size_t = std::ranges::range_difference_t<query_t>;
222 if (query_pos == std::ranges::size(query)
223 || cur.extend_right(std::views::drop(query, static_cast<drop_size_t>(query_pos))))
224 {
225 delegate(cur);
226 return true;
227 }
228 }
229 // Approximate case
230 else
231 {
232 // Insertion
233 // Only allow insertions if there is no match and we are not at the beginning of the query.
234 bool const allow_insertion =
235 (cur.query_length() > 0) ? cur.last_rank() != seqan3::to_rank(query[query_pos]) : true;
236
237 if (allow_insertion && (prev_error != error_type::deletion || error_left.substitution == 0)
238 && error_left.insertion > 0)
239 {
240 search_param error_left2{error_left};
241 error_left2.insertion--;
242 error_left2.total--;
243
244 // Always perform a recursive call. Abort recursion if and only if recursive call found a hit and
245 // abort_on_hit is set to true.
246 if (search_trivial<abort_on_hit>(cur, query, query_pos + 1, error_left2, error_type::insertion)
247 && abort_on_hit)
248 {
249 return true;
250 }
251 }
252
253 // Do not allow deletions at the beginning of the query sequence
254 if (((query_pos > 0 && error_left.deletion > 0) || error_left.substitution > 0) && cur.extend_right())
255 {
256 do
257 {
258 // Match (when error_left.substitution > 0) and Mismatch
259 if (error_left.substitution > 0)
260 {
261 bool delta = cur.last_rank() != seqan3::to_rank(query[query_pos]);
262 search_param error_left2{error_left};
263 error_left2.total -= delta;
264 error_left2.substitution -= delta;
265
266 if (search_trivial<abort_on_hit>(cur, query, query_pos + 1, error_left2, error_type::matchmm)
267 && abort_on_hit)
268 {
269 return true;
270 }
271 }
272
273 // Deletion (Do not allow deletions at the beginning of the query sequence.)
274 if (query_pos > 0)
275 {
276 // Match (when error_left.substitution == 0)
277 if (error_left.substitution == 0 && cur.last_rank() == seqan3::to_rank(query[query_pos]))
278 {
279 if (search_trivial<abort_on_hit>(cur, query, query_pos + 1, error_left, error_type::matchmm)
280 && abort_on_hit)
281 {
282 return true;
283 }
284 }
285
286 // Deletions at the end of the sequence are not allowed. When the algorithm
287 // arrives here, it cannot be at the end of the query and since deletions do not touch the query
288 // (i.e. increase query_pos) it won't be at the end of the query after the deletion.
289 // Do not allow deletions after an insertion.
290 if ((prev_error != error_type::insertion || error_left.substitution == 0)
291 && error_left.deletion > 0)
292 {
293 search_param error_left2{error_left};
294 error_left2.total--;
295 error_left2.deletion--;
296 // Only search for characters different from the corresponding query character.
297 // (Same character is covered by a match.)
298 if (cur.last_rank() != seqan3::to_rank(query[query_pos]))
299 {
300 if (search_trivial<abort_on_hit>(cur, query, query_pos, error_left2, error_type::deletion)
301 && abort_on_hit)
302 {
303 return true;
304 }
305 }
306 }
307 }
308 }
309 while (cur.cycle_back());
310 }
311 else
312 {
313 // Match (when error_left.substitution == 0)
314 if (cur.extend_right(query[query_pos]))
315 {
316 if (search_trivial<abort_on_hit>(cur, query, query_pos + 1, error_left, error_type::matchmm)
317 && abort_on_hit)
318 {
319 return true;
320 }
321 }
322 }
323 }
324
325 return false;
326}
327
328} // namespace seqan3::detail
Core alphabet concept and free function/type trait wrappers.
T clear(T... args)
T empty(T... args)
constexpr auto to_rank
Return the rank representation of a (semi-)alphabet object.
Definition: concept.hpp:155
@ none
No flag is set.
Definition: debug_stream_type.hpp:32
typename decltype(detail::split_after< i >(list_t{}))::second_type drop
Return a seqan3::type_list of the types in the input type list, except the first n.
Definition: traits.hpp:395
constexpr size_t size
The size of a type pack.
Definition: traits.hpp:146
T push_back(T... args)
Provides the concept for seqan3::detail::sdsl_index.
Provides data structures used by different search algorithms.
Provides seqan3::detail::search_traits.
Forward declares seqan3::detail::test_accessor.