SeqAn3 3.2.0
The Modern C++ library for sequence analysis.
alignment_configurator.hpp
Go to the documentation of this file.
1// -----------------------------------------------------------------------------------------------------
2// Copyright (c) 2006-2022, Knut Reinert & Freie Universität Berlin
3// Copyright (c) 2016-2022, Knut Reinert & MPI für molekulare Genetik
4// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6// -----------------------------------------------------------------------------------------------------
7
13#pragma once
14
15#include <functional>
16#include <tuple>
17#include <utility>
18#include <vector>
19
64
65namespace seqan3::detail
66{
67
77template <typename range_type, typename alignment_config_type>
78struct alignment_contract
79{
80private:
85 using unref_range_type = std::remove_reference_t<range_type>;
87 using first_seq_t = std::tuple_element_t<0, std::ranges::range_value_t<unref_range_type>>;
89 using second_seq_t = std::tuple_element_t<1, std::ranges::range_value_t<unref_range_type>>;
91
92public:
94 static constexpr bool expects_tuple_like_value_type()
95 {
97 && std::tuple_size_v<std::ranges::range_value_t<unref_range_type>> == 2;
98 }
99
101 static constexpr bool expects_valid_scoring_scheme()
102 {
103 if constexpr (alignment_config_type::template exists<align_cfg::scoring_scheme>())
104 {
105 using scoring_type = std::remove_reference_t<
106 decltype(get<align_cfg::scoring_scheme>(std::declval<alignment_config_type>()).scheme)>;
107 return static_cast<bool>(scoring_scheme_for<scoring_type,
108 std::ranges::range_value_t<first_seq_t>,
109 std::ranges::range_value_t<second_seq_t>>);
110 }
111 else
112 {
113 return false;
114 }
115 }
116
118 static constexpr bool expects_alignment_configuration()
119 {
120 bool const is_global = alignment_config_type::template exists<seqan3::align_cfg::method_global>();
121 bool const is_local = alignment_config_type::template exists<seqan3::align_cfg::method_local>();
122
123 return (is_global || is_local);
124 }
125};
126
131struct alignment_configurator
132{
133private:
137 template <typename traits_t>
138 struct select_matrix_policy
139 {
140 private:
142 static constexpr bool only_coordinates =
143 !(traits_t::compute_begin_positions || traits_t::compute_sequence_alignment);
144
146 using score_matrix_t =
147 std::conditional_t<traits_t::is_banded,
148 alignment_score_matrix_one_column_banded<typename traits_t::score_type>,
149 alignment_score_matrix_one_column<typename traits_t::score_type>>;
151 using trace_matrix_t =
152 std::conditional_t<traits_t::is_banded,
153 alignment_trace_matrix_full_banded<typename traits_t::trace_type, only_coordinates>,
154 alignment_trace_matrix_full<typename traits_t::trace_type, only_coordinates>>;
155
156 public:
158 using type = deferred_crtp_base<alignment_matrix_policy, score_matrix_t, trace_matrix_t>;
159 };
160
164 template <typename traits_t>
165 struct select_gap_policy
166 {
167 private:
169 using score_t = typename traits_t::score_type;
172
173 public:
175 using type = std::conditional_t<traits_t::is_vectorised,
176 deferred_crtp_base<simd_affine_gap_policy, score_t, is_local_t>,
177 deferred_crtp_base<affine_gap_policy, score_t, is_local_t>>;
178 };
179
186 template <typename traits_t>
187 struct select_find_optimum_policy
188 {
189 private:
191 using score_t = typename traits_t::score_type;
192
193 public:
195 using type = std::conditional_t<traits_t::is_vectorised,
196 deferred_crtp_base<simd_find_optimum_policy, score_t>,
197 deferred_crtp_base<find_optimum_policy>>;
198 };
199
201 template <typename traits_t, typename... args_t>
202 using select_alignment_algorithm_t = lazy_conditional_t<traits_t::is_banded,
203 lazy<pairwise_alignment_algorithm_banded, args_t...>,
204 lazy<pairwise_alignment_algorithm, args_t...>>;
205
209 template <typename config_t>
210 struct select_gap_recursion_policy
211 {
212 private:
214 using traits_type = alignment_configuration_traits<config_t>;
216 static constexpr bool with_trace = traits_type::requires_trace_information;
217
219 using gap_recursion_policy_type = std::conditional_t<with_trace,
220 policy_affine_gap_with_trace_recursion<config_t>,
221 policy_affine_gap_recursion<config_t>>;
223 using banded_gap_recursion_policy_type =
224 std::conditional_t<with_trace,
225 policy_affine_gap_with_trace_recursion_banded<config_t>,
226 policy_affine_gap_recursion_banded<config_t>>;
227
228 public:
230 using type =
232 };
233
234public:
261 template <align_pairwise_range_input sequences_t, typename config_t>
262 requires is_type_specialisation_of_v<config_t, configuration>
263 static constexpr auto configure(config_t const & cfg)
264 {
265 auto config_with_output = maybe_default_output(cfg);
266 using config_with_output_t = decltype(config_with_output);
267
268 // ----------------------------------------------------------------------------
269 // Configure the type-erased alignment function.
270 // ----------------------------------------------------------------------------
271
272 using first_seq_t = std::tuple_element_t<0, std::ranges::range_value_t<sequences_t>>;
273 using second_seq_t = std::tuple_element_t<1, std::ranges::range_value_t<sequences_t>>;
274
275 using wrapped_first_t = type_reduce_t<first_seq_t &>;
276 using wrapped_second_t = type_reduce_t<second_seq_t &>;
277
278 // The alignment executor passes a chunk over an indexed sequence pair range to the alignment algorithm.
279 using indexed_sequence_pair_range_t = typename chunked_indexed_sequence_pairs<sequences_t>::type;
280 using indexed_sequence_pair_chunk_t = std::ranges::range_value_t<indexed_sequence_pair_range_t>;
281
282 // Select the result type based on the sequences and the configuration.
283 using alignment_result_value_t = typename align_result_selector<std::remove_reference_t<wrapped_first_t>,
285 config_with_output_t>::type;
286 using alignment_result_t = alignment_result<alignment_result_value_t>;
287 using callback_on_result_t = std::function<void(alignment_result_t)>;
288 // Define the function wrapper type.
289 using function_wrapper_t = std::function<void(indexed_sequence_pair_chunk_t, callback_on_result_t)>;
290
291 // Capture the alignment result type.
292 auto config_with_result_type = config_with_output | align_cfg::detail::result_type<alignment_result_t>{};
293
294 // ----------------------------------------------------------------------------
295 // Test some basic preconditions
296 // ----------------------------------------------------------------------------
297
298 using alignment_contract_t = alignment_contract<sequences_t, config_with_output_t>;
299
300 static_assert(alignment_contract_t::expects_alignment_configuration(),
301 "Alignment configuration error: "
302 "The alignment can only be configured with alignment configurations.");
303
304 static_assert(alignment_contract_t::expects_tuple_like_value_type(),
305 "Alignment configuration error: "
306 "The value type of the sequence ranges must model the seqan3::tuple_like and must contain "
307 "exactly 2 elements.");
308
309 static_assert(alignment_contract_t::expects_valid_scoring_scheme(),
310 "Alignment configuration error: "
311 "Either the scoring scheme was not configured or the given scoring scheme cannot be invoked with "
312 "the value types of the passed sequences.");
313
314 // ----------------------------------------------------------------------------
315 // Configure the algorithm
316 // ----------------------------------------------------------------------------
317
318 // Use default edit distance if gaps are not set.
319 align_cfg::gap_cost_affine edit_gap_cost{};
320 auto const & gap_cost = config_with_result_type.get_or(edit_gap_cost);
321 auto const & scoring_scheme = get<align_cfg::scoring_scheme>(cfg).scheme;
322
323 if constexpr (config_t::template exists<seqan3::align_cfg::method_global>())
324 {
325 // Only use edit distance if ...
326 auto method_global_cfg = get<seqan3::align_cfg::method_global>(config_with_result_type);
327 // Only use edit distance if ...
328 if (gap_cost.open_score == 0 && // gap open score is not set,
329 !(method_global_cfg.free_end_gaps_sequence2_leading
330 || method_global_cfg.free_end_gaps_sequence2_trailing)
331 && // none of the free end gaps are set for second seq,
332 (method_global_cfg.free_end_gaps_sequence1_leading
333 == method_global_cfg
334 .free_end_gaps_sequence1_trailing)) // free ends for leading and trailing gaps are equal in first seq.
335 {
336 // TODO: Instead of relying on nucleotide scoring schemes we need to be able to determine the edit distance
337 // option via the scheme.
338 if constexpr (is_type_specialisation_of_v<std::remove_cvref_t<decltype(scoring_scheme)>,
339 nucleotide_scoring_scheme>)
340 {
341 if ((scoring_scheme.score('A'_dna15, 'A'_dna15) == 0)
342 && (scoring_scheme.score('A'_dna15, 'C'_dna15)) == -1)
343 {
344 return std::pair{configure_edit_distance<function_wrapper_t>(config_with_result_type),
345 config_with_result_type};
346 }
347 }
348 }
349 }
350
351 // ----------------------------------------------------------------------------
352 // Check if invalid configuration was used.
353 // ----------------------------------------------------------------------------
354
355 // Do not allow min score configuration for alignments not computing the edit distance.
356 if (config_t::template exists<align_cfg::min_score>())
357 throw invalid_alignment_configuration{"The align_cfg::min_score configuration is only allowed for the "
358 "specific edit distance computation."};
359 // Configure the alignment algorithm.
360 return std::pair{configure_scoring_scheme<function_wrapper_t>(config_with_result_type),
361 config_with_result_type};
362 }
363
364private:
374 template <typename config_t>
375 static constexpr auto maybe_default_output(config_t const & config) noexcept
376 {
377 using traits_t = alignment_configuration_traits<config_t>;
378
379 if constexpr (traits_t::has_output_configuration)
380 return config;
381 else
382 return config | align_cfg::output_score{} | align_cfg::output_begin_position{}
383 | align_cfg::output_end_position{} | align_cfg::output_alignment{} | align_cfg::output_sequence1_id{}
384 | align_cfg::output_sequence2_id{};
385 }
386
392 template <typename function_wrapper_t, typename config_t>
393 static constexpr function_wrapper_t configure_edit_distance(config_t const & cfg)
394 {
395 using traits_t = alignment_configuration_traits<config_t>;
396
397 // ----------------------------------------------------------------------------
398 // Unsupported configurations
399 // ----------------------------------------------------------------------------
400
401 if constexpr (traits_t::is_banded)
402 throw invalid_alignment_configuration{"Banded alignments are yet not supported."};
403
404 // ----------------------------------------------------------------------------
405 // Configure semi-global alignment
406 // ----------------------------------------------------------------------------
407
408 // Get the value for the sequence ends configuration.
409 auto method_global_cfg = cfg.get_or(align_cfg::method_global{});
410
411 auto configure_edit_traits = [&](auto is_semi_global)
412 {
413 struct edit_traits_type
414 {
415 using is_semi_global_type [[maybe_unused]] = std::remove_cvref_t<decltype(is_semi_global)>;
416 };
417
418 edit_distance_algorithm<std::remove_cvref_t<config_t>, edit_traits_type> algorithm{cfg};
419 return function_wrapper_t{std::move(algorithm)};
420 };
421
422 // Check if it has free ends set for the first sequence trailing gaps.
423 auto has_free_ends_trailing = [&](auto first) constexpr
424 {
425 if constexpr (!decltype(first)::value)
426 {
427 return configure_edit_traits(std::false_type{});
428 }
429 else // Resolve correct property at runtime.
430 {
431 if (method_global_cfg.free_end_gaps_sequence1_trailing)
432 return configure_edit_traits(std::true_type{});
433 else
434 return configure_edit_traits(std::false_type{});
435 }
436 };
437
438 // Check if it has free ends set for the first sequence leading gaps.
439 if (method_global_cfg.free_end_gaps_sequence1_leading)
440 return has_free_ends_trailing(std::true_type{});
441 else
442 return has_free_ends_trailing(std::false_type{});
443 }
444
461 template <typename function_wrapper_t, typename config_t>
462 static constexpr function_wrapper_t configure_scoring_scheme(config_t const & cfg);
463
478 template <typename function_wrapper_t, typename... policies_t, typename config_t>
479 static constexpr function_wrapper_t make_algorithm(config_t const & cfg)
480 {
481 using traits_t = alignment_configuration_traits<config_t>;
482
483 // Temporarily we will use the new and the old alignment implementation in order to
484 // refactor step-by-step to the new implementation. The new implementation will be tested in
485 // macrobenchmarks to show that it maintains a high performance.
486
487 // Use old alignment implementation if...
488 if constexpr (traits_t::is_local || // it is a local alignment,
489 traits_t::is_debug || // it runs in debug mode,
490 traits_t::compute_sequence_alignment || // it computes more than the begin position.
491 (traits_t::is_banded && traits_t::compute_begin_positions)
492 || // banded && more than end positions.
493 (traits_t::is_vectorised && traits_t::compute_end_positions)) // simd and more than the score.
494 {
495 using matrix_policy_t = typename select_matrix_policy<traits_t>::type;
496 using gap_policy_t = typename select_gap_policy<traits_t>::type;
497 using find_optimum_t = typename select_find_optimum_policy<traits_t>::type;
498 using gap_init_policy_t = deferred_crtp_base<affine_gap_init_policy>;
499
500 return alignment_algorithm<config_t,
501 matrix_policy_t,
502 gap_policy_t,
503 find_optimum_t,
504 gap_init_policy_t,
505 policies_t...>{cfg};
506 }
507 else // Use new alignment algorithm implementation.
508 {
509 //----------------------------------------------------------------------------------------------------------
510 // Configure the optimum tracker policy.
511 //----------------------------------------------------------------------------------------------------------
512
513 using scalar_optimum_updater_t =
515
516 using optimum_tracker_policy_t =
517 lazy_conditional_t<traits_t::is_vectorised,
518 lazy<policy_optimum_tracker_simd, config_t, max_score_updater_simd_global>,
519 lazy<policy_optimum_tracker, config_t, scalar_optimum_updater_t>>;
520
521 //----------------------------------------------------------------------------------------------------------
522 // Configure the gap scheme policy.
523 //----------------------------------------------------------------------------------------------------------
524
525 using gap_cost_policy_t = typename select_gap_recursion_policy<config_t>::type;
526
527 //----------------------------------------------------------------------------------------------------------
528 // Configure the result builder policy.
529 //----------------------------------------------------------------------------------------------------------
530
531 using result_builder_policy_t = policy_alignment_result_builder<config_t>;
532
533 //----------------------------------------------------------------------------------------------------------
534 // Configure the scoring scheme policy.
535 //----------------------------------------------------------------------------------------------------------
536
537 using alignment_method_t = std::
538 conditional_t<traits_t::is_global, seqan3::align_cfg::method_global, seqan3::align_cfg::method_local>;
539
540 using score_t = typename traits_t::score_type;
541 using scoring_scheme_t = typename traits_t::scoring_scheme_type;
542 constexpr bool is_aminoacid_scheme =
543 is_type_specialisation_of_v<scoring_scheme_t, aminoacid_scoring_scheme>;
544
545 using simple_simd_scheme_t = lazy_conditional_t<traits_t::is_vectorised,
546 lazy<simd_match_mismatch_scoring_scheme,
547 score_t,
548 typename traits_t::scoring_scheme_alphabet_type,
549 alignment_method_t>,
550 void>;
551 using matrix_simd_scheme_t = lazy_conditional_t<traits_t::is_vectorised,
552 lazy<simd_matrix_scoring_scheme,
553 score_t,
554 typename traits_t::scoring_scheme_alphabet_type,
555 alignment_method_t>,
556 void>;
557
558 using alignment_scoring_scheme_t =
559 std::conditional_t<traits_t::is_vectorised,
561 scoring_scheme_t>;
562
563 using scoring_scheme_policy_t = policy_scoring_scheme<config_t, alignment_scoring_scheme_t>;
564
565 //----------------------------------------------------------------------------------------------------------
566 // Configure the alignment matrix policy.
567 //----------------------------------------------------------------------------------------------------------
568
569 using score_matrix_t = score_matrix_single_column<score_t>;
570 using trace_matrix_t = trace_matrix_full<trace_directions>;
571
572 using alignment_matrix_t =
573 std::conditional_t<traits_t::requires_trace_information,
574 combined_score_and_trace_matrix<score_matrix_t, trace_matrix_t>,
575 score_matrix_t>;
576 using alignment_matrix_policy_t = policy_alignment_matrix<traits_t, alignment_matrix_t>;
577
578 //----------------------------------------------------------------------------------------------------------
579 // Configure the final alignment algorithm.
580 //----------------------------------------------------------------------------------------------------------
581
582 using algorithm_t = select_alignment_algorithm_t<traits_t,
583 config_t,
584 gap_cost_policy_t,
585 optimum_tracker_policy_t,
586 result_builder_policy_t,
587 scoring_scheme_policy_t,
588 alignment_matrix_policy_t>;
589 return algorithm_t{cfg};
590 }
591 }
592};
593
595template <typename function_wrapper_t, typename config_t>
596constexpr function_wrapper_t alignment_configurator::configure_scoring_scheme(config_t const & cfg)
597{
598 using traits_t = alignment_configuration_traits<config_t>;
599
600 using scoring_scheme_t = typename traits_t::scoring_scheme_type;
601 constexpr bool is_aminoacid_scheme = is_type_specialisation_of_v<scoring_scheme_t, aminoacid_scoring_scheme>;
602 using alignment_type_t = typename std::
603 conditional_t<traits_t::is_global, seqan3::align_cfg::method_global, seqan3::align_cfg::method_local>;
604
605 using simple_simd_scheme_t = lazy_conditional_t<traits_t::is_vectorised,
606 lazy<simd_match_mismatch_scoring_scheme,
607 typename traits_t::score_type,
608 typename traits_t::scoring_scheme_alphabet_type,
609 alignment_type_t>,
610 void>;
611 using matrix_simd_scheme_t = lazy_conditional_t<traits_t::is_vectorised,
612 lazy<simd_matrix_scoring_scheme,
613 typename traits_t::score_type,
614 typename traits_t::scoring_scheme_alphabet_type,
615 alignment_type_t>,
616 void>;
617
618 using alignment_scoring_scheme_t =
619 std::conditional_t<traits_t::is_vectorised,
621 scoring_scheme_t>;
622
623 using scoring_scheme_policy_t = deferred_crtp_base<scoring_scheme_policy, alignment_scoring_scheme_t>;
624 return make_algorithm<function_wrapper_t, scoring_scheme_policy_t>(cfg);
625}
627} // namespace seqan3::detail
Provides seqan3::detail::affine_gap_init_policy.
Provides seqan3::detail::affine_gap_policy.
Provides configuration for alignment output.
Provides seqan3::align_cfg::detail::result_type.
Provides seqan3::detail::align_result_selector.
Provides concepts needed internally for the alignment algorithms.
Provides helper type traits for the configuration and execution of the alignment algorithm.
Provides seqan3::detail::alignment_algorithm.
Provides seqan3::detail::alignment_matrix_policy.
Provides seqan3::alignment_result.
Provides seqan3::detail::alignment_score_matrix_one_column.
Provides seqan3::detail::alignment_score_matrix_one_column_banded.
Provides seqan3::detail::alignment_trace_matrix_full.
Provides seqan3::detail::alignment_trace_matrix_full_banded.
Provides seqan3::aminoacid_scoring_scheme.
Provides seqan3::detail::combined_score_and_trace_matrix.
Provides seqan3::detail::deferred_crtp_base.
Provides seqan3::detail::edit_distance_algorithm.
Provides seqan3::detail::find_optimum_policy.
A concept that requires that type be able to score two letters.
Whether a type behaves like a tuple.
Provides lazy template instantiation traits.
Provides seqan3::nucleotide_scoring_scheme.
Provides seqan3::detail::pairwise_alignment_algorithm.
Provides seqan3::detail::pairwise_alignment_algorithm.
Provides seqan3::detail::policy_affine_gap_recursion.
Provides seqan3::detail::policy_affine_gap_recursion_banded.
Provides seqan3::detail::policy_affine_gap_with_trace_recursion.
Provides seqan3::detail::policy_affine_gap_with_trace_recursion_banded.
Provides seqan3::detail::policy_alignment_matrix.
Provides seqan3::detail::policy_alignment_result_builder.
Provides seqan3::detail::policy_optimum_tracker.
Provides seqan3::detail::policy_optimum_tracker_simd.
Provides seqan3::detail::policy_scoring_scheme.
Provides seqan3::detail::score_matrix_single_column.
Provides seqan3::detail::scoring_scheme_policy.
Provides seqan3::simd::simd_type.
Provides seqan3::detail::simd_affine_gap_policy.
Provides seqan3::detail::simd_find_optimum_policy.
Provides seqan3::detail::simd_match_mismatch_scoring_scheme.
Provides seqan3::detail::simd_matrix_scoring_scheme.
Provides type traits for working with templates.
Provides seqan3::detail::trace_matrix_full.
Provides seqan3::views::type_reduce.
Provides seqan3::tuple_like.
Provides seqan3::views::zip.