SeqAn3 3.1.0
The Modern C++ library for sequence analysis.
alignment_configurator.hpp
Go to the documentation of this file.
1// -----------------------------------------------------------------------------------------------------
2// Copyright (c) 2006-2021, Knut Reinert & Freie Universität Berlin
3// Copyright (c) 2016-2021, Knut Reinert & MPI für molekulare Genetik
4// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6// -----------------------------------------------------------------------------------------------------
7
13#pragma once
14
15#include <functional>
16#include <tuple>
17#include <utility>
18#include <vector>
19
64
65namespace seqan3::detail
66{
67
77template <typename range_type,
78 typename alignment_config_type>
79struct alignment_contract
80{
81private:
86 using unref_range_type = std::remove_reference_t<range_type>;
88 using first_seq_t = std::tuple_element_t<0, std::ranges::range_value_t<unref_range_type>>;
90 using second_seq_t = std::tuple_element_t<1, std::ranges::range_value_t<unref_range_type>>;
92
93public:
95 constexpr static bool expects_tuple_like_value_type()
96 {
98 std::tuple_size_v<std::ranges::range_value_t<unref_range_type>> == 2;
99 }
100
102 constexpr static bool expects_valid_scoring_scheme()
103 {
104 if constexpr (alignment_config_type::template exists<align_cfg::scoring_scheme>())
105 {
106 using scoring_type =
108 decltype(get<align_cfg::scoring_scheme>(std::declval<alignment_config_type>()).scheme)>;
109 return static_cast<bool>(scoring_scheme_for<scoring_type,
110 std::ranges::range_value_t<first_seq_t>,
111 std::ranges::range_value_t<second_seq_t>>);
112 }
113 else
114 {
115 return false;
116 }
117 }
118
120 constexpr static bool expects_alignment_configuration()
121 {
122 const bool is_global = alignment_config_type::template exists<seqan3::align_cfg::method_global>();
123 const bool is_local = alignment_config_type::template exists<seqan3::align_cfg::method_local>();
124
125 return (is_global || is_local);
126 }
127};
128
133struct alignment_configurator
134{
135private:
136
140 template <typename traits_t>
141 struct select_matrix_policy
142 {
143 private:
145 static constexpr bool only_coordinates = !(traits_t::compute_begin_positions ||
146 traits_t::compute_sequence_alignment);
147
149 using score_matrix_t = std::conditional_t<traits_t::is_banded,
150 alignment_score_matrix_one_column_banded<typename traits_t::score_type>,
151 alignment_score_matrix_one_column<typename traits_t::score_type>>;
153 using trace_matrix_t = std::conditional_t<traits_t::is_banded,
154 alignment_trace_matrix_full_banded<typename traits_t::trace_type,
155 only_coordinates>,
156 alignment_trace_matrix_full<typename traits_t::trace_type,
157 only_coordinates>>;
158
159 public:
161 using type = deferred_crtp_base<alignment_matrix_policy, score_matrix_t, trace_matrix_t>;
162 };
163
167 template <typename traits_t>
168 struct select_gap_policy
169 {
170 private:
172 using score_t = typename traits_t::score_type;
175
176 public:
178 using type = std::conditional_t<traits_t::is_vectorised,
179 deferred_crtp_base<simd_affine_gap_policy, score_t, is_local_t>,
180 deferred_crtp_base<affine_gap_policy, score_t, is_local_t>>;
181 };
182
189 template <typename traits_t>
190 struct select_find_optimum_policy
191 {
192 private:
194 using score_t = typename traits_t::score_type;
195
196 public:
198 using type = std::conditional_t<traits_t::is_vectorised,
199 deferred_crtp_base<simd_find_optimum_policy, score_t>,
200 deferred_crtp_base<find_optimum_policy>>;
201 };
202
204 template <typename traits_t, typename ...args_t>
205 using select_alignment_algorithm_t = lazy_conditional_t<traits_t::is_banded,
206 lazy<pairwise_alignment_algorithm_banded, args_t...>,
207 lazy<pairwise_alignment_algorithm, args_t...>>;
208
212 template <typename config_t>
213 struct select_gap_recursion_policy
214 {
215 private:
217 using traits_type = alignment_configuration_traits<config_t>;
219 static constexpr bool with_trace = traits_type::requires_trace_information;
220
222 using gap_recursion_policy_type = std::conditional_t<with_trace,
223 policy_affine_gap_with_trace_recursion<config_t>,
224 policy_affine_gap_recursion<config_t>>;
226 using banded_gap_recursion_policy_type =
227 std::conditional_t<with_trace,
228 policy_affine_gap_with_trace_recursion_banded<config_t>,
229 policy_affine_gap_recursion_banded<config_t>>;
230 public:
232 using type = std::conditional_t<traits_type::is_banded,
233 banded_gap_recursion_policy_type,
234 gap_recursion_policy_type>;
235 };
236
237public:
264 template <align_pairwise_range_input sequences_t, typename config_t>
266 requires is_type_specialisation_of_v<config_t, configuration>
268 static constexpr auto configure(config_t const & cfg)
269 {
270 auto config_with_output = maybe_default_output(cfg);
271 using config_with_output_t = decltype(config_with_output);
272
273 // ----------------------------------------------------------------------------
274 // Configure the type-erased alignment function.
275 // ----------------------------------------------------------------------------
276
277 using first_seq_t = std::tuple_element_t<0, std::ranges::range_value_t<sequences_t>>;
278 using second_seq_t = std::tuple_element_t<1, std::ranges::range_value_t<sequences_t>>;
279
280 using wrapped_first_t = type_reduce_t<first_seq_t &>;
281 using wrapped_second_t = type_reduce_t<second_seq_t &>;
282
283 // The alignment executor passes a chunk over an indexed sequence pair range to the alignment algorithm.
284 using indexed_sequence_pair_range_t = typename chunked_indexed_sequence_pairs<sequences_t>::type;
285 using indexed_sequence_pair_chunk_t = std::ranges::range_value_t<indexed_sequence_pair_range_t>;
286
287 // Select the result type based on the sequences and the configuration.
288 using alignment_result_value_t = typename align_result_selector<std::remove_reference_t<wrapped_first_t>,
290 config_with_output_t>::type;
291 using alignment_result_t = alignment_result<alignment_result_value_t>;
292 using callback_on_result_t = std::function<void(alignment_result_t)>;
293 // Define the function wrapper type.
294 using function_wrapper_t = std::function<void(indexed_sequence_pair_chunk_t, callback_on_result_t)>;
295
296 // Capture the alignment result type.
297 auto config_with_result_type = config_with_output | align_cfg::detail::result_type<alignment_result_t>{};
298
299 // ----------------------------------------------------------------------------
300 // Test some basic preconditions
301 // ----------------------------------------------------------------------------
302
303 using alignment_contract_t = alignment_contract<sequences_t, config_with_output_t>;
304
305 static_assert(alignment_contract_t::expects_alignment_configuration(),
306 "Alignment configuration error: "
307 "The alignment can only be configured with alignment configurations.");
308
309 static_assert(alignment_contract_t::expects_tuple_like_value_type(),
310 "Alignment configuration error: "
311 "The value type of the sequence ranges must model the seqan3::tuple_like and must contain "
312 "exactly 2 elements.");
313
314 static_assert(alignment_contract_t::expects_valid_scoring_scheme(),
315 "Alignment configuration error: "
316 "Either the scoring scheme was not configured or the given scoring scheme cannot be invoked with "
317 "the value types of the passed sequences.");
318
319 // ----------------------------------------------------------------------------
320 // Configure the algorithm
321 // ----------------------------------------------------------------------------
322
323 // Use default edit distance if gaps are not set.
324 align_cfg::gap_cost_affine edit_gap_cost{};
325 auto const & gap_cost = config_with_result_type.get_or(edit_gap_cost);
326 auto const & scoring_scheme = get<align_cfg::scoring_scheme>(cfg).scheme;
327
328 if constexpr (config_t::template exists<seqan3::align_cfg::method_global>())
329 {
330 // Only use edit distance if ...
331 auto method_global_cfg = get<seqan3::align_cfg::method_global>(config_with_result_type);
332 // Only use edit distance if ...
333 if (gap_cost.open_score == 0 && // gap open score is not set,
334 !(method_global_cfg.free_end_gaps_sequence2_leading ||
335 method_global_cfg.free_end_gaps_sequence2_trailing) && // none of the free end gaps are set for second seq,
336 (method_global_cfg.free_end_gaps_sequence1_leading ==
337 method_global_cfg.free_end_gaps_sequence1_trailing)) // free ends for leading and trailing gaps are equal in first seq.
338 {
339 // TODO: Instead of relying on nucleotide scoring schemes we need to be able to determine the edit distance
340 // option via the scheme.
341 if constexpr (is_type_specialisation_of_v<std::remove_cvref_t<decltype(scoring_scheme)>,
342 nucleotide_scoring_scheme>)
343 {
344 if ((scoring_scheme.score('A'_dna15, 'A'_dna15) == 0) &&
345 (scoring_scheme.score('A'_dna15, 'C'_dna15)) == -1)
346 {
347 return std::pair{configure_edit_distance<function_wrapper_t>(config_with_result_type),
348 config_with_result_type};
349 }
350 }
351 }
352 }
353
354 // ----------------------------------------------------------------------------
355 // Check if invalid configuration was used.
356 // ----------------------------------------------------------------------------
357
358 // Do not allow min score configuration for alignments not computing the edit distance.
359 if (config_t::template exists<align_cfg::min_score>())
360 throw invalid_alignment_configuration{"The align_cfg::min_score configuration is only allowed for the "
361 "specific edit distance computation."};
362 // Configure the alignment algorithm.
363 return std::pair{configure_scoring_scheme<function_wrapper_t>(config_with_result_type),
364 config_with_result_type};
365 }
366
367private:
377 template <typename config_t>
378 static constexpr auto maybe_default_output(config_t const & config) noexcept
379 {
380 using traits_t = alignment_configuration_traits<config_t>;
381
382 if constexpr (traits_t::has_output_configuration)
383 return config;
384 else
385 return config | align_cfg::output_score{} |
386 align_cfg::output_begin_position{} |
387 align_cfg::output_end_position{} |
388 align_cfg::output_alignment{} |
389 align_cfg::output_sequence1_id{} |
390 align_cfg::output_sequence2_id{};
391 }
392
398 template <typename function_wrapper_t, typename config_t>
399 static constexpr function_wrapper_t configure_edit_distance(config_t const & cfg)
400 {
401 using traits_t = alignment_configuration_traits<config_t>;
402
403 // ----------------------------------------------------------------------------
404 // Unsupported configurations
405 // ----------------------------------------------------------------------------
406
407 if constexpr (traits_t::is_banded)
408 throw invalid_alignment_configuration{"Banded alignments are yet not supported."};
409
410 // ----------------------------------------------------------------------------
411 // Configure semi-global alignment
412 // ----------------------------------------------------------------------------
413
414 // Get the value for the sequence ends configuration.
415 auto method_global_cfg = cfg.get_or(align_cfg::method_global{});
416
417 auto configure_edit_traits = [&] (auto is_semi_global)
418 {
419 struct edit_traits_type
420 {
421 using is_semi_global_type [[maybe_unused]] = std::remove_cvref_t<decltype(is_semi_global)>;
422 };
423
424 edit_distance_algorithm<std::remove_cvref_t<config_t>, edit_traits_type> algorithm{cfg};
425 return function_wrapper_t{std::move(algorithm)};
426 };
427
428 // Check if it has free ends set for the first sequence trailing gaps.
429 auto has_free_ends_trailing = [&] (auto first) constexpr
430 {
431 if constexpr (!decltype(first)::value)
432 {
433 return configure_edit_traits(std::false_type{});
434 }
435 else // Resolve correct property at runtime.
436 {
437 if (method_global_cfg.free_end_gaps_sequence1_trailing)
438 return configure_edit_traits(std::true_type{});
439 else
440 return configure_edit_traits(std::false_type{});
441 }
442 };
443
444 // Check if it has free ends set for the first sequence leading gaps.
445 if (method_global_cfg.free_end_gaps_sequence1_leading)
446 return has_free_ends_trailing(std::true_type{});
447 else
448 return has_free_ends_trailing(std::false_type{});
449 }
450
467 template <typename function_wrapper_t, typename config_t>
468 static constexpr function_wrapper_t configure_scoring_scheme(config_t const & cfg);
469
484 template <typename function_wrapper_t, typename ...policies_t, typename config_t>
485 static constexpr function_wrapper_t make_algorithm(config_t const & cfg)
486 {
487 using traits_t = alignment_configuration_traits<config_t>;
488
489 // Temporarily we will use the new and the old alignment implementation in order to
490 // refactor step-by-step to the new implementation. The new implementation will be tested in
491 // macrobenchmarks to show that it maintains a high performance.
492
493 // Use old alignment implementation if...
494 if constexpr (traits_t::is_local || // it is a local alignment,
495 traits_t::is_debug || // it runs in debug mode,
496 traits_t::compute_sequence_alignment || // it computes more than the begin position.
497 (traits_t::is_banded && traits_t::compute_begin_positions) || // banded && more than end positions.
498 (traits_t::is_vectorised && traits_t::compute_end_positions)) // simd and more than the score.
499 {
500 using matrix_policy_t = typename select_matrix_policy<traits_t>::type;
501 using gap_policy_t = typename select_gap_policy<traits_t>::type;
502 using find_optimum_t = typename select_find_optimum_policy<traits_t>::type;
503 using gap_init_policy_t = deferred_crtp_base<affine_gap_init_policy>;
504
505 return alignment_algorithm<config_t, matrix_policy_t, gap_policy_t, find_optimum_t, gap_init_policy_t, policies_t...>{cfg};
506 }
507 else // Use new alignment algorithm implementation.
508 {
509 //----------------------------------------------------------------------------------------------------------
510 // Configure the optimum tracker policy.
511 //----------------------------------------------------------------------------------------------------------
512
513 using scalar_optimum_updater_t = std::conditional_t<traits_t::is_banded,
514 max_score_banded_updater,
515 max_score_updater>;
516
517 using optimum_tracker_policy_t =
518 lazy_conditional_t<traits_t::is_vectorised,
519 lazy<policy_optimum_tracker_simd, config_t, max_score_updater_simd_global>,
520 lazy<policy_optimum_tracker, config_t, scalar_optimum_updater_t>>;
521
522 //----------------------------------------------------------------------------------------------------------
523 // Configure the gap scheme policy.
524 //----------------------------------------------------------------------------------------------------------
525
526 using gap_cost_policy_t = typename select_gap_recursion_policy<config_t>::type;
527
528 //----------------------------------------------------------------------------------------------------------
529 // Configure the result builder policy.
530 //----------------------------------------------------------------------------------------------------------
531
532 using result_builder_policy_t = policy_alignment_result_builder<config_t>;
533
534 //----------------------------------------------------------------------------------------------------------
535 // Configure the scoring scheme policy.
536 //----------------------------------------------------------------------------------------------------------
537
538 using alignment_method_t = std::conditional_t<traits_t::is_global,
541
542 using score_t = typename traits_t::score_type;
543 using scoring_scheme_t = typename traits_t::scoring_scheme_type;
544 constexpr bool is_aminoacid_scheme = is_type_specialisation_of_v<scoring_scheme_t, aminoacid_scoring_scheme>;
545
546 using simple_simd_scheme_t = lazy_conditional_t<traits_t::is_vectorised,
547 lazy<simd_match_mismatch_scoring_scheme,
548 score_t,
549 typename traits_t::scoring_scheme_alphabet_type,
550 alignment_method_t>,
551 void>;
552 using matrix_simd_scheme_t = lazy_conditional_t<traits_t::is_vectorised,
553 lazy<simd_matrix_scoring_scheme,
554 score_t,
555 typename traits_t::scoring_scheme_alphabet_type,
556 alignment_method_t>,
557 void>;
558
559 using alignment_scoring_scheme_t = std::conditional_t<traits_t::is_vectorised,
560 std::conditional_t<is_aminoacid_scheme,
561 matrix_simd_scheme_t,
562 simple_simd_scheme_t>,
563 scoring_scheme_t>;
564
565 using scoring_scheme_policy_t = policy_scoring_scheme<config_t, alignment_scoring_scheme_t>;
566
567 //----------------------------------------------------------------------------------------------------------
568 // Configure the alignment matrix policy.
569 //----------------------------------------------------------------------------------------------------------
570
571 using score_matrix_t = score_matrix_single_column<score_t>;
572 using trace_matrix_t = trace_matrix_full<trace_directions>;
573
574 using alignment_matrix_t = std::conditional_t<traits_t::requires_trace_information,
575 combined_score_and_trace_matrix<score_matrix_t,
576 trace_matrix_t>,
577 score_matrix_t>;
578 using alignment_matrix_policy_t = policy_alignment_matrix<traits_t, alignment_matrix_t>;
579
580 //----------------------------------------------------------------------------------------------------------
581 // Configure the final alignment algorithm.
582 //----------------------------------------------------------------------------------------------------------
583
584 using algorithm_t = select_alignment_algorithm_t<traits_t,
585 config_t,
586 gap_cost_policy_t,
587 optimum_tracker_policy_t,
588 result_builder_policy_t,
589 scoring_scheme_policy_t,
590 alignment_matrix_policy_t>;
591 return algorithm_t{cfg};
592 }
593 }
594};
595
597template <typename function_wrapper_t, typename config_t>
598constexpr function_wrapper_t alignment_configurator::configure_scoring_scheme(config_t const & cfg)
599{
600 using traits_t = alignment_configuration_traits<config_t>;
601
602 using scoring_scheme_t = typename traits_t::scoring_scheme_type;
603 constexpr bool is_aminoacid_scheme = is_type_specialisation_of_v<scoring_scheme_t, aminoacid_scoring_scheme>;
604 using alignment_type_t = typename std::conditional_t<traits_t::is_global,
607
608 using simple_simd_scheme_t = lazy_conditional_t<traits_t::is_vectorised,
609 lazy<simd_match_mismatch_scoring_scheme,
610 typename traits_t::score_type,
611 typename traits_t::scoring_scheme_alphabet_type,
612 alignment_type_t>,
613 void>;
614 using matrix_simd_scheme_t = lazy_conditional_t<traits_t::is_vectorised,
615 lazy<simd_matrix_scoring_scheme,
616 typename traits_t::score_type,
617 typename traits_t::scoring_scheme_alphabet_type,
618 alignment_type_t>,
619 void>;
620
621 using alignment_scoring_scheme_t = std::conditional_t<traits_t::is_vectorised,
623 scoring_scheme_t>;
624
625 using scoring_scheme_policy_t = deferred_crtp_base<scoring_scheme_policy, alignment_scoring_scheme_t>;
626 return make_algorithm<function_wrapper_t, scoring_scheme_policy_t>(cfg);
627}
629} // namespace seqan3::detail
Provides seqan3::detail::affine_gap_init_policy.
Provides seqan3::detail::affine_gap_policy.
Provides configuration for alignment output.
Provides seqan3::align_cfg::detail::result_type.
Provides seqan3::detail::align_result_selector.
Provides concepts needed internally for the alignment algorithms.
Provides helper type traits for the configuration and execution of the alignment algorithm.
Provides seqan3::detail::alignment_algorithm.
Provides seqan3::detail::alignment_matrix_policy.
Provides seqan3::alignment_result.
Provides seqan3::detail::alignment_score_matrix_one_column.
Provides seqan3::detail::alignment_score_matrix_one_column_banded.
Provides seqan3::detail::alignment_trace_matrix_full.
Provides seqan3::detail::alignment_trace_matrix_full_banded.
Provides seqan3::aminoacid_scoring_scheme.
Sets the global alignment method.
Definition: align_config_method.hpp:122
Sets the local alignment method.
Definition: align_config_method.hpp:45
Provides seqan3::detail::combined_score_and_trace_matrix.
Provides seqan3::detail::deferred_crtp_base.
Provides seqan3::detail::edit_distance_algorithm.
Provides seqan3::detail::find_optimum_policy.
A concept that requires that type be able to score two letters.
Whether a type behaves like a tuple.
Provides lazy template instantiation traits.
Provides seqan3::nucleotide_scoring_scheme.
Provides seqan3::detail::pairwise_alignment_algorithm.
Provides seqan3::detail::pairwise_alignment_algorithm.
Provides seqan3::detail::policy_affine_gap_recursion.
Provides seqan3::detail::policy_affine_gap_recursion_banded.
Provides seqan3::detail::policy_affine_gap_with_trace_recursion.
Provides seqan3::detail::policy_affine_gap_with_trace_recursion_banded.
Provides seqan3::detail::policy_alignment_matrix.
Provides seqan3::detail::policy_alignment_result_builder.
Provides seqan3::detail::policy_optimum_tracker.
Provides seqan3::detail::policy_optimum_tracker_simd.
Provides seqan3::detail::policy_scoring_scheme.
Provides seqan3::detail::score_matrix_single_column.
Provides seqan3::detail::scoring_scheme_policy.
Provides seqan3::simd::simd_type.
Provides seqan3::detail::simd_affine_gap_policy.
Provides seqan3::detail::simd_find_optimum_policy.
Provides seqan3::detail::simd_match_mismatch_scoring_scheme.
Provides seqan3::detail::simd_matrix_scoring_scheme.
Provides type traits for working with templates.
Provides seqan3::detail::trace_matrix_full.
Provides seqan3::views::type_reduce.
Provides seqan3::tuple_like.
Provides seqan3::views::zip.