SeqAn3 3.4.0-rc.1
The Modern C++ library for sequence analysis.
Loading...
Searching...
No Matches
alignment_configurator.hpp
Go to the documentation of this file.
1// SPDX-FileCopyrightText: 2006-2024 Knut Reinert & Freie Universität Berlin
2// SPDX-FileCopyrightText: 2016-2024 Knut Reinert & MPI für molekulare Genetik
3// SPDX-License-Identifier: BSD-3-Clause
4
10#pragma once
11
12#include <functional>
13#include <tuple>
14#include <utility>
15#include <vector>
16
43#include <seqan3/alignment/pairwise/policy/affine_gap_init_policy.hpp>
44#include <seqan3/alignment/pairwise/policy/affine_gap_policy.hpp>
45#include <seqan3/alignment/pairwise/policy/alignment_matrix_policy.hpp>
46#include <seqan3/alignment/pairwise/policy/find_optimum_policy.hpp>
47#include <seqan3/alignment/pairwise/policy/scoring_scheme_policy.hpp>
48#include <seqan3/alignment/pairwise/policy/simd_affine_gap_policy.hpp>
49#include <seqan3/alignment/pairwise/policy/simd_find_optimum_policy.hpp>
56#include <seqan3/utility/simd/simd.hpp>
61
62namespace seqan3::detail
63{
64
74template <typename range_type, typename alignment_config_type>
75struct alignment_contract
76{
77private:
82 using unref_range_type = std::remove_reference_t<range_type>;
84 using first_seq_t = std::tuple_element_t<0, std::ranges::range_value_t<unref_range_type>>;
86 using second_seq_t = std::tuple_element_t<1, std::ranges::range_value_t<unref_range_type>>;
88
89public:
91 static constexpr bool expects_tuple_like_value_type()
92 {
94 && std::tuple_size_v<std::ranges::range_value_t<unref_range_type>> == 2;
95 }
96
98 static constexpr bool expects_valid_scoring_scheme()
99 {
100 if constexpr (alignment_config_type::template exists<align_cfg::scoring_scheme>())
101 {
102 using scoring_type = std::remove_reference_t<
103 decltype(get<align_cfg::scoring_scheme>(std::declval<alignment_config_type>()).scheme)>;
104 return static_cast<bool>(scoring_scheme_for<scoring_type,
105 std::ranges::range_value_t<first_seq_t>,
106 std::ranges::range_value_t<second_seq_t>>);
107 }
108 else
109 {
110 return false;
111 }
112 }
113
115 static constexpr bool expects_alignment_configuration()
116 {
117 bool const is_global = alignment_config_type::template exists<seqan3::align_cfg::method_global>();
118 bool const is_local = alignment_config_type::template exists<seqan3::align_cfg::method_local>();
119
120 return (is_global || is_local);
121 }
122};
123
128struct alignment_configurator
129{
130private:
134 template <typename traits_t>
135 struct select_matrix_policy
136 {
137 private:
139 static constexpr bool only_coordinates =
140 !(traits_t::compute_begin_positions || traits_t::compute_sequence_alignment);
141
143 using score_matrix_t =
144 std::conditional_t<traits_t::is_banded,
145 alignment_score_matrix_one_column_banded<typename traits_t::score_type>,
146 alignment_score_matrix_one_column<typename traits_t::score_type>>;
148 using trace_matrix_t =
149 std::conditional_t<traits_t::is_banded,
150 alignment_trace_matrix_full_banded<typename traits_t::trace_type, only_coordinates>,
151 alignment_trace_matrix_full<typename traits_t::trace_type, only_coordinates>>;
152
153 public:
155 using type = deferred_crtp_base<alignment_matrix_policy, score_matrix_t, trace_matrix_t>;
156 };
157
161 template <typename traits_t>
162 struct select_gap_policy
163 {
164 private:
166 using score_t = typename traits_t::score_type;
169
170 public:
172 using type = std::conditional_t<traits_t::is_vectorised,
173 deferred_crtp_base<simd_affine_gap_policy, score_t, is_local_t>,
174 deferred_crtp_base<affine_gap_policy, score_t, is_local_t>>;
175 };
176
183 template <typename traits_t>
184 struct select_find_optimum_policy
185 {
186 private:
188 using score_t = typename traits_t::score_type;
189
190 public:
192 using type = std::conditional_t<traits_t::is_vectorised,
193 deferred_crtp_base<simd_find_optimum_policy, score_t>,
194 deferred_crtp_base<find_optimum_policy>>;
195 };
196
198 template <typename traits_t, typename... args_t>
199 using select_alignment_algorithm_t = lazy_conditional_t<traits_t::is_banded,
200 lazy<pairwise_alignment_algorithm_banded, args_t...>,
201 lazy<pairwise_alignment_algorithm, args_t...>>;
202
206 template <typename config_t>
207 struct select_gap_recursion_policy
208 {
209 private:
211 using traits_type = alignment_configuration_traits<config_t>;
213 static constexpr bool with_trace = traits_type::requires_trace_information;
214
216 using gap_recursion_policy_type = std::conditional_t<with_trace,
217 policy_affine_gap_with_trace_recursion<config_t>,
218 policy_affine_gap_recursion<config_t>>;
220 using banded_gap_recursion_policy_type =
221 std::conditional_t<with_trace,
222 policy_affine_gap_with_trace_recursion_banded<config_t>,
223 policy_affine_gap_recursion_banded<config_t>>;
224
225 public:
227 using type =
229 };
230
231public:
258 template <align_pairwise_range_input sequences_t, typename config_t>
259 requires is_type_specialisation_of_v<config_t, configuration>
260 static constexpr auto configure(config_t const & cfg)
261 {
262 auto config_with_output = maybe_default_output(cfg);
263 using config_with_output_t = decltype(config_with_output);
264
265 // ----------------------------------------------------------------------------
266 // Configure the type-erased alignment function.
267 // ----------------------------------------------------------------------------
268
269 using first_seq_t = std::tuple_element_t<0, std::ranges::range_value_t<sequences_t>>;
270 using second_seq_t = std::tuple_element_t<1, std::ranges::range_value_t<sequences_t>>;
271
272 using wrapped_first_t = type_reduce_t<first_seq_t &>;
273 using wrapped_second_t = type_reduce_t<second_seq_t &>;
274
275 // The alignment executor passes a chunk over an indexed sequence pair range to the alignment algorithm.
276 using indexed_sequence_pair_range_t = typename chunked_indexed_sequence_pairs<sequences_t>::type;
277 using indexed_sequence_pair_chunk_t = std::ranges::range_value_t<indexed_sequence_pair_range_t>;
278
279 // Select the result type based on the sequences and the configuration.
280 using alignment_result_value_t = typename align_result_selector<std::remove_reference_t<wrapped_first_t>,
282 config_with_output_t>::type;
283 using alignment_result_t = alignment_result<alignment_result_value_t>;
284 using callback_on_result_t = std::function<void(alignment_result_t)>;
285 // Define the function wrapper type.
286 using function_wrapper_t = std::function<void(indexed_sequence_pair_chunk_t, callback_on_result_t)>;
287
288 // Capture the alignment result type.
289 auto config_with_result_type = config_with_output | align_cfg::detail::result_type<alignment_result_t>{};
290
291 // ----------------------------------------------------------------------------
292 // Test some basic preconditions
293 // ----------------------------------------------------------------------------
294
295 using alignment_contract_t = alignment_contract<sequences_t, config_with_output_t>;
296
297 static_assert(alignment_contract_t::expects_alignment_configuration(),
298 "Alignment configuration error: "
299 "The alignment can only be configured with alignment configurations.");
300
301 static_assert(alignment_contract_t::expects_tuple_like_value_type(),
302 "Alignment configuration error: "
303 "The value type of the sequence ranges must model the seqan3::tuple_like and must contain "
304 "exactly 2 elements.");
305
306 static_assert(alignment_contract_t::expects_valid_scoring_scheme(),
307 "Alignment configuration error: "
308 "Either the scoring scheme was not configured or the given scoring scheme cannot be invoked with "
309 "the value types of the passed sequences.");
310
311 // ----------------------------------------------------------------------------
312 // Configure the algorithm
313 // ----------------------------------------------------------------------------
314
315 // Use default edit distance if gaps are not set.
316 align_cfg::gap_cost_affine edit_gap_cost{};
317 auto const & gap_cost = config_with_result_type.get_or(edit_gap_cost);
318 auto const & scoring_scheme = get<align_cfg::scoring_scheme>(cfg).scheme;
319
320 if constexpr (config_t::template exists<seqan3::align_cfg::method_global>())
321 {
322 // Only use edit distance if ...
323 auto method_global_cfg = get<seqan3::align_cfg::method_global>(config_with_result_type);
324 // Only use edit distance if ...
325 if (gap_cost.open_score == 0 && // gap open score is not set,
326 !(method_global_cfg.free_end_gaps_sequence2_leading
327 || method_global_cfg.free_end_gaps_sequence2_trailing)
328 && // none of the free end gaps are set for second seq,
329 (method_global_cfg.free_end_gaps_sequence1_leading
330 == method_global_cfg
331 .free_end_gaps_sequence1_trailing)) // free ends for leading and trailing gaps are equal in first seq.
332 {
333 // TODO: Instead of relying on nucleotide scoring schemes we need to be able to determine the edit distance
334 // option via the scheme.
335 if constexpr (is_type_specialisation_of_v<std::remove_cvref_t<decltype(scoring_scheme)>,
336 nucleotide_scoring_scheme>)
337 {
338 if ((scoring_scheme.score('A'_dna15, 'A'_dna15) == 0)
339 && (scoring_scheme.score('A'_dna15, 'C'_dna15)) == -1)
340 {
341 return std::pair{configure_edit_distance<function_wrapper_t>(config_with_result_type),
342 config_with_result_type};
343 }
344 }
345 }
346 }
347
348 // ----------------------------------------------------------------------------
349 // Check if invalid configuration was used.
350 // ----------------------------------------------------------------------------
351
352 // Do not allow min score configuration for alignments not computing the edit distance.
353 if (config_t::template exists<align_cfg::min_score>())
354 throw invalid_alignment_configuration{"The align_cfg::min_score configuration is only allowed for the "
355 "specific edit distance computation."};
356 // Configure the alignment algorithm.
357 return std::pair{configure_scoring_scheme<function_wrapper_t>(config_with_result_type),
358 config_with_result_type};
359 }
360
361private:
371 template <typename config_t>
372 static constexpr auto maybe_default_output(config_t const & config) noexcept
373 {
374 using traits_t = alignment_configuration_traits<config_t>;
375
376 if constexpr (traits_t::has_output_configuration)
377 return config;
378 else
379 return config | align_cfg::output_score{} | align_cfg::output_begin_position{}
380 | align_cfg::output_end_position{} | align_cfg::output_alignment{} | align_cfg::output_sequence1_id{}
381 | align_cfg::output_sequence2_id{};
382 }
383
389 template <typename function_wrapper_t, typename config_t>
390 static constexpr function_wrapper_t configure_edit_distance(config_t const & cfg)
391 {
392 using traits_t = alignment_configuration_traits<config_t>;
393
394 // ----------------------------------------------------------------------------
395 // Unsupported configurations
396 // ----------------------------------------------------------------------------
397
398 if constexpr (traits_t::is_banded)
399 throw invalid_alignment_configuration{"Banded alignments are yet not supported."};
400
401 // ----------------------------------------------------------------------------
402 // Configure semi-global alignment
403 // ----------------------------------------------------------------------------
404
405 // Get the value for the sequence ends configuration.
406 auto method_global_cfg = cfg.get_or(align_cfg::method_global{});
407
408 auto configure_edit_traits = [&](auto is_semi_global)
409 {
410 struct edit_traits_type
411 {
412 using is_semi_global_type [[maybe_unused]] = std::remove_cvref_t<decltype(is_semi_global)>;
413 };
414
415 edit_distance_algorithm<std::remove_cvref_t<config_t>, edit_traits_type> algorithm{cfg};
416 return function_wrapper_t{std::move(algorithm)};
417 };
418
419 // Check if it has free ends set for the first sequence trailing gaps.
420 auto has_free_ends_trailing = [&](auto first) constexpr
421 {
422 if constexpr (!decltype(first)::value)
423 {
424 return configure_edit_traits(std::false_type{});
425 }
426 else // Resolve correct property at runtime.
427 {
428 if (method_global_cfg.free_end_gaps_sequence1_trailing)
429 return configure_edit_traits(std::true_type{});
430 else
431 return configure_edit_traits(std::false_type{});
432 }
433 };
434
435 // Check if it has free ends set for the first sequence leading gaps.
436 if (method_global_cfg.free_end_gaps_sequence1_leading)
437 return has_free_ends_trailing(std::true_type{});
438 else
439 return has_free_ends_trailing(std::false_type{});
440 }
441
458 template <typename function_wrapper_t, typename config_t>
459 static constexpr function_wrapper_t configure_scoring_scheme(config_t const & cfg);
460
475 template <typename function_wrapper_t, typename... policies_t, typename config_t>
476 static constexpr function_wrapper_t make_algorithm(config_t const & cfg)
477 {
478 using traits_t = alignment_configuration_traits<config_t>;
479
480 // Temporarily we will use the new and the old alignment implementation in order to
481 // refactor step-by-step to the new implementation. The new implementation will be tested in
482 // macrobenchmarks to show that it maintains a high performance.
483
484 // Use old alignment implementation if...
485 if constexpr (traits_t::is_local || // it is a local alignment,
486 traits_t::is_debug || // it runs in debug mode,
487 traits_t::compute_sequence_alignment || // it computes more than the begin position.
488 (traits_t::is_banded && traits_t::compute_begin_positions)
489 || // banded && more than end positions.
490 (traits_t::is_vectorised && traits_t::compute_end_positions)) // simd and more than the score.
491 {
492 using matrix_policy_t = typename select_matrix_policy<traits_t>::type;
493 using gap_policy_t = typename select_gap_policy<traits_t>::type;
494 using find_optimum_t = typename select_find_optimum_policy<traits_t>::type;
495 using gap_init_policy_t = deferred_crtp_base<affine_gap_init_policy>;
496
497 return alignment_algorithm<config_t,
498 matrix_policy_t,
499 gap_policy_t,
500 find_optimum_t,
501 gap_init_policy_t,
502 policies_t...>{cfg};
503 }
504 else // Use new alignment algorithm implementation.
505 {
506 //----------------------------------------------------------------------------------------------------------
507 // Configure the optimum tracker policy.
508 //----------------------------------------------------------------------------------------------------------
509
510 using scalar_optimum_updater_t =
512
513 using optimum_tracker_policy_t =
514 lazy_conditional_t<traits_t::is_vectorised,
515 lazy<policy_optimum_tracker_simd, config_t, max_score_updater_simd_global>,
516 lazy<policy_optimum_tracker, config_t, scalar_optimum_updater_t>>;
517
518 //----------------------------------------------------------------------------------------------------------
519 // Configure the gap scheme policy.
520 //----------------------------------------------------------------------------------------------------------
521
522 using gap_cost_policy_t = typename select_gap_recursion_policy<config_t>::type;
523
524 //----------------------------------------------------------------------------------------------------------
525 // Configure the result builder policy.
526 //----------------------------------------------------------------------------------------------------------
527
528 using result_builder_policy_t = policy_alignment_result_builder<config_t>;
529
530 //----------------------------------------------------------------------------------------------------------
531 // Configure the scoring scheme policy.
532 //----------------------------------------------------------------------------------------------------------
533
534 using alignment_method_t = std::
535 conditional_t<traits_t::is_global, seqan3::align_cfg::method_global, seqan3::align_cfg::method_local>;
536
537 using score_t = typename traits_t::score_type;
538 using scoring_scheme_t = typename traits_t::scoring_scheme_type;
539 constexpr bool is_aminoacid_scheme =
540 is_type_specialisation_of_v<scoring_scheme_t, aminoacid_scoring_scheme>;
541
542 using simple_simd_scheme_t = lazy_conditional_t<traits_t::is_vectorised,
543 lazy<simd_match_mismatch_scoring_scheme,
544 score_t,
545 typename traits_t::scoring_scheme_alphabet_type,
546 alignment_method_t>,
547 void>;
548 using matrix_simd_scheme_t = lazy_conditional_t<traits_t::is_vectorised,
549 lazy<simd_matrix_scoring_scheme,
550 score_t,
551 typename traits_t::scoring_scheme_alphabet_type,
552 alignment_method_t>,
553 void>;
554
555 using alignment_scoring_scheme_t =
556 std::conditional_t<traits_t::is_vectorised,
558 scoring_scheme_t>;
559
560 using scoring_scheme_policy_t = policy_scoring_scheme<config_t, alignment_scoring_scheme_t>;
561
562 //----------------------------------------------------------------------------------------------------------
563 // Configure the alignment matrix policy.
564 //----------------------------------------------------------------------------------------------------------
565
566 using score_matrix_t = score_matrix_single_column<score_t>;
567 using trace_matrix_t = trace_matrix_full<trace_directions>;
568
569 using alignment_matrix_t =
570 std::conditional_t<traits_t::requires_trace_information,
571 combined_score_and_trace_matrix<score_matrix_t, trace_matrix_t>,
572 score_matrix_t>;
573 using alignment_matrix_policy_t = policy_alignment_matrix<traits_t, alignment_matrix_t>;
574
575 //----------------------------------------------------------------------------------------------------------
576 // Configure the final alignment algorithm.
577 //----------------------------------------------------------------------------------------------------------
578
579 using algorithm_t = select_alignment_algorithm_t<traits_t,
580 config_t,
581 gap_cost_policy_t,
582 optimum_tracker_policy_t,
583 result_builder_policy_t,
584 scoring_scheme_policy_t,
585 alignment_matrix_policy_t>;
586 return algorithm_t{cfg};
587 }
588 }
589};
590
592template <typename function_wrapper_t, typename config_t>
593constexpr function_wrapper_t alignment_configurator::configure_scoring_scheme(config_t const & cfg)
594{
595 using traits_t = alignment_configuration_traits<config_t>;
596
597 using scoring_scheme_t = typename traits_t::scoring_scheme_type;
598 constexpr bool is_aminoacid_scheme = is_type_specialisation_of_v<scoring_scheme_t, aminoacid_scoring_scheme>;
599 using alignment_type_t = typename std::
600 conditional_t<traits_t::is_global, seqan3::align_cfg::method_global, seqan3::align_cfg::method_local>;
601
602 using simple_simd_scheme_t = lazy_conditional_t<traits_t::is_vectorised,
603 lazy<simd_match_mismatch_scoring_scheme,
604 typename traits_t::score_type,
605 typename traits_t::scoring_scheme_alphabet_type,
606 alignment_type_t>,
607 void>;
608 using matrix_simd_scheme_t = lazy_conditional_t<traits_t::is_vectorised,
609 lazy<simd_matrix_scoring_scheme,
610 typename traits_t::score_type,
611 typename traits_t::scoring_scheme_alphabet_type,
612 alignment_type_t>,
613 void>;
614
615 using alignment_scoring_scheme_t =
616 std::conditional_t<traits_t::is_vectorised,
618 scoring_scheme_t>;
619
620 using scoring_scheme_policy_t = deferred_crtp_base<scoring_scheme_policy, alignment_scoring_scheme_t>;
621 return make_algorithm<function_wrapper_t, scoring_scheme_policy_t>(cfg);
622}
624} // namespace seqan3::detail
Provides configuration for alignment output.
Provides seqan3::align_cfg::detail::result_type.
Provides seqan3::detail::align_result_selector.
Provides concepts needed internally for the alignment algorithms.
Provides helper type traits for the configuration and execution of the alignment algorithm.
Provides seqan3::detail::alignment_algorithm.
Provides seqan3::alignment_result.
Provides seqan3::detail::alignment_score_matrix_one_column.
Provides seqan3::detail::alignment_score_matrix_one_column_banded.
Provides seqan3::detail::alignment_trace_matrix_full.
Provides seqan3::detail::alignment_trace_matrix_full_banded.
Provides seqan3::aminoacid_scoring_scheme.
Provides seqan3::detail::combined_score_and_trace_matrix.
Provides seqan3::detail::deferred_crtp_base.
Provides seqan3::detail::edit_distance_algorithm.
A concept that requires that type be able to score two letters.
Whether a type behaves like a tuple.
Provides lazy template instantiation traits.
Provides seqan3::nucleotide_scoring_scheme.
Provides seqan3::detail::pairwise_alignment_algorithm.
Provides seqan3::detail::pairwise_alignment_algorithm.
Provides seqan3::detail::policy_affine_gap_recursion.
Provides seqan3::detail::policy_affine_gap_recursion_banded.
Provides seqan3::detail::policy_affine_gap_with_trace_recursion.
Provides seqan3::detail::policy_affine_gap_with_trace_recursion_banded.
Provides seqan3::detail::policy_alignment_matrix.
Provides seqan3::detail::policy_alignment_result_builder.
Provides seqan3::detail::policy_optimum_tracker.
Provides seqan3::detail::policy_optimum_tracker_simd.
Provides seqan3::detail::policy_scoring_scheme.
Provides seqan3::detail::score_matrix_single_column.
Provides seqan3::detail::simd_match_mismatch_scoring_scheme.
Provides seqan3::detail::simd_matrix_scoring_scheme.
Provides type traits for working with templates.
Provides seqan3::detail::trace_matrix_full.
Provides seqan3::views::type_reduce.
Provides seqan3::tuple_like.
Provides seqan3::views::zip.
Hide me