SeqAn3  3.0.3
The Modern C++ library for sequence analysis.
alignment_configurator.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2021, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2021, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
13 #pragma once
14 
15 #include <functional>
16 #include <tuple>
17 #include <utility>
18 #include <vector>
19 
64 
65 namespace seqan3::detail
66 {
67 
77 template <typename range_type,
78  typename alignment_config_type>
79 struct alignment_contract
80 {
81 private:
86  using unref_range_type = std::remove_reference_t<range_type>;
88  using first_seq_t = std::tuple_element_t<0, std::ranges::range_value_t<unref_range_type>>;
90  using second_seq_t = std::tuple_element_t<1, std::ranges::range_value_t<unref_range_type>>;
92 
93 public:
95  constexpr static bool expects_tuple_like_value_type()
96  {
98  std::tuple_size_v<std::ranges::range_value_t<unref_range_type>> == 2;
99  }
100 
102  constexpr static bool expects_valid_scoring_scheme()
103  {
104  if constexpr (alignment_config_type::template exists<align_cfg::scoring_scheme>())
105  {
106  using scoring_type =
108  decltype(get<align_cfg::scoring_scheme>(std::declval<alignment_config_type>()).scheme)>;
109  return static_cast<bool>(scoring_scheme_for<scoring_type,
110  std::ranges::range_value_t<first_seq_t>,
111  std::ranges::range_value_t<second_seq_t>>);
112  }
113  else
114  {
115  return false;
116  }
117  }
118 
120  constexpr static bool expects_alignment_configuration()
121  {
122  const bool is_global = alignment_config_type::template exists<seqan3::align_cfg::method_global>();
123  const bool is_local = alignment_config_type::template exists<seqan3::align_cfg::method_local>();
124 
125  return (is_global || is_local);
126  }
127 };
128 
133 struct alignment_configurator
134 {
135 private:
136 
140  template <typename traits_t>
141  struct select_matrix_policy
142  {
143  private:
145  static constexpr bool only_coordinates = !(traits_t::compute_begin_positions ||
146  traits_t::compute_sequence_alignment);
147 
149  using score_matrix_t = std::conditional_t<traits_t::is_banded,
150  alignment_score_matrix_one_column_banded<typename traits_t::score_type>,
151  alignment_score_matrix_one_column<typename traits_t::score_type>>;
153  using trace_matrix_t = std::conditional_t<traits_t::is_banded,
154  alignment_trace_matrix_full_banded<typename traits_t::trace_type,
155  only_coordinates>,
156  alignment_trace_matrix_full<typename traits_t::trace_type,
157  only_coordinates>>;
158 
159  public:
161  using type = deferred_crtp_base<alignment_matrix_policy, score_matrix_t, trace_matrix_t>;
162  };
163 
167  template <typename traits_t>
168  struct select_gap_policy
169  {
170  private:
172  using score_t = typename traits_t::score_type;
174  using is_local_t = std::bool_constant<traits_t::is_local>;
175 
176  public:
178  using type = std::conditional_t<traits_t::is_vectorised,
179  deferred_crtp_base<simd_affine_gap_policy, score_t, is_local_t>,
180  deferred_crtp_base<affine_gap_policy, score_t, is_local_t>>;
181  };
182 
189  template <typename traits_t>
190  struct select_find_optimum_policy
191  {
192  private:
194  using score_t = typename traits_t::score_type;
195 
196  public:
198  using type = std::conditional_t<traits_t::is_vectorised,
199  deferred_crtp_base<simd_find_optimum_policy, score_t>,
200  deferred_crtp_base<find_optimum_policy>>;
201  };
202 
204  template <typename traits_t, typename ...args_t>
205  using select_alignment_algorithm_t = lazy_conditional_t<traits_t::is_banded,
206  lazy<pairwise_alignment_algorithm_banded, args_t...>,
207  lazy<pairwise_alignment_algorithm, args_t...>>;
208 
212  template <typename config_t>
213  struct select_gap_recursion_policy
214  {
215  private:
217  using traits_type = alignment_configuration_traits<config_t>;
219  static constexpr bool with_trace = traits_type::requires_trace_information;
220 
222  using gap_recursion_policy_type = std::conditional_t<with_trace,
223  policy_affine_gap_with_trace_recursion<config_t>,
224  policy_affine_gap_recursion<config_t>>;
226  using banded_gap_recursion_policy_type =
227  std::conditional_t<with_trace,
228  policy_affine_gap_with_trace_recursion_banded<config_t>,
229  policy_affine_gap_recursion_banded<config_t>>;
230  public:
232  using type = std::conditional_t<traits_type::is_banded,
233  banded_gap_recursion_policy_type,
234  gap_recursion_policy_type>;
235  };
236 
237 public:
264  template <align_pairwise_range_input sequences_t, typename config_t>
266  requires is_type_specialisation_of_v<config_t, configuration>
268  static constexpr auto configure(config_t const & cfg)
269  {
270  auto config_with_output = maybe_default_output(cfg);
271  using config_with_output_t = decltype(config_with_output);
272 
273  // ----------------------------------------------------------------------------
274  // Configure the type-erased alignment function.
275  // ----------------------------------------------------------------------------
276 
277  using first_seq_t = std::tuple_element_t<0, std::ranges::range_value_t<sequences_t>>;
278  using second_seq_t = std::tuple_element_t<1, std::ranges::range_value_t<sequences_t>>;
279 
280  using wrapped_first_t = type_reduce_t<first_seq_t &>;
281  using wrapped_second_t = type_reduce_t<second_seq_t &>;
282 
283  // The alignment executor passes a chunk over an indexed sequence pair range to the alignment algorithm.
284  using indexed_sequence_pair_range_t = typename chunked_indexed_sequence_pairs<sequences_t>::type;
285  using indexed_sequence_pair_chunk_t = std::ranges::range_value_t<indexed_sequence_pair_range_t>;
286 
287  // Select the result type based on the sequences and the configuration.
288  using alignment_result_value_t = typename align_result_selector<std::remove_reference_t<wrapped_first_t>,
290  config_with_output_t>::type;
291  using alignment_result_t = alignment_result<alignment_result_value_t>;
292  using callback_on_result_t = std::function<void(alignment_result_t)>;
293  // Define the function wrapper type.
294  using function_wrapper_t = std::function<void(indexed_sequence_pair_chunk_t, callback_on_result_t)>;
295 
296  // Capture the alignment result type.
297  auto config_with_result_type = config_with_output | align_cfg::detail::result_type<alignment_result_t>{};
298 
299  // ----------------------------------------------------------------------------
300  // Test some basic preconditions
301  // ----------------------------------------------------------------------------
302 
303  using alignment_contract_t = alignment_contract<sequences_t, config_with_output_t>;
304 
305  static_assert(alignment_contract_t::expects_alignment_configuration(),
306  "Alignment configuration error: "
307  "The alignment can only be configured with alignment configurations.");
308 
309  static_assert(alignment_contract_t::expects_tuple_like_value_type(),
310  "Alignment configuration error: "
311  "The value type of the sequence ranges must model the seqan3::tuple_like and must contain "
312  "exactly 2 elements.");
313 
314  static_assert(alignment_contract_t::expects_valid_scoring_scheme(),
315  "Alignment configuration error: "
316  "Either the scoring scheme was not configured or the given scoring scheme cannot be invoked with "
317  "the value types of the passed sequences.");
318 
319  // ----------------------------------------------------------------------------
320  // Configure the algorithm
321  // ----------------------------------------------------------------------------
322 
323  // Use default edit distance if gaps are not set.
324  align_cfg::gap_cost_affine edit_gap_cost{};
325  auto const & gap_cost = config_with_result_type.get_or(edit_gap_cost);
326  auto const & scoring_scheme = get<align_cfg::scoring_scheme>(cfg).scheme;
327 
328  if constexpr (config_t::template exists<seqan3::align_cfg::method_global>())
329  {
330  // Only use edit distance if ...
331  auto method_global_cfg = get<seqan3::align_cfg::method_global>(config_with_result_type);
332  // Only use edit distance if ...
333  if (gap_cost.open_score == 0 && // gap open score is not set,
334  !(method_global_cfg.free_end_gaps_sequence2_leading ||
335  method_global_cfg.free_end_gaps_sequence2_trailing) && // none of the free end gaps are set for second seq,
336  (method_global_cfg.free_end_gaps_sequence1_leading ==
337  method_global_cfg.free_end_gaps_sequence1_trailing)) // free ends for leading and trailing gaps are equal in first seq.
338  {
339  // TODO: Instead of relying on nucleotide scoring schemes we need to be able to determine the edit distance
340  // option via the scheme.
341  if constexpr (is_type_specialisation_of_v<std::remove_cvref_t<decltype(scoring_scheme)>,
342  nucleotide_scoring_scheme>)
343  {
344  if ((scoring_scheme.score('A'_dna15, 'A'_dna15) == 0) &&
345  (scoring_scheme.score('A'_dna15, 'C'_dna15)) == -1)
346  {
347  return std::pair{configure_edit_distance<function_wrapper_t>(config_with_result_type),
348  config_with_result_type};
349  }
350  }
351  }
352  }
353 
354  // ----------------------------------------------------------------------------
355  // Check if invalid configuration was used.
356  // ----------------------------------------------------------------------------
357 
358  // Do not allow min score configuration for alignments not computing the edit distance.
359  if (config_t::template exists<align_cfg::min_score>())
360  throw invalid_alignment_configuration{"The align_cfg::min_score configuration is only allowed for the "
361  "specific edit distance computation."};
362  // Configure the alignment algorithm.
363  return std::pair{configure_scoring_scheme<function_wrapper_t>(config_with_result_type),
364  config_with_result_type};
365  }
366 
367 private:
377  template <typename config_t>
378  static constexpr auto maybe_default_output(config_t const & config) noexcept
379  {
380  using traits_t = alignment_configuration_traits<config_t>;
381 
382  if constexpr (traits_t::has_output_configuration)
383  return config;
384  else
385  return config | align_cfg::output_score{} |
386  align_cfg::output_begin_position{} |
387  align_cfg::output_end_position{} |
388  align_cfg::output_alignment{} |
389  align_cfg::output_sequence1_id{} |
390  align_cfg::output_sequence2_id{};
391  }
392 
398  template <typename function_wrapper_t, typename config_t>
399  static constexpr function_wrapper_t configure_edit_distance(config_t const & cfg)
400  {
401  using traits_t = alignment_configuration_traits<config_t>;
402 
403  // ----------------------------------------------------------------------------
404  // Unsupported configurations
405  // ----------------------------------------------------------------------------
406 
407  if constexpr (traits_t::is_banded)
408  throw invalid_alignment_configuration{"Banded alignments are yet not supported."};
409 
410  // ----------------------------------------------------------------------------
411  // Configure semi-global alignment
412  // ----------------------------------------------------------------------------
413 
414  // Get the value for the sequence ends configuration.
415  auto method_global_cfg = cfg.get_or(align_cfg::method_global{});
416 
417  auto configure_edit_traits = [&] (auto is_semi_global)
418  {
419  struct edit_traits_type
420  {
421  using is_semi_global_type [[maybe_unused]] = std::remove_cvref_t<decltype(is_semi_global)>;
422  };
423 
424  edit_distance_algorithm<std::remove_cvref_t<config_t>, edit_traits_type> algorithm{cfg};
425  return function_wrapper_t{std::move(algorithm)};
426  };
427 
428  // Check if it has free ends set for the first sequence trailing gaps.
429  auto has_free_ends_trailing = [&] (auto first) constexpr
430  {
431  if constexpr (!decltype(first)::value)
432  {
433  return configure_edit_traits(std::false_type{});
434  }
435  else // Resolve correct property at runtime.
436  {
437  if (method_global_cfg.free_end_gaps_sequence1_trailing)
438  return configure_edit_traits(std::true_type{});
439  else
440  return configure_edit_traits(std::false_type{});
441  }
442  };
443 
444  // Check if it has free ends set for the first sequence leading gaps.
445  if (method_global_cfg.free_end_gaps_sequence1_leading)
446  return has_free_ends_trailing(std::true_type{});
447  else
448  return has_free_ends_trailing(std::false_type{});
449  }
450 
467  template <typename function_wrapper_t, typename config_t>
468  static constexpr function_wrapper_t configure_scoring_scheme(config_t const & cfg);
469 
484  template <typename function_wrapper_t, typename ...policies_t, typename config_t>
485  static constexpr function_wrapper_t make_algorithm(config_t const & cfg)
486  {
487  using traits_t = alignment_configuration_traits<config_t>;
488 
489  // Temporarily we will use the new and the old alignment implementation in order to
490  // refactor step-by-step to the new implementation. The new implementation will be tested in
491  // macrobenchmarks to show that it maintains a high performance.
492 
493  // Use old alignment implementation if...
494  if constexpr (traits_t::is_local || // it is a local alignment,
495  traits_t::is_debug || // it runs in debug mode,
496  traits_t::compute_sequence_alignment || // it computes more than the begin position.
497  (traits_t::is_banded && traits_t::compute_begin_positions) || // banded && more than end positions.
498  (traits_t::is_vectorised && traits_t::compute_end_positions)) // simd and more than the score.
499  {
500  using matrix_policy_t = typename select_matrix_policy<traits_t>::type;
501  using gap_policy_t = typename select_gap_policy<traits_t>::type;
502  using find_optimum_t = typename select_find_optimum_policy<traits_t>::type;
503  using gap_init_policy_t = deferred_crtp_base<affine_gap_init_policy>;
504 
505  return alignment_algorithm<config_t, matrix_policy_t, gap_policy_t, find_optimum_t, gap_init_policy_t, policies_t...>{cfg};
506  }
507  else // Use new alignment algorithm implementation.
508  {
509  //----------------------------------------------------------------------------------------------------------
510  // Configure the optimum tracker policy.
511  //----------------------------------------------------------------------------------------------------------
512 
513  using scalar_optimum_updater_t = std::conditional_t<traits_t::is_banded,
514  max_score_banded_updater,
515  max_score_updater>;
516 
517  using optimum_tracker_policy_t =
518  lazy_conditional_t<traits_t::is_vectorised,
519  lazy<policy_optimum_tracker_simd, config_t, max_score_updater_simd_global>,
520  lazy<policy_optimum_tracker, config_t, scalar_optimum_updater_t>>;
521 
522  //----------------------------------------------------------------------------------------------------------
523  // Configure the gap scheme policy.
524  //----------------------------------------------------------------------------------------------------------
525 
526  using gap_cost_policy_t = typename select_gap_recursion_policy<config_t>::type;
527 
528  //----------------------------------------------------------------------------------------------------------
529  // Configure the result builder policy.
530  //----------------------------------------------------------------------------------------------------------
531 
532  using result_builder_policy_t = policy_alignment_result_builder<config_t>;
533 
534  //----------------------------------------------------------------------------------------------------------
535  // Configure the scoring scheme policy.
536  //----------------------------------------------------------------------------------------------------------
537 
538  using alignment_method_t = std::conditional_t<traits_t::is_global,
541 
542  using score_t = typename traits_t::score_type;
543  using scoring_scheme_t = typename traits_t::scoring_scheme_type;
544  constexpr bool is_aminoacid_scheme = is_type_specialisation_of_v<scoring_scheme_t, aminoacid_scoring_scheme>;
545 
546  using simple_simd_scheme_t = lazy_conditional_t<traits_t::is_vectorised,
547  lazy<simd_match_mismatch_scoring_scheme,
548  score_t,
549  typename traits_t::scoring_scheme_alphabet_type,
550  alignment_method_t>,
551  void>;
552  using matrix_simd_scheme_t = lazy_conditional_t<traits_t::is_vectorised,
553  lazy<simd_matrix_scoring_scheme,
554  score_t,
555  typename traits_t::scoring_scheme_alphabet_type,
556  alignment_method_t>,
557  void>;
558 
559  using alignment_scoring_scheme_t = std::conditional_t<traits_t::is_vectorised,
560  std::conditional_t<is_aminoacid_scheme,
561  matrix_simd_scheme_t,
562  simple_simd_scheme_t>,
563  scoring_scheme_t>;
564 
565  using scoring_scheme_policy_t = policy_scoring_scheme<config_t, alignment_scoring_scheme_t>;
566 
567  //----------------------------------------------------------------------------------------------------------
568  // Configure the alignment matrix policy.
569  //----------------------------------------------------------------------------------------------------------
570 
571  using score_matrix_t = score_matrix_single_column<score_t>;
572  using trace_matrix_t = trace_matrix_full<trace_directions>;
573 
574  using alignment_matrix_t = std::conditional_t<traits_t::requires_trace_information,
575  combined_score_and_trace_matrix<score_matrix_t,
576  trace_matrix_t>,
577  score_matrix_t>;
578  using alignment_matrix_policy_t = policy_alignment_matrix<traits_t, alignment_matrix_t>;
579 
580  //----------------------------------------------------------------------------------------------------------
581  // Configure the final alignment algorithm.
582  //----------------------------------------------------------------------------------------------------------
583 
584  using algorithm_t = select_alignment_algorithm_t<traits_t,
585  config_t,
586  gap_cost_policy_t,
587  optimum_tracker_policy_t,
588  result_builder_policy_t,
589  scoring_scheme_policy_t,
590  alignment_matrix_policy_t>;
591  return algorithm_t{cfg};
592  }
593  }
594 };
595 
597 template <typename function_wrapper_t, typename config_t>
598 constexpr function_wrapper_t alignment_configurator::configure_scoring_scheme(config_t const & cfg)
599 {
600  using traits_t = alignment_configuration_traits<config_t>;
601 
602  using scoring_scheme_t = typename traits_t::scoring_scheme_type;
603  constexpr bool is_aminoacid_scheme = is_type_specialisation_of_v<scoring_scheme_t, aminoacid_scoring_scheme>;
604  using alignment_type_t = typename std::conditional_t<traits_t::is_global,
607 
608  using simple_simd_scheme_t = lazy_conditional_t<traits_t::is_vectorised,
609  lazy<simd_match_mismatch_scoring_scheme,
610  typename traits_t::score_type,
611  typename traits_t::scoring_scheme_alphabet_type,
612  alignment_type_t>,
613  void>;
614  using matrix_simd_scheme_t = lazy_conditional_t<traits_t::is_vectorised,
615  lazy<simd_matrix_scoring_scheme,
616  typename traits_t::score_type,
617  typename traits_t::scoring_scheme_alphabet_type,
618  alignment_type_t>,
619  void>;
620 
621  using alignment_scoring_scheme_t = std::conditional_t<traits_t::is_vectorised,
623  scoring_scheme_t>;
624 
625  using scoring_scheme_policy_t = deferred_crtp_base<scoring_scheme_policy, alignment_scoring_scheme_t>;
626  return make_algorithm<function_wrapper_t, scoring_scheme_policy_t>(cfg);
627 }
629 } // namespace seqan3::detail
Provides seqan3::detail::affine_gap_init_policy.
Provides seqan3::detail::affine_gap_policy.
Provides configuration for alignment output.
Provides seqan3::align_cfg::detail::result_type.
Provides seqan3::detail::align_result_selector.
Provides concepts needed internally for the alignment algorithms.
Provides helper type traits for the configuration and execution of the alignment algorithm.
Provides seqan3::detail::alignment_algorithm.
Provides seqan3::detail::alignment_matrix_policy.
Provides seqan3::alignment_result.
Provides seqan3::detail::alignment_score_matrix_one_column.
Provides seqan3::detail::alignment_score_matrix_one_column_banded.
Provides seqan3::detail::alignment_trace_matrix_full.
Provides seqan3::detail::alignment_trace_matrix_full_banded.
Provides seqan3::aminoacid_scoring_scheme.
Sets the global alignment method.
Definition: align_config_method.hpp:107
Sets the local alignment method.
Definition: align_config_method.hpp:43
Provides seqan3::detail::combined_score_and_trace_matrix.
Provides seqan3::detail::deferred_crtp_base.
Provides type traits for working with templates.
Provides seqan3::detail::edit_distance_algorithm.
Provides seqan3::detail::find_optimum_policy.
auto const move
A view that turns lvalue-references into rvalue-references.
Definition: move.hpp:74
A concept that requires that type be able to score two letters.
Whether a type behaves like a tuple.
Provides lazy template instantiation traits.
Provides seqan3::nucleotide_scoring_scheme.
Provides seqan3::detail::pairwise_alignment_algorithm.
Provides seqan3::detail::pairwise_alignment_algorithm.
Provides seqan3::detail::policy_affine_gap_recursion.
Provides seqan3::detail::policy_affine_gap_recursion_banded.
Provides seqan3::detail::policy_affine_gap_with_trace_recursion.
Provides seqan3::detail::policy_affine_gap_with_trace_recursion_banded.
Provides seqan3::detail::policy_alignment_matrix.
Provides seqan3::detail::policy_alignment_result_builder.
Provides seqan3::detail::policy_optimum_tracker.
Provides seqan3::detail::policy_optimum_tracker_simd.
Provides seqan3::detail::policy_scoring_scheme.
Provides seqan3::detail::score_matrix_single_column.
Provides seqan3::detail::scoring_scheme_policy.
Provides seqan3::detail::simd_affine_gap_policy.
Provides seqan3::detail::simd_find_optimum_policy.
Provides seqan3::detail::simd_match_mismatch_scoring_scheme.
Provides seqan3::detail::simd_matrix_scoring_scheme.
Provides seqan3::detail::trace_matrix_full.
Provides seqan3::simd::simd_type.
Provides seqan3::tuple_like.
Provides seqan3::views::type_reduce.
Provides seqan3::views::zip.