SeqAn3  3.0.2
The Modern C++ library for sequence analysis.
alignment_configurator.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2020, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2020, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
13 #pragma once
14 
15 #include <functional>
16 #include <tuple>
17 #include <utility>
18 #include <vector>
19 
59 
60 namespace seqan3::detail
61 {
62 
72 template <typename range_type,
73  typename alignment_config_type>
74 struct alignment_contract
75 {
76 private:
80  using unref_range_type = std::remove_reference_t<range_type>;
83  using first_seq_t = std::tuple_element_t<0, std::ranges::range_value_t<unref_range_type>>;
85  using second_seq_t = std::tuple_element_t<1, std::ranges::range_value_t<unref_range_type>>;
87 
88 public:
90  constexpr static bool expects_tuple_like_value_type()
91  {
93  std::tuple_size_v<std::ranges::range_value_t<unref_range_type>> == 2;
94  }
95 
97  constexpr static bool expects_valid_scoring_scheme()
98  {
99  if constexpr (alignment_config_type::template exists<align_cfg::scoring_scheme>())
100  {
101  using scoring_type =
103  decltype(get<align_cfg::scoring_scheme>(std::declval<alignment_config_type>()).scheme)>;
104  return static_cast<bool>(scoring_scheme_for<scoring_type,
105  std::ranges::range_value_t<first_seq_t>,
106  std::ranges::range_value_t<second_seq_t>>);
107  }
108  else
109  {
110  return false;
111  }
112  }
113 
115  constexpr static bool expects_alignment_configuration()
116  {
117  const bool is_global = alignment_config_type::template exists<seqan3::align_cfg::method_global>();
118  const bool is_local = alignment_config_type::template exists<seqan3::align_cfg::method_local>();
119 
120  return (is_global || is_local);
121  }
122 };
123 
128 struct alignment_configurator
129 {
130 private:
131 
135  template <typename traits_t>
136  struct select_matrix_policy
137  {
138  private:
140  static constexpr bool only_coordinates = !(traits_t::compute_begin_positions ||
141  traits_t::compute_sequence_alignment);
142 
144  using score_matrix_t = std::conditional_t<traits_t::is_banded,
145  alignment_score_matrix_one_column_banded<typename traits_t::score_type>,
146  alignment_score_matrix_one_column<typename traits_t::score_type>>;
148  using trace_matrix_t = std::conditional_t<traits_t::is_banded,
149  alignment_trace_matrix_full_banded<typename traits_t::trace_type,
150  only_coordinates>,
151  alignment_trace_matrix_full<typename traits_t::trace_type,
152  only_coordinates>>;
153 
154  public:
156  using type = deferred_crtp_base<alignment_matrix_policy, score_matrix_t, trace_matrix_t>;
157  };
158 
162  template <typename traits_t>
163  struct select_gap_policy
164  {
165  private:
167  using score_t = typename traits_t::score_type;
169  using is_local_t = std::bool_constant<traits_t::is_local>;
170 
171  public:
173  using type = std::conditional_t<traits_t::is_vectorised,
174  deferred_crtp_base<simd_affine_gap_policy, score_t, is_local_t>,
175  deferred_crtp_base<affine_gap_policy, score_t, is_local_t>>;
176  };
177 
184  template <typename traits_t>
185  struct select_find_optimum_policy
186  {
187  private:
189  using score_t = typename traits_t::score_type;
190 
191  public:
193  using type = std::conditional_t<traits_t::is_vectorised,
194  deferred_crtp_base<simd_find_optimum_policy, score_t>,
195  deferred_crtp_base<find_optimum_policy>>;
196  };
197 
199  template <typename traits_t, typename ...args_t>
200  using select_alignment_algorithm_t = lazy_conditional_t<traits_t::is_banded,
201  lazy<pairwise_alignment_algorithm_banded, args_t...>,
202  lazy<pairwise_alignment_algorithm, args_t...>>;
203 
204 public:
231  template <align_pairwise_range_input sequences_t, typename config_t>
233  requires is_type_specialisation_of_v<config_t, configuration>
235  static constexpr auto configure(config_t const & cfg)
236  {
237  auto config_with_output = maybe_default_output(cfg);
238  using config_with_output_t = decltype(config_with_output);
239 
240  // ----------------------------------------------------------------------------
241  // Configure the type-erased alignment function.
242  // ----------------------------------------------------------------------------
243 
244  using first_seq_t = std::tuple_element_t<0, std::ranges::range_value_t<sequences_t>>;
245  using second_seq_t = std::tuple_element_t<1, std::ranges::range_value_t<sequences_t>>;
246 
247  using wrapped_first_t = type_reduce_view<first_seq_t &>;
248  using wrapped_second_t = type_reduce_view<second_seq_t &>;
249 
250  // The alignment executor passes a chunk over an indexed sequence pair range to the alignment algorithm.
251  using indexed_sequence_pair_range_t = typename chunked_indexed_sequence_pairs<sequences_t>::type;
252  using indexed_sequence_pair_chunk_t = std::ranges::range_value_t<indexed_sequence_pair_range_t>;
253 
254  // Select the result type based on the sequences and the configuration.
255  using alignment_result_value_t = typename align_result_selector<std::remove_reference_t<wrapped_first_t>,
257  config_with_output_t>::type;
258  using alignment_result_t = alignment_result<alignment_result_value_t>;
259  using callback_on_result_t = std::function<void(alignment_result_t)>;
260  // Define the function wrapper type.
261  using function_wrapper_t = std::function<void(indexed_sequence_pair_chunk_t, callback_on_result_t)>;
262 
263  // Capture the alignment result type.
264  auto config_with_result_type = config_with_output | align_cfg::detail::result_type<alignment_result_t>{};
265 
266  // ----------------------------------------------------------------------------
267  // Test some basic preconditions
268  // ----------------------------------------------------------------------------
269 
270  using alignment_contract_t = alignment_contract<sequences_t, config_with_output_t>;
271 
272  static_assert(alignment_contract_t::expects_alignment_configuration(),
273  "Alignment configuration error: "
274  "The alignment can only be configured with alignment configurations.");
275 
276  static_assert(alignment_contract_t::expects_tuple_like_value_type(),
277  "Alignment configuration error: "
278  "The value type of the sequence ranges must model the seqan3::tuple_like and must contain "
279  "exactly 2 elements.");
280 
281  static_assert(alignment_contract_t::expects_valid_scoring_scheme(),
282  "Alignment configuration error: "
283  "Either the scoring scheme was not configured or the given scoring scheme cannot be invoked with "
284  "the value types of the passed sequences.");
285 
286  // ----------------------------------------------------------------------------
287  // Configure the algorithm
288  // ----------------------------------------------------------------------------
289 
290  // Use default edit distance if gaps are not set.
291  align_cfg::gap_cost_affine edit_gap_cost{};
292  auto const & gap_cost = config_with_result_type.get_or(edit_gap_cost);
293  auto const & scoring_scheme = get<align_cfg::scoring_scheme>(cfg).scheme;
294 
295  if constexpr (config_t::template exists<seqan3::align_cfg::method_global>())
296  {
297  // Only use edit distance if ...
298  auto method_global_cfg = get<seqan3::align_cfg::method_global>(config_with_result_type);
299  // Only use edit distance if ...
300  if (gap_cost.open_score == 0 && // gap open score is not set,
301  !(method_global_cfg.free_end_gaps_sequence2_leading ||
302  method_global_cfg.free_end_gaps_sequence2_trailing) && // none of the free end gaps are set for second seq,
303  (method_global_cfg.free_end_gaps_sequence1_leading ==
304  method_global_cfg.free_end_gaps_sequence1_trailing)) // free ends for leading and trailing gaps are equal in first seq.
305  {
306  // TODO: Instead of relying on nucleotide scoring schemes we need to be able to determine the edit distance
307  // option via the scheme.
308  if constexpr (is_type_specialisation_of_v<std::remove_cvref_t<decltype(scoring_scheme)>,
309  nucleotide_scoring_scheme>)
310  {
311  if ((scoring_scheme.score('A'_dna15, 'A'_dna15) == 0) &&
312  (scoring_scheme.score('A'_dna15, 'C'_dna15)) == -1)
313  {
314  return std::pair{configure_edit_distance<function_wrapper_t>(config_with_result_type),
315  config_with_result_type};
316  }
317  }
318  }
319  }
320 
321  // ----------------------------------------------------------------------------
322  // Check if invalid configuration was used.
323  // ----------------------------------------------------------------------------
324 
325  // Do not allow min score configuration for alignments not computing the edit distance.
326  if (config_t::template exists<align_cfg::min_score>())
327  throw invalid_alignment_configuration{"The align_cfg::min_score configuration is only allowed for the "
328  "specific edit distance computation."};
329  // Configure the alignment algorithm.
330  return std::pair{configure_scoring_scheme<function_wrapper_t>(config_with_result_type),
331  config_with_result_type};
332  }
333 
334 private:
344  template <typename config_t>
345  static constexpr auto maybe_default_output(config_t const & config) noexcept
346  {
347  using traits_t = alignment_configuration_traits<config_t>;
348 
349  if constexpr (traits_t::has_output_configuration)
350  return config;
351  else
352  return config | align_cfg::output_score{} |
353  align_cfg::output_begin_position{} |
354  align_cfg::output_end_position{} |
355  align_cfg::output_alignment{} |
356  align_cfg::output_sequence1_id{} |
357  align_cfg::output_sequence2_id{};
358  }
359 
365  template <typename function_wrapper_t, typename config_t>
366  static constexpr function_wrapper_t configure_edit_distance(config_t const & cfg)
367  {
368  using traits_t = alignment_configuration_traits<config_t>;
369 
370  // ----------------------------------------------------------------------------
371  // Unsupported configurations
372  // ----------------------------------------------------------------------------
373 
374  if constexpr (traits_t::is_banded)
375  throw invalid_alignment_configuration{"Banded alignments are yet not supported."};
376 
377  // ----------------------------------------------------------------------------
378  // Configure semi-global alignment
379  // ----------------------------------------------------------------------------
380 
381  // Get the value for the sequence ends configuration.
382  auto method_global_cfg = cfg.get_or(align_cfg::method_global{});
383 
384  auto configure_edit_traits = [&] (auto is_semi_global)
385  {
386  struct edit_traits_type
387  {
388  using is_semi_global_type [[maybe_unused]] = std::remove_cvref_t<decltype(is_semi_global)>;
389  };
390 
391  edit_distance_algorithm<std::remove_cvref_t<config_t>, edit_traits_type> algorithm{cfg};
392  return function_wrapper_t{std::move(algorithm)};
393  };
394 
395  // Check if it has free ends set for the first sequence trailing gaps.
396  auto has_free_ends_trailing = [&] (auto first) constexpr
397  {
398  if constexpr (!decltype(first)::value)
399  {
400  return configure_edit_traits(std::false_type{});
401  }
402  else // Resolve correct property at runtime.
403  {
404  if (method_global_cfg.free_end_gaps_sequence1_trailing)
405  return configure_edit_traits(std::true_type{});
406  else
407  return configure_edit_traits(std::false_type{});
408  }
409  };
410 
411  // Check if it has free ends set for the first sequence leading gaps.
412  if (method_global_cfg.free_end_gaps_sequence1_leading)
413  return has_free_ends_trailing(std::true_type{});
414  else
415  return has_free_ends_trailing(std::false_type{});
416  }
417 
434  template <typename function_wrapper_t, typename config_t>
435  static constexpr function_wrapper_t configure_scoring_scheme(config_t const & cfg);
436 
451  template <typename function_wrapper_t, typename ...policies_t, typename config_t>
452  static constexpr function_wrapper_t make_algorithm(config_t const & cfg)
453  {
454  using traits_t = alignment_configuration_traits<config_t>;
455 
456  // Temporarily we will use the new and the old alignment implementation in order to
457  // refactor step-by-step to the new implementation. The new implementation will be tested in
458  // macrobenchmarks to show that it maintains a high performance.
459 
460  // Use old alignment implementation if...
461  if constexpr (traits_t::is_local || // it is a local alignment,
462  traits_t::is_debug || // it runs in debug mode,
463  (traits_t::compute_begin_positions || traits_t::compute_sequence_alignment) || // it computes more than the end position.
464  (traits_t::is_banded && traits_t::compute_end_positions) || // banded
465  (traits_t::is_vectorised && traits_t::is_banded) || // it is vectorised and banded,
466  (traits_t::is_vectorised && traits_t::compute_end_positions)) // simd and more than the score.
467  {
468  using matrix_policy_t = typename select_matrix_policy<traits_t>::type;
469  using gap_policy_t = typename select_gap_policy<traits_t>::type;
470  using find_optimum_t = typename select_find_optimum_policy<traits_t>::type;
471  using gap_init_policy_t = deferred_crtp_base<affine_gap_init_policy>;
472 
473  return alignment_algorithm<config_t, matrix_policy_t, gap_policy_t, find_optimum_t, gap_init_policy_t, policies_t...>{cfg};
474  }
475  else // Use new alignment algorithm implementation.
476  {
477  using optimum_tracker_policy_t =
478  lazy_conditional_t<traits_t::is_vectorised,
479  lazy<policy_optimum_tracker_simd, config_t, max_score_updater_simd_global>,
480  lazy<policy_optimum_tracker, config_t, max_score_updater>>;
481 
482  using gap_cost_policy_t = std::conditional_t<traits_t::is_banded,
483  policy_affine_gap_recursion_banded<config_t>,
484  policy_affine_gap_recursion<config_t>>;
485  using result_builder_policy_t = policy_alignment_result_builder<config_t>;
486 
487  using alignment_method_t = typename std::conditional_t<traits_t::is_global,
490 
491  using score_t = typename traits_t::score_type;
492  using alignment_scoring_scheme_t =
493  lazy_conditional_t<traits_t::is_vectorised,
494  lazy<simd_match_mismatch_scoring_scheme,
495  score_t,
496  typename traits_t::scoring_scheme_alphabet_type,
497  alignment_method_t>,
498  typename traits_t::scoring_scheme_type>;
499 
500  using scoring_scheme_policy_t = policy_scoring_scheme<config_t, alignment_scoring_scheme_t>;
501  using alignment_matrix_policy_t = policy_alignment_matrix<traits_t, score_matrix_single_column<score_t>>;
502 
503  using algorithm_t = select_alignment_algorithm_t<traits_t,
504  config_t,
505  gap_cost_policy_t,
506  optimum_tracker_policy_t,
507  result_builder_policy_t,
508  scoring_scheme_policy_t,
509  alignment_matrix_policy_t>;
510  return algorithm_t{cfg};
511  }
512  }
513 };
514 
516 template <typename function_wrapper_t, typename config_t>
517 constexpr function_wrapper_t alignment_configurator::configure_scoring_scheme(config_t const & cfg)
518 {
519  using traits_t = alignment_configuration_traits<config_t>;
520 
521  using alignment_scoring_scheme_t =
522  lazy_conditional_t<traits_t::is_vectorised,
523  lazy<simd_match_mismatch_scoring_scheme,
524  typename traits_t::score_type,
525  typename traits_t::scoring_scheme_alphabet_type,
526  typename std::conditional_t<traits_t::is_global,
529  typename traits_t::scoring_scheme_type>;
530 
531  using scoring_scheme_policy_t = deferred_crtp_base<scoring_scheme_policy, alignment_scoring_scheme_t>;
532  return make_algorithm<function_wrapper_t, scoring_scheme_policy_t>(cfg);
533 }
535 } // namespace seqan3::detail
alignment_trace_matrix_full_banded.hpp
Provides seqan3::detail::alignment_trace_matrix_full_banded.
zip.hpp
Provides seqan3::views::zip.
std::bool_constant
utility
type_traits.hpp
Provides helper type traits for the configuration and execution of the alignment algorithm.
edit_distance_algorithm.hpp
Provides seqan3::detail::edit_distance_algorithm.
affine_gap_init_policy.hpp
Provides seqan3::detail::affine_gap_init_policy.
functional
tuple.hpp
Provides seqan3::tuple_like.
std::pair
policy_alignment_result_builder.hpp
Provides seqan3::detail::policy_alignment_result_builder.
align_config_result_type.hpp
Provides seqan3::align_cfg::detail::result_type.
seqan3::align_cfg::method_local
Sets the local alignment method.
Definition: align_config_method.hpp:43
vector
policy_alignment_matrix.hpp
Provides seqan3::detail::policy_alignment_matrix.
template_inspection.hpp
Provides seqan3::type_list and auxiliary type traits.
tuple
nucleotide_scoring_scheme.hpp
Provides seqan3::nucleotide_scoring_scheme.
policy_optimum_tracker.hpp
Provides seqan3::detail::policy_optimum_tracker.
simd_match_mismatch_scoring_scheme.hpp
Provides seqan3::detail::simd_match_mismatch_scoring_scheme.
std::function
pairwise_alignment_algorithm.hpp
Provides seqan3::detail::pairwise_alignment_algorithm.
alignment_algorithm.hpp
Provides seqan3::detail::alignment_algorithm.
affine_gap_policy.hpp
Provides seqan3::detail::affine_gap_policy.
score_matrix_single_column.hpp
Provides seqan3::detail::score_matrix_single_column.
simd.hpp
Provides seqan3::simd::simd_type.
seqan3::views::move
auto const move
A view that turns lvalue-references into rvalue-references.
Definition: move.hpp:68
alignment_score_matrix_one_column.hpp
Provides seqan3::detail::alignment_score_matrix_one_column.
type_reduce.hpp
Provides seqan3::views::type_reduce.
align_config_output.hpp
Provides configuration for alignment output.
simd_find_optimum_policy.hpp
Provides seqan3::detail::simd_find_optimum_policy.
concept.hpp
Provides concepts needed internally for the alignment algorithms.
alignment_matrix_policy.hpp
Provides seqan3::detail::alignment_matrix_policy.
simd_affine_gap_policy.hpp
Provides seqan3::detail::simd_affine_gap_policy.
alignment_result.hpp
Provides seqan3::alignment_result.
pairwise_alignment_algorithm_banded.hpp
Provides seqan3::detail::pairwise_alignment_algorithm.
tuple_like
Whether a type behaves like a tuple.
policy_scoring_scheme.hpp
Provides seqan3::detail::policy_scoring_scheme.
scoring_scheme_for
A concept that requires that type be able to score two letters.
std::remove_reference_t
find_optimum_policy.hpp
Provides seqan3::detail::find_optimum_policy.
seqan3::align_cfg::method_global
Sets the global alignment method.
Definition: align_config_method.hpp:106
std::remove_cvref_t
policy_affine_gap_recursion_banded.hpp
Provides seqan3::detail::policy_affine_gap_recursion_banded.
policy_affine_gap_recursion.hpp
Provides seqan3::detail::policy_affine_gap_recursion.
std::conditional_t
deferred_crtp_base.hpp
Provides seqan3::detail::deferred_crtp_base.
scoring_scheme_policy.hpp
Provides seqan3::detail::scoring_scheme_policy.
align_result_selector.hpp
Provides seqan3::detail::align_result_selector.
lazy.hpp
Provides lazy template instantiation traits.
policy_optimum_tracker_simd.hpp
Provides seqan3::detail::policy_optimum_tracker_simd.
alignment_score_matrix_one_column_banded.hpp
Provides seqan3::detail::alignment_score_matrix_one_column_banded.
alignment_trace_matrix_full.hpp
Provides seqan3::detail::alignment_trace_matrix_full.
type_list.hpp
Provides seqan3::type_list.