SeqAn3  3.0.2
The Modern C++ library for sequence analysis.
alignment_algorithm.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2020, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2020, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
13 #pragma once
14 
15 #include <memory>
16 #include <optional>
17 #include <type_traits>
18 
27 
39 #include <seqan3/std/iterator>
40 #include <seqan3/std/ranges>
41 
42 namespace seqan3::detail
43 {
44 
78 template <typename config_t, typename ...algorithm_policies_t>
79 class alignment_algorithm :
80  public invoke_deferred_crtp_base<algorithm_policies_t, alignment_algorithm<config_t, algorithm_policies_t...>>...
81 {
82 private:
83 
85  using traits_t = alignment_configuration_traits<config_t>;
87  using alignment_column_t = decltype(std::declval<alignment_algorithm>().current_alignment_column());
89  using alignment_column_iterator_t = std::ranges::iterator_t<alignment_column_t>;
91  using alignment_result_t = typename traits_t::alignment_result_type;
92 
93  static_assert(!std::same_as<alignment_result_t, empty_type>, "Alignment result type was not configured.");
94 
96  using score_debug_matrix_t =
97  std::conditional_t<traits_t::is_debug,
98  two_dimensional_matrix<std::optional<typename traits_t::original_score_type>,
100  matrix_major_order::column>,
101  empty_type>;
103  using trace_debug_matrix_t =
104  std::conditional_t<traits_t::is_debug,
105  two_dimensional_matrix<std::optional<trace_directions>,
107  matrix_major_order::column>,
108  empty_type>;
109 
110 public:
114  constexpr alignment_algorithm() = default;
115  constexpr alignment_algorithm(alignment_algorithm const &) = default;
116  constexpr alignment_algorithm(alignment_algorithm &&) = default;
117  constexpr alignment_algorithm & operator=(alignment_algorithm const &) = default;
118  constexpr alignment_algorithm & operator=(alignment_algorithm &&) = default;
119  ~alignment_algorithm() = default;
120 
129  explicit constexpr alignment_algorithm(config_t const & cfg) :
130  invoke_deferred_crtp_base<algorithm_policies_t, alignment_algorithm<config_t, algorithm_policies_t...>>{cfg}...,
131  cfg_ptr{std::make_shared<config_t>(cfg)}
132  {
133  this->scoring_scheme = seqan3::get<align_cfg::scoring_scheme>(*cfg_ptr).scheme;
134  this->initialise_alignment_state(*cfg_ptr);
135  }
137 
183  template <indexed_sequence_pair_range indexed_sequence_pairs_t, typename callback_t>
185  requires (!traits_t::is_vectorised) && std::invocable<callback_t, alignment_result_t>
187  void operator()(indexed_sequence_pairs_t && indexed_sequence_pairs, callback_t && callback)
188  {
189  using std::get;
190 
191  for (auto && [sequence_pair, idx] : indexed_sequence_pairs)
192  compute_single_pair(idx, get<0>(sequence_pair), get<1>(sequence_pair), callback);
193  }
194 
196  template <indexed_sequence_pair_range indexed_sequence_pairs_t, typename callback_t>
198  requires traits_t::is_vectorised && std::invocable<callback_t, alignment_result_t>
200  void operator()(indexed_sequence_pairs_t && indexed_sequence_pairs, callback_t && callback)
201  {
202  assert(cfg_ptr != nullptr);
203 
204  static_assert(simd_concept<typename traits_t::score_type>, "Expected simd score type.");
205  static_assert(simd_concept<typename traits_t::trace_type>, "Expected simd trace type.");
206 
207  // Extract the batch of sequences for the first and the second sequence.
208  auto sequence1_range = indexed_sequence_pairs | views::get<0> | views::get<0>;
209  auto sequence2_range = indexed_sequence_pairs | views::get<0> | views::get<1>;
210 
211  // Initialise the find_optimum policy in the simd case.
212  this->initialise_find_optimum_policy(sequence1_range,
213  sequence2_range,
214  this->scoring_scheme.padding_match_score());
215 
216  // Convert batch of sequences to sequence of simd vectors.
217  auto simd_sequences1 = convert_batch_of_sequences_to_simd_vector(sequence1_range);
218  auto simd_sequences2 = convert_batch_of_sequences_to_simd_vector(sequence2_range);
219 
220  max_size_in_collection = std::pair{simd_sequences1.size(), simd_sequences2.size()};
221  // Reset the alignment state's optimum between executions of the alignment algorithm.
222  this->alignment_state.reset_optimum();
223 
224  compute_matrix(simd_sequences1, simd_sequences2);
225 
226  make_alignment_result(indexed_sequence_pairs, callback);
227  }
229 
230 private:
244  template <typename sequence_range_t>
245  constexpr auto convert_batch_of_sequences_to_simd_vector(sequence_range_t & sequences)
246  {
247  assert(static_cast<size_t>(std::ranges::distance(sequences)) <= traits_t::alignments_per_vector);
248 
249  using simd_score_t = typename traits_t::score_type;
250 
251  std::vector<simd_score_t, aligned_allocator<simd_score_t, alignof(simd_score_t)>> simd_sequence{};
252 
253  for (auto && simd_vector_chunk : sequences | views::to_simd<simd_score_t>(traits_t::padding_symbol))
254  for (auto && simd_vector : simd_vector_chunk)
255  simd_sequence.push_back(std::move(simd_vector));
256 
257  return simd_sequence;
258  }
259 
277  template <std::ranges::forward_range sequence1_t,
278  std::ranges::forward_range sequence2_t,
279  typename callback_t>
280  constexpr void compute_single_pair(size_t const idx,
281  sequence1_t && sequence1,
282  sequence2_t && sequence2,
283  callback_t & callback)
284  {
285  assert(cfg_ptr != nullptr);
286 
287  if constexpr (traits_t::is_debug)
288  initialise_debug_matrices(sequence1, sequence2);
289 
290  // Reset the alignment state's optimum between executions of the alignment algorithm.
291  this->alignment_state.reset_optimum();
292 
293  if constexpr (traits_t::is_banded)
294  {
295  using seqan3::get;
296  // Get the band and check if band configuration is valid.
297  auto const & band = get<align_cfg::band_fixed_size>(*cfg_ptr);
298  check_valid_band_parameter(sequence1, sequence2, band);
299  auto && [subsequence1, subsequence2] = this->slice_sequences(sequence1, sequence2, band);
300  // It would be great to use this interface here instead
301  compute_matrix(subsequence1, subsequence2, band);
302  make_alignment_result(idx, subsequence1, subsequence2, callback);
303  }
304  else
305  {
306  compute_matrix(sequence1, sequence2);
307  make_alignment_result(idx, sequence1, sequence2, callback);
308  }
309  }
310 
327  template <typename sequence1_t, typename sequence2_t>
328  constexpr void check_valid_band_parameter(sequence1_t && sequence1,
329  sequence2_t && sequence2,
330  align_cfg::band_fixed_size const & band)
331  {
332  static_assert(config_t::template exists<align_cfg::band_fixed_size>(),
333  "The band configuration is required for the banded alignment algorithm.");
334 
336  static_assert(std::is_signed_v<diff_type>, "Only signed types can be used to test the band parameters.");
337 
338  if (static_cast<diff_type>(band.lower_diagonal) > std::ranges::distance(sequence1))
339  {
340  throw invalid_alignment_configuration
341  {
342  "Invalid band error: The lower diagonal excludes the whole alignment matrix."
343  };
344  }
345 
346  if (static_cast<diff_type>(band.upper_diagonal) < -std::ranges::distance(sequence2))
347  {
348  throw invalid_alignment_configuration
349  {
350  "Invalid band error: The upper diagonal excludes the whole alignment matrix."
351  };
352  }
353  }
354 
367  template <typename sequence1_t, typename sequence2_t>
368  constexpr void initialise_debug_matrices(sequence1_t & sequence1, sequence2_t & sequence2)
369  {
370  size_t rows = std::ranges::distance(sequence2) + 1;
371  size_t cols = std::ranges::distance(sequence1) + 1;
372 
373  score_debug_matrix = score_debug_matrix_t{number_rows{rows}, number_cols{cols}};
374  trace_debug_matrix = trace_debug_matrix_t{number_rows{rows}, number_cols{cols}};
375  }
376 
384  template <typename sequence1_t, typename sequence2_t>
385  void compute_matrix(sequence1_t & sequence1, sequence2_t & sequence2)
387  requires (!traits_t::is_banded)
389  {
390  // ----------------------------------------------------------------------------
391  // Initialisation phase: allocate memory and initialise first column.
392  // ----------------------------------------------------------------------------
393 
394  this->allocate_matrix(sequence1, sequence2);
395  initialise_first_alignment_column(sequence2);
396 
397  // ----------------------------------------------------------------------------
398  // Recursion phase: compute column-wise the alignment matrix.
399  // ----------------------------------------------------------------------------
400 
401  for (auto const & seq1_value : sequence1)
402  {
403  compute_alignment_column<true>(seq1_value, sequence2);
404  finalise_last_cell_in_column(true);
405  }
406 
407  // ----------------------------------------------------------------------------
408  // Wrap up phase: track score in last column and prepare the alignment result.
409  // ----------------------------------------------------------------------------
410 
411  finalise_alignment();
412  }
413 
415  template <typename sequence1_t, typename sequence2_t>
416  void compute_matrix(sequence1_t & sequence1, sequence2_t & sequence2, align_cfg::band_fixed_size const & band)
418  requires traits_t::is_banded
420  {
421  // ----------------------------------------------------------------------------
422  // Initialisation phase: allocate memory and initialise first column.
423  // ----------------------------------------------------------------------------
424 
425  // Allocate and initialise first column.
426  this->allocate_matrix(sequence1, sequence2, band, this->alignment_state);
427  size_t last_row_index = this->score_matrix.band_row_index;
428  initialise_first_alignment_column(sequence2 | views::take(last_row_index));
429 
430  // ----------------------------------------------------------------------------
431  // 1st recursion phase: iterate as long as the band intersects with the first row.
432  // ----------------------------------------------------------------------------
433 
434  size_t sequence2_size = std::ranges::distance(sequence2);
435  for (auto const & seq1_value : sequence1 | views::take(this->score_matrix.band_col_index))
436  {
437  compute_alignment_column<true>(seq1_value, sequence2 | views::take(++last_row_index));
438  // Only if band reached last row of matrix the last cell might be tracked.
439  finalise_last_cell_in_column(last_row_index >= sequence2_size);
440  }
441 
442  // ----------------------------------------------------------------------------
443  // 2nd recursion phase: iterate until the end of the matrix.
444  // ----------------------------------------------------------------------------
445 
446  size_t first_row_index = 0;
447  for (auto const & seq1_value : sequence1 | views::drop(this->score_matrix.band_col_index))
448  {
449  // In the second phase the band moves in every column one base down on the second sequence.
450  compute_alignment_column<false>(seq1_value, sequence2 | views::slice(first_row_index++, ++last_row_index));
451  // Only if band reached last row of matrix the last cell might be tracked.
452  finalise_last_cell_in_column(last_row_index >= sequence2_size);
453  }
454 
455  // ----------------------------------------------------------------------------
456  // Wrap up phase: track score in last column and prepare the alignment result.
457  // ----------------------------------------------------------------------------
458 
459  finalise_alignment();
460  }
461 
474  template <typename sequence2_t>
475  auto initialise_first_alignment_column(sequence2_t && sequence2)
476  {
477  // Get the initial column.
478  alignment_column = this->current_alignment_column();
479  assert(!alignment_column.empty()); // Must contain at least one element.
480 
481  // Initialise first cell.
482  alignment_column_it = alignment_column.begin();
483  this->init_origin_cell(*alignment_column_it, this->alignment_state);
484 
485  // Initialise the remaining cells of this column.
486  for (auto it = std::ranges::begin(sequence2); it != std::ranges::end(sequence2); ++it)
487  this->init_column_cell(*++alignment_column_it, this->alignment_state);
488 
489  // Finalise the last cell of the initial column.
490  bool at_last_row = true;
491  if constexpr (traits_t::is_banded) // If the band reaches until the last row of the matrix.
492  at_last_row = static_cast<size_t>(this->score_matrix.band_row_index) == this->score_matrix.num_rows - 1;
493 
494  finalise_last_cell_in_column(at_last_row);
495  }
496 
512  template <bool initialise_first_cell, typename sequence1_value_t, typename sequence2_t>
513  void compute_alignment_column(sequence1_value_t const & seq1_value, sequence2_t && sequence2)
514  {
515  this->next_alignment_column(); // move to next column and set alignment column iterator accordingly.
516  alignment_column = this->current_alignment_column();
517  alignment_column_it = alignment_column.begin();
518 
519  auto seq2_it = std::ranges::begin(sequence2);
520 
521  if constexpr (initialise_first_cell) // Initialise first cell if it intersects with the first row of the matrix.
522  {
523  this->init_row_cell(*alignment_column_it, this->alignment_state);
524  }
525  else // Compute first cell of banded column if it does not intersect with the first row of the matrix.
526  {
527  this->compute_first_band_cell(*alignment_column_it,
528  this->alignment_state,
529  this->scoring_scheme.score(seq1_value, *seq2_it));
530  ++seq2_it;
531  }
532 
533  for (; seq2_it != std::ranges::end(sequence2); ++seq2_it)
534  this->compute_cell(*++alignment_column_it,
535  this->alignment_state,
536  this->scoring_scheme.score(seq1_value, *seq2_it));
537  }
538 
549  constexpr void finalise_last_cell_in_column(bool const at_last_row) noexcept
550  {
551  if (at_last_row)
552  this->check_score_of_last_row_cell(*alignment_column_it, this->alignment_state);
553 
554  if constexpr (traits_t::is_debug)
555  dump_alignment_column();
556  }
557 
559  constexpr void finalise_alignment() noexcept
560  {
561  // ----------------------------------------------------------------------------
562  // Check for the optimum in last cell/column.
563  // ----------------------------------------------------------------------------
564 
565  this->check_score_of_cells_in_last_column(alignment_column, this->alignment_state);
566  this->check_score_of_last_cell(*alignment_column_it, this->alignment_state);
567  }
568 
595  template <typename index_t, typename sequence1_t, typename sequence2_t, typename callback_t>
597  requires (!traits_t::is_vectorised)
599  constexpr void make_alignment_result([[maybe_unused]] index_t const idx,
600  [[maybe_unused]] sequence1_t & sequence1,
601  [[maybe_unused]] sequence2_t & sequence2,
602  callback_t & callback)
603  {
604  using result_value_t = typename alignment_result_value_type_accessor<alignment_result_t>::type;
605 
606  // ----------------------------------------------------------------------------
607  // Build the alignment result
608  // ----------------------------------------------------------------------------
609 
610  static_assert(seqan3::detail::alignment_configuration_traits<config_t>::has_output_configuration,
611  "The configuration must contain at least one align_cfg::output_* element.");
612 
613  result_value_t res{};
614 
615  if constexpr (traits_t::output_sequence1_id)
616  res.sequence1_id = idx;
617 
618  if constexpr (traits_t::output_sequence2_id)
619  res.sequence2_id = idx;
620 
621  // Choose what needs to be computed.
622  if constexpr (traits_t::compute_score)
623  res.score = this->alignment_state.optimum.score;
624 
625  if constexpr (traits_t::compute_end_positions)
626  {
627  res.end_positions = alignment_coordinate{column_index_type{this->alignment_state.optimum.column_index},
628  row_index_type{this->alignment_state.optimum.row_index}};
629  // At some point this needs to be refactored so that it is not necessary to adapt the coordinate.
630  if constexpr (traits_t::is_banded)
631  res.end_positions.second += res.end_positions.first - this->trace_matrix.band_col_index;
632  }
633 
634  if constexpr (traits_t::compute_begin_positions)
635  {
636  // Get a aligned sequence builder for banded or un-banded case.
637  aligned_sequence_builder builder{sequence1, sequence2};
638  auto optimum_coordinate = alignment_coordinate{column_index_type{this->alignment_state.optimum.column_index},
639  row_index_type{this->alignment_state.optimum.row_index}};
640  auto trace_res = builder(this->trace_matrix.trace_path(optimum_coordinate));
641  res.begin_positions.first = trace_res.first_sequence_slice_positions.first;
642  res.begin_positions.second = trace_res.second_sequence_slice_positions.first;
643 
644  if constexpr (traits_t::compute_sequence_alignment)
645  res.alignment = std::move(trace_res.alignment);
646  }
647 
648  // Store the matrices in debug mode.
649  if constexpr (traits_t::is_debug)
650  {
651  res.score_debug_matrix = std::move(score_debug_matrix);
652  if constexpr (traits_t::compute_sequence_alignment) // compute alignment
653  res.trace_debug_matrix = std::move(trace_debug_matrix);
654  }
655 
656  callback(std::move(res));
657  }
658 
684  template <typename indexed_sequence_pair_range_t, typename callback_t>
686  requires traits_t::is_vectorised
688  constexpr auto make_alignment_result(indexed_sequence_pair_range_t && index_sequence_pairs,
689  callback_t & callback)
690  {
691  using result_value_t = typename alignment_result_value_type_accessor<alignment_result_t>::type;
692 
693  size_t simd_index = 0;
694  for (auto && [sequence_pairs, alignment_index] : index_sequence_pairs)
695  {
696  (void) sequence_pairs;
697  result_value_t res{};
698 
699  if constexpr (traits_t::output_sequence1_id)
700  res.sequence1_id = alignment_index;
701 
702  if constexpr (traits_t::output_sequence2_id)
703  res.sequence2_id = alignment_index;
704 
705  if constexpr (traits_t::compute_score)
706  res.score = this->alignment_state.optimum.score[simd_index]; // Just take this
707 
708  if constexpr (traits_t::compute_end_positions)
709  {
710  res.end_positions.first = this->alignment_state.optimum.column_index[simd_index];
711  res.end_positions.second = this->alignment_state.optimum.row_index[simd_index];
712  }
713 
714  callback(std::move(res));
715  ++simd_index;
716  }
717  }
718 
727  void dump_alignment_column()
728  {
729  using std::get;
730 
731  auto column = this->current_alignment_column();
732 
733  auto coord = get<1>(column.front()).coordinate;
734  if constexpr (traits_t::is_banded)
735  coord.second += coord.first - this->score_matrix.band_col_index;
736 
737  matrix_offset offset{row_index_type{static_cast<std::ptrdiff_t>(coord.second)},
738  column_index_type{static_cast<std::ptrdiff_t>(coord.first)}};
739 
740  std::ranges::copy(column | std::views::transform([] (auto const & tpl)
741  {
742  using std::get;
743  return get<0>(tpl).current;
744  }), score_debug_matrix.begin() + offset);
745 
746  // if traceback is enabled.
747  if constexpr (traits_t::compute_sequence_alignment)
748  {
749  auto trace_matrix_it = trace_debug_matrix.begin() + offset;
750  std::ranges::copy(column | std::views::transform([] (auto const & tpl)
751  {
752  using std::get;
753  return get<1>(tpl).current;
754  }), trace_debug_matrix.begin() + offset);
755  }
756  }
757 
759  std::shared_ptr<config_t> cfg_ptr{};
761  alignment_column_t alignment_column{};
763  alignment_column_iterator_t alignment_column_it{};
765  score_debug_matrix_t score_debug_matrix{};
767  trace_debug_matrix_t trace_debug_matrix{};
769  std::pair<size_t, size_t> max_size_in_collection{};
770 };
771 
772 } // namespace seqan3::detail
view_to_simd.hpp
Provides seqan3::detail::to_simd view.
function.hpp
Provides various type traits for use on functions.
drop.hpp
Provides seqan3::views::drop.
std::shared_ptr
type_traits.hpp
Provides helper type traits for the configuration and execution of the alignment algorithm.
std::pair
std::vector
align_config_scoring_scheme.hpp
Provides seqan3::align_cfg::scoring_scheme.
iterator
Provides C++20 additions to the <iterator> header.
seqan3::get
constexpr auto const & get(configuration< configs_t... > const &config) noexcept
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: configuration.hpp:627
seqan3::views::get
auto const get
A view calling std::get on each element in a range.
Definition: get.hpp:65
exception.hpp
Includes customized exception types for the alignment module .
concept.hpp
Provides seqan3::simd::simd_concept.
std::vector::push_back
T push_back(T... args)
trace_directions.hpp
Provides the declaration of seqan3::detail::trace_directions.
simd.hpp
Provides seqan3::simd::simd_type.
align_config_band.hpp
Provides seqan3::detail::align_config_band.
seqan3::views::move
auto const move
A view that turns lvalue-references into rvalue-references.
Definition: move.hpp:68
simd_traits.hpp
Provides seqan3::simd::simd_traits.
concept.hpp
Provides concepts needed internally for the alignment algorithms.
aligned_sequence_builder.hpp
Provides seqan3::detail::aligned_sequence_builder.
std::iter_difference_t
memory
seqan3::views::take
constexpr auto take
A view adaptor that returns the first size elements from the underlying range (or less if the underly...
Definition: take.hpp:611
aligned_allocator.hpp
Provides seqan3::aligned_allocator.
take.hpp
Provides seqan3::views::take.
ranges
Adaptations of concepts from the Ranges TS.
seqan3::views::drop
constexpr auto drop
A view adaptor that returns all elements after n from the underlying range (or an empty range if the ...
Definition: drop.hpp:169
empty_type.hpp
Provides seqan3::detail::empty_type.
std::ranges::begin
T begin(T... args)
seqan3::views::slice
constexpr auto slice
A view adaptor that returns a half-open interval on the underlying range.
Definition: slice.hpp:141
std::ptrdiff_t
std::allocator
optional
seqan3::pack_traits::transform
seqan3::type_list< trait_t< pack_t >... > transform
Apply a transformation trait to every type in the pack and return a seqan3::type_list of the results.
Definition: traits.hpp:307
std::conditional_t
deferred_crtp_base.hpp
Provides seqan3::detail::deferred_crtp_base.
align_result_selector.hpp
Provides seqan3::detail::align_result_selector.
get.hpp
Provides seqan3::views::get.