SeqAn3  3.0.1
The Modern C++ library for sequence analysis.
alignment_algorithm.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2020, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2020, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
13 #pragma once
14 
15 #include <memory>
16 #include <optional>
17 #include <type_traits>
18 
28 
39 #include <seqan3/std/iterator>
40 #include <seqan3/std/ranges>
41 
42 namespace seqan3::detail
43 {
44 
78 template <typename config_t, typename ...algorithm_policies_t>
79 class alignment_algorithm :
80  public invoke_deferred_crtp_base<algorithm_policies_t, alignment_algorithm<config_t, algorithm_policies_t...>>...
81 {
82 private:
83 
85  using traits_t = alignment_configuration_traits<config_t>;
87  using alignment_column_t = decltype(std::declval<alignment_algorithm>().current_alignment_column());
89  using alignment_column_iterator_t = std::ranges::iterator_t<alignment_column_t>;
90 
92  using score_debug_matrix_t =
93  std::conditional_t<traits_t::is_debug,
94  two_dimensional_matrix<std::optional<typename traits_t::original_score_t>,
96  matrix_major_order::column>,
97  empty_type>;
99  using trace_debug_matrix_t =
100  std::conditional_t<traits_t::is_debug,
101  two_dimensional_matrix<std::optional<trace_directions>,
103  matrix_major_order::column>,
104  empty_type>;
105 
106 public:
110  constexpr alignment_algorithm() = default;
111  constexpr alignment_algorithm(alignment_algorithm const &) = default;
112  constexpr alignment_algorithm(alignment_algorithm &&) = default;
113  constexpr alignment_algorithm & operator=(alignment_algorithm const &) = default;
114  constexpr alignment_algorithm & operator=(alignment_algorithm &&) = default;
115  ~alignment_algorithm() = default;
116 
125  explicit constexpr alignment_algorithm(config_t const & cfg) : cfg_ptr{std::make_shared<config_t>(cfg)}
126  {
127  this->scoring_scheme = seqan3::get<align_cfg::scoring>(*cfg_ptr).value;
128  this->initialise_alignment_state(*cfg_ptr);
129  }
131 
176  template <indexed_sequence_pair_range indexed_sequence_pairs_t>
178  requires !traits_t::is_vectorised
180  auto operator()(indexed_sequence_pairs_t && indexed_sequence_pairs)
181  {
182  using sequence_pairs_t = std::tuple_element_t<0, std::ranges::range_value_t<indexed_sequence_pairs_t>>;
183  using sequence1_t = std::tuple_element_t<0, sequence_pairs_t>;
184  using sequence2_t = std::tuple_element_t<1, sequence_pairs_t>;
185  using result_t = typename align_result_selector<sequence1_t, sequence2_t, config_t>::type;
186 
187  using std::get;
188 
190  for (auto && [sequence_pair, idx] : indexed_sequence_pairs)
191  results.emplace_back(compute_single_pair(idx, get<0>(sequence_pair), get<1>(sequence_pair)));
192 
193  return results;
194  }
195 
197  template <indexed_sequence_pair_range indexed_sequence_pairs_t>
198  auto operator()(indexed_sequence_pairs_t && indexed_sequence_pairs)
200  requires traits_t::is_vectorised
202  {
203  assert(cfg_ptr != nullptr);
204 
205  static_assert(simd_concept<typename traits_t::score_t>, "Expected simd score type.");
206  static_assert(simd_concept<typename traits_t::trace_t>, "Expected simd trace type.");
207 
208  // Extract the batch of sequences for the first and the second sequence.
209  auto sequence1_range = indexed_sequence_pairs | views::get<0> | views::get<0>;
210  auto sequence2_range = indexed_sequence_pairs | views::get<0> | views::get<1>;
211 
212  // Initialise the find_optimum policy in the simd case.
213  this->initialise_find_optimum_policy(sequence1_range,
214  sequence2_range,
215  this->scoring_scheme.padding_match_score());
216 
217  // Convert batch of sequences to sequence of simd vectors.
218  auto simd_sequences1 = convert_batch_of_sequences_to_simd_vector(sequence1_range);
219  auto simd_sequences2 = convert_batch_of_sequences_to_simd_vector(sequence2_range);
220 
221  max_size_in_collection = std::pair{simd_sequences1.size(), simd_sequences2.size()};
222  // Reset the alignment state's optimum between executions of the alignment algorithm.
223  this->alignment_state.reset_optimum();
224 
225  compute_matrix(simd_sequences1, simd_sequences2);
226 
227  return make_alignment_result(indexed_sequence_pairs);
228  }
230 
231 private:
245  template <typename sequence_range_t>
246  constexpr auto convert_batch_of_sequences_to_simd_vector(sequence_range_t & sequences)
247  {
248  assert(static_cast<size_t>(std::ranges::distance(sequences)) <= traits_t::alignments_per_vector);
249 
250  using simd_score_t = typename traits_t::score_t;
251 
252  std::vector<simd_score_t, aligned_allocator<simd_score_t, alignof(simd_score_t)>> simd_sequence{};
253 
254  for (auto && simd_vector_chunk : sequences | views::to_simd<simd_score_t>(traits_t::padding_symbol))
255  for (auto && simd_vector : simd_vector_chunk)
256  simd_sequence.push_back(std::move(simd_vector));
257 
258  return simd_sequence;
259  }
260 
278  template <std::ranges::forward_range sequence1_t, std::ranges::forward_range sequence2_t>
279  constexpr auto compute_single_pair(size_t const idx, sequence1_t && sequence1, sequence2_t && sequence2)
280  {
281  assert(cfg_ptr != nullptr);
282 
283  if constexpr (traits_t::is_debug)
284  initialise_debug_matrices(sequence1, sequence2);
285 
286  // Reset the alignment state's optimum between executions of the alignment algorithm.
287  this->alignment_state.reset_optimum();
288 
289  if constexpr (traits_t::is_banded)
290  {
291  // Get the band and check if band configuration is valid.
292  auto const & band = seqan3::get<align_cfg::band>(*cfg_ptr).value;
293  check_valid_band_parameter(sequence1, sequence2, band);
294  auto && [subsequence1, subsequence2] = this->slice_sequences(sequence1, sequence2, band);
295  // It would be great to use this interface here instead
296  compute_matrix(subsequence1, subsequence2, band);
297  return make_alignment_result(idx, subsequence1, subsequence2);
298  }
299  else
300  {
301  compute_matrix(sequence1, sequence2);
302  return make_alignment_result(idx, sequence1, sequence2);
303  }
304  }
305 
322  template <typename sequence1_t, typename sequence2_t>
323  constexpr void check_valid_band_parameter(sequence1_t && sequence1,
324  sequence2_t && sequence2,
325  static_band const & band)
326  {
327  static_assert(config_t::template exists<align_cfg::band>(),
328  "The band configuration is required for the banded alignment algorithm.");
329 
331  static_assert(std::is_signed_v<diff_type>, "Only signed types can be used to test the band parameters.");
332 
333  if (static_cast<diff_type>(band.lower_bound) > std::ranges::distance(sequence1))
334  {
335  throw invalid_alignment_configuration
336  {
337  "Invalid band error: The lower bound excludes the whole alignment matrix."
338  };
339  }
340 
341  if (static_cast<diff_type>(band.upper_bound) < -std::ranges::distance(sequence2))
342  {
343  throw invalid_alignment_configuration
344  {
345  "Invalid band error: The upper bound excludes the whole alignment matrix."
346  };
347  }
348  }
349 
362  template <typename sequence1_t, typename sequence2_t>
363  constexpr void initialise_debug_matrices(sequence1_t & sequence1, sequence2_t & sequence2)
364  {
365  size_t rows = std::ranges::distance(sequence2) + 1;
366  size_t cols = std::ranges::distance(sequence1) + 1;
367 
368  score_debug_matrix = score_debug_matrix_t{number_rows{rows}, number_cols{cols}};
369  trace_debug_matrix = trace_debug_matrix_t{number_rows{rows}, number_cols{cols}};
370  }
371 
379  template <typename sequence1_t, typename sequence2_t>
380  void compute_matrix(sequence1_t & sequence1, sequence2_t & sequence2)
382  requires !traits_t::is_banded
384  {
385  // ----------------------------------------------------------------------------
386  // Initialisation phase: allocate memory and initialise first column.
387  // ----------------------------------------------------------------------------
388 
389  this->allocate_matrix(sequence1, sequence2);
390  initialise_first_alignment_column(sequence2);
391 
392  // ----------------------------------------------------------------------------
393  // Recursion phase: compute column-wise the alignment matrix.
394  // ----------------------------------------------------------------------------
395 
396  for (auto const & seq1_value : sequence1)
397  {
398  compute_alignment_column<true>(seq1_value, sequence2);
399  finalise_last_cell_in_column(true);
400  }
401 
402  // ----------------------------------------------------------------------------
403  // Wrap up phase: track score in last column and prepare the alignment result.
404  // ----------------------------------------------------------------------------
405 
406  finalise_alignment();
407  }
408 
410  template <typename sequence1_t, typename sequence2_t>
411  void compute_matrix(sequence1_t & sequence1, sequence2_t & sequence2, static_band const & band)
413  requires traits_t::is_banded
415  {
416  // ----------------------------------------------------------------------------
417  // Initialisation phase: allocate memory and initialise first column.
418  // ----------------------------------------------------------------------------
419 
420  // Allocate and initialise first column.
421  this->allocate_matrix(sequence1, sequence2, band, this->alignment_state);
422  size_t last_row_index = this->score_matrix.band_row_index;
423  initialise_first_alignment_column(sequence2 | views::take(last_row_index));
424 
425  // ----------------------------------------------------------------------------
426  // 1st recursion phase: iterate as long as the band intersects with the first row.
427  // ----------------------------------------------------------------------------
428 
429  size_t sequence2_size = std::ranges::distance(sequence2);
430  for (auto const & seq1_value : sequence1 | views::take(this->score_matrix.band_col_index))
431  {
432  compute_alignment_column<true>(seq1_value, sequence2 | views::take(++last_row_index));
433  // Only if band reached last row of matrix the last cell might be tracked.
434  finalise_last_cell_in_column(last_row_index >= sequence2_size);
435  }
436 
437  // ----------------------------------------------------------------------------
438  // 2nd recursion phase: iterate until the end of the matrix.
439  // ----------------------------------------------------------------------------
440 
441  size_t first_row_index = 0;
442  for (auto const & seq1_value : sequence1 | views::drop(this->score_matrix.band_col_index))
443  {
444  // In the second phase the band moves in every column one base down on the second sequence.
445  compute_alignment_column<false>(seq1_value, sequence2 | views::slice(first_row_index++, ++last_row_index));
446  // Only if band reached last row of matrix the last cell might be tracked.
447  finalise_last_cell_in_column(last_row_index >= sequence2_size);
448  }
449 
450  // ----------------------------------------------------------------------------
451  // Wrap up phase: track score in last column and prepare the alignment result.
452  // ----------------------------------------------------------------------------
453 
454  finalise_alignment();
455  }
456 
469  template <typename sequence2_t>
470  auto initialise_first_alignment_column(sequence2_t && sequence2)
471  {
472  // Get the initial column.
473  alignment_column = this->current_alignment_column();
474  assert(!alignment_column.empty()); // Must contain at least one element.
475 
476  // Initialise first cell.
477  alignment_column_it = alignment_column.begin();
478  this->init_origin_cell(*alignment_column_it, this->alignment_state);
479 
480  // Initialise the remaining cells of this column.
481  for (auto it = std::ranges::begin(sequence2); it != std::ranges::end(sequence2); ++it)
482  this->init_column_cell(*++alignment_column_it, this->alignment_state);
483 
484  // Finalise the last cell of the initial column.
485  bool at_last_row = true;
486  if constexpr (traits_t::is_banded) // If the band reaches until the last row of the matrix.
487  at_last_row = static_cast<size_t>(this->score_matrix.band_row_index) == this->score_matrix.num_rows - 1;
488 
489  finalise_last_cell_in_column(at_last_row);
490  }
491 
507  template <bool initialise_first_cell, typename sequence1_value_t, typename sequence2_t>
508  void compute_alignment_column(sequence1_value_t const & seq1_value, sequence2_t && sequence2)
509  {
510  this->next_alignment_column(); // move to next column and set alignment column iterator accordingly.
511  alignment_column = this->current_alignment_column();
512  alignment_column_it = alignment_column.begin();
513 
514  auto seq2_it = std::ranges::begin(sequence2);
515 
516  if constexpr (initialise_first_cell) // Initialise first cell if it intersects with the first row of the matrix.
517  {
518  this->init_row_cell(*alignment_column_it, this->alignment_state);
519  }
520  else // Compute first cell of banded column if it does not intersect with the first row of the matrix.
521  {
522  this->compute_first_band_cell(*alignment_column_it,
523  this->alignment_state,
524  this->scoring_scheme.score(seq1_value, *seq2_it));
525  ++seq2_it;
526  }
527 
528  for (; seq2_it != std::ranges::end(sequence2); ++seq2_it)
529  this->compute_cell(*++alignment_column_it,
530  this->alignment_state,
531  this->scoring_scheme.score(seq1_value, *seq2_it));
532  }
533 
544  constexpr void finalise_last_cell_in_column(bool const at_last_row) noexcept
545  {
546  if (at_last_row)
547  this->check_score_of_last_row_cell(*alignment_column_it, this->alignment_state);
548 
549  if constexpr (traits_t::is_debug)
550  dump_alignment_column();
551  }
552 
554  constexpr void finalise_alignment() noexcept
555  {
556  // ----------------------------------------------------------------------------
557  // Check for the optimum in last cell/column.
558  // ----------------------------------------------------------------------------
559 
560  this->check_score_of_cells_in_last_column(alignment_column, this->alignment_state);
561  this->check_score_of_last_cell(*alignment_column_it, this->alignment_state);
562  }
563 
588  template <typename index_t, typename sequence1_t, typename sequence2_t>
590  requires !traits_t::is_vectorised
592  constexpr auto make_alignment_result(index_t const idx,
593  sequence1_t & sequence1,
594  sequence2_t & sequence2)
595  {
596  // ----------------------------------------------------------------------------
597  // Build the alignment result
598  // ----------------------------------------------------------------------------
599 
600  static_assert(config_t::template exists<align_cfg::result>(),
601  "The configuration must contain an align_cfg::result element.");
602 
603  using result_value_t = typename align_result_selector<sequence1_t, sequence2_t, config_t>::type;
604  result_value_t res{};
605 
606  res.id = idx;
607 
608  // Choose what needs to be computed.
609  if constexpr (traits_t::result_type_rank >= 0) // compute score
610  res.score = this->alignment_state.optimum.score;
611 
612  if constexpr (traits_t::result_type_rank >= 1) // compute back coordinate
613  {
614  res.back_coordinate = alignment_coordinate{column_index_type{this->alignment_state.optimum.column_index},
615  row_index_type{this->alignment_state.optimum.row_index}};
616  // At some point this needs to be refactored so that it is not necessary to adapt the coordinate.
617  if constexpr (traits_t::is_banded)
618  res.back_coordinate.second += res.back_coordinate.first - this->trace_matrix.band_col_index;
619  }
620 
621  if constexpr (traits_t::result_type_rank >= 2) // compute front coordinate
622  {
623  // Get a aligned sequence builder for banded or un-banded case.
624  aligned_sequence_builder builder{sequence1, sequence2};
625  auto optimum_coordinate = alignment_coordinate{column_index_type{this->alignment_state.optimum.column_index},
626  row_index_type{this->alignment_state.optimum.row_index}};
627  auto trace_res = builder(this->trace_matrix.trace_path(optimum_coordinate));
628  res.front_coordinate.first = trace_res.first_sequence_slice_positions.first;
629  res.front_coordinate.second = trace_res.second_sequence_slice_positions.first;
630 
631  if constexpr (traits_t::result_type_rank == 3) // compute alignment
632  res.alignment = std::move(trace_res.alignment);
633  }
634 
635  // Store the matrices in debug mode.
636  if constexpr (traits_t::is_debug)
637  {
638  res.score_debug_matrix = std::move(score_debug_matrix);
639  if constexpr (traits_t::result_type_rank == 3) // compute alignment
640  res.trace_debug_matrix = std::move(trace_debug_matrix);
641  }
642 
643  return res;
644  }
645 
669  template <typename indexed_sequence_pair_range_t>
671  requires traits_t::is_vectorised
673  constexpr auto make_alignment_result(indexed_sequence_pair_range_t && index_sequence_pairs)
674  {
675  using indexed_sequence_pair_t = std::ranges::range_value_t<indexed_sequence_pair_range_t>;
676  using sequence_pair_t = std::tuple_element_t<0, indexed_sequence_pair_t>;
677  using sequence1_t = std::tuple_element_t<0, sequence_pair_t>;
678  using sequence2_t = std::tuple_element_t<1, sequence_pair_t>;
679 
680  using result_value_t = typename align_result_selector<sequence1_t, sequence2_t, config_t>::type;
681 
683  results.reserve(std::ranges::distance(index_sequence_pairs));
684 
685  size_t simd_index = 0;
686  for (auto && [sequence_pairs, alignment_index] : index_sequence_pairs)
687  {
688  (void) sequence_pairs;
689  result_value_t res{};
690  res.id = alignment_index;
691 
692  res.score = this->alignment_state.optimum.score[simd_index]; // Just take this
693 
694  if constexpr (traits_t::result_type_rank >= 1) // compute back coordinate
695  {
696  res.back_coordinate.first = this->alignment_state.optimum.column_index[simd_index] ;
697  res.back_coordinate.second = this->alignment_state.optimum.row_index[simd_index];
698  }
699 
700  results.emplace_back(std::move(res));
701  ++simd_index;
702  }
703 
704  return results;
705  }
706 
715  void dump_alignment_column()
716  {
717  using std::get;
718 
719  auto column = this->current_alignment_column();
720 
721  auto coord = get<1>(column.front()).coordinate;
722  if constexpr (traits_t::is_banded)
723  coord.second += coord.first - this->score_matrix.band_col_index;
724 
725  matrix_offset offset{row_index_type{static_cast<std::ptrdiff_t>(coord.second)},
726  column_index_type{static_cast<std::ptrdiff_t>(coord.first)}};
727 
728  std::ranges::copy(column | std::views::transform([] (auto const & tpl)
729  {
730  using std::get;
731  return get<0>(tpl).current;
732  }), score_debug_matrix.begin() + offset);
733 
734  // if traceback is enabled.
735  if constexpr (config_t::template exists<align_cfg::result<with_alignment_type>>())
736  {
737  auto trace_matrix_it = trace_debug_matrix.begin() + offset;
738  std::ranges::copy(column | std::views::transform([] (auto const & tpl)
739  {
740  using std::get;
741  return get<1>(tpl).current;
742  }), trace_debug_matrix.begin() + offset);
743  }
744  }
745 
747  std::shared_ptr<config_t> cfg_ptr{};
749  alignment_column_t alignment_column{};
751  alignment_column_iterator_t alignment_column_it{};
753  score_debug_matrix_t score_debug_matrix{};
755  trace_debug_matrix_t trace_debug_matrix{};
757  std::pair<size_t, size_t> max_size_in_collection{};
758 };
759 
760 } // namespace seqan3::detail
view_to_simd.hpp
Provides seqan3::detail::to_simd view.
drop.hpp
Provides seqan3::views::drop.
std::shared_ptr
type_traits.hpp
Provides helper type traits for the configuration and execution of the alignment algorithm.
seqan3::field::offset
Sequence (SEQ) relative start position (0-based), unsigned value.
std::pair
std::vector::reserve
T reserve(T... args)
std::vector
iterator
Provides C++20 additions to the <iterator> header.
seqan3::views::move
const auto move
A view that turns lvalue-references into rvalue-references.
Definition: move.hpp:68
align_config_scoring.hpp
Provides seqan3::align_cfg::scoring.
exception.hpp
Includes customized exception types for the alignment module .
scoring_scheme
A concept that requires that type be able to score two letters.
concept.hpp
Provides seqan3::simd::simd_concept.
std::vector::push_back
T push_back(T... args)
trace_directions.hpp
Provides the declaration of seqan3::detail::trace_directions.
seqan3::views::get
const auto get
A view calling std::get on each element in a range.
Definition: get.hpp:65
simd.hpp
Provides seqan3::simd::simd_type.
align_config_band.hpp
Provides seqan3::detail::align_config_band.
simd_traits.hpp
Provides seqan3::simd::simd_traits.
concept.hpp
Provides concepts needed internally for the alignment algorithms.
aligned_sequence_builder.hpp
Provides seqan3::detail::aligned_sequence_builder.
std::iter_difference_t
memory
seqan3::views::take
constexpr auto take
A view adaptor that returns the first size elements from the underlying range (or less if the underly...
Definition: take.hpp:624
aligned_allocator.hpp
Provides seqan3::aligned_allocator.
take.hpp
Provides seqan3::views::take.
ranges
Adaptations of concepts from the Ranges TS.
std::vector::emplace_back
T emplace_back(T... args)
seqan3::views::drop
constexpr auto drop
A view adaptor that returns all elements after n from the underlying range (or an empty range if the ...
Definition: drop.hpp:168
empty_type.hpp
Provides seqan3::detail::empty_type.
std::ranges::begin
T begin(T... args)
std
SeqAn specific customisations in the standard namespace.
seqan3::views::slice
constexpr auto slice
A view adaptor that returns a half-open interval on the underlying range.
Definition: slice.hpp:141
std::allocator
optional
seqan3::pack_traits::transform
seqan3::type_list< trait_t< pack_t >... > transform
Apply a transformation trait to every type in the pack and return a seqan3::type_list of the results.
Definition: traits.hpp:307
std::conditional_t
align_config_result.hpp
Provides configuration for alignment output.
deferred_crtp_base.hpp
Provides seqan3::detail::deferred_crtp_base.
scoring_scheme::score
score_type score(alph1_t const alph1, alph2_t const alph2)
Compute the score of two letters.
align_result_selector.hpp
Provides seqan3::detail::align_result_selector.
get.hpp
Provides seqan3::views::get.
seqan3::field::alignment
The (pairwise) alignment stored in an seqan3::alignment object.