SeqAn3  3.0.0
The Modern C++ library for sequence analysis.
alignment_algorithm.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2019, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2019, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
13 #pragma once
14 
15 #include <range/v3/algorithm/for_each.hpp>
16 #include <range/v3/view/drop_exactly.hpp>
17 #include <range/v3/view/zip.hpp>
18 
28 
35 #include <seqan3/std/concepts>
36 #include <seqan3/std/iterator>
37 #include <seqan3/std/ranges>
38 
39 namespace seqan3::detail
40 {
41 
75 template <typename config_t, typename ...algorithm_policies_t>
76 class alignment_algorithm :
77  public invoke_deferred_crtp_base<algorithm_policies_t, alignment_algorithm<config_t, algorithm_policies_t...>>...
78 {
79 private:
80 
82  static constexpr bool is_banded = std::remove_reference_t<config_t>::template exists<align_cfg::band>();
83 
84 public:
88  constexpr alignment_algorithm() = default;
89  constexpr alignment_algorithm(alignment_algorithm const &) = default;
90  constexpr alignment_algorithm(alignment_algorithm &&) = default;
91  constexpr alignment_algorithm & operator=(alignment_algorithm const &) = default;
92  constexpr alignment_algorithm & operator=(alignment_algorithm &&) = default;
93  ~alignment_algorithm() = default;
94 
102  explicit constexpr alignment_algorithm(config_t const & cfg) : cfg_ptr{new config_t(cfg)}
103  {}
105 
133  template <std::ranges::ForwardRange first_range_t, std::ranges::ForwardRange second_range_t>
134  auto operator()(size_t const idx, first_range_t && first_range, second_range_t && second_range)
135  requires !is_banded
136  {
137  assert(cfg_ptr != nullptr);
138 
139  // ----------------------------------------------------------------------------
140  // Initialise dp algorithm.
141  // ----------------------------------------------------------------------------
142 
143  // We need to allocate the score_matrix and maybe the trace_matrix.
144  this->allocate_matrix(first_range, second_range);
145 
146  // Initialise cache variables to keep frequently used variables close to the CPU registers.
147  auto cache = this->make_cache(cfg_ptr->template value_or<align_cfg::gap>(gap_scheme{gap_score{-1},
148  gap_open_score{-10}}));
149 
150  initialise_matrix(cache);
151 
152  // ----------------------------------------------------------------------------
153  // Compute the unbanded alignment.
154  // ----------------------------------------------------------------------------
155 
156  compute_matrix(first_range, second_range, cache);
157 
158  // ----------------------------------------------------------------------------
159  // Cleanup and prepare the alignment result.
160  // ----------------------------------------------------------------------------
161 
162  using result_t = typename align_result_selector<first_range_t, second_range_t, config_t>::type;
163  result_t res{};
164 
165  res.id = idx;
166  // Choose what needs to be computed.
167  if constexpr (config_t::template exists<align_cfg::result<with_score_type>>())
168  {
169  res.score = get<3>(cache).score;
170  }
171  if constexpr (config_t::template exists<align_cfg::result<with_back_coordinate_type>>())
172  {
173  res.score = get<3>(cache).score;
174  res.back_coordinate = get<3>(cache).coordinate;
175  }
176  if constexpr (config_t::template exists<align_cfg::result<with_front_coordinate_type>>())
177  { // At the moment we also compute the traceback even if only the front coordinate was requested.
178  // This can be later optimised by computing the reverse alignment in a narrow band in linear memory.
179  // Especially for the SIMD version this might be more efficient.
180  res.score = get<3>(cache).score;
181  res.back_coordinate = get<3>(cache).coordinate;
182  res.front_coordinate = get<0>(compute_traceback(first_range,
183  second_range,
184  get<3>(cache).coordinate));
185  }
186  if constexpr (config_t::template exists<align_cfg::result<with_alignment_type>>())
187  {
188  res.score = get<3>(cache).score;
189  res.back_coordinate = get<3>(cache).coordinate;
190  std::tie(res.front_coordinate, res.alignment) = compute_traceback(first_range,
191  second_range,
192  get<3>(cache).coordinate);
193  }
194  return alignment_result{res};
195  }
196 
225  template <std::ranges::ForwardRange first_range_t, std::ranges::ForwardRange second_range_t>
226  auto operator()(size_t const idx, first_range_t && first_range, second_range_t && second_range)
227  requires is_banded
228  {
229  assert(cfg_ptr != nullptr);
230 
231  using std::get;
232 
233  static_assert(config_t::template exists<align_cfg::band>(),
234  "The band configuration is required for the banded alignment algorithm.");
235  auto const & band = get<align_cfg::band>(*cfg_ptr).value;
236 
237  // ----------------------------------------------------------------------------
238  // Check valid band settings.
239  // ----------------------------------------------------------------------------
240 
241  using diff_type = typename std::iterator_traits<std::ranges::iterator_t<first_range_t>>::difference_type;
242  static_assert(std::is_signed_v<diff_type>, "Only signed types can be used to test the band parameters.");
243 
244  if (static_cast<diff_type>(band.lower_bound) > std::ranges::distance(first_range))
245  {
246  throw invalid_alignment_configuration
247  {
248  "Invalid band error: The lower bound excludes the whole alignment matrix."
249  };
250  }
251 
252  if (static_cast<diff_type>(band.upper_bound) < -std::ranges::distance(second_range))
253  {
254  throw invalid_alignment_configuration
255  {
256  "Invalid band error: The upper bound excludes the whole alignment matrix."
257  };
258  }
259 
260  // ----------------------------------------------------------------------------
261  // Initialise dp algorithm.
262  // ----------------------------------------------------------------------------
263 
264  // Trim the sequences according to special band settings.
265  auto [trimmed_first_range, trimmed_second_range] =
266  this->trim_sequences(first_range, second_range, band);
267 
268  this->allocate_matrix(trimmed_first_range, trimmed_second_range, band);
269 
270  // Use default gap if not set from outside.
271  auto const & gap = cfg_ptr->template value_or<align_cfg::gap>(gap_scheme{gap_score{-1}, gap_open_score{-10}});
272 
273  // Initialise cache variables to keep frequently used variables close to the CPU registers.
274  auto cache = this->make_cache(gap);
275 
276  initialise_matrix(cache);
277 
278  // ----------------------------------------------------------------------------
279  // Compute the banded alignment.
280  // ----------------------------------------------------------------------------
281 
282  compute_banded_matrix(trimmed_first_range, trimmed_second_range, cache);
283 
284  // ----------------------------------------------------------------------------
285  // Cleanup and optionally compute the traceback.
286  // ----------------------------------------------------------------------------
287 
288  using result_t = typename align_result_selector<first_range_t, second_range_t, config_t>::type;
289  result_t res{};
290 
291  res.id = idx;
292  // Balance the score with possible leading/trailing gaps depending on the
293  // band settings.
294  this->balance_leading_gaps(get<3>(cache), band, gap);
295 
296  this->balance_trailing_gaps(get<3>(cache),
297  this->dimension_first_range,
298  this->dimension_second_range,
299  band,
300  gap);
301 
302  if constexpr (config_t::template exists<align_cfg::result<detail::with_score_type>>())
303  {
304  res.score = get<3>(cache).score;
305  }
306  if constexpr (config_t::template exists<align_cfg::result<with_back_coordinate_type>>())
307  {
308  res.score = get<3>(cache).score;
309  res.back_coordinate = this->map_banded_coordinate_to_range_position(get<3>(cache).coordinate);
310  }
311  if constexpr (config_t::template exists<align_cfg::result<with_front_coordinate_type>>())
312  { // At the moment we also compute the traceback even if only the front coordinate was requested.
313  // This can be later optimised by computing the reverse alignment in linear memory from the maximum.
314  // Especially for the SIMD version this might be more efficient.
315  res.score = get<3>(cache).score;
316  res.back_coordinate = this->map_banded_coordinate_to_range_position(get<3>(cache).coordinate);
317  res.front_coordinate =
318  get<0>(compute_traceback(first_range,
319  second_range,
320  get<3>(cache).coordinate));
321  }
322  if constexpr (config_t::template exists<align_cfg::result<with_alignment_type>>())
323  {
324  res.score = get<3>(cache).score;
325  res.back_coordinate = this->map_banded_coordinate_to_range_position(get<3>(cache).coordinate);
326  std::tie(res.front_coordinate, res.alignment) = compute_traceback(first_range,
327  second_range,
328  get<3>(cache).coordinate);
329  }
330  return alignment_result{res};
331  }
332 private:
333 
338  template <typename cache_t>
339  void initialise_matrix(cache_t & cache)
340  {
341  // Get the current dynamic programming matrix.
342  auto col = this->current_column();
343 
344  this->init_origin_cell(*std::ranges::begin(col), cache);
345 
346  ranges::for_each(col | ranges::view::drop_exactly(1), [&cache, this](auto && cell)
347  {
348  this->init_column_cell(std::forward<decltype(cell)>(cell), cache);
349  });
350 
351  auto [cell, coordinate, trace] = *std::ranges::prev(std::ranges::end(col));
352  (void) trace;
353  if constexpr (is_banded)
354  {
355  alignment_optimum current{get<0>(get<0>(std::move(cell))), static_cast<alignment_coordinate>(coordinate)};
356  this->check_score_last_row(current, get<3>(cache));
357  }
358  else
359  {
360  alignment_optimum current{get<0>(std::move(cell)), static_cast<alignment_coordinate>(coordinate)};
361  this->check_score_last_row(current, get<3>(cache));
362  }
363  }
364 
373  template <typename first_range_t,
374  typename second_range_t,
375  typename cache_t>
376  void compute_matrix(first_range_t & first_range,
377  second_range_t & second_range,
378  cache_t & cache)
379  {
380  using std::get;
381  auto const & score_scheme = get<align_cfg::scoring>(*cfg_ptr).value;
382  ranges::for_each(first_range, [&, this](auto seq1_value)
383  {
384  // Move internal matrix to next column.
385  this->go_next_column();
386 
387  auto col = this->current_column();
388  this->init_row_cell(*std::ranges::begin(col), cache);
389 
390  auto second_range_it = std::ranges::begin(second_range);
391  ranges::for_each(col | ranges::view::drop_exactly(1), [&, this] (auto && cell)
392  {
393  this->compute_cell(cell, cache, score_scheme.score(seq1_value, *second_range_it));
394  ++second_range_it;
395  });
396 
397  // Prepare last cell for tracking the optimum.
398  auto [cell, coordinate, trace] = *std::ranges::prev(std::ranges::end(col));
399  (void) trace;
400  alignment_optimum current{get<0>(std::move(cell)), static_cast<alignment_coordinate>(coordinate)};
401  this->check_score_last_row(current, get<3>(cache));
402  });
403 
404  // Prepare the last column for tracking the optimum: Only get the current score cell and the coordinate.
405  auto last_column_view = this->current_column() | std::view::transform([](auto && entry)
406  {
407  using std::get;
408  return std::tuple{get<0>(std::forward<decltype(entry)>(entry)),
409  get<1>(std::forward<decltype(entry)>(entry))};
410  });
411  this->check_score_last_column(last_column_view, get<3>(cache));
412  }
413 
422  template <typename first_range_t,
423  typename second_range_t,
424  typename cache_t>
425  void compute_banded_matrix(first_range_t & first_range,
426  second_range_t & second_range,
427  cache_t & cache)
428  {
429  auto const & score_scheme = get<align_cfg::scoring>(*cfg_ptr).value;
430  // ----------------------------------------------------------------------------
431  // 1st phase: Iterate as long as the band intersects with the first row.
432  // ----------------------------------------------------------------------------
433 
434  ranges::for_each(first_range | view::take_exactly(this->band_column_index), [&, this](auto first_range_value)
435  {
436  this->go_next_column(); // Move to the next column.
437  auto col = this->current_column();
438  this->init_row_cell(*std::ranges::begin(col), cache); // initialise first row of dp matrix.
439 
440  auto second_range_it = std::ranges::begin(second_range);
441  ranges::for_each(col | ranges::view::drop_exactly(1), [&, this] (auto && cell)
442  {
443  this->compute_cell(std::forward<decltype(cell)>(cell),
444  cache,
445  score_scheme.score(first_range_value, *second_range_it));
446  ++second_range_it;
447  });
448 
449  if (this->band_touches_last_row()) // TODO [[unlikely]]
450  {
451  auto [cell, coordinate, trace] = *std::ranges::prev(std::ranges::end(col));
452  (void) trace;
453  alignment_optimum current{get<0>(get<0>(std::move(cell))),
454  static_cast<alignment_coordinate>(coordinate)};
455  this->check_score_last_row(current, get<3>(cache));
456  }
457  });
458 
459  // ----------------------------------------------------------------------------
460  // 2nd phase: Iterate until the end of the matrix.
461  // ----------------------------------------------------------------------------
462 
463  // Drop the first columns from the 1st phase.
464  ranges::for_each(first_range | ranges::view::drop_exactly(this->band_column_index),
465  [&, this](auto first_range_value)
466  {
467  this->go_next_column(); // Move to the next column.
468  auto col = this->current_column();
469 
470  // Move the second_range_it to the correct position depending on the current band position.
471  auto second_range_it = std::ranges::begin(second_range);
472  std::ranges::advance(second_range_it, this->second_range_begin_offset());
473 
474  // Initialise the first cell of the current band.
475  this->compute_first_band_cell(*std::ranges::begin(col),
476  cache,
477  score_scheme.score(first_range_value, *second_range_it));
478 
479  // Process rest of current column band.
480  ++second_range_it;
481  ranges::for_each(col | ranges::view::drop_exactly(1), [&, this] (auto && cell)
482  {
483  this->compute_cell(std::forward<decltype(cell)>(cell),
484  cache,
485  score_scheme.score(first_range_value, *second_range_it));
486  ++second_range_it;
487  });
488 
489  if (this->band_touches_last_row()) // TODO [[unlikely]]
490  {
491  auto [cell, coordinate, trace] = *std::ranges::prev(std::ranges::end(col));
492  (void) trace;
493  alignment_optimum current{get<0>(get<0>(std::move(cell))),
494  static_cast<alignment_coordinate>(coordinate)};
495  this->check_score_last_row(current, get<3>(cache));
496  }
497  });
498  // Prepare the last column for tracking the optimum: Only get the current score cell and the coordinate.
499  auto last_column_view = this->current_column() | std::view::transform([](auto && entry) {
500  using std::get;
501  return std::tuple{get<0>(get<0>(std::forward<decltype(entry)>(entry))),
502  get<1>(std::forward<decltype(entry)>(entry))};
503  });
504  this->check_score_last_column(last_column_view, get<3>(cache));
505  }
506 
519  template <typename first_range_t, typename second_range_t>
520  auto compute_traceback(first_range_t & first_range,
521  second_range_t & second_range,
522  alignment_coordinate back_coordinate)
523  {
524  // Parse the traceback
525  auto [front_coordinate, first_gap_segments, second_gap_segments] = this->parse_traceback(back_coordinate);
526 
527  using result_type = typename align_result_selector<first_range_t, second_range_t, config_t>::type;
528  using aligned_seq_type = decltype(result_type{}.alignment);
529 
530  // If we compute the traceback the aligned sequences must be provided.
532  {
533  auto fill_aligned_sequence = [] (auto & aligned_sequence, auto & gap_segments, size_t const normalise)
534  {
535  assert(std::ranges::empty(gap_segments) || normalise <= gap_segments[0].position);
536 
537  size_t offset = 0;
538  for (auto const & gap_elem : gap_segments)
539  {
540  auto it = std::ranges::begin(aligned_sequence);
541  std::ranges::advance(it, (gap_elem.position - normalise) + offset);
542  insert_gap(aligned_sequence, it, gap_elem.size);
543  offset += gap_elem.size;
544  }
545  };
546 
547  // In banded case we need to refine the back coordinate to map to the correct position within the
548  // second range.
549  if constexpr (is_banded)
550  back_coordinate = this->map_banded_coordinate_to_range_position(back_coordinate);
551 
552  // Get the subrange over the first sequence according to the front and back coordinate.
553  auto first_subrange = view::slice(first_range, front_coordinate.first, back_coordinate.first);
554 
555  // Get the subrange over the second sequence according to the front and back coordinate.
556  auto second_subrange = view::slice(second_range, front_coordinate.second, back_coordinate.second);
557 
558  // Create and fill the aligned_sequences.
559  aligned_seq_type aligned_seq;
560  assign_unaligned(std::get<0>(aligned_seq), first_subrange);
561  assign_unaligned(std::get<1>(aligned_seq), second_subrange);
562  fill_aligned_sequence(std::get<0>(aligned_seq), first_gap_segments, front_coordinate.first);
563  fill_aligned_sequence(std::get<1>(aligned_seq), second_gap_segments, front_coordinate.second);
564 
565  return std::tuple{front_coordinate, aligned_seq};
566  }
567  else
568  {
569  return std::tuple{front_coordinate, std::ignore};
570  }
571  }
572 
574  std::shared_ptr<config_t> cfg_ptr{};
575 };
576 
577 } // namespace seqan3::detail
Provides seqan3::view::get.
::ranges::distance distance
Alias for ranges::distance. Returns the number of hops from first to last.
Definition: iterator:321
void assign_unaligned(aligned_seq_t &aligned_seq, unaligned_sequence_type &&unaligned_seq)
An implementation of seqan3::AlignedSequence::assign_unaligned_sequence for sequence containers...
Definition: aligned_sequence_concept.hpp:345
aligned_seq_t::iterator insert_gap(aligned_seq_t &aligned_seq, typename aligned_seq_t::const_iterator pos_it)
An implementation of seqan3::AlignedSequence::insert_gap for sequence containers. ...
Definition: aligned_sequence_concept.hpp:229
::ranges::prev prev
Alias for ranges::prev. Returns the nth predecessor of the given iterator.
Definition: iterator:326
T tie(T... args)
Meta-header for the alignment configuration module .
Includes the AlignedSequence and the related insert_gap and erase_gap functions to enable stl contain...
Provides C++20 additions to the <iterator> header.
Provides seqan3::gap_scheme.
constexpr auto slice
A view adaptor that returns a half-open interval on the underlying range.
Definition: slice.hpp:144
Provides seqan3::detail::deferred_crtp_base.
Provides seqan3::detail::unbanded_score_dp_matrix_policy.
Provides seqan3::TupleLike.
auto constexpr take_exactly
A view adaptor that returns the first size elements from the underlying range (or less if the underly...
Definition: take_exactly.hpp:80
The Concepts library.
Provides seqan3::detail::affine_gap_init_policy.
Adaptations of concepts from the Ranges TS.
::ranges::begin begin
Alias for ranges::begin. Returns an iterator to the beginning of a range.
Definition: ranges:174
::ranges::advance advance
Alias for ranges::advance. Advances the iterator by the given distance.
Definition: iterator:316
Provides seqan3::detail::affine_gap_policy.
Definition: aligned_sequence_concept.hpp:35
auto const get
A view calling std::get on each element in a range.
Definition: get.hpp:66
Provides seqan3::detail::align_result_selector.
Provides seqan3::scoring_scheme_base.
Provides seqan3::view::slice.
::ranges::empty empty
Alias for ranges::empty. Checks whether a range is empty.
Definition: ranges:194
Includes customized exception types for the alignment module .
T forward(T... args)
::ranges::end end
Alias for ranges::end. Returns an iterator to the end of a range.
Definition: ranges:179
constexpr auto transform
A range adaptor that takes a invocable and returns a view of the elements with the invocable applied...
Definition: ranges:911
Provides seqan3::debug_stream and related types.
Provides seqan3::view::take_exactly and seqan3::view::take_exactly_or_throw.
Whether a type behaves like a tuple.