17 #include <type_traits>
42 namespace seqan3::detail
78 template <
typename config_t,
typename ...algorithm_policies_t>
79 class alignment_algorithm :
80 public invoke_deferred_crtp_base<algorithm_policies_t, alignment_algorithm<config_t, algorithm_policies_t...>>...
85 using traits_t = alignment_configuration_traits<config_t>;
87 using alignment_column_t = decltype(std::declval<alignment_algorithm>().current_alignment_column());
89 using alignment_column_iterator_t = std::ranges::iterator_t<alignment_column_t>;
92 using score_debug_matrix_t =
94 two_dimensional_matrix<std::optional<typename traits_t::original_score_t>,
96 matrix_major_order::column>,
99 using trace_debug_matrix_t =
101 two_dimensional_matrix<std::optional<trace_directions>,
103 matrix_major_order::column>,
110 constexpr alignment_algorithm() =
default;
111 constexpr alignment_algorithm(alignment_algorithm
const &) =
default;
112 constexpr alignment_algorithm(alignment_algorithm &&) =
default;
113 constexpr alignment_algorithm & operator=(alignment_algorithm
const &) =
default;
114 constexpr alignment_algorithm & operator=(alignment_algorithm &&) =
default;
115 ~alignment_algorithm() =
default;
125 explicit constexpr alignment_algorithm(config_t
const & cfg) : cfg_ptr{std::make_shared<config_t>(cfg)}
127 this->
scoring_scheme = seqan3::get<align_cfg::scoring>(*cfg_ptr).value;
128 this->initialise_alignment_state(*cfg_ptr);
176 template <indexed_sequence_pair_range indexed_sequence_pairs_t>
178 requires !traits_t::is_vectorised
180 auto operator()(indexed_sequence_pairs_t && indexed_sequence_pairs)
182 using sequence_pairs_t = std::tuple_element_t<0, std::ranges::range_value_t<indexed_sequence_pairs_t>>;
183 using sequence1_t = std::tuple_element_t<0, sequence_pairs_t>;
184 using sequence2_t = std::tuple_element_t<1, sequence_pairs_t>;
185 using result_t =
typename align_result_selector<sequence1_t, sequence2_t, config_t>::type;
190 for (
auto && [sequence_pair, idx] : indexed_sequence_pairs)
191 results.
emplace_back(compute_single_pair(idx, get<0>(sequence_pair), get<1>(sequence_pair)));
197 template <indexed_sequence_pair_range indexed_sequence_pairs_t>
198 auto operator()(indexed_sequence_pairs_t && indexed_sequence_pairs)
200 requires traits_t::is_vectorised
203 assert(cfg_ptr !=
nullptr);
205 static_assert(simd_concept<typename traits_t::score_t>,
"Expected simd score type.");
206 static_assert(simd_concept<typename traits_t::trace_t>,
"Expected simd trace type.");
209 auto sequence1_range = indexed_sequence_pairs | views::get<0> | views::get<0>;
210 auto sequence2_range = indexed_sequence_pairs | views::get<0> | views::get<1>;
213 this->initialise_find_optimum_policy(sequence1_range,
218 auto simd_sequences1 = convert_batch_of_sequences_to_simd_vector(sequence1_range);
219 auto simd_sequences2 = convert_batch_of_sequences_to_simd_vector(sequence2_range);
221 max_size_in_collection =
std::pair{simd_sequences1.size(), simd_sequences2.size()};
223 this->alignment_state.reset_optimum();
225 compute_matrix(simd_sequences1, simd_sequences2);
227 return make_alignment_result(indexed_sequence_pairs);
245 template <
typename sequence_range_t>
246 constexpr
auto convert_batch_of_sequences_to_simd_vector(sequence_range_t & sequences)
248 assert(static_cast<size_t>(std::ranges::distance(sequences)) <= traits_t::alignments_per_vector);
250 using simd_score_t =
typename traits_t::score_t;
252 std::vector<simd_score_t, aligned_allocator<simd_score_t,
alignof(simd_score_t)>> simd_sequence{};
254 for (
auto && simd_vector_chunk : sequences | views::to_simd<simd_score_t>(traits_t::padding_symbol))
255 for (
auto && simd_vector : simd_vector_chunk)
258 return simd_sequence;
278 template <std::ranges::forward_range sequence1_t, std::ranges::forward_range sequence2_t>
279 constexpr
auto compute_single_pair(
size_t const idx, sequence1_t && sequence1, sequence2_t && sequence2)
281 assert(cfg_ptr !=
nullptr);
283 if constexpr (traits_t::is_debug)
284 initialise_debug_matrices(sequence1, sequence2);
287 this->alignment_state.reset_optimum();
289 if constexpr (traits_t::is_banded)
292 auto const & band = seqan3::get<align_cfg::band>(*cfg_ptr).value;
293 check_valid_band_parameter(sequence1, sequence2, band);
294 auto && [subsequence1, subsequence2] = this->slice_sequences(sequence1, sequence2, band);
296 compute_matrix(subsequence1, subsequence2, band);
297 return make_alignment_result(idx, subsequence1, subsequence2);
301 compute_matrix(sequence1, sequence2);
302 return make_alignment_result(idx, sequence1, sequence2);
322 template <
typename sequence1_t,
typename sequence2_t>
323 constexpr
void check_valid_band_parameter(sequence1_t && sequence1,
324 sequence2_t && sequence2,
325 static_band
const & band)
327 static_assert(config_t::template exists<align_cfg::band>(),
328 "The band configuration is required for the banded alignment algorithm.");
331 static_assert(std::is_signed_v<diff_type>,
"Only signed types can be used to test the band parameters.");
333 if (static_cast<diff_type>(band.lower_bound) > std::ranges::distance(sequence1))
335 throw invalid_alignment_configuration
337 "Invalid band error: The lower bound excludes the whole alignment matrix."
341 if (static_cast<diff_type>(band.upper_bound) < -std::ranges::distance(sequence2))
343 throw invalid_alignment_configuration
345 "Invalid band error: The upper bound excludes the whole alignment matrix."
362 template <
typename sequence1_t,
typename sequence2_t>
363 constexpr
void initialise_debug_matrices(sequence1_t & sequence1, sequence2_t & sequence2)
365 size_t rows = std::ranges::distance(sequence2) + 1;
366 size_t cols = std::ranges::distance(sequence1) + 1;
368 score_debug_matrix = score_debug_matrix_t{number_rows{rows}, number_cols{cols}};
369 trace_debug_matrix = trace_debug_matrix_t{number_rows{rows}, number_cols{cols}};
379 template <
typename sequence1_t,
typename sequence2_t>
380 void compute_matrix(sequence1_t & sequence1, sequence2_t & sequence2)
382 requires !traits_t::is_banded
389 this->allocate_matrix(sequence1, sequence2);
390 initialise_first_alignment_column(sequence2);
396 for (
auto const & seq1_value : sequence1)
398 compute_alignment_column<true>(seq1_value, sequence2);
399 finalise_last_cell_in_column(
true);
406 finalise_alignment();
410 template <
typename sequence1_t,
typename sequence2_t>
411 void compute_matrix(sequence1_t & sequence1, sequence2_t & sequence2, static_band
const & band)
413 requires traits_t::is_banded
421 this->allocate_matrix(sequence1, sequence2, band, this->alignment_state);
422 size_t last_row_index = this->score_matrix.band_row_index;
423 initialise_first_alignment_column(sequence2 |
views::take(last_row_index));
429 size_t sequence2_size = std::ranges::distance(sequence2);
430 for (
auto const & seq1_value : sequence1 |
views::take(this->score_matrix.band_col_index))
432 compute_alignment_column<true>(seq1_value, sequence2 |
views::take(++last_row_index));
434 finalise_last_cell_in_column(last_row_index >= sequence2_size);
441 size_t first_row_index = 0;
442 for (
auto const & seq1_value : sequence1 |
views::drop(this->score_matrix.band_col_index))
445 compute_alignment_column<false>(seq1_value, sequence2 |
views::slice(first_row_index++, ++last_row_index));
447 finalise_last_cell_in_column(last_row_index >= sequence2_size);
454 finalise_alignment();
469 template <
typename sequence2_t>
470 auto initialise_first_alignment_column(sequence2_t && sequence2)
473 alignment_column = this->current_alignment_column();
474 assert(!alignment_column.empty());
477 alignment_column_it = alignment_column.begin();
478 this->init_origin_cell(*alignment_column_it, this->alignment_state);
481 for (
auto it = std::ranges::begin(sequence2); it != std::ranges::end(sequence2); ++it)
482 this->init_column_cell(*++alignment_column_it, this->alignment_state);
485 bool at_last_row =
true;
486 if constexpr (traits_t::is_banded)
487 at_last_row = static_cast<size_t>(this->score_matrix.band_row_index) == this->score_matrix.num_rows - 1;
489 finalise_last_cell_in_column(at_last_row);
507 template <
bool initialise_first_cell,
typename sequence1_value_t,
typename sequence2_t>
508 void compute_alignment_column(sequence1_value_t
const & seq1_value, sequence2_t && sequence2)
510 this->next_alignment_column();
511 alignment_column = this->current_alignment_column();
512 alignment_column_it = alignment_column.begin();
516 if constexpr (initialise_first_cell)
518 this->init_row_cell(*alignment_column_it, this->alignment_state);
522 this->compute_first_band_cell(*alignment_column_it,
523 this->alignment_state,
528 for (; seq2_it != std::ranges::end(sequence2); ++seq2_it)
529 this->compute_cell(*++alignment_column_it,
530 this->alignment_state,
544 constexpr
void finalise_last_cell_in_column(
bool const at_last_row) noexcept
547 this->check_score_of_last_row_cell(*alignment_column_it, this->alignment_state);
549 if constexpr (traits_t::is_debug)
550 dump_alignment_column();
554 constexpr
void finalise_alignment() noexcept
560 this->check_score_of_cells_in_last_column(alignment_column, this->alignment_state);
561 this->check_score_of_last_cell(*alignment_column_it, this->alignment_state);
588 template <
typename index_t,
typename sequence1_t,
typename sequence2_t>
590 requires !traits_t::is_vectorised
592 constexpr
auto make_alignment_result(index_t
const idx,
593 sequence1_t & sequence1,
594 sequence2_t & sequence2)
600 static_assert(config_t::template exists<align_cfg::result>(),
601 "The configuration must contain an align_cfg::result element.");
603 using result_value_t =
typename align_result_selector<sequence1_t, sequence2_t, config_t>::type;
604 result_value_t res{};
609 if constexpr (traits_t::result_type_rank >= 0)
610 res.score = this->alignment_state.optimum.score;
612 if constexpr (traits_t::result_type_rank >= 1)
614 res.back_coordinate = alignment_coordinate{column_index_type{this->alignment_state.optimum.column_index},
615 row_index_type{this->alignment_state.optimum.row_index}};
617 if constexpr (traits_t::is_banded)
618 res.back_coordinate.second += res.back_coordinate.first - this->trace_matrix.band_col_index;
621 if constexpr (traits_t::result_type_rank >= 2)
624 aligned_sequence_builder builder{sequence1, sequence2};
625 auto optimum_coordinate = alignment_coordinate{column_index_type{this->alignment_state.optimum.column_index},
626 row_index_type{this->alignment_state.optimum.row_index}};
627 auto trace_res = builder(this->trace_matrix.trace_path(optimum_coordinate));
628 res.front_coordinate.first = trace_res.first_sequence_slice_positions.first;
629 res.front_coordinate.second = trace_res.second_sequence_slice_positions.first;
631 if constexpr (traits_t::result_type_rank == 3)
636 if constexpr (traits_t::is_debug)
638 res.score_debug_matrix =
std::move(score_debug_matrix);
639 if constexpr (traits_t::result_type_rank == 3)
640 res.trace_debug_matrix =
std::
move(trace_debug_matrix);
669 template <typename indexed_sequence_pair_range_t>
671 requires traits_t::is_vectorised
673 constexpr auto make_alignment_result(indexed_sequence_pair_range_t && index_sequence_pairs)
675 using indexed_sequence_pair_t = std::ranges::range_value_t<indexed_sequence_pair_range_t>;
676 using sequence_pair_t = std::tuple_element_t<0, indexed_sequence_pair_t>;
677 using sequence1_t = std::tuple_element_t<0, sequence_pair_t>;
678 using sequence2_t = std::tuple_element_t<1, sequence_pair_t>;
680 using result_value_t =
typename align_result_selector<sequence1_t, sequence2_t, config_t>::type;
683 results.
reserve(std::ranges::distance(index_sequence_pairs));
685 size_t simd_index = 0;
686 for (
auto && [sequence_pairs, alignment_index] : index_sequence_pairs)
688 (void) sequence_pairs;
689 result_value_t res{};
690 res.id = alignment_index;
692 res.score = this->alignment_state.optimum.score[simd_index];
694 if constexpr (traits_t::result_type_rank >= 1)
696 res.back_coordinate.first = this->alignment_state.optimum.column_index[simd_index] ;
697 res.back_coordinate.second = this->alignment_state.optimum.row_index[simd_index];
715 void dump_alignment_column()
719 auto column = this->current_alignment_column();
721 auto coord = get<1>(column.front()).coordinate;
722 if constexpr (traits_t::is_banded)
723 coord.second += coord.first - this->score_matrix.band_col_index;
725 matrix_offset
offset{row_index_type{static_cast<std::ptrdiff_t>(coord.second)},
726 column_index_type{static_cast<std::ptrdiff_t>(coord.first)}};
731 return get<0>(tpl).current;
732 }), score_debug_matrix.begin() +
offset);
735 if constexpr (config_t::template exists<align_cfg::result<with_alignment_type>>())
737 auto trace_matrix_it = trace_debug_matrix.begin() +
offset;
741 return get<1>(tpl).current;
742 }), trace_debug_matrix.begin() +
offset);
749 alignment_column_t alignment_column{};
751 alignment_column_iterator_t alignment_column_it{};
753 score_debug_matrix_t score_debug_matrix{};
755 trace_debug_matrix_t trace_debug_matrix{};