17 #include <type_traits>
42 namespace seqan3::detail
78 template <
typename config_t,
typename ...algorithm_policies_t>
79 class alignment_algorithm :
80 public invoke_deferred_crtp_base<algorithm_policies_t, alignment_algorithm<config_t, algorithm_policies_t...>>...
85 using traits_t = alignment_configuration_traits<config_t>;
87 using alignment_column_t = decltype(std::declval<alignment_algorithm>().current_alignment_column());
89 using alignment_column_iterator_t = std::ranges::iterator_t<alignment_column_t>;
91 using alignment_result_t =
typename traits_t::alignment_result_type;
93 static_assert(!std::same_as<alignment_result_t, empty_type>,
"Alignment result type was not configured.");
96 using score_debug_matrix_t =
98 two_dimensional_matrix<std::optional<typename traits_t::original_score_type>,
100 matrix_major_order::column>,
103 using trace_debug_matrix_t =
105 two_dimensional_matrix<std::optional<trace_directions>,
107 matrix_major_order::column>,
114 constexpr alignment_algorithm() =
default;
115 constexpr alignment_algorithm(alignment_algorithm
const &) =
default;
116 constexpr alignment_algorithm(alignment_algorithm &&) =
default;
117 constexpr alignment_algorithm & operator=(alignment_algorithm
const &) =
default;
118 constexpr alignment_algorithm & operator=(alignment_algorithm &&) =
default;
119 ~alignment_algorithm() =
default;
129 explicit constexpr alignment_algorithm(config_t
const & cfg) :
130 invoke_deferred_crtp_base<algorithm_policies_t, alignment_algorithm<config_t, algorithm_policies_t...>>{cfg}...,
131 cfg_ptr{std::make_shared<config_t>(cfg)}
133 this->scoring_scheme = seqan3::get<align_cfg::scoring_scheme>(*cfg_ptr).scheme;
134 this->initialise_alignment_state(*cfg_ptr);
183 template <indexed_sequence_pair_range indexed_sequence_pairs_t,
typename callback_t>
185 requires (!traits_t::is_vectorised) && std::invocable<callback_t, alignment_result_t>
187 void operator()(indexed_sequence_pairs_t && indexed_sequence_pairs, callback_t && callback)
191 for (
auto && [sequence_pair, idx] : indexed_sequence_pairs)
192 compute_single_pair(idx, get<0>(sequence_pair), get<1>(sequence_pair), callback);
196 template <indexed_sequence_pair_range indexed_sequence_pairs_t,
typename callback_t>
198 requires traits_t::is_vectorised && std::invocable<callback_t, alignment_result_t>
200 void operator()(indexed_sequence_pairs_t && indexed_sequence_pairs, callback_t && callback)
202 assert(cfg_ptr !=
nullptr);
204 static_assert(simd_concept<typename traits_t::score_type>,
"Expected simd score type.");
205 static_assert(simd_concept<typename traits_t::trace_type>,
"Expected simd trace type.");
208 auto sequence1_range = indexed_sequence_pairs | views::get<0> | views::get<0>;
209 auto sequence2_range = indexed_sequence_pairs | views::get<0> | views::get<1>;
212 this->initialise_find_optimum_policy(sequence1_range,
214 this->scoring_scheme.padding_match_score());
217 auto simd_sequences1 = convert_batch_of_sequences_to_simd_vector(sequence1_range);
218 auto simd_sequences2 = convert_batch_of_sequences_to_simd_vector(sequence2_range);
220 max_size_in_collection =
std::pair{simd_sequences1.size(), simd_sequences2.size()};
222 this->alignment_state.reset_optimum();
224 compute_matrix(simd_sequences1, simd_sequences2);
226 make_alignment_result(indexed_sequence_pairs, callback);
244 template <
typename sequence_range_t>
245 constexpr
auto convert_batch_of_sequences_to_simd_vector(sequence_range_t & sequences)
247 assert(
static_cast<size_t>(std::ranges::distance(sequences)) <= traits_t::alignments_per_vector);
249 using simd_score_t =
typename traits_t::score_type;
251 std::vector<simd_score_t, aligned_allocator<simd_score_t,
alignof(simd_score_t)>> simd_sequence{};
253 for (
auto && simd_vector_chunk : sequences | views::to_simd<simd_score_t>(traits_t::padding_symbol))
254 for (
auto && simd_vector : simd_vector_chunk)
257 return simd_sequence;
277 template <std::ranges::forward_range sequence1_t,
278 std::ranges::forward_range sequence2_t,
280 constexpr
void compute_single_pair(
size_t const idx,
281 sequence1_t && sequence1,
282 sequence2_t && sequence2,
283 callback_t & callback)
285 assert(cfg_ptr !=
nullptr);
287 if constexpr (traits_t::is_debug)
288 initialise_debug_matrices(sequence1, sequence2);
291 this->alignment_state.reset_optimum();
293 if constexpr (traits_t::is_banded)
297 auto const & band = get<align_cfg::band_fixed_size>(*cfg_ptr);
298 check_valid_band_parameter(sequence1, sequence2, band);
299 auto && [subsequence1, subsequence2] = this->slice_sequences(sequence1, sequence2, band);
301 compute_matrix(subsequence1, subsequence2, band);
302 make_alignment_result(idx, subsequence1, subsequence2, callback);
306 compute_matrix(sequence1, sequence2);
307 make_alignment_result(idx, sequence1, sequence2, callback);
327 template <
typename sequence1_t,
typename sequence2_t>
328 constexpr
void check_valid_band_parameter(sequence1_t && sequence1,
329 sequence2_t && sequence2,
330 align_cfg::band_fixed_size
const & band)
332 static_assert(config_t::template exists<align_cfg::band_fixed_size>(),
333 "The band configuration is required for the banded alignment algorithm.");
336 static_assert(std::is_signed_v<diff_type>,
"Only signed types can be used to test the band parameters.");
338 if (
static_cast<diff_type
>(band.lower_diagonal) > std::ranges::distance(sequence1))
340 throw invalid_alignment_configuration
342 "Invalid band error: The lower diagonal excludes the whole alignment matrix."
346 if (
static_cast<diff_type
>(band.upper_diagonal) < -std::ranges::distance(sequence2))
348 throw invalid_alignment_configuration
350 "Invalid band error: The upper diagonal excludes the whole alignment matrix."
367 template <
typename sequence1_t,
typename sequence2_t>
368 constexpr
void initialise_debug_matrices(sequence1_t & sequence1, sequence2_t & sequence2)
370 size_t rows = std::ranges::distance(sequence2) + 1;
371 size_t cols = std::ranges::distance(sequence1) + 1;
373 score_debug_matrix = score_debug_matrix_t{number_rows{rows}, number_cols{cols}};
374 trace_debug_matrix = trace_debug_matrix_t{number_rows{rows}, number_cols{cols}};
384 template <
typename sequence1_t,
typename sequence2_t>
385 void compute_matrix(sequence1_t & sequence1, sequence2_t & sequence2)
387 requires (!traits_t::is_banded)
394 this->allocate_matrix(sequence1, sequence2);
395 initialise_first_alignment_column(sequence2);
401 for (
auto const & seq1_value : sequence1)
403 compute_alignment_column<true>(seq1_value, sequence2);
404 finalise_last_cell_in_column(
true);
411 finalise_alignment();
415 template <
typename sequence1_t,
typename sequence2_t>
416 void compute_matrix(sequence1_t & sequence1, sequence2_t & sequence2, align_cfg::band_fixed_size
const & band)
418 requires traits_t::is_banded
426 this->allocate_matrix(sequence1, sequence2, band, this->alignment_state);
427 size_t last_row_index = this->score_matrix.band_row_index;
428 initialise_first_alignment_column(sequence2 |
views::take(last_row_index));
434 size_t sequence2_size = std::ranges::distance(sequence2);
435 for (
auto const & seq1_value : sequence1 |
views::take(this->score_matrix.band_col_index))
437 compute_alignment_column<true>(seq1_value, sequence2 |
views::take(++last_row_index));
439 finalise_last_cell_in_column(last_row_index >= sequence2_size);
446 size_t first_row_index = 0;
447 for (
auto const & seq1_value : sequence1 |
views::drop(this->score_matrix.band_col_index))
450 compute_alignment_column<false>(seq1_value, sequence2 |
views::slice(first_row_index++, ++last_row_index));
452 finalise_last_cell_in_column(last_row_index >= sequence2_size);
459 finalise_alignment();
474 template <
typename sequence2_t>
475 auto initialise_first_alignment_column(sequence2_t && sequence2)
478 alignment_column = this->current_alignment_column();
479 assert(!alignment_column.empty());
482 alignment_column_it = alignment_column.begin();
483 this->init_origin_cell(*alignment_column_it, this->alignment_state);
486 for (
auto it = std::ranges::begin(sequence2); it != std::ranges::end(sequence2); ++it)
487 this->init_column_cell(*++alignment_column_it, this->alignment_state);
490 bool at_last_row =
true;
491 if constexpr (traits_t::is_banded)
492 at_last_row =
static_cast<size_t>(this->score_matrix.band_row_index) == this->score_matrix.num_rows - 1;
494 finalise_last_cell_in_column(at_last_row);
512 template <
bool initialise_first_cell,
typename sequence1_value_t,
typename sequence2_t>
513 void compute_alignment_column(sequence1_value_t
const & seq1_value, sequence2_t && sequence2)
515 this->next_alignment_column();
516 alignment_column = this->current_alignment_column();
517 alignment_column_it = alignment_column.begin();
521 if constexpr (initialise_first_cell)
523 this->init_row_cell(*alignment_column_it, this->alignment_state);
527 this->compute_first_band_cell(*alignment_column_it,
528 this->alignment_state,
529 this->scoring_scheme.score(seq1_value, *seq2_it));
533 for (; seq2_it != std::ranges::end(sequence2); ++seq2_it)
534 this->compute_cell(*++alignment_column_it,
535 this->alignment_state,
536 this->scoring_scheme.score(seq1_value, *seq2_it));
549 constexpr
void finalise_last_cell_in_column(
bool const at_last_row) noexcept
552 this->check_score_of_last_row_cell(*alignment_column_it, this->alignment_state);
554 if constexpr (traits_t::is_debug)
555 dump_alignment_column();
559 constexpr
void finalise_alignment() noexcept
565 this->check_score_of_cells_in_last_column(alignment_column, this->alignment_state);
566 this->check_score_of_last_cell(*alignment_column_it, this->alignment_state);
595 template <
typename index_t,
typename sequence1_t,
typename sequence2_t,
typename callback_t>
597 requires (!traits_t::is_vectorised)
599 constexpr
void make_alignment_result([[maybe_unused]] index_t
const idx,
600 [[maybe_unused]] sequence1_t & sequence1,
601 [[maybe_unused]] sequence2_t & sequence2,
602 callback_t & callback)
604 using result_value_t =
typename alignment_result_value_type_accessor<alignment_result_t>::type;
610 static_assert(seqan3::detail::alignment_configuration_traits<config_t>::has_output_configuration,
611 "The configuration must contain at least one align_cfg::output_* element.");
613 result_value_t res{};
615 if constexpr (traits_t::output_sequence1_id)
616 res.sequence1_id = idx;
618 if constexpr (traits_t::output_sequence2_id)
619 res.sequence2_id = idx;
622 if constexpr (traits_t::compute_score)
623 res.score = this->alignment_state.optimum.score;
625 if constexpr (traits_t::compute_end_positions)
627 res.end_positions = alignment_coordinate{column_index_type{this->alignment_state.optimum.column_index},
628 row_index_type{this->alignment_state.optimum.row_index}};
630 if constexpr (traits_t::is_banded)
631 res.end_positions.second += res.end_positions.first - this->trace_matrix.band_col_index;
634 if constexpr (traits_t::compute_begin_positions)
637 aligned_sequence_builder builder{sequence1, sequence2};
638 auto optimum_coordinate = alignment_coordinate{column_index_type{this->alignment_state.optimum.column_index},
639 row_index_type{this->alignment_state.optimum.row_index}};
640 auto trace_res = builder(this->trace_matrix.trace_path(optimum_coordinate));
641 res.begin_positions.first = trace_res.first_sequence_slice_positions.first;
642 res.begin_positions.second = trace_res.second_sequence_slice_positions.first;
644 if constexpr (traits_t::compute_sequence_alignment)
645 res.alignment =
std::move(trace_res.alignment);
649 if constexpr (traits_t::is_debug)
651 res.score_debug_matrix =
std::move(score_debug_matrix);
652 if constexpr (traits_t::compute_sequence_alignment)
653 res.trace_debug_matrix =
std::move(trace_debug_matrix);
684 template <
typename indexed_sequence_pair_range_t,
typename callback_t>
686 requires traits_t::is_vectorised
688 constexpr
auto make_alignment_result(indexed_sequence_pair_range_t && index_sequence_pairs,
689 callback_t & callback)
691 using result_value_t =
typename alignment_result_value_type_accessor<alignment_result_t>::type;
693 size_t simd_index = 0;
694 for (
auto && [sequence_pairs, alignment_index] : index_sequence_pairs)
696 (void) sequence_pairs;
697 result_value_t res{};
699 if constexpr (traits_t::output_sequence1_id)
700 res.sequence1_id = alignment_index;
702 if constexpr (traits_t::output_sequence2_id)
703 res.sequence2_id = alignment_index;
705 if constexpr (traits_t::compute_score)
706 res.score = this->alignment_state.optimum.score[simd_index];
708 if constexpr (traits_t::compute_end_positions)
710 res.end_positions.first = this->alignment_state.optimum.column_index[simd_index];
711 res.end_positions.second = this->alignment_state.optimum.row_index[simd_index];
727 void dump_alignment_column()
731 auto column = this->current_alignment_column();
733 auto coord = get<1>(column.front()).coordinate;
734 if constexpr (traits_t::is_banded)
735 coord.second += coord.first - this->score_matrix.band_col_index;
737 matrix_offset offset{row_index_type{
static_cast<std::ptrdiff_t>(coord.second)},
743 return get<0>(tpl).current;
744 }), score_debug_matrix.begin() + offset);
747 if constexpr (traits_t::compute_sequence_alignment)
749 auto trace_matrix_it = trace_debug_matrix.begin() + offset;
753 return get<1>(tpl).current;
754 }), trace_debug_matrix.begin() + offset);
761 alignment_column_t alignment_column{};
763 alignment_column_iterator_t alignment_column_it{};
765 score_debug_matrix_t score_debug_matrix{};
767 trace_debug_matrix_t trace_debug_matrix{};