SeqAn3 3.1.0
The Modern C++ library for sequence analysis.
pairwise_alignment_algorithm.hpp
Go to the documentation of this file.
1// -----------------------------------------------------------------------------------------------------
2// Copyright (c) 2006-2021, Knut Reinert & Freie Universität Berlin
3// Copyright (c) 2016-2021, Knut Reinert & MPI für molekulare Genetik
4// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6// -----------------------------------------------------------------------------------------------------
7
13#pragma once
14
15#include <seqan3/std/concepts>
16#include <seqan3/std/ranges>
17
24
25namespace seqan3::detail
26{
27
44template <typename alignment_configuration_t, typename ...policies_t>
46 requires is_type_specialisation_of_v<alignment_configuration_t, configuration>
48class pairwise_alignment_algorithm : protected policies_t...
49{
50protected:
52 using traits_type = alignment_configuration_traits<alignment_configuration_t>;
54 using score_type = typename traits_type::score_type;
56 using alignment_result_type = typename traits_type::alignment_result_type;
57
58 static_assert(!std::same_as<alignment_result_type, empty_type>, "Alignment result type was not configured.");
59
60public:
64 pairwise_alignment_algorithm() = default;
65 pairwise_alignment_algorithm(pairwise_alignment_algorithm const &) = default;
66 pairwise_alignment_algorithm(pairwise_alignment_algorithm &&) = default;
67 pairwise_alignment_algorithm & operator=(pairwise_alignment_algorithm const &) = default;
68 pairwise_alignment_algorithm & operator=(pairwise_alignment_algorithm &&) = default;
69 ~pairwise_alignment_algorithm() = default;
70
78 pairwise_alignment_algorithm(alignment_configuration_t const & config) : policies_t(config)...
79 {}
81
127 template <indexed_sequence_pair_range indexed_sequence_pairs_t, typename callback_t>
129 requires std::invocable<callback_t, alignment_result_type>
131 void operator()(indexed_sequence_pairs_t && indexed_sequence_pairs, callback_t && callback)
132 {
133 using std::get;
134
135 for (auto && [sequence_pair, idx] : indexed_sequence_pairs)
136 {
137 size_t const sequence1_size = std::ranges::distance(get<0>(sequence_pair));
138 size_t const sequence2_size = std::ranges::distance(get<1>(sequence_pair));
139
140 auto && [alignment_matrix, index_matrix] = this->acquire_matrices(sequence1_size, sequence2_size);
141
142 compute_matrix(get<0>(sequence_pair), get<1>(sequence_pair), alignment_matrix, index_matrix);
143 this->make_result_and_invoke(std::forward<decltype(sequence_pair)>(sequence_pair),
144 std::move(idx),
145 this->optimal_score,
146 this->optimal_coordinate,
147 alignment_matrix,
148 callback);
149 }
150 }
152
154 template <indexed_sequence_pair_range indexed_sequence_pairs_t, typename callback_t>
156 requires traits_type::is_vectorised && std::invocable<callback_t, alignment_result_type>
158 auto operator()(indexed_sequence_pairs_t && indexed_sequence_pairs, callback_t && callback)
159 {
160 using simd_collection_t = std::vector<score_type, aligned_allocator<score_type, alignof(score_type)>>;
161 using original_score_t = typename traits_type::original_score_type;
162
163 // Extract the batch of sequences for the first and the second sequence.
164 auto seq1_collection = indexed_sequence_pairs | views::elements<0> | views::elements<0>;
165 auto seq2_collection = indexed_sequence_pairs | views::elements<0> | views::elements<1>;
166
167 this->initialise_tracker(seq1_collection, seq2_collection);
168
169 // Convert batch of sequences to sequence of simd vectors.
170 thread_local simd_collection_t simd_seq1_collection{};
171 thread_local simd_collection_t simd_seq2_collection{};
172
173 convert_batch_of_sequences_to_simd_vector(simd_seq1_collection,
174 seq1_collection,
175 this->scoring_scheme.padding_symbol);
176 convert_batch_of_sequences_to_simd_vector(simd_seq2_collection,
177 seq2_collection,
178 this->scoring_scheme.padding_symbol);
179
180 size_t const sequence1_size = std::ranges::distance(simd_seq1_collection);
181 size_t const sequence2_size = std::ranges::distance(simd_seq2_collection);
182
183 auto && [alignment_matrix, index_matrix] = this->acquire_matrices(sequence1_size, sequence2_size);
184
185 compute_matrix(simd_seq1_collection, simd_seq2_collection, alignment_matrix, index_matrix);
186
187 size_t index = 0;
188 for (auto && [sequence_pair, idx] : indexed_sequence_pairs)
189 {
190 original_score_t score = this->optimal_score[index] -
191 (this->padding_offsets[index] * this->scoring_scheme.padding_match_score());
192 matrix_coordinate coordinate{row_index_type{size_t{this->optimal_coordinate.row[index]}},
193 column_index_type{size_t{this->optimal_coordinate.col[index]}}};
194 this->make_result_and_invoke(std::forward<decltype(sequence_pair)>(sequence_pair),
195 std::move(idx),
196 std::move(score),
197 std::move(coordinate),
198 alignment_matrix,
199 callback);
200 ++index;
201 }
202 }
203
204protected:
224 template <typename simd_sequence_t,
225 std::ranges::forward_range sequence_collection_t,
226 arithmetic padding_symbol_t>
228 requires std::ranges::output_range<simd_sequence_t, score_type>
230 void convert_batch_of_sequences_to_simd_vector(simd_sequence_t & simd_sequence,
231 sequence_collection_t & sequences,
232 padding_symbol_t const & padding_symbol)
233 {
234 assert(static_cast<size_t>(std::ranges::distance(sequences)) <= traits_type::alignments_per_vector);
235
236 simd_sequence.clear();
237 for (auto && simd_vector_chunk : sequences | views::to_simd<score_type>(padding_symbol))
238 std::ranges::move(simd_vector_chunk, std::cpp20::back_inserter(simd_sequence));
239 }
240
254 template <std::ranges::forward_range sequence1_t,
255 std::ranges::forward_range sequence2_t,
256 std::ranges::input_range alignment_matrix_t,
257 std::ranges::input_range index_matrix_t>
259 requires std::ranges::forward_range<std::ranges::range_reference_t<alignment_matrix_t>> &&
260 std::ranges::forward_range<std::ranges::range_reference_t<index_matrix_t>>
262 void compute_matrix(sequence1_t && sequence1,
263 sequence2_t && sequence2,
264 alignment_matrix_t && alignment_matrix,
265 index_matrix_t && index_matrix)
266 {
267 // ---------------------------------------------------------------------
268 // Initialisation phase: allocate memory and initialise first column.
269 // ---------------------------------------------------------------------
270
271 this->reset_optimum(); // Reset the tracker for the new alignment computation.
272
273 auto alignment_matrix_it = alignment_matrix.begin();
274 auto indexed_matrix_it = index_matrix.begin();
275
276 initialise_column(*alignment_matrix_it, *indexed_matrix_it, sequence2);
277
278 // ---------------------------------------------------------------------
279 // Iteration phase: compute column-wise the alignment matrix.
280 // ---------------------------------------------------------------------
281
282 for (auto alphabet1 : sequence1)
283 compute_column(*++alignment_matrix_it,
284 *++indexed_matrix_it,
285 this->scoring_scheme_profile_column(alphabet1),
286 sequence2);
287
288 // ---------------------------------------------------------------------
289 // Final phase: track score of last column
290 // ---------------------------------------------------------------------
291
292 auto && alignment_column = *alignment_matrix_it;
293 auto && cell_index_column = *indexed_matrix_it;
294
295 auto alignment_column_it = alignment_column.begin();
296 auto cell_index_column_it = cell_index_column.begin();
297
298 this->track_last_column_cell(*alignment_column_it, *cell_index_column_it);
299
300 for ([[maybe_unused]] auto && unused : sequence2)
301 this->track_last_column_cell(*++alignment_column_it, *++cell_index_column_it);
302
303 this->track_final_cell(*alignment_column_it, *cell_index_column_it);
304 }
305
324 template <std::ranges::input_range alignment_column_t,
325 std::ranges::input_range cell_index_column_t,
326 std::ranges::input_range sequence2_t>
327 void initialise_column(alignment_column_t && alignment_column,
328 cell_index_column_t && cell_index_column,
329 sequence2_t && sequence2)
330 {
331 // ---------------------------------------------------------------------
332 // Initial phase: prepare column and initialise first cell
333 // ---------------------------------------------------------------------
334
335 auto first_column_it = alignment_column.begin();
336 auto cell_index_column_it = cell_index_column.begin();
337 *first_column_it = this->track_cell(this->initialise_origin_cell(), *cell_index_column_it);
338
339 // ---------------------------------------------------------------------
340 // Iteration phase: iterate over column and compute each cell
341 // ---------------------------------------------------------------------
342
343 for ([[maybe_unused]] auto const & unused : sequence2)
344 {
345 ++first_column_it;
346 *first_column_it = this->track_cell(this->initialise_first_column_cell(*first_column_it),
347 *++cell_index_column_it);
348 }
349
350 // ---------------------------------------------------------------------
351 // Final phase: track last cell of initial column
352 // ---------------------------------------------------------------------
353
354 this->track_last_row_cell(*first_column_it, *cell_index_column_it);
355 }
356
375 template <std::ranges::input_range alignment_column_t,
376 std::ranges::input_range cell_index_column_t,
377 typename alphabet1_t,
378 std::ranges::input_range sequence2_t>
380 requires semialphabet<alphabet1_t> || simd_concept<alphabet1_t>
382 void compute_column(alignment_column_t && alignment_column,
383 cell_index_column_t && cell_index_column,
384 alphabet1_t const & alphabet1,
385 sequence2_t && sequence2)
386 {
387 using score_type = typename traits_type::score_type;
388
389 // ---------------------------------------------------------------------
390 // Initial phase: prepare column and initialise first cell
391 // ---------------------------------------------------------------------
392
393 auto alignment_column_it = alignment_column.begin();
394 auto cell_index_column_it = cell_index_column.begin();
395
396 auto cell = *alignment_column_it;
397 score_type diagonal = cell.best_score();
398 *alignment_column_it = this->track_cell(this->initialise_first_row_cell(cell), *cell_index_column_it);
399
400 // ---------------------------------------------------------------------
401 // Iteration phase: iterate over column and compute each cell
402 // ---------------------------------------------------------------------
403
404 for (auto const & alphabet2 : sequence2)
405 {
406 auto cell = *++alignment_column_it;
407 score_type next_diagonal = cell.best_score();
408 *alignment_column_it = this->track_cell(
409 this->compute_inner_cell(diagonal, cell, this->scoring_scheme.score(alphabet1, alphabet2)),
410 *++cell_index_column_it);
411 diagonal = next_diagonal;
412 }
413
414 // ---------------------------------------------------------------------
415 // Final phase: track last cell
416 // ---------------------------------------------------------------------
417
418 this->track_last_row_cell(*alignment_column_it, *cell_index_column_it);
419 }
420};
421
422} // namespace seqan3::detail
Provides seqan3::aligned_allocator.
Provides helper type traits for the configuration and execution of the alignment algorithm.
The <concepts> header from C++20's standard library.
Provides seqan3::views::elements.
Provides seqan3::detail::empty_type.
T forward(T... args)
A type that satisfies std::is_arithmetic_v<t>.
The basis for seqan3::alphabet, but requires only rank interface (not char).
constexpr auto const & get(configuration< configs_t... > const &config) noexcept
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: configuration.hpp:429
The <ranges> header from C++20's standard library.
Provides seqan3::detail::to_simd view.
Provides traits to inspect some information of a type, for example its name.