SeqAn3 3.2.0
The Modern C++ library for sequence analysis.
pairwise_alignment_algorithm.hpp
Go to the documentation of this file.
1// -----------------------------------------------------------------------------------------------------
2// Copyright (c) 2006-2022, Knut Reinert & Freie Universität Berlin
3// Copyright (c) 2016-2022, Knut Reinert & MPI für molekulare Genetik
4// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6// -----------------------------------------------------------------------------------------------------
7
13#pragma once
14
15#include <concepts>
16#include <ranges>
17
24
25namespace seqan3::detail
26{
27
44template <typename alignment_configuration_t, typename... policies_t>
45 requires is_type_specialisation_of_v<alignment_configuration_t, configuration>
46class pairwise_alignment_algorithm : protected policies_t...
47{
48protected:
50 using traits_type = alignment_configuration_traits<alignment_configuration_t>;
52 using score_type = typename traits_type::score_type;
54 using alignment_result_type = typename traits_type::alignment_result_type;
55
56 static_assert(!std::same_as<alignment_result_type, empty_type>, "Alignment result type was not configured.");
57
58public:
62 pairwise_alignment_algorithm() = default;
63 pairwise_alignment_algorithm(pairwise_alignment_algorithm const &) = default;
64 pairwise_alignment_algorithm(pairwise_alignment_algorithm &&) = default;
65 pairwise_alignment_algorithm & operator=(pairwise_alignment_algorithm const &) = default;
66 pairwise_alignment_algorithm & operator=(pairwise_alignment_algorithm &&) = default;
67 ~pairwise_alignment_algorithm() = default;
68
76 pairwise_alignment_algorithm(alignment_configuration_t const & config) : policies_t(config)...
77 {}
79
125 template <indexed_sequence_pair_range indexed_sequence_pairs_t, typename callback_t>
126 requires std::invocable<callback_t, alignment_result_type>
127 void operator()(indexed_sequence_pairs_t && indexed_sequence_pairs, callback_t && callback)
128 {
129 using std::get;
130
131 for (auto && [sequence_pair, idx] : indexed_sequence_pairs)
132 {
133 size_t const sequence1_size = std::ranges::distance(get<0>(sequence_pair));
134 size_t const sequence2_size = std::ranges::distance(get<1>(sequence_pair));
135
136 auto && [alignment_matrix, index_matrix] = this->acquire_matrices(sequence1_size, sequence2_size);
137
138 compute_matrix(get<0>(sequence_pair), get<1>(sequence_pair), alignment_matrix, index_matrix);
139 this->make_result_and_invoke(std::forward<decltype(sequence_pair)>(sequence_pair),
140 std::move(idx),
141 this->optimal_score,
142 this->optimal_coordinate,
143 alignment_matrix,
144 callback);
145 }
146 }
148
150 template <indexed_sequence_pair_range indexed_sequence_pairs_t, typename callback_t>
151 requires traits_type::is_vectorised && std::invocable<callback_t, alignment_result_type>
152 auto operator()(indexed_sequence_pairs_t && indexed_sequence_pairs, callback_t && callback)
153 {
154 using simd_collection_t = std::vector<score_type, aligned_allocator<score_type, alignof(score_type)>>;
155 using original_score_t = typename traits_type::original_score_type;
156
157 // Extract the batch of sequences for the first and the second sequence.
158 auto seq1_collection = indexed_sequence_pairs | views::elements<0> | views::elements<0>;
159 auto seq2_collection = indexed_sequence_pairs | views::elements<0> | views::elements<1>;
160
161 this->initialise_tracker(seq1_collection, seq2_collection);
162
163 // Convert batch of sequences to sequence of simd vectors.
164 thread_local simd_collection_t simd_seq1_collection{};
165 thread_local simd_collection_t simd_seq2_collection{};
166
167 convert_batch_of_sequences_to_simd_vector(simd_seq1_collection,
168 seq1_collection,
169 this->scoring_scheme.padding_symbol);
170 convert_batch_of_sequences_to_simd_vector(simd_seq2_collection,
171 seq2_collection,
172 this->scoring_scheme.padding_symbol);
173
174 size_t const sequence1_size = std::ranges::distance(simd_seq1_collection);
175 size_t const sequence2_size = std::ranges::distance(simd_seq2_collection);
176
177 auto && [alignment_matrix, index_matrix] = this->acquire_matrices(sequence1_size, sequence2_size);
178
179 compute_matrix(simd_seq1_collection, simd_seq2_collection, alignment_matrix, index_matrix);
180
181 size_t index = 0;
182 for (auto && [sequence_pair, idx] : indexed_sequence_pairs)
183 {
184 original_score_t score = this->optimal_score[index]
185 - (this->padding_offsets[index] * this->scoring_scheme.padding_match_score());
186 matrix_coordinate coordinate{row_index_type{size_t{this->optimal_coordinate.row[index]}},
187 column_index_type{size_t{this->optimal_coordinate.col[index]}}};
188 this->make_result_and_invoke(std::forward<decltype(sequence_pair)>(sequence_pair),
189 std::move(idx),
190 std::move(score),
191 std::move(coordinate),
192 alignment_matrix,
193 callback);
194 ++index;
195 }
196 }
197
198protected:
218 template <typename simd_sequence_t, std::ranges::forward_range sequence_collection_t, arithmetic padding_symbol_t>
219 requires std::ranges::output_range<simd_sequence_t, score_type>
220 void convert_batch_of_sequences_to_simd_vector(simd_sequence_t & simd_sequence,
221 sequence_collection_t & sequences,
222 padding_symbol_t const & padding_symbol)
223 {
224 assert(static_cast<size_t>(std::ranges::distance(sequences)) <= traits_type::alignments_per_vector);
225
226 simd_sequence.clear();
227 for (auto && simd_vector_chunk : sequences | views::to_simd<score_type>(padding_symbol))
228 std::ranges::move(simd_vector_chunk, std::back_inserter(simd_sequence));
229 }
230
244 template <std::ranges::forward_range sequence1_t,
245 std::ranges::forward_range sequence2_t,
246 std::ranges::input_range alignment_matrix_t,
247 std::ranges::input_range index_matrix_t>
248 requires std::ranges::forward_range<std::ranges::range_reference_t<alignment_matrix_t>>
249 && std::ranges::forward_range<std::ranges::range_reference_t<index_matrix_t>>
250 void compute_matrix(sequence1_t && sequence1,
251 sequence2_t && sequence2,
252 alignment_matrix_t && alignment_matrix,
253 index_matrix_t && index_matrix)
254 {
255 // ---------------------------------------------------------------------
256 // Initialisation phase: allocate memory and initialise first column.
257 // ---------------------------------------------------------------------
258
259 this->reset_optimum(); // Reset the tracker for the new alignment computation.
260
261 auto alignment_matrix_it = alignment_matrix.begin();
262 auto indexed_matrix_it = index_matrix.begin();
263
264 initialise_column(*alignment_matrix_it, *indexed_matrix_it, sequence2);
265
266 // ---------------------------------------------------------------------
267 // Iteration phase: compute column-wise the alignment matrix.
268 // ---------------------------------------------------------------------
269
270 for (auto alphabet1 : sequence1)
271 compute_column(*++alignment_matrix_it,
272 *++indexed_matrix_it,
273 this->scoring_scheme_profile_column(alphabet1),
274 sequence2);
275
276 // ---------------------------------------------------------------------
277 // Final phase: track score of last column
278 // ---------------------------------------------------------------------
279
280 auto && alignment_column = *alignment_matrix_it;
281 auto && cell_index_column = *indexed_matrix_it;
282
283 auto alignment_column_it = alignment_column.begin();
284 auto cell_index_column_it = cell_index_column.begin();
285
286 this->track_last_column_cell(*alignment_column_it, *cell_index_column_it);
287
288 for ([[maybe_unused]] auto && unused : sequence2)
289 this->track_last_column_cell(*++alignment_column_it, *++cell_index_column_it);
290
291 this->track_final_cell(*alignment_column_it, *cell_index_column_it);
292 }
293
312 template <std::ranges::input_range alignment_column_t,
313 std::ranges::input_range cell_index_column_t,
314 std::ranges::input_range sequence2_t>
315 void initialise_column(alignment_column_t && alignment_column,
316 cell_index_column_t && cell_index_column,
317 sequence2_t && sequence2)
318 {
319 // ---------------------------------------------------------------------
320 // Initial phase: prepare column and initialise first cell
321 // ---------------------------------------------------------------------
322
323 auto first_column_it = alignment_column.begin();
324 auto cell_index_column_it = cell_index_column.begin();
325 *first_column_it = this->track_cell(this->initialise_origin_cell(), *cell_index_column_it);
326
327 // ---------------------------------------------------------------------
328 // Iteration phase: iterate over column and compute each cell
329 // ---------------------------------------------------------------------
330
331 for ([[maybe_unused]] auto const & unused : sequence2)
332 {
333 ++first_column_it;
334 *first_column_it =
335 this->track_cell(this->initialise_first_column_cell(*first_column_it), *++cell_index_column_it);
336 }
337
338 // ---------------------------------------------------------------------
339 // Final phase: track last cell of initial column
340 // ---------------------------------------------------------------------
341
342 this->track_last_row_cell(*first_column_it, *cell_index_column_it);
343 }
344
363 template <std::ranges::input_range alignment_column_t,
364 std::ranges::input_range cell_index_column_t,
365 typename alphabet1_t,
366 std::ranges::input_range sequence2_t>
367 requires semialphabet<alphabet1_t> || simd_concept<alphabet1_t>
368 void compute_column(alignment_column_t && alignment_column,
369 cell_index_column_t && cell_index_column,
370 alphabet1_t const & alphabet1,
371 sequence2_t && sequence2)
372 {
373 using score_type = typename traits_type::score_type;
374
375 // ---------------------------------------------------------------------
376 // Initial phase: prepare column and initialise first cell
377 // ---------------------------------------------------------------------
378
379 auto alignment_column_it = alignment_column.begin();
380 auto cell_index_column_it = cell_index_column.begin();
381
382 auto cell = *alignment_column_it;
383 score_type diagonal = cell.best_score();
384 *alignment_column_it = this->track_cell(this->initialise_first_row_cell(cell), *cell_index_column_it);
385
386 // ---------------------------------------------------------------------
387 // Iteration phase: iterate over column and compute each cell
388 // ---------------------------------------------------------------------
389
390 for (auto const & alphabet2 : sequence2)
391 {
392 auto cell = *++alignment_column_it;
393 score_type next_diagonal = cell.best_score();
394 *alignment_column_it = this->track_cell(
395 this->compute_inner_cell(diagonal, cell, this->scoring_scheme.score(alphabet1, alphabet2)),
396 *++cell_index_column_it);
397 diagonal = next_diagonal;
398 }
399
400 // ---------------------------------------------------------------------
401 // Final phase: track last cell
402 // ---------------------------------------------------------------------
403
404 this->track_last_row_cell(*alignment_column_it, *cell_index_column_it);
405 }
406};
407
408} // namespace seqan3::detail
Provides seqan3::aligned_allocator.
Provides helper type traits for the configuration and execution of the alignment algorithm.
T back_inserter(T... args)
Provides seqan3::views::elements.
Provides seqan3::detail::empty_type.
T forward(T... args)
The basis for seqan3::alphabet, but requires only rank interface (not char).
T move(T... args)
constexpr auto const & get(configuration< configs_t... > const &config) noexcept
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: configuration.hpp:415
Provides seqan3::detail::to_simd view.
Provides traits to inspect some information of a type, for example its name.