SeqAn3 3.4.0-rc.1
The Modern C++ library for sequence analysis.
Loading...
Searching...
No Matches
edit_distance_unbanded.hpp
Go to the documentation of this file.
1// SPDX-FileCopyrightText: 2006-2024 Knut Reinert & Freie Universität Berlin
2// SPDX-FileCopyrightText: 2016-2024 Knut Reinert & MPI für molekulare Genetik
3// SPDX-License-Identifier: BSD-3-Clause
4
10#pragma once
11
12#include <algorithm>
13#include <bitset>
14#include <ranges>
15#include <utility>
16
24
25namespace seqan3::detail
26{
30template <typename derived_t, typename edit_traits>
31class edit_distance_unbanded_max_errors_policy :
33 edit_traits
35{
36protected:
37 static_assert(edit_traits::use_max_errors, "This policy assumes that edit_traits::use_max_errors is true.");
38
40 friend derived_t;
41
45 edit_distance_unbanded_max_errors_policy() noexcept = default;
46 edit_distance_unbanded_max_errors_policy(edit_distance_unbanded_max_errors_policy const &) noexcept =
47 default;
48 edit_distance_unbanded_max_errors_policy(edit_distance_unbanded_max_errors_policy &&) noexcept =
49 default;
50 edit_distance_unbanded_max_errors_policy &
51 operator=(edit_distance_unbanded_max_errors_policy const &) noexcept = default;
52 edit_distance_unbanded_max_errors_policy &
53 operator=(edit_distance_unbanded_max_errors_policy &&) noexcept = default;
54 ~edit_distance_unbanded_max_errors_policy() noexcept = default;
55
57
58 using typename edit_traits::score_type;
59 using typename edit_traits::word_type;
60
66 score_type max_errors{255};
68 size_t last_block{0u};
70 word_type last_score_mask{};
72
78 void max_errors_init(size_t block_count) noexcept
79 {
80 derived_t * self = static_cast<derived_t *>(this);
81
82 max_errors = -get<align_cfg::min_score>(self->config).score;
83
84 assert(max_errors >= score_type{0});
85
86 if (std::ranges::empty(self->query)) // [[unlikely]]
87 {
88 last_block = 0u;
89 self->score_mask = 0u;
90 last_score_mask = self->score_mask;
91 return;
92 }
93
94 last_block = block_count - 1u;
95 last_score_mask = self->score_mask;
96
97 // local_max_errors either stores the maximal number of _score (me.max_errors) or the needle size minus one. It
98 // is used for the mask computation and setting the initial score (the minus one is there because of the Ukkonen
99 // trick).
100 size_t const local_max_errors = std::min<size_t>(max_errors, std::ranges::size(self->query) - 1u);
101 self->score_mask = word_type{1u} << (local_max_errors % self->word_size);
102 last_block = std::min(local_max_errors / self->word_size, last_block);
103 self->_score = local_max_errors + 1u;
104 }
105
107 bool is_last_active_cell_within_last_row() const noexcept
108 {
109 derived_t const * self = static_cast<derived_t const *>(this);
110 return (self->score_mask == this->last_score_mask) && (this->last_block == self->vp.size() - 1u);
111 }
112
114 bool prev_last_active_cell() noexcept
115 {
116 derived_t * self = static_cast<derived_t *>(this);
117 self->score_mask >>= 1u;
118 if (self->score_mask != 0u)
119 return true;
120
121 if constexpr (edit_traits::is_global)
122 {
123 if (last_block == 0u) // [[unlikely]]
124 return false;
125 }
126
127 last_block--;
128
129 self->score_mask = word_type{1u} << (edit_traits::word_size - 1u);
130 return true;
131 }
132
134 void next_last_active_cell() noexcept
135 {
136 derived_t * self = static_cast<derived_t *>(this);
137 self->score_mask <<= 1u;
138 if (self->score_mask)
139 return;
140
141 self->score_mask = 1u;
142 last_block++;
143 }
144
148 bool update_last_active_cell() noexcept
149 {
150 derived_t * self = static_cast<derived_t *>(this);
151 // update the last active cell
152 while (!(self->_score <= max_errors))
153 {
154 self->advance_score(self->vn[last_block], self->vp[last_block], self->score_mask);
155 if (!prev_last_active_cell())
156 {
157 // prev_last_active_cell = false can only happen for global alignments
158 assert(edit_traits::is_global);
159 // we abort here if we don't need to compute a matrix, because the continued
160 // computation can't produce an alignment.
161 return !edit_traits::compute_matrix;
162 }
163 }
164
165 if (is_last_active_cell_within_last_row())
166 {
167 assert(self->_score <= max_errors);
168
169 if constexpr (edit_traits::is_semi_global)
170 self->update_best_score();
171
172 return self->on_hit();
173 }
174 else
175 {
176 next_last_active_cell();
177 self->advance_score(self->vp[last_block], self->vn[last_block], self->score_mask);
178 }
179
180 return false;
181 }
183
185 static size_t max_rows(word_type const score_mask,
186 unsigned const last_block,
187 score_type const score,
188 score_type const max_errors) noexcept
189 {
190 using score_matrix_type = typename edit_traits::score_matrix_type;
191 return score_matrix_type::max_rows(score_mask, last_block, score, max_errors);
192 }
193};
194
198template <typename derived_t, typename edit_traits>
199class edit_distance_unbanded_global_policy :
201 edit_traits
203{
204protected:
205 static_assert(edit_traits::is_global || edit_traits::is_semi_global,
206 "This policy assumes that edit_traits::is_global or edit_traits::is_semi_global is true.");
207
209 friend derived_t;
210
214 edit_distance_unbanded_global_policy() noexcept = default;
215 edit_distance_unbanded_global_policy(edit_distance_unbanded_global_policy const &) noexcept =
216 default;
217 edit_distance_unbanded_global_policy(edit_distance_unbanded_global_policy &&) noexcept = default;
218 edit_distance_unbanded_global_policy &
219 operator=(edit_distance_unbanded_global_policy const &) noexcept = default;
220 edit_distance_unbanded_global_policy &
221 operator=(edit_distance_unbanded_global_policy &&) noexcept = default;
222 ~edit_distance_unbanded_global_policy() noexcept = default;
223
225
227 using typename edit_traits::score_type;
228
237 score_type _best_score{};
239
245 void score_init() noexcept
246 {
247 derived_t const * self = static_cast<derived_t const *>(this);
248 _best_score = self->_score;
249 }
250
252 bool is_valid() const noexcept
253 {
254 [[maybe_unused]] derived_t const * self = static_cast<derived_t const *>(this);
255 // This condition uses the observation that after each computation of a column, _score has either the initial
256 // value of the first row (i.e. the entire column consist of INF's), has the value _score = max_errors + 1
257 // (there exists a cell within the column that has value <= max_errors, but is not on the last row) or _score <=
258 // max_errors (the score of the last active cell is <= max_errors)
259 if constexpr (edit_traits::use_max_errors)
260 return _best_score <= self->max_errors;
261
262 // When not using max_errors there is always a valid alignment, because the last row will always be updated and
263 // with it the score.
264 return true;
265 }
266
268 seqan3::detail::advanceable_alignment_coordinate<> invalid_coordinate() const noexcept
269 {
270 derived_t const * self = static_cast<derived_t const *>(this);
271 return {column_index_type{std::ranges::size(self->database)}, row_index_type{std::ranges::size(self->query)}};
272 }
273
275 void update_best_score() noexcept
276 {
277 derived_t const * self = static_cast<derived_t const *>(this);
278 _best_score = self->_score;
279 }
280
282 size_t end_positions_first() const noexcept
283 {
284 derived_t const * self = static_cast<derived_t const *>(this);
285 return std::ranges::size(self->database);
286 }
288
289public:
296 std::optional<score_type> score() const noexcept
297 {
298 derived_t const * self = static_cast<derived_t const *>(this);
299 static_assert(edit_traits::compute_score,
300 "score() can only be computed if you specify the result type within "
301 "your alignment config.");
302 if (!self->is_valid())
303 return std::nullopt;
304
305 return -_best_score;
306 }
307
310 seqan3::detail::advanceable_alignment_coordinate<> end_positions() const noexcept
311 {
312 derived_t const * self = static_cast<derived_t const *>(this);
313 static_assert(edit_traits::compute_end_positions,
314 "end_positions() can only be computed if you specify the "
315 "result type within your alignment config.");
316 if (!self->is_valid())
317 return self->invalid_coordinate();
318
319 column_index_type const first{self->end_positions_first()};
320 row_index_type const second{std::ranges::size(self->query)};
321 return {first, second};
322 }
324};
325
327template <typename derived_t, typename edit_traits>
328class edit_distance_unbanded_semi_global_policy : public edit_distance_unbanded_global_policy<derived_t, edit_traits>
329{
330protected:
331 static_assert(edit_traits::is_semi_global, "This policy assumes that edit_traits::is_semi_global is true.");
332
334 friend derived_t;
335
339 edit_distance_unbanded_semi_global_policy() noexcept = default;
340 edit_distance_unbanded_semi_global_policy(edit_distance_unbanded_semi_global_policy const &) noexcept =
341 default;
342 edit_distance_unbanded_semi_global_policy(edit_distance_unbanded_semi_global_policy &&) noexcept =
343 default;
344 edit_distance_unbanded_semi_global_policy &
345 operator=(edit_distance_unbanded_semi_global_policy const &) noexcept = default;
346 edit_distance_unbanded_semi_global_policy &
347 operator=(edit_distance_unbanded_semi_global_policy &&) noexcept = default;
348 ~edit_distance_unbanded_semi_global_policy() noexcept = default;
349
351
353 using base_t = edit_distance_unbanded_global_policy<derived_t, edit_traits>;
355 using database_iterator = typename edit_traits::database_iterator;
356 using base_t::_best_score;
358
370 database_iterator _best_score_col{};
372
378 void score_init() noexcept
379 {
380 derived_t const * self = static_cast<derived_t const *>(this);
381 base_t::score_init();
382 _best_score_col = self->database_it_end;
383 }
384
386 void update_best_score() noexcept
387 {
388 derived_t const * self = static_cast<derived_t const *>(this);
389 // we have to make sure that update_best_score is only called after a score update within the last row.
390 if constexpr (edit_traits::use_max_errors)
391 {
392 assert(std::ranges::empty(self->query) || self->is_last_active_cell_within_last_row());
393 }
394
395 _best_score_col = (self->_score <= _best_score) ? self->database_it : _best_score_col;
396 _best_score = (self->_score <= _best_score) ? self->_score : _best_score;
397 }
398
400 size_t end_positions_first() const noexcept
401 {
402 derived_t const * self = static_cast<derived_t const *>(this);
403 // offset == 0u is a special case if database sequence is empty, because in this case the best column is zero.
404 size_t offset = std::ranges::empty(self->database) ? 0u : 1u;
405 return std::ranges::distance(std::ranges::begin(self->database), _best_score_col) + offset;
406 }
408};
409
413template <typename derived_t, typename edit_traits>
414class edit_distance_unbanded_score_matrix_policy :
416 edit_traits
418{
419protected:
420 static_assert(edit_traits::compute_score_matrix,
421 "This policy assumes that edit_traits::compute_score_matrix is true.");
422
424 friend derived_t;
425
429 edit_distance_unbanded_score_matrix_policy() noexcept = default;
430 edit_distance_unbanded_score_matrix_policy(edit_distance_unbanded_score_matrix_policy const &) noexcept =
431 default;
432 edit_distance_unbanded_score_matrix_policy(edit_distance_unbanded_score_matrix_policy &&) noexcept =
433 default;
434 edit_distance_unbanded_score_matrix_policy &
435 operator=(edit_distance_unbanded_score_matrix_policy const &) noexcept = default;
436 edit_distance_unbanded_score_matrix_policy &
437 operator=(edit_distance_unbanded_score_matrix_policy &&) noexcept = default;
438 ~edit_distance_unbanded_score_matrix_policy() noexcept = default;
439
441
442 using typename edit_traits::score_matrix_type;
443
449 score_matrix_type _score_matrix{};
451
464 void score_matrix_init()
465 {
466 derived_t const * self = static_cast<derived_t const *>(this);
467
468 _score_matrix = score_matrix_type{std::ranges::size(self->query) + 1u};
469 _score_matrix.reserve(std::ranges::size(self->database) + 1u);
470 }
472
473public:
481 score_matrix_type const & score_matrix() const noexcept
482 {
483 static_assert(edit_traits::compute_score_matrix,
484 "score_matrix() can only be computed if you specify the "
485 "result type within your alignment config.");
486 return _score_matrix;
487 }
489};
490
494template <typename derived_t, typename edit_traits>
495class edit_distance_unbanded_trace_matrix_policy :
497 edit_traits
499{
500protected:
501 static_assert(edit_traits::compute_trace_matrix,
502 "This policy assumes that edit_traits::compute_trace_matrix is true.");
503
505 friend derived_t;
506
510 edit_distance_unbanded_trace_matrix_policy() noexcept = default;
511 edit_distance_unbanded_trace_matrix_policy(edit_distance_unbanded_trace_matrix_policy const &) noexcept =
512 default;
513 edit_distance_unbanded_trace_matrix_policy(edit_distance_unbanded_trace_matrix_policy &&) noexcept =
514 default;
515 edit_distance_unbanded_trace_matrix_policy &
516 operator=(edit_distance_unbanded_trace_matrix_policy const &) noexcept = default;
517 edit_distance_unbanded_trace_matrix_policy &
518 operator=(edit_distance_unbanded_trace_matrix_policy &&) noexcept = default;
519 ~edit_distance_unbanded_trace_matrix_policy() noexcept = default;
520
522
523 using typename edit_traits::alignment_result_type;
524 using typename edit_traits::trace_matrix_type;
525 using typename edit_traits::word_type;
526
532 std::vector<word_type> hp{};
535
537 trace_matrix_type _trace_matrix{};
539
552 void trace_matrix_init(size_t block_count)
553 {
554 derived_t const * self = static_cast<derived_t const *>(this);
555
556 _trace_matrix = trace_matrix_type{std::ranges::size(self->query) + 1u};
557 _trace_matrix.reserve(std::ranges::size(self->database) + 1u);
558
559 hp.resize(block_count, 0u);
560 db.resize(block_count, 0u);
561 }
563
564public:
570 trace_matrix_type const & trace_matrix() const noexcept
571 {
572 static_assert(edit_traits::compute_trace_matrix,
573 "trace_matrix() can only be computed if you specify the "
574 "result type within your alignment config.");
575 return _trace_matrix;
576 }
577
580 seqan3::detail::advanceable_alignment_coordinate<> begin_positions() const noexcept
581 {
582 derived_t const * self = static_cast<derived_t const *>(this);
583 static_assert(edit_traits::compute_begin_positions,
584 "begin_positions() can only be computed if you specify the "
585 "result type within your alignment config.");
586 if (!self->is_valid())
587 return self->invalid_coordinate();
588
589 auto [first, second] = self->end_positions();
590 detail::matrix_coordinate const end_positions{detail::row_index_type{second}, detail::column_index_type{first}};
591 auto trace_path = self->trace_matrix().trace_path(end_positions);
592 auto trace_path_it = std::ranges::begin(trace_path);
593 std::ranges::advance(trace_path_it, std::ranges::end(trace_path));
594 detail::matrix_coordinate const begin_positions = trace_path_it.coordinate();
595 return {detail::column_index_type{begin_positions.col}, detail::row_index_type{begin_positions.row}};
596 }
597
600 auto alignment() const noexcept
601 {
603
604 derived_t const * self = static_cast<derived_t const *>(this);
605 static_assert(edit_traits::compute_sequence_alignment,
606 "alignment() can only be computed if you specify the "
607 "result type within your alignment config.");
608
609 if (!self->is_valid())
610 return alignment_t{};
611
612 auto [first, second] = self->end_positions();
613 detail::matrix_coordinate const end_positions{detail::row_index_type{second}, detail::column_index_type{first}};
614 aligned_sequence_builder builder{self->database, self->query};
615 auto trace_path = self->trace_matrix().trace_path(end_positions);
616 return builder(trace_path).alignment;
617 }
619};
620
624template <typename value_t>
625class proxy_reference
626{
627public:
631 proxy_reference() noexcept = default;
632 proxy_reference(proxy_reference const &) noexcept = default;
633 proxy_reference(proxy_reference &&) noexcept = default;
634 proxy_reference & operator=(proxy_reference const &) noexcept = default;
635 proxy_reference & operator=(proxy_reference &&) noexcept = default;
636 ~proxy_reference() = default;
637
639 proxy_reference(value_t & t) noexcept : ptr(std::addressof(t))
640 {}
641
642 proxy_reference(value_t &&) = delete;
643
645 template <typename other_value_t>
646 requires std::convertible_to<other_value_t, value_t>
647 proxy_reference & operator=(other_value_t && u) noexcept
648 {
649 get() = std::forward<other_value_t>(u);
650 return *this;
651 }
653
655 value_t & get() const noexcept
656 {
657 assert(ptr != nullptr);
658 return *ptr;
659 }
660
662 operator value_t &() const noexcept
663 {
664 return get();
665 }
666
667private:
669 value_t * ptr{nullptr};
670};
671
683template <std::ranges::viewable_range database_t,
684 std::ranges::viewable_range query_t,
685 typename align_config_t,
686 typename edit_traits>
687class edit_distance_unbanded :
689 // Hide this section in doxygen, because it messes up the inheritance.
690 public edit_distance_base<edit_traits::use_max_errors,
691 edit_distance_unbanded_max_errors_policy,
692 edit_traits,
693 edit_distance_unbanded<database_t, query_t, align_config_t, edit_traits>>,
694 public edit_distance_base<edit_traits::is_global,
695 edit_distance_unbanded_global_policy,
696 edit_traits,
697 edit_distance_unbanded<database_t, query_t, align_config_t, edit_traits>>,
698 public edit_distance_base<edit_traits::is_semi_global,
699 edit_distance_unbanded_semi_global_policy,
700 edit_traits,
701 edit_distance_unbanded<database_t, query_t, align_config_t, edit_traits>>,
702 public edit_distance_base<edit_traits::compute_score_matrix,
703 edit_distance_unbanded_score_matrix_policy,
704 edit_traits,
705 edit_distance_unbanded<database_t, query_t, align_config_t, edit_traits>>,
706 public edit_distance_base<edit_traits::compute_trace_matrix,
707 edit_distance_unbanded_trace_matrix_policy,
708 edit_traits,
709 edit_distance_unbanded<database_t, query_t, align_config_t, edit_traits>>
711{
712public:
713 using edit_traits::word_size;
714 using typename edit_traits::align_config_type;
715 using typename edit_traits::database_type;
716 using typename edit_traits::query_type;
717 using typename edit_traits::score_type;
718 using typename edit_traits::word_type;
719
720private:
722 template <typename other_derived_t, typename other_edit_traits>
723 friend class edit_distance_unbanded_max_errors_policy;
725 template <typename other_derived_t, typename other_edit_traits>
726 friend class edit_distance_unbanded_global_policy;
728 template <typename other_derived_t, typename other_edit_traits>
729 friend class edit_distance_unbanded_semi_global_policy;
731 template <typename other_derived_t, typename other_edit_traits>
732 friend class edit_distance_unbanded_score_matrix_policy;
734 template <typename other_derived_t, typename other_edit_traits>
735 friend class edit_distance_unbanded_trace_matrix_policy;
736
737 using edit_traits::compute_begin_positions;
738 using edit_traits::compute_end_positions;
739 using edit_traits::compute_matrix;
740 using edit_traits::compute_score;
741 using edit_traits::compute_score_matrix;
742 using edit_traits::compute_sequence_alignment;
743 using edit_traits::compute_trace_matrix;
744 using edit_traits::is_global;
745 using edit_traits::is_semi_global;
746 using edit_traits::use_max_errors;
747 using typename edit_traits::alignment_result_type;
748 using typename edit_traits::database_iterator;
749 using typename edit_traits::query_alphabet_type;
750 using typename edit_traits::score_matrix_type;
751 using typename edit_traits::trace_matrix_type;
752
754 database_t database;
756 query_t query;
758 align_config_t config;
759
761 static constexpr word_type hp0 = is_global ? 1u : 0u;
763 static constexpr word_type hn0 = 0u;
765 static constexpr word_type vp0 = ~word_type{0u};
767 static constexpr word_type vn0 = 0u;
768
770 score_type _score{};
777 word_type score_mask{0u};
788 std::vector<word_type> bit_masks{};
789
791 database_iterator database_it{};
793 database_iterator database_it_end{};
794
796 struct compute_state_trace_matrix
797 {
799 proxy_reference<word_type> db{};
800 };
801
803 struct compute_state : enable_state_t<edit_traits::compute_trace_matrix, compute_state_trace_matrix>
804 {
807
809 word_type b{};
811 word_type d0{};
813 hp_type hp{};
815 word_type hn{};
817 proxy_reference<word_type> vp{};
819 proxy_reference<word_type> vn{};
821 word_type carry_d0{};
823 word_type carry_hp{hp0};
825 word_type carry_hn{};
826 };
827
829 void add_state()
830 {
831 if constexpr (!use_max_errors && compute_score_matrix)
832 this->_score_matrix.add_column(vp, vn);
833
834 if constexpr (!use_max_errors && compute_trace_matrix)
835 this->_trace_matrix.add_column(this->hp, this->db, vp);
836
837 if constexpr (use_max_errors && compute_matrix)
838 {
839 size_t max_rows = this->max_rows(score_mask, this->last_block, _score, this->max_errors);
840 if constexpr (compute_score_matrix)
841 this->_score_matrix.add_column(vp, vn, max_rows);
842
843 if constexpr (compute_trace_matrix)
844 this->_trace_matrix.add_column(this->hp, this->db, vp, max_rows);
845 }
846 }
847
848public:
853 edit_distance_unbanded() = delete;
854 edit_distance_unbanded(edit_distance_unbanded const &) = default;
855 edit_distance_unbanded(edit_distance_unbanded &&) = default;
856 edit_distance_unbanded & operator=(edit_distance_unbanded const &) = default;
857 edit_distance_unbanded & operator=(edit_distance_unbanded &&) = default;
858 ~edit_distance_unbanded() = default;
859
866 edit_distance_unbanded(database_t _database,
867 query_t _query,
868 align_config_t _config,
869 edit_traits const & SEQAN3_DOXYGEN_ONLY(_traits)) :
870 database{std::forward<database_t>(_database)},
871 query{std::forward<query_t>(_query)},
872 config{std::forward<align_config_t>(_config)},
873 _score{static_cast<score_type>(std::ranges::size(query))},
874 database_it{std::ranges::begin(database)},
875 database_it_end{std::ranges::end(database)}
876 {
877 static constexpr size_t alphabet_size_ = alphabet_size<query_alphabet_type>;
878
879 size_t const block_count = (std::ranges::size(query) - 1u + word_size) / word_size;
880 score_mask = word_type{1u} << ((std::ranges::size(query) - 1u + word_size) % word_size);
881
882 this->score_init();
883 if constexpr (use_max_errors)
884 this->max_errors_init(block_count);
885
886 if constexpr (compute_score_matrix)
887 this->score_matrix_init();
888
889 if constexpr (compute_trace_matrix)
890 this->trace_matrix_init(block_count);
891
892 vp.resize(block_count, vp0);
893 vn.resize(block_count, vn0);
894 bit_masks.resize((alphabet_size_ + 1u) * block_count, 0u);
895
896 // encoding the letters as bit-vectors
897 for (size_t j = 0u; j < std::ranges::size(query); j++)
898 {
899 size_t const i = block_count * seqan3::to_rank(query[j]) + j / word_size;
900 bit_masks[i] |= word_type{1u} << (j % word_size);
901 }
902
903 add_state();
904 }
906
907private:
909 template <bool with_carry>
910 static void compute_step(compute_state & state) noexcept
911 {
912 word_type x, t;
913 assert(state.carry_d0 <= 1u);
914 assert(state.carry_hp <= 1u);
915 assert(state.carry_hn <= 1u);
916
917 x = state.b | state.vn;
918 t = state.vp + (x & state.vp) + state.carry_d0;
919
920 state.d0 = (t ^ state.vp) | x;
921 state.hn = state.vp & state.d0;
922 state.hp = state.vn | ~(state.vp | state.d0);
923
924 if constexpr (with_carry)
925 state.carry_d0 = (state.carry_d0 != 0u) ? t <= state.vp : t < state.vp;
926
927 x = (state.hp << 1u) | state.carry_hp;
928 state.vn = x & state.d0;
929 state.vp = (state.hn << 1u) | ~(x | state.d0) | state.carry_hn;
930
931 if constexpr (with_carry)
932 {
933 state.carry_hp = state.hp >> (word_size - 1u);
934 state.carry_hn = state.hn >> (word_size - 1u);
935 }
936 }
937
939 template <bool with_carry>
940 void compute_kernel(compute_state & state, size_t const block_offset, size_t const current_block) noexcept
941 {
942 state.vp = proxy_reference<word_type>{this->vp[current_block]};
943 state.vn = proxy_reference<word_type>{this->vn[current_block]};
944 if constexpr (compute_trace_matrix)
945 {
946 state.hp = proxy_reference<word_type>{this->hp[current_block]};
947 state.db = proxy_reference<word_type>{this->db[current_block]};
948 }
949 state.b = bit_masks[block_offset + current_block];
950
951 compute_step<with_carry>(state);
952 if constexpr (compute_trace_matrix)
953 state.db = ~(state.b ^ state.d0);
954 }
955
957 void advance_score(word_type P, word_type N, word_type mask) noexcept
958 {
959 if ((P & mask) != word_type{0u})
960 _score++;
961 else if ((N & mask) != word_type{0u})
962 _score--;
963 }
964
966 bool on_hit() noexcept
967 {
968 // TODO: call external on_hit functor
969 return false;
970 }
971
973 inline bool small_patterns();
974
976 inline bool large_patterns();
977
979 inline void compute_empty_query_sequence()
980 {
981 assert(std::ranges::empty(query));
982
983 bool abort_computation = false;
984
985 for (; database_it != database_it_end; ++database_it)
986 {
987 if constexpr (is_global)
988 ++_score;
989 else // is_semi_global
990 this->update_best_score();
991
992 // call on_hit
993 if constexpr (use_max_errors)
994 abort_computation = on_hit();
995
996 this->add_state();
997 if (abort_computation)
998 break;
999 }
1000 }
1001
1003 void compute()
1004 {
1005 // limit search width for prefix search (if no matrix needs to be computed)
1006 if constexpr (use_max_errors && is_global && !compute_matrix)
1007 {
1008 // Note: For global alignments we know that the database can only be max_length long to have a score less
1009 // than or equal max_errors in the last cell.
1010 //
1011 // The following matrix shows a minimal value for each entry (because a diagonal must be +0 or +1, each
1012 // diagonal is at least the value of the initial value in the first row)
1013 // 0 1 2 3 4 5 6...
1014 // 1 0 1 2 3 4 5...
1015 // ...
1016 // m ... 3 2 1 0 1 2 3 4 5
1017 // Thus, after |query| + max_errors entries the score will always be higher than max_errors.
1018 size_t const max_length = std::ranges::size(query) + this->max_errors + 1u;
1019 size_t const haystack_length = std::min(std::ranges::size(database), max_length);
1020 database_it_end -= std::ranges::size(database) - haystack_length;
1021 }
1022
1023 // distinguish between the version for needles not longer than
1024 // one machine word and the version for longer needles
1025 // A special cases is if the second sequence is empty (vp.size() == 0u).
1026 if (vp.size() == 0u) // [[unlikely]]
1027 compute_empty_query_sequence();
1028 else if (vp.size() == 1u)
1029 small_patterns();
1030 else
1031 large_patterns();
1032
1033 if constexpr (is_global)
1034 this->update_best_score();
1035 }
1036
1037public:
1042 template <typename callback_t>
1043 void operator()([[maybe_unused]] size_t const idx, callback_t && callback)
1044 {
1045 using traits_type = alignment_configuration_traits<align_config_t>;
1046 using result_value_type = typename alignment_result_value_type_accessor<alignment_result_type>::type;
1047
1048 compute();
1049
1050 // First cache the begin and end positions if enabled by the edit distance traits.
1051 // Note, that they might be activated even if the user did not configure them, but in order to
1052 // compute for example the alignment both information are required and enabled internally.
1053 auto cached_end_positions = this->invalid_coordinate();
1054 auto cached_begin_positions = this->invalid_coordinate();
1055
1056 if constexpr (compute_end_positions)
1057 cached_end_positions = this->end_positions();
1058
1059 if constexpr (compute_begin_positions && !compute_sequence_alignment)
1060 {
1061 static_assert(compute_end_positions, "End positions required to compute the begin positions.");
1062 cached_begin_positions = this->begin_positions();
1063 }
1064
1065 // Fill the result value type. Note we need to ask what was enabled on the user side in order to store
1066 // the correct information in the alignment result type. This information is stored in the alignment
1067 // configuration traits and not in the edit distance traits.
1068 result_value_type res_vt{};
1069
1070 if constexpr (traits_type::output_sequence1_id)
1071 res_vt.sequence1_id = idx;
1072
1073 if constexpr (traits_type::output_sequence2_id)
1074 res_vt.sequence2_id = idx;
1075
1076 if constexpr (traits_type::compute_score)
1077 res_vt.score = this->score().value_or(matrix_inf<score_type>);
1078
1079 if constexpr (traits_type::compute_sequence_alignment)
1080 {
1081 if (this->is_valid())
1082 {
1083 auto [first, second] = cached_end_positions;
1084 detail::matrix_coordinate const end_positions{detail::row_index_type{second},
1085 detail::column_index_type{first}};
1086
1087 aligned_sequence_builder builder{this->database, this->query};
1088 auto trace_res = builder(this->trace_matrix().trace_path(end_positions));
1089 res_vt.alignment = std::move(trace_res.alignment);
1090 cached_begin_positions.first = trace_res.first_sequence_slice_positions.first;
1091 cached_begin_positions.second = trace_res.second_sequence_slice_positions.first;
1092 }
1093 }
1094
1095 if constexpr (traits_type::compute_end_positions)
1096 res_vt.end_positions = std::move(cached_end_positions);
1097
1098 if constexpr (traits_type::compute_begin_positions)
1099 res_vt.begin_positions = std::move(cached_begin_positions);
1100
1101 callback(alignment_result_type{std::move(res_vt)});
1102 }
1103};
1104
1105template <std::ranges::viewable_range database_t,
1106 std::ranges::viewable_range query_t,
1107 typename align_config_t,
1108 typename traits_t>
1109bool edit_distance_unbanded<database_t, query_t, align_config_t, traits_t>::small_patterns()
1110{
1111 bool abort_computation = false;
1112
1113 // computing the blocks
1114 while (database_it != database_it_end)
1115 {
1116 compute_state state{};
1117 size_t const block_offset = seqan3::to_rank((query_alphabet_type)*database_it);
1118
1119 compute_kernel<false>(state, block_offset, 0u);
1120 advance_score(state.hp, state.hn, score_mask);
1121
1122 // semi-global without max_errors guarantees that the score stays within the last row
1123 if constexpr (is_semi_global && !use_max_errors)
1124 this->update_best_score();
1125
1126 // updating the last active cell
1127 if constexpr (use_max_errors)
1128 abort_computation = this->update_last_active_cell();
1129
1130 add_state();
1131 ++database_it;
1132 if (abort_computation)
1133 return true;
1134 }
1135
1136 return false;
1137}
1138
1139template <std::ranges::viewable_range database_t,
1140 std::ranges::viewable_range query_t,
1141 typename align_config_t,
1142 typename traits_t>
1143bool edit_distance_unbanded<database_t, query_t, align_config_t, traits_t>::large_patterns()
1144{
1145 bool abort_computation = false;
1146
1147 while (database_it != database_it_end)
1148 {
1149 compute_state state{};
1150 size_t const block_offset = vp.size() * seqan3::to_rank((query_alphabet_type)*database_it);
1151
1152 size_t block_count = vp.size();
1153 if constexpr (use_max_errors)
1154 block_count = this->last_block + 1;
1155
1156 // compute each block in the current column; carries between blocks will be propagated.
1157 for (size_t current_block = 0u; current_block < block_count; current_block++)
1158 compute_kernel<true>(state, block_offset, current_block);
1159
1160 advance_score(state.hp, state.hn, score_mask);
1161
1162 // semi-global without max_errors guarantees that the score stays within the last row
1163 if constexpr (is_semi_global && !use_max_errors)
1164 this->update_best_score();
1165
1166 if constexpr (use_max_errors)
1167 {
1168 // if the last active cell reached the end within the current block we have to compute the next block.
1169 bool additional_block = score_mask >> (word_size - 1u);
1170 bool reached_last_block = this->last_block + 1u == vp.size();
1171 // If there is no next block we skip the computation.
1172 if (reached_last_block)
1173 additional_block = false;
1174
1175 if (additional_block)
1176 {
1177 size_t const current_block = this->last_block + 1u;
1178 // this might not be necessary, but carry_d0 = 1u might have an influence on the result of vn and vp.
1179 vp[current_block] = vp0;
1180 vn[current_block] = vn0;
1181 compute_kernel<false>(state, block_offset, current_block);
1182 }
1183
1184 // updating the last active cell
1185 abort_computation = this->update_last_active_cell();
1186 }
1187
1188 add_state();
1189 ++database_it;
1190
1191 if (abort_computation)
1192 return true;
1193 }
1194
1195 return false;
1196}
1197
1204template <typename database_t, typename query_t, typename config_t, typename traits_t>
1205edit_distance_unbanded(database_t && database, query_t && query, config_t config, traits_t)
1206 -> edit_distance_unbanded<database_t, query_t, config_t, traits_t>;
1208
1209} // namespace seqan3::detail
Provides seqan3::detail::advanceable_alignment_coordinate.
Provides seqan3::alignment_result.
T begin(T... args)
Provides seqan3::configuration and utility functions.
Forwards for seqan3::edit_distance_unbanded related types.
Provides seqan3::detail::edit_distance_score_matrix_full.
Provides seqan3::detail::edit_distance_trace_matrix_full.
T end(T... args)
T forward(T... args)
constexpr auto to_rank
Return the rank representation of a (semi-)alphabet object.
Definition alphabet/concept.hpp:152
@ alignment
The (pairwise) alignment stored in an object that models seqan3::detail::pairwise_alignment.
@ offset
Sequence (seqan3::field::seq) relative start position (0-based), unsigned value.
constexpr size_t size
The size of a type pack.
Definition type_pack/traits.hpp:143
Provides seqan3::detail::matrix.
T min(T... args)
constexpr auto const & get(configuration< configs_t... > const &config) noexcept
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition configuration.hpp:412
SeqAn specific customisations in the standard namespace.
T reserve(T... args)
Hide me