SeqAn3 3.2.0
The Modern C++ library for sequence analysis.
edit_distance_unbanded.hpp
Go to the documentation of this file.
1// -----------------------------------------------------------------------------------------------------
2// Copyright (c) 2006-2022, Knut Reinert & Freie Universität Berlin
3// Copyright (c) 2016-2022, Knut Reinert & MPI für molekulare Genetik
4// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6// -----------------------------------------------------------------------------------------------------
7
13#pragma once
14
15#include <algorithm>
16#include <bitset>
17#include <ranges>
18#include <utility>
19
27
28namespace seqan3::detail
29{
33template <typename derived_t, typename edit_traits>
34class edit_distance_unbanded_max_errors_policy :
36 edit_traits
38{
39protected:
40 static_assert(edit_traits::use_max_errors, "This policy assumes that edit_traits::use_max_errors is true.");
41
43 friend derived_t;
44
48 edit_distance_unbanded_max_errors_policy() noexcept = default;
49 edit_distance_unbanded_max_errors_policy(edit_distance_unbanded_max_errors_policy const &) noexcept =
50 default;
51 edit_distance_unbanded_max_errors_policy(edit_distance_unbanded_max_errors_policy &&) noexcept =
52 default;
53 edit_distance_unbanded_max_errors_policy &
54 operator=(edit_distance_unbanded_max_errors_policy const &) noexcept = default;
55 edit_distance_unbanded_max_errors_policy &
56 operator=(edit_distance_unbanded_max_errors_policy &&) noexcept = default;
57 ~edit_distance_unbanded_max_errors_policy() noexcept = default;
58
60
61 using typename edit_traits::score_type;
62 using typename edit_traits::word_type;
63
69 score_type max_errors{255};
71 size_t last_block{0u};
73 word_type last_score_mask{};
75
81 void max_errors_init(size_t block_count) noexcept
82 {
83 derived_t * self = static_cast<derived_t *>(this);
84
85 max_errors = -get<align_cfg::min_score>(self->config).score;
86
87 assert(max_errors >= score_type{0});
88
89 if (std::ranges::empty(self->query)) // [[unlikely]]
90 {
91 last_block = 0u;
92 self->score_mask = 0u;
93 last_score_mask = self->score_mask;
94 return;
95 }
96
97 last_block = block_count - 1u;
98 last_score_mask = self->score_mask;
99
100 // local_max_errors either stores the maximal number of _score (me.max_errors) or the needle size minus one. It
101 // is used for the mask computation and setting the initial score (the minus one is there because of the Ukkonen
102 // trick).
103 size_t const local_max_errors = std::min<size_t>(max_errors, std::ranges::size(self->query) - 1u);
104 self->score_mask = word_type{1u} << (local_max_errors % self->word_size);
105 last_block = std::min(local_max_errors / self->word_size, last_block);
106 self->_score = local_max_errors + 1u;
107 }
108
110 bool is_last_active_cell_within_last_row() const noexcept
111 {
112 derived_t const * self = static_cast<derived_t const *>(this);
113 return (self->score_mask == this->last_score_mask) && (this->last_block == self->vp.size() - 1u);
114 }
115
117 bool prev_last_active_cell() noexcept
118 {
119 derived_t * self = static_cast<derived_t *>(this);
120 self->score_mask >>= 1u;
121 if (self->score_mask != 0u)
122 return true;
123
124 if constexpr (edit_traits::is_global)
125 {
126 if (last_block == 0u) // [[unlikely]]
127 return false;
128 }
129
130 last_block--;
131
132 self->score_mask = word_type{1u} << (edit_traits::word_size - 1u);
133 return true;
134 }
135
137 void next_last_active_cell() noexcept
138 {
139 derived_t * self = static_cast<derived_t *>(this);
140 self->score_mask <<= 1u;
141 if (self->score_mask)
142 return;
143
144 self->score_mask = 1u;
145 last_block++;
146 }
147
151 bool update_last_active_cell() noexcept
152 {
153 derived_t * self = static_cast<derived_t *>(this);
154 // update the last active cell
155 while (!(self->_score <= max_errors))
156 {
157 self->advance_score(self->vn[last_block], self->vp[last_block], self->score_mask);
158 if (!prev_last_active_cell())
159 {
160 // prev_last_active_cell = false can only happen for global alignments
161 assert(edit_traits::is_global);
162 // we abort here if we don't need to compute a matrix, because the continued
163 // computation can't produce an alignment.
164 return !edit_traits::compute_matrix;
165 }
166 }
167
168 if (is_last_active_cell_within_last_row())
169 {
170 assert(self->_score <= max_errors);
171
172 if constexpr (edit_traits::is_semi_global)
173 self->update_best_score();
174
175 return self->on_hit();
176 }
177 else
178 {
179 next_last_active_cell();
180 self->advance_score(self->vp[last_block], self->vn[last_block], self->score_mask);
181 }
182
183 return false;
184 }
186
188 static size_t max_rows(word_type const score_mask,
189 unsigned const last_block,
190 score_type const score,
191 score_type const max_errors) noexcept
192 {
193 using score_matrix_type = typename edit_traits::score_matrix_type;
194 return score_matrix_type::max_rows(score_mask, last_block, score, max_errors);
195 }
196};
197
201template <typename derived_t, typename edit_traits>
202class edit_distance_unbanded_global_policy :
204 edit_traits
206{
207protected:
208 static_assert(edit_traits::is_global || edit_traits::is_semi_global,
209 "This policy assumes that edit_traits::is_global or edit_traits::is_semi_global is true.");
210
212 friend derived_t;
213
217 edit_distance_unbanded_global_policy() noexcept = default;
218 edit_distance_unbanded_global_policy(edit_distance_unbanded_global_policy const &) noexcept =
219 default;
220 edit_distance_unbanded_global_policy(edit_distance_unbanded_global_policy &&) noexcept = default;
221 edit_distance_unbanded_global_policy &
222 operator=(edit_distance_unbanded_global_policy const &) noexcept = default;
223 edit_distance_unbanded_global_policy &
224 operator=(edit_distance_unbanded_global_policy &&) noexcept = default;
225 ~edit_distance_unbanded_global_policy() noexcept = default;
226
228
230 using typename edit_traits::score_type;
231
240 score_type _best_score{};
242
248 void score_init() noexcept
249 {
250 derived_t const * self = static_cast<derived_t const *>(this);
251 _best_score = self->_score;
252 }
253
255 bool is_valid() const noexcept
256 {
257 [[maybe_unused]] derived_t const * self = static_cast<derived_t const *>(this);
258 // This condition uses the observation that after each computation of a column, _score has either the initial
259 // value of the first row (i.e. the entire column consist of INF's), has the value _score = max_errors + 1
260 // (there exists a cell within the column that has value <= max_errors, but is not on the last row) or _score <=
261 // max_errors (the score of the last active cell is <= max_errors)
262 if constexpr (edit_traits::use_max_errors)
263 return _best_score <= self->max_errors;
264
265 // When not using max_errors there is always a valid alignment, because the last row will always be updated and
266 // with it the score.
267 return true;
268 }
269
271 seqan3::detail::advanceable_alignment_coordinate<> invalid_coordinate() const noexcept
272 {
273 derived_t const * self = static_cast<derived_t const *>(this);
274 return {column_index_type{std::ranges::size(self->database)}, row_index_type{std::ranges::size(self->query)}};
275 }
276
278 void update_best_score() noexcept
279 {
280 derived_t const * self = static_cast<derived_t const *>(this);
281 _best_score = self->_score;
282 }
283
285 size_t end_positions_first() const noexcept
286 {
287 derived_t const * self = static_cast<derived_t const *>(this);
288 return std::ranges::size(self->database);
289 }
291
292public:
299 std::optional<score_type> score() const noexcept
300 {
301 derived_t const * self = static_cast<derived_t const *>(this);
302 static_assert(edit_traits::compute_score,
303 "score() can only be computed if you specify the result type within "
304 "your alignment config.");
305 if (!self->is_valid())
306 return std::nullopt;
307
308 return -_best_score;
309 }
310
313 seqan3::detail::advanceable_alignment_coordinate<> end_positions() const noexcept
314 {
315 derived_t const * self = static_cast<derived_t const *>(this);
316 static_assert(edit_traits::compute_end_positions,
317 "end_positions() can only be computed if you specify the "
318 "result type within your alignment config.");
319 if (!self->is_valid())
320 return self->invalid_coordinate();
321
322 column_index_type const first{self->end_positions_first()};
323 row_index_type const second{std::ranges::size(self->query)};
324 return {first, second};
325 }
327};
328
330template <typename derived_t, typename edit_traits>
331class edit_distance_unbanded_semi_global_policy : public edit_distance_unbanded_global_policy<derived_t, edit_traits>
332{
333protected:
334 static_assert(edit_traits::is_semi_global, "This policy assumes that edit_traits::is_semi_global is true.");
335
337 friend derived_t;
338
342 edit_distance_unbanded_semi_global_policy() noexcept = default;
343 edit_distance_unbanded_semi_global_policy(edit_distance_unbanded_semi_global_policy const &) noexcept =
344 default;
345 edit_distance_unbanded_semi_global_policy(edit_distance_unbanded_semi_global_policy &&) noexcept =
346 default;
347 edit_distance_unbanded_semi_global_policy &
348 operator=(edit_distance_unbanded_semi_global_policy const &) noexcept = default;
349 edit_distance_unbanded_semi_global_policy &
350 operator=(edit_distance_unbanded_semi_global_policy &&) noexcept = default;
351 ~edit_distance_unbanded_semi_global_policy() noexcept = default;
352
354
356 using base_t = edit_distance_unbanded_global_policy<derived_t, edit_traits>;
358 using database_iterator = typename edit_traits::database_iterator;
359 using base_t::_best_score;
361
373 database_iterator _best_score_col{};
375
381 void score_init() noexcept
382 {
383 derived_t const * self = static_cast<derived_t const *>(this);
384 base_t::score_init();
385 _best_score_col = self->database_it_end;
386 }
387
389 void update_best_score() noexcept
390 {
391 derived_t const * self = static_cast<derived_t const *>(this);
392 // we have to make sure that update_best_score is only called after a score update within the last row.
393 if constexpr (edit_traits::use_max_errors)
394 {
395 assert(std::ranges::empty(self->query) || self->is_last_active_cell_within_last_row());
396 }
397
398 _best_score_col = (self->_score <= _best_score) ? self->database_it : _best_score_col;
399 _best_score = (self->_score <= _best_score) ? self->_score : _best_score;
400 }
401
403 size_t end_positions_first() const noexcept
404 {
405 derived_t const * self = static_cast<derived_t const *>(this);
406 // offset == 0u is a special case if database sequence is empty, because in this case the best column is zero.
407 size_t offset = std::ranges::empty(self->database) ? 0u : 1u;
408 return std::ranges::distance(std::ranges::begin(self->database), _best_score_col) + offset;
409 }
411};
412
416template <typename derived_t, typename edit_traits>
417class edit_distance_unbanded_score_matrix_policy :
419 edit_traits
421{
422protected:
423 static_assert(edit_traits::compute_score_matrix,
424 "This policy assumes that edit_traits::compute_score_matrix is true.");
425
427 friend derived_t;
428
432 edit_distance_unbanded_score_matrix_policy() noexcept = default;
433 edit_distance_unbanded_score_matrix_policy(edit_distance_unbanded_score_matrix_policy const &) noexcept =
434 default;
435 edit_distance_unbanded_score_matrix_policy(edit_distance_unbanded_score_matrix_policy &&) noexcept =
436 default;
437 edit_distance_unbanded_score_matrix_policy &
438 operator=(edit_distance_unbanded_score_matrix_policy const &) noexcept = default;
439 edit_distance_unbanded_score_matrix_policy &
440 operator=(edit_distance_unbanded_score_matrix_policy &&) noexcept = default;
441 ~edit_distance_unbanded_score_matrix_policy() noexcept = default;
442
444
445 using typename edit_traits::score_matrix_type;
446
452 score_matrix_type _score_matrix{};
454
467 void score_matrix_init()
468 {
469 derived_t const * self = static_cast<derived_t const *>(this);
470
471 _score_matrix = score_matrix_type{std::ranges::size(self->query) + 1u};
472 _score_matrix.reserve(std::ranges::size(self->database) + 1u);
473 }
475
476public:
484 score_matrix_type const & score_matrix() const noexcept
485 {
486 static_assert(edit_traits::compute_score_matrix,
487 "score_matrix() can only be computed if you specify the "
488 "result type within your alignment config.");
489 return _score_matrix;
490 }
492};
493
497template <typename derived_t, typename edit_traits>
498class edit_distance_unbanded_trace_matrix_policy :
500 edit_traits
502{
503protected:
504 static_assert(edit_traits::compute_trace_matrix,
505 "This policy assumes that edit_traits::compute_trace_matrix is true.");
506
508 friend derived_t;
509
513 edit_distance_unbanded_trace_matrix_policy() noexcept = default;
514 edit_distance_unbanded_trace_matrix_policy(edit_distance_unbanded_trace_matrix_policy const &) noexcept =
515 default;
516 edit_distance_unbanded_trace_matrix_policy(edit_distance_unbanded_trace_matrix_policy &&) noexcept =
517 default;
518 edit_distance_unbanded_trace_matrix_policy &
519 operator=(edit_distance_unbanded_trace_matrix_policy const &) noexcept = default;
520 edit_distance_unbanded_trace_matrix_policy &
521 operator=(edit_distance_unbanded_trace_matrix_policy &&) noexcept = default;
522 ~edit_distance_unbanded_trace_matrix_policy() noexcept = default;
523
525
526 using typename edit_traits::alignment_result_type;
527 using typename edit_traits::trace_matrix_type;
528 using typename edit_traits::word_type;
529
535 std::vector<word_type> hp{};
538
540 trace_matrix_type _trace_matrix{};
542
555 void trace_matrix_init(size_t block_count)
556 {
557 derived_t const * self = static_cast<derived_t const *>(this);
558
559 _trace_matrix = trace_matrix_type{std::ranges::size(self->query) + 1u};
560 _trace_matrix.reserve(std::ranges::size(self->database) + 1u);
561
562 hp.resize(block_count, 0u);
563 db.resize(block_count, 0u);
564 }
566
567public:
573 trace_matrix_type const & trace_matrix() const noexcept
574 {
575 static_assert(edit_traits::compute_trace_matrix,
576 "trace_matrix() can only be computed if you specify the "
577 "result type within your alignment config.");
578 return _trace_matrix;
579 }
580
583 seqan3::detail::advanceable_alignment_coordinate<> begin_positions() const noexcept
584 {
585 derived_t const * self = static_cast<derived_t const *>(this);
586 static_assert(edit_traits::compute_begin_positions,
587 "begin_positions() can only be computed if you specify the "
588 "result type within your alignment config.");
589 if (!self->is_valid())
590 return self->invalid_coordinate();
591
592 auto [first, second] = self->end_positions();
593 detail::matrix_coordinate const end_positions{detail::row_index_type{second}, detail::column_index_type{first}};
594 auto trace_path = self->trace_matrix().trace_path(end_positions);
595 auto trace_path_it = std::ranges::begin(trace_path);
596 std::ranges::advance(trace_path_it, std::ranges::end(trace_path));
597 detail::matrix_coordinate const begin_positions = trace_path_it.coordinate();
598 return {detail::column_index_type{begin_positions.col}, detail::row_index_type{begin_positions.row}};
599 }
600
603 auto alignment() const noexcept
604 {
606
607 derived_t const * self = static_cast<derived_t const *>(this);
608 static_assert(edit_traits::compute_sequence_alignment,
609 "alignment() can only be computed if you specify the "
610 "result type within your alignment config.");
611
612 if (!self->is_valid())
613 return alignment_t{};
614
615 auto [first, second] = self->end_positions();
616 detail::matrix_coordinate const end_positions{detail::row_index_type{second}, detail::column_index_type{first}};
617 aligned_sequence_builder builder{self->database, self->query};
618 auto trace_path = self->trace_matrix().trace_path(end_positions);
619 return builder(trace_path).alignment;
620 }
622};
623
627template <typename value_t>
628class proxy_reference
629{
630public:
634 proxy_reference() noexcept = default;
635 proxy_reference(proxy_reference const &) noexcept = default;
636 proxy_reference(proxy_reference &&) noexcept = default;
637 proxy_reference & operator=(proxy_reference const &) noexcept = default;
638 proxy_reference & operator=(proxy_reference &&) noexcept = default;
639 ~proxy_reference() = default;
640
642 proxy_reference(value_t & t) noexcept : ptr(std::addressof(t))
643 {}
644
645 proxy_reference(value_t &&) = delete;
646
648 template <typename other_value_t>
649 requires std::convertible_to<other_value_t, value_t>
650 proxy_reference & operator=(other_value_t && u) noexcept
651 {
652 get() = std::forward<other_value_t>(u);
653 return *this;
654 }
656
658 value_t & get() const noexcept
659 {
660 assert(ptr != nullptr);
661 return *ptr;
662 }
663
665 operator value_t &() const noexcept
666 {
667 return get();
668 }
669
670private:
672 value_t * ptr{nullptr};
673};
674
686template <std::ranges::viewable_range database_t,
687 std::ranges::viewable_range query_t,
688 typename align_config_t,
689 typename edit_traits>
690class edit_distance_unbanded :
692 // Hide this section in doxygen, because it messes up the inheritance.
693 public edit_distance_base<edit_traits::use_max_errors,
694 edit_distance_unbanded_max_errors_policy,
695 edit_traits,
696 edit_distance_unbanded<database_t, query_t, align_config_t, edit_traits>>,
697 public edit_distance_base<edit_traits::is_global,
698 edit_distance_unbanded_global_policy,
699 edit_traits,
700 edit_distance_unbanded<database_t, query_t, align_config_t, edit_traits>>,
701 public edit_distance_base<edit_traits::is_semi_global,
702 edit_distance_unbanded_semi_global_policy,
703 edit_traits,
704 edit_distance_unbanded<database_t, query_t, align_config_t, edit_traits>>,
705 public edit_distance_base<edit_traits::compute_score_matrix,
706 edit_distance_unbanded_score_matrix_policy,
707 edit_traits,
708 edit_distance_unbanded<database_t, query_t, align_config_t, edit_traits>>,
709 public edit_distance_base<edit_traits::compute_trace_matrix,
710 edit_distance_unbanded_trace_matrix_policy,
711 edit_traits,
712 edit_distance_unbanded<database_t, query_t, align_config_t, edit_traits>>
714{
715public:
716 using edit_traits::word_size;
717 using typename edit_traits::align_config_type;
718 using typename edit_traits::database_type;
719 using typename edit_traits::query_type;
720 using typename edit_traits::score_type;
721 using typename edit_traits::word_type;
722
723private:
725 template <typename other_derived_t, typename other_edit_traits>
726 friend class edit_distance_unbanded_max_errors_policy;
728 template <typename other_derived_t, typename other_edit_traits>
729 friend class edit_distance_unbanded_global_policy;
731 template <typename other_derived_t, typename other_edit_traits>
732 friend class edit_distance_unbanded_semi_global_policy;
734 template <typename other_derived_t, typename other_edit_traits>
735 friend class edit_distance_unbanded_score_matrix_policy;
737 template <typename other_derived_t, typename other_edit_traits>
738 friend class edit_distance_unbanded_trace_matrix_policy;
739
740 using edit_traits::compute_begin_positions;
741 using edit_traits::compute_end_positions;
742 using edit_traits::compute_matrix;
743 using edit_traits::compute_score;
744 using edit_traits::compute_score_matrix;
745 using edit_traits::compute_sequence_alignment;
746 using edit_traits::compute_trace_matrix;
747 using edit_traits::is_global;
748 using edit_traits::is_semi_global;
749 using edit_traits::use_max_errors;
750 using typename edit_traits::alignment_result_type;
751 using typename edit_traits::database_iterator;
752 using typename edit_traits::query_alphabet_type;
753 using typename edit_traits::score_matrix_type;
754 using typename edit_traits::trace_matrix_type;
755
757 database_t database;
759 query_t query;
761 align_config_t config;
762
764 static constexpr word_type hp0 = is_global ? 1u : 0u;
766 static constexpr word_type hn0 = 0u;
768 static constexpr word_type vp0 = ~word_type{0u};
770 static constexpr word_type vn0 = 0u;
771
773 score_type _score{};
780 word_type score_mask{0u};
791 std::vector<word_type> bit_masks{};
792
794 database_iterator database_it{};
796 database_iterator database_it_end{};
797
799 struct compute_state_trace_matrix
800 {
802 proxy_reference<word_type> db{};
803 };
804
806 struct compute_state : enable_state_t<edit_traits::compute_trace_matrix, compute_state_trace_matrix>
807 {
810
812 word_type b{};
814 word_type d0{};
816 hp_type hp{};
818 word_type hn{};
820 proxy_reference<word_type> vp{};
822 proxy_reference<word_type> vn{};
824 word_type carry_d0{};
826 word_type carry_hp{hp0};
828 word_type carry_hn{};
829 };
830
832 void add_state()
833 {
834 if constexpr (!use_max_errors && compute_score_matrix)
835 this->_score_matrix.add_column(vp, vn);
836
837 if constexpr (!use_max_errors && compute_trace_matrix)
838 this->_trace_matrix.add_column(this->hp, this->db, vp);
839
840 if constexpr (use_max_errors && compute_matrix)
841 {
842 size_t max_rows = this->max_rows(score_mask, this->last_block, _score, this->max_errors);
843 if constexpr (compute_score_matrix)
844 this->_score_matrix.add_column(vp, vn, max_rows);
845
846 if constexpr (compute_trace_matrix)
847 this->_trace_matrix.add_column(this->hp, this->db, vp, max_rows);
848 }
849 }
850
851public:
856 edit_distance_unbanded() = delete;
857 edit_distance_unbanded(edit_distance_unbanded const &) = default;
858 edit_distance_unbanded(edit_distance_unbanded &&) = default;
859 edit_distance_unbanded & operator=(edit_distance_unbanded const &) = default;
860 edit_distance_unbanded & operator=(edit_distance_unbanded &&) = default;
861 ~edit_distance_unbanded() = default;
862
869 edit_distance_unbanded(database_t _database,
870 query_t _query,
871 align_config_t _config,
872 edit_traits const & SEQAN3_DOXYGEN_ONLY(_traits)) :
873 database{std::forward<database_t>(_database)},
874 query{std::forward<query_t>(_query)},
875 config{std::forward<align_config_t>(_config)},
876 _score{static_cast<score_type>(std::ranges::size(query))},
877 database_it{std::ranges::begin(database)},
878 database_it_end{std::ranges::end(database)}
879 {
880 static constexpr size_t alphabet_size_ = alphabet_size<query_alphabet_type>;
881
882 size_t const block_count = (std::ranges::size(query) - 1u + word_size) / word_size;
883 score_mask = word_type{1u} << ((std::ranges::size(query) - 1u + word_size) % word_size);
884
885 this->score_init();
886 if constexpr (use_max_errors)
887 this->max_errors_init(block_count);
888
889 if constexpr (compute_score_matrix)
890 this->score_matrix_init();
891
892 if constexpr (compute_trace_matrix)
893 this->trace_matrix_init(block_count);
894
895 vp.resize(block_count, vp0);
896 vn.resize(block_count, vn0);
897 bit_masks.resize((alphabet_size_ + 1u) * block_count, 0u);
898
899 // encoding the letters as bit-vectors
900 for (size_t j = 0u; j < std::ranges::size(query); j++)
901 {
902 size_t const i = block_count * seqan3::to_rank(query[j]) + j / word_size;
903 bit_masks[i] |= word_type{1u} << (j % word_size);
904 }
905
906 add_state();
907 }
909
910private:
912 template <bool with_carry>
913 static void compute_step(compute_state & state) noexcept
914 {
915 word_type x, t;
916 assert(state.carry_d0 <= 1u);
917 assert(state.carry_hp <= 1u);
918 assert(state.carry_hn <= 1u);
919
920 x = state.b | state.vn;
921 t = state.vp + (x & state.vp) + state.carry_d0;
922
923 state.d0 = (t ^ state.vp) | x;
924 state.hn = state.vp & state.d0;
925 state.hp = state.vn | ~(state.vp | state.d0);
926
927 if constexpr (with_carry)
928 state.carry_d0 = (state.carry_d0 != 0u) ? t <= state.vp : t < state.vp;
929
930 x = (state.hp << 1u) | state.carry_hp;
931 state.vn = x & state.d0;
932 state.vp = (state.hn << 1u) | ~(x | state.d0) | state.carry_hn;
933
934 if constexpr (with_carry)
935 {
936 state.carry_hp = state.hp >> (word_size - 1u);
937 state.carry_hn = state.hn >> (word_size - 1u);
938 }
939 }
940
942 template <bool with_carry>
943 void compute_kernel(compute_state & state, size_t const block_offset, size_t const current_block) noexcept
944 {
945 state.vp = proxy_reference<word_type>{this->vp[current_block]};
946 state.vn = proxy_reference<word_type>{this->vn[current_block]};
947 if constexpr (compute_trace_matrix)
948 {
949 state.hp = proxy_reference<word_type>{this->hp[current_block]};
950 state.db = proxy_reference<word_type>{this->db[current_block]};
951 }
952 state.b = bit_masks[block_offset + current_block];
953
954 compute_step<with_carry>(state);
955 if constexpr (compute_trace_matrix)
956 state.db = ~(state.b ^ state.d0);
957 }
958
960 void advance_score(word_type P, word_type N, word_type mask) noexcept
961 {
962 if ((P & mask) != word_type{0u})
963 _score++;
964 else if ((N & mask) != word_type{0u})
965 _score--;
966 }
967
969 bool on_hit() noexcept
970 {
971 // TODO: call external on_hit functor
972 return false;
973 }
974
976 inline bool small_patterns();
977
979 inline bool large_patterns();
980
982 inline void compute_empty_query_sequence()
983 {
984 assert(std::ranges::empty(query));
985
986 bool abort_computation = false;
987
988 for (; database_it != database_it_end; ++database_it)
989 {
990 if constexpr (is_global)
991 ++_score;
992 else // is_semi_global
993 this->update_best_score();
994
995 // call on_hit
996 if constexpr (use_max_errors)
997 abort_computation = on_hit();
998
999 this->add_state();
1000 if (abort_computation)
1001 break;
1002 }
1003 }
1004
1006 void compute()
1007 {
1008 // limit search width for prefix search (if no matrix needs to be computed)
1009 if constexpr (use_max_errors && is_global && !compute_matrix)
1010 {
1011 // Note: For global alignments we know that the database can only be max_length long to have a score less
1012 // than or equal max_errors in the last cell.
1013 //
1014 // The following matrix shows a minimal value for each entry (because a diagonal must be +0 or +1, each
1015 // diagonal is at least the value of the initial value in the first row)
1016 // 0 1 2 3 4 5 6...
1017 // 1 0 1 2 3 4 5...
1018 // ...
1019 // m ... 3 2 1 0 1 2 3 4 5
1020 // Thus, after |query| + max_errors entries the score will always be higher than max_errors.
1021 size_t const max_length = std::ranges::size(query) + this->max_errors + 1u;
1022 size_t const haystack_length = std::min(std::ranges::size(database), max_length);
1023 database_it_end -= std::ranges::size(database) - haystack_length;
1024 }
1025
1026 // distinguish between the version for needles not longer than
1027 // one machine word and the version for longer needles
1028 // A special cases is if the second sequence is empty (vp.size() == 0u).
1029 if (vp.size() == 0u) // [[unlikely]]
1030 compute_empty_query_sequence();
1031 else if (vp.size() == 1u)
1032 small_patterns();
1033 else
1034 large_patterns();
1035
1036 if constexpr (is_global)
1037 this->update_best_score();
1038 }
1039
1040public:
1045 template <typename callback_t>
1046 void operator()([[maybe_unused]] size_t const idx, callback_t && callback)
1047 {
1048 using traits_type = alignment_configuration_traits<align_config_t>;
1049 using result_value_type = typename alignment_result_value_type_accessor<alignment_result_type>::type;
1050
1051 compute();
1052
1053 // First cache the begin and end positions if enabled by the edit distance traits.
1054 // Note, that they might be activated even if the user did not configure them, but in order to
1055 // compute for example the alignment both information are required and enabled internally.
1056 auto cached_end_positions = this->invalid_coordinate();
1057 auto cached_begin_positions = this->invalid_coordinate();
1058
1059 if constexpr (compute_end_positions)
1060 cached_end_positions = this->end_positions();
1061
1062 if constexpr (compute_begin_positions && !compute_sequence_alignment)
1063 {
1064 static_assert(compute_end_positions, "End positions required to compute the begin positions.");
1065 cached_begin_positions = this->begin_positions();
1066 }
1067
1068 // Fill the result value type. Note we need to ask what was enabled on the user side in order to store
1069 // the correct information in the alignment result type. This information is stored in the alignment
1070 // configuration traits and not in the edit distance traits.
1071 result_value_type res_vt{};
1072
1073 if constexpr (traits_type::output_sequence1_id)
1074 res_vt.sequence1_id = idx;
1075
1076 if constexpr (traits_type::output_sequence2_id)
1077 res_vt.sequence2_id = idx;
1078
1079 if constexpr (traits_type::compute_score)
1080 res_vt.score = this->score().value_or(matrix_inf<score_type>);
1081
1082 if constexpr (traits_type::compute_sequence_alignment)
1083 {
1084 if (this->is_valid())
1085 {
1086 auto [first, second] = cached_end_positions;
1087 detail::matrix_coordinate const end_positions{detail::row_index_type{second},
1088 detail::column_index_type{first}};
1089
1090 aligned_sequence_builder builder{this->database, this->query};
1091 auto trace_res = builder(this->trace_matrix().trace_path(end_positions));
1092 res_vt.alignment = std::move(trace_res.alignment);
1093 cached_begin_positions.first = trace_res.first_sequence_slice_positions.first;
1094 cached_begin_positions.second = trace_res.second_sequence_slice_positions.first;
1095 }
1096 }
1097
1098 if constexpr (traits_type::compute_end_positions)
1099 res_vt.end_positions = std::move(cached_end_positions);
1100
1101 if constexpr (traits_type::compute_begin_positions)
1102 res_vt.begin_positions = std::move(cached_begin_positions);
1103
1104 callback(alignment_result_type{std::move(res_vt)});
1105 }
1106};
1107
1108template <std::ranges::viewable_range database_t,
1109 std::ranges::viewable_range query_t,
1110 typename align_config_t,
1111 typename traits_t>
1112bool edit_distance_unbanded<database_t, query_t, align_config_t, traits_t>::small_patterns()
1113{
1114 bool abort_computation = false;
1115
1116 // computing the blocks
1117 while (database_it != database_it_end)
1118 {
1119 compute_state state{};
1120 size_t const block_offset = seqan3::to_rank((query_alphabet_type)*database_it);
1121
1122 compute_kernel<false>(state, block_offset, 0u);
1123 advance_score(state.hp, state.hn, score_mask);
1124
1125 // semi-global without max_errors guarantees that the score stays within the last row
1126 if constexpr (is_semi_global && !use_max_errors)
1127 this->update_best_score();
1128
1129 // updating the last active cell
1130 if constexpr (use_max_errors)
1131 abort_computation = this->update_last_active_cell();
1132
1133 add_state();
1134 ++database_it;
1135 if (abort_computation)
1136 return true;
1137 }
1138
1139 return false;
1140}
1141
1142template <std::ranges::viewable_range database_t,
1143 std::ranges::viewable_range query_t,
1144 typename align_config_t,
1145 typename traits_t>
1146bool edit_distance_unbanded<database_t, query_t, align_config_t, traits_t>::large_patterns()
1147{
1148 bool abort_computation = false;
1149
1150 while (database_it != database_it_end)
1151 {
1152 compute_state state{};
1153 size_t const block_offset = vp.size() * seqan3::to_rank((query_alphabet_type)*database_it);
1154
1155 size_t block_count = vp.size();
1156 if constexpr (use_max_errors)
1157 block_count = this->last_block + 1;
1158
1159 // compute each block in the current column; carries between blocks will be propagated.
1160 for (size_t current_block = 0u; current_block < block_count; current_block++)
1161 compute_kernel<true>(state, block_offset, current_block);
1162
1163 advance_score(state.hp, state.hn, score_mask);
1164
1165 // semi-global without max_errors guarantees that the score stays within the last row
1166 if constexpr (is_semi_global && !use_max_errors)
1167 this->update_best_score();
1168
1169 if constexpr (use_max_errors)
1170 {
1171 // if the last active cell reached the end within the current block we have to compute the next block.
1172 bool additional_block = score_mask >> (word_size - 1u);
1173 bool reached_last_block = this->last_block + 1u == vp.size();
1174 // If there is no next block we skip the computation.
1175 if (reached_last_block)
1176 additional_block = false;
1177
1178 if (additional_block)
1179 {
1180 size_t const current_block = this->last_block + 1u;
1181 // this might not be necessary, but carry_d0 = 1u might have an influence on the result of vn and vp.
1182 vp[current_block] = vp0;
1183 vn[current_block] = vn0;
1184 compute_kernel<false>(state, block_offset, current_block);
1185 }
1186
1187 // updating the last active cell
1188 abort_computation = this->update_last_active_cell();
1189 }
1190
1191 add_state();
1192 ++database_it;
1193
1194 if (abort_computation)
1195 return true;
1196 }
1197
1198 return false;
1199}
1200
1207template <typename database_t, typename query_t, typename config_t, typename traits_t>
1208edit_distance_unbanded(database_t && database, query_t && query, config_t config, traits_t)
1209 -> edit_distance_unbanded<database_t, query_t, config_t, traits_t>;
1211
1212} // namespace seqan3::detail
Provides seqan3::detail::advanceable_alignment_coordinate.
Provides seqan3::alignment_result.
T begin(T... args)
Provides seqan3::configuration and utility functions.
Forwards for seqan3::edit_distance_unbanded related types.
Provides seqan3::detail::edit_distance_score_matrix_full.
Provides seqan3::detail::edit_distance_trace_matrix_full.
T end(T... args)
T forward(T... args)
constexpr auto to_rank
Return the rank representation of a (semi-)alphabet object.
Definition: concept.hpp:155
@ alignment
The (pairwise) alignment stored in an object that models seqan3::detail::pairwise_alignment.
@ offset
Sequence (seqan3::field::seq) relative start position (0-based), unsigned value.
constexpr size_t size
The size of a type pack.
Definition: traits.hpp:146
Provides seqan3::detail::matrix.
T min(T... args)
constexpr auto const & get(configuration< configs_t... > const &config) noexcept
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: configuration.hpp:415
SeqAn specific customisations in the standard namespace.