SeqAn3 3.1.0
The Modern C++ library for sequence analysis.
edit_distance_unbanded.hpp
Go to the documentation of this file.
1// -----------------------------------------------------------------------------------------------------
2// Copyright (c) 2006-2021, Knut Reinert & Freie Universität Berlin
3// Copyright (c) 2016-2021, Knut Reinert & MPI für molekulare Genetik
4// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6// -----------------------------------------------------------------------------------------------------
7
13#pragma once
14
15#include <algorithm>
16#include <seqan3/std/algorithm>
17#include <bitset>
18#include <seqan3/std/ranges>
19#include <utility>
20
28
29namespace seqan3::detail
30{
34template <typename derived_t, typename edit_traits>
35class edit_distance_unbanded_max_errors_policy :
37 edit_traits
39{
40protected:
41 static_assert(edit_traits::use_max_errors, "This policy assumes that edit_traits::use_max_errors is true.");
42
44 friend derived_t;
45
49 edit_distance_unbanded_max_errors_policy() noexcept = default;
50 edit_distance_unbanded_max_errors_policy(edit_distance_unbanded_max_errors_policy const &) noexcept
51 = default;
52 edit_distance_unbanded_max_errors_policy(edit_distance_unbanded_max_errors_policy &&) noexcept
53 = default;
54 edit_distance_unbanded_max_errors_policy & operator=(edit_distance_unbanded_max_errors_policy const &) noexcept
55 = default;
56 edit_distance_unbanded_max_errors_policy & operator=(edit_distance_unbanded_max_errors_policy &&) noexcept
57 = default;
58 ~edit_distance_unbanded_max_errors_policy() noexcept = default;
59
61
62 using typename edit_traits::word_type;
63 using typename edit_traits::score_type;
64
70 score_type max_errors{255};
72 size_t last_block{0u};
74 word_type last_score_mask{};
76
82 void max_errors_init(size_t block_count) noexcept
83 {
84 derived_t * self = static_cast<derived_t *>(this);
85
86 max_errors = -get<align_cfg::min_score>(self->config).score;
87
88 assert(max_errors >= score_type{0});
89
90 if (std::ranges::empty(self->query)) // [[unlikely]]
91 {
92 last_block = 0u;
93 self->score_mask = 0u;
94 last_score_mask = self->score_mask;
95 return;
96 }
97
98 last_block = block_count - 1u;
99 last_score_mask = self->score_mask;
100
101 // local_max_errors either stores the maximal number of _score (me.max_errors) or the needle size minus one. It
102 // is used for the mask computation and setting the initial score (the minus one is there because of the Ukkonen
103 // trick).
104 size_t const local_max_errors = std::min<size_t>(max_errors, std::ranges::size(self->query) - 1u);
105 self->score_mask = word_type{1u} << (local_max_errors % self->word_size);
106 last_block = std::min(local_max_errors / self->word_size, last_block);
107 self->_score = local_max_errors + 1u;
108 }
109
111 bool is_last_active_cell_within_last_row() const noexcept
112 {
113 derived_t const * self = static_cast<derived_t const *>(this);
114 return (self->score_mask == this->last_score_mask) && (this->last_block == self->vp.size() - 1u);
115 }
116
118 bool prev_last_active_cell() noexcept
119 {
120 derived_t * self = static_cast<derived_t *>(this);
121 self->score_mask >>= 1u;
122 if (self->score_mask != 0u)
123 return true;
124
125 if constexpr (edit_traits::is_global)
126 {
127 if (last_block == 0u) // [[unlikely]]
128 return false;
129 }
130
131 last_block--;
132
133 self->score_mask = word_type{1u} << (edit_traits::word_size - 1u);
134 return true;
135 }
136
138 void next_last_active_cell() noexcept
139 {
140 derived_t * self = static_cast<derived_t *>(this);
141 self->score_mask <<= 1u;
142 if (self->score_mask)
143 return;
144
145 self->score_mask = 1u;
146 last_block++;
147 }
148
152 bool update_last_active_cell() noexcept
153 {
154 derived_t * self = static_cast<derived_t *>(this);
155 // update the last active cell
156 while (!(self->_score <= max_errors))
157 {
158 self->advance_score(self->vn[last_block], self->vp[last_block], self->score_mask);
159 if (!prev_last_active_cell())
160 {
161 // prev_last_active_cell = false can only happen for global alignments
162 assert(edit_traits::is_global);
163 // we abort here if we don't need to compute a matrix, because the continued
164 // computation can't produce an alignment.
165 return !edit_traits::compute_matrix;
166 }
167 }
168
169 if (is_last_active_cell_within_last_row())
170 {
171 assert(self->_score <= max_errors);
172
173 if constexpr(edit_traits::is_semi_global)
174 self->update_best_score();
175
176 return self->on_hit();
177 }
178 else
179 {
180 next_last_active_cell();
181 self->advance_score(self->vp[last_block], self->vn[last_block], self->score_mask);
182 }
183
184 return false;
185 }
187
189 static size_t max_rows(word_type const score_mask, unsigned const last_block,
190 score_type const score, score_type const max_errors) noexcept
191 {
192 using score_matrix_type = typename edit_traits::score_matrix_type;
193 return score_matrix_type::max_rows(score_mask,
194 last_block,
195 score,
196 max_errors);
197 }
198};
199
203template <typename derived_t, typename edit_traits>
204class edit_distance_unbanded_global_policy :
206 edit_traits
208{
209protected:
210 static_assert(edit_traits::is_global || edit_traits::is_semi_global,
211 "This policy assumes that edit_traits::is_global or edit_traits::is_semi_global is true.");
212
214 friend derived_t;
215
219 edit_distance_unbanded_global_policy() noexcept = default;
220 edit_distance_unbanded_global_policy(edit_distance_unbanded_global_policy const &) noexcept
221 = default;
222 edit_distance_unbanded_global_policy(edit_distance_unbanded_global_policy &&) noexcept
223 = default;
224 edit_distance_unbanded_global_policy & operator=(edit_distance_unbanded_global_policy const &) noexcept
225 = default;
226 edit_distance_unbanded_global_policy & operator=(edit_distance_unbanded_global_policy &&) noexcept
227 = default;
228 ~edit_distance_unbanded_global_policy() noexcept = default;
229
231
233 using typename edit_traits::score_type;
234
243 score_type _best_score{};
245
251 void score_init() noexcept
252 {
253 derived_t const * self = static_cast<derived_t const *>(this);
254 _best_score = self->_score;
255 }
256
258 bool is_valid() const noexcept
259 {
260 [[maybe_unused]] derived_t const * self = static_cast<derived_t const *>(this);
261 // This condition uses the observation that after each computation of a column, _score has either the initial
262 // value of the first row (i.e. the entire column consist of INF's), has the value _score = max_errors + 1
263 // (there exists a cell within the column that has value <= max_errors, but is not on the last row) or _score <=
264 // max_errors (the score of the last active cell is <= max_errors)
265 if constexpr(edit_traits::use_max_errors)
266 return _best_score <= self->max_errors;
267
268 // When not using max_errors there is always a valid alignment, because the last row will always be updated and
269 // with it the score.
270 return true;
271 }
272
274 seqan3::detail::advanceable_alignment_coordinate<> invalid_coordinate() const noexcept
275 {
276 derived_t const * self = static_cast<derived_t const *>(this);
277 return {column_index_type{std::ranges::size(self->database)}, row_index_type{std::ranges::size(self->query)}};
278 }
279
281 void update_best_score() noexcept
282 {
283 derived_t const * self = static_cast<derived_t const *>(this);
284 _best_score = self->_score;
285 }
286
288 size_t end_positions_first() const noexcept
289 {
290 derived_t const * self = static_cast<derived_t const *>(this);
291 return std::ranges::size(self->database);
292 }
294
295public:
302 std::optional<score_type> score() const noexcept
303 {
304 derived_t const * self = static_cast<derived_t const *>(this);
305 static_assert(edit_traits::compute_score, "score() can only be computed if you specify the result type within "
306 "your alignment config.");
307 if (!self->is_valid())
308 return std::nullopt;
309
310 return -_best_score;
311 }
312
315 seqan3::detail::advanceable_alignment_coordinate<> end_positions() const noexcept
316 {
317 derived_t const * self = static_cast<derived_t const *>(this);
318 static_assert(edit_traits::compute_end_positions, "end_positions() can only be computed if you specify the "
319 "result type within your alignment config.");
320 if (!self->is_valid())
321 return self->invalid_coordinate();
322
323 column_index_type const first{self->end_positions_first()};
324 row_index_type const second{std::ranges::size(self->query)};
325 return {first, second};
326 }
328};
329
331template <typename derived_t, typename edit_traits>
332class edit_distance_unbanded_semi_global_policy :
333 public edit_distance_unbanded_global_policy<derived_t, edit_traits>
334{
335protected:
336 static_assert(edit_traits::is_semi_global, "This policy assumes that edit_traits::is_semi_global is true.");
337
339 friend derived_t;
340
344 edit_distance_unbanded_semi_global_policy() noexcept = default;
345 edit_distance_unbanded_semi_global_policy(edit_distance_unbanded_semi_global_policy const &) noexcept
346 = default;
347 edit_distance_unbanded_semi_global_policy(edit_distance_unbanded_semi_global_policy &&) noexcept
348 = default;
349 edit_distance_unbanded_semi_global_policy & operator=(edit_distance_unbanded_semi_global_policy const &) noexcept
350 = default;
351 edit_distance_unbanded_semi_global_policy & operator=(edit_distance_unbanded_semi_global_policy &&) noexcept
352 = default;
353 ~edit_distance_unbanded_semi_global_policy() noexcept = default;
354
356
358 using base_t = edit_distance_unbanded_global_policy<derived_t, edit_traits>;
360 using database_iterator = typename edit_traits::database_iterator;
361 using base_t::_best_score;
363
375 database_iterator _best_score_col{};
377
383 void score_init() noexcept
384 {
385 derived_t const * self = static_cast<derived_t const *>(this);
386 base_t::score_init();
387 _best_score_col = self->database_it_end;
388 }
389
391 void update_best_score() noexcept
392 {
393 derived_t const * self = static_cast<derived_t const *>(this);
394 // we have to make sure that update_best_score is only called after a score update within the last row.
395 if constexpr(edit_traits::use_max_errors)
396 {
397 assert(std::ranges::empty(self->query) || self->is_last_active_cell_within_last_row());
398 }
399
400 _best_score_col = (self->_score <= _best_score) ? self->database_it : _best_score_col;
401 _best_score = (self->_score <= _best_score) ? self->_score : _best_score;
402 }
403
405 size_t end_positions_first() const noexcept
406 {
407 derived_t const * self = static_cast<derived_t const *>(this);
408 // offset == 0u is a special case if database sequence is empty, because in this case the best column is zero.
409 size_t offset = std::ranges::empty(self->database) ? 0u : 1u;
410 return std::ranges::distance(std::ranges::begin(self->database), _best_score_col) + offset;
411 }
413};
414
418template <typename derived_t, typename edit_traits>
419class edit_distance_unbanded_score_matrix_policy :
421 edit_traits
423{
424protected:
425 static_assert(edit_traits::compute_score_matrix,
426 "This policy assumes that edit_traits::compute_score_matrix is true.");
427
429 friend derived_t;
430
434 edit_distance_unbanded_score_matrix_policy() noexcept = default;
435 edit_distance_unbanded_score_matrix_policy(edit_distance_unbanded_score_matrix_policy const &) noexcept
436 = default;
437 edit_distance_unbanded_score_matrix_policy(edit_distance_unbanded_score_matrix_policy &&) noexcept
438 = default;
439 edit_distance_unbanded_score_matrix_policy & operator=(edit_distance_unbanded_score_matrix_policy const &) noexcept
440 = default;
441 edit_distance_unbanded_score_matrix_policy & operator=(edit_distance_unbanded_score_matrix_policy &&) noexcept
442 = default;
443 ~edit_distance_unbanded_score_matrix_policy() noexcept = default;
444
446
447 using typename edit_traits::score_matrix_type;
448
454 score_matrix_type _score_matrix{};
456
469 void score_matrix_init()
470 {
471 derived_t const * self = static_cast<derived_t const *>(this);
472
473 _score_matrix = score_matrix_type{std::ranges::size(self->query) + 1u};
474 _score_matrix.reserve(std::ranges::size(self->database) + 1u);
475 }
477
478public:
486 score_matrix_type const & score_matrix() const noexcept
487 {
488 static_assert(edit_traits::compute_score_matrix, "score_matrix() can only be computed if you specify the "
489 "result type within your alignment config.");
490 return _score_matrix;
491 }
493};
494
498template <typename derived_t, typename edit_traits>
499class edit_distance_unbanded_trace_matrix_policy :
501 edit_traits
503{
504protected:
505 static_assert(edit_traits::compute_trace_matrix,
506 "This policy assumes that edit_traits::compute_trace_matrix is true.");
507
509 friend derived_t;
510
514 edit_distance_unbanded_trace_matrix_policy() noexcept = default;
515 edit_distance_unbanded_trace_matrix_policy(edit_distance_unbanded_trace_matrix_policy const &) noexcept
516 = default;
517 edit_distance_unbanded_trace_matrix_policy(edit_distance_unbanded_trace_matrix_policy &&) noexcept
518 = default;
519 edit_distance_unbanded_trace_matrix_policy & operator=(edit_distance_unbanded_trace_matrix_policy const &) noexcept
520 = default;
521 edit_distance_unbanded_trace_matrix_policy & operator=(edit_distance_unbanded_trace_matrix_policy &&) noexcept
522 = default;
523 ~edit_distance_unbanded_trace_matrix_policy() noexcept = default;
524
526
527 using typename edit_traits::word_type;
528 using typename edit_traits::trace_matrix_type;
529 using typename edit_traits::alignment_result_type;
530
536 std::vector<word_type> hp{};
539
541 trace_matrix_type _trace_matrix{};
543
556 void trace_matrix_init(size_t block_count)
557 {
558 derived_t const * self = static_cast<derived_t const *>(this);
559
560 _trace_matrix = trace_matrix_type{std::ranges::size(self->query) + 1u};
561 _trace_matrix.reserve(std::ranges::size(self->database) + 1u);
562
563 hp.resize(block_count, 0u);
564 db.resize(block_count, 0u);
565 }
567
568public:
574 trace_matrix_type const & trace_matrix() const noexcept
575 {
576 static_assert(edit_traits::compute_trace_matrix, "trace_matrix() can only be computed if you specify the "
577 "result type within your alignment config.");
578 return _trace_matrix;
579 }
580
583 seqan3::detail::advanceable_alignment_coordinate<> begin_positions() const noexcept
584 {
585 derived_t const * self = static_cast<derived_t const *>(this);
586 static_assert(edit_traits::compute_begin_positions, "begin_positions() can only be computed if you specify the "
587 "result type within your alignment config.");
588 if (!self->is_valid())
589 return self->invalid_coordinate();
590
591 auto [first, second] = self->end_positions();
592 detail::matrix_coordinate const end_positions{detail::row_index_type{second}, detail::column_index_type{first}};
593 auto trace_path = self->trace_matrix().trace_path(end_positions);
594 auto trace_path_it = std::ranges::begin(trace_path);
595 std::ranges::advance(trace_path_it, std::ranges::end(trace_path));
596 detail::matrix_coordinate const begin_positions = trace_path_it.coordinate();
597 return {detail::column_index_type{begin_positions.col}, detail::row_index_type{begin_positions.row}};
598 }
599
602 auto alignment() const noexcept
603 {
605
606 derived_t const * self = static_cast<derived_t const *>(this);
607 static_assert(edit_traits::compute_sequence_alignment, "alignment() can only be computed if you specify the "
608 "result type within your alignment config.");
609
610 if (!self->is_valid())
611 return alignment_t{};
612
613 auto [first, second] = self->end_positions();
614 detail::matrix_coordinate const end_positions{detail::row_index_type{second}, detail::column_index_type{first}};
615 aligned_sequence_builder builder{self->database, self->query};
616 auto trace_path = self->trace_matrix().trace_path(end_positions);
617 return builder(trace_path).alignment;
618 }
620};
621
625template <typename value_t>
626class proxy_reference
627{
628public:
632 proxy_reference() noexcept = default;
633 proxy_reference(proxy_reference const &) noexcept = default;
634 proxy_reference(proxy_reference &&) noexcept = default;
635 proxy_reference & operator=(proxy_reference const &) noexcept = default;
636 proxy_reference & operator=(proxy_reference &&) noexcept = default;
637 ~proxy_reference() = default;
638
640 proxy_reference(value_t & t) noexcept
641 : ptr(std::addressof(t))
642 {}
643
644 proxy_reference(value_t &&) = delete;
645
647 template <typename other_value_t>
649 requires std::convertible_to<other_value_t, value_t>
651 proxy_reference & operator=(other_value_t && u) noexcept
652 {
653 get() = std::forward<other_value_t>(u);
654 return *this;
655 }
657
659 value_t & get() const noexcept
660 {
661 assert(ptr != nullptr);
662 return *ptr;
663 }
664
666 operator value_t & () const noexcept
667 {
668 return get();
669 }
670
671private:
673 value_t * ptr{nullptr};
674};
675
687template <std::ranges::viewable_range database_t,
688 std::ranges::viewable_range query_t,
689 typename align_config_t,
690 typename edit_traits>
691class edit_distance_unbanded :
693// Hide this section in doxygen, because it messes up the inheritance.
694 public edit_distance_base<
695 edit_traits::use_max_errors,
696 edit_distance_unbanded_max_errors_policy,
697 edit_traits,
698 edit_distance_unbanded<database_t, query_t, align_config_t, edit_traits>>,
699 public edit_distance_base<
700 edit_traits::is_global,
701 edit_distance_unbanded_global_policy,
702 edit_traits,
703 edit_distance_unbanded<database_t, query_t, align_config_t, edit_traits>>,
704 public edit_distance_base<
705 edit_traits::is_semi_global,
706 edit_distance_unbanded_semi_global_policy,
707 edit_traits,
708 edit_distance_unbanded<database_t, query_t, align_config_t, edit_traits>>,
709 public edit_distance_base<
710 edit_traits::compute_score_matrix,
711 edit_distance_unbanded_score_matrix_policy,
712 edit_traits,
713 edit_distance_unbanded<database_t, query_t, align_config_t, edit_traits>>,
714 public edit_distance_base<
715 edit_traits::compute_trace_matrix,
716 edit_distance_unbanded_trace_matrix_policy,
717 edit_traits,
718 edit_distance_unbanded<database_t, query_t, align_config_t, edit_traits>>
720{
721public:
722 using typename edit_traits::word_type;
723 using typename edit_traits::score_type;
724 using typename edit_traits::database_type;
725 using typename edit_traits::query_type;
726 using typename edit_traits::align_config_type;
727 using edit_traits::word_size;
728
729private:
731 template <typename other_derived_t, typename other_edit_traits>
732 friend class edit_distance_unbanded_max_errors_policy;
734 template <typename other_derived_t, typename other_edit_traits>
735 friend class edit_distance_unbanded_global_policy;
737 template <typename other_derived_t, typename other_edit_traits>
738 friend class edit_distance_unbanded_semi_global_policy;
740 template <typename other_derived_t, typename other_edit_traits>
741 friend class edit_distance_unbanded_score_matrix_policy;
743 template <typename other_derived_t, typename other_edit_traits>
744 friend class edit_distance_unbanded_trace_matrix_policy;
745
746 using typename edit_traits::database_iterator;
747 using typename edit_traits::query_alphabet_type;
748 using typename edit_traits::alignment_result_type;
749 using edit_traits::use_max_errors;
750 using edit_traits::is_semi_global;
751 using edit_traits::is_global;
752 using edit_traits::compute_score;
753 using edit_traits::compute_end_positions;
754 using edit_traits::compute_begin_positions;
755 using edit_traits::compute_sequence_alignment;
756 using edit_traits::compute_score_matrix;
757 using edit_traits::compute_trace_matrix;
758 using edit_traits::compute_matrix;
759 using typename edit_traits::score_matrix_type;
760 using typename edit_traits::trace_matrix_type;
761
763 database_t database;
765 query_t query;
767 align_config_t config;
768
770 static constexpr word_type hp0 = is_global ? 1u : 0u;
772 static constexpr word_type hn0 = 0u;
774 static constexpr word_type vp0 = ~word_type{0u};
776 static constexpr word_type vn0 = 0u;
777
779 score_type _score{};
786 word_type score_mask{0u};
797 std::vector<word_type> bit_masks{};
798
800 database_iterator database_it{};
802 database_iterator database_it_end{};
803
805 struct compute_state_trace_matrix
806 {
808 proxy_reference<word_type> db{};
809 };
810
812 struct compute_state : enable_state_t<edit_traits::compute_trace_matrix, compute_state_trace_matrix>
813 {
816
818 word_type b{};
820 word_type d0{};
822 hp_type hp{};
824 word_type hn{};
826 proxy_reference<word_type> vp{};
828 proxy_reference<word_type> vn{};
830 word_type carry_d0{};
832 word_type carry_hp{hp0};
834 word_type carry_hn{};
835 };
836
838 void add_state()
839 {
840 if constexpr(!use_max_errors && compute_score_matrix)
841 this->_score_matrix.add_column(vp, vn);
842
843 if constexpr(!use_max_errors && compute_trace_matrix)
844 this->_trace_matrix.add_column(this->hp, this->db, vp);
845
846 if constexpr(use_max_errors && compute_matrix)
847 {
848 size_t max_rows = this->max_rows(score_mask, this->last_block, _score, this->max_errors);
849 if constexpr(compute_score_matrix)
850 this->_score_matrix.add_column(vp, vn, max_rows);
851
852 if constexpr(compute_trace_matrix)
853 this->_trace_matrix.add_column(this->hp, this->db, vp, max_rows);
854 }
855 }
856
857public:
862 edit_distance_unbanded() = delete;
863 edit_distance_unbanded(edit_distance_unbanded const &) = default;
864 edit_distance_unbanded(edit_distance_unbanded &&) = default;
865 edit_distance_unbanded & operator=(edit_distance_unbanded const &) = default;
866 edit_distance_unbanded & operator=(edit_distance_unbanded &&) = default;
867 ~edit_distance_unbanded() = default;
868
875 edit_distance_unbanded(database_t _database,
876 query_t _query,
877 align_config_t _config,
878 edit_traits const & SEQAN3_DOXYGEN_ONLY(_traits)) :
879 database{std::forward<database_t>(_database)},
880 query{std::forward<query_t>(_query)},
881 config{std::forward<align_config_t>(_config)},
882 _score{static_cast<score_type>(std::ranges::size(query))},
883 database_it{ranges::begin(database)},
884 database_it_end{ranges::end(database)}
885 {
886 static constexpr size_t alphabet_size_ = alphabet_size<query_alphabet_type>;
887
888 size_t const block_count = (std::ranges::size(query) - 1u + word_size) / word_size;
889 score_mask = word_type{1u} << ((std::ranges::size(query) - 1u + word_size) % word_size);
890
891 this->score_init();
892 if constexpr(use_max_errors)
893 this->max_errors_init(block_count);
894
895 if constexpr(compute_score_matrix)
896 this->score_matrix_init();
897
898 if constexpr(compute_trace_matrix)
899 this->trace_matrix_init(block_count);
900
901 vp.resize(block_count, vp0);
902 vn.resize(block_count, vn0);
903 bit_masks.resize((alphabet_size_ + 1u) * block_count, 0u);
904
905 // encoding the letters as bit-vectors
906 for (size_t j = 0u; j < std::ranges::size(query); j++)
907 {
908 size_t const i = block_count * seqan3::to_rank(query[j]) + j / word_size;
909 bit_masks[i] |= word_type{1u} << (j % word_size);
910 }
911
912 add_state();
913 }
915
916private:
918 template <bool with_carry>
919 static void compute_step(compute_state & state) noexcept
920 {
921 word_type x, t;
922 assert(state.carry_d0 <= 1u);
923 assert(state.carry_hp <= 1u);
924 assert(state.carry_hn <= 1u);
925
926 x = state.b | state.vn;
927 t = state.vp + (x & state.vp) + state.carry_d0;
928
929 state.d0 = (t ^ state.vp) | x;
930 state.hn = state.vp & state.d0;
931 state.hp = state.vn | ~(state.vp | state.d0);
932
933 if constexpr(with_carry)
934 state.carry_d0 = (state.carry_d0 != 0u) ? t <= state.vp : t < state.vp;
935
936 x = (state.hp << 1u) | state.carry_hp;
937 state.vn = x & state.d0;
938 state.vp = (state.hn << 1u) | ~(x | state.d0) | state.carry_hn;
939
940 if constexpr(with_carry)
941 {
942 state.carry_hp = state.hp >> (word_size - 1u);
943 state.carry_hn = state.hn >> (word_size - 1u);
944 }
945 }
946
948 template <bool with_carry>
949 void compute_kernel(compute_state & state, size_t const block_offset, size_t const current_block) noexcept
950 {
951 state.vp = proxy_reference<word_type>{this->vp[current_block]};
952 state.vn = proxy_reference<word_type>{this->vn[current_block]};
953 if constexpr(compute_trace_matrix)
954 {
955 state.hp = proxy_reference<word_type>{this->hp[current_block]};
956 state.db = proxy_reference<word_type>{this->db[current_block]};
957 }
958 state.b = bit_masks[block_offset + current_block];
959
960 compute_step<with_carry>(state);
961 if constexpr(compute_trace_matrix)
962 state.db = ~(state.b ^ state.d0);
963 }
964
966 void advance_score(word_type P, word_type N, word_type mask) noexcept
967 {
968 if ((P & mask) != word_type{0u})
969 _score++;
970 else if ((N & mask) != word_type{0u})
971 _score--;
972 }
973
975 bool on_hit() noexcept
976 {
977 // TODO: call external on_hit functor
978 return false;
979 }
980
982 inline bool small_patterns();
983
985 inline bool large_patterns();
986
988 inline void compute_empty_query_sequence()
989 {
990 assert(std::ranges::empty(query));
991
992 bool abort_computation = false;
993
994 for (; database_it != database_it_end; ++database_it)
995 {
996 if constexpr(is_global)
997 ++_score;
998 else // is_semi_global
999 this->update_best_score();
1000
1001 // call on_hit
1002 if constexpr(use_max_errors)
1003 abort_computation = on_hit();
1004
1005 this->add_state();
1006 if (abort_computation)
1007 break;
1008 }
1009 }
1010
1012 void compute()
1013 {
1014 // limit search width for prefix search (if no matrix needs to be computed)
1015 if constexpr(use_max_errors && is_global && !compute_matrix)
1016 {
1017 // Note: For global alignments we know that the database can only be max_length long to have a score less
1018 // than or equal max_errors in the last cell.
1019 //
1020 // The following matrix shows a minimal value for each entry (because a diagonal must be +0 or +1, each
1021 // diagonal is at least the value of the initial value in the first row)
1022 // 0 1 2 3 4 5 6...
1023 // 1 0 1 2 3 4 5...
1024 // ...
1025 // m ... 3 2 1 0 1 2 3 4 5
1026 // Thus, after |query| + max_errors entries the score will always be higher than max_errors.
1027 size_t const max_length = std::ranges::size(query) + this->max_errors + 1u;
1028 size_t const haystack_length = std::min(std::ranges::size(database), max_length);
1029 database_it_end -= std::ranges::size(database) - haystack_length;
1030 }
1031
1032 // distinguish between the version for needles not longer than
1033 // one machine word and the version for longer needles
1034 // A special cases is if the second sequence is empty (vp.size() == 0u).
1035 if (vp.size() == 0u) // [[unlikely]]
1036 compute_empty_query_sequence();
1037 else if (vp.size() == 1u)
1038 small_patterns();
1039 else
1040 large_patterns();
1041
1042 if constexpr(is_global)
1043 this->update_best_score();
1044 }
1045
1046public:
1051 template <typename callback_t>
1052 void operator()([[maybe_unused]] size_t const idx, callback_t && callback)
1053 {
1054 using traits_type = alignment_configuration_traits<align_config_t>;
1055 using result_value_type = typename alignment_result_value_type_accessor<alignment_result_type>::type;
1056
1057 compute();
1058
1059 // First cache the begin and end positions if enabled by the edit distance traits.
1060 // Note, that they might be activated even if the user did not configure them, but in order to
1061 // compute for example the alignment both information are required and enabled internally.
1062 auto cached_end_positions = this->invalid_coordinate();
1063 auto cached_begin_positions = this->invalid_coordinate();
1064
1065 if constexpr (compute_end_positions)
1066 cached_end_positions = this->end_positions();
1067
1068 if constexpr (compute_begin_positions && !compute_sequence_alignment)
1069 {
1070 static_assert(compute_end_positions, "End positions required to compute the begin positions.");
1071 cached_begin_positions = this->begin_positions();
1072 }
1073
1074 // Fill the result value type. Note we need to ask what was enabled on the user side in order to store
1075 // the correct information in the alignment result type. This information is stored in the alignment
1076 // configuration traits and not in the edit distance traits.
1077 result_value_type res_vt{};
1078
1079 if constexpr (traits_type::output_sequence1_id)
1080 res_vt.sequence1_id = idx;
1081
1082 if constexpr (traits_type::output_sequence2_id)
1083 res_vt.sequence2_id = idx;
1084
1085 if constexpr (traits_type::compute_score)
1086 res_vt.score = this->score().value_or(matrix_inf<score_type>);
1087
1088 if constexpr (traits_type::compute_sequence_alignment)
1089 {
1090 if (this->is_valid())
1091 {
1092 auto [first, second] = cached_end_positions;
1093 detail::matrix_coordinate const end_positions{detail::row_index_type{second},
1094 detail::column_index_type{first}};
1095
1096 aligned_sequence_builder builder{this->database, this->query};
1097 auto trace_res = builder(this->trace_matrix().trace_path(end_positions));
1098 res_vt.alignment = std::move(trace_res.alignment);
1099 cached_begin_positions.first = trace_res.first_sequence_slice_positions.first;
1100 cached_begin_positions.second = trace_res.second_sequence_slice_positions.first;
1101 }
1102 }
1103
1104 if constexpr (traits_type::compute_end_positions)
1105 res_vt.end_positions = std::move(cached_end_positions);
1106
1107 if constexpr (traits_type::compute_begin_positions)
1108 res_vt.begin_positions = std::move(cached_begin_positions);
1109
1110 callback(alignment_result_type{std::move(res_vt)});
1111 }
1112};
1113
1114template <typename database_t, typename query_t, typename align_config_t, typename traits_t>
1115bool edit_distance_unbanded<database_t, query_t, align_config_t, traits_t>::small_patterns()
1116{
1117 bool abort_computation = false;
1118
1119 // computing the blocks
1120 while (database_it != database_it_end)
1121 {
1122 compute_state state{};
1123 size_t const block_offset = seqan3::to_rank((query_alphabet_type) *database_it);
1124
1125 compute_kernel<false>(state, block_offset, 0u);
1126 advance_score(state.hp, state.hn, score_mask);
1127
1128 // semi-global without max_errors guarantees that the score stays within the last row
1129 if constexpr(is_semi_global && !use_max_errors)
1130 this->update_best_score();
1131
1132 // updating the last active cell
1133 if constexpr(use_max_errors)
1134 abort_computation = this->update_last_active_cell();
1135
1136 add_state();
1137 ++database_it;
1138 if (abort_computation)
1139 return true;
1140 }
1141
1142 return false;
1143}
1144
1145template <typename database_t, typename query_t, typename align_config_t, typename traits_t>
1146bool edit_distance_unbanded<database_t, query_t, align_config_t, traits_t>::large_patterns()
1147{
1148 bool abort_computation = false;
1149
1150 while (database_it != database_it_end)
1151 {
1152 compute_state state{};
1153 size_t const block_offset = vp.size() * seqan3::to_rank((query_alphabet_type) *database_it);
1154
1155 size_t block_count = vp.size();
1156 if constexpr(use_max_errors)
1157 block_count = this->last_block + 1;
1158
1159 // compute each block in the current column; carries between blocks will be propagated.
1160 for (size_t current_block = 0u; current_block < block_count; current_block++)
1161 compute_kernel<true>(state, block_offset, current_block);
1162
1163 advance_score(state.hp, state.hn, score_mask);
1164
1165 // semi-global without max_errors guarantees that the score stays within the last row
1166 if constexpr(is_semi_global && !use_max_errors)
1167 this->update_best_score();
1168
1169 if constexpr(use_max_errors)
1170 {
1171 // if the last active cell reached the end within the current block we have to compute the next block.
1172 bool additional_block = score_mask >> (word_size - 1u);
1173 bool reached_last_block = this->last_block + 1u == vp.size();
1174 // If there is no next block we skip the computation.
1175 if (reached_last_block)
1176 additional_block = false;
1177
1178 if (additional_block)
1179 {
1180 size_t const current_block = this->last_block + 1u;
1181 // this might not be necessary, but carry_d0 = 1u might have an influence on the result of vn and vp.
1182 vp[current_block] = vp0;
1183 vn[current_block] = vn0;
1184 compute_kernel<false>(state, block_offset, current_block);
1185 }
1186
1187 // updating the last active cell
1188 abort_computation = this->update_last_active_cell();
1189 }
1190
1191 add_state();
1192 ++database_it;
1193
1194 if (abort_computation)
1195 return true;
1196 }
1197
1198 return false;
1199}
1200
1207template <typename database_t, typename query_t, typename config_t, typename traits_t>
1208edit_distance_unbanded(database_t && database, query_t && query, config_t config, traits_t)
1209 -> edit_distance_unbanded<database_t, query_t, config_t, traits_t>;
1211
1212} // namespace seqan3::detail
Provides seqan3::detail::advanceable_alignment_coordinate.
Provides seqan3::alignment_result.
T begin(T... args)
Provides seqan3::configuration and utility functions.
Forwards for seqan3::edit_distance_unbanded related types.
Provides seqan3::detail::edit_distance_score_matrix_full.
Provides seqan3::detail::edit_distance_trace_matrix_full.
T end(T... args)
T forward(T... args)
constexpr auto to_rank
Return the rank representation of a (semi-)alphabet object.
Definition: concept.hpp:155
@ alignment
The (pairwise) alignment stored in an object that models seqan3::detail::pairwise_alignment.
@ offset
Sequence (seqan3::field::seq) relative start position (0-based), unsigned value.
constexpr size_t size
The size of a type pack.
Definition: traits.hpp:151
Provides seqan3::detail::matrix.
T min(T... args)
constexpr auto const & get(configuration< configs_t... > const &config) noexcept
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: configuration.hpp:429
SeqAn specific customisations in the standard namespace.
The <ranges> header from C++20's standard library.