19 #include <range/v3/algorithm/copy.hpp> 36 template <
typename derived_t,
typename edit_traits>
37 class edit_distance_unbanded_max_errors_policy :
42 static_assert(edit_traits::use_max_errors,
"This policy assumes that edit_traits::use_max_errors is true.");
50 edit_distance_unbanded_max_errors_policy() noexcept = default;
51 edit_distance_unbanded_max_errors_policy(edit_distance_unbanded_max_errors_policy const &) noexcept
53 edit_distance_unbanded_max_errors_policy(edit_distance_unbanded_max_errors_policy &&) noexcept
55 edit_distance_unbanded_max_errors_policy & operator=(edit_distance_unbanded_max_errors_policy const &) noexcept
57 edit_distance_unbanded_max_errors_policy & operator=(edit_distance_unbanded_max_errors_policy &&) noexcept
59 ~edit_distance_unbanded_max_errors_policy() noexcept = default;
62 using typename edit_traits::word_type;
63 using typename edit_traits::score_type;
69 score_type max_errors{255};
72 size_t last_block{0u};
74 word_type last_score_mask{};
81 void max_errors_init(
size_t block_count) noexcept
84 derived_t *
self =
static_cast<derived_t *
>(
this);
86 max_errors = get<align_cfg::max_error>(
self->config).value;
87 assert(max_errors >= score_type{0});
89 last_block = block_count - 1u;
90 last_score_mask =
self->score_mask;
95 size_t const local_max_errors = std::min<size_t>(max_errors,
std::ranges::size(self->query) - 1u);
96 self->score_mask = word_type{1u} << (local_max_errors %
self->word_size);
97 last_block =
std::min(local_max_errors / self->word_size, last_block);
98 self->_score = local_max_errors + 1u;
102 bool is_last_active_cell_within_last_row() const noexcept
104 derived_t
const *
self =
static_cast<derived_t
const *
>(
this);
105 return (self->score_mask == this->last_score_mask) && (this->last_block ==
self->vp.size() - 1u);
109 bool prev_last_active_cell() noexcept
111 derived_t *
self =
static_cast<derived_t *
>(
this);
112 self->score_mask >>= 1u;
113 if (self->score_mask != 0u)
116 if constexpr (edit_traits::is_global)
118 if (last_block == 0u)
124 self->score_mask = word_type{1u} << (edit_traits::word_size - 1u);
129 void next_last_active_cell() noexcept
131 derived_t *
self =
static_cast<derived_t *
>(
this);
132 self->score_mask <<= 1u;
133 if (self->score_mask)
136 self->score_mask = 1u;
143 bool update_last_active_cell() noexcept
145 derived_t *
self =
static_cast<derived_t *
>(
this);
147 while (!(self->_score <= max_errors))
149 self->advance_score(self->vn[last_block], self->vp[last_block], self->score_mask);
150 if (!prev_last_active_cell())
153 assert(edit_traits::is_global);
156 return !edit_traits::compute_matrix;
160 if (is_last_active_cell_within_last_row())
162 assert(self->_score <= max_errors);
164 if constexpr(edit_traits::is_semi_global)
165 self->update_best_score();
167 return self->on_hit();
171 next_last_active_cell();
172 self->advance_score(self->vp[last_block], self->vn[last_block], self->score_mask);
180 static size_t max_rows(word_type
const score_mask,
unsigned const last_block,
181 score_type
const score, score_type
const max_errors) noexcept
183 using score_matrix_type =
typename edit_traits::score_matrix_type;
184 return score_matrix_type::max_rows(score_mask,
194 template <
typename derived_t,
typename edit_traits>
195 class edit_distance_unbanded_global_policy :
200 static_assert(edit_traits::is_global || edit_traits::is_semi_global,
201 "This policy assumes that edit_traits::is_global or edit_traits::is_semi_global is true.");
209 edit_distance_unbanded_global_policy() noexcept = default;
210 edit_distance_unbanded_global_policy(edit_distance_unbanded_global_policy const &) noexcept
212 edit_distance_unbanded_global_policy(edit_distance_unbanded_global_policy &&) noexcept
214 edit_distance_unbanded_global_policy & operator=(edit_distance_unbanded_global_policy const &) noexcept
216 edit_distance_unbanded_global_policy & operator=(edit_distance_unbanded_global_policy &&) noexcept
218 ~edit_distance_unbanded_global_policy() noexcept = default;
222 using typename edit_traits::score_type;
232 score_type _best_score{};
239 void score_init() noexcept
242 derived_t
const *
self =
static_cast<derived_t
const *
>(
this);
243 _best_score =
self->_score;
247 bool is_valid() const noexcept
249 [[maybe_unused]] derived_t
const *
self =
static_cast<derived_t
const *
>(
this);
254 if constexpr(edit_traits::use_max_errors)
255 return _best_score <=
self->max_errors;
263 alignment_coordinate invalid_coordinate() const noexcept
265 derived_t
const *
self =
static_cast<derived_t
const *
>(
this);
270 void update_best_score() noexcept
272 derived_t
const *
self =
static_cast<derived_t
const *
>(
this);
273 _best_score =
self->_score;
277 size_t best_score_column() const noexcept
279 derived_t
const *
self =
static_cast<derived_t
const *
>(
this);
293 derived_t
const *
self =
static_cast<derived_t
const *
>(
this);
294 static_assert(edit_traits::compute_score,
"score() can only be computed if you specify the result type within " 295 "your alignment config.");
296 if (!self->is_valid())
304 alignment_coordinate back_coordinate() const noexcept
306 derived_t
const *
self =
static_cast<derived_t
const *
>(
this);
307 static_assert(edit_traits::compute_back_coordinate,
"back_coordinate() can only be computed if you specify the" 308 "result type within your alignment config.");
309 if (!self->is_valid())
310 return self->invalid_coordinate();
312 size_t col =
self->best_score_column();
313 return {column_index_type{col}, row_index_type{
std::ranges::size(self->query) - 1u}};
319 template <
typename derived_t,
typename edit_traits>
320 class edit_distance_unbanded_semi_global_policy :
321 public edit_distance_unbanded_global_policy<derived_t, edit_traits>
323 static_assert(edit_traits::is_semi_global,
"This policy assumes that edit_traits::is_semi_global is true.");
331 edit_distance_unbanded_semi_global_policy() noexcept = default;
332 edit_distance_unbanded_semi_global_policy(edit_distance_unbanded_semi_global_policy const &) noexcept
334 edit_distance_unbanded_semi_global_policy(edit_distance_unbanded_semi_global_policy &&) noexcept
336 edit_distance_unbanded_semi_global_policy & operator=(edit_distance_unbanded_semi_global_policy const &) noexcept
338 edit_distance_unbanded_semi_global_policy & operator=(edit_distance_unbanded_semi_global_policy &&) noexcept
340 ~edit_distance_unbanded_semi_global_policy() noexcept = default;
344 using base_t = edit_distance_unbanded_global_policy<derived_t, edit_traits>;
346 using typename base_t::database_iterator;
347 using base_t::_best_score;
361 database_iterator _best_score_col{};
368 void score_init() noexcept
371 derived_t
const *
self =
static_cast<derived_t
const *
>(
this);
372 base_t::score_init();
373 _best_score_col =
self->database_it_end;
377 bool is_valid() const noexcept
379 [[maybe_unused]] derived_t
const *
self =
static_cast<derived_t
const *
>(
this);
382 if constexpr(edit_traits::use_max_errors)
383 return _best_score_col !=
self->database_it_end;
391 void update_best_score() noexcept
393 derived_t
const *
self =
static_cast<derived_t
const *
>(
this);
396 if constexpr(edit_traits::use_max_errors)
398 assert(self->is_last_active_cell_within_last_row());
401 _best_score_col = (
self->_score <= _best_score) ? self->database_it : _best_score_col;
402 _best_score = (self->_score <= _best_score) ?
self->_score : _best_score;
406 size_t best_score_column() const noexcept
408 derived_t
const *
self =
static_cast<derived_t
const *
>(
this);
417 template <
typename derived_t,
typename edit_traits>
418 class edit_distance_unbanded_score_matrix_policy :
423 static_assert(edit_traits::compute_score_matrix,
424 "This policy assumes that edit_traits::compute_score_matrix is true.");
432 edit_distance_unbanded_score_matrix_policy() noexcept = default;
433 edit_distance_unbanded_score_matrix_policy(edit_distance_unbanded_score_matrix_policy const &) noexcept
435 edit_distance_unbanded_score_matrix_policy(edit_distance_unbanded_score_matrix_policy &&) noexcept
437 edit_distance_unbanded_score_matrix_policy & operator=(edit_distance_unbanded_score_matrix_policy const &) noexcept
439 edit_distance_unbanded_score_matrix_policy & operator=(edit_distance_unbanded_score_matrix_policy &&) noexcept
441 ~edit_distance_unbanded_score_matrix_policy() noexcept = default;
444 using typename edit_traits::score_matrix_type;
450 score_matrix_type _score_matrix{};
466 void score_matrix_init()
468 derived_t
const *
self =
static_cast<derived_t
const *
>(
this);
483 score_matrix_type
const & score_matrix() const noexcept
485 static_assert(edit_traits::compute_score_matrix,
"score_matrix() can only be computed if you specify the " 486 "result type within your alignment config.");
487 return _score_matrix;
495 template <
typename derived_t,
typename edit_traits>
496 class edit_distance_unbanded_trace_matrix_policy :
501 static_assert(edit_traits::compute_trace_matrix,
502 "This policy assumes that edit_traits::compute_trace_matrix is true.");
510 edit_distance_unbanded_trace_matrix_policy() noexcept = default;
511 edit_distance_unbanded_trace_matrix_policy(edit_distance_unbanded_trace_matrix_policy const &) noexcept
513 edit_distance_unbanded_trace_matrix_policy(edit_distance_unbanded_trace_matrix_policy &&) noexcept
515 edit_distance_unbanded_trace_matrix_policy & operator=(edit_distance_unbanded_trace_matrix_policy const &) noexcept
517 edit_distance_unbanded_trace_matrix_policy & operator=(edit_distance_unbanded_trace_matrix_policy &&) noexcept
519 ~edit_distance_unbanded_trace_matrix_policy() noexcept = default;
522 using typename edit_traits::word_type;
523 using typename edit_traits::trace_matrix_type;
524 using typename edit_traits::result_value_type;
530 std::vector<word_type> hp{};
536 trace_matrix_type _trace_matrix{};
551 void trace_matrix_init(
size_t block_count)
553 derived_t
const *
self =
static_cast<derived_t
const *
>(
this);
558 hp.resize(block_count, 0u);
559 db.resize(block_count, 0u);
568 trace_matrix_type
const & trace_matrix() const noexcept
571 static_assert(edit_traits::compute_trace_matrix,
"trace_matrix() can only be computed if you specify the " 572 "result type within your alignment config.");
573 return _trace_matrix;
578 alignment_coordinate front_coordinate() const noexcept
580 derived_t
const *
self =
static_cast<derived_t
const *
>(
this);
581 static_assert(edit_traits::compute_front_coordinate,
"front_coordinate() can only be computed if you specify " 582 "the result type within your alignment config.");
583 if (!self->is_valid())
584 return self->invalid_coordinate();
586 alignment_coordinate
const back =
self->back_coordinate();
587 return alignment_front_coordinate(trace_matrix(), back);
592 auto alignment() const noexcept
594 using alignment_t = decltype(result_value_type{}.alignment);
596 derived_t
const *
self =
static_cast<derived_t
const *
>(
this);
597 static_assert(edit_traits::compute_sequence_alignment,
"alignment() can only be computed if you specify the " 598 "result type within your alignment config.");
600 if (!self->is_valid())
601 return alignment_t{};
603 return alignment_trace<alignment_t>(
self->database,
606 self->back_coordinate(),
615 template <
typename value_t>
616 class proxy_reference
622 proxy_reference() noexcept = default;
623 proxy_reference(proxy_reference const &) noexcept = default;
624 proxy_reference(proxy_reference &&) noexcept = default;
625 proxy_reference & operator=(proxy_reference const &) noexcept = default;
626 proxy_reference & operator=(proxy_reference &&) noexcept = default;
627 ~proxy_reference() = default;
630 proxy_reference(value_t & t) noexcept
631 : ptr(
std::addressof(t))
634 proxy_reference(value_t &&) =
delete;
637 template <std::ConvertibleTo<value_t> other_value_t>
638 proxy_reference & operator=(other_value_t && u) noexcept
640 get() = std::forward<other_value_t>(u);
646 value_t &
get()
const noexcept
648 assert(ptr !=
nullptr);
653 operator value_t & ()
const noexcept
660 value_t * ptr{
nullptr};
676 typename align_config_t,
677 typename edit_traits>
678 class edit_distance_unbanded :
681 public edit_distance_base<
682 edit_traits::use_max_errors,
683 edit_distance_unbanded_max_errors_policy,
685 edit_distance_unbanded<database_t, query_t, align_config_t, edit_traits>>,
686 public edit_distance_base<
687 edit_traits::is_global,
688 edit_distance_unbanded_global_policy,
690 edit_distance_unbanded<database_t, query_t, align_config_t, edit_traits>>,
691 public edit_distance_base<
692 edit_traits::is_semi_global,
693 edit_distance_unbanded_semi_global_policy,
695 edit_distance_unbanded<database_t, query_t, align_config_t, edit_traits>>,
696 public edit_distance_base<
697 edit_traits::compute_score_matrix,
698 edit_distance_unbanded_score_matrix_policy,
700 edit_distance_unbanded<database_t, query_t, align_config_t, edit_traits>>,
701 public edit_distance_base<
702 edit_traits::compute_trace_matrix,
703 edit_distance_unbanded_trace_matrix_policy,
705 edit_distance_unbanded<database_t, query_t, align_config_t, edit_traits>>
709 using typename edit_traits::word_type;
710 using typename edit_traits::score_type;
711 using typename edit_traits::database_type;
712 using typename edit_traits::query_type;
713 using typename edit_traits::align_config_type;
714 using edit_traits::word_size;
717 using typename edit_traits::database_iterator;
718 using typename edit_traits::query_alphabet_type;
719 using typename edit_traits::result_value_type;
720 using edit_traits::use_max_errors;
721 using edit_traits::is_semi_global;
722 using edit_traits::is_global;
723 using edit_traits::compute_score;
724 using edit_traits::compute_back_coordinate;
725 using edit_traits::compute_front_coordinate;
726 using edit_traits::compute_sequence_alignment;
727 using edit_traits::compute_score_matrix;
728 using edit_traits::compute_trace_matrix;
729 using edit_traits::compute_matrix;
730 using typename edit_traits::score_matrix_type;
731 using typename edit_traits::trace_matrix_type;
738 align_config_t config;
741 static constexpr word_type hp0 = is_global ? 1u : 0u;
743 static constexpr word_type hn0 = 0u;
745 static constexpr word_type vp0 = ~word_type{0u};
747 static constexpr word_type vn0 = 0u;
757 word_type score_mask{0u};
771 database_iterator database_it{};
773 database_iterator database_it_end{};
776 struct compute_state_trace_matrix
779 proxy_reference<word_type> db{};
783 struct compute_state : enable_state_t<compute_trace_matrix, compute_state_trace_matrix>
797 proxy_reference<word_type> vp{};
799 proxy_reference<word_type> vn{};
801 word_type carry_d0{};
803 word_type carry_hp{hp0};
805 word_type carry_hn{};
811 if constexpr(!use_max_errors && compute_score_matrix)
812 this->_score_matrix.add_column(vp, vn);
814 if constexpr(!use_max_errors && compute_trace_matrix)
815 this->_trace_matrix.add_column(this->hp, this->db, vp);
817 if constexpr(use_max_errors && compute_matrix)
819 size_t max_rows = this->max_rows(score_mask, this->last_block, _score, this->max_errors);
820 if constexpr(compute_score_matrix)
821 this->_score_matrix.add_column(vp, vn, max_rows);
823 if constexpr(compute_trace_matrix)
824 this->_trace_matrix.add_column(this->hp, this->db, vp, max_rows);
832 edit_distance_unbanded() =
delete;
834 edit_distance_unbanded(edit_distance_unbanded
const &) =
default;
835 edit_distance_unbanded(edit_distance_unbanded &&) =
default;
836 edit_distance_unbanded & operator=(edit_distance_unbanded
const &) =
default;
837 edit_distance_unbanded & operator=(edit_distance_unbanded &&) =
default;
838 ~edit_distance_unbanded() =
default;
846 edit_distance_unbanded(database_t _database,
848 align_config_t _config,
849 edit_traits
const & SEQAN3_DOXYGEN_ONLY(_traits) = edit_traits{}) :
850 database{std::forward<database_t>(_database)},
851 query{std::forward<query_t>(_query)},
852 config{std::forward<align_config_t>(_config)},
857 static constexpr
size_t alphabet_size_ = alphabet_size<query_alphabet_type>;
859 size_t const block_count = (
std::ranges::size(query) - 1u + word_size) / word_size;
860 score_mask = word_type{1u} << ((
std::ranges::size(query) - 1u + word_size) % word_size);
863 if constexpr(use_max_errors)
864 this->max_errors_init(block_count);
866 if constexpr(compute_score_matrix)
867 this->score_matrix_init();
869 if constexpr(compute_trace_matrix)
870 this->trace_matrix_init(block_count);
872 vp.resize(block_count, vp0);
873 vn.resize(block_count, vn0);
874 bit_masks.resize((alphabet_size_ + 1u) * block_count, 0u);
879 size_t const i = block_count *
seqan3::to_rank(query[j]) + j / word_size;
880 bit_masks[i] |= word_type{1u} << (j % word_size);
889 template <
bool with_carry>
890 static void compute_step(compute_state & state) noexcept
893 assert(state.carry_d0 <= 1u);
894 assert(state.carry_hp <= 1u);
895 assert(state.carry_hn <= 1u);
897 x = state.b | state.vn;
898 t = state.vp + (x & state.vp) + state.carry_d0;
900 state.d0 = (t ^ state.vp) | x;
901 state.hn = state.vp & state.d0;
902 state.hp = state.vn | ~(state.vp | state.d0);
904 if constexpr(with_carry)
905 state.carry_d0 = (state.carry_d0 != 0u) ? t <= state.vp : t < state.vp;
907 x = (state.hp << 1u) | state.carry_hp;
908 state.vn = x & state.d0;
909 state.vp = (state.hn << 1u) | ~(x | state.d0) | state.carry_hn;
911 if constexpr(with_carry)
913 state.carry_hp = state.hp >> (word_size - 1u);
914 state.carry_hn = state.hn >> (word_size - 1u);
919 template <
bool with_carry>
920 void compute_kernel(compute_state & state,
size_t const block_offset,
size_t const current_block) noexcept
922 state.vp = proxy_reference<word_type>{this->vp[current_block]};
923 state.vn = proxy_reference<word_type>{this->vn[current_block]};
924 if constexpr(compute_trace_matrix)
926 state.hp = proxy_reference<word_type>{this->hp[current_block]};
927 state.db = proxy_reference<word_type>{this->db[current_block]};
929 state.b = bit_masks[block_offset + current_block];
931 compute_step<with_carry>(state);
932 if constexpr(compute_trace_matrix)
933 state.db = ~(state.b ^ state.d0);
937 void advance_score(word_type P, word_type N, word_type mask) noexcept
939 if ((P & mask) != word_type{0u})
941 else if ((N & mask) != word_type{0u})
946 bool on_hit() noexcept
953 inline bool small_patterns();
956 inline bool large_patterns();
962 if constexpr(use_max_errors && is_global && !compute_matrix)
986 if constexpr(is_global)
987 this->update_best_score();
995 alignment_result<result_value_type> operator()(
size_t const idx)
998 result_value_type res_vt{};
1000 if constexpr (compute_score)
1002 res_vt.score = this->score().value_or(matrix_inf<score_type>);
1005 if constexpr (compute_back_coordinate)
1007 res_vt.back_coordinate = this->back_coordinate();
1010 if constexpr (compute_front_coordinate)
1012 if (this->is_valid())
1013 res_vt.front_coordinate = alignment_front_coordinate(this->trace_matrix(), res_vt.back_coordinate);
1015 res_vt.front_coordinate = this->invalid_coordinate();
1018 if constexpr (compute_sequence_alignment)
1020 if (this->is_valid())
1022 using alignment_t = decltype(res_vt.alignment);
1023 res_vt.alignment = alignment_trace<alignment_t>(database,
1025 this->trace_matrix(),
1026 res_vt.back_coordinate,
1027 res_vt.front_coordinate);
1030 return alignment_result<result_value_type>{std::move(res_vt)};
1034 template <
typename database_t,
typename query_t,
typename align_config_t,
typename traits_t>
1035 bool edit_distance_unbanded<database_t, query_t, align_config_t, traits_t>::small_patterns()
1037 bool abort_computation =
false;
1040 while (database_it != database_it_end)
1042 compute_state state{};
1043 size_t const block_offset =
seqan3::to_rank((query_alphabet_type) *database_it);
1045 compute_kernel<false>(state, block_offset, 0u);
1046 advance_score(state.hp, state.hn, score_mask);
1049 if constexpr(is_semi_global && !use_max_errors)
1050 this->update_best_score();
1053 if constexpr(use_max_errors)
1054 abort_computation = this->update_last_active_cell();
1058 if (abort_computation)
1065 template <
typename database_t,
typename query_t,
typename align_config_t,
typename traits_t>
1066 bool edit_distance_unbanded<database_t, query_t, align_config_t, traits_t>::large_patterns()
1068 bool abort_computation =
false;
1070 while (database_it != database_it_end)
1072 compute_state state{};
1073 size_t const block_offset = vp.size() *
seqan3::to_rank((query_alphabet_type) *database_it);
1075 size_t block_count = vp.size();
1076 if constexpr(use_max_errors)
1077 block_count = this->last_block + 1;
1080 for (
size_t current_block = 0u; current_block < block_count; current_block++)
1081 compute_kernel<true>(state, block_offset, current_block);
1083 advance_score(state.hp, state.hn, score_mask);
1086 if constexpr(is_semi_global && !use_max_errors)
1087 this->update_best_score();
1089 if constexpr(use_max_errors)
1092 bool additional_block = score_mask >> (word_size - 1u);
1093 bool reached_last_block = this->last_block + 1u == vp.size();
1095 if (reached_last_block)
1096 additional_block =
false;
1098 if (additional_block)
1100 size_t const current_block = this->last_block + 1u;
1102 vp[current_block] = vp0;
1103 vn[current_block] = vn0;
1104 compute_kernel<false>(state, block_offset, current_block);
1108 abort_computation = this->update_last_active_cell();
1114 if (abort_computation)
1126 template<
typename database_t,
typename query_t,
typename config_t>
1128 edit_distance_unbanded(database_t && database, query_t && query, config_t config)
1129 -> edit_distance_unbanded<database_t, query_t, config_t>;
1132 template<
typename database_t,
typename query_t,
typename config_t,
typename traits_t>
1133 edit_distance_unbanded(database_t && database, query_t && query, config_t config, traits_t)
1134 -> edit_distance_unbanded<database_t, query_t, config_t, traits_t>;
::ranges::distance distance
Alias for ranges::distance. Returns the number of hops from first to last.
Definition: iterator:321
Specifies the requirements of a Range type that is either a std::ranges::View or an lvalue-reference...
Provides various shortcuts for common std::ranges functions.
constexpr auto to_rank
Return the rank representation of a (semi-)alphabet object.
Definition: concept.hpp:103
Forwards for seqan3::edit_distance_unbanded related types.
SeqAn specific customisations in the standard namespace.
::ranges::size size
Alias for ranges::size. Obtains the size of a range whose size can be calculated in constant time...
Definition: ranges:189
Provides seqan3::alignment_result.
Adaptations of concepts from the Ranges TS.
::ranges::begin begin
Alias for ranges::begin. Returns an iterator to the beginning of a range.
Definition: ranges:174
Provides seqan3::detail::edit_distance_score_matrix_full.
Definition: aligned_sequence_concept.hpp:35
Provides seqan3::detail::edit_distance_trace_matrix_full.
Provides seqan3::detail::alignment_coordinate.
Provides seqan3::detail::configuration and utility functions.
Provides algorithms that operate on seqan3::detail::alignment_trace_matrix.
::ranges::end end
Alias for ranges::end. Returns an iterator to the end of a range.
Definition: ranges:179