SeqAn3  3.0.1
The Modern C++ library for sequence analysis.
edit_distance_unbanded.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2020, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2020, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
13 #pragma once
14 
15 #include <algorithm>
16 #include <bitset>
17 #include <utility>
18 
19 #include <range/v3/algorithm/copy.hpp>
20 
29 #include <seqan3/std/ranges>
30 
31 namespace seqan3::detail
32 {
36 template <typename derived_t, typename edit_traits>
37 class edit_distance_unbanded_max_errors_policy :
39  edit_traits
41 {
42 protected:
43  static_assert(edit_traits::use_max_errors, "This policy assumes that edit_traits::use_max_errors is true.");
44 
46  friend derived_t;
47 
51  edit_distance_unbanded_max_errors_policy() noexcept = default;
52  edit_distance_unbanded_max_errors_policy(edit_distance_unbanded_max_errors_policy const &) noexcept
53  = default;
54  edit_distance_unbanded_max_errors_policy(edit_distance_unbanded_max_errors_policy &&) noexcept
55  = default;
56  edit_distance_unbanded_max_errors_policy & operator=(edit_distance_unbanded_max_errors_policy const &) noexcept
57  = default;
58  edit_distance_unbanded_max_errors_policy & operator=(edit_distance_unbanded_max_errors_policy &&) noexcept
59  = default;
60  ~edit_distance_unbanded_max_errors_policy() noexcept = default;
61 
63  using typename edit_traits::word_type;
64  using typename edit_traits::score_type;
65 
70  score_type max_errors{255};
73  size_t last_block{0u};
75  word_type last_score_mask{};
77 
82  void max_errors_init(size_t block_count) noexcept
84  {
85  derived_t * self = static_cast<derived_t *>(this);
86 
87  max_errors = get<align_cfg::max_error>(self->config).value;
88  assert(max_errors >= score_type{0});
89 
90  if (std::ranges::empty(self->query)) // [[unlikely]]
91  {
92  last_block = 0u;
93  self->score_mask = 0u;
94  last_score_mask = self->score_mask;
95  return;
96  }
97 
98  last_block = block_count - 1u;
99  last_score_mask = self->score_mask;
100 
101  // local_max_errors either stores the maximal number of _score (me.max_errors) or the needle size minus one. It
102  // is used for the mask computation and setting the initial score (the minus one is there because of the Ukkonen
103  // trick).
104  size_t const local_max_errors = std::min<size_t>(max_errors, std::ranges::size(self->query) - 1u);
105  self->score_mask = word_type{1u} << (local_max_errors % self->word_size);
106  last_block = std::min(local_max_errors / self->word_size, last_block);
107  self->_score = local_max_errors + 1u;
108  }
109 
111  bool is_last_active_cell_within_last_row() const noexcept
112  {
113  derived_t const * self = static_cast<derived_t const *>(this);
114  return (self->score_mask == this->last_score_mask) && (this->last_block == self->vp.size() - 1u);
115  }
116 
118  bool prev_last_active_cell() noexcept
119  {
120  derived_t * self = static_cast<derived_t *>(this);
121  self->score_mask >>= 1u;
122  if (self->score_mask != 0u)
123  return true;
124 
125  if constexpr (edit_traits::is_global)
126  {
127  if (last_block == 0u) // [[unlikely]]
128  return false;
129  }
130 
131  last_block--;
132 
133  self->score_mask = word_type{1u} << (edit_traits::word_size - 1u);
134  return true;
135  }
136 
138  void next_last_active_cell() noexcept
139  {
140  derived_t * self = static_cast<derived_t *>(this);
141  self->score_mask <<= 1u;
142  if (self->score_mask)
143  return;
144 
145  self->score_mask = 1u;
146  last_block++;
147  }
148 
152  bool update_last_active_cell() noexcept
153  {
154  derived_t * self = static_cast<derived_t *>(this);
155  // update the last active cell
156  while (!(self->_score <= max_errors))
157  {
158  self->advance_score(self->vn[last_block], self->vp[last_block], self->score_mask);
159  if (!prev_last_active_cell())
160  {
161  // prev_last_active_cell = false can only happen for global alignments
162  assert(edit_traits::is_global);
163  // we abort here if we don't need to compute a matrix, because the continued
164  // computation can't produce an alignment.
165  return !edit_traits::compute_matrix;
166  }
167  }
168 
169  if (is_last_active_cell_within_last_row())
170  {
171  assert(self->_score <= max_errors);
172 
173  if constexpr(edit_traits::is_semi_global)
174  self->update_best_score();
175 
176  return self->on_hit();
177  }
178  else
179  {
180  next_last_active_cell();
181  self->advance_score(self->vp[last_block], self->vn[last_block], self->score_mask);
182  }
183 
184  return false;
185  }
187 
189  static size_t max_rows(word_type const score_mask, unsigned const last_block,
190  score_type const score, score_type const max_errors) noexcept
191  {
192  using score_matrix_type = typename edit_traits::score_matrix_type;
193  return score_matrix_type::max_rows(score_mask,
194  last_block,
195  score,
196  max_errors);
197  }
198 };
199 
203 template <typename derived_t, typename edit_traits>
204 class edit_distance_unbanded_global_policy :
206  edit_traits
208 {
209 protected:
210  static_assert(edit_traits::is_global || edit_traits::is_semi_global,
211  "This policy assumes that edit_traits::is_global or edit_traits::is_semi_global is true.");
212 
214  friend derived_t;
215 
219  edit_distance_unbanded_global_policy() noexcept = default;
220  edit_distance_unbanded_global_policy(edit_distance_unbanded_global_policy const &) noexcept
221  = default;
222  edit_distance_unbanded_global_policy(edit_distance_unbanded_global_policy &&) noexcept
223  = default;
224  edit_distance_unbanded_global_policy & operator=(edit_distance_unbanded_global_policy const &) noexcept
225  = default;
226  edit_distance_unbanded_global_policy & operator=(edit_distance_unbanded_global_policy &&) noexcept
227  = default;
228  ~edit_distance_unbanded_global_policy() noexcept = default;
229 
232  using typename edit_traits::score_type;
233 
242  score_type _best_score{};
244 
249  void score_init() noexcept
251  {
252  derived_t const * self = static_cast<derived_t const *>(this);
253  _best_score = self->_score;
254  }
255 
257  bool is_valid() const noexcept
258  {
259  [[maybe_unused]] derived_t const * self = static_cast<derived_t const *>(this);
260  // This condition uses the observation that after each computation of a column, _score has either the initial
261  // value of the first row (i.e. the entire column consist of INF's), has the value _score = max_errors + 1
262  // (there exists a cell within the column that has value <= max_errors, but is not on the last row) or _score <=
263  // max_errors (the score of the last active cell is <= max_errors)
264  if constexpr(edit_traits::use_max_errors)
265  return _best_score <= self->max_errors;
266 
267  // When not using max_errors there is always a valid alignment, because the last row will always be updated and
268  // with it the score.
269  return true;
270  }
271 
273  alignment_coordinate invalid_coordinate() const noexcept
274  {
275  derived_t const * self = static_cast<derived_t const *>(this);
276  return {column_index_type{std::ranges::size(self->database)}, row_index_type{std::ranges::size(self->query)}};
277  }
278 
280  void update_best_score() noexcept
281  {
282  derived_t const * self = static_cast<derived_t const *>(this);
283  _best_score = self->_score;
284  }
285 
287  size_t back_coordinate_first() const noexcept
288  {
289  derived_t const * self = static_cast<derived_t const *>(this);
290  return std::ranges::size(self->database);
291  }
293 
294 public:
299  std::optional<score_type> score() const noexcept
302  {
303  derived_t const * self = static_cast<derived_t const *>(this);
304  static_assert(edit_traits::compute_score, "score() can only be computed if you specify the result type within "
305  "your alignment config.");
306  if (!self->is_valid())
307  return std::nullopt;
308 
309  return -_best_score;
310  }
311 
314  alignment_coordinate back_coordinate() const noexcept
315  {
316  derived_t const * self = static_cast<derived_t const *>(this);
317  static_assert(edit_traits::compute_back_coordinate, "back_coordinate() can only be computed if you specify the"
318  "result type within your alignment config.");
319  if (!self->is_valid())
320  return self->invalid_coordinate();
321 
322  column_index_type const first{self->back_coordinate_first()};
323  row_index_type const second{std::ranges::size(self->query)};
324  return {first, second};
325  }
327 };
328 
330 template <typename derived_t, typename edit_traits>
331 class edit_distance_unbanded_semi_global_policy :
332  public edit_distance_unbanded_global_policy<derived_t, edit_traits>
333 {
334 protected:
335  static_assert(edit_traits::is_semi_global, "This policy assumes that edit_traits::is_semi_global is true.");
336 
338  friend derived_t;
339 
343  edit_distance_unbanded_semi_global_policy() noexcept = default;
344  edit_distance_unbanded_semi_global_policy(edit_distance_unbanded_semi_global_policy const &) noexcept
345  = default;
346  edit_distance_unbanded_semi_global_policy(edit_distance_unbanded_semi_global_policy &&) noexcept
347  = default;
348  edit_distance_unbanded_semi_global_policy & operator=(edit_distance_unbanded_semi_global_policy const &) noexcept
349  = default;
350  edit_distance_unbanded_semi_global_policy & operator=(edit_distance_unbanded_semi_global_policy &&) noexcept
351  = default;
352  ~edit_distance_unbanded_semi_global_policy() noexcept = default;
353 
356  using base_t = edit_distance_unbanded_global_policy<derived_t, edit_traits>;
358  using database_iterator = typename edit_traits::database_iterator;
359  using base_t::_best_score;
361 
373  database_iterator _best_score_col{};
375 
380  void score_init() noexcept
382  {
383  derived_t const * self = static_cast<derived_t const *>(this);
384  base_t::score_init();
385  _best_score_col = self->database_it_end;
386  }
387 
389  void update_best_score() noexcept
390  {
391  derived_t const * self = static_cast<derived_t const *>(this);
392  // we have to make sure that update_best_score is only called after a score update within the last row.
393  if constexpr(edit_traits::use_max_errors)
394  {
395  assert(std::ranges::empty(self->query) || self->is_last_active_cell_within_last_row());
396  }
397 
398  _best_score_col = (self->_score <= _best_score) ? self->database_it : _best_score_col;
399  _best_score = (self->_score <= _best_score) ? self->_score : _best_score;
400  }
401 
403  size_t back_coordinate_first() const noexcept
404  {
405  derived_t const * self = static_cast<derived_t const *>(this);
406  // offset == 0u is a special case if database sequence is empty, because in this case the best column is zero.
407  size_t offset = std::ranges::empty(self->database) ? 0u : 1u;
408  return std::ranges::distance(std::ranges::begin(self->database), _best_score_col) + offset;
409  }
411 };
412 
416 template <typename derived_t, typename edit_traits>
417 class edit_distance_unbanded_score_matrix_policy :
419  edit_traits
421 {
422 protected:
423  static_assert(edit_traits::compute_score_matrix,
424  "This policy assumes that edit_traits::compute_score_matrix is true.");
425 
427  friend derived_t;
428 
432  edit_distance_unbanded_score_matrix_policy() noexcept = default;
433  edit_distance_unbanded_score_matrix_policy(edit_distance_unbanded_score_matrix_policy const &) noexcept
434  = default;
435  edit_distance_unbanded_score_matrix_policy(edit_distance_unbanded_score_matrix_policy &&) noexcept
436  = default;
437  edit_distance_unbanded_score_matrix_policy & operator=(edit_distance_unbanded_score_matrix_policy const &) noexcept
438  = default;
439  edit_distance_unbanded_score_matrix_policy & operator=(edit_distance_unbanded_score_matrix_policy &&) noexcept
440  = default;
441  ~edit_distance_unbanded_score_matrix_policy() noexcept = default;
442 
444  using typename edit_traits::score_matrix_type;
445 
450  score_matrix_type _score_matrix{};
453 
466  void score_matrix_init()
467  {
468  derived_t const * self = static_cast<derived_t const *>(this);
469 
470  _score_matrix = score_matrix_type{std::ranges::size(self->query) + 1u};
471  _score_matrix.reserve(std::ranges::size(self->database) + 1u);
472  }
474 
475 public:
483  score_matrix_type const & score_matrix() const noexcept
484  {
485  static_assert(edit_traits::compute_score_matrix, "score_matrix() can only be computed if you specify the "
486  "result type within your alignment config.");
487  return _score_matrix;
488  }
490 };
491 
495 template <typename derived_t, typename edit_traits>
496 class edit_distance_unbanded_trace_matrix_policy :
498  edit_traits
500 {
501 protected:
502  static_assert(edit_traits::compute_trace_matrix,
503  "This policy assumes that edit_traits::compute_trace_matrix is true.");
504 
506  friend derived_t;
507 
511  edit_distance_unbanded_trace_matrix_policy() noexcept = default;
512  edit_distance_unbanded_trace_matrix_policy(edit_distance_unbanded_trace_matrix_policy const &) noexcept
513  = default;
514  edit_distance_unbanded_trace_matrix_policy(edit_distance_unbanded_trace_matrix_policy &&) noexcept
515  = default;
516  edit_distance_unbanded_trace_matrix_policy & operator=(edit_distance_unbanded_trace_matrix_policy const &) noexcept
517  = default;
518  edit_distance_unbanded_trace_matrix_policy & operator=(edit_distance_unbanded_trace_matrix_policy &&) noexcept
519  = default;
520  ~edit_distance_unbanded_trace_matrix_policy() noexcept = default;
521 
523  using typename edit_traits::word_type;
524  using typename edit_traits::trace_matrix_type;
525  using typename edit_traits::result_value_type;
526 
531  std::vector<word_type> hp{};
535 
537  trace_matrix_type _trace_matrix{};
539 
552  void trace_matrix_init(size_t block_count)
553  {
554  derived_t const * self = static_cast<derived_t const *>(this);
555 
556  _trace_matrix = trace_matrix_type{std::ranges::size(self->query) + 1u};
557  _trace_matrix.reserve(std::ranges::size(self->database) + 1u);
558 
559  hp.resize(block_count, 0u);
560  db.resize(block_count, 0u);
561  }
563 
564 public:
569  trace_matrix_type const & trace_matrix() const noexcept
571  {
572  static_assert(edit_traits::compute_trace_matrix, "trace_matrix() can only be computed if you specify the "
573  "result type within your alignment config.");
574  return _trace_matrix;
575  }
576 
579  alignment_coordinate front_coordinate() const noexcept
580  {
581  derived_t const * self = static_cast<derived_t const *>(this);
582  static_assert(edit_traits::compute_front_coordinate, "front_coordinate() can only be computed if you specify "
583  "the result type within your alignment config.");
584  if (!self->is_valid())
585  return self->invalid_coordinate();
586 
587  alignment_coordinate const back = self->back_coordinate();
588  return alignment_front_coordinate(trace_matrix(), back);
589  }
590 
593  auto alignment() const noexcept
594  {
595  using alignment_t = decltype(result_value_type{}.alignment);
596 
597  derived_t const * self = static_cast<derived_t const *>(this);
598  static_assert(edit_traits::compute_sequence_alignment, "alignment() can only be computed if you specify the "
599  "result type within your alignment config.");
600 
601  if (!self->is_valid())
602  return alignment_t{};
603 
604  return alignment_trace<alignment_t>(self->database,
605  self->query,
606  trace_matrix(),
607  self->back_coordinate(),
608  front_coordinate());
609  }
611 };
612 
616 template <typename value_t>
617 class proxy_reference
618 {
619 public:
623  proxy_reference() noexcept = default;
624  proxy_reference(proxy_reference const &) noexcept = default;
625  proxy_reference(proxy_reference &&) noexcept = default;
626  proxy_reference & operator=(proxy_reference const &) noexcept = default;
627  proxy_reference & operator=(proxy_reference &&) noexcept = default;
628  ~proxy_reference() = default;
629 
631  proxy_reference(value_t & t) noexcept
632  : ptr(std::addressof(t))
633  {}
634 
635  proxy_reference(value_t &&) = delete;
636 
638  template <typename other_value_t>
642  proxy_reference & operator=(other_value_t && u) noexcept
643  {
644  get() = std::forward<other_value_t>(u);
645  return *this;
646  }
648 
650  value_t & get() const noexcept
651  {
652  assert(ptr != nullptr);
653  return *ptr;
654  }
655 
657  operator value_t & () const noexcept
658  {
659  return get();
660  }
661 
662 private:
664  value_t * ptr{nullptr};
665 };
666 
678 template <std::ranges::viewable_range database_t,
679  std::ranges::viewable_range query_t,
680  typename align_config_t,
681  typename edit_traits>
682 class edit_distance_unbanded :
684 // Hide this section in doxygen, because it messes up the inheritance.
685  public edit_distance_base<
686  edit_traits::use_max_errors,
687  edit_distance_unbanded_max_errors_policy,
688  edit_traits,
689  edit_distance_unbanded<database_t, query_t, align_config_t, edit_traits>>,
690  public edit_distance_base<
691  edit_traits::is_global,
692  edit_distance_unbanded_global_policy,
693  edit_traits,
694  edit_distance_unbanded<database_t, query_t, align_config_t, edit_traits>>,
695  public edit_distance_base<
696  edit_traits::is_semi_global,
697  edit_distance_unbanded_semi_global_policy,
698  edit_traits,
699  edit_distance_unbanded<database_t, query_t, align_config_t, edit_traits>>,
700  public edit_distance_base<
701  edit_traits::compute_score_matrix,
702  edit_distance_unbanded_score_matrix_policy,
703  edit_traits,
704  edit_distance_unbanded<database_t, query_t, align_config_t, edit_traits>>,
705  public edit_distance_base<
706  edit_traits::compute_trace_matrix,
707  edit_distance_unbanded_trace_matrix_policy,
708  edit_traits,
709  edit_distance_unbanded<database_t, query_t, align_config_t, edit_traits>>
711 {
712 public:
713  using typename edit_traits::word_type;
714  using typename edit_traits::score_type;
715  using typename edit_traits::database_type;
716  using typename edit_traits::query_type;
717  using typename edit_traits::align_config_type;
718  using edit_traits::word_size;
719 
720 private:
722  template <typename other_derived_t, typename other_edit_traits>
723  friend class edit_distance_unbanded_max_errors_policy;
725  template <typename other_derived_t, typename other_edit_traits>
726  friend class edit_distance_unbanded_global_policy;
728  template <typename other_derived_t, typename other_edit_traits>
729  friend class edit_distance_unbanded_semi_global_policy;
731  template <typename other_derived_t, typename other_edit_traits>
732  friend class edit_distance_unbanded_score_matrix_policy;
734  template <typename other_derived_t, typename other_edit_traits>
735  friend class edit_distance_unbanded_trace_matrix_policy;
736 
737  using typename edit_traits::database_iterator;
738  using typename edit_traits::query_alphabet_type;
739  using typename edit_traits::result_value_type;
740  using edit_traits::use_max_errors;
741  using edit_traits::is_semi_global;
742  using edit_traits::is_global;
743  using edit_traits::compute_score;
744  using edit_traits::compute_back_coordinate;
745  using edit_traits::compute_front_coordinate;
746  using edit_traits::compute_sequence_alignment;
747  using edit_traits::compute_score_matrix;
748  using edit_traits::compute_trace_matrix;
749  using edit_traits::compute_matrix;
750  using typename edit_traits::score_matrix_type;
751  using typename edit_traits::trace_matrix_type;
752 
754  database_t database;
756  query_t query;
758  align_config_t config;
759 
761  static constexpr word_type hp0 = is_global ? 1u : 0u;
763  static constexpr word_type hn0 = 0u;
765  static constexpr word_type vp0 = ~word_type{0u};
767  static constexpr word_type vn0 = 0u;
768 
770  score_type _score{};
777  word_type score_mask{0u};
788  std::vector<word_type> bit_masks{};
789 
791  database_iterator database_it{};
793  database_iterator database_it_end{};
794 
796  struct compute_state_trace_matrix
797  {
799  proxy_reference<word_type> db{};
800  };
801 
803  struct compute_state : enable_state_t<compute_trace_matrix, compute_state_trace_matrix>
804  {
807 
809  word_type b{};
811  word_type d0{};
813  hp_type hp{};
815  word_type hn{};
817  proxy_reference<word_type> vp{};
819  proxy_reference<word_type> vn{};
821  word_type carry_d0{};
823  word_type carry_hp{hp0};
825  word_type carry_hn{};
826  };
827 
829  void add_state()
830  {
831  if constexpr(!use_max_errors && compute_score_matrix)
832  this->_score_matrix.add_column(vp, vn);
833 
834  if constexpr(!use_max_errors && compute_trace_matrix)
835  this->_trace_matrix.add_column(this->hp, this->db, vp);
836 
837  if constexpr(use_max_errors && compute_matrix)
838  {
839  size_t max_rows = this->max_rows(score_mask, this->last_block, _score, this->max_errors);
840  if constexpr(compute_score_matrix)
841  this->_score_matrix.add_column(vp, vn, max_rows);
842 
843  if constexpr(compute_trace_matrix)
844  this->_trace_matrix.add_column(this->hp, this->db, vp, max_rows);
845  }
846  }
847 
848 public:
852  edit_distance_unbanded() = delete;
854  edit_distance_unbanded(edit_distance_unbanded const &) = default;
855  edit_distance_unbanded(edit_distance_unbanded &&) = default;
856  edit_distance_unbanded & operator=(edit_distance_unbanded const &) = default;
857  edit_distance_unbanded & operator=(edit_distance_unbanded &&) = default;
858  ~edit_distance_unbanded() = default;
859 
866  edit_distance_unbanded(database_t _database,
867  query_t _query,
868  align_config_t _config,
869  edit_traits const & SEQAN3_DOXYGEN_ONLY(_traits) = edit_traits{}) :
870  database{std::forward<database_t>(_database)},
871  query{std::forward<query_t>(_query)},
872  config{std::forward<align_config_t>(_config)},
873  _score{static_cast<score_type>(std::ranges::size(query))},
874  database_it{ranges::begin(database)},
875  database_it_end{ranges::end(database)}
876  {
877  static constexpr size_t alphabet_size_ = alphabet_size<query_alphabet_type>;
878 
879  size_t const block_count = (std::ranges::size(query) - 1u + word_size) / word_size;
880  score_mask = word_type{1u} << ((std::ranges::size(query) - 1u + word_size) % word_size);
881 
882  this->score_init();
883  if constexpr(use_max_errors)
884  this->max_errors_init(block_count);
885 
886  if constexpr(compute_score_matrix)
887  this->score_matrix_init();
888 
889  if constexpr(compute_trace_matrix)
890  this->trace_matrix_init(block_count);
891 
892  vp.resize(block_count, vp0);
893  vn.resize(block_count, vn0);
894  bit_masks.resize((alphabet_size_ + 1u) * block_count, 0u);
895 
896  // encoding the letters as bit-vectors
897  for (size_t j = 0u; j < std::ranges::size(query); j++)
898  {
899  size_t const i = block_count * seqan3::to_rank(query[j]) + j / word_size;
900  bit_masks[i] |= word_type{1u} << (j % word_size);
901  }
902 
903  add_state();
904  }
906 
907 private:
909  template <bool with_carry>
910  static void compute_step(compute_state & state) noexcept
911  {
912  word_type x, t;
913  assert(state.carry_d0 <= 1u);
914  assert(state.carry_hp <= 1u);
915  assert(state.carry_hn <= 1u);
916 
917  x = state.b | state.vn;
918  t = state.vp + (x & state.vp) + state.carry_d0;
919 
920  state.d0 = (t ^ state.vp) | x;
921  state.hn = state.vp & state.d0;
922  state.hp = state.vn | ~(state.vp | state.d0);
923 
924  if constexpr(with_carry)
925  state.carry_d0 = (state.carry_d0 != 0u) ? t <= state.vp : t < state.vp;
926 
927  x = (state.hp << 1u) | state.carry_hp;
928  state.vn = x & state.d0;
929  state.vp = (state.hn << 1u) | ~(x | state.d0) | state.carry_hn;
930 
931  if constexpr(with_carry)
932  {
933  state.carry_hp = state.hp >> (word_size - 1u);
934  state.carry_hn = state.hn >> (word_size - 1u);
935  }
936  }
937 
939  template <bool with_carry>
940  void compute_kernel(compute_state & state, size_t const block_offset, size_t const current_block) noexcept
941  {
942  state.vp = proxy_reference<word_type>{this->vp[current_block]};
943  state.vn = proxy_reference<word_type>{this->vn[current_block]};
944  if constexpr(compute_trace_matrix)
945  {
946  state.hp = proxy_reference<word_type>{this->hp[current_block]};
947  state.db = proxy_reference<word_type>{this->db[current_block]};
948  }
949  state.b = bit_masks[block_offset + current_block];
950 
951  compute_step<with_carry>(state);
952  if constexpr(compute_trace_matrix)
953  state.db = ~(state.b ^ state.d0);
954  }
955 
957  void advance_score(word_type P, word_type N, word_type mask) noexcept
958  {
959  if ((P & mask) != word_type{0u})
960  _score++;
961  else if ((N & mask) != word_type{0u})
962  _score--;
963  }
964 
966  bool on_hit() noexcept
967  {
968  // TODO: call external on_hit functor
969  return false;
970  }
971 
973  inline bool small_patterns();
974 
976  inline bool large_patterns();
977 
979  inline void compute_empty_query_sequence()
980  {
981  assert(std::ranges::empty(query));
982 
983  bool abort_computation = false;
984 
985  for (; database_it != database_it_end; ++database_it)
986  {
987  if constexpr(is_global)
988  ++_score;
989  else // is_semi_global
990  this->update_best_score();
991 
992  // call on_hit
993  if constexpr(use_max_errors)
994  abort_computation = on_hit();
995 
996  this->add_state();
997  if (abort_computation)
998  break;
999  }
1000  }
1001 
1003  void compute()
1004  {
1005  // limit search width for prefix search (if no matrix needs to be computed)
1006  if constexpr(use_max_errors && is_global && !compute_matrix)
1007  {
1008  // Note: For global alignments we know that the database can only be max_length long to have a score less
1009  // than or equal max_errors in the last cell.
1010  //
1011  // The following matrix shows a minimal value for each entry (because a diagonal must be +0 or +1, each
1012  // diagonal is at least the value of the initial value in the first row)
1013  // 0 1 2 3 4 5 6...
1014  // 1 0 1 2 3 4 5...
1015  // ...
1016  // m ... 3 2 1 0 1 2 3 4 5
1017  // Thus, after |query| + max_errors entries the score will always be higher than max_errors.
1018  size_t const max_length = std::ranges::size(query) + this->max_errors + 1u;
1019  size_t const haystack_length = std::min(std::ranges::size(database), max_length);
1020  database_it_end -= std::ranges::size(database) - haystack_length;
1021  }
1022 
1023  // distinguish between the version for needles not longer than
1024  // one machine word and the version for longer needles
1025  // A special cases is if the second sequence is empty (vp.size() == 0u).
1026  if (vp.size() == 0u) // [[unlikely]]
1027  compute_empty_query_sequence();
1028  else if (vp.size() == 1u)
1029  small_patterns();
1030  else
1031  large_patterns();
1032 
1033  if constexpr(is_global)
1034  this->update_best_score();
1035  }
1036 
1037 public:
1042  alignment_result<result_value_type> operator()(size_t const idx)
1043  {
1044  compute();
1045  result_value_type res_vt{};
1046  res_vt.id = idx;
1047  if constexpr (compute_score)
1048  {
1049  res_vt.score = this->score().value_or(matrix_inf<score_type>);
1050  }
1051 
1052  if constexpr (compute_back_coordinate)
1053  {
1054  res_vt.back_coordinate = this->back_coordinate();
1055  }
1056 
1057  if constexpr (compute_front_coordinate)
1058  {
1059  if (this->is_valid())
1060  res_vt.front_coordinate = alignment_front_coordinate(this->trace_matrix(), res_vt.back_coordinate);
1061  else
1062  res_vt.front_coordinate = this->invalid_coordinate();
1063  }
1064 
1065  if constexpr (compute_sequence_alignment)
1066  {
1067  if (this->is_valid())
1068  {
1069  using alignment_t = decltype(res_vt.alignment);
1070  res_vt.alignment = alignment_trace<alignment_t>(database,
1071  query,
1072  this->trace_matrix(),
1073  res_vt.back_coordinate,
1074  res_vt.front_coordinate);
1075  }
1076  }
1077  return alignment_result<result_value_type>{std::move(res_vt)};
1078  }
1079 };
1080 
1081 template <typename database_t, typename query_t, typename align_config_t, typename traits_t>
1082 bool edit_distance_unbanded<database_t, query_t, align_config_t, traits_t>::small_patterns()
1083 {
1084  bool abort_computation = false;
1085 
1086  // computing the blocks
1087  while (database_it != database_it_end)
1088  {
1089  compute_state state{};
1090  size_t const block_offset = seqan3::to_rank((query_alphabet_type) *database_it);
1091 
1092  compute_kernel<false>(state, block_offset, 0u);
1093  advance_score(state.hp, state.hn, score_mask);
1094 
1095  // semi-global without max_errors guarantees that the score stays within the last row
1096  if constexpr(is_semi_global && !use_max_errors)
1097  this->update_best_score();
1098 
1099  // updating the last active cell
1100  if constexpr(use_max_errors)
1101  abort_computation = this->update_last_active_cell();
1102 
1103  add_state();
1104  ++database_it;
1105  if (abort_computation)
1106  return true;
1107  }
1108 
1109  return false;
1110 }
1111 
1112 template <typename database_t, typename query_t, typename align_config_t, typename traits_t>
1113 bool edit_distance_unbanded<database_t, query_t, align_config_t, traits_t>::large_patterns()
1114 {
1115  bool abort_computation = false;
1116 
1117  while (database_it != database_it_end)
1118  {
1119  compute_state state{};
1120  size_t const block_offset = vp.size() * seqan3::to_rank((query_alphabet_type) *database_it);
1121 
1122  size_t block_count = vp.size();
1123  if constexpr(use_max_errors)
1124  block_count = this->last_block + 1;
1125 
1126  // compute each block in the current column; carries between blocks will be propagated.
1127  for (size_t current_block = 0u; current_block < block_count; current_block++)
1128  compute_kernel<true>(state, block_offset, current_block);
1129 
1130  advance_score(state.hp, state.hn, score_mask);
1131 
1132  // semi-global without max_errors guarantees that the score stays within the last row
1133  if constexpr(is_semi_global && !use_max_errors)
1134  this->update_best_score();
1135 
1136  if constexpr(use_max_errors)
1137  {
1138  // if the last active cell reached the end within the current block we have to compute the next block.
1139  bool additional_block = score_mask >> (word_size - 1u);
1140  bool reached_last_block = this->last_block + 1u == vp.size();
1141  // If there is no next block we skip the computation.
1142  if (reached_last_block)
1143  additional_block = false;
1144 
1145  if (additional_block)
1146  {
1147  size_t const current_block = this->last_block + 1u;
1148  // this might not be necessary, but carry_d0 = 1u might have an influence on the result of vn and vp.
1149  vp[current_block] = vp0;
1150  vn[current_block] = vn0;
1151  compute_kernel<false>(state, block_offset, current_block);
1152  }
1153 
1154  // updating the last active cell
1155  abort_computation = this->update_last_active_cell();
1156  }
1157 
1158  add_state();
1159  ++database_it;
1160 
1161  if (abort_computation)
1162  return true;
1163  }
1164 
1165  return false;
1166 }
1167 
1173 template <typename database_t, typename query_t, typename config_t>
1175 edit_distance_unbanded(database_t && database, query_t && query, config_t config)
1176  -> edit_distance_unbanded<database_t, query_t, config_t>;
1177 
1179 template <typename database_t, typename query_t, typename config_t, typename traits_t>
1180 edit_distance_unbanded(database_t && database, query_t && query, config_t config, traits_t)
1181  -> edit_distance_unbanded<database_t, query_t, config_t, traits_t>;
1183 
1184 } // namespace seqan3::detail
bitset
shortcuts.hpp
Provides various shortcuts for common std::ranges functions.
utility
seqan3::field::offset
Sequence (SEQ) relative start position (0-based), unsigned value.
std::vector
configuration.hpp
Provides seqan3::detail::configuration and utility functions.
seqan3::views::move
const auto move
A view that turns lvalue-references into rvalue-references.
Definition: move.hpp:68
seqan3::to_rank
constexpr auto to_rank
Return the rank representation of a (semi-)alphabet object.
Definition: concept.hpp:142
alignment_trace_algorithms.hpp
Provides algorithms that operate on seqan3::detail::alignment_trace_matrix.
edit_distance_score_matrix_full.hpp
Provides seqan3::detail::edit_distance_score_matrix_full.
convertible_to
The concept std::convertible_to<From, To> specifies that an expression of the type and value category...
alignment_result.hpp
Provides seqan3::alignment_result.
seqan3::pack_traits::size
constexpr size_t size
The size of a type pack.
Definition: traits.hpp:116
std::min
T min(T... args)
seqan3::pack_traits::back
typename decltype((std::type_identity< pack_t >{},...))::type back
Return the last type from the type pack.
Definition: traits.hpp:262
ranges
Adaptations of concepts from the Ranges TS.
edit_distance_fwd.hpp
Forwards for seqan3::edit_distance_unbanded related types.
std
SeqAn specific customisations in the standard namespace.
std::optional
alignment_coordinate.hpp
Provides seqan3::detail::alignment_coordinate.
std::conditional_t
seqan3::get
constexpr const auto & get(configuration< configs_t... > const &config) noexcept
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: configuration.hpp:576
edit_distance_trace_matrix_full.hpp
Provides seqan3::detail::edit_distance_trace_matrix_full.
seqan3::field::alignment
The (pairwise) alignment stored in an seqan3::alignment object.