SeqAn3  3.0.0
The Modern C++ library for sequence analysis.
edit_distance_unbanded.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2019, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2019, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
13 #pragma once
14 
15 #include <algorithm>
16 #include <bitset>
17 #include <utility>
18 
19 #include <range/v3/algorithm/copy.hpp>
20 
29 #include <seqan3/std/ranges>
30 
31 namespace seqan3::detail
32 {
36 template <typename derived_t, typename edit_traits>
37 class edit_distance_unbanded_max_errors_policy :
39  edit_traits
41 {
42  static_assert(edit_traits::use_max_errors, "This policy assumes that edit_traits::use_max_errors is true.");
43 
45  friend derived_t;
46 
50  edit_distance_unbanded_max_errors_policy() noexcept = default;
51  edit_distance_unbanded_max_errors_policy(edit_distance_unbanded_max_errors_policy const &) noexcept
52  = default;
53  edit_distance_unbanded_max_errors_policy(edit_distance_unbanded_max_errors_policy &&) noexcept
54  = default;
55  edit_distance_unbanded_max_errors_policy & operator=(edit_distance_unbanded_max_errors_policy const &) noexcept
56  = default;
57  edit_distance_unbanded_max_errors_policy & operator=(edit_distance_unbanded_max_errors_policy &&) noexcept
58  = default;
59  ~edit_distance_unbanded_max_errors_policy() noexcept = default;
60 
62  using typename edit_traits::word_type;
63  using typename edit_traits::score_type;
64 
69  score_type max_errors{255};
72  size_t last_block{0u};
74  word_type last_score_mask{};
76 
81  void max_errors_init(size_t block_count) noexcept
83  {
84  derived_t * self = static_cast<derived_t *>(this);
85 
86  max_errors = get<align_cfg::max_error>(self->config).value;
87  assert(max_errors >= score_type{0});
88 
89  last_block = block_count - 1u;
90  last_score_mask = self->score_mask;
91 
92  // local_max_errors either stores the maximal number of _score (me.max_errors) or the needle size minus one. It
93  // is used for the mask computation and setting the initial score (the minus one is there because of the Ukkonen
94  // trick).
95  size_t const local_max_errors = std::min<size_t>(max_errors, std::ranges::size(self->query) - 1u);
96  self->score_mask = word_type{1u} << (local_max_errors % self->word_size);
97  last_block = std::min(local_max_errors / self->word_size, last_block);
98  self->_score = local_max_errors + 1u;
99  }
100 
102  bool is_last_active_cell_within_last_row() const noexcept
103  {
104  derived_t const * self = static_cast<derived_t const *>(this);
105  return (self->score_mask == this->last_score_mask) && (this->last_block == self->vp.size() - 1u);
106  }
107 
109  bool prev_last_active_cell() noexcept
110  {
111  derived_t * self = static_cast<derived_t *>(this);
112  self->score_mask >>= 1u;
113  if (self->score_mask != 0u)
114  return true;
115 
116  if constexpr (edit_traits::is_global)
117  {
118  if (last_block == 0u) // [[unlikely]]
119  return false;
120  }
121 
122  last_block--;
123 
124  self->score_mask = word_type{1u} << (edit_traits::word_size - 1u);
125  return true;
126  }
127 
129  void next_last_active_cell() noexcept
130  {
131  derived_t * self = static_cast<derived_t *>(this);
132  self->score_mask <<= 1u;
133  if (self->score_mask)
134  return;
135 
136  self->score_mask = 1u;
137  last_block++;
138  }
139 
143  bool update_last_active_cell() noexcept
144  {
145  derived_t * self = static_cast<derived_t *>(this);
146  // update the last active cell
147  while (!(self->_score <= max_errors))
148  {
149  self->advance_score(self->vn[last_block], self->vp[last_block], self->score_mask);
150  if (!prev_last_active_cell())
151  {
152  // prev_last_active_cell = false can only happen for global alignments
153  assert(edit_traits::is_global);
154  // we abort here if we don't need to compute a matrix, because the continued
155  // computation can't produce an alignment.
156  return !edit_traits::compute_matrix;
157  }
158  }
159 
160  if (is_last_active_cell_within_last_row())
161  {
162  assert(self->_score <= max_errors);
163 
164  if constexpr(edit_traits::is_semi_global)
165  self->update_best_score();
166 
167  return self->on_hit();
168  }
169  else
170  {
171  next_last_active_cell();
172  self->advance_score(self->vp[last_block], self->vn[last_block], self->score_mask);
173  }
174 
175  return false;
176  }
178 
180  static size_t max_rows(word_type const score_mask, unsigned const last_block,
181  score_type const score, score_type const max_errors) noexcept
182  {
183  using score_matrix_type = typename edit_traits::score_matrix_type;
184  return score_matrix_type::max_rows(score_mask,
185  last_block,
186  score,
187  max_errors);
188  }
189 };
190 
194 template <typename derived_t, typename edit_traits>
195 class edit_distance_unbanded_global_policy :
197  edit_traits
199 {
200  static_assert(edit_traits::is_global || edit_traits::is_semi_global,
201  "This policy assumes that edit_traits::is_global or edit_traits::is_semi_global is true.");
202 
204  friend derived_t;
205 
209  edit_distance_unbanded_global_policy() noexcept = default;
210  edit_distance_unbanded_global_policy(edit_distance_unbanded_global_policy const &) noexcept
211  = default;
212  edit_distance_unbanded_global_policy(edit_distance_unbanded_global_policy &&) noexcept
213  = default;
214  edit_distance_unbanded_global_policy & operator=(edit_distance_unbanded_global_policy const &) noexcept
215  = default;
216  edit_distance_unbanded_global_policy & operator=(edit_distance_unbanded_global_policy &&) noexcept
217  = default;
218  ~edit_distance_unbanded_global_policy() noexcept = default;
219 
222  using typename edit_traits::score_type;
223 
232  score_type _best_score{};
234 
239  void score_init() noexcept
241  {
242  derived_t const * self = static_cast<derived_t const *>(this);
243  _best_score = self->_score;
244  }
245 
247  bool is_valid() const noexcept
248  {
249  [[maybe_unused]] derived_t const * self = static_cast<derived_t const *>(this);
250  // This condition uses the observation that after each computation of a column, _score has either the initial
251  // value of the first row (i.e. the entire column consist of INF's), has the value _score = max_errors + 1
252  // (there exists a cell within the column that has value <= max_errors, but is not on the last row) or _score <=
253  // max_errors (the score of the last active cell is <= max_errors)
254  if constexpr(edit_traits::use_max_errors)
255  return _best_score <= self->max_errors;
256 
257  // When not using max_errors there is always a valid alignment, because the last row will always be updated and
258  // with it the score.
259  return true;
260  }
261 
263  alignment_coordinate invalid_coordinate() const noexcept
264  {
265  derived_t const * self = static_cast<derived_t const *>(this);
266  return {column_index_type{std::ranges::size(self->database)}, row_index_type{std::ranges::size(self->query)}};
267  }
268 
270  void update_best_score() noexcept
271  {
272  derived_t const * self = static_cast<derived_t const *>(this);
273  _best_score = self->_score;
274  }
275 
277  size_t best_score_column() const noexcept
278  {
279  derived_t const * self = static_cast<derived_t const *>(this);
280  return std::ranges::size(self->database) - 1u;
281  }
283 
284 public:
289  std::optional<score_type> score() const noexcept
292  {
293  derived_t const * self = static_cast<derived_t const *>(this);
294  static_assert(edit_traits::compute_score, "score() can only be computed if you specify the result type within "
295  "your alignment config.");
296  if (!self->is_valid())
297  return std::nullopt;
298 
299  return -_best_score;
300  }
301 
304  alignment_coordinate back_coordinate() const noexcept
305  {
306  derived_t const * self = static_cast<derived_t const *>(this);
307  static_assert(edit_traits::compute_back_coordinate, "back_coordinate() can only be computed if you specify the"
308  "result type within your alignment config.");
309  if (!self->is_valid())
310  return self->invalid_coordinate();
311 
312  size_t col = self->best_score_column();
313  return {column_index_type{col}, row_index_type{std::ranges::size(self->query) - 1u}};
314  }
316 };
317 
319 template <typename derived_t, typename edit_traits>
320 class edit_distance_unbanded_semi_global_policy :
321  public edit_distance_unbanded_global_policy<derived_t, edit_traits>
322 {
323  static_assert(edit_traits::is_semi_global, "This policy assumes that edit_traits::is_semi_global is true.");
324 
326  friend derived_t;
327 
331  edit_distance_unbanded_semi_global_policy() noexcept = default;
332  edit_distance_unbanded_semi_global_policy(edit_distance_unbanded_semi_global_policy const &) noexcept
333  = default;
334  edit_distance_unbanded_semi_global_policy(edit_distance_unbanded_semi_global_policy &&) noexcept
335  = default;
336  edit_distance_unbanded_semi_global_policy & operator=(edit_distance_unbanded_semi_global_policy const &) noexcept
337  = default;
338  edit_distance_unbanded_semi_global_policy & operator=(edit_distance_unbanded_semi_global_policy &&) noexcept
339  = default;
340  ~edit_distance_unbanded_semi_global_policy() noexcept = default;
341 
344  using base_t = edit_distance_unbanded_global_policy<derived_t, edit_traits>;
346  using typename base_t::database_iterator;
347  using base_t::_best_score;
349 
361  database_iterator _best_score_col{};
363 
368  void score_init() noexcept
370  {
371  derived_t const * self = static_cast<derived_t const *>(this);
372  base_t::score_init();
373  _best_score_col = self->database_it_end;
374  }
375 
377  bool is_valid() const noexcept
378  {
379  [[maybe_unused]] derived_t const * self = static_cast<derived_t const *>(this);
380  // This condition is obvious, because _best_score_col will only be set to a new position if the last active cell
381  // is within the last row AND score <= max_errors. And _best_score_col can't reach database_it_end again.
382  if constexpr(edit_traits::use_max_errors)
383  return _best_score_col != self->database_it_end;
384 
385  // When not using max_errors there is always a valid alignment, because the last row will always be updated and
386  // with it the score.
387  return true;
388  }
389 
391  void update_best_score() noexcept
392  {
393  derived_t const * self = static_cast<derived_t const *>(this);
394  // we have to make sure that update_best_score is only called after a score update within the
395  // last row.
396  if constexpr(edit_traits::use_max_errors)
397  {
398  assert(self->is_last_active_cell_within_last_row());
399  }
400 
401  _best_score_col = (self->_score <= _best_score) ? self->database_it : _best_score_col;
402  _best_score = (self->_score <= _best_score) ? self->_score : _best_score;
403  }
404 
406  size_t best_score_column() const noexcept
407  {
408  derived_t const * self = static_cast<derived_t const *>(this);
409  return std::ranges::distance(std::ranges::begin(self->database), _best_score_col);
410  }
412 };
413 
417 template <typename derived_t, typename edit_traits>
418 class edit_distance_unbanded_score_matrix_policy :
420  edit_traits
422 {
423  static_assert(edit_traits::compute_score_matrix,
424  "This policy assumes that edit_traits::compute_score_matrix is true.");
425 
427  friend derived_t;
428 
432  edit_distance_unbanded_score_matrix_policy() noexcept = default;
433  edit_distance_unbanded_score_matrix_policy(edit_distance_unbanded_score_matrix_policy const &) noexcept
434  = default;
435  edit_distance_unbanded_score_matrix_policy(edit_distance_unbanded_score_matrix_policy &&) noexcept
436  = default;
437  edit_distance_unbanded_score_matrix_policy & operator=(edit_distance_unbanded_score_matrix_policy const &) noexcept
438  = default;
439  edit_distance_unbanded_score_matrix_policy & operator=(edit_distance_unbanded_score_matrix_policy &&) noexcept
440  = default;
441  ~edit_distance_unbanded_score_matrix_policy() noexcept = default;
442 
444  using typename edit_traits::score_matrix_type;
445 
450  score_matrix_type _score_matrix{};
453 
466  void score_matrix_init()
467  {
468  derived_t const * self = static_cast<derived_t const *>(this);
469 
470  _score_matrix = score_matrix_type{std::ranges::size(self->query) + 1u};
471  _score_matrix.reserve(std::ranges::size(self->database) + 1u);
472  }
474 
475 public:
483  score_matrix_type const & score_matrix() const noexcept
484  {
485  static_assert(edit_traits::compute_score_matrix, "score_matrix() can only be computed if you specify the "
486  "result type within your alignment config.");
487  return _score_matrix;
488  }
490 };
491 
495 template <typename derived_t, typename edit_traits>
496 class edit_distance_unbanded_trace_matrix_policy :
498  edit_traits
500 {
501  static_assert(edit_traits::compute_trace_matrix,
502  "This policy assumes that edit_traits::compute_trace_matrix is true.");
503 
505  friend derived_t;
506 
510  edit_distance_unbanded_trace_matrix_policy() noexcept = default;
511  edit_distance_unbanded_trace_matrix_policy(edit_distance_unbanded_trace_matrix_policy const &) noexcept
512  = default;
513  edit_distance_unbanded_trace_matrix_policy(edit_distance_unbanded_trace_matrix_policy &&) noexcept
514  = default;
515  edit_distance_unbanded_trace_matrix_policy & operator=(edit_distance_unbanded_trace_matrix_policy const &) noexcept
516  = default;
517  edit_distance_unbanded_trace_matrix_policy & operator=(edit_distance_unbanded_trace_matrix_policy &&) noexcept
518  = default;
519  ~edit_distance_unbanded_trace_matrix_policy() noexcept = default;
520 
522  using typename edit_traits::word_type;
523  using typename edit_traits::trace_matrix_type;
524  using typename edit_traits::result_value_type;
525 
530  std::vector<word_type> hp{};
534 
536  trace_matrix_type _trace_matrix{};
538 
551  void trace_matrix_init(size_t block_count)
552  {
553  derived_t const * self = static_cast<derived_t const *>(this);
554 
555  _trace_matrix = trace_matrix_type{std::ranges::size(self->query) + 1u};
556  _trace_matrix.reserve(std::ranges::size(self->database) + 1u);
557 
558  hp.resize(block_count, 0u);
559  db.resize(block_count, 0u);
560  }
562 
563 public:
568  trace_matrix_type const & trace_matrix() const noexcept
570  {
571  static_assert(edit_traits::compute_trace_matrix, "trace_matrix() can only be computed if you specify the "
572  "result type within your alignment config.");
573  return _trace_matrix;
574  }
575 
578  alignment_coordinate front_coordinate() const noexcept
579  {
580  derived_t const * self = static_cast<derived_t const *>(this);
581  static_assert(edit_traits::compute_front_coordinate, "front_coordinate() can only be computed if you specify "
582  "the result type within your alignment config.");
583  if (!self->is_valid())
584  return self->invalid_coordinate();
585 
586  alignment_coordinate const back = self->back_coordinate();
587  return alignment_front_coordinate(trace_matrix(), back);
588  }
589 
592  auto alignment() const noexcept
593  {
594  using alignment_t = decltype(result_value_type{}.alignment);
595 
596  derived_t const * self = static_cast<derived_t const *>(this);
597  static_assert(edit_traits::compute_sequence_alignment, "alignment() can only be computed if you specify the "
598  "result type within your alignment config.");
599 
600  if (!self->is_valid())
601  return alignment_t{};
602 
603  return alignment_trace<alignment_t>(self->database,
604  self->query,
605  trace_matrix(),
606  self->back_coordinate(),
607  front_coordinate());
608  }
610 };
611 
615 template <typename value_t>
616 class proxy_reference
617 {
618 public:
622  proxy_reference() noexcept = default;
623  proxy_reference(proxy_reference const &) noexcept = default;
624  proxy_reference(proxy_reference &&) noexcept = default;
625  proxy_reference & operator=(proxy_reference const &) noexcept = default;
626  proxy_reference & operator=(proxy_reference &&) noexcept = default;
627  ~proxy_reference() = default;
628 
630  proxy_reference(value_t & t) noexcept
631  : ptr(std::addressof(t))
632  {}
633 
634  proxy_reference(value_t &&) = delete;
635 
637  template <std::ConvertibleTo<value_t> other_value_t>
638  proxy_reference & operator=(other_value_t && u) noexcept
639  {
640  get() = std::forward<other_value_t>(u);
641  return *this;
642  }
644 
646  value_t & get() const noexcept
647  {
648  assert(ptr != nullptr);
649  return *ptr;
650  }
651 
653  operator value_t & () const noexcept
654  {
655  return get();
656  }
657 
658 private:
660  value_t * ptr{nullptr};
661 };
662 
674 template <std::ranges::ViewableRange database_t,
676  typename align_config_t,
677  typename edit_traits>
678 class edit_distance_unbanded :
680 // Hide this section in doxygen, because it messes up the inheritance.
681  public edit_distance_base<
682  edit_traits::use_max_errors,
683  edit_distance_unbanded_max_errors_policy,
684  edit_traits,
685  edit_distance_unbanded<database_t, query_t, align_config_t, edit_traits>>,
686  public edit_distance_base<
687  edit_traits::is_global,
688  edit_distance_unbanded_global_policy,
689  edit_traits,
690  edit_distance_unbanded<database_t, query_t, align_config_t, edit_traits>>,
691  public edit_distance_base<
692  edit_traits::is_semi_global,
693  edit_distance_unbanded_semi_global_policy,
694  edit_traits,
695  edit_distance_unbanded<database_t, query_t, align_config_t, edit_traits>>,
696  public edit_distance_base<
697  edit_traits::compute_score_matrix,
698  edit_distance_unbanded_score_matrix_policy,
699  edit_traits,
700  edit_distance_unbanded<database_t, query_t, align_config_t, edit_traits>>,
701  public edit_distance_base<
702  edit_traits::compute_trace_matrix,
703  edit_distance_unbanded_trace_matrix_policy,
704  edit_traits,
705  edit_distance_unbanded<database_t, query_t, align_config_t, edit_traits>>
707 {
708 public:
709  using typename edit_traits::word_type;
710  using typename edit_traits::score_type;
711  using typename edit_traits::database_type;
712  using typename edit_traits::query_type;
713  using typename edit_traits::align_config_type;
714  using edit_traits::word_size;
715 
716 private:
717  using typename edit_traits::database_iterator;
718  using typename edit_traits::query_alphabet_type;
719  using typename edit_traits::result_value_type;
720  using edit_traits::use_max_errors;
721  using edit_traits::is_semi_global;
722  using edit_traits::is_global;
723  using edit_traits::compute_score;
724  using edit_traits::compute_back_coordinate;
725  using edit_traits::compute_front_coordinate;
726  using edit_traits::compute_sequence_alignment;
727  using edit_traits::compute_score_matrix;
728  using edit_traits::compute_trace_matrix;
729  using edit_traits::compute_matrix;
730  using typename edit_traits::score_matrix_type;
731  using typename edit_traits::trace_matrix_type;
732 
734  database_t database;
736  query_t query;
738  align_config_t config;
739 
741  static constexpr word_type hp0 = is_global ? 1u : 0u;
743  static constexpr word_type hn0 = 0u;
745  static constexpr word_type vp0 = ~word_type{0u};
747  static constexpr word_type vn0 = 0u;
748 
750  score_type _score{};
757  word_type score_mask{0u};
768  std::vector<word_type> bit_masks{};
769 
771  database_iterator database_it{};
773  database_iterator database_it_end{};
774 
776  struct compute_state_trace_matrix
777  {
779  proxy_reference<word_type> db{};
780  };
781 
783  struct compute_state : enable_state_t<compute_trace_matrix, compute_state_trace_matrix>
784  {
787 
789  word_type b{};
791  word_type d0{};
793  hp_type hp{};
795  word_type hn{};
797  proxy_reference<word_type> vp{};
799  proxy_reference<word_type> vn{};
801  word_type carry_d0{};
803  word_type carry_hp{hp0};
805  word_type carry_hn{};
806  };
807 
809  void add_state()
810  {
811  if constexpr(!use_max_errors && compute_score_matrix)
812  this->_score_matrix.add_column(vp, vn);
813 
814  if constexpr(!use_max_errors && compute_trace_matrix)
815  this->_trace_matrix.add_column(this->hp, this->db, vp);
816 
817  if constexpr(use_max_errors && compute_matrix)
818  {
819  size_t max_rows = this->max_rows(score_mask, this->last_block, _score, this->max_errors);
820  if constexpr(compute_score_matrix)
821  this->_score_matrix.add_column(vp, vn, max_rows);
822 
823  if constexpr(compute_trace_matrix)
824  this->_trace_matrix.add_column(this->hp, this->db, vp, max_rows);
825  }
826  }
827 
828 public:
832  edit_distance_unbanded() = delete;
834  edit_distance_unbanded(edit_distance_unbanded const &) = default;
835  edit_distance_unbanded(edit_distance_unbanded &&) = default;
836  edit_distance_unbanded & operator=(edit_distance_unbanded const &) = default;
837  edit_distance_unbanded & operator=(edit_distance_unbanded &&) = default;
838  ~edit_distance_unbanded() = default;
839 
846  edit_distance_unbanded(database_t _database,
847  query_t _query,
848  align_config_t _config,
849  edit_traits const & SEQAN3_DOXYGEN_ONLY(_traits) = edit_traits{}) :
850  database{std::forward<database_t>(_database)},
851  query{std::forward<query_t>(_query)},
852  config{std::forward<align_config_t>(_config)},
853  _score{static_cast<score_type>(std::ranges::size(query))},
854  database_it{ranges::begin(database)},
855  database_it_end{ranges::end(database)}
856  {
857  static constexpr size_t alphabet_size_ = alphabet_size<query_alphabet_type>;
858 
859  size_t const block_count = (std::ranges::size(query) - 1u + word_size) / word_size;
860  score_mask = word_type{1u} << ((std::ranges::size(query) - 1u + word_size) % word_size);
861 
862  this->score_init();
863  if constexpr(use_max_errors)
864  this->max_errors_init(block_count);
865 
866  if constexpr(compute_score_matrix)
867  this->score_matrix_init();
868 
869  if constexpr(compute_trace_matrix)
870  this->trace_matrix_init(block_count);
871 
872  vp.resize(block_count, vp0);
873  vn.resize(block_count, vn0);
874  bit_masks.resize((alphabet_size_ + 1u) * block_count, 0u);
875 
876  // encoding the letters as bit-vectors
877  for (size_t j = 0u; j < std::ranges::size(query); j++)
878  {
879  size_t const i = block_count * seqan3::to_rank(query[j]) + j / word_size;
880  bit_masks[i] |= word_type{1u} << (j % word_size);
881  }
882 
883  add_state();
884  }
886 
887 private:
889  template <bool with_carry>
890  static void compute_step(compute_state & state) noexcept
891  {
892  word_type x, t;
893  assert(state.carry_d0 <= 1u);
894  assert(state.carry_hp <= 1u);
895  assert(state.carry_hn <= 1u);
896 
897  x = state.b | state.vn;
898  t = state.vp + (x & state.vp) + state.carry_d0;
899 
900  state.d0 = (t ^ state.vp) | x;
901  state.hn = state.vp & state.d0;
902  state.hp = state.vn | ~(state.vp | state.d0);
903 
904  if constexpr(with_carry)
905  state.carry_d0 = (state.carry_d0 != 0u) ? t <= state.vp : t < state.vp;
906 
907  x = (state.hp << 1u) | state.carry_hp;
908  state.vn = x & state.d0;
909  state.vp = (state.hn << 1u) | ~(x | state.d0) | state.carry_hn;
910 
911  if constexpr(with_carry)
912  {
913  state.carry_hp = state.hp >> (word_size - 1u);
914  state.carry_hn = state.hn >> (word_size - 1u);
915  }
916  }
917 
919  template <bool with_carry>
920  void compute_kernel(compute_state & state, size_t const block_offset, size_t const current_block) noexcept
921  {
922  state.vp = proxy_reference<word_type>{this->vp[current_block]};
923  state.vn = proxy_reference<word_type>{this->vn[current_block]};
924  if constexpr(compute_trace_matrix)
925  {
926  state.hp = proxy_reference<word_type>{this->hp[current_block]};
927  state.db = proxy_reference<word_type>{this->db[current_block]};
928  }
929  state.b = bit_masks[block_offset + current_block];
930 
931  compute_step<with_carry>(state);
932  if constexpr(compute_trace_matrix)
933  state.db = ~(state.b ^ state.d0);
934  }
935 
937  void advance_score(word_type P, word_type N, word_type mask) noexcept
938  {
939  if ((P & mask) != word_type{0u})
940  _score++;
941  else if ((N & mask) != word_type{0u})
942  _score--;
943  }
944 
946  bool on_hit() noexcept
947  {
948  // TODO: call external on_hit functor
949  return false;
950  }
951 
953  inline bool small_patterns();
954 
956  inline bool large_patterns();
957 
959  void compute()
960  {
961  // limit search width for prefix search (if no matrix needs to be computed)
962  if constexpr(use_max_errors && is_global && !compute_matrix)
963  {
964  // Note: For global alignments we know that the database can only be max_length long to have a score less
965  // than or equal max_errors in the last cell.
966  //
967  // The following matrix shows a minimal value for each entry (because a diagonal must be +0 or +1, each
968  // diagonal is at least the value of the initial value in the first row)
969  // 0 1 2 3 4 5 6...
970  // 1 0 1 2 3 4 5...
971  // ...
972  // m ... 3 2 1 0 1 2 3 4 5
973  // Thus, after |query| + max_errors entries the score will always be higher than max_errors.
974  size_t const max_length = std::ranges::size(query) + this->max_errors + 1u;
975  size_t const haystack_length = std::min(std::ranges::size(database), max_length);
976  database_it_end -= std::ranges::size(database) - haystack_length;
977  }
978 
979  // distinguish between the version for needles not longer than
980  // one machine word and the version for longer needles
981  if (vp.size() <= 1u)
982  small_patterns();
983  else
984  large_patterns();
985 
986  if constexpr(is_global)
987  this->update_best_score();
988  }
989 
990 public:
995  alignment_result<result_value_type> operator()(size_t const idx)
996  {
997  compute();
998  result_value_type res_vt{};
999  res_vt.id = idx;
1000  if constexpr (compute_score)
1001  {
1002  res_vt.score = this->score().value_or(matrix_inf<score_type>);
1003  }
1004 
1005  if constexpr (compute_back_coordinate)
1006  {
1007  res_vt.back_coordinate = this->back_coordinate();
1008  }
1009 
1010  if constexpr (compute_front_coordinate)
1011  {
1012  if (this->is_valid())
1013  res_vt.front_coordinate = alignment_front_coordinate(this->trace_matrix(), res_vt.back_coordinate);
1014  else
1015  res_vt.front_coordinate = this->invalid_coordinate();
1016  }
1017 
1018  if constexpr (compute_sequence_alignment)
1019  {
1020  if (this->is_valid())
1021  {
1022  using alignment_t = decltype(res_vt.alignment);
1023  res_vt.alignment = alignment_trace<alignment_t>(database,
1024  query,
1025  this->trace_matrix(),
1026  res_vt.back_coordinate,
1027  res_vt.front_coordinate);
1028  }
1029  }
1030  return alignment_result<result_value_type>{std::move(res_vt)};
1031  }
1032 };
1033 
1034 template <typename database_t, typename query_t, typename align_config_t, typename traits_t>
1035 bool edit_distance_unbanded<database_t, query_t, align_config_t, traits_t>::small_patterns()
1036 {
1037  bool abort_computation = false;
1038 
1039  // computing the blocks
1040  while (database_it != database_it_end)
1041  {
1042  compute_state state{};
1043  size_t const block_offset = seqan3::to_rank((query_alphabet_type) *database_it);
1044 
1045  compute_kernel<false>(state, block_offset, 0u);
1046  advance_score(state.hp, state.hn, score_mask);
1047 
1048  // semi-global without max_errors guarantees that the score stays within the last row
1049  if constexpr(is_semi_global && !use_max_errors)
1050  this->update_best_score();
1051 
1052  // updating the last active cell
1053  if constexpr(use_max_errors)
1054  abort_computation = this->update_last_active_cell();
1055 
1056  add_state();
1057  ++database_it;
1058  if (abort_computation)
1059  return true;
1060  }
1061 
1062  return false;
1063 }
1064 
1065 template <typename database_t, typename query_t, typename align_config_t, typename traits_t>
1066 bool edit_distance_unbanded<database_t, query_t, align_config_t, traits_t>::large_patterns()
1067 {
1068  bool abort_computation = false;
1069 
1070  while (database_it != database_it_end)
1071  {
1072  compute_state state{};
1073  size_t const block_offset = vp.size() * seqan3::to_rank((query_alphabet_type) *database_it);
1074 
1075  size_t block_count = vp.size();
1076  if constexpr(use_max_errors)
1077  block_count = this->last_block + 1;
1078 
1079  // compute each block in the current column; carries between blocks will be propagated.
1080  for (size_t current_block = 0u; current_block < block_count; current_block++)
1081  compute_kernel<true>(state, block_offset, current_block);
1082 
1083  advance_score(state.hp, state.hn, score_mask);
1084 
1085  // semi-global without max_errors guarantees that the score stays within the last row
1086  if constexpr(is_semi_global && !use_max_errors)
1087  this->update_best_score();
1088 
1089  if constexpr(use_max_errors)
1090  {
1091  // if the last active cell reached the end within the current block we have to compute the next block.
1092  bool additional_block = score_mask >> (word_size - 1u);
1093  bool reached_last_block = this->last_block + 1u == vp.size();
1094  // If there is no next block we skip the computation.
1095  if (reached_last_block)
1096  additional_block = false;
1097 
1098  if (additional_block)
1099  {
1100  size_t const current_block = this->last_block + 1u;
1101  // this might not be necessary, but carry_d0 = 1u might have an influence on the result of vn and vp.
1102  vp[current_block] = vp0;
1103  vn[current_block] = vn0;
1104  compute_kernel<false>(state, block_offset, current_block);
1105  }
1106 
1107  // updating the last active cell
1108  abort_computation = this->update_last_active_cell();
1109  }
1110 
1111  add_state();
1112  ++database_it;
1113 
1114  if (abort_computation)
1115  return true;
1116  }
1117 
1118  return false;
1119 }
1120 
1126 template<typename database_t, typename query_t, typename config_t>
1128 edit_distance_unbanded(database_t && database, query_t && query, config_t config)
1129  -> edit_distance_unbanded<database_t, query_t, config_t>;
1130 
1132 template<typename database_t, typename query_t, typename config_t, typename traits_t>
1133 edit_distance_unbanded(database_t && database, query_t && query, config_t config, traits_t)
1134  -> edit_distance_unbanded<database_t, query_t, config_t, traits_t>;
1136 
1137 } // namespace seqan3::detail
::ranges::distance distance
Alias for ranges::distance. Returns the number of hops from first to last.
Definition: iterator:321
Specifies the requirements of a Range type that is either a std::ranges::View or an lvalue-reference...
Provides various shortcuts for common std::ranges functions.
constexpr auto to_rank
Return the rank representation of a (semi-)alphabet object.
Definition: concept.hpp:103
Forwards for seqan3::edit_distance_unbanded related types.
SeqAn specific customisations in the standard namespace.
::ranges::size size
Alias for ranges::size. Obtains the size of a range whose size can be calculated in constant time...
Definition: ranges:189
T min(T... args)
Provides seqan3::alignment_result.
Adaptations of concepts from the Ranges TS.
::ranges::begin begin
Alias for ranges::begin. Returns an iterator to the beginning of a range.
Definition: ranges:174
Provides seqan3::detail::edit_distance_score_matrix_full.
Definition: aligned_sequence_concept.hpp:35
Provides seqan3::detail::edit_distance_trace_matrix_full.
Provides seqan3::detail::alignment_coordinate.
Provides seqan3::detail::configuration and utility functions.
Provides algorithms that operate on seqan3::detail::alignment_trace_matrix.
::ranges::end end
Alias for ranges::end. Returns an iterator to the end of a range.
Definition: ranges:179