SeqAn3  3.0.3
The Modern C++ library for sequence analysis.
edit_distance_unbanded.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2021, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2021, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
13 #pragma once
14 
15 #include <algorithm>
16 #include <seqan3/std/algorithm>
17 #include <bitset>
18 #include <seqan3/std/ranges>
19 #include <utility>
20 
28 
29 namespace seqan3::detail
30 {
34 template <typename derived_t, typename edit_traits>
35 class edit_distance_unbanded_max_errors_policy :
37  edit_traits
39 {
40 protected:
41  static_assert(edit_traits::use_max_errors, "This policy assumes that edit_traits::use_max_errors is true.");
42 
44  friend derived_t;
45 
49  edit_distance_unbanded_max_errors_policy() noexcept = default;
50  edit_distance_unbanded_max_errors_policy(edit_distance_unbanded_max_errors_policy const &) noexcept
51  = default;
52  edit_distance_unbanded_max_errors_policy(edit_distance_unbanded_max_errors_policy &&) noexcept
53  = default;
54  edit_distance_unbanded_max_errors_policy & operator=(edit_distance_unbanded_max_errors_policy const &) noexcept
55  = default;
56  edit_distance_unbanded_max_errors_policy & operator=(edit_distance_unbanded_max_errors_policy &&) noexcept
57  = default;
58  ~edit_distance_unbanded_max_errors_policy() noexcept = default;
59 
61 
62  using typename edit_traits::word_type;
63  using typename edit_traits::score_type;
64 
70  score_type max_errors{255};
72  size_t last_block{0u};
74  word_type last_score_mask{};
76 
82  void max_errors_init(size_t block_count) noexcept
83  {
84  derived_t * self = static_cast<derived_t *>(this);
85 
86  max_errors = -get<align_cfg::min_score>(self->config).score;
87 
88  assert(max_errors >= score_type{0});
89 
90  if (std::ranges::empty(self->query)) // [[unlikely]]
91  {
92  last_block = 0u;
93  self->score_mask = 0u;
94  last_score_mask = self->score_mask;
95  return;
96  }
97 
98  last_block = block_count - 1u;
99  last_score_mask = self->score_mask;
100 
101  // local_max_errors either stores the maximal number of _score (me.max_errors) or the needle size minus one. It
102  // is used for the mask computation and setting the initial score (the minus one is there because of the Ukkonen
103  // trick).
104  size_t const local_max_errors = std::min<size_t>(max_errors, std::ranges::size(self->query) - 1u);
105  self->score_mask = word_type{1u} << (local_max_errors % self->word_size);
106  last_block = std::min(local_max_errors / self->word_size, last_block);
107  self->_score = local_max_errors + 1u;
108  }
109 
111  bool is_last_active_cell_within_last_row() const noexcept
112  {
113  derived_t const * self = static_cast<derived_t const *>(this);
114  return (self->score_mask == this->last_score_mask) && (this->last_block == self->vp.size() - 1u);
115  }
116 
118  bool prev_last_active_cell() noexcept
119  {
120  derived_t * self = static_cast<derived_t *>(this);
121  self->score_mask >>= 1u;
122  if (self->score_mask != 0u)
123  return true;
124 
125  if constexpr (edit_traits::is_global)
126  {
127  if (last_block == 0u) // [[unlikely]]
128  return false;
129  }
130 
131  last_block--;
132 
133  self->score_mask = word_type{1u} << (edit_traits::word_size - 1u);
134  return true;
135  }
136 
138  void next_last_active_cell() noexcept
139  {
140  derived_t * self = static_cast<derived_t *>(this);
141  self->score_mask <<= 1u;
142  if (self->score_mask)
143  return;
144 
145  self->score_mask = 1u;
146  last_block++;
147  }
148 
152  bool update_last_active_cell() noexcept
153  {
154  derived_t * self = static_cast<derived_t *>(this);
155  // update the last active cell
156  while (!(self->_score <= max_errors))
157  {
158  self->advance_score(self->vn[last_block], self->vp[last_block], self->score_mask);
159  if (!prev_last_active_cell())
160  {
161  // prev_last_active_cell = false can only happen for global alignments
162  assert(edit_traits::is_global);
163  // we abort here if we don't need to compute a matrix, because the continued
164  // computation can't produce an alignment.
165  return !edit_traits::compute_matrix;
166  }
167  }
168 
169  if (is_last_active_cell_within_last_row())
170  {
171  assert(self->_score <= max_errors);
172 
173  if constexpr(edit_traits::is_semi_global)
174  self->update_best_score();
175 
176  return self->on_hit();
177  }
178  else
179  {
180  next_last_active_cell();
181  self->advance_score(self->vp[last_block], self->vn[last_block], self->score_mask);
182  }
183 
184  return false;
185  }
187 
189  static size_t max_rows(word_type const score_mask, unsigned const last_block,
190  score_type const score, score_type const max_errors) noexcept
191  {
192  using score_matrix_type = typename edit_traits::score_matrix_type;
193  return score_matrix_type::max_rows(score_mask,
194  last_block,
195  score,
196  max_errors);
197  }
198 };
199 
203 template <typename derived_t, typename edit_traits>
204 class edit_distance_unbanded_global_policy :
206  edit_traits
208 {
209 protected:
210  static_assert(edit_traits::is_global || edit_traits::is_semi_global,
211  "This policy assumes that edit_traits::is_global or edit_traits::is_semi_global is true.");
212 
214  friend derived_t;
215 
219  edit_distance_unbanded_global_policy() noexcept = default;
220  edit_distance_unbanded_global_policy(edit_distance_unbanded_global_policy const &) noexcept
221  = default;
222  edit_distance_unbanded_global_policy(edit_distance_unbanded_global_policy &&) noexcept
223  = default;
224  edit_distance_unbanded_global_policy & operator=(edit_distance_unbanded_global_policy const &) noexcept
225  = default;
226  edit_distance_unbanded_global_policy & operator=(edit_distance_unbanded_global_policy &&) noexcept
227  = default;
228  ~edit_distance_unbanded_global_policy() noexcept = default;
229 
231 
233  using typename edit_traits::score_type;
234 
243  score_type _best_score{};
245 
251  void score_init() noexcept
252  {
253  derived_t const * self = static_cast<derived_t const *>(this);
254  _best_score = self->_score;
255  }
256 
258  bool is_valid() const noexcept
259  {
260  [[maybe_unused]] derived_t const * self = static_cast<derived_t const *>(this);
261  // This condition uses the observation that after each computation of a column, _score has either the initial
262  // value of the first row (i.e. the entire column consist of INF's), has the value _score = max_errors + 1
263  // (there exists a cell within the column that has value <= max_errors, but is not on the last row) or _score <=
264  // max_errors (the score of the last active cell is <= max_errors)
265  if constexpr(edit_traits::use_max_errors)
266  return _best_score <= self->max_errors;
267 
268  // When not using max_errors there is always a valid alignment, because the last row will always be updated and
269  // with it the score.
270  return true;
271  }
272 
274  seqan3::detail::advanceable_alignment_coordinate<> invalid_coordinate() const noexcept
275  {
276  derived_t const * self = static_cast<derived_t const *>(this);
277  return {column_index_type{std::ranges::size(self->database)}, row_index_type{std::ranges::size(self->query)}};
278  }
279 
281  void update_best_score() noexcept
282  {
283  derived_t const * self = static_cast<derived_t const *>(this);
284  _best_score = self->_score;
285  }
286 
288  size_t end_positions_first() const noexcept
289  {
290  derived_t const * self = static_cast<derived_t const *>(this);
291  return std::ranges::size(self->database);
292  }
294 
295 public:
302  std::optional<score_type> score() const noexcept
303  {
304  derived_t const * self = static_cast<derived_t const *>(this);
305  static_assert(edit_traits::compute_score, "score() can only be computed if you specify the result type within "
306  "your alignment config.");
307  if (!self->is_valid())
308  return std::nullopt;
309 
310  return -_best_score;
311  }
312 
315  seqan3::detail::advanceable_alignment_coordinate<> end_positions() const noexcept
316  {
317  derived_t const * self = static_cast<derived_t const *>(this);
318  static_assert(edit_traits::compute_end_positions, "end_positions() can only be computed if you specify the "
319  "result type within your alignment config.");
320  if (!self->is_valid())
321  return self->invalid_coordinate();
322 
323  column_index_type const first{self->end_positions_first()};
324  row_index_type const second{std::ranges::size(self->query)};
325  return {first, second};
326  }
328 };
329 
331 template <typename derived_t, typename edit_traits>
332 class edit_distance_unbanded_semi_global_policy :
333  public edit_distance_unbanded_global_policy<derived_t, edit_traits>
334 {
335 protected:
336  static_assert(edit_traits::is_semi_global, "This policy assumes that edit_traits::is_semi_global is true.");
337 
339  friend derived_t;
340 
344  edit_distance_unbanded_semi_global_policy() noexcept = default;
345  edit_distance_unbanded_semi_global_policy(edit_distance_unbanded_semi_global_policy const &) noexcept
346  = default;
347  edit_distance_unbanded_semi_global_policy(edit_distance_unbanded_semi_global_policy &&) noexcept
348  = default;
349  edit_distance_unbanded_semi_global_policy & operator=(edit_distance_unbanded_semi_global_policy const &) noexcept
350  = default;
351  edit_distance_unbanded_semi_global_policy & operator=(edit_distance_unbanded_semi_global_policy &&) noexcept
352  = default;
353  ~edit_distance_unbanded_semi_global_policy() noexcept = default;
354 
356 
358  using base_t = edit_distance_unbanded_global_policy<derived_t, edit_traits>;
360  using database_iterator = typename edit_traits::database_iterator;
361  using base_t::_best_score;
363 
375  database_iterator _best_score_col{};
377 
383  void score_init() noexcept
384  {
385  derived_t const * self = static_cast<derived_t const *>(this);
386  base_t::score_init();
387  _best_score_col = self->database_it_end;
388  }
389 
391  void update_best_score() noexcept
392  {
393  derived_t const * self = static_cast<derived_t const *>(this);
394  // we have to make sure that update_best_score is only called after a score update within the last row.
395  if constexpr(edit_traits::use_max_errors)
396  {
397  assert(std::ranges::empty(self->query) || self->is_last_active_cell_within_last_row());
398  }
399 
400  _best_score_col = (self->_score <= _best_score) ? self->database_it : _best_score_col;
401  _best_score = (self->_score <= _best_score) ? self->_score : _best_score;
402  }
403 
405  size_t end_positions_first() const noexcept
406  {
407  derived_t const * self = static_cast<derived_t const *>(this);
408  // offset == 0u is a special case if database sequence is empty, because in this case the best column is zero.
409  size_t offset = std::ranges::empty(self->database) ? 0u : 1u;
410  return std::ranges::distance(std::ranges::begin(self->database), _best_score_col) + offset;
411  }
413 };
414 
418 template <typename derived_t, typename edit_traits>
419 class edit_distance_unbanded_score_matrix_policy :
421  edit_traits
423 {
424 protected:
425  static_assert(edit_traits::compute_score_matrix,
426  "This policy assumes that edit_traits::compute_score_matrix is true.");
427 
429  friend derived_t;
430 
434  edit_distance_unbanded_score_matrix_policy() noexcept = default;
435  edit_distance_unbanded_score_matrix_policy(edit_distance_unbanded_score_matrix_policy const &) noexcept
436  = default;
437  edit_distance_unbanded_score_matrix_policy(edit_distance_unbanded_score_matrix_policy &&) noexcept
438  = default;
439  edit_distance_unbanded_score_matrix_policy & operator=(edit_distance_unbanded_score_matrix_policy const &) noexcept
440  = default;
441  edit_distance_unbanded_score_matrix_policy & operator=(edit_distance_unbanded_score_matrix_policy &&) noexcept
442  = default;
443  ~edit_distance_unbanded_score_matrix_policy() noexcept = default;
444 
446 
447  using typename edit_traits::score_matrix_type;
448 
454  score_matrix_type _score_matrix{};
456 
469  void score_matrix_init()
470  {
471  derived_t const * self = static_cast<derived_t const *>(this);
472 
473  _score_matrix = score_matrix_type{std::ranges::size(self->query) + 1u};
474  _score_matrix.reserve(std::ranges::size(self->database) + 1u);
475  }
477 
478 public:
486  score_matrix_type const & score_matrix() const noexcept
487  {
488  static_assert(edit_traits::compute_score_matrix, "score_matrix() can only be computed if you specify the "
489  "result type within your alignment config.");
490  return _score_matrix;
491  }
493 };
494 
498 template <typename derived_t, typename edit_traits>
499 class edit_distance_unbanded_trace_matrix_policy :
501  edit_traits
503 {
504 protected:
505  static_assert(edit_traits::compute_trace_matrix,
506  "This policy assumes that edit_traits::compute_trace_matrix is true.");
507 
509  friend derived_t;
510 
514  edit_distance_unbanded_trace_matrix_policy() noexcept = default;
515  edit_distance_unbanded_trace_matrix_policy(edit_distance_unbanded_trace_matrix_policy const &) noexcept
516  = default;
517  edit_distance_unbanded_trace_matrix_policy(edit_distance_unbanded_trace_matrix_policy &&) noexcept
518  = default;
519  edit_distance_unbanded_trace_matrix_policy & operator=(edit_distance_unbanded_trace_matrix_policy const &) noexcept
520  = default;
521  edit_distance_unbanded_trace_matrix_policy & operator=(edit_distance_unbanded_trace_matrix_policy &&) noexcept
522  = default;
523  ~edit_distance_unbanded_trace_matrix_policy() noexcept = default;
524 
526 
527  using typename edit_traits::word_type;
528  using typename edit_traits::trace_matrix_type;
529  using typename edit_traits::alignment_result_type;
530 
536  std::vector<word_type> hp{};
539 
541  trace_matrix_type _trace_matrix{};
543 
556  void trace_matrix_init(size_t block_count)
557  {
558  derived_t const * self = static_cast<derived_t const *>(this);
559 
560  _trace_matrix = trace_matrix_type{std::ranges::size(self->query) + 1u};
561  _trace_matrix.reserve(std::ranges::size(self->database) + 1u);
562 
563  hp.resize(block_count, 0u);
564  db.resize(block_count, 0u);
565  }
567 
568 public:
574  trace_matrix_type const & trace_matrix() const noexcept
575  {
576  static_assert(edit_traits::compute_trace_matrix, "trace_matrix() can only be computed if you specify the "
577  "result type within your alignment config.");
578  return _trace_matrix;
579  }
580 
583  seqan3::detail::advanceable_alignment_coordinate<> begin_positions() const noexcept
584  {
585  derived_t const * self = static_cast<derived_t const *>(this);
586  static_assert(edit_traits::compute_begin_positions, "begin_positions() can only be computed if you specify the "
587  "result type within your alignment config.");
588  if (!self->is_valid())
589  return self->invalid_coordinate();
590 
591  auto [first, second] = self->end_positions();
592  detail::matrix_coordinate const end_positions{detail::row_index_type{second}, detail::column_index_type{first}};
593  auto trace_path = self->trace_matrix().trace_path(end_positions);
594  auto trace_path_it = std::ranges::begin(trace_path);
595  std::ranges::advance(trace_path_it, std::ranges::end(trace_path));
596  detail::matrix_coordinate const begin_positions = trace_path_it.coordinate();
597  return {detail::column_index_type{begin_positions.col}, detail::row_index_type{begin_positions.row}};
598  }
599 
602  auto alignment() const noexcept
603  {
604  using alignment_t = std::remove_cvref_t<decltype(std::declval<alignment_result_type &>().alignment())>;
605 
606  derived_t const * self = static_cast<derived_t const *>(this);
607  static_assert(edit_traits::compute_sequence_alignment, "alignment() can only be computed if you specify the "
608  "result type within your alignment config.");
609 
610  if (!self->is_valid())
611  return alignment_t{};
612 
613  auto [first, second] = self->end_positions();
614  detail::matrix_coordinate const end_positions{detail::row_index_type{second}, detail::column_index_type{first}};
615  aligned_sequence_builder builder{self->database, self->query};
616  auto trace_path = self->trace_matrix().trace_path(end_positions);
617  return builder(trace_path).alignment;
618  }
620 };
621 
625 template <typename value_t>
626 class proxy_reference
627 {
628 public:
632  proxy_reference() noexcept = default;
633  proxy_reference(proxy_reference const &) noexcept = default;
634  proxy_reference(proxy_reference &&) noexcept = default;
635  proxy_reference & operator=(proxy_reference const &) noexcept = default;
636  proxy_reference & operator=(proxy_reference &&) noexcept = default;
637  ~proxy_reference() = default;
638 
640  proxy_reference(value_t & t) noexcept
641  : ptr(std::addressof(t))
642  {}
643 
644  proxy_reference(value_t &&) = delete;
645 
647  template <typename other_value_t>
649  requires std::convertible_to<other_value_t, value_t>
651  proxy_reference & operator=(other_value_t && u) noexcept
652  {
653  get() = std::forward<other_value_t>(u);
654  return *this;
655  }
657 
659  value_t & get() const noexcept
660  {
661  assert(ptr != nullptr);
662  return *ptr;
663  }
664 
666  operator value_t & () const noexcept
667  {
668  return get();
669  }
670 
671 private:
673  value_t * ptr{nullptr};
674 };
675 
687 template <std::ranges::viewable_range database_t,
688  std::ranges::viewable_range query_t,
689  typename align_config_t,
690  typename edit_traits>
691 class edit_distance_unbanded :
693 // Hide this section in doxygen, because it messes up the inheritance.
694  public edit_distance_base<
695  edit_traits::use_max_errors,
696  edit_distance_unbanded_max_errors_policy,
697  edit_traits,
698  edit_distance_unbanded<database_t, query_t, align_config_t, edit_traits>>,
699  public edit_distance_base<
700  edit_traits::is_global,
701  edit_distance_unbanded_global_policy,
702  edit_traits,
703  edit_distance_unbanded<database_t, query_t, align_config_t, edit_traits>>,
704  public edit_distance_base<
705  edit_traits::is_semi_global,
706  edit_distance_unbanded_semi_global_policy,
707  edit_traits,
708  edit_distance_unbanded<database_t, query_t, align_config_t, edit_traits>>,
709  public edit_distance_base<
710  edit_traits::compute_score_matrix,
711  edit_distance_unbanded_score_matrix_policy,
712  edit_traits,
713  edit_distance_unbanded<database_t, query_t, align_config_t, edit_traits>>,
714  public edit_distance_base<
715  edit_traits::compute_trace_matrix,
716  edit_distance_unbanded_trace_matrix_policy,
717  edit_traits,
718  edit_distance_unbanded<database_t, query_t, align_config_t, edit_traits>>
720 {
721 public:
722  using typename edit_traits::word_type;
723  using typename edit_traits::score_type;
724  using typename edit_traits::database_type;
725  using typename edit_traits::query_type;
726  using typename edit_traits::align_config_type;
727  using edit_traits::word_size;
728 
729 private:
731  template <typename other_derived_t, typename other_edit_traits>
732  friend class edit_distance_unbanded_max_errors_policy;
734  template <typename other_derived_t, typename other_edit_traits>
735  friend class edit_distance_unbanded_global_policy;
737  template <typename other_derived_t, typename other_edit_traits>
738  friend class edit_distance_unbanded_semi_global_policy;
740  template <typename other_derived_t, typename other_edit_traits>
741  friend class edit_distance_unbanded_score_matrix_policy;
743  template <typename other_derived_t, typename other_edit_traits>
744  friend class edit_distance_unbanded_trace_matrix_policy;
745 
746  using typename edit_traits::database_iterator;
747  using typename edit_traits::query_alphabet_type;
748  using typename edit_traits::alignment_result_type;
749  using edit_traits::use_max_errors;
750  using edit_traits::is_semi_global;
751  using edit_traits::is_global;
752  using edit_traits::compute_score;
753  using edit_traits::compute_end_positions;
754  using edit_traits::compute_begin_positions;
755  using edit_traits::compute_sequence_alignment;
756  using edit_traits::compute_score_matrix;
757  using edit_traits::compute_trace_matrix;
758  using edit_traits::compute_matrix;
759  using typename edit_traits::score_matrix_type;
760  using typename edit_traits::trace_matrix_type;
761 
763  database_t database;
765  query_t query;
767  align_config_t config;
768 
770  static constexpr word_type hp0 = is_global ? 1u : 0u;
772  static constexpr word_type hn0 = 0u;
774  static constexpr word_type vp0 = ~word_type{0u};
776  static constexpr word_type vn0 = 0u;
777 
779  score_type _score{};
786  word_type score_mask{0u};
797  std::vector<word_type> bit_masks{};
798 
800  database_iterator database_it{};
802  database_iterator database_it_end{};
803 
805  struct compute_state_trace_matrix
806  {
808  proxy_reference<word_type> db{};
809  };
810 
812  struct compute_state : enable_state_t<edit_traits::compute_trace_matrix, compute_state_trace_matrix>
813  {
816 
818  word_type b{};
820  word_type d0{};
822  hp_type hp{};
824  word_type hn{};
826  proxy_reference<word_type> vp{};
828  proxy_reference<word_type> vn{};
830  word_type carry_d0{};
832  word_type carry_hp{hp0};
834  word_type carry_hn{};
835  };
836 
838  void add_state()
839  {
840  if constexpr(!use_max_errors && compute_score_matrix)
841  this->_score_matrix.add_column(vp, vn);
842 
843  if constexpr(!use_max_errors && compute_trace_matrix)
844  this->_trace_matrix.add_column(this->hp, this->db, vp);
845 
846  if constexpr(use_max_errors && compute_matrix)
847  {
848  size_t max_rows = this->max_rows(score_mask, this->last_block, _score, this->max_errors);
849  if constexpr(compute_score_matrix)
850  this->_score_matrix.add_column(vp, vn, max_rows);
851 
852  if constexpr(compute_trace_matrix)
853  this->_trace_matrix.add_column(this->hp, this->db, vp, max_rows);
854  }
855  }
856 
857 public:
862  edit_distance_unbanded() = delete;
863  edit_distance_unbanded(edit_distance_unbanded const &) = default;
864  edit_distance_unbanded(edit_distance_unbanded &&) = default;
865  edit_distance_unbanded & operator=(edit_distance_unbanded const &) = default;
866  edit_distance_unbanded & operator=(edit_distance_unbanded &&) = default;
867  ~edit_distance_unbanded() = default;
868 
875  edit_distance_unbanded(database_t _database,
876  query_t _query,
877  align_config_t _config,
878  edit_traits const & SEQAN3_DOXYGEN_ONLY(_traits)) :
879  database{std::forward<database_t>(_database)},
880  query{std::forward<query_t>(_query)},
881  config{std::forward<align_config_t>(_config)},
882  _score{static_cast<score_type>(std::ranges::size(query))},
883  database_it{ranges::begin(database)},
884  database_it_end{ranges::end(database)}
885  {
886  static constexpr size_t alphabet_size_ = alphabet_size<query_alphabet_type>;
887 
888  size_t const block_count = (std::ranges::size(query) - 1u + word_size) / word_size;
889  score_mask = word_type{1u} << ((std::ranges::size(query) - 1u + word_size) % word_size);
890 
891  this->score_init();
892  if constexpr(use_max_errors)
893  this->max_errors_init(block_count);
894 
895  if constexpr(compute_score_matrix)
896  this->score_matrix_init();
897 
898  if constexpr(compute_trace_matrix)
899  this->trace_matrix_init(block_count);
900 
901  vp.resize(block_count, vp0);
902  vn.resize(block_count, vn0);
903  bit_masks.resize((alphabet_size_ + 1u) * block_count, 0u);
904 
905  // encoding the letters as bit-vectors
906  for (size_t j = 0u; j < std::ranges::size(query); j++)
907  {
908  size_t const i = block_count * seqan3::to_rank(query[j]) + j / word_size;
909  bit_masks[i] |= word_type{1u} << (j % word_size);
910  }
911 
912  add_state();
913  }
915 
916 private:
918  template <bool with_carry>
919  static void compute_step(compute_state & state) noexcept
920  {
921  word_type x, t;
922  assert(state.carry_d0 <= 1u);
923  assert(state.carry_hp <= 1u);
924  assert(state.carry_hn <= 1u);
925 
926  x = state.b | state.vn;
927  t = state.vp + (x & state.vp) + state.carry_d0;
928 
929  state.d0 = (t ^ state.vp) | x;
930  state.hn = state.vp & state.d0;
931  state.hp = state.vn | ~(state.vp | state.d0);
932 
933  if constexpr(with_carry)
934  state.carry_d0 = (state.carry_d0 != 0u) ? t <= state.vp : t < state.vp;
935 
936  x = (state.hp << 1u) | state.carry_hp;
937  state.vn = x & state.d0;
938  state.vp = (state.hn << 1u) | ~(x | state.d0) | state.carry_hn;
939 
940  if constexpr(with_carry)
941  {
942  state.carry_hp = state.hp >> (word_size - 1u);
943  state.carry_hn = state.hn >> (word_size - 1u);
944  }
945  }
946 
948  template <bool with_carry>
949  void compute_kernel(compute_state & state, size_t const block_offset, size_t const current_block) noexcept
950  {
951  state.vp = proxy_reference<word_type>{this->vp[current_block]};
952  state.vn = proxy_reference<word_type>{this->vn[current_block]};
953  if constexpr(compute_trace_matrix)
954  {
955  state.hp = proxy_reference<word_type>{this->hp[current_block]};
956  state.db = proxy_reference<word_type>{this->db[current_block]};
957  }
958  state.b = bit_masks[block_offset + current_block];
959 
960  compute_step<with_carry>(state);
961  if constexpr(compute_trace_matrix)
962  state.db = ~(state.b ^ state.d0);
963  }
964 
966  void advance_score(word_type P, word_type N, word_type mask) noexcept
967  {
968  if ((P & mask) != word_type{0u})
969  _score++;
970  else if ((N & mask) != word_type{0u})
971  _score--;
972  }
973 
975  bool on_hit() noexcept
976  {
977  // TODO: call external on_hit functor
978  return false;
979  }
980 
982  inline bool small_patterns();
983 
985  inline bool large_patterns();
986 
988  inline void compute_empty_query_sequence()
989  {
990  assert(std::ranges::empty(query));
991 
992  bool abort_computation = false;
993 
994  for (; database_it != database_it_end; ++database_it)
995  {
996  if constexpr(is_global)
997  ++_score;
998  else // is_semi_global
999  this->update_best_score();
1000 
1001  // call on_hit
1002  if constexpr(use_max_errors)
1003  abort_computation = on_hit();
1004 
1005  this->add_state();
1006  if (abort_computation)
1007  break;
1008  }
1009  }
1010 
1012  void compute()
1013  {
1014  // limit search width for prefix search (if no matrix needs to be computed)
1015  if constexpr(use_max_errors && is_global && !compute_matrix)
1016  {
1017  // Note: For global alignments we know that the database can only be max_length long to have a score less
1018  // than or equal max_errors in the last cell.
1019  //
1020  // The following matrix shows a minimal value for each entry (because a diagonal must be +0 or +1, each
1021  // diagonal is at least the value of the initial value in the first row)
1022  // 0 1 2 3 4 5 6...
1023  // 1 0 1 2 3 4 5...
1024  // ...
1025  // m ... 3 2 1 0 1 2 3 4 5
1026  // Thus, after |query| + max_errors entries the score will always be higher than max_errors.
1027  size_t const max_length = std::ranges::size(query) + this->max_errors + 1u;
1028  size_t const haystack_length = std::min(std::ranges::size(database), max_length);
1029  database_it_end -= std::ranges::size(database) - haystack_length;
1030  }
1031 
1032  // distinguish between the version for needles not longer than
1033  // one machine word and the version for longer needles
1034  // A special cases is if the second sequence is empty (vp.size() == 0u).
1035  if (vp.size() == 0u) // [[unlikely]]
1036  compute_empty_query_sequence();
1037  else if (vp.size() == 1u)
1038  small_patterns();
1039  else
1040  large_patterns();
1041 
1042  if constexpr(is_global)
1043  this->update_best_score();
1044  }
1045 
1046 public:
1051  template <typename callback_t>
1052  void operator()([[maybe_unused]] size_t const idx, callback_t && callback)
1053  {
1054  using traits_type = alignment_configuration_traits<align_config_t>;
1055  using result_value_type = typename alignment_result_value_type_accessor<alignment_result_type>::type;
1056 
1057  compute();
1058 
1059  // First cache the begin and end positions if enabled by the edit distance traits.
1060  // Note, that they might be activated even if the user did not configure them, but in order to
1061  // compute for example the alignment both information are required and enabled internally.
1062  auto cached_end_positions = this->invalid_coordinate();
1063  auto cached_begin_positions = this->invalid_coordinate();
1064 
1065  if constexpr (compute_end_positions)
1066  cached_end_positions = this->end_positions();
1067 
1068  if constexpr (compute_begin_positions && !compute_sequence_alignment)
1069  {
1070  static_assert(compute_end_positions, "End positions required to compute the begin positions.");
1071  cached_begin_positions = this->begin_positions();
1072  }
1073 
1074  // Fill the result value type. Note we need to ask what was enabled on the user side in order to store
1075  // the correct information in the alignment result type. This information is stored in the alignment
1076  // configuration traits and not in the edit distance traits.
1077  result_value_type res_vt{};
1078 
1079  if constexpr (traits_type::output_sequence1_id)
1080  res_vt.sequence1_id = idx;
1081 
1082  if constexpr (traits_type::output_sequence2_id)
1083  res_vt.sequence2_id = idx;
1084 
1085  if constexpr (traits_type::compute_score)
1086  res_vt.score = this->score().value_or(matrix_inf<score_type>);
1087 
1088  if constexpr (traits_type::compute_sequence_alignment)
1089  {
1090  if (this->is_valid())
1091  {
1092  auto [first, second] = cached_end_positions;
1093  detail::matrix_coordinate const end_positions{detail::row_index_type{second},
1094  detail::column_index_type{first}};
1095 
1096  aligned_sequence_builder builder{this->database, this->query};
1097  auto trace_res = builder(this->trace_matrix().trace_path(end_positions));
1098  res_vt.alignment = std::move(trace_res.alignment);
1099  cached_begin_positions.first = trace_res.first_sequence_slice_positions.first;
1100  cached_begin_positions.second = trace_res.second_sequence_slice_positions.first;
1101  }
1102  }
1103 
1104  if constexpr (traits_type::compute_end_positions)
1105  res_vt.end_positions = std::move(cached_end_positions);
1106 
1107  if constexpr (traits_type::compute_begin_positions)
1108  res_vt.begin_positions = std::move(cached_begin_positions);
1109 
1110  callback(alignment_result_type{std::move(res_vt)});
1111  }
1112 };
1113 
1114 template <typename database_t, typename query_t, typename align_config_t, typename traits_t>
1115 bool edit_distance_unbanded<database_t, query_t, align_config_t, traits_t>::small_patterns()
1116 {
1117  bool abort_computation = false;
1118 
1119  // computing the blocks
1120  while (database_it != database_it_end)
1121  {
1122  compute_state state{};
1123  size_t const block_offset = seqan3::to_rank((query_alphabet_type) *database_it);
1124 
1125  compute_kernel<false>(state, block_offset, 0u);
1126  advance_score(state.hp, state.hn, score_mask);
1127 
1128  // semi-global without max_errors guarantees that the score stays within the last row
1129  if constexpr(is_semi_global && !use_max_errors)
1130  this->update_best_score();
1131 
1132  // updating the last active cell
1133  if constexpr(use_max_errors)
1134  abort_computation = this->update_last_active_cell();
1135 
1136  add_state();
1137  ++database_it;
1138  if (abort_computation)
1139  return true;
1140  }
1141 
1142  return false;
1143 }
1144 
1145 template <typename database_t, typename query_t, typename align_config_t, typename traits_t>
1146 bool edit_distance_unbanded<database_t, query_t, align_config_t, traits_t>::large_patterns()
1147 {
1148  bool abort_computation = false;
1149 
1150  while (database_it != database_it_end)
1151  {
1152  compute_state state{};
1153  size_t const block_offset = vp.size() * seqan3::to_rank((query_alphabet_type) *database_it);
1154 
1155  size_t block_count = vp.size();
1156  if constexpr(use_max_errors)
1157  block_count = this->last_block + 1;
1158 
1159  // compute each block in the current column; carries between blocks will be propagated.
1160  for (size_t current_block = 0u; current_block < block_count; current_block++)
1161  compute_kernel<true>(state, block_offset, current_block);
1162 
1163  advance_score(state.hp, state.hn, score_mask);
1164 
1165  // semi-global without max_errors guarantees that the score stays within the last row
1166  if constexpr(is_semi_global && !use_max_errors)
1167  this->update_best_score();
1168 
1169  if constexpr(use_max_errors)
1170  {
1171  // if the last active cell reached the end within the current block we have to compute the next block.
1172  bool additional_block = score_mask >> (word_size - 1u);
1173  bool reached_last_block = this->last_block + 1u == vp.size();
1174  // If there is no next block we skip the computation.
1175  if (reached_last_block)
1176  additional_block = false;
1177 
1178  if (additional_block)
1179  {
1180  size_t const current_block = this->last_block + 1u;
1181  // this might not be necessary, but carry_d0 = 1u might have an influence on the result of vn and vp.
1182  vp[current_block] = vp0;
1183  vn[current_block] = vn0;
1184  compute_kernel<false>(state, block_offset, current_block);
1185  }
1186 
1187  // updating the last active cell
1188  abort_computation = this->update_last_active_cell();
1189  }
1190 
1191  add_state();
1192  ++database_it;
1193 
1194  if (abort_computation)
1195  return true;
1196  }
1197 
1198  return false;
1199 }
1200 
1207 template <typename database_t, typename query_t, typename config_t, typename traits_t>
1208 edit_distance_unbanded(database_t && database, query_t && query, config_t config, traits_t)
1209  -> edit_distance_unbanded<database_t, query_t, config_t, traits_t>;
1211 
1212 } // namespace seqan3::detail
Provides seqan3::detail::advanceable_alignment_coordinate.
Provides seqan3::alignment_result.
T begin(T... args)
Provides seqan3::configuration and utility functions.
Forwards for seqan3::edit_distance_unbanded related types.
Provides seqan3::detail::edit_distance_score_matrix_full.
Provides seqan3::detail::edit_distance_trace_matrix_full.
T end(T... args)
T forward(T... args)
constexpr auto to_rank
Return the rank representation of a (semi-)alphabet object.
Definition: concept.hpp:155
@ alignment
The (pairwise) alignment stored in an object that models seqan3::detail::pairwise_alignment.
@ offset
Sequence (seqan3::field::seq) relative start position (0-based), unsigned value.
constexpr size_t size
The size of a type pack.
Definition: traits.hpp:151
auto const move
A view that turns lvalue-references into rvalue-references.
Definition: move.hpp:74
Provides seqan3::detail::matrix.
T min(T... args)
constexpr auto const & get(configuration< configs_t... > const &config) noexcept
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: configuration.hpp:429
SeqAn specific customisations in the standard namespace.
Adaptations of concepts from the Ranges TS.