SeqAn3  3.0.3
The Modern C++ library for sequence analysis.
simd_matrix_scoring_scheme.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2021, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2021, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
13 #pragma once
14 
15 #include <seqan3/std/concepts>
16 
23 
24 namespace seqan3::detail
25 {
26 
56 template <simd_concept simd_score_t, semialphabet alphabet_t, typename alignment_t>
58  requires (std::same_as<alignment_t, align_cfg::method_local> || std::same_as<alignment_t, align_cfg::method_global>)
60 class simd_matrix_scoring_scheme
61 {
62 private:
64  using scalar_type = typename simd_traits<simd_score_t>::scalar_type;
66  using simd_score_profile_type = simd_score_t;
68  using simd_alphabet_ranks_type = simd_score_t;
69 
70  static_assert(seqan3::alphabet_size<alphabet_t> <= std::numeric_limits<scalar_type>::max(),
71  "The selected simd scalar type is not large enough to represent the given alphabet including an "
72  "additional padding symbol!");
73 
74  static_assert(seqan3::alphabet_size<alphabet_t> < std::numeric_limits<scalar_type>::max(),
75  "The selected simd scalar type is not large enough to represent the given alphabet including an "
76  "additional padding symbol!");
77 
79  static constexpr bool is_global = std::same_as<alignment_t, align_cfg::method_global>;
81  static constexpr size_t index_offset = seqan3::alphabet_size<alphabet_t> + 1; // scheme is extended by one.
83  static constexpr scalar_type score_for_padding_symbol = (is_global) ? 1 : -1;
84 
86  std::vector<scalar_type> scoring_scheme_data{};
87 
88 public:
90  static constexpr scalar_type padding_symbol = static_cast<scalar_type>(seqan3::alphabet_size<alphabet_t>);
91 
95  constexpr simd_matrix_scoring_scheme() = default;
96  constexpr simd_matrix_scoring_scheme(simd_matrix_scoring_scheme const &) = default;
97  constexpr simd_matrix_scoring_scheme(simd_matrix_scoring_scheme &&) = default;
98  constexpr simd_matrix_scoring_scheme & operator=(simd_matrix_scoring_scheme const &) = default;
99  constexpr simd_matrix_scoring_scheme & operator=(simd_matrix_scoring_scheme &&) = default;
100  ~simd_matrix_scoring_scheme() = default;
101 
103  template <typename scoring_scheme_t>
104  constexpr explicit simd_matrix_scoring_scheme(scoring_scheme_t const & scoring_scheme)
105  {
106  initialise_from_scalar_scoring_scheme(scoring_scheme);
107  }
108 
110  template <typename scoring_scheme_t>
111  constexpr simd_matrix_scoring_scheme & operator=(scoring_scheme_t const & scoring_scheme)
112  {
113  initialise_from_scalar_scoring_scheme(scoring_scheme);
114  return *this;
115  }
117 
142  constexpr simd_score_t score(simd_score_profile_type const & score_profile,
143  simd_alphabet_ranks_type const & ranks) const noexcept
144  {
145  simd_score_t const matrix_index = score_profile + ranks; // Compute the matrix indices for the lookup.
146  simd_score_t result{};
147 
148  for (size_t idx = 0; idx < simd_traits<simd_score_t>::length; ++idx)
149  result[idx] = scoring_scheme_data.data()[matrix_index[idx]];
150 
151  return result;
152  }
154 
156  constexpr scalar_type padding_match_score() const noexcept
157  {
158  return score_for_padding_symbol;
159  }
160 
171  constexpr simd_score_profile_type make_score_profile(simd_alphabet_ranks_type const & ranks) const noexcept
172  {
173  return ranks * simd::fill<simd_score_t>(index_offset);
174  }
175 
176 private:
185  template <typename scoring_scheme_t>
189  constexpr void initialise_from_scalar_scoring_scheme(scoring_scheme_t const & scoring_scheme)
190  {
191  using score_t = decltype(std::declval<scoring_scheme_t const &>().score(alphabet_t{}, alphabet_t{}));
192 
193  // Helper function to check if the matrix score is in the value range representable by the selected simd vector.
194  [[maybe_unused]] auto check_score_range = [&] ([[maybe_unused]] score_t score)
195  {
196  // Note only if the size of the scalar type of the simd vector is smaller than the size of the score type
197  // of the original scoring scheme, the score might exceed the valid value range of the scalar type. In this
198  // case an exception will be thrown.
199  if constexpr (sizeof(scalar_type) < sizeof(score_t))
200  {
201  constexpr score_t max_score_value = static_cast<score_t>(std::numeric_limits<scalar_type>::max());
202  constexpr score_t min_score_value = static_cast<score_t>(std::numeric_limits<scalar_type>::lowest());
203 
204  if (score > max_score_value || score < min_score_value)
205  throw std::invalid_argument{"The selected scoring scheme score overflows "
206  "for the selected scalar type of the simd type."};
207  }
208  };
209 
210  // For the global alignment we extend the alphabet by one symbol to handle sequences with different size.
211  scoring_scheme_data.resize(index_offset * index_offset, score_for_padding_symbol);
212 
213  // Convert the scoring matrix into a linear vector to allow gather operations later on.
214  using alphabet_size_t = std::remove_const_t<decltype(seqan3::alphabet_size<alphabet_t>)>;
215  auto data_it = scoring_scheme_data.begin();
216  for (alphabet_size_t lhs_rank = 0; lhs_rank < seqan3::alphabet_size<alphabet_t>; ++lhs_rank)
217  {
218  for (alphabet_size_t rhs_rank = 0; rhs_rank < seqan3::alphabet_size<alphabet_t>; ++rhs_rank, ++data_it)
219  {
220  score_t tmp_score = scoring_scheme.score(seqan3::assign_rank_to(lhs_rank, alphabet_t{}),
221  seqan3::assign_rank_to(rhs_rank, alphabet_t{}));
222 
223  check_score_range(tmp_score);
224  *data_it = tmp_score;
225  }
226  ++data_it; // skip one for the padded symbol.
227  }
228  }
229 };
230 
231 } // namespace seqan3::detail
Provides algorithms to modify seqan3::simd::simd_type.
Provides global and local alignment configurations.
Core alphabet concept and free function/type trait wrappers.
Adaptions of concepts from the Cereal library.
The Concepts library.
constexpr auto assign_rank_to
Assign a rank to an alphabet object.
Definition: concept.hpp:291
A concept that requires that type be able to score two letters.
T lowest(T... args)
T max(T... args)
Provides seqan3::scoring_scheme_for.
Provides seqan3::simd::simd_concept.