SeqAn3 3.1.0
The Modern C++ library for sequence analysis.
simd_matrix_scoring_scheme.hpp
Go to the documentation of this file.
1// -----------------------------------------------------------------------------------------------------
2// Copyright (c) 2006-2021, Knut Reinert & Freie Universität Berlin
3// Copyright (c) 2016-2021, Knut Reinert & MPI für molekulare Genetik
4// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6// -----------------------------------------------------------------------------------------------------
7
13#pragma once
14
15#include <seqan3/std/concepts>
16
23
24namespace seqan3::detail
25{
26
56template <simd_concept simd_score_t, semialphabet alphabet_t, typename alignment_t>
58 requires (std::same_as<alignment_t, align_cfg::method_local> || std::same_as<alignment_t, align_cfg::method_global>)
60class simd_matrix_scoring_scheme
61{
62private:
64 using scalar_type = typename simd_traits<simd_score_t>::scalar_type;
66 using simd_score_profile_type = simd_score_t;
68 using simd_alphabet_ranks_type = simd_score_t;
69
70 static_assert(seqan3::alphabet_size<alphabet_t> <= std::numeric_limits<scalar_type>::max(),
71 "The selected simd scalar type is not large enough to represent the given alphabet including an "
72 "additional padding symbol!");
73
74 static_assert(seqan3::alphabet_size<alphabet_t> < std::numeric_limits<scalar_type>::max(),
75 "The selected simd scalar type is not large enough to represent the given alphabet including an "
76 "additional padding symbol!");
77
79 static constexpr bool is_global = std::same_as<alignment_t, align_cfg::method_global>;
81 static constexpr size_t index_offset = seqan3::alphabet_size<alphabet_t> + 1; // scheme is extended by one.
83 static constexpr scalar_type score_for_padding_symbol = (is_global) ? 1 : -1;
84
86 std::vector<scalar_type> scoring_scheme_data{};
87
88public:
90 static constexpr scalar_type padding_symbol = static_cast<scalar_type>(seqan3::alphabet_size<alphabet_t>);
91
95 constexpr simd_matrix_scoring_scheme() = default;
96 constexpr simd_matrix_scoring_scheme(simd_matrix_scoring_scheme const &) = default;
97 constexpr simd_matrix_scoring_scheme(simd_matrix_scoring_scheme &&) = default;
98 constexpr simd_matrix_scoring_scheme & operator=(simd_matrix_scoring_scheme const &) = default;
99 constexpr simd_matrix_scoring_scheme & operator=(simd_matrix_scoring_scheme &&) = default;
100 ~simd_matrix_scoring_scheme() = default;
101
103 template <typename scoring_scheme_t>
104 constexpr explicit simd_matrix_scoring_scheme(scoring_scheme_t const & scoring_scheme)
105 {
106 initialise_from_scalar_scoring_scheme(scoring_scheme);
107 }
108
110 template <typename scoring_scheme_t>
111 constexpr simd_matrix_scoring_scheme & operator=(scoring_scheme_t const & scoring_scheme)
112 {
113 initialise_from_scalar_scoring_scheme(scoring_scheme);
114 return *this;
115 }
117
142 constexpr simd_score_t score(simd_score_profile_type const & score_profile,
143 simd_alphabet_ranks_type const & ranks) const noexcept
144 {
145 simd_score_t const matrix_index = score_profile + ranks; // Compute the matrix indices for the lookup.
146 simd_score_t result{};
147
148 for (size_t idx = 0; idx < simd_traits<simd_score_t>::length; ++idx)
149 result[idx] = scoring_scheme_data.data()[matrix_index[idx]];
150
151 return result;
152 }
154
156 constexpr scalar_type padding_match_score() const noexcept
157 {
158 return score_for_padding_symbol;
159 }
160
171 constexpr simd_score_profile_type make_score_profile(simd_alphabet_ranks_type const & ranks) const noexcept
172 {
173 return ranks * simd::fill<simd_score_t>(index_offset);
174 }
175
176private:
185 template <typename scoring_scheme_t>
189 constexpr void initialise_from_scalar_scoring_scheme(scoring_scheme_t const & scoring_scheme)
190 {
191 using score_t = decltype(std::declval<scoring_scheme_t const &>().score(alphabet_t{}, alphabet_t{}));
192
193 // Helper function to check if the matrix score is in the value range representable by the selected simd vector.
194 [[maybe_unused]] auto check_score_range = [&] ([[maybe_unused]] score_t score)
195 {
196 // Note only if the size of the scalar type of the simd vector is smaller than the size of the score type
197 // of the original scoring scheme, the score might exceed the valid value range of the scalar type. In this
198 // case an exception will be thrown.
199 if constexpr (sizeof(scalar_type) < sizeof(score_t))
200 {
201 constexpr score_t max_score_value = static_cast<score_t>(std::numeric_limits<scalar_type>::max());
202 constexpr score_t min_score_value = static_cast<score_t>(std::numeric_limits<scalar_type>::lowest());
203
204 if (score > max_score_value || score < min_score_value)
205 throw std::invalid_argument{"The selected scoring scheme score overflows "
206 "for the selected scalar type of the simd type."};
207 }
208 };
209
210 // For the global alignment we extend the alphabet by one symbol to handle sequences with different size.
211 scoring_scheme_data.resize(index_offset * index_offset, score_for_padding_symbol);
212
213 // Convert the scoring matrix into a linear vector to allow gather operations later on.
214 using alphabet_size_t = std::remove_const_t<decltype(seqan3::alphabet_size<alphabet_t>)>;
215 auto data_it = scoring_scheme_data.begin();
216 for (alphabet_size_t lhs_rank = 0; lhs_rank < seqan3::alphabet_size<alphabet_t>; ++lhs_rank)
217 {
218 for (alphabet_size_t rhs_rank = 0; rhs_rank < seqan3::alphabet_size<alphabet_t>; ++rhs_rank, ++data_it)
219 {
220 score_t tmp_score = scoring_scheme.score(seqan3::assign_rank_to(lhs_rank, alphabet_t{}),
221 seqan3::assign_rank_to(rhs_rank, alphabet_t{}));
222
223 check_score_range(tmp_score);
224 *data_it = tmp_score;
225 }
226 ++data_it; // skip one for the padded symbol.
227 }
228 }
229};
230
231} // namespace seqan3::detail
Provides algorithms to modify seqan3::simd::simd_type.
Provides global and local alignment configurations.
Core alphabet concept and free function/type trait wrappers.
Adaptions of concepts from the Cereal library.
The <concepts> header from C++20's standard library.
constexpr auto assign_rank_to
Assign a rank to an alphabet object.
Definition: concept.hpp:294
A concept that requires that type be able to score two letters.
T lowest(T... args)
T max(T... args)
Provides seqan3::scoring_scheme_for.
Provides seqan3::simd::simd_concept.