SeqAn3 3.2.0
The Modern C++ library for sequence analysis.
wuss.hpp
Go to the documentation of this file.
1// -----------------------------------------------------------------------------------------------------
2// Copyright (c) 2006-2022, Knut Reinert & Freie Universität Berlin
3// Copyright (c) 2016-2022, Knut Reinert & MPI für molekulare Genetik
4// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6// -----------------------------------------------------------------------------------------------------
7
13#pragma once
14
15#include <cmath>
16#include <vector>
17
21
22// ------------------------------------------------------------------
23// wuss
24// ------------------------------------------------------------------
25
26namespace seqan3
27{
28
58template <uint8_t SIZE = 51>
59class wuss : public alphabet_base<wuss<SIZE>, SIZE>
60{
61 static_assert(SIZE >= 15 && SIZE <= 67 && SIZE % 2 == 1,
62 "The wuss<> alphabet size must be an odd number in range 15..67.");
63
64private:
66 using base_t = alphabet_base<wuss<SIZE>, SIZE>;
67
69 friend base_t;
70
71protected:
72 using typename base_t::char_type;
73 using typename base_t::rank_type;
74
75public:
77 using base_t::to_char;
78 using base_t::to_rank;
79
83 constexpr wuss() noexcept = default;
84 constexpr wuss(wuss const &) noexcept = default;
85 constexpr wuss(wuss &&) noexcept = default;
86 constexpr wuss & operator=(wuss const &) noexcept = default;
87 constexpr wuss & operator=(wuss &&) noexcept = default;
88 ~wuss() noexcept = default;
89
91
100 constexpr bool is_pair_open() const noexcept
101 {
102 return interaction_tab[to_rank()] < 0;
103 }
104
110 constexpr bool is_pair_close() const noexcept
111 {
112 return interaction_tab[to_rank()] > 0;
113 }
114
120 constexpr bool is_unpaired() const noexcept
121 {
122 return interaction_tab[to_rank()] == 0;
123 }
124
130 // formula: (alphabet size - 7 unpaired characters) / 2, as every bracket exists as opening/closing pair
131 static constexpr uint8_t max_pseudoknot_depth{static_cast<uint8_t>((alphabet_size - 7) / 2)};
132
141 constexpr std::optional<uint8_t> pseudoknot_id() const noexcept
142 {
143 if (interaction_tab[to_rank()] != 0)
144 return std::abs(interaction_tab[to_rank()]) - 1;
145 else
146 return std::nullopt; // unpaired
147 }
149
150private:
152 static constexpr std::array<char_type, alphabet_size> rank_to_char_table{
154 chars{'.', ':', ',', '-', '_', '~', ';', '<', '(', '[', '{', '>', ')', ']', '}'};
155
156 // pseudoknot letters
157 for (rank_type rnk = 15u; rnk + 1u < alphabet_size; rnk += 2u)
158 {
159 char_type const off = static_cast<char_type>((rnk - 15u) / 2u);
160 chars[rnk] = 'A' + off;
161 chars[rnk + 1u] = 'a' + off;
162 }
163
164 return chars;
165}()
166}; // namespace seqan3
167
169static constexpr std::array<rank_type, 256> char_to_rank_table{[]() constexpr {std::array<rank_type, 256> rank_table{};
170
171// initialize with unpaired (std::array::fill unfortunately not constexpr)
172for (rank_type & rnk : rank_table)
173 rnk = 6u;
174
175// set alphabet values
176for (rank_type rnk = 0u; rnk < alphabet_size; ++rnk)
177 rank_table[rank_to_char_table[rnk]] = rnk;
178return rank_table;
179}
180()
181}
182;
183
185static constexpr char_type rank_to_char(rank_type const rank)
186{
187 return rank_to_char_table[rank];
188}
189
191static constexpr rank_type char_to_rank(char_type const chr)
192{
193 using index_t = std::make_unsigned_t<char_type>;
194 return char_to_rank_table[static_cast<index_t>(chr)];
195}
196
200static std::array<int8_t, SIZE> const interaction_tab;
201}
202;
203
204template <uint8_t SIZE>
205constexpr std::array<int8_t, SIZE> wuss<SIZE>::interaction_tab = []() constexpr
206{
207 std::array<int8_t, alphabet_size> interaction_table{};
208 int cnt_open = 0;
209 int cnt_close = 0;
210
211 for (rank_type rnk = 0u; rnk <= 6u; ++rnk)
212 {
213 interaction_table[rnk] = 0;
214 }
215
216 for (rank_type rnk = 7u; rnk <= 10u; ++rnk)
217 {
218 interaction_table[rnk] = --cnt_open;
219 }
220
221 for (rank_type rnk = 11u; rnk <= 14u; ++rnk)
222 {
223 interaction_table[rnk] = ++cnt_close;
224 }
225
226 for (rank_type rnk = 15u; rnk + 1u < alphabet_size; rnk += 2u)
227 {
228 interaction_table[rnk] = --cnt_open;
229 interaction_table[rnk + 1u] = ++cnt_close;
230 }
231
232 return interaction_table;
233}
234();
235
240
241inline namespace literals
242{
243
257constexpr wuss51 operator""_wuss51(char const ch) noexcept
258{
259 return wuss51{}.assign_char(ch);
260}
261
273inline std::vector<wuss51> operator""_wuss51(char const * str, std::size_t len)
274{
276 vec.resize(len);
277
278 for (size_t idx = 0ul; idx < len; ++idx)
279 vec[idx].assign_char(str[idx]);
280
281 return vec;
282}
284
285} // namespace literals
286
287} // namespace seqan3
Provides seqan3::rna_structure_alphabet.
Provides seqan3::alphabet_base.
A CRTP-base that makes defining a custom alphabet easier.
Definition: alphabet_base.hpp:57
constexpr derived_type & assign_char(char_type const chr) noexcept
Assign from a character, implicitly converts invalid characters.
Definition: alphabet_base.hpp:163
constexpr rank_type to_rank() const noexcept
Return the letter's numeric value (rank in the alphabet).
Definition: alphabet_base.hpp:137
detail::min_viable_uint_t< size - 1 > rank_type
The type of the alphabet when represented as a number (e.g. via to_rank()).
Definition: alphabet_base.hpp:80
static constexpr detail::min_viable_uint_t< size > alphabet_size
The size of the alphabet, i.e. the number of different values it can take.
Definition: alphabet_base.hpp:199
constexpr char_type to_char() const noexcept
Return the letter as a character of char_type.
Definition: alphabet_base.hpp:115
std::conditional_t< std::same_as< char_t, void >, char, char_t > char_type
The char representation; conditional needed to make semi alphabet definitions legal.
Definition: alphabet_base.hpp:72
The WUSS structure alphabet of the characters .<>:,-_~;()[]{}AaBbCcDd...
Definition: wuss.hpp:60
static constexpr uint8_t max_pseudoknot_depth
The ability of this alphabet to represent pseudoknots, i.e. crossing interactions, given as the number of distinct bracket types: (alphabet size - 7) / 2.
Definition: wuss.hpp:131
constexpr bool is_pair_close() const noexcept
Check whether the character represents a leftward interaction in an RNA structure.
Definition: wuss.hpp:110
constexpr bool is_pair_open() const noexcept
Check whether the character represents a rightward interaction in an RNA structure.
Definition: wuss.hpp:100
constexpr std::optional< uint8_t > pseudoknot_id() const noexcept
Get an identifier for a pseudoknotted interaction, where opening and closing brackets of the same type share the same identifier.
Definition: wuss.hpp:141
constexpr rank_type to_rank() const noexcept
Return the letter's numeric value (rank in the alphabet).
Definition: alphabet_base.hpp:137
detail::min_viable_uint_t< size - 1 > rank_type
The type of the alphabet when represented as a number (e.g. via to_rank()).
Definition: alphabet_base.hpp:80
static constexpr detail::min_viable_uint_t< size > alphabet_size
The size of the alphabet, i.e. the number of different values it can take.
Definition: alphabet_base.hpp:199
constexpr wuss() noexcept=default
Defaulted.
constexpr bool is_unpaired() const noexcept
Check whether the character represents an unpaired position in an RNA structure.
Definition: wuss.hpp:120
std::conditional_t< std::same_as< char_t, void >, char, char_t > char_type
The char representation; conditional needed to make semi alphabet definitions legal.
Definition: alphabet_base.hpp:72
constexpr auto alphabet_size
A type trait that holds the size of a (semi-)alphabet.
Definition: concept.hpp:849
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:29
T resize(T... args)
Provides utilities for modifying characters.