SeqAn3  3.0.3
The Modern C++ library for sequence analysis.
wuss.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2021, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2021, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
13 #pragma once
14 
15 #include <cmath>
16 #include <vector>
17 
21 
22 // ------------------------------------------------------------------
23 // wuss
24 // ------------------------------------------------------------------
25 
26 namespace seqan3
27 {
28 
58 template <uint8_t SIZE = 51>
59 class wuss : public alphabet_base<wuss<SIZE>, SIZE>
60 {
61  static_assert(SIZE >= 15 && SIZE <= 67 && SIZE % 2 == 1,
62  "The wuss<> alphabet size must be an odd number in range 15..67.");
63 
64 private:
66  using base_t = alphabet_base<wuss<SIZE>, SIZE>;
67 
69  friend base_t;
70 
71 protected:
72  using typename base_t::rank_type;
73  using typename base_t::char_type;
74 
75 public:
77  using base_t::to_rank;
78  using base_t::to_char;
79 
83  constexpr wuss() noexcept = default;
84  constexpr wuss(wuss const &) noexcept = default;
85  constexpr wuss(wuss &&) noexcept = default;
86  constexpr wuss & operator=(wuss const &) noexcept = default;
87  constexpr wuss & operator=(wuss &&) noexcept = default;
88  ~wuss() noexcept = default;
89 
91 
100  constexpr bool is_pair_open() const noexcept
101  {
102  return interaction_tab[to_rank()] < 0;
103  }
104 
110  constexpr bool is_pair_close() const noexcept
111  {
112  return interaction_tab[to_rank()] > 0;
113  }
114 
120  constexpr bool is_unpaired() const noexcept
121  {
122  return interaction_tab[to_rank()] == 0;
123  }
124 
130  // formula: (alphabet size - 7 unpaired characters) / 2, as every bracket exists as opening/closing pair
131  static constexpr uint8_t max_pseudoknot_depth{static_cast<uint8_t>((alphabet_size - 7) / 2)};
132 
141  constexpr std::optional<uint8_t> pseudoknot_id() const noexcept
142  {
143  if (interaction_tab[to_rank()] != 0)
144  return std::abs(interaction_tab[to_rank()]) - 1;
145  else
146  return std::nullopt; // unpaired
147  }
149 
150 private:
152  static constexpr std::array<char_type, alphabet_size> rank_to_char_table
153  {
154  [] () constexpr
155  {
157  {
158  '.', ':', ',', '-', '_', '~', ';', '<', '(', '[', '{', '>', ')', ']', '}'
159  };
160 
161  // pseudoknot letters
162  for (rank_type rnk = 15u; rnk + 1u < alphabet_size; rnk += 2u)
163  {
164  char_type const off = static_cast<char_type>((rnk - 15u) / 2u);
165  chars[rnk] = 'A' + off;
166  chars[rnk + 1u] = 'a' + off;
167  }
168 
169  return chars;
170  } ()
171  };
172 
174  static constexpr std::array<rank_type, 256> char_to_rank_table
175  {
176  [] () constexpr
177  {
178  std::array<rank_type, 256> rank_table{};
179 
180  // initialize with unpaired (std::array::fill unfortunately not constexpr)
181  for (rank_type & rnk : rank_table)
182  rnk = 6u;
183 
184  // set alphabet values
185  for (rank_type rnk = 0u; rnk < alphabet_size; ++rnk)
186  rank_table[rank_to_char_table[rnk]] = rnk;
187  return rank_table;
188  } ()
189  };
190 
192  static constexpr char_type rank_to_char(rank_type const rank)
193  {
194  return rank_to_char_table[rank];
195  }
196 
198  static constexpr rank_type char_to_rank(char_type const chr)
199  {
200  using index_t = std::make_unsigned_t<char_type>;
201  return char_to_rank_table[static_cast<index_t>(chr)];
202  }
203 
207  static std::array<int8_t, SIZE> const interaction_tab;
208 };
209 
210 template <uint8_t SIZE>
211 constexpr std::array<int8_t, SIZE> wuss<SIZE>::interaction_tab = [] () constexpr
212 {
213  std::array<int8_t, alphabet_size> interaction_table{};
214  int cnt_open = 0;
215  int cnt_close = 0;
216 
217  for (rank_type rnk = 0u; rnk <= 6u; ++rnk)
218  {
219  interaction_table[rnk] = 0;
220  }
221 
222  for (rank_type rnk = 7u; rnk <= 10u; ++rnk)
223  {
224  interaction_table[rnk] = --cnt_open;
225  }
226 
227  for (rank_type rnk = 11u; rnk <= 14u; ++rnk)
228  {
229  interaction_table[rnk] = ++cnt_close;
230  }
231 
232  for (rank_type rnk = 15u; rnk + 1u < alphabet_size; rnk += 2u)
233  {
234  interaction_table[rnk] = --cnt_open;
235  interaction_table[rnk + 1u] = ++cnt_close;
236  }
237 
238  return interaction_table;
239 } ();
240 
244 using wuss51 = wuss<51>;
245 
246 inline namespace literals
247 {
248 
262 constexpr wuss51 operator""_wuss51(char const ch) noexcept
263 {
264  return wuss51{}.assign_char(ch);
265 }
266 
278 inline std::vector<wuss51> operator""_wuss51(const char * str, std::size_t len)
279 {
281  vec.resize(len);
282 
283  for (size_t idx = 0ul; idx < len; ++idx)
284  vec[idx].assign_char(str[idx]);
285 
286  return vec;
287 }
289 
290 } // inline namespace literals
291 
292 } // namespace seqan3
Provides seqan3::rna_structure_alphabet.
Provides seqan3::alphabet_base.
A CRTP-base that makes defining a custom alphabet easier.
Definition: alphabet_base.hpp:81
constexpr char_type to_char() const noexcept
Return the letter as a character of char_type.
Definition: alphabet_base.hpp:139
constexpr rank_type to_rank() const noexcept
Return the letter's numeric value (rank in the alphabet).
Definition: alphabet_base.hpp:185
detail::min_viable_uint_t< size - 1 > rank_type
The type of the alphabet when represented as a number (e.g. via to_rank()).
Definition: alphabet_base.hpp:104
constexpr derived_type & assign_char(char_type const chr) noexcept
Assign from a character, implicitly converts invalid characters.
Definition: alphabet_base.hpp:211
static constexpr detail::min_viable_uint_t< size > alphabet_size
The size of the alphabet, i.e. the number of different values it can take.
Definition: alphabet_base.hpp:276
std::conditional_t< std::same_as< char_t, void >, char, char_t > char_type
The char representation; conditional needed to make semi alphabet definitions legal.
Definition: alphabet_base.hpp:96
The WUSS structure alphabet of the characters .<>:,-_~;()[]{}AaBbCcDd...
Definition: wuss.hpp:60
static constexpr uint8_t max_pseudoknot_depth
The ability of this alphabet to represent pseudoknots, i.e. crossing interactions, up to a certain depth.
Definition: wuss.hpp:131
constexpr bool is_pair_close() const noexcept
Check whether the character represents a leftward interaction in an RNA structure.
Definition: wuss.hpp:110
constexpr bool is_pair_open() const noexcept
Check whether the character represents a rightward interaction in an RNA structure.
Definition: wuss.hpp:100
constexpr std::optional< uint8_t > pseudoknot_id() const noexcept
Get an identifier for a pseudoknotted interaction, where opening and closing brackets of the same type share the same identifier.
Definition: wuss.hpp:141
constexpr rank_type to_rank() const noexcept
Return the letter's numeric value (rank in the alphabet).
Definition: alphabet_base.hpp:185
detail::min_viable_uint_t< size - 1 > rank_type
The type of the alphabet when represented as a number (e.g. via to_rank()).
Definition: alphabet_base.hpp:104
static constexpr detail::min_viable_uint_t< size > alphabet_size
The size of the alphabet, i.e. the number of different values it can take.
Definition: alphabet_base.hpp:276
constexpr wuss() noexcept=default
Defaulted.
constexpr bool is_unpaired() const noexcept
Check whether the character represents an unpaired position in an RNA structure.
Definition: wuss.hpp:120
std::conditional_t< std::same_as< char_t, void >, char, char_t > char_type
The char representation; conditional needed to make semi alphabet definitions legal.
Definition: alphabet_base.hpp:96
constexpr auto alphabet_size
A type trait that holds the size of a (semi-)alphabet.
Definition: concept.hpp:858
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:29
T resize(T... args)
Provides utilities for modifying characters.