SeqAn3 3.4.0-rc.1
The Modern C++ library for sequence analysis.
Loading...
Searching...
No Matches
aa20.hpp
Go to the documentation of this file.
1// SPDX-FileCopyrightText: 2006-2024 Knut Reinert & Freie Universität Berlin
2// SPDX-FileCopyrightText: 2016-2024 Knut Reinert & MPI für molekulare Genetik
3// SPDX-License-Identifier: BSD-3-Clause
4
10#pragma once
11
12#include <vector>
13
17
18namespace seqan3
19{
20
// The canonical amino acid alphabet with 20 letters.
// Derives from the CRTP base aminoacid_base<aa20, 20>; this class only supplies the
// rank<->char conversion tables and functions, which are private and reached by the
// befriended base classes.
// NOTE(review): this text is a rendered listing with its own line numbers fused onto each
// line, and some numbered lines are absent (e.g. 64 and 109) — code on those lines is not
// visible here.
60class aa20 : public aminoacid_base<aa20, 20>
61{
62private:
65
// NOTE(review): `base_t` is used below but its declaration is not visible in this listing;
// presumably an alias for aminoacid_base<aa20, 20> — confirm against the real header.
// The friend declarations give the base class (and the base's own base) access to the
// private tables and conversion functions defined further down.
67 friend base_t;
70 friend base_t::base_t;
72
73public:
// Constructors, destructor and assignment: all defaulted, constexpr where applicable and noexcept.
77 constexpr aa20() noexcept = default;
78 constexpr aa20(aa20 const &) noexcept = default;
79 constexpr aa20(aa20 &&) noexcept = default;
80 constexpr aa20 & operator=(aa20 const &) noexcept = default;
81 constexpr aa20 & operator=(aa20 &&) noexcept = default;
82 ~aa20() noexcept = default;
83
// Inherit the constructors of the base class.
84 using base_t::base_t;
86
87private:
// Rank -> character table: entry i is the letter with rank i (letters in alphabetical order).
89 static constexpr char_type rank_to_char_table[alphabet_size]{'A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L',
90 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y'};
91
// Returns the character stored for `rank` in rank_to_char_table.
// No bounds check is performed here; `rank` must be smaller than alphabet_size.
93 static constexpr char_type rank_to_char(rank_type const rank)
94 {
95 return rank_to_char_table[rank];
96 }
97
// Returns the rank stored for `chr` in char_to_rank_table.
// The character is converted to its unsigned counterpart first, so a negative char value
// cannot produce a negative index into the 256-entry table.
99 static constexpr rank_type char_to_rank(char_type const chr)
100 {
101 using index_t = std::make_unsigned_t<char_type>;
102 return char_to_rank_table[static_cast<index_t>(chr)];
103 }
104
// Character -> rank table with 256 entries, built at compile time by an immediately
// invoked constexpr lambda.
106 static constexpr std::array<rank_type, 256> char_to_rank_table{
107 []() constexpr
108 {
// NOTE(review): the declaration of `ret` (listing line 109) is missing from this listing;
// presumably `std::array<rank_type, 256> ret{};` — confirm against the real header.
110
111 // Default every entry to 15, the rank of 'S' in rank_to_char_table, because 'S' appears most frequently.
112 ret.fill(15u);
113
114 // Reverse mapping: each alphabet letter and its lowercase form map to the letter's rank.
115 for (rank_type rnk = 0u; rnk < alphabet_size; ++rnk)
116 {
117 ret[static_cast<rank_type>(rank_to_char_table[rnk])] = rnk;
118 ret[static_cast<rank_type>(to_lower(rank_to_char_table[rnk]))] = rnk;
119 }
120
// Letters outside the 20-letter alphabet are folded onto a member letter, in both cases.
121 ret['B'] = ret['D'];
122 ret['b'] = ret['D']; // Convert b (either D/N) to D, since D occurs more frequently.
123 ret['J'] = ret['L'];
124 ret['j'] = ret['L']; // Convert j (either I/L) to L, since L occurs more frequently.
125 ret['O'] = ret['L'];
126 ret['o'] = ret['L']; // Pyrrolysine (O) is mapped to the rank of 'L'. NOTE(review): this comment used to say "lysine", whose one-letter code is K — confirm that L is intended.
127 ret['U'] = ret['C'];
128 ret['u'] = ret['C']; // Convert Selenocysteine to cysteine.
129 ret['X'] = ret['S'];
130 ret['x'] = ret['S']; // Convert unknown amino acids to serine.
131 ret['Z'] = ret['E'];
132 ret['z'] = ret['E']; // Convert z (either E/Q) to E, since E occurs more frequently.
133 ret['*'] = ret['W']; // The most common stop codon is UGA. This is most similar to a Tryptophan.
134 return ret;
135 }()};
136};
137
138// ------------------------------------------------------------------
139// containers
140// ------------------------------------------------------------------
141
148
149// ------------------------------------------------------------------
150// literals
151// ------------------------------------------------------------------
152inline namespace literals
153{
154
168constexpr aa20 operator""_aa20(char const c) noexcept
169{
170 return aa20{}.assign_char(c);
171}
172
184SEQAN3_WORKAROUND_LITERAL aa20_vector operator""_aa20(char const * const s, size_t const n)
185{
186 aa20_vector r;
187 r.resize(n);
188
189 for (size_t i = 0; i < n; ++i)
190 r[i].assign_char(s[i]);
191
192 return r;
193}
195
196} // namespace literals
197
198} // namespace seqan3
Provides seqan3::aminoacid_alphabet.
Provides seqan3::aminoacid_base.
The canonical amino acid alphabet.
Definition aa20.hpp:61
constexpr aa20() noexcept=default
Defaulted.
constexpr derived_type & assign_char(char_type const chr) noexcept
Assign from a character, implicitly converts invalid characters.
Definition alphabet_base.hpp:160
detail::min_viable_uint_t< size - 1 > rank_type
The type of the alphabet when represented as a number (e.g. via to_rank()).
Definition alphabet_base.hpp:77
static constexpr detail::min_viable_uint_t< size > alphabet_size
The size of the alphabet, i.e. the number of different values it can take.
Definition alphabet_base.hpp:196
std::conditional_t< std::same_as< char_t, void >, char, char_t > char_type
The char representation; conditional needed to make semi alphabet definitions legal.
Definition alphabet_base.hpp:69
A CRTP-base that refines seqan3::alphabet_base and is used by the amino acids.
Definition aminoacid_base.hpp:30
T fill(T... args)
The main SeqAn3 namespace.
Definition aligned_sequence_concept.hpp:26
constexpr char_type to_lower(char_type const c) noexcept
Converts 'A'-'Z' to 'a'-'z' respectively; other characters are returned as is.
Definition transform.hpp:74
#define SEQAN3_WORKAROUND_LITERAL
Our char literals returning std::vector should be constexpr if constexpr std::vector is supported.
Definition platform.hpp:269
T resize(T... args)
Provides utilities for modifying characters.
Hide me