SeqAn3 3.4.0-rc.1
The Modern C++ library for sequence analysis.
Loading...
Searching...
No Matches
aa20.hpp
Go to the documentation of this file.
1// SPDX-FileCopyrightText: 2006-2024 Knut Reinert & Freie Universität Berlin
2// SPDX-FileCopyrightText: 2016-2024 Knut Reinert & MPI für molekulare Genetik
3// SPDX-License-Identifier: BSD-3-Clause
4
10#pragma once
11
12#include <vector>
13
17
18namespace seqan3
19{
20
60class aa20 : public aminoacid_base<aa20, 20>
61{
62private:
65
67 friend base_t;
70 friend base_t::base_t;
72
73public:
77 constexpr aa20() noexcept = default;
78 constexpr aa20(aa20 const &) noexcept = default;
79 constexpr aa20(aa20 &&) noexcept = default;
80 constexpr aa20 & operator=(aa20 const &) noexcept = default;
81 constexpr aa20 & operator=(aa20 &&) noexcept = default;
82 ~aa20() noexcept = default;
83
84 using base_t::base_t;
86
87private:
89 static constexpr char_type rank_to_char_table[alphabet_size]{'A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L',
90 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y'};
91
93 static constexpr char_type rank_to_char(rank_type const rank)
94 {
95 return rank_to_char_table[rank];
96 }
97
99 static constexpr rank_type char_to_rank(char_type const chr)
100 {
101 using index_t = std::make_unsigned_t<char_type>;
102 return char_to_rank_table[static_cast<index_t>(chr)];
103 }
104
105 // clang-format off
107 static constexpr std::array<rank_type, 256> char_to_rank_table
108 {
109 []() constexpr {
111
112 // initialize with 'S' because that appears most frequently
113 ret.fill(15u);
114
115 // reverse mapping for characters and their lowercase
116 for (rank_type rnk = 0u; rnk < alphabet_size; ++rnk)
117 {
118 ret[static_cast<rank_type>(rank_to_char_table[rnk])] = rnk;
119 ret[static_cast<rank_type>(to_lower(rank_to_char_table[rnk]))] = rnk;
120 }
121
122 ret['B'] = ret['D'];
123 ret['b'] = ret['D']; // Convert b (either D/N) to D, since D occurs more frequently.
124 ret['J'] = ret['L'];
125 ret['j'] = ret['L']; // Convert j (either I/L) to L, since L occurs more frequently.
126 ret['O'] = ret['L'];
127 ret['o'] = ret['L']; // Convert Pyrrolysine to lysine.
128 ret['U'] = ret['C'];
129 ret['u'] = ret['C']; // Convert Selenocysteine to cysteine.
130 ret['X'] = ret['S'];
131 ret['x'] = ret['S']; // Convert unknown amino acids to serine.
132 ret['Z'] = ret['E'];
133 ret['z'] = ret['E']; // Convert z (either E/Q) to E, since E occurs more frequently.
134 ret['*'] = ret['W']; // The most common stop codon is UGA. This is most similar to a Tryptophan.
135 return ret;
136 }()
137 };
138};
139// clang-format on
140
141// ------------------------------------------------------------------
142// containers
143// ------------------------------------------------------------------
144
151
152// ------------------------------------------------------------------
153// literals
154// ------------------------------------------------------------------
155inline namespace literals
156{
157
171constexpr aa20 operator""_aa20(char const c) noexcept
172{
173 return aa20{}.assign_char(c);
174}
175
187SEQAN3_WORKAROUND_LITERAL aa20_vector operator""_aa20(char const * const s, size_t const n)
188{
189 aa20_vector r;
190 r.resize(n);
191
192 for (size_t i = 0; i < n; ++i)
193 r[i].assign_char(s[i]);
194
195 return r;
196}
198
199} // namespace literals
200
201} // namespace seqan3
Provides seqan3::aminoacid_alphabet.
Provides seqan3::aminoacid_base.
The canonical amino acid alphabet.
Definition aa20.hpp:61
constexpr aa20() noexcept=default
Defaulted.
constexpr derived_type & assign_char(char_type const chr) noexcept
Assign from a character, implicitly converts invalid characters.
Definition alphabet_base.hpp:160
detail::min_viable_uint_t< size - 1 > rank_type
The type of the alphabet when represented as a number (e.g. via to_rank()).
Definition alphabet_base.hpp:77
static constexpr detail::min_viable_uint_t< size > alphabet_size
The size of the alphabet, i.e. the number of different values it can take.
Definition alphabet_base.hpp:196
std::conditional_t< std::same_as< char_t, void >, char, char_t > char_type
The char representation; conditional needed to make semi alphabet definitions legal.
Definition alphabet_base.hpp:69
A CRTP-base that refines seqan3::alphabet_base and is used by the amino acids.
Definition aminoacid_base.hpp:29
T fill(T... args)
The main SeqAn3 namespace.
Definition aligned_sequence_concept.hpp:26
constexpr char_type to_lower(char_type const c) noexcept
Converts 'A'-'Z' to 'a'-'z' respectively; other characters are returned as is.
Definition transform.hpp:80
#define SEQAN3_WORKAROUND_LITERAL
Our char literals returning std::vector should be constexpr if constexpr std::vector is supported.
Definition platform.hpp:269
T resize(T... args)
Provides utilities for modifying characters.
Hide me