SeqAn3 3.2.0
The Modern C++ library for sequence analysis.
aa20.hpp
Go to the documentation of this file.
1// -----------------------------------------------------------------------------------------------------
2// Copyright (c) 2006-2022, Knut Reinert & Freie Universität Berlin
3// Copyright (c) 2016-2022, Knut Reinert & MPI für molekulare Genetik
4// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6// -----------------------------------------------------------------------------------------------------
7
13#pragma once
14
15#include <vector>
16
20
21namespace seqan3
22{
23
63class aa20 : public aminoacid_base<aa20, 20>
64{
65private:
68
70 friend base_t;
72 friend base_t::base_t;
74
75public:
79 constexpr aa20() noexcept = default;
80 constexpr aa20(aa20 const &) noexcept = default;
81 constexpr aa20(aa20 &&) noexcept = default;
82 constexpr aa20 & operator=(aa20 const &) noexcept = default;
83 constexpr aa20 & operator=(aa20 &&) noexcept = default;
84 ~aa20() noexcept = default;
85
86 using base_t::base_t;
88
89private:
91 static constexpr char_type rank_to_char_table[alphabet_size]{'A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L',
92 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y'};
93
95 static constexpr std::array<rank_type, 256> char_to_rank_table{[]() constexpr {std::array<rank_type, 256> ret{};
96
97 // initialize with UNKNOWN (std::array::fill unfortunately not constexpr)
98 for (auto & c : ret)
99 c = 15; // value of 'S', because that appears most frequently
100
101 // reverse mapping for characters and their lowercase
102 for (rank_type rnk = 0u; rnk < alphabet_size; ++rnk)
103 {
104 ret[static_cast<rank_type>(rank_to_char_table[rnk])] = rnk;
105 ret[static_cast<rank_type>(to_lower(rank_to_char_table[rnk]))] = rnk;
106 }
107
108 ret['B'] = ret['D'];
109 ret['b'] = ret['D']; // Convert b (either D/N) to D, since D occurs more frequently.
110 ret['J'] = ret['L'];
111 ret['j'] = ret['L']; // Convert j (either I/L) to L, since L occurs more frequently.
112 ret['O'] = ret['L'];
113 ret['o'] = ret['L']; // Convert Pyrrolysine to lysine.
114 ret['U'] = ret['C'];
115 ret['u'] = ret['C']; // Convert Selenocysteine to cysteine.
116 ret['X'] = ret['S'];
117 ret['x'] = ret['S']; // Convert unknown amino acids to serine.
118 ret['Z'] = ret['E'];
119 ret['z'] = ret['E']; // Convert z (either E/Q) to E, since E occurs more frequently.
120 ret['*'] = ret['W']; // The most common stop codon is UGA. This is most similar to a Tryptophan.
121 return ret;
122}()
123}; // namespace seqan3
124
126static constexpr char_type rank_to_char(rank_type const rank)
127{
128 return rank_to_char_table[rank];
129}
130
132static constexpr rank_type char_to_rank(char_type const chr)
133{
134 using index_t = std::make_unsigned_t<char_type>;
135 return char_to_rank_table[static_cast<index_t>(chr)];
136}
137}
138;
139
140// ------------------------------------------------------------------
141// containers
142// ------------------------------------------------------------------
143
150
151// ------------------------------------------------------------------
152// literals
153// ------------------------------------------------------------------
154inline namespace literals
155{
156
170constexpr aa20 operator""_aa20(char const c) noexcept
171{
172 return aa20{}.assign_char(c);
173}
174
186inline aa20_vector operator""_aa20(char const * const s, size_t const n)
187{
188 aa20_vector r;
189 r.resize(n);
190
191 for (size_t i = 0; i < n; ++i)
192 r[i].assign_char(s[i]);
193
194 return r;
195}
197
198} // namespace literals
199
200} // namespace seqan3
Provides seqan3::aminoacid_alphabet.
Provides seqan3::aminoacid_base.
The canonical amino acid alphabet.
Definition: aa20.hpp:64
constexpr aa20() noexcept=default
Defaulted.
constexpr derived_type & assign_char(char_type const chr) noexcept
Assign from a character, implicitly converts invalid characters.
Definition: alphabet_base.hpp:163
detail::min_viable_uint_t< size - 1 > rank_type
The type of the alphabet when represented as a number (e.g. via to_rank()).
Definition: alphabet_base.hpp:80
static constexpr detail::min_viable_uint_t< size > alphabet_size
The size of the alphabet, i.e. the number of different values it can take.
Definition: alphabet_base.hpp:199
std::conditional_t< std::same_as< char, void >, char, char > char_type
The char representation; conditional needed to make semi alphabet definitions legal.
Definition: alphabet_base.hpp:72
A CRTP-base that refines seqan3::alphabet_base and is used by the amino acids.
Definition: aminoacid_base.hpp:32
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:29
constexpr char_type to_lower(char_type const c) noexcept
Converts 'A'-'Z' to 'a'-'z' respectively; other characters are returned as is.
Definition: transform.hpp:83
T resize(T... args)
Provides utilities for modifying characters.