SeqAn3  3.0.0
The Modern C++ library for sequence analysis.
csa_alphabet_strategy.hpp
Go to the documentation of this file.
1 // Copyright (c) 2018, the SDSL Project Authors. All rights reserved.
2 // Please see the AUTHORS file for details. Use of this source code is governed
3 // by a BSD license that can be found in the LICENSE file.
4 
5 // -----------------------------------------------------------------------------------------------------
6 // Copyright (c) 2006-2019, Knut Reinert & Freie Universität Berlin
7 // Copyright (c) 2016-2019, Knut Reinert & MPI für molekulare Genetik
8 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
9 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
10 // -----------------------------------------------------------------------------------------------------
11 
20 #pragma once
21 
22 #include <string>
23 
24 #include <sdsl/config.hpp>
25 #include <sdsl/int_vector.hpp>
26 #include <sdsl/rank_support.hpp>
27 #include <sdsl/sdsl_concepts.hpp>
28 #include <sdsl/select_support.hpp>
29 
30 namespace sdsl
31 {
32 
34  // This is recommended when the underlying text uses the entire alphabet and not just a small subset.
36  {
38  public:
39  class mapping_wrapper;
40 
41  typedef int_vector<>::size_type size_type;
42  typedef mapping_wrapper char2comp_type;
43  typedef mapping_wrapper comp2char_type;
44  typedef int_vector<64> C_type;
45  typedef uint16_t sigma_type;
46  typedef uint8_t char_type;
47  typedef uint8_t comp_char_type;
48  typedef std::string string_type;
49  typedef byte_alphabet_tag alphabet_category;
50  enum { int_width = 8 };
51 
53  class mapping_wrapper
54  {
55  public:
56  mapping_wrapper() {}
57 
58  constexpr char_type operator[](char_type const c) const noexcept
59  {
60  return c;
61  }
62  };
63 
64  const char2comp_type char2comp;
65  const comp2char_type comp2char;
66  const C_type & C;
67  const sigma_type & sigma;
68 
69  private:
70  C_type m_C; // Cumulative counts for the compact alphabet [0..sigma].
71  sigma_type m_sigma; // Effective size of the alphabet.
72 
73  public:
75  plain_byte_alphabet() : C(m_C), sigma(m_sigma), m_sigma(0)
76  {}
77 
82  plain_byte_alphabet(int_vector_buffer<8> & text_buf, int_vector_size_type len) : C(m_C), sigma(m_sigma)
83  {
84  m_sigma = 0;
85  if (0 == len || 0 == text_buf.size())
86  return;
87 
88  assert(len <= text_buf.size());
89 
90  // initialize vectors
91  m_C = int_vector<64>(257, 0);
92  // count occurrences of each symbol
93  for (size_type i = 0; i < len; ++i)
94  ++m_C[text_buf[i]];
95 
96  assert(1 == m_C[0]); // null-byte should occur exactly once
97 
98  m_sigma = 255;
99  for (int i = 0; i < 256; ++i)
100  {
101  if (m_C[i])
102  {
103  m_sigma = i + 1;
104  // m_C[m_sigma] = m_C[i];
105  // ++m_sigma;
106  }
107  }
108  // m_C.resize(m_sigma + 1);
109  for (int i = (int) 256; i > 0; --i)
110  m_C[i] = m_C[i - 1];
111  m_C[0] = 0;
112  for (int i = 1; i <= (int) 256; ++i)
113  m_C[i] += m_C[i - 1];
114 
115  assert(C[sigma] == len);
116  }
117 
118  plain_byte_alphabet(plain_byte_alphabet const & strat) : C(m_C),
119  sigma(m_sigma),
120  m_C(strat.m_C),
121  m_sigma(strat.m_sigma)
122  {}
123 
124  plain_byte_alphabet(plain_byte_alphabet && strat) : C(m_C),
125  sigma(m_sigma),
126  m_C(std::move(strat.m_C)),
127  m_sigma(strat.m_sigma)
128  {}
129 
130  plain_byte_alphabet & operator=(plain_byte_alphabet const & strat)
131  {
132  if (this != &strat)
133  {
134  plain_byte_alphabet tmp(strat);
135  *this = std::move(tmp);
136  }
137  return *this;
138  }
139 
140  plain_byte_alphabet & operator=(plain_byte_alphabet && strat)
141  {
142  if (this != &strat)
143  {
144  m_C = std::move(strat.m_C);
145  m_sigma = std::move(strat.m_sigma);
146  }
147  return *this;
148  }
149 
150  size_type serialize(std::ostream & out, structure_tree_node * v, std::string name = "") const
151  {
152  structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
153  size_type written_bytes = 0;
154  written_bytes += m_C.serialize(out, child, "m_C");
155  written_bytes += write_member(m_sigma, out, child, "m_sigma");
156  structure_tree::add_size(child, written_bytes);
157  return written_bytes;
158  }
159 
160  void load(std::istream & in)
161  {
162  m_C.load(in);
163  read_member(m_sigma, in);
164  }
165 
166  template <typename archive_t>
167  void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
168  {
169  ar(CEREAL_NVP(m_C));
170  ar(CEREAL_NVP(m_sigma));
171  }
172 
173  template <typename archive_t>
174  void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar)
175  {
176  ar(CEREAL_NVP(m_C));
177  ar(CEREAL_NVP(m_sigma));
178  }
179 
180  bool operator==(plain_byte_alphabet const & other) const noexcept
181  {
182  return (m_C == other.m_C) && (m_sigma == other.m_sigma);
183  }
184 
185  bool operator!=(plain_byte_alphabet const & other) const noexcept
186  {
187  return !(*this == other);
188  }
190  };
191 
192 }
Byte alphabet that does no mapping of char_type to comp_char_type and vice versa. ...
Definition: csa_alphabet_strategy.hpp:35
Definition: csa_alphabet_strategy.hpp:30