SeqAn3  3.0.1
The Modern C++ library for sequence analysis.
csa_alphabet_strategy.hpp
Go to the documentation of this file.
1 // Copyright (c) 2018, the SDSL Project Authors. All rights reserved.
2 // Please see the AUTHORS file for details. Use of this source code is governed
3 // by a BSD license that can be found in the LICENSE file.
4 
5 // -----------------------------------------------------------------------------------------------------
6 // Copyright (c) 2006-2020, Knut Reinert & Freie Universität Berlin
7 // Copyright (c) 2016-2020, Knut Reinert & MPI für molekulare Genetik
8 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
9 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
10 // -----------------------------------------------------------------------------------------------------
11 
20 #pragma once
21 
22 #include <string>
23 
24 #include <sdsl/config.hpp>
25 #include <sdsl/int_vector.hpp>
26 #include <sdsl/rank_support.hpp>
27 #include <sdsl/sdsl_concepts.hpp>
28 #include <sdsl/select_support.hpp>
29 
30 #include <seqan3/core/platform.hpp>
31 
32 namespace sdsl
33 {
34 
36  // This is recommended when the underlying text uses the entire alphabet and not just a small subset.
38  {
40  public:
41  class mapping_wrapper;
42 
43  typedef int_vector<>::size_type size_type;
44  typedef mapping_wrapper char2comp_type;
45  typedef mapping_wrapper comp2char_type;
46  typedef int_vector<64> C_type;
47  typedef uint16_t sigma_type;
48  typedef uint8_t char_type;
49  typedef uint8_t comp_char_type;
50  typedef std::string string_type;
51  typedef byte_alphabet_tag alphabet_category;
52  enum { int_width = 8 };
53 
55  class mapping_wrapper
56  {
57  public:
58  mapping_wrapper() {}
59 
60  constexpr char_type operator[](char_type const c) const noexcept
61  {
62  return c;
63  }
64  };
65 
66  const char2comp_type char2comp;
67  const comp2char_type comp2char;
68  const C_type & C;
69  const sigma_type & sigma;
70 
71  private:
72  C_type m_C; // Cumulative counts for the compact alphabet [0..sigma].
73  sigma_type m_sigma; // Effective size of the alphabet.
74 
75  public:
77  plain_byte_alphabet() : C(m_C), sigma(m_sigma), m_sigma(0)
78  {}
79 
84  plain_byte_alphabet(int_vector_buffer<8> & text_buf, int_vector_size_type len) : C(m_C), sigma(m_sigma)
85  {
86  m_sigma = 0;
87  if (0 == len || 0 == text_buf.size())
88  return;
89 
90  assert(len <= text_buf.size());
91 
92  // initialize vectors
93  m_C = int_vector<64>(257, 0);
94  // count occurrences of each symbol
95  for (size_type i = 0; i < len; ++i)
96  ++m_C[text_buf[i]];
97 
98  assert(1 == m_C[0]); // null-byte should occur exactly once
99 
100  m_sigma = 255;
101  for (int i = 0; i < 256; ++i)
102  {
103  if (m_C[i])
104  {
105  m_sigma = i + 1;
106  // m_C[m_sigma] = m_C[i];
107  // ++m_sigma;
108  }
109  }
110  // m_C.resize(m_sigma + 1);
111  for (int i = (int) 256; i > 0; --i)
112  m_C[i] = m_C[i - 1];
113  m_C[0] = 0;
114  for (int i = 1; i <= (int) 256; ++i)
115  m_C[i] += m_C[i - 1];
116 
117  assert(C[sigma] == len);
118  }
119 
120  plain_byte_alphabet(plain_byte_alphabet const & strat) : C(m_C),
121  sigma(m_sigma),
122  m_C(strat.m_C),
123  m_sigma(strat.m_sigma)
124  {}
125 
126  plain_byte_alphabet(plain_byte_alphabet && strat) : C(m_C),
127  sigma(m_sigma),
128  m_C(std::move(strat.m_C)),
129  m_sigma(strat.m_sigma)
130  {}
131 
132  plain_byte_alphabet & operator=(plain_byte_alphabet const & strat)
133  {
134  if (this != &strat)
135  {
136  plain_byte_alphabet tmp(strat);
137  *this = std::move(tmp);
138  }
139  return *this;
140  }
141 
142  plain_byte_alphabet & operator=(plain_byte_alphabet && strat)
143  {
144  if (this != &strat)
145  {
146  m_C = std::move(strat.m_C);
147  m_sigma = std::move(strat.m_sigma);
148  }
149  return *this;
150  }
151 
152  size_type serialize(std::ostream & out, structure_tree_node * v, std::string name = "") const
153  {
154  structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
155  size_type written_bytes = 0;
156  written_bytes += m_C.serialize(out, child, "m_C");
157  written_bytes += write_member(m_sigma, out, child, "m_sigma");
158  structure_tree::add_size(child, written_bytes);
159  return written_bytes;
160  }
161 
162  void load(std::istream & in)
163  {
164  m_C.load(in);
165  read_member(m_sigma, in);
166  }
167 
168  template <typename archive_t>
169  void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
170  {
171  ar(CEREAL_NVP(m_C));
172  ar(CEREAL_NVP(m_sigma));
173  }
174 
175  template <typename archive_t>
176  void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar)
177  {
178  ar(CEREAL_NVP(m_C));
179  ar(CEREAL_NVP(m_sigma));
180  }
181 
182  bool operator==(plain_byte_alphabet const & other) const noexcept
183  {
184  return (m_C == other.m_C) && (m_sigma == other.m_sigma);
185  }
186 
187  bool operator!=(plain_byte_alphabet const & other) const noexcept
188  {
189  return !(*this == other);
190  }
192  };
193 
194 }
std::string
seqan3::views::move
const auto move
A view that turns lvalue-references into rvalue-references.
Definition: move.hpp:68
std::ostream
sdsl::plain_byte_alphabet
Byte alphabet that does no mapping of char_type to comp_char_type and vice versa.
Definition: csa_alphabet_strategy.hpp:37
platform.hpp
Provides platform and dependency checks.
std::istream
string