SeqAn3  3.0.1
The Modern C++ library for sequence analysis.
gz_istream.hpp
1 // zipstream Library License:
2 // --------------------------
3 //
4 // The zlib/libpng License Copyright (c) 2003 Jonathan de Halleux.
5 //
6 // This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages arising from the use of this software.
7 //
8 // Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions:
9 //
10 // 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
11 //
12 // 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
13 //
14 // 3. This notice may not be removed or altered from any source distribution
15 //
16 // Altered zipstream library header
17 // Author: Jonathan de Halleux, dehalleux@pelikhan.com, 2003
18 // Author: David Weese <david.weese@fu-berlin.de>
19 // Author: Enrico Siragusa <enrico.siragusa@fu-berlin.de>
20 // Author: Hannes Hauswedell <hannes.hauswedell@fu-berlin.de>
21 
22 #pragma once
23 
24 #include <iostream>
25 #include <cstring>
26 #include <vector>
27 
28 #ifndef SEQAN3_HAS_ZLIB
29 #error "This file cannot be used when building without ZLIB-support."
30 #endif
31 
32 #include <zlib.h>
33 
34 #include <seqan3/core/platform.hpp>
35 
36 namespace seqan3::contrib
37 {
38 
// Default gzip buffer size; change this to suit your needs.
// Declared `inline constexpr` so that all translation units including this header share one definition.
inline constexpr size_t GZ_INPUT_DEFAULT_BUFFER_SIZE = 921600;
41 
42 // --------------------------------------------------------------------------
43 // Class basic_gz_istreambuf
44 // --------------------------------------------------------------------------
45 // A stream decorator that takes compressed input and unzips it to a istream.
46 // The class wraps up the deflate method of the zlib library 1.1.4 https://www.zlib.net
47 
48 template <typename Elem,
49  typename Tr = std::char_traits<Elem>,
50  typename ElemA = std::allocator<Elem>,
51  typename ByteT = unsigned char,
52  typename ByteAT = std::allocator<ByteT>
53  >
54 class basic_gz_istreambuf :
55  public std::basic_streambuf<Elem, Tr>
56 {
57 public:
58  typedef std::basic_istream<Elem, Tr> & istream_reference;
59  typedef ElemA char_allocator_type;
60  typedef ByteT byte_type;
61  typedef ByteAT byte_allocator_type;
62  typedef byte_type * byte_buffer_type;
63  typedef Tr traits_type;
64  typedef typename Tr::char_type char_type;
65  typedef typename Tr::int_type int_type;
66  typedef std::vector<byte_type, byte_allocator_type> byte_vector_type;
67  typedef std::vector<char_type, char_allocator_type> char_vector_type;
68 
69  // Construct a unzip stream
70  // More info on the following parameters can be found in the zlib documentation.
71  basic_gz_istreambuf(istream_reference istream_,
72  size_t window_size_,
73  size_t read_buffer_size_,
74  size_t input_buffer_size_);
75 
76  ~basic_gz_istreambuf();
77 
78  int_type underflow();
79 
80  // returns the compressed input istream
81  istream_reference get_istream() { return m_istream; }
82  // returns the zlib stream structure
83  z_stream & get_zip_stream() { return m_zip_stream; }
84 
85 private:
86  void put_back_from_zip_stream();
87  std::streamsize unzip_from_stream(char_type *, std::streamsize);
88  size_t fill_input_buffer();
89 
90  istream_reference m_istream;
91  z_stream m_zip_stream;
92  int m_err;
93  byte_vector_type m_input_buffer;
94  char_vector_type m_buffer;
95 };
96 
97 // --------------------------------------------------------------------------
98 // Class basic_gz_istreambuf implementation
99 // --------------------------------------------------------------------------
100 
101 template <typename Elem,
102  typename Tr,
103  typename ElemA,
104  typename ByteT,
105  typename ByteAT>
106 basic_gz_istreambuf<Elem, Tr, ElemA, ByteT, ByteAT>::basic_gz_istreambuf(
107  istream_reference istream_,
108  size_t window_size_,
109  size_t read_buffer_size_,
110  size_t input_buffer_size_
111  ) :
112  m_istream(istream_),
113  m_input_buffer(input_buffer_size_),
114  m_buffer(read_buffer_size_)
115 {
116  // setting zalloc, zfree and opaque
117  m_zip_stream.zalloc = (alloc_func)0;
118  m_zip_stream.zfree = (free_func)0;
119 
120  m_zip_stream.next_in = NULL;
121  m_zip_stream.avail_in = 0;
122  m_zip_stream.avail_out = 0;
123  m_zip_stream.next_out = NULL;
124 
125  m_err = inflateInit2(&m_zip_stream, static_cast<int>(window_size_));
126 
127  this->setg(&(m_buffer[0]) + 4, // beginning of putback area
128  &(m_buffer[0]) + 4, // read position
129  &(m_buffer[0]) + 4); // end position
130 }
131 
132 template <typename Elem,
133  typename Tr,
134  typename ElemA,
135  typename ByteT,
136  typename ByteAT>
137 basic_gz_istreambuf<Elem, Tr, ElemA, ByteT, ByteAT>::~basic_gz_istreambuf()
138 {
139  inflateEnd(&m_zip_stream);
140 }
141 
142 template <typename Elem,
143  typename Tr,
144  typename ElemA,
145  typename ByteT,
146  typename ByteAT>
147 typename basic_gz_istreambuf<Elem, Tr, ElemA, ByteT, ByteAT>::int_type
148 basic_gz_istreambuf<Elem, Tr, ElemA, ByteT, ByteAT>::underflow()
149 {
150  if (this->gptr() && (this->gptr() < this->egptr()))
151  return *reinterpret_cast<unsigned char *>(this->gptr());
152 
153  int n_putback = static_cast<int>(this->gptr() - this->eback());
154  if (n_putback > 4)
155  n_putback = 4;
156 
157  std::memmove(&(m_buffer[0]) + (4 - n_putback), this->gptr() - n_putback, n_putback * sizeof(char_type));
158 
159  int num = unzip_from_stream(&(m_buffer[0]) + 4,
160  static_cast<std::streamsize>((m_buffer.size() - 4) * sizeof(char_type)));
161 
162  if (num <= 0) // ERROR or EOF
163  return traits_type::eof();
164 
165  // reset buffer pointers
166  this->setg(&(m_buffer[0]) + (4 - n_putback), // beginning of putback area
167  &(m_buffer[0]) + 4, // read position
168  &(m_buffer[0]) + 4 + num); // end of buffer
169 
170  // return next character
171  return *reinterpret_cast<unsigned char *>(this->gptr());
172 }
173 
174 template <typename Elem,
175  typename Tr,
176  typename ElemA,
177  typename ByteT,
178  typename ByteAT>
179 std::streamsize basic_gz_istreambuf<Elem, Tr, ElemA, ByteT, ByteAT>::unzip_from_stream(
180  char_type * buffer_,
181  std::streamsize buffer_size_)
182 {
183  m_zip_stream.next_out = (byte_buffer_type)buffer_;
184  m_zip_stream.avail_out = static_cast<uInt>(buffer_size_ * sizeof(char_type));
185  size_t count = m_zip_stream.avail_in;
186 
187  do
188  {
189  if (m_zip_stream.avail_in == 0)
190  count = fill_input_buffer();
191 
192  if (m_zip_stream.avail_in)
193  m_err = inflate(&m_zip_stream, Z_SYNC_FLUSH);
194 
195  if (m_err == Z_STREAM_END)
196  inflateReset(&m_zip_stream);
197  else if (m_err < 0)
198  break;
199  }
200  while (m_zip_stream.avail_out > 0 && count > 0);
201 
202  std::streamsize n_read = buffer_size_ - m_zip_stream.avail_out / sizeof(char_type);
203 
204  // check if it is the end
205  if (m_zip_stream.avail_out > 0 && m_err == Z_STREAM_END)
206  put_back_from_zip_stream();
207 
208  return n_read;
209 }
210 
211 template <typename Elem,
212  typename Tr,
213  typename ElemA,
214  typename ByteT,
215  typename ByteAT>
216 size_t basic_gz_istreambuf<Elem, Tr, ElemA, ByteT, ByteAT>::fill_input_buffer()
217 {
218  m_zip_stream.next_in = &(m_input_buffer[0]);
219  m_istream.read((char_type *)(&(m_input_buffer[0])),
220  static_cast<std::streamsize>(m_input_buffer.size() / sizeof(char_type)));
221  return m_zip_stream.avail_in = m_istream.gcount() * sizeof(char_type);
222 }
223 
224 template <typename Elem,
225  typename Tr,
226  typename ElemA,
227  typename ByteT,
228  typename ByteAT>
229 void basic_gz_istreambuf<Elem, Tr, ElemA, ByteT, ByteAT>::put_back_from_zip_stream()
230 {
231  if (m_zip_stream.avail_in == 0)
232  return;
233 
234  m_istream.clear(std::ios::goodbit);
235  m_istream.seekg(-static_cast<int>(m_zip_stream.avail_in), std::ios_base::cur);
236 
237  m_zip_stream.avail_in = 0;
238 }
239 
240 // --------------------------------------------------------------------------
241 // Class basic_gz_istreambase
242 // --------------------------------------------------------------------------
243 // Base class for unzip istreams
244 // Contains a basic_gz_istreambuf.
245 
246 template <typename Elem,
247  typename Tr = std::char_traits<Elem>,
248  typename ElemA = std::allocator<Elem>,
249  typename ByteT = unsigned char,
250  typename ByteAT = std::allocator<ByteT>
251  >
252 class basic_gz_istreambase :
253  virtual public std::basic_ios<Elem, Tr>
254 {
255 public:
256  typedef std::basic_istream<Elem, Tr> & istream_reference;
257  typedef basic_gz_istreambuf<Elem, Tr, ElemA, ByteT, ByteAT> unzip_streambuf_type;
258 
259  basic_gz_istreambase(istream_reference ostream_,
260  size_t window_size_,
261  size_t read_buffer_size_,
262  size_t input_buffer_size_) :
263  m_buf(ostream_, window_size_, read_buffer_size_, input_buffer_size_)
264  {
265  this->init(&m_buf);
266  }
267 
268  // returns the underlying unzip istream object
269  unzip_streambuf_type * rdbuf() { return &m_buf; }
270 
271 private:
272  unzip_streambuf_type m_buf;
273 };
274 
275 // --------------------------------------------------------------------------
276 // Class basic_gz_istream
277 // --------------------------------------------------------------------------
278 // A zipper istream
279 //
280 // This class is a istream decorator that behaves 'almost' like any other ostream.
281 // At construction, it takes any istream that shall be used to input of the compressed data.
282 //
283 // Simlpe example:
284 //
285 // // create a stream on zip string
286 // istringstream istringstream_( ostringstream_.str());
287 // // create unzipper istream
288 // zip_istream unzipper( istringstream_);
289 // // read and unzip
290 // unzipper>>f_r>>d_r>>ui_r>>ul_r>>us_r>>c_r>>dum_r;
291 
292 template <typename Elem,
293  typename Tr = std::char_traits<Elem>,
294  typename ElemA = std::allocator<Elem>,
295  typename ByteT = unsigned char,
296  typename ByteAT = std::allocator<ByteT>
297  >
298 class basic_gz_istream :
299  public basic_gz_istreambase<Elem, Tr, ElemA, ByteT, ByteAT>,
300  public std::basic_istream<Elem, Tr>
301 {
302 public:
303  typedef basic_gz_istreambase<Elem, Tr, ElemA, ByteT, ByteAT> zip_istreambase_type;
304  typedef std::basic_istream<Elem, Tr> istream_type;
305  typedef istream_type & istream_reference;
306  typedef ByteT byte_type;
307  typedef Tr traits_type;
308 
309  // Construct a unzipper stream
310  //
311  // istream_ input buffer
312  // window_size_
313  // read_buffer_size_
314  // input_buffer_size_
315 
316  basic_gz_istream(istream_reference istream_,
317  size_t window_size_ = 31, // 15 (size) + 16 (gzip header)
318  size_t read_buffer_size_ = GZ_INPUT_DEFAULT_BUFFER_SIZE,
319  size_t input_buffer_size_ = GZ_INPUT_DEFAULT_BUFFER_SIZE) :
320  zip_istreambase_type(istream_, window_size_, read_buffer_size_, input_buffer_size_),
321  istream_type(this->rdbuf())
322  {}
323 
324 #ifdef _WIN32
325 private:
326  void _Add_vtordisp1() {} // Required to avoid VC++ warning C4250
327  void _Add_vtordisp2() {} // Required to avoid VC++ warning C4250
328 #endif
329 };
330 
331 // ===========================================================================
332 // Typedefs
333 // ===========================================================================
334 
335 // A typedef for basic_gz_istream<char>
336 typedef basic_gz_istream<char> gz_istream;
337 // A typedef for basic_gz_istream<wchart>
338 typedef basic_gz_istream<wchar_t> gz_wistream;
339 
340 } // namespace seqan3::contrib
std::basic_ios::rdbuf
T rdbuf(T... args)
cstring
output_stream_over::char_type
typename stream::char_type char_type
Declares the associated char type.
vector
std::basic_streambuf
iostream
std::basic_streambuf< Elem, Tr >::underflow
T underflow(T... args)
std::char_traits
std::streamsize
std::basic_ios::init
T init(T... args)
platform.hpp
Provides platform and dependency checks.
std::allocator
std::memmove
T memmove(T... args)
std::basic_istream< Elem, Tr >
seqan3::pack_traits::count
constexpr ptrdiff_t count
Count the occurrences of a type in a pack.
Definition: traits.hpp:134
std::basic_ios