SeqAn3  3.0.2
The Modern C++ library for sequence analysis.
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
gz_ostream.hpp
1 // zipstream Library License:
2 // --------------------------
3 //
4 // The zlib/libpng License Copyright (c) 2003 Jonathan de Halleux.
5 //
6 // This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages arising from the use of this software.
7 //
8 // Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions:
9 //
10 // 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
11 //
12 // 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
13 //
14 // 3. This notice may not be removed or altered from any source distribution
15 //
16 // Altered zipstream library header
17 // Author: Jonathan de Halleux, dehalleux@pelikhan.com, 2003
18 // Author: David Weese <david.weese@fu-berlin.de>
19 // Author: Enrico Siragusa <enrico.siragusa@fu-berlin.de>
20 // Author: Hannes Hauswedell <hannes.hauswedell@fu-berlin.de>
21 
22 #pragma once
23 
24 #ifndef SEQAN3_HAS_ZLIB
25 #error "This file cannot be used when building without ZLIB-support."
26 #endif
27 
28 #include <iostream>
29 #include <cstring>
30 #include <vector>
31 
32 #include <zlib.h>
33 
34 namespace seqan3::contrib
35 {
36 
37 // Default gzip buffer size, change this to suit your needs.
// Used as the default for the buffer_size_ constructor argument of basic_gz_ostream (900 * 1024).
// `inline constexpr` gives the header a single definition shared by every translation unit
// instead of one internal-linkage copy per translation unit.
inline constexpr size_t GZ_OUTPUT_DEFAULT_BUFFER_SIZE = 921600;
39 
40 // --------------------------------------------------------------------------
41 // Enum EStrategy
42 // --------------------------------------------------------------------------
43 // Compression strategy, see zlib doc.
44 
// Compression strategy selector. The numeric value is handed to zlib's deflateInit2()
// unchanged (via static_cast<int>), so the enumerators mirror zlib's
// Z_DEFAULT_STRATEGY (0), Z_FILTERED (1) and Z_HUFFMAN_ONLY (2).
enum EStrategy
{
    DefaultStrategy     = 0,
    StrategyFiltered    = 1,
    StrategyHuffmanOnly = 2
};
51 
52 // --------------------------------------------------------------------------
53 // Class basic_gz_ostreambuf
54 // --------------------------------------------------------------------------
55 // A stream decorator that takes raw input and zips it to an ostream.
56 // The class wraps up the deflate method of the zlib library 1.1.4 https://www.zlib.net
57 
58 template <typename Elem,
59  typename Tr = std::char_traits<Elem>,
60  typename ElemA = std::allocator<Elem>,
61  typename ByteT = unsigned char,
62  typename ByteAT = std::allocator<ByteT>
63  >
64 class basic_gz_ostreambuf :
65  public std::basic_streambuf<Elem, Tr>
66 {
67 public:
68  typedef std::basic_ostream<Elem, Tr> & ostream_reference;
69  typedef ElemA char_allocator_type;
70  typedef ByteT byte_type;
71  typedef ByteAT byte_allocator_type;
72  typedef byte_type * byte_buffer_type;
73  typedef Tr traits_type;
74  typedef typename Tr::char_type char_type;
75  typedef typename Tr::int_type int_type;
76  typedef std::vector<byte_type, byte_allocator_type> byte_vector_type;
77  typedef std::vector<char_type, char_allocator_type> char_vector_type;
78 
79  // Construct a zip stream
80  // More info on the following parameters can be found in the zlib documentation.
81  basic_gz_ostreambuf(ostream_reference ostream_,
82  size_t level_,
83  EStrategy strategy_,
84  size_t window_size_,
85  size_t memory_level_,
86  size_t buffer_size_);
87 
88  ~basic_gz_ostreambuf();
89 
90  int sync();
91  int_type overflow(int_type c);
92 
93  // flushes the zip buffer and output buffer.
94  // This method should be called at the end of the compression.
95  // Calling flush multiple times, will lower the compression ratio.
97 
98  // flushes the zip buffer and output buffer and finalize the zip stream
99  // This method should be called at the end of the compression.
100  std::streamsize flush_finalize();
101 
102 
103 private:
104  bool zip_to_stream(char_type *, std::streamsize);
105  size_t fill_input_buffer();
106  // flush the zip buffer using a particular mode and flush output buffer
107  std::streamsize flush(int flush_mode);
108 
109  ostream_reference m_ostream;
110  z_stream m_zip_stream;
111  int m_err;
112  byte_vector_type m_output_buffer;
113  char_vector_type m_buffer;
114 };
115 
116 // --------------------------------------------------------------------------
117 // Class basic_gz_ostreambuf implementation
118 // --------------------------------------------------------------------------
119 
120 template <typename Elem,
121  typename Tr,
122  typename ElemA,
123  typename ByteT,
124  typename ByteAT>
125 basic_gz_ostreambuf<Elem, Tr, ElemA, ByteT, ByteAT>::basic_gz_ostreambuf(
126  ostream_reference ostream_,
127  size_t level_,
128  EStrategy strategy_,
129  size_t window_size_,
130  size_t memory_level_,
131  size_t buffer_size_
132  ) :
133  m_ostream(ostream_),
134  m_output_buffer(buffer_size_, 0),
135  m_buffer(buffer_size_, 0)
136 {
137  m_zip_stream.zalloc = (alloc_func)0;
138  m_zip_stream.zfree = (free_func)0;
139 
140  m_zip_stream.next_in = NULL;
141  m_zip_stream.avail_in = 0;
142  m_zip_stream.avail_out = 0;
143  m_zip_stream.next_out = NULL;
144 
145  m_err = deflateInit2(
146  &m_zip_stream,
147  std::min(9, static_cast<int>(level_)),
148  Z_DEFLATED,
149  static_cast<int>(window_size_),
150  std::min(9, static_cast<int>(memory_level_)),
151  static_cast<int>(strategy_)
152  );
153 
154  this->setp(&(m_buffer[0]), &(m_buffer[m_buffer.size() - 1]));
155 }
156 
157 template <typename Elem,
158  typename Tr,
159  typename ElemA,
160  typename ByteT,
161  typename ByteAT>
162 basic_gz_ostreambuf<Elem, Tr, ElemA, ByteT, ByteAT>::~basic_gz_ostreambuf()
163 {
164  flush_finalize();
165  m_ostream.flush();
166  m_err = deflateEnd(&m_zip_stream);
167 }
168 
169 template <typename Elem,
170  typename Tr,
171  typename ElemA,
172  typename ByteT,
173  typename ByteAT>
174 int basic_gz_ostreambuf<Elem, Tr, ElemA, ByteT, ByteAT>::sync()
175 {
176  if (this->pptr() && this->pptr() > this->pbase())
177  {
178  if (traits_type::eq_int_type(overflow(traits_type::eof()), traits_type::eof()))
179  return -1;
180  }
181 
182  return 0;
183 }
184 
185 template <typename Elem,
186  typename Tr,
187  typename ElemA,
188  typename ByteT,
189  typename ByteAT>
190 typename basic_gz_ostreambuf<Elem, Tr, ElemA, ByteT, ByteAT>::int_type
191 basic_gz_ostreambuf<Elem, Tr, ElemA, ByteT, ByteAT>::overflow(
192  typename basic_gz_ostreambuf<Elem, Tr, ElemA, ByteT, ByteAT>::int_type c)
193 {
194  int w = static_cast<int>(this->pptr() - this->pbase());
195 
196  if (!traits_type::eq_int_type(c, traits_type::eof()))
197  {
198  *this->pptr() = c;
199  ++w;
200  }
201 
202  if (zip_to_stream(this->pbase(), w))
203  {
204  this->setp(this->pbase(), this->epptr() - 1);
205  return c;
206  }
207  else
208  {
209  return traits_type::eof();
210  }
211 }
212 
213 template <typename Elem,
214  typename Tr,
215  typename ElemA,
216  typename ByteT,
217  typename ByteAT>
218 bool basic_gz_ostreambuf<Elem, Tr, ElemA, ByteT, ByteAT>::zip_to_stream(
219  typename basic_gz_ostreambuf<Elem, Tr, ElemA, ByteT, ByteAT>::char_type * buffer_,
220  std::streamsize buffer_size_)
221 {
222  std::streamsize written_byte_size = 0, total_written_byte_size = 0;
223 
224  m_zip_stream.next_in = (byte_buffer_type)buffer_;
225  m_zip_stream.avail_in = static_cast<uInt>(buffer_size_ * sizeof(char_type));
226  m_zip_stream.avail_out = static_cast<uInt>(m_output_buffer.size());
227  m_zip_stream.next_out = &(m_output_buffer[0]);
228  size_t remainder = 0;
229 
230  do
231  {
232  m_err = deflate(&m_zip_stream, 0);
233 
234  if (m_err == Z_OK || m_err == Z_STREAM_END)
235  {
236  written_byte_size = static_cast<std::streamsize>(m_output_buffer.size()) - m_zip_stream.avail_out;
237  total_written_byte_size += written_byte_size;
238 
239  // ouput buffer is full, dumping to ostream
240  m_ostream.write((const char_type *) &(m_output_buffer[0]),
241  static_cast<std::streamsize>(written_byte_size / sizeof(char_type)));
242 
243  // checking if some bytes were not written.
244  if ((remainder = written_byte_size % sizeof(char_type)) != 0)
245  {
246  // copy to the beginning of the stream
247  std::memmove(&(m_output_buffer[0]),
248  &(m_output_buffer[written_byte_size - remainder]),
249  remainder);
250  }
251 
252  m_zip_stream.avail_out = static_cast<uInt>(m_output_buffer.size() - remainder);
253  m_zip_stream.next_out = &m_output_buffer[remainder];
254  }
255  }
256  while (m_zip_stream.avail_in != 0 && m_err == Z_OK);
257 
258  return m_err == Z_OK;
259 }
260 
261 template <typename Elem,
262  typename Tr,
263  typename ElemA,
264  typename ByteT,
265  typename ByteAT>
266 std::streamsize basic_gz_ostreambuf<Elem, Tr, ElemA, ByteT, ByteAT>::flush(int flush_mode)
267 {
268  int const buffer_size = static_cast<int>(this->pptr() - this->pbase()); // amount of data currently in buffer
269 
270  std::streamsize written_byte_size = 0, total_written_byte_size = 0;
271 
272  m_zip_stream.next_in = (byte_buffer_type) this->pbase();
273  m_zip_stream.avail_in = static_cast<uInt>(buffer_size * sizeof(char_type));
274  m_zip_stream.avail_out = static_cast<uInt>(m_output_buffer.size());
275  m_zip_stream.next_out = &(m_output_buffer[0]);
276  size_t remainder = 0;
277 
278  do
279  {
280  m_err = deflate(&m_zip_stream, flush_mode);
281  if (m_err == Z_OK || m_err == Z_STREAM_END)
282  {
283  written_byte_size = static_cast<std::streamsize>(m_output_buffer.size()) - m_zip_stream.avail_out;
284  total_written_byte_size += written_byte_size;
285 
286  // ouput buffer is full, dumping to ostream
287  m_ostream.write((const char_type *) &(m_output_buffer[0]),
288  static_cast<std::streamsize>(written_byte_size / sizeof(char_type) * sizeof(byte_type)));
289 
290  // checking if some bytes were not written.
291  if ((remainder = written_byte_size % sizeof(char_type)) != 0)
292  {
293  // copy to the beginning of the stream
294  std::memmove(&(m_output_buffer[0]),
295  &(m_output_buffer[written_byte_size - remainder]),
296  remainder);
297  }
298 
299  m_zip_stream.avail_out = static_cast<uInt>(m_output_buffer.size() - remainder);
300  m_zip_stream.next_out = &m_output_buffer[remainder];
301  }
302  }
303  while (m_err == Z_OK);
304 
305  m_ostream.flush();
306 
307  return total_written_byte_size;
308 }
309 
310 template <typename Elem,
311  typename Tr,
312  typename ElemA,
313  typename ByteT,
314  typename ByteAT>
315 std::streamsize basic_gz_ostreambuf<Elem, Tr, ElemA, ByteT, ByteAT>::flush()
316 {
317  return flush(Z_SYNC_FLUSH);
318 }
319 
320 template <typename Elem,
321  typename Tr,
322  typename ElemA,
323  typename ByteT,
324  typename ByteAT>
325 std::streamsize basic_gz_ostreambuf<Elem, Tr, ElemA, ByteT, ByteAT>::flush_finalize()
326 {
327  return flush(Z_FINISH);
328 }
329 
330 // --------------------------------------------------------------------------
331 // Class basic_gz_ostreambase
332 // --------------------------------------------------------------------------
333 // Base class for zip ostreams.
334 // Contains a basic_gz_ostreambuf.
335 
336 template <typename Elem,
337  typename Tr = std::char_traits<Elem>,
338  typename ElemA = std::allocator<Elem>,
339  typename ByteT = unsigned char,
340  typename ByteAT = std::allocator<ByteT>
341  >
342 class basic_gz_ostreambase :
343  virtual public std::basic_ios<Elem, Tr>
344 {
345 public:
346  typedef std::basic_ostream<Elem, Tr> & ostream_reference;
347  typedef basic_gz_ostreambuf<Elem, Tr, ElemA, ByteT, ByteAT> zip_streambuf_type;
348 
349  // Construct a zip stream
350  // More info on the following parameters can be found in the zlib documentation.
351  basic_gz_ostreambase(ostream_reference ostream_,
352  size_t level_,
353  EStrategy strategy_,
354  size_t window_size_,
355  size_t memory_level_,
356  size_t buffer_size_) :
357  m_buf(ostream_, level_, strategy_, window_size_, memory_level_, buffer_size_)
358  {
359  this->init(&m_buf);
360  }
361 
362  // returns the underlying zip ostream object
363  zip_streambuf_type * rdbuf() { return &m_buf; }
364 
365 private:
366  zip_streambuf_type m_buf;
367 };
368 
369 // --------------------------------------------------------------------------
370 // Class basic_gz_ostream
371 // --------------------------------------------------------------------------
372 // A zipper ostream
373 //
374 // This class is a ostream decorator that behaves 'almost' like any other ostream.
375 // At construction, it takes any ostream that shall be used to output of the compressed data.
376 // When finished, you need to call flush() or let the destructor run
377 // to flush all the intermediate streams.
378 //
379 // Example:
380 //
381 // // creating the target zip string, could be a fstream
382 // ostringstream ostringstream_;
383 // // creating the zip layer
384 // gz_ostream zipper(ostringstream_);
385 // // writing data
386 // zipper<<f<<" "<<d<<" "<<ui<<" "<<ul<<" "<<us<<" "<<c<<" "<<dum;
387 // // zip ostream needs special flushing...
388 // zipper.flush();
389 
390 template <typename Elem,
391  typename Tr = std::char_traits<Elem>,
392  typename ElemA = std::allocator<Elem>,
393  typename ByteT = unsigned char,
394  typename ByteAT = std::allocator<ByteT>
395  >
396 class basic_gz_ostream :
397  public basic_gz_ostreambase<Elem, Tr, ElemA, ByteT, ByteAT>,
398  public std::basic_ostream<Elem, Tr>
399 {
400 public:
401  typedef basic_gz_ostreambase<Elem, Tr, ElemA, ByteT, ByteAT> zip_ostreambase_type;
402  typedef std::basic_ostream<Elem, Tr> ostream_type;
403  typedef ostream_type & ostream_reference;
404 
405  // Constructs a zipper ostream decorator
406  //
407  // ostream_ ostream where the compressed output is written
408  // is_gzip_ true if gzip header and footer have to be added
409  // level_ level of compression 0, bad and fast, 9, good and slower,
410  // strategy_ compression strategy
411  // window_size_ see zlib doc
412  // memory_level_ see zlib doc
413  // buffer_size_ the buffer size used to zip data
414 
415  basic_gz_ostream(ostream_reference ostream_,
416  size_t level_ = Z_DEFAULT_COMPRESSION,
417  EStrategy strategy_ = DefaultStrategy,
418  size_t window_size_ = 31, // 15 (size) + 16 (gzip header)
419  size_t memory_level_ = 8,
420  size_t buffer_size_ = GZ_OUTPUT_DEFAULT_BUFFER_SIZE) :
421  zip_ostreambase_type(ostream_, level_, strategy_, window_size_, memory_level_, buffer_size_),
422  ostream_type(this->rdbuf())
423  {}
424 
425  ~basic_gz_ostream()
426  {
427  ostream_type::flush(); this->rdbuf()->flush_finalize();
428  }
429 
430  // flush inner buffer and zipper buffer
431  basic_gz_ostream<Elem, Tr> & flush()
432  {
433  ostream_type::flush(); this->rdbuf()->flush(); return *this;
434  }
435 
436 #ifdef _WIN32
437 private:
438  void _Add_vtordisp1() {} // Required to avoid VC++ warning C4250
439  void _Add_vtordisp2() {} // Required to avoid VC++ warning C4250
440 #endif
441 };
442 
443 // ===========================================================================
444 // Typedefs
445 // ===========================================================================
446 
447 // A typedef for basic_gz_ostream<char>
448 typedef basic_gz_ostream<char> gz_ostream;
449 // A typedef for basic_gz_ostream<wchar_t>
450 typedef basic_gz_ostream<wchar_t> gz_wostream;
451 
452 } // namespace seqan3::contrib
std::basic_ios::rdbuf
T rdbuf(T... args)
cstring
output_stream_over::char_type
typename stream::char_type char_type
Declares the associated char type.
vector
std::remainder
T remainder(T... args)
std::basic_streambuf
iostream
std::basic_streambuf< Elem, std::char_traits< Elem > >::sync
T sync(T... args)
std::char_traits
std::streamsize
std::basic_ostream
std::flush
T flush(T... args)
std::basic_ios::init
T init(T... args)
std::min
T min(T... args)
std::basic_streambuf< Elem, std::char_traits< Elem > >::overflow
T overflow(T... args)
std::allocator
std::memmove
T memmove(T... args)
std::basic_ios