SeqAn3  3.0.1
The Modern C++ library for sequence analysis.
gz_ostream.hpp
1 // zipstream Library License:
2 // --------------------------
3 //
4 // The zlib/libpng License Copyright (c) 2003 Jonathan de Halleux.
5 //
6 // This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages arising from the use of this software.
7 //
8 // Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions:
9 //
10 // 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
11 //
12 // 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
13 //
14 // 3. This notice may not be removed or altered from any source distribution
15 //
16 // Altered zipstream library header
17 // Author: Jonathan de Halleux, dehalleux@pelikhan.com, 2003
18 // Author: David Weese <david.weese@fu-berlin.de>
19 // Author: Enrico Siragusa <enrico.siragusa@fu-berlin.de>
20 // Author: Hannes Hauswedell <hannes.hauswedell@fu-berlin.de>
21 
22 #pragma once
23 
24 #ifndef SEQAN3_HAS_ZLIB
25 #error "This file cannot be used when building without ZLIB-support."
26 #endif
27 
#include <algorithm>
#include <cstring>
#include <iostream>
#include <vector>

#include <zlib.h>

#include <seqan3/core/platform.hpp>
35 
36 namespace seqan3::contrib
37 {
38 
// Default gzip buffer size (number of buffer elements); change this to suit your needs.
// It is the default value of the `buffer_size_` argument of basic_gz_ostream.
// `inline constexpr` gives the header a single definition across translation units.
inline constexpr size_t GZ_OUTPUT_DEFAULT_BUFFER_SIZE = 921600;
41 
42 // --------------------------------------------------------------------------
43 // Enum EStrategy
44 // --------------------------------------------------------------------------
45 // Compression strategy, see zlib doc.
46 
// Compression strategy selector.
// The numeric value is cast to int and handed to deflateInit2 as its strategy argument,
// so the enumerator values must stay exactly as they are (see the zlib documentation).
enum EStrategy
{
    DefaultStrategy     = 0,
    StrategyFiltered    = 1,
    StrategyHuffmanOnly = 2
};
53 
54 // --------------------------------------------------------------------------
55 // Class basic_gz_ostreambuf
56 // --------------------------------------------------------------------------
57 // A stream decorator that takes raw input and zips it to an ostream.
58 // The class wraps up the deflate method of the zlib library 1.1.4 https://www.zlib.net
59 
60 template <typename Elem,
61  typename Tr = std::char_traits<Elem>,
62  typename ElemA = std::allocator<Elem>,
63  typename ByteT = unsigned char,
64  typename ByteAT = std::allocator<ByteT>
65  >
66 class basic_gz_ostreambuf :
67  public std::basic_streambuf<Elem, Tr>
68 {
69 public:
70  typedef std::basic_ostream<Elem, Tr> & ostream_reference;
71  typedef ElemA char_allocator_type;
72  typedef ByteT byte_type;
73  typedef ByteAT byte_allocator_type;
74  typedef byte_type * byte_buffer_type;
75  typedef Tr traits_type;
76  typedef typename Tr::char_type char_type;
77  typedef typename Tr::int_type int_type;
78  typedef std::vector<byte_type, byte_allocator_type> byte_vector_type;
79  typedef std::vector<char_type, char_allocator_type> char_vector_type;
80 
81  // Construct a zip stream
82  // More info on the following parameters can be found in the zlib documentation.
83  basic_gz_ostreambuf(ostream_reference ostream_,
84  size_t level_,
85  EStrategy strategy_,
86  size_t window_size_,
87  size_t memory_level_,
88  size_t buffer_size_);
89 
90  ~basic_gz_ostreambuf();
91 
92  int sync();
93  int_type overflow(int_type c);
94 
95  // flushes the zip buffer and output buffer.
96  // This method should be called at the end of the compression.
97  // Calling flush multiple times, will lower the compression ratio.
99 
100  // flushes the zip buffer and output buffer and finalize the zip stream
101  // This method should be called at the end of the compression.
102  std::streamsize flush_finalize();
103 
104 
105 private:
106  bool zip_to_stream(char_type *, std::streamsize);
107  size_t fill_input_buffer();
108  // flush the zip buffer using a particular mode and flush output buffer
109  std::streamsize flush(int flush_mode);
110 
111  ostream_reference m_ostream;
112  z_stream m_zip_stream;
113  int m_err;
114  byte_vector_type m_output_buffer;
115  char_vector_type m_buffer;
116 };
117 
118 // --------------------------------------------------------------------------
119 // Class basic_gz_ostreambuf implementation
120 // --------------------------------------------------------------------------
121 
122 template <typename Elem,
123  typename Tr,
124  typename ElemA,
125  typename ByteT,
126  typename ByteAT>
127 basic_gz_ostreambuf<Elem, Tr, ElemA, ByteT, ByteAT>::basic_gz_ostreambuf(
128  ostream_reference ostream_,
129  size_t level_,
130  EStrategy strategy_,
131  size_t window_size_,
132  size_t memory_level_,
133  size_t buffer_size_
134  ) :
135  m_ostream(ostream_),
136  m_output_buffer(buffer_size_, 0),
137  m_buffer(buffer_size_, 0)
138 {
139  m_zip_stream.zalloc = (alloc_func)0;
140  m_zip_stream.zfree = (free_func)0;
141 
142  m_zip_stream.next_in = NULL;
143  m_zip_stream.avail_in = 0;
144  m_zip_stream.avail_out = 0;
145  m_zip_stream.next_out = NULL;
146 
147  m_err = deflateInit2(
148  &m_zip_stream,
149  std::min(9, static_cast<int>(level_)),
150  Z_DEFLATED,
151  static_cast<int>(window_size_),
152  std::min(9, static_cast<int>(memory_level_)),
153  static_cast<int>(strategy_)
154  );
155 
156  this->setp(&(m_buffer[0]), &(m_buffer[m_buffer.size() - 1]));
157 }
158 
159 template <typename Elem,
160  typename Tr,
161  typename ElemA,
162  typename ByteT,
163  typename ByteAT>
164 basic_gz_ostreambuf<Elem, Tr, ElemA, ByteT, ByteAT>::~basic_gz_ostreambuf()
165 {
166  flush_finalize();
167  m_ostream.flush();
168  m_err = deflateEnd(&m_zip_stream);
169 }
170 
171 template <typename Elem,
172  typename Tr,
173  typename ElemA,
174  typename ByteT,
175  typename ByteAT>
176 int basic_gz_ostreambuf<Elem, Tr, ElemA, ByteT, ByteAT>::sync()
177 {
178  if (this->pptr() && this->pptr() > this->pbase())
179  {
180  if (traits_type::eq_int_type(overflow(traits_type::eof()), traits_type::eof()))
181  return -1;
182  }
183 
184  return 0;
185 }
186 
187 template <typename Elem,
188  typename Tr,
189  typename ElemA,
190  typename ByteT,
191  typename ByteAT>
192 typename basic_gz_ostreambuf<Elem, Tr, ElemA, ByteT, ByteAT>::int_type
193 basic_gz_ostreambuf<Elem, Tr, ElemA, ByteT, ByteAT>::overflow(
194  typename basic_gz_ostreambuf<Elem, Tr, ElemA, ByteT, ByteAT>::int_type c)
195 {
196  int w = static_cast<int>(this->pptr() - this->pbase());
197 
198  if (!traits_type::eq_int_type(c, traits_type::eof()))
199  {
200  *this->pptr() = c;
201  ++w;
202  }
203 
204  if (zip_to_stream(this->pbase(), w))
205  {
206  this->setp(this->pbase(), this->epptr() - 1);
207  return c;
208  }
209  else
210  {
211  return traits_type::eof();
212  }
213 }
214 
215 template <typename Elem,
216  typename Tr,
217  typename ElemA,
218  typename ByteT,
219  typename ByteAT>
220 bool basic_gz_ostreambuf<Elem, Tr, ElemA, ByteT, ByteAT>::zip_to_stream(
221  typename basic_gz_ostreambuf<Elem, Tr, ElemA, ByteT, ByteAT>::char_type * buffer_,
222  std::streamsize buffer_size_)
223 {
224  std::streamsize written_byte_size = 0, total_written_byte_size = 0;
225 
226  m_zip_stream.next_in = (byte_buffer_type)buffer_;
227  m_zip_stream.avail_in = static_cast<uInt>(buffer_size_ * sizeof(char_type));
228  m_zip_stream.avail_out = static_cast<uInt>(m_output_buffer.size());
229  m_zip_stream.next_out = &(m_output_buffer[0]);
230  size_t remainder = 0;
231 
232  do
233  {
234  m_err = deflate(&m_zip_stream, 0);
235 
236  if (m_err == Z_OK || m_err == Z_STREAM_END)
237  {
238  written_byte_size = static_cast<std::streamsize>(m_output_buffer.size()) - m_zip_stream.avail_out;
239  total_written_byte_size += written_byte_size;
240 
241  // ouput buffer is full, dumping to ostream
242  m_ostream.write((const char_type *) &(m_output_buffer[0]),
243  static_cast<std::streamsize>(written_byte_size / sizeof(char_type)));
244 
245  // checking if some bytes were not written.
246  if ((remainder = written_byte_size % sizeof(char_type)) != 0)
247  {
248  // copy to the beginning of the stream
249  std::memmove(&(m_output_buffer[0]),
250  &(m_output_buffer[written_byte_size - remainder]),
251  remainder);
252  }
253 
254  m_zip_stream.avail_out = static_cast<uInt>(m_output_buffer.size() - remainder);
255  m_zip_stream.next_out = &m_output_buffer[remainder];
256  }
257  }
258  while (m_zip_stream.avail_in != 0 && m_err == Z_OK);
259 
260  return m_err == Z_OK;
261 }
262 
263 template <typename Elem,
264  typename Tr,
265  typename ElemA,
266  typename ByteT,
267  typename ByteAT>
268 std::streamsize basic_gz_ostreambuf<Elem, Tr, ElemA, ByteT, ByteAT>::flush(int flush_mode)
269 {
270  int const buffer_size = static_cast<int>(this->pptr() - this->pbase()); // amount of data currently in buffer
271 
272  std::streamsize written_byte_size = 0, total_written_byte_size = 0;
273 
274  m_zip_stream.next_in = (byte_buffer_type) this->pbase();
275  m_zip_stream.avail_in = static_cast<uInt>(buffer_size * sizeof(char_type));
276  m_zip_stream.avail_out = static_cast<uInt>(m_output_buffer.size());
277  m_zip_stream.next_out = &(m_output_buffer[0]);
278  size_t remainder = 0;
279 
280  do
281  {
282  m_err = deflate(&m_zip_stream, flush_mode);
283  if (m_err == Z_OK || m_err == Z_STREAM_END)
284  {
285  written_byte_size = static_cast<std::streamsize>(m_output_buffer.size()) - m_zip_stream.avail_out;
286  total_written_byte_size += written_byte_size;
287 
288  // ouput buffer is full, dumping to ostream
289  m_ostream.write((const char_type *) &(m_output_buffer[0]),
290  static_cast<std::streamsize>(written_byte_size / sizeof(char_type) * sizeof(byte_type)));
291 
292  // checking if some bytes were not written.
293  if ((remainder = written_byte_size % sizeof(char_type)) != 0)
294  {
295  // copy to the beginning of the stream
296  std::memmove(&(m_output_buffer[0]),
297  &(m_output_buffer[written_byte_size - remainder]),
298  remainder);
299  }
300 
301  m_zip_stream.avail_out = static_cast<uInt>(m_output_buffer.size() - remainder);
302  m_zip_stream.next_out = &m_output_buffer[remainder];
303  }
304  }
305  while (m_err == Z_OK);
306 
307  m_ostream.flush();
308 
309  return total_written_byte_size;
310 }
311 
312 template <typename Elem,
313  typename Tr,
314  typename ElemA,
315  typename ByteT,
316  typename ByteAT>
317 std::streamsize basic_gz_ostreambuf<Elem, Tr, ElemA, ByteT, ByteAT>::flush()
318 {
319  return flush(Z_SYNC_FLUSH);
320 }
321 
322 template <typename Elem,
323  typename Tr,
324  typename ElemA,
325  typename ByteT,
326  typename ByteAT>
327 std::streamsize basic_gz_ostreambuf<Elem, Tr, ElemA, ByteT, ByteAT>::flush_finalize()
328 {
329  return flush(Z_FINISH);
330 }
331 
332 // --------------------------------------------------------------------------
333 // Class basic_gz_ostreambase
334 // --------------------------------------------------------------------------
335 // Base class for zip ostreams.
336 // Contains a basic_gz_ostreambuf.
337 
338 template <typename Elem,
339  typename Tr = std::char_traits<Elem>,
340  typename ElemA = std::allocator<Elem>,
341  typename ByteT = unsigned char,
342  typename ByteAT = std::allocator<ByteT>
343  >
344 class basic_gz_ostreambase :
345  virtual public std::basic_ios<Elem, Tr>
346 {
347 public:
348  typedef std::basic_ostream<Elem, Tr> & ostream_reference;
349  typedef basic_gz_ostreambuf<Elem, Tr, ElemA, ByteT, ByteAT> zip_streambuf_type;
350 
351  // Construct a zip stream
352  // More info on the following parameters can be found in the zlib documentation.
353  basic_gz_ostreambase(ostream_reference ostream_,
354  size_t level_,
355  EStrategy strategy_,
356  size_t window_size_,
357  size_t memory_level_,
358  size_t buffer_size_) :
359  m_buf(ostream_, level_, strategy_, window_size_, memory_level_, buffer_size_)
360  {
361  this->init(&m_buf);
362  }
363 
364  // returns the underlying zip ostream object
365  zip_streambuf_type * rdbuf() { return &m_buf; }
366 
367 private:
368  zip_streambuf_type m_buf;
369 };
370 
371 // --------------------------------------------------------------------------
372 // Class basic_gz_ostream
373 // --------------------------------------------------------------------------
374 // A zipper ostream
375 //
376 // This class is a ostream decorator that behaves 'almost' like any other ostream.
377 // At construction, it takes any ostream that shall be used to output the compressed data.
378 // When finished, you need to call the method flush or let the destructor run
379 // to flush all the intermediate streams.
380 //
381 // Example:
382 //
383 // // creating the target zip string, could be a fstream
384 // ostringstream ostringstream_;
385 // // creating the zip layer
386 // gz_ostream zipper(ostringstream_);
387 // // writing data
388 // zipper<<f<<" "<<d<<" "<<ui<<" "<<ul<<" "<<us<<" "<<c<<" "<<dum;
389 // // the gz ostream needs special flushing...
390 // zipper.flush();
391 
392 template <typename Elem,
393  typename Tr = std::char_traits<Elem>,
394  typename ElemA = std::allocator<Elem>,
395  typename ByteT = unsigned char,
396  typename ByteAT = std::allocator<ByteT>
397  >
398 class basic_gz_ostream :
399  public basic_gz_ostreambase<Elem, Tr, ElemA, ByteT, ByteAT>,
400  public std::basic_ostream<Elem, Tr>
401 {
402 public:
403  typedef basic_gz_ostreambase<Elem, Tr, ElemA, ByteT, ByteAT> zip_ostreambase_type;
404  typedef std::basic_ostream<Elem, Tr> ostream_type;
405  typedef ostream_type & ostream_reference;
406 
407  // Constructs a zipper ostream decorator
408  //
409  // ostream_ ostream where the compressed output is written
410  // is_gzip_ true if gzip header and footer have to be added
411  // level_ level of compression 0, bad and fast, 9, good and slower,
412  // strategy_ compression strategy
413  // window_size_ see zlib doc
414  // memory_level_ see zlib doc
415  // buffer_size_ the buffer size used to zip data
416 
417  basic_gz_ostream(ostream_reference ostream_,
418  size_t level_ = Z_DEFAULT_COMPRESSION,
419  EStrategy strategy_ = DefaultStrategy,
420  size_t window_size_ = 31, // 15 (size) + 16 (gzip header)
421  size_t memory_level_ = 8,
422  size_t buffer_size_ = GZ_OUTPUT_DEFAULT_BUFFER_SIZE) :
423  zip_ostreambase_type(ostream_, level_, strategy_, window_size_, memory_level_, buffer_size_),
424  ostream_type(this->rdbuf())
425  {}
426 
427  ~basic_gz_ostream()
428  {
429  ostream_type::flush(); this->rdbuf()->flush_finalize();
430  }
431 
432  // flush inner buffer and zipper buffer
433  basic_gz_ostream<Elem, Tr> & flush()
434  {
435  ostream_type::flush(); this->rdbuf()->flush(); return *this;
436  }
437 
438 #ifdef _WIN32
439 private:
440  void _Add_vtordisp1() {} // Required to avoid VC++ warning C4250
441  void _Add_vtordisp2() {} // Required to avoid VC++ warning C4250
442 #endif
443 };
444 
445 // ===========================================================================
446 // Typedefs
447 // ===========================================================================
448 
449 // A typedef for basic_gz_ostream<char>
450 typedef basic_gz_ostream<char> gz_ostream;
451 // A typedef for basic_gz_ostream<wchar_t>
452 typedef basic_gz_ostream<wchar_t> gz_wostream;
453 
454 } // namespace seqan3::contrib
std::basic_ios::rdbuf
T rdbuf(T... args)
cstring
output_stream_over::char_type
typename stream::char_type char_type
Declares the associated char type.
vector
std::remainder
T remainder(T... args)
std::basic_streambuf
iostream
std::basic_streambuf< Elem, Tr >::sync
T sync(T... args)
std::char_traits
std::streamsize
std::basic_ostream< Elem, Tr >
std::flush
T flush(T... args)
std::basic_ios::init
T init(T... args)
std::min
T min(T... args)
std::basic_streambuf< Elem, Tr >::overflow
T overflow(T... args)
platform.hpp
Provides platform and dependency checks.
std::allocator
std::memmove
T memmove(T... args)
std::basic_ios