SeqAn3 3.2.0
The Modern C++ library for sequence analysis.
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
gz_ostream.hpp
1// zipstream Library License:
2// --------------------------
3//
4// The zlib/libpng License Copyright (c) 2003 Jonathan de Halleux.
5//
6// This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages arising from the use of this software.
7//
8// Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions:
9//
10// 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
11//
12// 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
13//
14// 3. This notice may not be removed or altered from any source distribution
15//
16// Altered zipstream library header
17// Author: Jonathan de Halleux, dehalleux@pelikhan.com, 2003
18// Author: David Weese <david.weese@fu-berlin.de>
19// Author: Enrico Siragusa <enrico.siragusa@fu-berlin.de>
20// Author: Hannes Hauswedell <hannes.hauswedell@fu-berlin.de>
21
22#pragma once
23
24#include <iostream>
25#include <cstring>
26#include <vector>
27
28#if !defined(SEQAN3_HAS_ZLIB) && !defined(SEQAN3_HEADER_TEST)
29#error "This file cannot be used when building without ZLIB-support."
30#endif // !defined(SEQAN3_HAS_ZLIB) && !defined(SEQAN3_HEADER_TEST)
31
32#if defined(SEQAN3_HAS_ZLIB)
33
34#include <zlib.h>
35
36namespace seqan3::contrib
37{
38
// Default number of elements in each gzip stream buffer (900 * 1024 = 921600).
// Change this to suit your needs.
constexpr size_t GZ_OUTPUT_DEFAULT_BUFFER_SIZE = 900 * 1024;
41
42// --------------------------------------------------------------------------
43// Enum EStrategy
44// --------------------------------------------------------------------------
45// Compression strategy, see zlib doc.
46
// Values are cast to int and handed to deflateInit2() as its `strategy` argument,
// so they must keep the numeric values zlib assigns to the matching constants.
enum EStrategy
{
    DefaultStrategy = 0,     // same value as zlib's Z_DEFAULT_STRATEGY
    StrategyFiltered = 1,    // same value as zlib's Z_FILTERED
    StrategyHuffmanOnly = 2  // same value as zlib's Z_HUFFMAN_ONLY
};
53
54// --------------------------------------------------------------------------
55// Class basic_gz_ostreambuf
56// --------------------------------------------------------------------------
57// A stream decorator that takes raw input and zips it to a ostream.
58// The class wraps up the inflate method of the zlib library 1.1.4 https://www.zlib.net
59
60template <typename Elem,
61 typename Tr = std::char_traits<Elem>,
62 typename ElemA = std::allocator<Elem>,
63 typename ByteT = unsigned char,
64 typename ByteAT = std::allocator<ByteT>
65 >
66class basic_gz_ostreambuf :
67 public std::basic_streambuf<Elem, Tr>
68{
69public:
70 typedef std::basic_ostream<Elem, Tr> & ostream_reference;
71 typedef ElemA char_allocator_type;
72 typedef ByteT byte_type;
73 typedef ByteAT byte_allocator_type;
74 typedef byte_type * byte_buffer_type;
75 typedef Tr traits_type;
76 typedef typename Tr::char_type char_type;
77 typedef typename Tr::int_type int_type;
78 typedef std::vector<byte_type, byte_allocator_type> byte_vector_type;
79 typedef std::vector<char_type, char_allocator_type> char_vector_type;
80
81 // Construct a zip stream
82 // More info on the following parameters can be found in the zlib documentation.
83 basic_gz_ostreambuf(ostream_reference ostream_,
84 size_t level_,
85 EStrategy strategy_,
86 size_t window_size_,
87 size_t memory_level_,
88 size_t buffer_size_);
89
90 ~basic_gz_ostreambuf();
91
92 int sync();
93 int_type overflow(int_type c);
94
95 // flushes the zip buffer and output buffer.
96 // This method should be called at the end of the compression.
97 // Calling flush multiple times, will lower the compression ratio.
99
100 // flushes the zip buffer and output buffer and finalize the zip stream
101 // This method should be called at the end of the compression.
102 std::streamsize flush_finalize();
103
104
105private:
106 bool zip_to_stream(char_type *, std::streamsize);
107 size_t fill_input_buffer();
108 // flush the zip buffer using a particular mode and flush output buffer
109 std::streamsize flush(int flush_mode);
110
111 ostream_reference m_ostream;
112 z_stream m_zip_stream;
113 int m_err;
114 byte_vector_type m_output_buffer;
115 char_vector_type m_buffer;
116};
117
118// --------------------------------------------------------------------------
119// Class basic_gz_ostreambuf implementation
120// --------------------------------------------------------------------------
121
122template <typename Elem,
123 typename Tr,
124 typename ElemA,
125 typename ByteT,
126 typename ByteAT>
127basic_gz_ostreambuf<Elem, Tr, ElemA, ByteT, ByteAT>::basic_gz_ostreambuf(
128 ostream_reference ostream_,
129 size_t level_,
130 EStrategy strategy_,
131 size_t window_size_,
132 size_t memory_level_,
133 size_t buffer_size_
134 ) :
135 m_ostream(ostream_),
136 m_output_buffer(buffer_size_, 0),
137 m_buffer(buffer_size_, 0)
138{
139 m_zip_stream.zalloc = (alloc_func)0;
140 m_zip_stream.zfree = (free_func)0;
141
142 m_zip_stream.next_in = NULL;
143 m_zip_stream.avail_in = 0;
144 m_zip_stream.avail_out = 0;
145 m_zip_stream.next_out = NULL;
146
147 m_err = deflateInit2(
148 &m_zip_stream,
149 std::min(9, static_cast<int>(level_)),
150 Z_DEFLATED,
151 static_cast<int>(window_size_),
152 std::min(9, static_cast<int>(memory_level_)),
153 static_cast<int>(strategy_)
154 );
155
156 this->setp(&(m_buffer[0]), &(m_buffer[m_buffer.size() - 1]));
157}
158
159template <typename Elem,
160 typename Tr,
161 typename ElemA,
162 typename ByteT,
163 typename ByteAT>
164basic_gz_ostreambuf<Elem, Tr, ElemA, ByteT, ByteAT>::~basic_gz_ostreambuf()
165{
166 flush_finalize();
167 m_ostream.flush();
168 m_err = deflateEnd(&m_zip_stream);
169}
170
171template <typename Elem,
172 typename Tr,
173 typename ElemA,
174 typename ByteT,
175 typename ByteAT>
176int basic_gz_ostreambuf<Elem, Tr, ElemA, ByteT, ByteAT>::sync()
177{
178 if (this->pptr() && this->pptr() > this->pbase())
179 {
180 if (traits_type::eq_int_type(overflow(traits_type::eof()), traits_type::eof()))
181 return -1;
182 }
183
184 return 0;
185}
186
187template <typename Elem,
188 typename Tr,
189 typename ElemA,
190 typename ByteT,
191 typename ByteAT>
192typename basic_gz_ostreambuf<Elem, Tr, ElemA, ByteT, ByteAT>::int_type
193basic_gz_ostreambuf<Elem, Tr, ElemA, ByteT, ByteAT>::overflow(
194 typename basic_gz_ostreambuf<Elem, Tr, ElemA, ByteT, ByteAT>::int_type c)
195{
196 int w = static_cast<int>(this->pptr() - this->pbase());
197
198 if (!traits_type::eq_int_type(c, traits_type::eof()))
199 {
200 *this->pptr() = c;
201 ++w;
202 }
203
204 if (zip_to_stream(this->pbase(), w))
205 {
206 this->setp(this->pbase(), this->epptr() - 1);
207 return c;
208 }
209 else
210 {
211 return traits_type::eof();
212 }
213}
214
215template <typename Elem,
216 typename Tr,
217 typename ElemA,
218 typename ByteT,
219 typename ByteAT>
220bool basic_gz_ostreambuf<Elem, Tr, ElemA, ByteT, ByteAT>::zip_to_stream(
221 typename basic_gz_ostreambuf<Elem, Tr, ElemA, ByteT, ByteAT>::char_type * buffer_,
222 std::streamsize buffer_size_)
223{
224 std::streamsize written_byte_size = 0, total_written_byte_size = 0;
225
226 m_zip_stream.next_in = (byte_buffer_type)buffer_;
227 m_zip_stream.avail_in = static_cast<uInt>(buffer_size_ * sizeof(char_type));
228 m_zip_stream.avail_out = static_cast<uInt>(m_output_buffer.size());
229 m_zip_stream.next_out = &(m_output_buffer[0]);
230 size_t remainder = 0;
231
232 do
233 {
234 m_err = deflate(&m_zip_stream, 0);
235
236 if (m_err == Z_OK || m_err == Z_STREAM_END)
237 {
238 written_byte_size = static_cast<std::streamsize>(m_output_buffer.size()) - m_zip_stream.avail_out;
239 total_written_byte_size += written_byte_size;
240
241 // output buffer is full, dumping to ostream
242 m_ostream.write((const char_type *) &(m_output_buffer[0]),
243 static_cast<std::streamsize>(written_byte_size / sizeof(char_type)));
244
245 // checking if some bytes were not written.
246 if ((remainder = written_byte_size % sizeof(char_type)) != 0)
247 {
248 // copy to the beginning of the stream
249 std::memmove(&(m_output_buffer[0]),
250 &(m_output_buffer[written_byte_size - remainder]),
251 remainder);
252 }
253
254 m_zip_stream.avail_out = static_cast<uInt>(m_output_buffer.size() - remainder);
255 m_zip_stream.next_out = &m_output_buffer[remainder];
256 }
257 }
258 while (m_zip_stream.avail_in != 0 && m_err == Z_OK);
259
260 return m_err == Z_OK;
261}
262
263template <typename Elem,
264 typename Tr,
265 typename ElemA,
266 typename ByteT,
267 typename ByteAT>
268std::streamsize basic_gz_ostreambuf<Elem, Tr, ElemA, ByteT, ByteAT>::flush(int flush_mode)
269{
270 int const buffer_size = static_cast<int>(this->pptr() - this->pbase()); // amount of data currently in buffer
271
272 std::streamsize written_byte_size = 0, total_written_byte_size = 0;
273
274 m_zip_stream.next_in = (byte_buffer_type) this->pbase();
275 m_zip_stream.avail_in = static_cast<uInt>(buffer_size * sizeof(char_type));
276 m_zip_stream.avail_out = static_cast<uInt>(m_output_buffer.size());
277 m_zip_stream.next_out = &(m_output_buffer[0]);
278 size_t remainder = 0;
279
280 do
281 {
282 m_err = deflate(&m_zip_stream, flush_mode);
283 if (m_err == Z_OK || m_err == Z_STREAM_END)
284 {
285 written_byte_size = static_cast<std::streamsize>(m_output_buffer.size()) - m_zip_stream.avail_out;
286 total_written_byte_size += written_byte_size;
287
288 // output buffer is full, dumping to ostream
289 m_ostream.write((const char_type *) &(m_output_buffer[0]),
290 static_cast<std::streamsize>(written_byte_size / sizeof(char_type) * sizeof(byte_type)));
291
292 // checking if some bytes were not written.
293 if ((remainder = written_byte_size % sizeof(char_type)) != 0)
294 {
295 // copy to the beginning of the stream
296 std::memmove(&(m_output_buffer[0]),
297 &(m_output_buffer[written_byte_size - remainder]),
298 remainder);
299 }
300
301 m_zip_stream.avail_out = static_cast<uInt>(m_output_buffer.size() - remainder);
302 m_zip_stream.next_out = &m_output_buffer[remainder];
303 }
304 }
305 while (m_err == Z_OK);
306
307 m_ostream.flush();
308
309 return total_written_byte_size;
310}
311
312template <typename Elem,
313 typename Tr,
314 typename ElemA,
315 typename ByteT,
316 typename ByteAT>
317std::streamsize basic_gz_ostreambuf<Elem, Tr, ElemA, ByteT, ByteAT>::flush()
318{
319 return flush(Z_SYNC_FLUSH);
320}
321
322template <typename Elem,
323 typename Tr,
324 typename ElemA,
325 typename ByteT,
326 typename ByteAT>
327std::streamsize basic_gz_ostreambuf<Elem, Tr, ElemA, ByteT, ByteAT>::flush_finalize()
328{
329 return flush(Z_FINISH);
330}
331
332// --------------------------------------------------------------------------
333// Class basic_gz_ostreambase
334// --------------------------------------------------------------------------
335// Base class for zip ostreams.
336// Contains a basic_gz_ostreambuf.
337
338template <typename Elem,
339 typename Tr = std::char_traits<Elem>,
340 typename ElemA = std::allocator<Elem>,
341 typename ByteT = unsigned char,
342 typename ByteAT = std::allocator<ByteT>
343 >
344class basic_gz_ostreambase :
345 virtual public std::basic_ios<Elem, Tr>
346{
347public:
348 typedef std::basic_ostream<Elem, Tr> & ostream_reference;
349 typedef basic_gz_ostreambuf<Elem, Tr, ElemA, ByteT, ByteAT> zip_streambuf_type;
350
351 // Construct a zip stream
352 // More info on the following parameters can be found in the zlib documentation.
353 basic_gz_ostreambase(ostream_reference ostream_,
354 size_t level_,
355 EStrategy strategy_,
356 size_t window_size_,
357 size_t memory_level_,
358 size_t buffer_size_) :
359 m_buf(ostream_, level_, strategy_, window_size_, memory_level_, buffer_size_)
360 {
361 this->init(&m_buf);
362 }
363
364 // returns the underlying zip ostream object
365 zip_streambuf_type * rdbuf() { return &m_buf; }
366
367private:
368 zip_streambuf_type m_buf;
369};
370
371// --------------------------------------------------------------------------
372// Class basic_gz_ostream
373// --------------------------------------------------------------------------
374// A zipper ostream
375//
376// This class is a ostream decorator that behaves 'almost' like any other ostream.
377// At construction, it takes any ostream that shall be used to output of the compressed data.
378// When finished, you need to call the special method zflush or call the destructor
379// to flush all the intermidiate streams.
380//
381// Example:
382//
383// // creating the target zip string, could be a fstream
384// ostringstream ostringstream_;
385// // creating the zip layer
386// zip_ostream zipper(ostringstream_);
387// // writing data
388// zipper<<f<<" "<<d<<" "<<ui<<" "<<ul<<" "<<us<<" "<<c<<" "<<dum;
389// // zip ostream needs special flushing...
390// zipper.zflush();
391
392template <typename Elem,
393 typename Tr = std::char_traits<Elem>,
394 typename ElemA = std::allocator<Elem>,
395 typename ByteT = unsigned char,
396 typename ByteAT = std::allocator<ByteT>
397 >
398class basic_gz_ostream :
399 public basic_gz_ostreambase<Elem, Tr, ElemA, ByteT, ByteAT>,
400 public std::basic_ostream<Elem, Tr>
401{
402public:
403 typedef basic_gz_ostreambase<Elem, Tr, ElemA, ByteT, ByteAT> zip_ostreambase_type;
404 typedef std::basic_ostream<Elem, Tr> ostream_type;
405 typedef ostream_type & ostream_reference;
406
407 // Constructs a zipper ostream decorator
408 //
409 // ostream_ ostream where the compressed output is written
410 // is_gzip_ true if gzip header and footer have to be added
411 // level_ level of compression 0, bad and fast, 9, good and slower,
412 // strategy_ compression strategy
413 // window_size_ see zlib doc
414 // memory_level_ see zlib doc
415 // buffer_size_ the buffer size used to zip data
416
417 basic_gz_ostream(ostream_reference ostream_,
418 size_t level_ = Z_DEFAULT_COMPRESSION,
419 EStrategy strategy_ = DefaultStrategy,
420 size_t window_size_ = 31, // 15 (size) + 16 (gzip header)
421 size_t memory_level_ = 8,
422 size_t buffer_size_ = GZ_OUTPUT_DEFAULT_BUFFER_SIZE) :
423 zip_ostreambase_type(ostream_, level_, strategy_, window_size_, memory_level_, buffer_size_),
424 ostream_type(this->rdbuf())
425 {}
426
427 ~basic_gz_ostream()
428 {
429 ostream_type::flush(); this->rdbuf()->flush_finalize();
430 }
431
432 // flush inner buffer and zipper buffer
433 basic_gz_ostream<Elem, Tr> & flush()
434 {
435 ostream_type::flush(); this->rdbuf()->flush(); return *this;
436 }
437
438#ifdef _WIN32
439private:
440 void _Add_vtordisp1() {} // Required to avoid VC++ warning C4250
441 void _Add_vtordisp2() {} // Required to avoid VC++ warning C4250
442#endif
443};
444
445// ===========================================================================
446// Typedefs
447// ===========================================================================
448
449// A typedef for basic_gz_ostream<char>
450typedef basic_gz_ostream<char> gz_ostream;
451// A typedef for basic_gz_ostream<wchar_t>
452typedef basic_gz_ostream<wchar_t> gz_wostream;
453
454} // namespace seqan3::contrib
455
456#endif // defined(SEQAN3_HAS_ZLIB)
T flush(T... args)
T init(T... args)
typename stream::char_type char_type
Declares the associated char type.
T memmove(T... args)
T min(T... args)
T rdbuf(T... args)
T remainder(T... args)