SeqAn3 3.2.0
The Modern C++ library for sequence analysis.
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
gz_istream.hpp
1// zipstream Library License:
2// --------------------------
3//
4// The zlib/libpng License Copyright (c) 2003 Jonathan de Halleux.
5//
6// This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages arising from the use of this software.
7//
8// Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions:
9//
10// 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
11//
12// 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
13//
14// 3. This notice may not be removed or altered from any source distribution
15//
16// Altered zipstream library header
17// Author: Jonathan de Halleux, dehalleux@pelikhan.com, 2003
18// Author: David Weese <david.weese@fu-berlin.de>
19// Author: Enrico Siragusa <enrico.siragusa@fu-berlin.de>
20// Author: Hannes Hauswedell <hannes.hauswedell@fu-berlin.de>
21
22#pragma once
23
24#include <iostream>
25#include <cstring>
26#include <vector>
27
28#if !defined(SEQAN3_HAS_ZLIB) && !defined(SEQAN3_HEADER_TEST)
29#error "This file cannot be used when building without ZLIB-support."
30#endif // !defined(SEQAN3_HAS_ZLIB) && !defined(SEQAN3_HEADER_TEST)
31
32#if defined(SEQAN3_HAS_ZLIB)
33
34#include <zlib.h>
35
36namespace seqan3::contrib
37{
38
// Default gzip buffer size (900 * 1024 = 921600); change this to suit your needs.
constexpr size_t GZ_INPUT_DEFAULT_BUFFER_SIZE = 900 * 1024;
41
42// --------------------------------------------------------------------------
43// Class basic_gz_istreambuf
44// --------------------------------------------------------------------------
45// A stream decorator that takes compressed input and unzips it to an istream.
46// The class wraps up the inflate method of the zlib library 1.1.4 https://www.zlib.net
47
48template <typename Elem,
49 typename Tr = std::char_traits<Elem>,
50 typename ElemA = std::allocator<Elem>,
51 typename ByteT = unsigned char,
52 typename ByteAT = std::allocator<ByteT>
53 >
54class basic_gz_istreambuf :
55 public std::basic_streambuf<Elem, Tr>
56{
57public:
58 typedef std::basic_istream<Elem, Tr> & istream_reference;
59 typedef ElemA char_allocator_type;
60 typedef ByteT byte_type;
61 typedef ByteAT byte_allocator_type;
62 typedef byte_type * byte_buffer_type;
63 typedef Tr traits_type;
64 typedef typename Tr::char_type char_type;
65 typedef typename Tr::int_type int_type;
66 typedef std::vector<byte_type, byte_allocator_type> byte_vector_type;
67 typedef std::vector<char_type, char_allocator_type> char_vector_type;
68
69 // Construct a unzip stream
70 // More info on the following parameters can be found in the zlib documentation.
71 basic_gz_istreambuf(istream_reference istream_,
72 size_t window_size_,
73 size_t read_buffer_size_,
74 size_t input_buffer_size_);
75
76 ~basic_gz_istreambuf();
77
78 int_type underflow();
79
80 // returns the compressed input istream
81 istream_reference get_istream() { return m_istream; }
82 // returns the zlib stream structure
83 z_stream & get_zip_stream() { return m_zip_stream; }
84
85private:
86 void put_back_from_zip_stream();
87 std::streamsize unzip_from_stream(char_type *, std::streamsize);
88 size_t fill_input_buffer();
89
90 istream_reference m_istream;
91 z_stream m_zip_stream;
92 int m_err;
93 byte_vector_type m_input_buffer;
94 char_vector_type m_buffer;
95};
96
97// --------------------------------------------------------------------------
98// Class basic_gz_istreambuf implementation
99// --------------------------------------------------------------------------
100
101template <typename Elem,
102 typename Tr,
103 typename ElemA,
104 typename ByteT,
105 typename ByteAT>
106basic_gz_istreambuf<Elem, Tr, ElemA, ByteT, ByteAT>::basic_gz_istreambuf(
107 istream_reference istream_,
108 size_t window_size_,
109 size_t read_buffer_size_,
110 size_t input_buffer_size_
111 ) :
112 m_istream(istream_),
113 m_input_buffer(input_buffer_size_),
114 m_buffer(read_buffer_size_)
115{
116 // setting zalloc, zfree and opaque
117 m_zip_stream.zalloc = (alloc_func)0;
118 m_zip_stream.zfree = (free_func)0;
119
120 m_zip_stream.next_in = NULL;
121 m_zip_stream.avail_in = 0;
122 m_zip_stream.avail_out = 0;
123 m_zip_stream.next_out = NULL;
124
125 m_err = inflateInit2(&m_zip_stream, static_cast<int>(window_size_));
126
127 this->setg(&(m_buffer[0]) + 4, // beginning of putback area
128 &(m_buffer[0]) + 4, // read position
129 &(m_buffer[0]) + 4); // end position
130}
131
132template <typename Elem,
133 typename Tr,
134 typename ElemA,
135 typename ByteT,
136 typename ByteAT>
137basic_gz_istreambuf<Elem, Tr, ElemA, ByteT, ByteAT>::~basic_gz_istreambuf()
138{
139 inflateEnd(&m_zip_stream);
140}
141
142template <typename Elem,
143 typename Tr,
144 typename ElemA,
145 typename ByteT,
146 typename ByteAT>
147typename basic_gz_istreambuf<Elem, Tr, ElemA, ByteT, ByteAT>::int_type
148basic_gz_istreambuf<Elem, Tr, ElemA, ByteT, ByteAT>::underflow()
149{
150 if (this->gptr() && (this->gptr() < this->egptr()))
151 return *reinterpret_cast<unsigned char *>(this->gptr());
152
153 int n_putback = static_cast<int>(this->gptr() - this->eback());
154 if (n_putback > 4)
155 n_putback = 4;
156
157 std::memmove(&(m_buffer[0]) + (4 - n_putback), this->gptr() - n_putback, n_putback * sizeof(char_type));
158
159 int num = unzip_from_stream(&(m_buffer[0]) + 4,
160 static_cast<std::streamsize>((m_buffer.size() - 4) * sizeof(char_type)));
161
162 if (num <= 0) // ERROR or EOF
163 return traits_type::eof();
164
165 // reset buffer pointers
166 this->setg(&(m_buffer[0]) + (4 - n_putback), // beginning of putback area
167 &(m_buffer[0]) + 4, // read position
168 &(m_buffer[0]) + 4 + num); // end of buffer
169
170 // return next character
171 return *reinterpret_cast<unsigned char *>(this->gptr());
172}
173
174template <typename Elem,
175 typename Tr,
176 typename ElemA,
177 typename ByteT,
178 typename ByteAT>
179std::streamsize basic_gz_istreambuf<Elem, Tr, ElemA, ByteT, ByteAT>::unzip_from_stream(
180 char_type * buffer_,
181 std::streamsize buffer_size_)
182{
183 m_zip_stream.next_out = (byte_buffer_type)buffer_;
184 m_zip_stream.avail_out = static_cast<uInt>(buffer_size_ * sizeof(char_type));
185 size_t count = m_zip_stream.avail_in;
186
187 do
188 {
189 if (m_zip_stream.avail_in == 0)
190 count = fill_input_buffer();
191
192 if (m_zip_stream.avail_in)
193 m_err = inflate(&m_zip_stream, Z_SYNC_FLUSH);
194
195 if (m_err == Z_STREAM_END)
196 inflateReset(&m_zip_stream);
197 else if (m_err < 0)
198 break;
199 }
200 while (m_zip_stream.avail_out > 0 && count > 0);
201
202 std::streamsize n_read = buffer_size_ - m_zip_stream.avail_out / sizeof(char_type);
203
204 // check if it is the end
205 if (m_zip_stream.avail_out > 0 && m_err == Z_STREAM_END)
206 put_back_from_zip_stream();
207
208 return n_read;
209}
210
211template <typename Elem,
212 typename Tr,
213 typename ElemA,
214 typename ByteT,
215 typename ByteAT>
216size_t basic_gz_istreambuf<Elem, Tr, ElemA, ByteT, ByteAT>::fill_input_buffer()
217{
218 m_zip_stream.next_in = &(m_input_buffer[0]);
219 m_istream.read((char_type *)(&(m_input_buffer[0])),
220 static_cast<std::streamsize>(m_input_buffer.size() / sizeof(char_type)));
221 return m_zip_stream.avail_in = m_istream.gcount() * sizeof(char_type);
222}
223
224template <typename Elem,
225 typename Tr,
226 typename ElemA,
227 typename ByteT,
228 typename ByteAT>
229void basic_gz_istreambuf<Elem, Tr, ElemA, ByteT, ByteAT>::put_back_from_zip_stream()
230{
231 if (m_zip_stream.avail_in == 0)
232 return;
233
234 m_istream.clear(std::ios::goodbit);
235 m_istream.seekg(-static_cast<int>(m_zip_stream.avail_in), std::ios_base::cur);
236
237 m_zip_stream.avail_in = 0;
238}
239
240// --------------------------------------------------------------------------
241// Class basic_gz_istreambase
242// --------------------------------------------------------------------------
243// Base class for unzip istreams
244// Contains a basic_gz_istreambuf.
245
246template <typename Elem,
247 typename Tr = std::char_traits<Elem>,
248 typename ElemA = std::allocator<Elem>,
249 typename ByteT = unsigned char,
250 typename ByteAT = std::allocator<ByteT>
251 >
252class basic_gz_istreambase :
253 virtual public std::basic_ios<Elem, Tr>
254{
255public:
256 typedef std::basic_istream<Elem, Tr> & istream_reference;
257 typedef basic_gz_istreambuf<Elem, Tr, ElemA, ByteT, ByteAT> unzip_streambuf_type;
258
259 basic_gz_istreambase(istream_reference ostream_,
260 size_t window_size_,
261 size_t read_buffer_size_,
262 size_t input_buffer_size_) :
263 m_buf(ostream_, window_size_, read_buffer_size_, input_buffer_size_)
264 {
265 this->init(&m_buf);
266 }
267
268 // returns the underlying unzip istream object
269 unzip_streambuf_type * rdbuf() { return &m_buf; }
270
271private:
272 unzip_streambuf_type m_buf;
273};
274
275// --------------------------------------------------------------------------
276// Class basic_gz_istream
277// --------------------------------------------------------------------------
278// A zipper istream
279//
280// This class is an istream decorator that behaves 'almost' like any other istream.
281// At construction, it takes any istream that shall be used as the source of the compressed data.
282//
283// Simple example:
284//
285// // create a stream on zip string
286// istringstream istringstream_( ostringstream_.str());
287// // create unzipper istream
288// gz_istream unzipper( istringstream_);
289// // read and unzip
290// unzipper>>f_r>>d_r>>ui_r>>ul_r>>us_r>>c_r>>dum_r;
291
292template <typename Elem,
293 typename Tr = std::char_traits<Elem>,
294 typename ElemA = std::allocator<Elem>,
295 typename ByteT = unsigned char,
296 typename ByteAT = std::allocator<ByteT>
297 >
298class basic_gz_istream :
299 public basic_gz_istreambase<Elem, Tr, ElemA, ByteT, ByteAT>,
300 public std::basic_istream<Elem, Tr>
301{
302public:
303 typedef basic_gz_istreambase<Elem, Tr, ElemA, ByteT, ByteAT> zip_istreambase_type;
304 typedef std::basic_istream<Elem, Tr> istream_type;
305 typedef istream_type & istream_reference;
306 typedef ByteT byte_type;
307 typedef Tr traits_type;
308
309 // Construct a unzipper stream
310 //
311 // istream_ input buffer
312 // window_size_
313 // read_buffer_size_
314 // input_buffer_size_
315
316 basic_gz_istream(istream_reference istream_,
317 size_t window_size_ = 31, // 15 (size) + 16 (gzip header)
318 size_t read_buffer_size_ = GZ_INPUT_DEFAULT_BUFFER_SIZE,
319 size_t input_buffer_size_ = GZ_INPUT_DEFAULT_BUFFER_SIZE) :
320 zip_istreambase_type(istream_, window_size_, read_buffer_size_, input_buffer_size_),
321 istream_type(this->rdbuf())
322 {}
323
324#ifdef _WIN32
325private:
326 void _Add_vtordisp1() {} // Required to avoid VC++ warning C4250
327 void _Add_vtordisp2() {} // Required to avoid VC++ warning C4250
328#endif
329};
330
331// ===========================================================================
332// Typedefs
333// ===========================================================================
334
335// A typedef for basic_gz_istream<char>
336typedef basic_gz_istream<char> gz_istream;
337// A typedef for basic_gz_istream<wchart>
338typedef basic_gz_istream<wchar_t> gz_wistream;
339
340} // namespace seqan3::contrib
341
342#endif // defined(SEQAN3_HAS_ZLIB)
constexpr ptrdiff_t count
Count the occurrences of a type in a pack.
Definition: traits.hpp:164
T init(T... args)
typename stream::char_type char_type
Declares the associated char type.
T memmove(T... args)
T rdbuf(T... args)