SeqAn3  3.0.1
The Modern C++ library for sequence analysis.
bz2_istream.hpp
1 // bzip2stream Library License:
2 // --------------------------
3 //
4 // The zlib/libpng License Copyright (c) 2003 Jonathan de Halleux.
5 //
6 // This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages arising from the use of this software.
7 //
8 // Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions:
9 //
10 // 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
11 //
12 // 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
13 //
14 // 3. This notice may not be removed or altered from any source distribution
15 //
16 // Author: Jonathan de Halleux, dehalleux@pelikhan.com, 2003
17 // Altered bzip2_stream header
18 // Author: Hannes Hauswedell <hannes.hauswedell@fu-berlin.de>
19 
20 #pragma once
21 
22 #ifndef SEQAN3_HAS_BZIP2
23 #error "This file cannot be used when building without BZIP2-support."
24 #endif
25 
26 #include <algorithm>
27 #include <cstring>
28 #include <iostream>
29 #include <vector>
30 
31 #define BZ_NO_STDIO
32 #include <bzlib.h>
33 
34 #include <seqan3/core/platform.hpp>
35 
36 namespace seqan3::contrib
37 {
38 
39 // --------------------------------------------------------------------------
40 // Class basic_bz2_istreambuf
41 // --------------------------------------------------------------------------
42 
// Default size used for both buffers of basic_bz2_istream (see the constructor's default arguments).
// `inline constexpr` gives the header a single definition shared by all translation units.
inline constexpr std::size_t BZ2_INPUT_DEFAULT_BUFFER_SIZE = 4096;
44 
45 template<
46  typename Elem,
47  typename Tr = std::char_traits<Elem>,
48  typename ElemA = std::allocator<Elem>,
49  typename ByteT = char,
50  typename ByteAT = std::allocator<ByteT>
51 >
52 class basic_bz2_istreambuf :
53  public std::basic_streambuf<Elem, Tr>
54 {
55 public:
56  typedef std::basic_istream<Elem, Tr>& istream_reference;
57  typedef ElemA char_allocator_type;
58  typedef ByteT byte_type;
59  typedef ByteAT byte_allocator_type;
60  typedef byte_type* byte_buffer_type;
61  typedef typename Tr::char_type char_type;
62  typedef typename Tr::int_type int_type;
63  typedef std::vector<byte_type, byte_allocator_type > byte_vector_type;
64  typedef std::vector<char_type, char_allocator_type > char_vector_type;
65 
66  basic_bz2_istreambuf(
67  istream_reference istream_,
68  size_t verbosity_,
69  bool small_,
70  size_t read_buffer_size_,
71  size_t input_buffer_size_
72  );
73 
74  ~basic_bz2_istreambuf();
75 
76  int_type underflow();
77 
78  istream_reference get_istream() { return m_istream;};
79  bz_stream& get_bzip2_stream() { return m_bzip2_stream;};
80  int get_zerr() const { return m_err;};
81 private:
82  std::streamsize unbzip2_from_stream( char_type*, std::streamsize);
83  void put_back_from_bzip2_stream();
84  size_t fill_input_buffer();
85 
86  istream_reference m_istream;
87  bz_stream m_bzip2_stream;
88  int m_err;
89  byte_vector_type m_input_buffer;
90  char_vector_type m_buffer;
91 };
92 
93 // --------------------------------------------------------------------------
94 // Class basic_bz2_istreambuf implementation
95 // --------------------------------------------------------------------------
96 
97 template<
98  typename Elem,
99  typename Tr,
100  typename ElemA,
101  typename ByteT,
102  typename ByteAT
103 >
104 basic_bz2_istreambuf<
105  Elem,Tr,ElemA,ByteT,ByteAT
106  >::basic_bz2_istreambuf(
107  istream_reference istream_,
108  size_t verbosity_,
109  bool small_,
110  size_t read_buffer_size_,
111  size_t input_buffer_size_
112 )
113 :
114  m_istream(istream_),
115  m_input_buffer(input_buffer_size_),
116  m_buffer(read_buffer_size_)
117 {
118  // setting zalloc, zfree and opaque
119  m_bzip2_stream.bzalloc=NULL;
120  m_bzip2_stream.bzfree=NULL;
121 
122  m_bzip2_stream.next_in=NULL;
123  m_bzip2_stream.avail_in=0;
124  m_bzip2_stream.avail_out=0;
125  m_bzip2_stream.next_out=NULL;
126 
127 
128  m_err=BZ2_bzDecompressInit (
129  &m_bzip2_stream,
130  std::min(4, static_cast<int>(verbosity_)),
131  static_cast<int>(small_)
132  );
133 
134  this->setg(
135  &(m_buffer[0])+4, // beginning of putback area
136  &(m_buffer[0])+4, // read position
137  &(m_buffer[0])+4); // end position
138 }
139 
140 template<
141  typename Elem,
142  typename Tr,
143  typename ElemA,
144  typename ByteT,
145  typename ByteAT
146 >
147 size_t basic_bz2_istreambuf<
148  Elem,Tr,ElemA,ByteT,ByteAT
149  >::fill_input_buffer()
150 {
151  m_bzip2_stream.next_in=&(m_input_buffer[0]);
152  m_istream.read(
153  (char_type*)(&(m_input_buffer[0])),
154  static_cast<std::streamsize>(m_input_buffer.size()/sizeof(char_type))
155  );
156  return m_bzip2_stream.avail_in=m_istream.gcount()*sizeof(char_type);
157 }
158 
159 template<
160  typename Elem,
161  typename Tr,
162  typename ElemA,
163  typename ByteT,
164  typename ByteAT
165 >
166 void basic_bz2_istreambuf<
167  Elem,Tr,ElemA,ByteT,ByteAT
168  >::put_back_from_bzip2_stream()
169 {
170  if (m_bzip2_stream.avail_in==0)
171  return;
172 
173  m_istream.clear( std::ios::goodbit );
174  m_istream.seekg(
175  -static_cast<int>(m_bzip2_stream.avail_in),
176  std::ios_base::cur
177  );
178 
179  m_bzip2_stream.avail_in=0;
180 }
181 
182 
183 template<
184  typename Elem,
185  typename Tr,
186  typename ElemA,
187  typename ByteT,
188  typename ByteAT
189 >
190 basic_bz2_istreambuf<
191  Elem,Tr,ElemA,ByteT,ByteAT
192  >::~basic_bz2_istreambuf()
193 {
194  BZ2_bzDecompressEnd(&m_bzip2_stream);
195 }
196 
197 template<
198  typename Elem,
199  typename Tr,
200  typename ElemA,
201  typename ByteT,
202  typename ByteAT
203 >
204 typename basic_bz2_istreambuf<
205  Elem,Tr,ElemA,ByteT,ByteAT
206  >::int_type
207  basic_bz2_istreambuf<
208  Elem,Tr,ElemA,ByteT,ByteAT
209  >::underflow()
210 {
211  if ( this->gptr() && ( this->gptr() < this->egptr()))
212  return * reinterpret_cast<unsigned char *>( this->gptr());
213 
214  int n_putback = static_cast<int>(this->gptr() - this->eback());
215  if ( n_putback > 4)
216  n_putback = 4;
217  std::memmove(
218  &(m_buffer[0]) + (4 - n_putback),
219  this->gptr() - n_putback,
220  n_putback*sizeof(char_type)
221  );
222 
223  int num = unbzip2_from_stream(
224  &(m_buffer[0])+4,
225  static_cast<std::streamsize>((m_buffer.size()-4)*sizeof(char_type))
226  );
227  if (num <= 0) // ERROR or EOF
228  return EOF;
229 
230  // reset buffer pointers
231  this->setg(
232  &(m_buffer[0]) + (4 - n_putback), // beginning of putback area
233  &(m_buffer[0]) + 4, // read position
234  &(m_buffer[0]) + 4 + num); // end of buffer
235 
236  // return next character
237  return* reinterpret_cast<unsigned char *>( this->gptr());
238  }
239 
240 
241 template<
242  typename Elem,
243  typename Tr,
244  typename ElemA,
245  typename ByteT,
246  typename ByteAT
247 >
248 std::streamsize basic_bz2_istreambuf<
249  Elem,Tr,ElemA,ByteT,ByteAT
250  >::unbzip2_from_stream(
251  char_type* buffer_,
252  std::streamsize buffer_size_
253  )
254 {
255  m_bzip2_stream.next_out=(byte_buffer_type)buffer_;
256  m_bzip2_stream.avail_out=buffer_size_*sizeof(char_type);
257  size_t count =m_bzip2_stream.avail_in;
258 
259  do
260  {
261  if (m_bzip2_stream.avail_in==0)
262  count=fill_input_buffer();
263 
264  if (m_bzip2_stream.avail_in)
265  {
266  m_err = BZ2_bzDecompress( &m_bzip2_stream );
267  }
268  } while (m_err==BZ_OK && m_bzip2_stream.avail_out != 0 && count != 0);
269 
270  if (m_err == BZ_STREAM_END)
271  put_back_from_bzip2_stream();
272 
273  return buffer_size_ - m_bzip2_stream.avail_out/sizeof(char_type);
274 }
275 
276 // --------------------------------------------------------------------------
277 // Class basic_bz2_istreambase
278 // --------------------------------------------------------------------------
279 
280 template<
281  typename Elem,
282  typename Tr = std::char_traits<Elem>,
283  typename ElemA = std::allocator<Elem>,
284  typename ByteT = char,
285  typename ByteAT = std::allocator<ByteT>
286 >
287 class basic_bz2_istreambase : virtual public std::basic_ios<Elem,Tr>
288 {
289 public:
290  typedef std::basic_istream<Elem, Tr>& istream_reference;
291  typedef basic_bz2_istreambuf<
292  Elem,Tr,ElemA,ByteT,ByteAT> unbzip2_streambuf_type;
293 
294  basic_bz2_istreambase(
295  istream_reference ostream_,
296  size_t verbosity_,
297  bool small_,
298  size_t read_buffer_size_,
299  size_t input_buffer_size_
300  )
301  : m_buf(
302  ostream_,
303  verbosity_,
304  small_,
305  read_buffer_size_,
306  input_buffer_size_
307  )
308  {
309  this->init(&m_buf );
310  };
311 
312  unbzip2_streambuf_type* rdbuf() { return &m_buf; };
313 
314 private:
315  unbzip2_streambuf_type m_buf;
316 };
317 
318 // --------------------------------------------------------------------------
319 // Class basic_bz2_istream
320 // --------------------------------------------------------------------------
321 
322 template<
323  typename Elem,
324  typename Tr = std::char_traits<Elem>,
325  typename ElemA = std::allocator<Elem>,
326  typename ByteT = char,
327  typename ByteAT = std::allocator<ByteT>
328 >
329 class basic_bz2_istream :
330  public basic_bz2_istreambase<Elem,Tr,ElemA,ByteT,ByteAT>,
331  public std::basic_istream<Elem,Tr>
332 {
333 public:
334  typedef basic_bz2_istreambase<
335  Elem,Tr,ElemA,ByteT,ByteAT> bzip2_istreambase_type;
336  typedef std::basic_istream<Elem,Tr> istream_type;
337  typedef istream_type& istream_reference;
338  typedef unsigned char byte_type;
339 
340  basic_bz2_istream(
341  istream_reference istream_,
342  size_t verbosity_ = 0,
343  bool small_ = false,
344  size_t read_buffer_size_ = BZ2_INPUT_DEFAULT_BUFFER_SIZE,
345  size_t input_buffer_size_ = BZ2_INPUT_DEFAULT_BUFFER_SIZE
346  )
347  :
348  bzip2_istreambase_type(istream_,verbosity_, small_, read_buffer_size_, input_buffer_size_),
349  istream_type(bzip2_istreambase_type::rdbuf())
350  {};
351 #ifdef _WIN32
352 private:
353  void _Add_vtordisp1() { } // Required to avoid VC++ warning C4250
354  void _Add_vtordisp2() { } // Required to avoid VC++ warning C4250
355 #endif
356 };
357 
358 // --------------------------------------------------------------------------
359 // typedefs
360 // --------------------------------------------------------------------------
361 
362 typedef basic_bz2_istream<char> bz2_istream;
363 typedef basic_bz2_istream<wchar_t> bz2_wistream;
364 
365 } // namespace seqan3::contrib
std::basic_ios::rdbuf
T rdbuf(T... args)
cstring
output_stream_over::int_type
typename stream::int_type int_type
Declares the associated int type.
output_stream_over::char_type
typename stream::char_type char_type
Declares the associated char type.
vector
std::basic_streambuf
iostream
std::basic_streambuf< Elem, Tr >::underflow
T underflow(T... args)
std::char_traits
std::streamsize
std::basic_ios::init
T init(T... args)
std::min
T min(T... args)
platform.hpp
Provides platform and dependency checks.
std::allocator
std::memmove
T memmove(T... args)
std::basic_istream< Elem, Tr >
seqan3::pack_traits::count
constexpr ptrdiff_t count
Count the occurrences of a type in a pack.
Definition: traits.hpp:134
std::basic_ios