27#if !defined(SEQAN3_HAS_ZLIB) && !defined(SEQAN3_HEADER_TEST)
28# error "This file cannot be used when building without GZip-support."
31#if defined(SEQAN3_HAS_ZLIB)
35namespace seqan3::contrib
54 '\x00',
'\x00',
'\x00',
'\x00',
55 '\x00',
'\xff',
'\x06',
'\x00',
56 '\x42',
'\x43',
'\x02',
'\x00',
57 '\x1b',
'\x00',
'\x03',
'\x00',
58 '\x00',
'\x00',
'\x00',
'\x00',
59 '\x00',
'\x00',
'\x00',
'\x00'}};
/// Primary template of the per-algorithm compression context.
///
/// The generic case carries no state at all; the algorithm tags that need
/// state (detail::gz_compression and detail::bgzf_compression) provide their
/// own specialisations further down in this file.
///
/// @tparam TAlgTag  Tag type selecting the compression algorithm.
//
// The stray listing line numbers that were fused onto `template` and
// `struct` have been removed so the declaration is well-formed C++ again.
template <typename TAlgTag>
struct CompressionContext
{};
/// Forward declaration of the per-algorithm page-size trait.
///
/// There is deliberately no generic definition: only algorithm tags with an
/// explicit specialisation (detail::bgzf_compression in this file) can be
/// queried, and using any other tag is a compile-time error.
///
/// @tparam TAlgTag  Tag type selecting the compression algorithm.
//
// The stray listing line numbers that were fused onto `template` and
// `struct` have been removed so the declaration is well-formed C++ again.
template <typename TAlgTag>
struct DefaultPageSize;
// Specialisation of CompressionContext for the detail::gz_compression tag.
// The init routines in this file access a member named `strm` on it and pass
// its address to zlib's deflateInit2()/inflateInit2().
// NOTE(review): only the head line survived in this excerpt; the
// `template <>` introducer and the whole member list are not visible here.
68struct CompressionContext<detail::gz_compression>
// Specialisation of CompressionContext for the detail::bgzf_compression tag.
// It derives from the gz_compression context, which is what lets the BGZF
// init overloads hand the object to the gz overloads via a base reference.
// NOTE(review): the `template <>` introducer and the braces around the
// members are not present in this excerpt.
79struct CompressionContext<detail::bgzf_compression>:
80 CompressionContext<detail::gz_compression>
// Number of bytes in the BGZF block header, taken from the size of the
// magic header declared on detail::bgzf_compression.
82 static constexpr size_t BLOCK_HEADER_LENGTH = detail::bgzf_compression::magic_header.size();
// NOTE(review): not read or written anywhere in this excerpt; purpose to be
// confirmed against the stream buffer code that uses this context.
83 unsigned char headerPos;
// Size constants for BGZF blocks, keyed on the detail::bgzf_compression tag.
// NOTE(review): the `template <>` introducer and the braces around the
// members are not present in this excerpt.
87struct DefaultPageSize<detail::bgzf_compression>
// Upper bound for one complete block: 64 KiB.
89 static const unsigned MAX_BLOCK_SIZE = 64 * 1024;
// Bytes reserved at the end of every block; the block routines in this file
// store two 32-bit values there (a CRC-32 followed by a byte count).
90 static const unsigned BLOCK_FOOTER_LENGTH = 8;
// NOTE(review): presumably the per-block overhead of a stored (uncompressed)
// deflate block, cf. RFC 1951 section 3.2.4 — confirm.
92 static const unsigned ZLIB_BLOCK_OVERHEAD = 5;
// Mirrors the header length of the BGZF compression context under the same
// name, so that it can take part in the constant expression below.
96 enum { BLOCK_HEADER_LENGTH = CompressionContext<detail::bgzf_compression>::BLOCK_HEADER_LENGTH };
// What is left of the maximum block size once header, footer and the zlib
// overhead have been subtracted.
97 static const unsigned VALUE = MAX_BLOCK_SIZE - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH - ZLIB_BLOCK_OVERHEAD;
// Prepares the zlib stream `ctx.strm` for compression via deflateInit2().
//
// ctx: gz compression context whose `strm` member is initialised.
// Throws io_error("Calling deflateInit2() failed for gz file.").
// NOTE(review): the return type, the braces and the condition guarding the
// throw are not present in this excerpt (the listing numbers jump over them).
109compressInit(CompressionContext<detail::gz_compression> & ctx)
// A negative windowBits value makes zlib emit raw deflate data without a
// zlib/gzip wrapper (see the deflateInit2 entry of the zlib manual); the
// magnitude 15 is the window size exponent.
111 const int GZIP_WINDOW_BITS = -15;
112 const int Z_DEFAULT_MEM_LEVEL = 8;
// Null allocator hooks tell zlib to fall back to its default allocation
// routines.
114 ctx.strm.zalloc = NULL;
115 ctx.strm.zfree = NULL;
// Z_BEST_SPEED selects the fastest compression level.
121 int status = deflateInit2(&ctx.strm, Z_BEST_SPEED, Z_DEFLATED,
122 GZIP_WINDOW_BITS, Z_DEFAULT_MEM_LEVEL, Z_DEFAULT_STRATEGY);
124 throw io_error(
"Calling deflateInit2() failed for gz file.");
// BGZF overload of compressInit(): performs no work of its own and forwards
// to the gz_compression overload by viewing `ctx` as its gz_compression base.
// NOTE(review): the return type and the braces are not present in this excerpt.
132compressInit(CompressionContext<detail::bgzf_compression> & ctx)
134 compressInit(
static_cast<CompressionContext<detail::gz_compression> &
>(ctx));
// Returns the result of detail::to_little_endian() applied to `tmp`.
// NOTE(review): the lines that declare and fill `tmp` are missing from this
// excerpt; presumably it is a 16-bit value copied out of `buffer` — confirm
// against the full file. The return type is not visible either.
143_bgzfUnpack16(
char const * buffer)
147 return detail::to_little_endian(tmp);
// Returns the result of detail::to_little_endian() applied to `tmp`.
// NOTE(review): the lines that declare and fill `tmp` are missing from this
// excerpt; presumably it is a 32-bit value copied out of `buffer` — confirm
// against the full file. The return type is not visible either.
151_bgzfUnpack32(
char const * buffer)
155 return detail::to_little_endian(tmp);
// Takes a 16-bit `value` and a destination `buffer`.
// NOTE(review): presumably writes the two bytes of `value` to `buffer`; the
// opening of the copy call, its final argument and the function's return
// type are not present in this excerpt, so this cannot be confirmed here.
163_bgzfPack16(
char * buffer, uint16_t value)
// Run the value through detail::to_little_endian() before its bytes are used
// (presumably a byte swap on big-endian hosts only — confirm).
165 value = detail::to_little_endian(value);
// Visible argument: pointer one past the last byte of `value`, i.e. the end
// of the byte range being handed to the (not visible) callee.
167 reinterpret_cast<char *
>(&value) +
sizeof(uint16_t),
// Takes a 32-bit `value` and a destination `buffer`.
// NOTE(review): presumably writes the four bytes of `value` to `buffer`; the
// opening of the copy call, its final argument and the function's return
// type are not present in this excerpt, so this cannot be confirmed here.
172_bgzfPack32(
char * buffer, uint32_t value)
// Run the value through detail::to_little_endian() before its bytes are used
// (presumably a byte swap on big-endian hosts only — confirm).
174 value = detail::to_little_endian(value);
// Visible argument: pointer one past the last byte of `value`, i.e. the end
// of the byte range being handed to the (not visible) callee.
176 reinterpret_cast<char *
>(&value) +
sizeof(uint32_t),
// Writes one complete BGZF block into the destination buffer: the magic
// header, the deflate output for the source range, and an 8-byte footer.
//
// dstBegin / dstCapacity: start and size of the destination buffer.
// srcBegin / srcLength:   start and element count of the uncompressed input.
// ctx:                    BGZF compression context providing the zlib stream.
// Returns the number of destination bytes that make up the block.
// Throws io_error when deflate() does not finish in one call, and with
// "BGZF deflateEnd() failed." after deflateEnd().
// NOTE(review): the return type, the braces and the condition guarding the
// second throw are not present in this excerpt; any stream initialisation
// that happens before deflate() is not visible here either.
184template <
typename TDestValue,
typename TDestCapacity,
typename TSourceValue,
typename TSourceLength>
186_compressBlock(TDestValue *dstBegin, TDestCapacity dstCapacity,
187 TSourceValue *srcBegin, TSourceLength srcLength, CompressionContext<detail::bgzf_compression> & ctx)
189 const size_t BLOCK_HEADER_LENGTH = DefaultPageSize<detail::bgzf_compression>::BLOCK_HEADER_LENGTH;
190 const size_t BLOCK_FOOTER_LENGTH = DefaultPageSize<detail::bgzf_compression>::BLOCK_FOOTER_LENGTH;
// Preconditions, checked in debug builds only: the destination must have room
// for more than header plus footer, and must be a byte-sized element type.
// NOTE(review): the two sizeof checks are compile-time facts and could be
// static_assert instead of assert.
192 assert(dstCapacity > BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH);
193 assert(
sizeof(TDestValue) == 1u);
194 assert(
sizeof(
unsigned) == 4u);
// The block starts with the fixed BGZF magic header.
197 std::ranges::copy(detail::bgzf_compression::magic_header, dstBegin);
// Point zlib at the input and at the area right behind the header. The
// output budget excludes the footer, so the footer always fits afterwards.
201 ctx.strm.next_in = (Bytef *)(srcBegin);
202 ctx.strm.next_out = (Bytef *)(dstBegin + BLOCK_HEADER_LENGTH);
203 ctx.strm.avail_in = srcLength *
sizeof(TSourceValue);
204 ctx.strm.avail_out = dstCapacity - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH;
// Z_FINISH asks for the whole input to be compressed in this single call;
// any result other than Z_STREAM_END is treated as "output did not fit".
206 int status = deflate(&ctx.strm, Z_FINISH);
207 if (status != Z_STREAM_END)
// Release zlib's state before reporting the failure.
209 deflateEnd(&ctx.strm);
210 throw io_error(
"Deflation failed. Compressed BGZF data is too big.");
213 status = deflateEnd(&ctx.strm);
215 throw io_error(
"BGZF deflateEnd() failed.");
// avail_out started at capacity minus header and footer, so this difference
// is header + compressed payload + footer, i.e. the total block length. The
// header field at byte offset 16 receives that length minus one.
222 size_t len = dstCapacity - ctx.strm.avail_out;
223 _bgzfPack16(dstBegin + 16, len - 1);
// Move to the footer: CRC-32 of the uncompressed input, followed by the
// uncompressed size in bytes.
225 dstBegin += len - BLOCK_FOOTER_LENGTH;
226 _bgzfPack32(dstBegin, crc32(crc32(0u, NULL, 0u), (Bytef *)(srcBegin), srcLength *
sizeof(TSourceValue)));
227 _bgzfPack32(dstBegin + 4, srcLength *
sizeof(TSourceValue));
// Same quantity as `len` above: the total number of bytes written.
229 return dstCapacity - ctx.strm.avail_out;
// Prepares the zlib stream `ctx.strm` for decompression via inflateInit2().
//
// ctx: gz compression context whose `strm` member is initialised.
// Throws io_error("GZip inflateInit2() failed.").
// NOTE(review): the return type, the braces and the condition guarding the
// throw are not present in this excerpt (the listing numbers jump over them).
237decompressInit(CompressionContext<detail::gz_compression> & ctx)
// A negative windowBits value makes zlib expect raw deflate data without a
// zlib/gzip wrapper (see the inflateInit2 entry of the zlib manual).
239 const int GZIP_WINDOW_BITS = -15;
// Null allocator hooks tell zlib to fall back to its default allocation
// routines.
241 ctx.strm.zalloc = NULL;
242 ctx.strm.zfree = NULL;
243 int status = inflateInit2(&ctx.strm, GZIP_WINDOW_BITS);
245 throw io_error(
"GZip inflateInit2() failed.");
// BGZF overload of decompressInit(): performs no work of its own and forwards
// to the gz_compression overload by viewing `ctx` as its gz_compression base.
// NOTE(review): the return type and the braces are not present in this excerpt.
253decompressInit(CompressionContext<detail::bgzf_compression> & ctx)
255 decompressInit(
static_cast<CompressionContext<detail::gz_compression> &
>(ctx));
// Validates and inflates one complete BGZF block.
//
// dstBegin / dstCapacity: start and element capacity of the output buffer.
// srcBegin / srcLength:   start and length of the compressed block, including
//                         its header and footer.
// ctx:                    BGZF compression context providing the zlib stream.
// Returns the number of TDestValue elements produced.
// Throws io_error for a block that is too short, has an invalid header, has a
// stored length that differs from srcLength, does not inflate in one call,
// or whose footer CRC-32 / size does not match the inflated data; also with
// "BGZF inflateEnd() failed." after inflateEnd().
// NOTE(review): the return type, the braces and the condition guarding the
// inflateEnd() throw are not present in this excerpt; any stream
// initialisation that happens before inflate() is not visible here either.
// NOTE(review): avail_out is set in BYTES (dstCapacity * sizeof(TDestValue)),
// but the produced amount is later computed as `dstCapacity - avail_out`,
// which only equals the byte count when sizeof(TDestValue) == 1. Nothing
// here asserts that (only TSourceValue is checked) — confirm callers only
// use byte-sized destinations or use the byte capacity in those expressions.
263template <
typename TDestValue,
typename TDestCapacity,
typename TSourceValue,
typename TSourceLength>
265_decompressBlock(TDestValue *dstBegin, TDestCapacity dstCapacity,
266 TSourceValue *srcBegin, TSourceLength srcLength, CompressionContext<detail::bgzf_compression> & ctx)
268 const size_t BLOCK_HEADER_LENGTH = DefaultPageSize<detail::bgzf_compression>::BLOCK_HEADER_LENGTH;
269 const size_t BLOCK_FOOTER_LENGTH = DefaultPageSize<detail::bgzf_compression>::BLOCK_FOOTER_LENGTH;
// Preconditions, checked in debug builds only: the source must be a
// byte-sized element type.
271 assert(
sizeof(TSourceValue) == 1u);
272 assert(
sizeof(
unsigned) == 4u);
// A block must hold at least one payload byte besides header and footer.
276 if (srcLength <= BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH)
277 throw io_error(
"BGZF block too short.");
279 if (!detail::bgzf_compression::validate_header(
std::span{srcBegin, srcLength}))
280 throw io_error(
"Invalid BGZF block header.");
// The header field at byte offset 16 stores the total block length minus
// one; it must agree with the length the caller passed in.
282 size_t compressedLen = _bgzfUnpack16(srcBegin + 16) + 1u;
283 if (compressedLen != srcLength)
284 throw io_error(
"BGZF compressed size mismatch.");
// Feed zlib only the payload between header and footer.
290 ctx.strm.next_in = (Bytef *)(srcBegin + BLOCK_HEADER_LENGTH);
291 ctx.strm.next_out = (Bytef *)(dstBegin);
292 ctx.strm.avail_in = srcLength - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH;
293 ctx.strm.avail_out = dstCapacity *
sizeof(TDestValue);
// Z_FINISH asks for the whole payload to be inflated in this single call;
// any result other than Z_STREAM_END is treated as "output did not fit".
295 int status = inflate(&ctx.strm, Z_FINISH);
296 if (status != Z_STREAM_END)
// Release zlib's state before reporting the failure.
298 inflateEnd(&ctx.strm);
299 throw io_error(
"Inflation failed. Decompressed BGZF data is too big.");
302 status = inflateEnd(&ctx.strm);
304 throw io_error(
"BGZF inflateEnd() failed.");
// CRC-32 over the bytes that were just produced.
311 unsigned crc = crc32(crc32(0u, NULL, 0u), (Bytef *)(dstBegin), dstCapacity - ctx.strm.avail_out);
// Move to the footer: first the stored CRC-32, then the stored uncompressed
// size; both must match what was actually inflated.
313 srcBegin += compressedLen - BLOCK_FOOTER_LENGTH;
314 if (_bgzfUnpack32(srcBegin) != crc)
315 throw io_error(
"BGZF wrong checksum.");
317 if (_bgzfUnpack32(srcBegin + 4) != dstCapacity - ctx.strm.avail_out)
318 throw io_error(
"BGZF size mismatch.");
// Convert the produced amount into a count of TDestValue elements.
320 return (dstCapacity - ctx.strm.avail_out) /
sizeof(TDestValue);
The <algorithm> header from C++20's standard library.
Provides various transformation traits used by the range module.
T hardware_concurrency(T... args)
Provides exceptions used in the I/O module.
Provides std::span from the C++20 standard library.
Provides utility functions for bit twiddling.
T uninitialized_copy(T... args)