29#if !defined(SEQAN3_HAS_ZLIB) && !defined(SEQAN3_HEADER_TEST)
30# error "This file cannot be used when building without GZip-support."
33#if defined(SEQAN3_HAS_ZLIB)
37namespace seqan3::contrib
// Initializer for a 28-byte constant whose declarator is not visible in this excerpt.
// NOTE(review): presumably the BGZF end-of-file marker block - confirm against the declaration.
// The leading bytes 0x1f 0x8b 0x08 0x04 look like a gzip member header (deflate method, FEXTRA flag
// set) as described in RFC 1952 - TODO confirm.
// Bytes 16-17 (0x1b 0x00) read as little-endian 27, i.e. the total length (28) minus one, which is the
// same field that _compressBlock fills with _bgzfPack16(dstBegin + 16, len - 1).
51 {
'\x1f',
'\x8b',
'\x08',
'\x04',
'\x00',
'\x00',
'\x00',
'\x00',
'\x00',
'\xff',
'\x06',
'\x00',
'\x42',
'\x43',
52 '\x02',
'\x00',
'\x1b',
'\x00',
'\x03',
'\x00',
'\x00',
'\x00',
'\x00',
'\x00',
'\x00',
'\x00',
'\x00',
'\x00'}};
// Primary template of the per-algorithm compression state, selected by the tag type TAlgTag.
// This file specializes it for detail::gz_compression and detail::bgzf_compression.
// NOTE(review): the body of the primary template is not visible in this excerpt.
54template <
typename TAlgTag>
55struct CompressionContext
// Declaration of the page-size trait, keyed by the compression tag type TAlgTag.
// It is only declared here (no body), so it can be used solely through explicit specializations
// such as DefaultPageSize<detail::bgzf_compression>.
58template <
typename TAlgTag>
59struct DefaultPageSize;
// Compression state for plain gz streams.
// The init functions in this file access a member named strm (ctx.strm.zalloc, ctx.strm.zfree) and pass
// its address to deflateInit2() / inflateInit2().
// NOTE(review): the member declarations of this specialization are not visible in this excerpt.
62struct CompressionContext<detail::gz_compression>
// Compression state for BGZF streams: derives from the gz context and adds BGZF-specific members.
73struct CompressionContext<detail::bgzf_compression> : CompressionContext<detail::gz_compression>
// Length of a BGZF block header, defined as the size of detail::bgzf_compression::magic_header.
75 static constexpr size_t BLOCK_HEADER_LENGTH = detail::bgzf_compression::magic_header.size();
// NOTE(review): headerPos is neither read nor written in the visible part of this file - presumably
// a position inside the block header used by the stream buffer; confirm against its users.
76 unsigned char headerPos;
// Size constants describing a single BGZF block.
80struct DefaultPageSize<detail::bgzf_compression>
// Upper bound for one complete block: 64 KiB.
82 static unsigned const MAX_BLOCK_SIZE = 64 * 1024;
// Bytes reserved behind the compressed payload. _compressBlock stores two 32-bit values there
// (a CRC32 and the input size), i.e. 4 + 4 = 8 bytes.
83 static unsigned const BLOCK_FOOTER_LENGTH = 8;
// Extra bytes subtracted from the usable payload size below.
// NOTE(review): presumably the per-block overhead of a stored (uncompressed) deflate block - confirm.
85 static unsigned const ZLIB_BLOCK_OVERHEAD = 5;
// Mirrors the header length of the BGZF compression context inside this trait.
// NOTE(review): the declaration enclosing this enumerator is not visible in this excerpt.
91 BLOCK_HEADER_LENGTH = CompressionContext<detail::bgzf_compression>::BLOCK_HEADER_LENGTH
// Payload size per block: the maximal block size minus header, footer and zlib overhead.
93 static unsigned const VALUE = MAX_BLOCK_SIZE - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH - ZLIB_BLOCK_OVERHEAD;
// Prepares the zlib stream ctx.strm for compression.
// Sets the zalloc and zfree hooks to NULL and calls deflateInit2() with Z_BEST_SPEED, the Z_DEFLATED
// method, a window-bits value of -15, memory level 8 and Z_DEFAULT_STRATEGY.
// Throws io_error with the message below on the failure path.
// NOTE(review): the status check that guards the throw is not visible in this excerpt.
104inline void compressInit(CompressionContext<detail::gz_compression> & ctx)
// Per the zlib manual a negative window-bits value selects a raw deflate stream without a
// zlib/gzip wrapper - TODO confirm this is the intent here.
106 int const GZIP_WINDOW_BITS = -15;
107 int const Z_DEFAULT_MEM_LEVEL = 8;
109 ctx.strm.zalloc = NULL;
110 ctx.strm.zfree = NULL;
117 deflateInit2(&ctx.strm, Z_BEST_SPEED, Z_DEFLATED, GZIP_WINDOW_BITS, Z_DEFAULT_MEM_LEVEL, Z_DEFAULT_STRATEGY);
119 throw io_error(
"Calling deflateInit2() failed for gz file.");
// BGZF overload of compressInit: casts ctx to its gz base class and forwards to the gz overload.
// NOTE(review): any statements following the forwarded call are not visible in this excerpt.
126inline void compressInit(CompressionContext<detail::bgzf_compression> & ctx)
128 compressInit(
static_cast<CompressionContext<detail::gz_compression> &
>(ctx));
// Reads a 16-bit unsigned value from buffer and returns it after detail::to_little_endian().
// NOTE(review): the lines that declare tmp and fill it from buffer are not visible in this excerpt -
// presumably a byte-wise copy of sizeof(uint16_t) bytes; confirm.
136inline uint16_t _bgzfUnpack16(
char const * buffer)
140 return detail::to_little_endian(tmp);
// Reads a 32-bit unsigned value from buffer and returns it after detail::to_little_endian().
// NOTE(review): the lines that declare tmp and fill it from buffer are not visible in this excerpt -
// presumably a byte-wise copy of sizeof(uint32_t) bytes; confirm.
143inline uint32_t _bgzfUnpack32(
char const * buffer)
147 return detail::to_little_endian(tmp);
// Stores a 16-bit value into buffer.
// The value is first passed through detail::to_little_endian(); afterwards its object representation
// is addressed as a char range ending at (char *)&value + sizeof(uint16_t).
// NOTE(review): the call consuming that range is only partially visible in this excerpt -
// presumably a byte copy into buffer; confirm.
154inline void _bgzfPack16(
char * buffer, uint16_t value)
156 value = detail::to_little_endian(value);
158 reinterpret_cast<char *
>(&value) +
sizeof(uint16_t),
// Stores a 32-bit value into buffer.
// The value is first passed through detail::to_little_endian(); afterwards its object representation
// is addressed as a char range ending at (char *)&value + sizeof(uint32_t).
// NOTE(review): the call consuming that range is only partially visible in this excerpt -
// presumably a byte copy into buffer; confirm.
162inline void _bgzfPack32(
char * buffer, uint32_t value)
164 value = detail::to_little_endian(value);
166 reinterpret_cast<char *
>(&value) +
sizeof(uint32_t),
// Compresses one source range into a single BGZF block written to dstBegin.
//
// dstBegin    - start of the output buffer; its element type must be one byte wide (asserted).
// dstCapacity - size of the output buffer; must exceed header plus footer length (asserted).
// srcBegin    - start of the uncompressed input.
// srcLength   - number of input elements; the byte count is srcLength * sizeof(TSourceValue).
// ctx         - BGZF compression context whose zlib stream is used for deflate().
//
// Returns the total number of bytes of the produced block (header + payload + footer).
// Throws io_error if deflate() does not finish with Z_STREAM_END or on the deflateEnd() failure path.
// NOTE(review): several lines between the assertions and the stream setup are not visible in this
// excerpt (presumably writing the block header and initialising the stream) - confirm.
174template <
typename TDestValue,
typename TDestCapacity,
typename TSourceValue,
typename TSourceLength>
175inline TDestCapacity _compressBlock(TDestValue * dstBegin,
176 TDestCapacity dstCapacity,
177 TSourceValue * srcBegin,
178 TSourceLength srcLength,
179 CompressionContext<detail::bgzf_compression> & ctx)
181 const size_t BLOCK_HEADER_LENGTH = DefaultPageSize<detail::bgzf_compression>::BLOCK_HEADER_LENGTH;
182 const size_t BLOCK_FOOTER_LENGTH = DefaultPageSize<detail::bgzf_compression>::BLOCK_FOOTER_LENGTH;
// Preconditions are checked with assert only, so they vanish when NDEBUG is defined.
184 assert(dstCapacity > BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH);
185 assert(
sizeof(TDestValue) == 1u);
186 assert(
sizeof(
unsigned) == 4u);
// Point zlib at the input and at the output area behind the header; the footer bytes are held back
// from avail_out so that they stay free for the CRC and size fields written below.
193 ctx.strm.next_in = (Bytef *)(srcBegin);
194 ctx.strm.next_out = (Bytef *)(dstBegin + BLOCK_HEADER_LENGTH);
195 ctx.strm.avail_in = srcLength *
sizeof(TSourceValue);
196 ctx.strm.avail_out = dstCapacity - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH;
// Z_FINISH asks for the whole input to be compressed in this single call; any other result than
// Z_STREAM_END is treated as an error and the stream is released before throwing.
198 int status = deflate(&ctx.strm, Z_FINISH);
199 if (status != Z_STREAM_END)
201 deflateEnd(&ctx.strm);
202 throw io_error(
"Deflation failed. Compressed BGZF data is too big.");
// NOTE(review): the check on the deflateEnd() status that guards the next throw is not visible here.
205 status = deflateEnd(&ctx.strm);
207 throw io_error(
"BGZF deflateEnd() failed.");
// avail_out started at capacity minus header minus footer, so this difference equals
// header + compressed payload + footer, i.e. the full block length.
213 size_t len = dstCapacity - ctx.strm.avail_out;
// Store the block length minus one as a 16-bit value at byte offset 16 of the header.
214 _bgzfPack16(dstBegin + 16, len - 1);
// Move to the footer and write the CRC32 of the uncompressed input followed by its size in bytes.
216 dstBegin += len - BLOCK_FOOTER_LENGTH;
217 _bgzfPack32(dstBegin, crc32(crc32(0u, NULL, 0u), (Bytef *)(srcBegin), srcLength *
sizeof(TSourceValue)));
218 _bgzfPack32(dstBegin + 4, srcLength *
sizeof(TSourceValue));
220 return dstCapacity - ctx.strm.avail_out;
// Prepares the zlib stream ctx.strm for decompression.
// Sets the zalloc and zfree hooks to NULL and calls inflateInit2() with a window-bits value of -15.
// Throws io_error with the message below on the failure path.
// NOTE(review): the status check that guards the throw is not visible in this excerpt.
227inline void decompressInit(CompressionContext<detail::gz_compression> & ctx)
// Per the zlib manual a negative window-bits value selects raw deflate data without a
// zlib/gzip wrapper - TODO confirm this is the intent here.
229 int const GZIP_WINDOW_BITS = -15;
231 ctx.strm.zalloc = NULL;
232 ctx.strm.zfree = NULL;
233 int status = inflateInit2(&ctx.strm, GZIP_WINDOW_BITS);
235 throw io_error(
"GZip inflateInit2() failed.");
// BGZF overload of decompressInit: casts ctx to its gz base class and forwards to the gz overload.
// NOTE(review): any statements following the forwarded call are not visible in this excerpt.
242inline void decompressInit(CompressionContext<detail::bgzf_compression> & ctx)
244 decompressInit(
static_cast<CompressionContext<detail::gz_compression> &
>(ctx));
// Decompresses a single BGZF block from srcBegin into dstBegin and verifies its footer.
//
// dstBegin    - start of the output buffer.
// dstCapacity - capacity of the output buffer in elements of TDestValue.
// srcBegin    - start of the compressed block; its element type must be one byte wide (asserted).
// srcLength   - length of the compressed block; must match the length stored in the header.
// ctx         - BGZF compression context whose zlib stream is used for inflate().
//
// Returns the number of decompressed elements of type TDestValue.
// Throws io_error when the block is too short, the header is invalid, the stored block length differs
// from srcLength, inflate() does not end with Z_STREAM_END, on the inflateEnd() failure path, or
// when the stored CRC32 or the stored size does not match the decompressed data.
// NOTE(review): lines between the length check and the stream setup are not visible in this excerpt
// (presumably the stream initialisation) - confirm.
252template <
typename TDestValue,
typename TDestCapacity,
typename TSourceValue,
typename TSourceLength>
253inline TDestCapacity _decompressBlock(TDestValue * dstBegin,
254 TDestCapacity dstCapacity,
255 TSourceValue * srcBegin,
256 TSourceLength srcLength,
257 CompressionContext<detail::bgzf_compression> & ctx)
259 const size_t BLOCK_HEADER_LENGTH = DefaultPageSize<detail::bgzf_compression>::BLOCK_HEADER_LENGTH;
260 const size_t BLOCK_FOOTER_LENGTH = DefaultPageSize<detail::bgzf_compression>::BLOCK_FOOTER_LENGTH;
// These preconditions are checked with assert only, so they vanish when NDEBUG is defined.
262 assert(
sizeof(TSourceValue) == 1u);
263 assert(
sizeof(
unsigned) == 4u);
// A block must contain at least one payload byte in addition to header and footer.
267 if (srcLength <= BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH)
268 throw io_error(
"BGZF block too short.");
270 if (!detail::bgzf_compression::validate_header(
std::span{srcBegin, srcLength}))
271 throw io_error(
"Invalid BGZF block header.");
// The header stores the block length minus one as a 16-bit value at byte offset 16.
273 size_t compressedLen = _bgzfUnpack16(srcBegin + 16) + 1u;
274 if (compressedLen != srcLength)
275 throw io_error(
"BGZF compressed size mismatch.");
// Feed zlib only the payload between header and footer; avail_out is given in bytes.
280 ctx.strm.next_in = (Bytef *)(srcBegin + BLOCK_HEADER_LENGTH);
281 ctx.strm.next_out = (Bytef *)(dstBegin);
282 ctx.strm.avail_in = srcLength - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH;
283 ctx.strm.avail_out = dstCapacity *
sizeof(TDestValue);
// Z_FINISH asks for the whole payload to be inflated in this single call. Every status other than
// Z_STREAM_END is reported with the same message, whatever the actual cause.
285 int status = inflate(&ctx.strm, Z_FINISH);
286 if (status != Z_STREAM_END)
288 inflateEnd(&ctx.strm);
289 throw io_error(
"Inflation failed. Decompressed BGZF data is too big.");
// NOTE(review): the check on the inflateEnd() status that guards the next throw is not visible here.
292 status = inflateEnd(&ctx.strm);
294 throw io_error(
"BGZF inflateEnd() failed.");
// NOTE(review): the produced size is computed as dstCapacity - avail_out here and below, although
// avail_out was initialised with dstCapacity * sizeof(TDestValue). Both agree only when
// sizeof(TDestValue) is 1, which is not asserted for the destination type - verify against callers.
300 unsigned crc = crc32(crc32(0u, NULL, 0u), (Bytef *)(dstBegin), dstCapacity - ctx.strm.avail_out);
// Jump to the footer: first the stored CRC32, then the stored uncompressed size.
302 srcBegin += compressedLen - BLOCK_FOOTER_LENGTH;
303 if (_bgzfUnpack32(srcBegin) != crc)
304 throw io_error(
"BGZF wrong checksum.");
306 if (_bgzfUnpack32(srcBegin + 4) != dstCapacity - ctx.strm.avail_out)
307 throw io_error(
"BGZF size mismatch.");
309 return (dstCapacity - ctx.strm.avail_out) /
sizeof(TDestValue);
Provides seqan3::contrib::bgzf_thread_count.
Provides various transformation traits used by the range module.
Provides exceptions used in the I/O module.
Provides utility functions for bit twiddling.
T uninitialized_copy(T... args)