// Build guard: refuse to compile unless zlib support (SEQAN3_HAS_ZLIB) is enabled
// or the header is only being compiled as part of a header test (SEQAN3_HEADER_TEST).
26#if !defined(SEQAN3_HAS_ZLIB) && !defined(SEQAN3_HEADER_TEST)
27# error "This file cannot be used when building without GZip-support."
// The code that follows is conditional on SEQAN3_HAS_ZLIB
// (the matching #endif lines are not visible in this excerpt).
30#if defined(SEQAN3_HAS_ZLIB)
// All utilities below live in seqan3::contrib.
34namespace seqan3::contrib
// Tail of a 28-element char initializer; the declaration line that names the
// array is not visible in this excerpt.
// NOTE(review): the bytes start with the gzip magic 0x1f 0x8b, carry the
// 'B','C' (0x42 0x43) extra-subfield id and a stored size field of 0x001b
// (27 = 28 - 1), so this is presumably the BGZF end-of-file marker block.
// Confirm against the missing declaration.
48 {
'\x1f',
'\x8b',
'\x08',
'\x04',
'\x00',
'\x00',
'\x00',
'\x00',
'\x00',
'\xff',
'\x06',
'\x00',
'\x42',
'\x43',
49 '\x02',
'\x00',
'\x1b',
'\x00',
'\x03',
'\x00',
'\x00',
'\x00',
'\x00',
'\x00',
'\x00',
'\x00',
'\x00',
'\x00'}};
// Primary template of the compression context, selected by the algorithm tag TAlgTag.
// Specializations for detail::gz_compression and detail::bgzf_compression appear below.
// (The body of the primary template is not visible in this excerpt.)
51template <
typename TAlgTag>
52struct CompressionContext
// Forward declaration of the page-size trait; it is specialized per algorithm tag
// (see the detail::bgzf_compression specialization below).
55template <
typename TAlgTag>
56struct DefaultPageSize;
// Context specialization for gz streams.
// NOTE(review): the body is not visible in this excerpt; the init and block
// functions below access a `strm` member on it and hand it to zlib calls.
59struct CompressionContext<detail::gz_compression>
// Context specialization for BGZF: inherits everything from the gz context and
// adds BGZF header bookkeeping.
70struct CompressionContext<detail::bgzf_compression> : CompressionContext<detail::gz_compression>
// Block header length in bytes, defined as the size of the BGZF magic header array.
72 static constexpr size_t BLOCK_HEADER_LENGTH = detail::bgzf_compression::magic_header.size();
// NOTE(review): not read or written by any code visible here; presumably a
// cursor into the header while a block is parsed. Confirm at the call sites.
73 unsigned char headerPos;
// Page-size trait for BGZF: VALUE is the block size limit minus the header,
// footer and zlib overhead constants defined here.
77struct DefaultPageSize<detail::bgzf_compression>
// Size limit for one complete block: 64 KiB.
79 static unsigned const MAX_BLOCK_SIZE = 64 * 1024;
// Bytes reserved behind the compressed data; _compressBlock stores two 32-bit
// values there (CRC32 of the input and the input size in bytes).
80 static unsigned const BLOCK_FOOTER_LENGTH = 8;
// NOTE(review): presumably the worst-case per-block overhead of deflate when
// data cannot be compressed; verify against the zlib documentation.
82 static unsigned const ZLIB_BLOCK_OVERHEAD = 5;
// Mirrors the header length of the BGZF CompressionContext so all sizes are
// available from this trait (the enclosing declaration lines are not visible here).
88 BLOCK_HEADER_LENGTH = CompressionContext<detail::bgzf_compression>::BLOCK_HEADER_LENGTH
// Payload capacity = block size limit - header - footer - zlib overhead.
90 static unsigned const VALUE = MAX_BLOCK_SIZE - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH - ZLIB_BLOCK_OVERHEAD;
// Prepares ctx.strm for compression via deflateInit2().
// Throws io_error when the initialization fails (the status check that guards
// the throw is not visible in this excerpt).
101inline void compressInit(CompressionContext<detail::gz_compression> & ctx)
// Negative window bits request a raw deflate stream, i.e. zlib writes no
// header or trailer of its own (see the zlib manual for deflateInit2).
103 int const GZIP_WINDOW_BITS = -15;
104 int const Z_DEFAULT_MEM_LEVEL = 8;
// NULL allocator hooks: zlib uses its default allocation routines.
106 ctx.strm.zalloc = NULL;
107 ctx.strm.zfree = NULL;
// Fastest compression level (Z_BEST_SPEED) with the default strategy.
114 deflateInit2(&ctx.strm, Z_BEST_SPEED, Z_DEFLATED, GZIP_WINDOW_BITS, Z_DEFAULT_MEM_LEVEL, Z_DEFAULT_STRATEGY);
116 throw io_error(
"Calling deflateInit2() failed for gz file.");
// BGZF overload: forwards to the gz overload by viewing the context as its
// gz base class, so both formats share the same deflate setup.
123inline void compressInit(CompressionContext<detail::bgzf_compression> & ctx)
125 compressInit(
static_cast<CompressionContext<detail::gz_compression> &
>(ctx));
// Reads a 16-bit value from `buffer` and returns it after passing it through
// detail::to_little_endian.
// NOTE(review): the lines that declare and fill `tmp` are not visible in this excerpt.
133inline uint16_t _bgzfUnpack16(
char const * buffer)
137 return detail::to_little_endian(tmp);
// Reads a 32-bit value from `buffer` and returns it after passing it through
// detail::to_little_endian.
// NOTE(review): the lines that declare and fill `tmp` are not visible in this excerpt.
140inline uint32_t _bgzfUnpack32(
char const * buffer)
144 return detail::to_little_endian(tmp);
// Stores a 16-bit `value` into `buffer` after converting it with
// detail::to_little_endian.
// NOTE(review): the opening line of the copy call is missing from this excerpt;
// the visible argument marks the end of the sizeof(uint16_t) bytes of `value`.
151inline void _bgzfPack16(
char * buffer, uint16_t value)
153 value = detail::to_little_endian(value);
155 reinterpret_cast<char *
>(&value) +
sizeof(uint16_t),
// Stores a 32-bit `value` into `buffer` after converting it with
// detail::to_little_endian.
// NOTE(review): the opening line of the copy call is missing from this excerpt;
// the visible argument marks the end of the sizeof(uint32_t) bytes of `value`.
159inline void _bgzfPack32(
char * buffer, uint32_t value)
161 value = detail::to_little_endian(value);
163 reinterpret_cast<char *
>(&value) +
sizeof(uint32_t),
// Compresses one source buffer into a single BGZF block located at dstBegin.
//
// dstBegin, dstCapacity: destination buffer and its capacity; TDestValue must be
//                        one byte wide (asserted below).
// srcBegin, srcLength:   source elements; srcLength * sizeof(TSourceValue) bytes
//                        are fed to deflate.
// ctx:                   BGZF context whose zlib stream (ctx.strm) is used and
//                        ended by this call.
// Returns the total block length in bytes (header + compressed data + footer).
// Throws io_error if deflate() does not finish with Z_STREAM_END or if the
// deflateEnd() failure branch is taken (its status check is not visible in
// this excerpt).
171template <
typename TDestValue,
typename TDestCapacity,
typename TSourceValue,
typename TSourceLength>
172inline TDestCapacity _compressBlock(TDestValue * dstBegin,
173 TDestCapacity dstCapacity,
174 TSourceValue * srcBegin,
175 TSourceLength srcLength,
176 CompressionContext<detail::bgzf_compression> & ctx)
178 const size_t BLOCK_HEADER_LENGTH = DefaultPageSize<detail::bgzf_compression>::BLOCK_HEADER_LENGTH;
179 const size_t BLOCK_FOOTER_LENGTH = DefaultPageSize<detail::bgzf_compression>::BLOCK_FOOTER_LENGTH;
// Preconditions: room for more than header + footer, byte-sized destination
// elements, and a 4-byte `unsigned`.
181 assert(dstCapacity > BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH);
182 assert(
sizeof(TDestValue) == 1u);
183 assert(
sizeof(
unsigned) == 4u);
// Point zlib at the source bytes and at the destination just past the header;
// avail_out leaves the footer bytes untouched.
190 ctx.strm.next_in = (Bytef *)(srcBegin);
191 ctx.strm.next_out = (Bytef *)(dstBegin + BLOCK_HEADER_LENGTH);
192 ctx.strm.avail_in = srcLength *
sizeof(TSourceValue);
193 ctx.strm.avail_out = dstCapacity - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH;
// Z_FINISH asks for the whole input to be compressed in this one call; any
// status other than Z_STREAM_END is treated as "output did not fit".
195 int status = deflate(&ctx.strm, Z_FINISH);
196 if (status != Z_STREAM_END)
// Release the zlib state before reporting the failure.
198 deflateEnd(&ctx.strm);
199 throw io_error(
"Deflation failed. Compressed BGZF data is too big.");
202 status = deflateEnd(&ctx.strm);
204 throw io_error(
"BGZF deflateEnd() failed.");
// len is the complete block length; it is stored as (len - 1) at byte offset 16
// of the header, which _decompressBlock reads back as value + 1.
210 size_t len = dstCapacity - ctx.strm.avail_out;
211 _bgzfPack16(dstBegin + 16, len - 1);
// Footer: CRC32 of the uncompressed input followed by its size in bytes.
213 dstBegin += len - BLOCK_FOOTER_LENGTH;
214 _bgzfPack32(dstBegin, crc32(crc32(0u, NULL, 0u), (Bytef *)(srcBegin), srcLength *
sizeof(TSourceValue)));
215 _bgzfPack32(dstBegin + 4, srcLength *
sizeof(TSourceValue));
217 return dstCapacity - ctx.strm.avail_out;
// Prepares ctx.strm for decompression via inflateInit2().
// Throws io_error when the initialization fails (the status check that guards
// the throw is not visible in this excerpt).
224inline void decompressInit(CompressionContext<detail::gz_compression> & ctx)
// Negative window bits: the input is treated as a raw deflate stream without a
// zlib/gzip header (see the zlib manual for inflateInit2).
226 int const GZIP_WINDOW_BITS = -15;
// NULL allocator hooks: zlib uses its default allocation routines.
228 ctx.strm.zalloc = NULL;
229 ctx.strm.zfree = NULL;
230 int status = inflateInit2(&ctx.strm, GZIP_WINDOW_BITS);
232 throw io_error(
"GZip inflateInit2() failed.");
// BGZF overload: forwards to the gz overload by viewing the context as its
// gz base class, so both formats share the same inflate setup.
239inline void decompressInit(CompressionContext<detail::bgzf_compression> & ctx)
241 decompressInit(
static_cast<CompressionContext<detail::gz_compression> &
>(ctx));
// Decompresses one complete BGZF block from srcBegin into dstBegin.
//
// dstBegin, dstCapacity: destination buffer; dstCapacity * sizeof(TDestValue)
//                        bytes are offered to inflate.
// srcBegin, srcLength:   the whole block (header, data, footer); TSourceValue
//                        must be one byte wide (asserted below).
// ctx:                   BGZF context whose zlib stream (ctx.strm) is used and
//                        ended by this call.
// Returns the number of TDestValue elements produced.
// Throws io_error when the block is too short, the header is rejected, the
// stored block size disagrees with srcLength, inflate() does not finish with
// Z_STREAM_END, the inflateEnd() failure branch is taken (its status check is
// not visible in this excerpt), or the footer's CRC32 / size do not match the output.
249template <
typename TDestValue,
typename TDestCapacity,
typename TSourceValue,
typename TSourceLength>
250inline TDestCapacity _decompressBlock(TDestValue * dstBegin,
251 TDestCapacity dstCapacity,
252 TSourceValue * srcBegin,
253 TSourceLength srcLength,
254 CompressionContext<detail::bgzf_compression> & ctx)
256 const size_t BLOCK_HEADER_LENGTH = DefaultPageSize<detail::bgzf_compression>::BLOCK_HEADER_LENGTH;
257 const size_t BLOCK_FOOTER_LENGTH = DefaultPageSize<detail::bgzf_compression>::BLOCK_FOOTER_LENGTH;
// Preconditions: byte-sized source elements and a 4-byte `unsigned`.
259 assert(
sizeof(TSourceValue) == 1u);
260 assert(
sizeof(
unsigned) == 4u);
// A valid block must contain more than just a header and a footer.
264 if (srcLength <= BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH)
265 throw io_error(
"BGZF block too short.");
267 if (!detail::bgzf_compression::validate_header(
std::span{srcBegin, srcLength}))
268 throw io_error(
"Invalid BGZF block header.");
// The header stores (block length - 1) at byte offset 16; it has to agree with
// the number of bytes handed in.
270 size_t compressedLen = _bgzfUnpack16(srcBegin + 16) + 1u;
271 if (compressedLen != srcLength)
272 throw io_error(
"BGZF compressed size mismatch.");
// Input is the data between header and footer; output may use the entire
// destination buffer.
277 ctx.strm.next_in = (Bytef *)(srcBegin + BLOCK_HEADER_LENGTH);
278 ctx.strm.next_out = (Bytef *)(dstBegin);
279 ctx.strm.avail_in = srcLength - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH;
280 ctx.strm.avail_out = dstCapacity *
sizeof(TDestValue);
// Z_FINISH asks for the whole block to be inflated in this one call; any status
// other than Z_STREAM_END is reported as a failure.
282 int status = inflate(&ctx.strm, Z_FINISH);
283 if (status != Z_STREAM_END)
// Release the zlib state before reporting the failure.
285 inflateEnd(&ctx.strm);
286 throw io_error(
"Inflation failed. Decompressed BGZF data is too big.");
289 status = inflateEnd(&ctx.strm);
291 throw io_error(
"BGZF inflateEnd() failed.");
// CRC32 over the output, with dstCapacity - avail_out used as the byte count.
297 unsigned crc = crc32(crc32(0u, NULL, 0u), (Bytef *)(dstBegin), dstCapacity - ctx.strm.avail_out);
// Move to the footer: first 4 bytes hold the CRC32, the next 4 the uncompressed size.
299 srcBegin += compressedLen - BLOCK_FOOTER_LENGTH;
300 if (_bgzfUnpack32(srcBegin) != crc)
301 throw io_error(
"BGZF wrong checksum.");
303 if (_bgzfUnpack32(srcBegin + 4) != dstCapacity - ctx.strm.avail_out)
304 throw io_error(
"BGZF size mismatch.");
// Convert the produced byte count into a count of destination elements.
306 return (dstCapacity - ctx.strm.avail_out) /
sizeof(TDestValue);
Provides seqan3::contrib::bgzf_thread_count.
Provides various transformation traits used by the range module.
Provides exceptions used in the I/O module.
Provides utility functions for bit twiddling.
T uninitialized_copy(T... args)