SeqAn3 3.4.0-rc.1
The Modern C++ library for sequence analysis.
Loading...
Searching...
No Matches
format_bam.hpp
Go to the documentation of this file.
1// SPDX-FileCopyrightText: 2006-2024 Knut Reinert & Freie Universität Berlin
2// SPDX-FileCopyrightText: 2016-2024 Knut Reinert & MPI für molekulare Genetik
3// SPDX-License-Identifier: BSD-3-Clause
4
10#pragma once
11
#include <bit>
#include <cstring>
#include <iterator>
#include <ranges>
#include <sstream>
#include <string>
#include <tuple>
#include <vector>
18
31
32namespace seqan3
33{
34
47class format_bam : private detail::format_sam_base
48{
49public:
53 // string_buffer is of type std::string and has some problems with pre-C++11 ABI
54 format_bam() = default;
55 format_bam(format_bam const &) = default;
56 format_bam & operator=(format_bam const &) = default;
57 format_bam(format_bam &&) = default;
59 ~format_bam() = default;
60
62
65
66protected:
67 template <typename stream_type, // constraints checked by file
68 typename seq_legal_alph_type,
69 typename ref_seqs_type,
70 typename ref_ids_type,
71 typename stream_pos_type,
72 typename seq_type,
73 typename id_type,
74 typename ref_seq_type,
75 typename ref_id_type,
76 typename ref_offset_type,
77 typename cigar_type,
78 typename flag_type,
79 typename mapq_type,
80 typename qual_type,
81 typename mate_type,
82 typename tag_dict_type,
83 typename e_value_type,
84 typename bit_score_type>
85 void read_alignment_record(stream_type & stream,
87 ref_seqs_type & ref_seqs,
89 stream_pos_type & position_buffer,
90 seq_type & seq,
91 qual_type & qual,
92 id_type & id,
93 ref_seq_type & SEQAN3_DOXYGEN_ONLY(ref_seq),
94 ref_id_type & ref_id,
95 ref_offset_type & ref_offset,
96 cigar_type & cigar_vector,
97 flag_type & flag,
98 mapq_type & mapq,
99 mate_type & mate,
100 tag_dict_type & tag_dict,
101 e_value_type & SEQAN3_DOXYGEN_ONLY(e_value),
102 bit_score_type & SEQAN3_DOXYGEN_ONLY(bit_score));
103
104 template <typename stream_type,
105 typename header_type,
106 typename seq_type,
107 typename id_type,
108 typename ref_seq_type,
109 typename ref_id_type,
110 typename cigar_type,
111 typename qual_type,
112 typename mate_type,
113 typename tag_dict_type>
114 void write_alignment_record([[maybe_unused]] stream_type & stream,
115 [[maybe_unused]] sam_file_output_options const & options,
116 [[maybe_unused]] header_type && header,
117 [[maybe_unused]] seq_type && seq,
118 [[maybe_unused]] qual_type && qual,
119 [[maybe_unused]] id_type && id,
120 [[maybe_unused]] ref_seq_type && SEQAN3_DOXYGEN_ONLY(ref_seq),
121 [[maybe_unused]] ref_id_type && ref_id,
122 [[maybe_unused]] std::optional<int32_t> ref_offset,
123 [[maybe_unused]] cigar_type && cigar_vector,
124 [[maybe_unused]] sam_flag const flag,
125 [[maybe_unused]] uint8_t const mapq,
126 [[maybe_unused]] mate_type && mate,
127 [[maybe_unused]] tag_dict_type && tag_dict,
128 [[maybe_unused]] double SEQAN3_DOXYGEN_ONLY(e_value),
129 [[maybe_unused]] double SEQAN3_DOXYGEN_ONLY(bit_score));
130
132 template <typename stream_t, typename header_type>
133 void write_header(stream_t & stream, sam_file_output_options const & options, header_type & header);
134
135private:
137 bool header_was_read{false};
138
140 std::string string_buffer{};
141
143 struct alignment_record_core
144 { // naming corresponds to official SAM/BAM specifications
145 int32_t block_size;
146 int32_t refID;
147 int32_t pos;
148 uint32_t l_read_name : 8;
149 uint32_t mapq : 8;
150 uint32_t bin : 16;
151 uint32_t n_cigar_op : 16;
152 sam_flag flag;
153 int32_t l_seq;
154 int32_t next_refID;
155 int32_t next_pos;
156 int32_t tlen;
157 };
158
159 static_assert(sizeof(alignment_record_core) == 36);
160
161 // clang-format off
163 static constexpr std::array<uint8_t, 256> char_to_sam_rank
164 {
165 []() constexpr {
167
168 using index_t = std::make_unsigned_t<char>;
169
170 // ret['M'] = 0; set anyway by initialization
171 ret[static_cast<index_t>('I')] = 1;
172 ret[static_cast<index_t>('D')] = 2;
173 ret[static_cast<index_t>('N')] = 3;
174 ret[static_cast<index_t>('S')] = 4;
175 ret[static_cast<index_t>('H')] = 5;
176 ret[static_cast<index_t>('P')] = 6;
177 ret[static_cast<index_t>('=')] = 7;
178 ret[static_cast<index_t>('X')] = 8;
179
180 return ret;
181 }()
182 };
183 // clang-format on
184
186 static uint16_t reg2bin(int32_t beg, int32_t end) noexcept
187 {
188 --end;
189 if (beg >> 14 == end >> 14)
190 return ((1 << 15) - 1) / 7 + (beg >> 14);
191 if (beg >> 17 == end >> 17)
192 return ((1 << 12) - 1) / 7 + (beg >> 17);
193 if (beg >> 20 == end >> 20)
194 return ((1 << 9) - 1) / 7 + (beg >> 20);
195 if (beg >> 23 == end >> 23)
196 return ((1 << 6) - 1) / 7 + (beg >> 23);
197 if (beg >> 26 == end >> 26)
198 return ((1 << 3) - 1) / 7 + (beg >> 26);
199 return 0;
200 }
201
208 template <typename stream_view_type, std::integral number_type>
209 void read_integral_byte_field(stream_view_type && stream_view, number_type & target)
210 {
211 std::ranges::copy_n(std::ranges::begin(stream_view), sizeof(target), reinterpret_cast<char *>(&target));
212 }
213
215 template <std::integral number_type>
216 void read_integral_byte_field(std::string_view const str, number_type & target)
217 {
218 std::memcpy(&target, str.data(), sizeof(target));
219 }
220
226 template <typename stream_view_type>
227 void read_float_byte_field(stream_view_type && stream_view, float & target)
228 {
229 std::ranges::copy_n(std::ranges::begin(stream_view), sizeof(int32_t), reinterpret_cast<char *>(&target));
230 }
231
232 template <typename value_type>
233 int32_t read_sam_dict_vector(seqan3::detail::sam_tag_variant & variant,
234 std::string_view const str,
235 value_type const & SEQAN3_DOXYGEN_ONLY(value));
236
237 void read_sam_dict(std::string_view const tag_str, sam_tag_dictionary & target);
238
239 std::vector<cigar> parse_binary_cigar(std::string_view const cigar_str) const;
240
241 static std::string get_tag_dict_str(sam_tag_dictionary const & tag_dict);
242};
243
245template <typename stream_type, // constraints checked by file
246 typename seq_legal_alph_type,
247 typename ref_seqs_type,
248 typename ref_ids_type,
249 typename stream_pos_type,
250 typename seq_type,
251 typename id_type,
252 typename ref_seq_type,
253 typename ref_id_type,
254 typename ref_offset_type,
255 typename cigar_type,
256 typename flag_type,
257 typename mapq_type,
258 typename qual_type,
259 typename mate_type,
260 typename tag_dict_type,
261 typename e_value_type,
262 typename bit_score_type>
263inline void format_bam::read_alignment_record(stream_type & stream,
265 ref_seqs_type & ref_seqs,
267 stream_pos_type & position_buffer,
268 seq_type & seq,
269 qual_type & qual,
270 id_type & id,
271 ref_seq_type & SEQAN3_DOXYGEN_ONLY(ref_seq),
272 ref_id_type & ref_id,
273 ref_offset_type & ref_offset,
274 cigar_type & cigar_vector,
275 flag_type & flag,
276 mapq_type & mapq,
277 mate_type & mate,
278 tag_dict_type & tag_dict,
279 e_value_type & SEQAN3_DOXYGEN_ONLY(e_value),
280 bit_score_type & SEQAN3_DOXYGEN_ONLY(bit_score))
281{
282 static_assert(detail::decays_to_ignore_v<ref_offset_type>
283 || detail::is_type_specialisation_of_v<ref_offset_type, std::optional>,
284 "The ref_offset must be a specialisation of std::optional.");
285
286 static_assert(detail::decays_to_ignore_v<mapq_type> || std::same_as<mapq_type, uint8_t>,
287 "The type of field::mapq must be uint8_t.");
288
289 static_assert(detail::decays_to_ignore_v<flag_type> || std::same_as<flag_type, sam_flag>,
290 "The type of field::flag must be seqan3::sam_flag.");
291
292 auto stream_view = seqan3::detail::istreambuf(stream);
293
294 // Header
295 // -------------------------------------------------------------------------------------------------------------
296 if (!header_was_read)
297 {
298 // magic BAM string
299 if (!std::ranges::equal(stream_view | detail::take_exactly_or_throw(4), std::string_view{"BAM\1"}))
300 throw format_error{"File is not in BAM format."};
301
302 int32_t l_text{}; // length of header text including \0 character
303 int32_t n_ref{}; // number of reference sequences
304 int32_t l_name{}; // 1 + length of reference name including \0 character
305 int32_t l_ref{}; // length of reference sequence
306
307 read_integral_byte_field(stream_view, l_text);
308
309 if (l_text > 0) // header text is present
310 read_header(stream_view | detail::take_exactly_or_throw(l_text), header, ref_seqs, options);
311
312 read_integral_byte_field(stream_view, n_ref);
313
314 for (int32_t ref_idx = 0; ref_idx < n_ref; ++ref_idx)
315 {
316 read_integral_byte_field(stream_view, l_name);
317
318 string_buffer.resize(l_name - 1);
320 l_name - 1,
321 string_buffer.data()); // copy without \0 character
322 ++std::ranges::begin(stream_view); // skip \0 character
323
324 read_integral_byte_field(stream_view, l_ref);
325
326 if constexpr (detail::decays_to_ignore_v<ref_seqs_type>) // no reference information given
327 {
328 // If there was no header text, we parse reference sequences block as header information
329 if (l_text == 0)
330 {
331 auto & reference_ids = header.ref_ids();
332 // put the length of the reference sequence into ref_id_info
333 header.ref_id_info.emplace_back(l_ref, "");
334 // put the reference name into reference_ids
335 reference_ids.push_back(string_buffer);
336 // assign the reference name an ascending reference id (starts at index 0).
337 header.ref_dict.emplace(reference_ids.back(), reference_ids.size() - 1);
338 continue;
339 }
340 }
341
342 auto id_it = header.ref_dict.find(string_buffer);
343
344 // sanity checks of reference information to existing header object:
345 if (id_it == header.ref_dict.end()) // [unlikely]
346 {
347 throw format_error{detail::to_string("Unknown reference name '" + string_buffer
348 + "' found in BAM file header (header.ref_ids():",
349 header.ref_ids(),
350 ").")};
351 }
352 else if (id_it->second != ref_idx) // [unlikely]
353 {
354 throw format_error{detail::to_string("Reference id '",
355 string_buffer,
356 "' at position ",
357 ref_idx,
358 " does not correspond to the position ",
359 id_it->second,
360 " in the header (header.ref_ids():",
361 header.ref_ids(),
362 ").")};
363 }
364 else if (std::get<0>(header.ref_id_info[id_it->second]) != l_ref) // [unlikely]
365 {
366 throw format_error{"Provided reference has unequal length as specified in the header."};
367 }
368 }
369
370 header_was_read = true;
371
372 if (std::ranges::begin(stream_view) == std::ranges::end(stream_view)) // no records follow
373 return;
374 }
375
376 // read alignment record into buffer
377 // -------------------------------------------------------------------------------------------------------------
378 position_buffer = stream.tellg();
379
380 auto stream_it = detail::fast_istreambuf_iterator{*stream.rdbuf()};
381
382 alignment_record_core core;
383 std::string_view const core_str = stream_it.cache_bytes(sizeof(core));
384 std::ranges::copy(core_str, reinterpret_cast<char *>(&core));
385
386 if (core.refID >= static_cast<int32_t>(header.ref_ids().size()) || core.refID < -1) // [[unlikely]]
387 {
388 throw format_error{detail::to_string("Reference id index '",
389 core.refID,
390 "' is not in range of ",
391 "header.ref_ids(), which has size ",
392 header.ref_ids().size(),
393 ".")};
394 }
395 else if (core.refID > -1) // not unmapped
396 {
397 ref_id = core.refID; // field::ref_id
398 }
399
400 flag = core.flag; // field::flag
401 mapq = static_cast<uint8_t>(core.mapq); // field::mapq
402
403 if (core.pos > -1) // [[likely]]
404 ref_offset = core.pos; // field::ref_offset
405
406 if constexpr (!detail::decays_to_ignore_v<mate_type>) // field::mate
407 {
408 if (core.next_refID > -1)
409 get<0>(mate) = core.next_refID;
410
411 if (core.next_pos > -1) // [[likely]]
412 get<1>(mate) = core.next_pos;
413
414 get<2>(mate) = core.tlen;
415 }
416
417 // read id
418 // -------------------------------------------------------------------------------------------------------------
419 std::string_view record_str = stream_it.cache_bytes(core.block_size - (sizeof(alignment_record_core) - 4));
420 size_t considered_bytes{0};
421
422 if constexpr (!detail::decays_to_ignore_v<id_type>)
423 read_forward_range_field(record_str.substr(0, core.l_read_name - 1), id);
424
425 considered_bytes += core.l_read_name;
426
427 // read cigar string
428 // -------------------------------------------------------------------------------------------------------------
429 if constexpr (!detail::decays_to_ignore_v<cigar_type>)
430 cigar_vector = parse_binary_cigar(record_str.substr(considered_bytes, core.n_cigar_op * 4));
431
432 considered_bytes += core.n_cigar_op * 4;
433
434 // read sequence
435 // -------------------------------------------------------------------------------------------------------------
436 if constexpr (!detail::decays_to_ignore_v<seq_type>)
437 {
438 size_t const number_of_bytes = (core.l_seq + 1) / 2;
439 std::string_view const seq_str = record_str.substr(considered_bytes, number_of_bytes);
440
441 seq.resize(
442 core.l_seq
443 + 1 /* reserve one more in case size is uneven. will be corrected */); // TODO: .resize() is not generic
444
445 using alph_t = std::ranges::range_value_t<decltype(seq)>;
446 constexpr auto from_dna16 = detail::convert_through_char_representation<dna16sam, alph_t>;
447
448 // 1 byte encodes two sequence characters
449 for (size_t i = 0, j = 0; i < number_of_bytes; ++i, j += 2)
450 {
451 seq[j] = from_dna16[to_rank(dna16sam{}.assign_rank(std::min(15, static_cast<uint8_t>(seq_str[i]) >> 4)))];
452 seq[j + 1] =
453 from_dna16[to_rank(dna16sam{}.assign_rank(std::min(15, static_cast<uint8_t>(seq_str[i]) & 0x0f)))];
454 }
455
456 seq.resize(core.l_seq); // remove extra letter
457 }
458
459 considered_bytes += (core.l_seq + 1) / 2;
460
461 // read qual string
462 // -------------------------------------------------------------------------------------------------------------
463 if constexpr (!detail::decays_to_ignore_v<qual_type>)
464 {
465 std::string_view const qual_str = record_str.substr(considered_bytes, core.l_seq);
466 qual.resize(core.l_seq); // TODO: this is not generic
467
468 for (int32_t i = 0; i < core.l_seq; ++i)
469 qual[i] = assign_char_to(static_cast<char>(qual_str[i] + 33), std::ranges::range_value_t<qual_type>{});
470 }
471
472 considered_bytes += core.l_seq;
473
474 // All remaining optional fields if any: SAM tags dictionary
475 // -------------------------------------------------------------------------------------------------------------
476 if constexpr (!detail::decays_to_ignore_v<tag_dict_type>)
477 read_sam_dict(record_str.substr(considered_bytes), tag_dict);
478
479 // DONE READING - wrap up
480 // -------------------------------------------------------------------------------------------------------------
481 if constexpr (!detail::decays_to_ignore_v<cigar_type>)
482 {
483 int32_t const sc_front = soft_clipping_at_front(cigar_vector);
484
485 // Check cigar, if it matches ‘kSmN’, where ‘k’ equals lseq, ‘m’ is the reference sequence length in the
486 // alignment, and ‘S’ and ‘N’ are the soft-clipping and reference-clip, then the cigar string was larger
487 // than 65535 operations and is stored in the sam_tag_dictionary (tag GC).
488 if (core.l_seq != 0 && sc_front == core.l_seq)
489 {
490 if constexpr (detail::decays_to_ignore_v<tag_dict_type> | detail::decays_to_ignore_v<seq_type>)
491 { // maybe only throw in debug mode and otherwise return an empty alignment?
492 throw format_error{
493 detail::to_string("The cigar string '",
494 detail::get_cigar_string(cigar_vector),
495 "' suggests that the cigar string exceeded 65535 elements and was therefore ",
496 "stored in the optional field CG. You need to read in the field::tags and "
497 "field::seq in order to access this information.")};
498 }
499 else
500 {
501 auto it = tag_dict.find("CG"_tag);
502
503 if (it == tag_dict.end())
504 throw format_error{
505 detail::to_string("The cigar string '",
506 detail::get_cigar_string(cigar_vector),
507 "' suggests that the cigar string exceeded 65535 elements and was therefore ",
508 "stored in the optional field CG but this tag is not present in the given ",
509 "record.")};
510
511 cigar_vector = detail::parse_cigar(std::get<std::string>(it->second));
512 tag_dict.erase(it); // remove redundant information
513 }
514 }
515 }
516}
517
519template <typename stream_type,
520 typename header_type,
521 typename seq_type,
522 typename id_type,
523 typename ref_seq_type,
524 typename ref_id_type,
525 typename cigar_type,
526 typename qual_type,
527 typename mate_type,
528 typename tag_dict_type>
529inline void format_bam::write_alignment_record([[maybe_unused]] stream_type & stream,
530 [[maybe_unused]] sam_file_output_options const & options,
531 [[maybe_unused]] header_type && header,
532 [[maybe_unused]] seq_type && seq,
533 [[maybe_unused]] qual_type && qual,
534 [[maybe_unused]] id_type && id,
535 [[maybe_unused]] ref_seq_type && SEQAN3_DOXYGEN_ONLY(ref_seq),
536 [[maybe_unused]] ref_id_type && ref_id,
537 [[maybe_unused]] std::optional<int32_t> ref_offset,
538 [[maybe_unused]] cigar_type && cigar_vector,
539 [[maybe_unused]] sam_flag const flag,
540 [[maybe_unused]] uint8_t const mapq,
541 [[maybe_unused]] mate_type && mate,
542 [[maybe_unused]] tag_dict_type && tag_dict,
543 [[maybe_unused]] double SEQAN3_DOXYGEN_ONLY(e_value),
544 [[maybe_unused]] double SEQAN3_DOXYGEN_ONLY(bit_score))
545{
546 // ---------------------------------------------------------------------
547 // Type Requirements (as static asserts for user friendliness)
548 // ---------------------------------------------------------------------
549 static_assert((std::ranges::forward_range<seq_type> && alphabet<std::ranges::range_reference_t<seq_type>>),
550 "The seq object must be a std::ranges::forward_range over "
551 "letters that model seqan3::alphabet.");
552
553 static_assert((std::ranges::forward_range<id_type> && alphabet<std::ranges::range_reference_t<id_type>>),
554 "The id object must be a std::ranges::forward_range over "
555 "letters that model seqan3::alphabet.");
556
557 static_assert((std::ranges::forward_range<ref_seq_type> && alphabet<std::ranges::range_reference_t<ref_seq_type>>),
558 "The ref_seq object must be a std::ranges::forward_range "
559 "over letters that model seqan3::alphabet.");
560
561 if constexpr (!detail::decays_to_ignore_v<ref_id_type>)
562 {
563 static_assert((std::ranges::forward_range<ref_id_type> || std::integral<std::remove_reference_t<ref_id_type>>
564 || detail::is_type_specialisation_of_v<std::remove_cvref_t<ref_id_type>, std::optional>),
565 "The ref_id object must be a std::ranges::forward_range "
566 "over letters that model seqan3::alphabet or an integral or a std::optional<integral>.");
567 }
568
569 static_assert((std::ranges::forward_range<qual_type> && alphabet<std::ranges::range_reference_t<qual_type>>),
570 "The qual object must be a std::ranges::forward_range "
571 "over letters that model seqan3::alphabet.");
572
574 "The mate object must be a std::tuple of size 3 with "
575 "1) a std::ranges::forward_range with a value_type modelling seqan3::alphabet, "
576 "2) a std::integral or std::optional<std::integral>, and "
577 "3) a std::integral.");
578
579 static_assert(
580 ((std::ranges::forward_range<decltype(std::get<0>(mate))>
581 || std::integral<std::remove_cvref_t<decltype(std::get<0>(mate))>>
582 || detail::is_type_specialisation_of_v<
583 std::remove_cvref_t<decltype(std::get<0>(mate))>,
584 std::optional>)&&(std::integral<std::remove_cvref_t<decltype(std::get<1>(mate))>>
585 || detail::is_type_specialisation_of_v<
586 std::remove_cvref_t<decltype(std::get<1>(mate))>,
587 std::optional>)&&std::integral<std::remove_cvref_t<decltype(std::get<2>(mate))>>),
588 "The mate object must be a std::tuple of size 3 with "
589 "1) a std::ranges::forward_range with a value_type modelling seqan3::alphabet, "
590 "2) a std::integral or std::optional<std::integral>, and "
591 "3) a std::integral.");
592
593 static_assert(std::same_as<std::remove_cvref_t<tag_dict_type>, sam_tag_dictionary>,
594 "The tag_dict object must be of type seqan3::sam_tag_dictionary.");
595
596 if constexpr (detail::decays_to_ignore_v<header_type>)
597 {
598 throw format_error{"BAM can only be written with a header but you did not provide enough information! "
599 "You can either construct the output file with reference names and reference length "
600 "information and the header will be created for you, or you can access the `header` member "
601 "directly."};
602 }
603 else
604 {
605 // ---------------------------------------------------------------------
606 // logical Requirements
607 // ---------------------------------------------------------------------
608
609 if (ref_offset.has_value() && (ref_offset.value() + 1) < 0)
610 throw format_error{detail::to_string("The ref_offset object must be >= -1 but is: ", ref_offset)};
611
612 detail::fast_ostreambuf_iterator stream_it{*stream.rdbuf()};
613
614 // ---------------------------------------------------------------------
615 // Writing the BAM Header on first call
616 // ---------------------------------------------------------------------
617 if (!header_was_written)
618 {
619 write_header(stream, options, header);
620 header_was_written = true;
621 }
622
623 // ---------------------------------------------------------------------
624 // Writing the Record
625 // ---------------------------------------------------------------------
626 int32_t ref_length{};
627
628 // Compute the ref_length from given cigar_vector which is needed to fill field `bin`.
629 if (!std::ranges::empty(cigar_vector))
630 {
631 int32_t dummy_seq_length{};
632 for (auto & [count, operation] : cigar_vector)
633 detail::update_alignment_lengths(ref_length, dummy_seq_length, operation.to_char(), count);
634 }
635
636 if (cigar_vector.size() >= (1 << 16)) // must be written into the sam tag CG
637 {
638 tag_dict["CG"_tag] = detail::get_cigar_string(cigar_vector);
639 cigar_vector.resize(2);
640 cigar_vector[0] = cigar{static_cast<uint32_t>(std::ranges::distance(seq)), 'S'_cigar_operation};
641 cigar_vector[1] = cigar{static_cast<uint32_t>(ref_length), 'N'_cigar_operation};
642 }
643
644 std::string tag_dict_binary_str = get_tag_dict_str(tag_dict);
645
646 // Compute the value for the l_read_name field for the bam record.
647 // This value is stored including a trailing `0`, so at most 254 characters of the id can be stored, since
648 // the data type to store the value is uint8_t and 255 is the maximal size.
649 // If the id is empty a '*' is written instead, i.e. the written id is never an empty string and stores at least
650 // 2 bytes.
651 uint8_t read_name_size = std::min<uint8_t>(std::ranges::distance(id), 254) + 1;
652 read_name_size += static_cast<uint8_t>(read_name_size == 1); // need size two since empty id is stored as '*'.
653
654 alignment_record_core core{/* block_size */ 0, // will be initialised right after
655 /* refID */ -1, // will be initialised right after
656 /* pos */ ref_offset.value_or(-1),
657 /* l_read_name */ read_name_size,
658 /* mapq */ mapq,
659 /* bin */ reg2bin(ref_offset.value_or(-1), ref_length),
660 /* n_cigar_op */ static_cast<uint16_t>(cigar_vector.size()),
661 /* flag */ flag,
662 /* l_seq */ static_cast<int32_t>(std::ranges::distance(seq)),
663 /* next_refId */ -1, // will be initialised right after
664 /* next_pos */ get<1>(mate).value_or(-1),
665 /* tlen */ get<2>(mate)};
666
667 auto check_and_assign_id_to = [&header]([[maybe_unused]] auto & id_source, [[maybe_unused]] auto & id_target)
668 {
669 using id_t = std::remove_reference_t<decltype(id_source)>;
670
671 if constexpr (!detail::decays_to_ignore_v<id_t>)
672 {
673 if constexpr (std::integral<id_t>)
674 {
675 id_target = id_source;
676 }
677 else if constexpr (detail::is_type_specialisation_of_v<id_t, std::optional>)
678 {
679 id_target = id_source.value_or(-1);
680 }
681 else
682 {
683 if (!std::ranges::empty(id_source)) // otherwise default will remain (-1)
684 {
685 auto id_it = header.ref_dict.end();
686
687 if constexpr (std::ranges::contiguous_range<decltype(id_source)>
688 && std::ranges::sized_range<decltype(id_source)>
689 && std::ranges::borrowed_range<decltype(id_source)>)
690 {
691 id_it = header.ref_dict.find(
692 std::span{std::ranges::data(id_source), std::ranges::size(id_source)});
693 }
694 else
695 {
696 using header_ref_id_type = std::remove_reference_t<decltype(header.ref_ids()[0])>;
697
698 static_assert(
699 implicitly_convertible_to<decltype(id_source), header_ref_id_type>,
700 "The ref_id type is not convertible to the reference id information stored in the "
701 "reference dictionary of the header object.");
702
703 id_it = header.ref_dict.find(id_source);
704 }
705
706 if (id_it == header.ref_dict.end())
707 {
708 throw format_error{detail::to_string("Unknown reference name '",
709 id_source,
710 "' could "
711 "not be found in BAM header ref_dict: ",
712 header.ref_dict,
713 ".")};
714 }
715
716 id_target = id_it->second;
717 }
718 }
719 }
720 };
721
722 // initialise core.refID
723 check_and_assign_id_to(ref_id, core.refID);
724
725 // initialise core.next_refID
726 check_and_assign_id_to(get<0>(mate), core.next_refID);
727
728 // initialise core.block_size
729 core.block_size = sizeof(core) - 4 /*block_size excluded*/ + core.l_read_name + core.n_cigar_op * 4
730 + // each int32_t has 4 bytes
731 (core.l_seq + 1) / 2 + // bitcompressed seq
732 core.l_seq + // quality string
733 tag_dict_binary_str.size();
734
735 std::ranges::copy_n(reinterpret_cast<char *>(&core), sizeof(core), stream_it); // write core
736
737 if (std::ranges::empty(id)) // empty id is represented as * for backward compatibility
738 stream_it = '*';
739 else
740 std::ranges::copy_n(std::ranges::begin(id), core.l_read_name - 1, stream_it); // write read id
741 stream_it = '\0';
742
743 // write cigar
744 for (auto [cigar_count, op] : cigar_vector)
745 {
746 cigar_count = cigar_count << 4;
747 cigar_count |= static_cast<int32_t>(char_to_sam_rank[op.to_char()]);
748 std::ranges::copy_n(reinterpret_cast<char *>(&cigar_count), 4, stream_it);
749 }
750
751 // write seq (bit-compressed: dna16sam characters go into one byte)
752 using alph_t = std::ranges::range_value_t<seq_type>;
753 constexpr auto to_dna16 = detail::convert_through_char_representation<alph_t, dna16sam>;
754
755 auto sit = std::ranges::begin(seq);
756 for (int32_t sidx = 0; sidx < ((core.l_seq & 1) ? core.l_seq - 1 : core.l_seq); ++sidx, ++sit)
757 {
758 uint8_t compressed_chr = to_rank(to_dna16[to_rank(*sit)]) << 4;
759 ++sidx, ++sit;
760 compressed_chr |= to_rank(to_dna16[to_rank(*sit)]);
761 stream_it = static_cast<char>(compressed_chr);
762 }
763
764 if (core.l_seq & 1) // write one more
765 stream_it = static_cast<char>(to_rank(to_dna16[to_rank(*sit)]) << 4);
766
767 // write qual
768 if (std::ranges::empty(qual))
769 {
770 auto v = views::repeat_n(static_cast<char>(255), core.l_seq);
771 std::ranges::copy_n(v.begin(), core.l_seq, stream_it);
772 }
773 else
774 {
775 if (std::ranges::distance(qual) != core.l_seq)
776 throw format_error{detail::to_string("Expected quality of same length as sequence with size ",
777 core.l_seq,
778 ". Got quality with size ",
779 std::ranges::distance(qual),
780 " instead.")};
781
782 auto v = qual
783 | std::views::transform(
784 [](auto chr)
785 {
786 return static_cast<char>(to_rank(chr));
787 });
788 std::ranges::copy_n(v.begin(), core.l_seq, stream_it);
789 }
790
791 // write optional fields
792 stream << tag_dict_binary_str;
793 } // if constexpr (!detail::decays_to_ignore_v<header_type>)
794}
795
797template <typename stream_t, typename header_type>
798inline void format_bam::write_header(stream_t & stream, sam_file_output_options const & options, header_type & header)
799{
800 if constexpr (detail::decays_to_ignore_v<header_type>)
801 {
802 throw format_error{"BAM can only be written with a header but you did not provide enough information! "
803 "You can either construct the output file with reference names and reference length "
804 "information and the header will be created for you, or you can access the `header` member "
805 "directly."};
806 }
807 else
808 {
809 detail::fast_ostreambuf_iterator stream_it{*stream.rdbuf()};
810
811 std::ranges::copy_n("BAM\1", 4, stream_it); // Do not copy the null terminator
812
813 // write SAM header to temporary stream first to query its size.
815 detail::format_sam_base::write_header(os, options, header);
816#if SEQAN3_WORKAROUND_GCC_NO_CXX11_ABI
817 int32_t const l_text{static_cast<int32_t>(os.str().size())};
818#else
819 int32_t const l_text{static_cast<int32_t>(os.view().size())};
820#endif
821 std::ranges::copy_n(reinterpret_cast<char const *>(&l_text), 4, stream_it); // write text length
822
823#if SEQAN3_WORKAROUND_GCC_NO_CXX11_ABI
824 auto header_view = os.str();
825#else
826 auto header_view = os.view();
827#endif
828 std::ranges::copy(header_view, stream_it);
829
830 assert(header.ref_ids().size() < (1ull << 32));
831 int32_t const n_ref{static_cast<int32_t>(header.ref_ids().size())};
832 std::ranges::copy_n(reinterpret_cast<char const *>(&n_ref), 4, stream_it); // write number of references
833
834 for (int32_t ridx = 0; ridx < n_ref; ++ridx)
835 {
836 assert(header.ref_ids()[ridx].size() + 1 < (1ull << 32));
837 int32_t const l_name{static_cast<int32_t>(header.ref_ids()[ridx].size()) + 1}; // plus null character
838 std::ranges::copy_n(reinterpret_cast<char const *>(&l_name), 4, stream_it); // write l_name
839 // write reference name:
840 std::ranges::copy(header.ref_ids()[ridx], stream_it);
841 stream_it = '\0'; // ++ is not necessary for ostream_iterator
842 // write reference sequence length:
843 std::ranges::copy_n(reinterpret_cast<char *>(&get<0>(header.ref_id_info[ridx])), 4, stream_it);
844 }
845 }
846}
847
866template <typename value_type>
867inline int32_t format_bam::read_sam_dict_vector(seqan3::detail::sam_tag_variant & variant,
868 std::string_view const str,
869 value_type const & SEQAN3_DOXYGEN_ONLY(value))
870{
871 auto it = str.begin();
872
873 // Read vector size from string_view and advance `it`.
874 int32_t const vector_size = [&]()
875 {
876 int32_t size{};
877 read_integral_byte_field(std::string_view{it, str.end()}, size);
878 it += sizeof(size);
879 return size;
880 }();
881
882 int32_t bytes_left{vector_size};
883
884 std::vector<value_type> tmp_vector;
885 tmp_vector.reserve(vector_size);
886
887 value_type tmp{};
888
889 while (bytes_left > 0)
890 {
891 if constexpr (std::integral<value_type>)
892 read_integral_byte_field(std::string_view{it, str.end()}, tmp);
893 else if constexpr (std::same_as<value_type, float>)
894 read_float_byte_field(std::string_view{it, str.end()}, tmp);
895 else
896 static_assert(std::is_same_v<value_type, void>, "format_bam::read_sam_dict_vector: unsupported value_type");
897
898 it += sizeof(tmp);
899 tmp_vector.push_back(std::move(tmp));
900 --bytes_left;
901 }
902
903 variant = std::move(tmp_vector);
904
905 return vector_size;
906}
907
923inline void format_bam::read_sam_dict(std::string_view const tag_str, sam_tag_dictionary & target)
924{
925 /* Every BAM tag has the format "[TAG][TYPE_ID][VALUE]", where TAG is a two letter
926 name tag which is converted to a unique integer identifier and TYPE_ID is one character in [A,i,Z,H,B,f]
927 describing the type for the upcoming VALUES. If TYPE_ID=='B' it signals an array of
928 VALUE's and the inner value type is identified by the next character, one of [cCsSiIf], followed
929 by the length (int32_t) of the array, followed by the values.
930 */
931 auto it = tag_str.begin();
932
933 // Deduces int_t from passed argument.
934 auto parse_integer_into_target = [&]<std::integral int_t>(uint16_t const tag, int_t)
935 {
936 int_t tmp{};
937 read_integral_byte_field(std::string_view{it, tag_str.end()}, tmp);
938 target[tag] = static_cast<int32_t>(tmp); // readable sam format only allows int32_t
939 it += sizeof(tmp);
940 };
941
942 // Deduces array_value_t from passed argument.
943 auto parse_array_into_target = [&]<arithmetic array_value_t>(uint16_t const tag, array_value_t)
944 {
945 int32_t const count = read_sam_dict_vector(target[tag], std::string_view{it, tag_str.end()}, array_value_t{});
946 it += sizeof(int32_t) /*length is stored within the vector*/ + sizeof(array_value_t) * count;
947 };
948
949 // Read uint16_t from string_view and advance `it`.
950 auto parse_tag = [&]()
951 {
952 uint16_t tag = static_cast<uint16_t>(*it) << 8;
953 ++it; // skip char read before
954 tag |= static_cast<uint16_t>(*it);
955 ++it; // skip char read before
956 return tag;
957 };
958
959 while (it != tag_str.end())
960 {
961 uint16_t const tag = parse_tag();
962
963 char const type_id{*it};
964 ++it; // skip char read before
965
966 switch (type_id)
967 {
968 case 'A': // char
969 {
970 target[tag] = *it;
971 ++it; // skip char that has been read
972 break;
973 }
974 // all integer sizes are possible
975 case 'c': // int8_t
976 {
977 parse_integer_into_target(tag, int8_t{});
978 break;
979 }
980 case 'C': // uint8_t
981 {
982 parse_integer_into_target(tag, uint8_t{});
983 break;
984 }
985 case 's': // int16_t
986 {
987 parse_integer_into_target(tag, int16_t{});
988 break;
989 }
990 case 'S': // uint16_t
991 {
992 parse_integer_into_target(tag, uint16_t{});
993 break;
994 }
995 case 'i': // int32_t
996 {
997 parse_integer_into_target(tag, int32_t{});
998 break;
999 }
1000 case 'I': // uint32_t
1001 {
1002 parse_integer_into_target(tag, uint32_t{});
1003 break;
1004 }
1005 case 'f': // float
1006 {
1007 float tmp{};
1008 read_float_byte_field(std::string_view{it, tag_str.end()}, tmp);
1009 target[tag] = tmp;
1010 it += sizeof(float);
1011 break;
1012 }
1013 case 'Z': // string
1014 {
1015 std::string const v{static_cast<char const *>(it)}; // parses until '\0'
1016 it += v.size() + 1;
1017 target[tag] = std::move(v);
1018 break;
1019 }
1020 case 'H': // byte array, represented as null-terminated string; specification requires even number of bytes
1021 {
1022 std::string_view const str{static_cast<char const *>(it)}; // parses until '\0'
1023
1024 std::vector<std::byte> tmp_vector{};
1025 // std::from_chars cannot directly parse into a std::byte
1026 uint8_t dummy_byte{};
1027
1028 if (str.size() % 2 != 0)
1029 throw format_error{"[CORRUPTED BAM FILE] Hexadecimal tag must have even number of digits."};
1030
1031 // H encodes bytes in a hexadecimal format. Two hex values are stored for each byte as characters.
1032 // E.g., '1' and 'A' need one byte each and are read as `\x1A`, which is 27 in decimal.
1033 for (auto hex_begin = str.begin(), hex_end = str.begin() + 2; hex_begin != str.end();
1034 hex_begin += 2, hex_end += 2)
1035 {
1036 auto res = std::from_chars(hex_begin, hex_end, dummy_byte, 16);
1037
1038 if (res.ec == std::errc::invalid_argument)
1039 throw format_error{std::string("[CORRUPTED BAM FILE] The string '")
1040 + std::string(hex_begin, hex_end) + "' could not be cast into type uint8_t."};
1041
1042 if (res.ec == std::errc::result_out_of_range)
1043 throw format_error{std::string("[CORRUPTED BAM FILE] Casting '") + std::string(str)
1044 + "' into type uint8_t would cause an overflow."};
1045
1046 tmp_vector.push_back(std::byte{dummy_byte});
1047 }
1048
1049 target[tag] = std::move(tmp_vector);
1050
1051 it += str.size() + 1;
1052
1053 break;
1054 }
1055 case 'B': // Array. Value type depends on second char [cCsSiIf]
1056 {
1057 char array_value_type_id = *it;
1058 ++it; // skip char read before
1059
1060 switch (array_value_type_id)
1061 {
1062 case 'c': // int8_t
1063 parse_array_into_target(tag, int8_t{});
1064 break;
1065 case 'C': // uint8_t
1066 parse_array_into_target(tag, uint8_t{});
1067 break;
1068 case 's': // int16_t
1069 parse_array_into_target(tag, int16_t{});
1070 break;
1071 case 'S': // uint16_t
1072 parse_array_into_target(tag, uint16_t{});
1073 break;
1074 case 'i': // int32_t
1075 parse_array_into_target(tag, int32_t{});
1076 break;
1077 case 'I': // uint32_t
1078 parse_array_into_target(tag, uint32_t{});
1079 break;
1080 case 'f': // float
1081 parse_array_into_target(tag, float{});
1082 break;
1083 default:
1084 throw format_error{detail::to_string("The first character in the numerical id of a SAM tag ",
1085 "must be one of [cCsSiIf] but '",
1086 array_value_type_id,
1087 "' was given.")};
1088 }
1089 break;
1090 }
1091 default:
1092 throw format_error{detail::to_string("The second character in the numerical id of a "
1093 "SAM tag must be one of [A,i,Z,H,B,f] but '",
1094 type_id,
1095 "' was given.")};
1096 }
1097 }
1098}
1099
1105inline std::vector<cigar> format_bam::parse_binary_cigar(std::string_view const cigar_str) const
1106{
1107 // The cigar operation is encoded in 4 bits.
1108 constexpr std::array<char, 16>
1109 cigar_operation_mapping{'M', 'I', 'D', 'N', 'S', 'H', 'P', '=', 'X', '*', '*', '*', '*', '*', '*', '*'};
1110 // The rightmost 4 bits encode the operation, the other bits encode the count.
1111 constexpr uint32_t cigar_operation_mask = 0x0f; // rightmost 4 bits are set to one
1112
1113 std::vector<cigar> cigar_vector{};
1114 char operation{'\0'};
1115 uint32_t count{};
1116 uint32_t operation_and_count{}; // In BAM, operation and count values are stored within one 32 bit integer.
1117
1118 assert(cigar_str.size() % 4 == 0); // One cigar letter is stored in 4 bytes (uint32_t).
1119
1120 for (auto it = cigar_str.begin(); it != cigar_str.end(); it += sizeof(operation_and_count))
1121 {
1122 std::memcpy(&operation_and_count, it, sizeof(operation_and_count));
1123 operation = cigar_operation_mapping[operation_and_count & cigar_operation_mask];
1124 count = operation_and_count >> 4;
1125
1126 cigar_vector.emplace_back(count, seqan3::assign_char_strictly_to(operation, cigar::operation{}));
1127 }
1128
1129 return cigar_vector;
1130}
1131
1135inline std::string format_bam::get_tag_dict_str(sam_tag_dictionary const & tag_dict)
1136{
1137 std::string result{};
1138
1139 auto stream_variant_fn = [&result](auto && arg) // helper to print a std::variant
1140 {
1141 // T is either char, int32_t, float, std::string, or a std::vector<some int>
1142 using T = std::remove_cvref_t<decltype(arg)>;
1143
1144 if constexpr (std::same_as<T, int32_t>)
1145 {
1146 // always choose the smallest possible representation [cCsSiI]
1147 size_t const absolute_arg = std::abs(arg);
1148 auto n = std::countr_zero(std::bit_ceil(absolute_arg + 1u) >> 1u) / 8u;
1149 bool const negative = arg < 0;
1150 n = n * n + 2 * negative; // for switch case order
1151
1152 switch (n)
1153 {
1154 case 0:
1155 {
1156 result[result.size() - 1] = 'C';
1157 result.append(reinterpret_cast<char const *>(&arg), 1);
1158 break;
1159 }
1160 case 1:
1161 {
1162 result[result.size() - 1] = 'S';
1163 result.append(reinterpret_cast<char const *>(&arg), 2);
1164 break;
1165 }
1166 case 2:
1167 {
1168 result[result.size() - 1] = 'c';
1169 int8_t tmp = static_cast<int8_t>(arg);
1170 result.append(reinterpret_cast<char const *>(&tmp), 1);
1171 break;
1172 }
1173 case 3:
1174 {
1175 result[result.size() - 1] = 's';
1176 int16_t tmp = static_cast<int16_t>(arg);
1177 result.append(reinterpret_cast<char const *>(&tmp), 2);
1178 break;
1179 }
1180 default:
1181 {
1182 result.append(reinterpret_cast<char const *>(&arg), 4); // always i
1183 break;
1184 }
1185 }
1186 }
1187 else if constexpr (std::same_as<T, std::string>)
1188 {
1189 result.append(reinterpret_cast<char const *>(arg.data()), arg.size() + 1 /*+ null character*/);
1190 }
1191 else if constexpr (!std::ranges::range<T>) // char, float
1192 {
1193 result.append(reinterpret_cast<char const *>(&arg), sizeof(arg));
1194 }
1195 else // std::vector of some arithmetic_type type
1196 {
1197 int32_t sz{static_cast<int32_t>(arg.size())};
1198 result.append(reinterpret_cast<char *>(&sz), 4);
1199 result.append(reinterpret_cast<char const *>(arg.data()),
1200 arg.size() * sizeof(std::ranges::range_value_t<T>));
1201 }
1202 };
1203
1204 for (auto & [tag, variant] : tag_dict)
1205 {
1206 result.push_back(static_cast<char>(tag / 256));
1207 result.push_back(static_cast<char>(tag % 256));
1208
1209 result.push_back(detail::sam_tag_type_char[variant.index()]);
1210
1211 if (!is_char<'\0'>(detail::sam_tag_type_char_extra[variant.index()]))
1212 result.push_back(detail::sam_tag_type_char_extra[variant.index()]);
1213
1214 std::visit(stream_variant_fn, variant);
1215 }
1216
1217 return result;
1218}
1219
1220} // namespace seqan3
T begin(T... args)
T bit_ceil(T... args)
constexpr derived_type & assign_rank(rank_type const c) noexcept
Assign from a numeric value.
Definition alphabet_base.hpp:184
The seqan3::cigar semialphabet pairs a counter with a seqan3::cigar::operation letter.
Definition alphabet/cigar/cigar.hpp:57
exposition_only::cigar_operation operation
The (extended) cigar operation alphabet of M,D,I,H,N,P,S,X,=.
Definition alphabet/cigar/cigar.hpp:93
A 16 letter DNA alphabet, containing all IUPAC symbols minus the gap and plus an equality sign ('=').
Definition dna16sam.hpp:45
The BAM format.
Definition format_bam.hpp:48
void read_alignment_record(stream_type &stream, sam_file_input_options< seq_legal_alph_type > const &options, ref_seqs_type &ref_seqs, sam_file_header< ref_ids_type > &header, stream_pos_type &position_buffer, seq_type &seq, qual_type &qual, id_type &id, ref_seq_type &ref_seq, ref_id_type &ref_id, ref_offset_type &ref_offset, cigar_type &cigar_vector, flag_type &flag, mapq_type &mapq, mate_type &mate, tag_dict_type &tag_dict, e_value_type &e_value, bit_score_type &bit_score)
Read from the specified stream and back-insert into the given field buffers.
Definition format_bam.hpp:263
format_bam()=default
Defaulted.
void write_alignment_record(stream_type &stream, sam_file_output_options const &options, header_type &&header, seq_type &&seq, qual_type &&qual, id_type &&id, ref_seq_type &&ref_seq, ref_id_type &&ref_id, std::optional< int32_t > ref_offset, cigar_type &&cigar_vector, sam_flag const flag, uint8_t const mapq, mate_type &&mate, tag_dict_type &&tag_dict, double e_value, double bit_score)
Write the given fields to the specified stream.
Definition format_bam.hpp:529
format_bam & operator=(format_bam &&)=default
Defaulted.
format_bam & operator=(format_bam const &)=default
Defaulted.
format_bam(format_bam &&)=default
Defaulted.
~format_bam()=default
Defaulted.
format_bam(format_bam const &)=default
Defaulted.
static std::vector< std::string > file_extensions
The valid file extensions for this format; note that you can modify this value.
Definition format_bam.hpp:64
Stores the header information of SAM/BAM files.
Definition header.hpp:46
ref_ids_type & ref_ids()
The range of reference ids.
Definition header.hpp:140
std::unordered_map< key_type, int32_t, key_hasher, detail::view_equality_fn > ref_dict
The mapping of reference id to position in the ref_ids() range and the ref_id_info range.
Definition header.hpp:179
std::vector< std::tuple< int32_t, std::string > > ref_id_info
The reference information. (used by the SAM/BAM format)
Definition header.hpp:176
The SAM tag dictionary class that stores all optional SAM fields.
Definition sam_tag_dictionary.hpp:327
T copy(T... args)
T copy_n(T... args)
T countr_zero(T... args)
T data(T... args)
Provides seqan3::dna16sam.
T emplace_back(T... args)
T end(T... args)
T equal(T... args)
Provides seqan3::detail::fast_ostreambuf_iterator.
Provides the seqan3::format_sam_base that can be inherited from.
T from_chars(T... args)
constexpr auto assign_char_to
Assign a character to an alphabet object.
Definition alphabet/concept.hpp:521
constexpr auto assign_char_strictly_to
Assign a character to an alphabet object, throw if the character is not valid.
Definition alphabet/concept.hpp:731
constexpr auto to_rank
Return the rank representation of a (semi-)alphabet object.
Definition alphabet/concept.hpp:152
sam_flag
An enum flag that describes the properties of an aligned read (given as a SAM record).
Definition sam_flag.hpp:73
@ flag
The alignment flag (bit information), uint16_t value.
@ ref_offset
Sequence (seqan3::field::ref_seq) relative start position (0-based), unsigned value.
@ ref_seq
The (reference) "sequence" information, usually a range of nucleotides or amino acids.
@ mapq
The mapping quality of the seqan3::field::seq alignment, usually a Phred-scaled score.
@ bit_score
The bit score (statistical significance indicator), unsigned value.
@ mate
The mate pair information given as a std::tuple of reference name, offset and template length.
@ ref_id
The identifier of the (reference) sequence that seqan3::field::seq was aligned to.
@ id
The identifier, usually a string.
@ seq
The "sequence", usually a range of nucleotides or amino acids.
@ qual
The qualities, usually in Phred score notation.
constexpr auto is_char
Checks whether a given letter is the same as the template non-type argument.
Definition predicate.hpp:60
constexpr ptrdiff_t count
Count the occurrences of a type in a pack.
Definition type_pack/traits.hpp:161
constexpr size_t size
The size of a type pack.
Definition type_pack/traits.hpp:143
constexpr auto repeat_n
A view factory that repeats a given value n times.
Definition repeat_n.hpp:88
Provides the seqan3::sam_file_header class.
T index(T... args)
The generic alphabet concept that covers most data types used in ranges.
A type that satisfies std::is_arithmetic_v<t>.
Checks whether `from` can be implicitly converted to `to`.
Whether a type behaves like a tuple.
Auxiliary functions for the SAM IO.
Provides seqan3::detail::istreambuf.
T memcpy(T... args)
T min(T... args)
The main SeqAn3 namespace.
Definition aligned_sequence_concept.hpp:26
Provides seqan3::debug_stream and related types.
T push_back(T... args)
T reserve(T... args)
T resize(T... args)
Provides seqan3::sam_file_input_options.
Provides helper data structures for the seqan3::sam_file_output.
Provides the seqan3::sam_tag_dictionary class and auxiliaries.
T size(T... args)
Provides seqan3::views::slice.
T str(T... args)
Thrown if information given to output format didn't match expectations.
Definition io/exception.hpp:88
The options type defines various option members that influence the behaviour of all or some formats.
Definition sam_file/input_options.hpp:26
The options type defines various option members that influence the behavior of all or some formats.
Definition sam_file/output_options.hpp:23
T substr(T... args)
Provides seqan3::views::take_exactly and seqan3::views::take_exactly_or_throw.
T visit(T... args)
Hide me