SeqAn3 3.4.0-rc.4
The Modern C++ library for sequence analysis.
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages Concepts
format_bam.hpp
Go to the documentation of this file.
1// SPDX-FileCopyrightText: 2006-2025 Knut Reinert & Freie Universität Berlin
2// SPDX-FileCopyrightText: 2016-2025 Knut Reinert & MPI für molekulare Genetik
3// SPDX-License-Identifier: BSD-3-Clause
4
10#pragma once
11
12#include <bit>
13#include <cstring>
14#include <iterator>
15#include <ranges>
16#include <string>
17#include <vector>
18
32
33namespace seqan3
34{
35
48class format_bam : private detail::format_sam_base
49{
50public:
54 // string_buffer is of type std::string and has some problems with pre-C++11 ABI
55 format_bam() = default;
56 format_bam(format_bam const &) = default;
57 format_bam & operator=(format_bam const &) = default;
58 format_bam(format_bam &&) = default;
60 ~format_bam() = default;
61
63
66
67protected:
68 template <typename stream_type, // constraints checked by file
69 typename seq_legal_alph_type,
70 typename ref_seqs_type,
71 typename ref_ids_type,
72 typename stream_pos_type,
73 typename seq_type,
74 typename id_type,
75 typename ref_seq_type,
76 typename ref_id_type,
77 typename ref_offset_type,
78 typename cigar_type,
79 typename flag_type,
80 typename mapq_type,
81 typename qual_type,
82 typename mate_type,
83 typename tag_dict_type,
84 typename e_value_type,
85 typename bit_score_type>
86 void read_alignment_record(stream_type & stream,
87 sam_file_input_options<seq_legal_alph_type> const & SEQAN3_DOXYGEN_ONLY(options),
88 ref_seqs_type & ref_seqs,
90 stream_pos_type & position_buffer,
91 seq_type & seq,
92 qual_type & qual,
93 id_type & id,
94 ref_seq_type & SEQAN3_DOXYGEN_ONLY(ref_seq),
95 ref_id_type & ref_id,
96 ref_offset_type & ref_offset,
97 cigar_type & cigar_vector,
98 flag_type & flag,
99 mapq_type & mapq,
100 mate_type & mate,
101 tag_dict_type & tag_dict,
102 e_value_type & SEQAN3_DOXYGEN_ONLY(e_value),
103 bit_score_type & SEQAN3_DOXYGEN_ONLY(bit_score));
104
105 template <typename stream_type,
106 typename header_type,
107 typename seq_type,
108 typename id_type,
109 typename ref_seq_type,
110 typename ref_id_type,
111 typename cigar_type,
112 typename qual_type,
113 typename mate_type,
114 typename tag_dict_type>
115 void write_alignment_record([[maybe_unused]] stream_type & stream,
116 [[maybe_unused]] sam_file_output_options const & options,
117 [[maybe_unused]] header_type && header,
118 [[maybe_unused]] seq_type && seq,
119 [[maybe_unused]] qual_type && qual,
120 [[maybe_unused]] id_type && id,
121 [[maybe_unused]] ref_seq_type && SEQAN3_DOXYGEN_ONLY(ref_seq),
122 [[maybe_unused]] ref_id_type && ref_id,
123 [[maybe_unused]] std::optional<int32_t> ref_offset,
124 [[maybe_unused]] cigar_type && cigar_vector,
125 [[maybe_unused]] sam_flag const flag,
126 [[maybe_unused]] uint8_t const mapq,
127 [[maybe_unused]] mate_type && mate,
128 [[maybe_unused]] tag_dict_type && tag_dict,
129 [[maybe_unused]] double SEQAN3_DOXYGEN_ONLY(e_value),
130 [[maybe_unused]] double SEQAN3_DOXYGEN_ONLY(bit_score));
131
133 template <typename stream_t, typename header_type>
134 void write_header(stream_t & stream, sam_file_output_options const & options, header_type & header);
135
136private:
138 bool header_was_read{false};
139
141 std::string string_buffer{};
142
144 struct alignment_record_core
145 { // naming corresponds to official SAM/BAM specifications
146 int32_t block_size;
147 int32_t refID;
148 int32_t pos;
149 uint32_t l_read_name : 8;
150 uint32_t mapq : 8;
151 uint32_t bin : 16;
152 uint32_t n_cigar_op : 16;
153 sam_flag flag;
154 int32_t l_seq;
155 int32_t next_refID;
156 int32_t next_pos;
157 int32_t tlen;
158 };
159
160 static_assert(sizeof(alignment_record_core) == 36);
161
// Lookup table that maps a cigar operation character to the numeric operation code that is stored in
// the binary cigar: M=0, I=1, D=2, N=3, S=4, H=5, P=6, '='=7, X=8.
// Every other character maps to 0 because the table is value-initialised.
static constexpr std::array<uint8_t, 256> char_to_sam_rank{[]() constexpr
{
    std::array<uint8_t, 256> ret{}; // all entries start at 0, which already is the code of 'M'

    // char may be signed; indexing must happen through the unsigned representation.
    using index_t = std::make_unsigned_t<char>;

    // The position of an operation in this list is its numeric code.
    constexpr char operations[] = {'M', 'I', 'D', 'N', 'S', 'H', 'P', '=', 'X'};

    uint8_t code = 0;
    for (char const operation : operations)
        ret[static_cast<index_t>(operation)] = code++;

    return ret;
}()};
181
// Computes the bin number for the region [beg, end) (see reg2bin in the SAM/BAM specification).
//
// The bins form five nested levels with window sizes 2^14, 2^17, 2^20, 2^23 and 2^26. The function returns
// the bin of the finest level in which `beg` and the last covered position (`end - 1`) fall into the same
// window, or 0 (the bin spanning everything) if there is no such level.
//
// beg: start of the region; -1 together with end == 0 yields bin 4680.
// end: one past the last position of the region.
static constexpr uint16_t reg2bin(int32_t beg, int32_t end) noexcept
{
    --end; // work with the last covered position instead of the past-the-end position

    // shift == log2(window size) of the level; finest level first.
    for (int shift = 14; shift <= 26; shift += 3)
    {
        if ((beg >> shift) == (end >> shift))
        {
            // Number of the first bin on this level: 4681, 585, 73, 9, 1 for shift 14, 17, 20, 23, 26.
            int32_t const first_bin_of_level = ((1 << (29 - shift)) - 1) / 7;
            return static_cast<uint16_t>(first_bin_of_level + (beg >> shift));
        }
    }

    return 0;
}
198
205 template <typename stream_view_type, std::integral number_type>
206 void read_integral_byte_field(stream_view_type && stream_view, number_type & target)
207 {
208 std::ranges::copy_n(std::ranges::begin(stream_view), sizeof(target), reinterpret_cast<char *>(&target));
209 }
210
212 template <std::integral number_type>
213 void read_integral_byte_field(std::string_view const str, number_type & target)
214 {
215 std::memcpy(&target, str.data(), sizeof(target));
216 }
217
223 template <typename stream_view_type>
224 void read_float_byte_field(stream_view_type && stream_view, float & target)
225 {
226 std::ranges::copy_n(std::ranges::begin(stream_view), sizeof(int32_t), reinterpret_cast<char *>(&target));
227 }
228
229 template <typename value_type>
230 int32_t read_sam_dict_vector(seqan3::detail::sam_tag_variant & variant,
231 std::string_view const str,
232 value_type const & SEQAN3_DOXYGEN_ONLY(value));
233
234 void read_sam_dict(std::string_view const tag_str, sam_tag_dictionary & target);
235
236 std::vector<cigar> parse_binary_cigar(std::string_view const cigar_str) const;
237
238 static std::string get_tag_dict_str(sam_tag_dictionary const & tag_dict);
239};
240
242template <typename stream_type, // constraints checked by file
243 typename seq_legal_alph_type,
244 typename ref_seqs_type,
245 typename ref_ids_type,
246 typename stream_pos_type,
247 typename seq_type,
248 typename id_type,
249 typename ref_seq_type,
250 typename ref_id_type,
251 typename ref_offset_type,
252 typename cigar_type,
253 typename flag_type,
254 typename mapq_type,
255 typename qual_type,
256 typename mate_type,
257 typename tag_dict_type,
258 typename e_value_type,
259 typename bit_score_type>
260inline void
262 sam_file_input_options<seq_legal_alph_type> const & SEQAN3_DOXYGEN_ONLY(options),
263 ref_seqs_type & ref_seqs,
265 stream_pos_type & position_buffer,
266 seq_type & seq,
267 qual_type & qual,
268 id_type & id,
269 ref_seq_type & SEQAN3_DOXYGEN_ONLY(ref_seq),
270 ref_id_type & ref_id,
271 ref_offset_type & ref_offset,
272 cigar_type & cigar_vector,
273 flag_type & flag,
274 mapq_type & mapq,
275 mate_type & mate,
276 tag_dict_type & tag_dict,
277 e_value_type & SEQAN3_DOXYGEN_ONLY(e_value),
278 bit_score_type & SEQAN3_DOXYGEN_ONLY(bit_score))
279{
280 static_assert(detail::decays_to_ignore_v<ref_offset_type>
281 || detail::is_type_specialisation_of_v<ref_offset_type, std::optional>,
282 "The ref_offset must be a specialisation of std::optional.");
283
284 static_assert(detail::decays_to_ignore_v<mapq_type> || std::same_as<mapq_type, uint8_t>,
285 "The type of field::mapq must be uint8_t.");
286
287 static_assert(detail::decays_to_ignore_v<flag_type> || std::same_as<flag_type, sam_flag>,
288 "The type of field::flag must be seqan3::sam_flag.");
289
290 auto stream_view = seqan3::detail::istreambuf(stream);
291
292 // Header
293 // -------------------------------------------------------------------------------------------------------------
294 if (!header_was_read)
295 {
296 // magic BAM string
297 if (!std::ranges::equal(stream_view | detail::take_exactly_or_throw(4), std::string_view{"BAM\1"}))
298 throw format_error{"File is not in BAM format."};
299
300 int32_t l_text{}; // length of header text including \0 character
301 int32_t n_ref{}; // number of reference sequences
302 int32_t l_name{}; // 1 + length of reference name including \0 character
303 int32_t l_ref{}; // length of reference sequence
304
305 read_integral_byte_field(stream_view, l_text);
306
307 if (l_text > 0) // header text is present
308 read_header(stream_view | detail::take_exactly_or_throw(l_text), header, ref_seqs);
309
310 read_integral_byte_field(stream_view, n_ref);
311
312 for (int32_t ref_idx = 0; ref_idx < n_ref; ++ref_idx)
313 {
314 read_integral_byte_field(stream_view, l_name);
315
316 string_buffer.resize(l_name - 1);
318 l_name - 1,
319 string_buffer.data()); // copy without \0 character
320 ++std::ranges::begin(stream_view); // skip \0 character
321
322 read_integral_byte_field(stream_view, l_ref);
323
324 if constexpr (detail::decays_to_ignore_v<ref_seqs_type>) // no reference information given
325 {
326 // If there was no header text, we parse reference sequences block as header information
327 if (l_text == 0)
328 {
329 auto & reference_ids = header.ref_ids();
330 // put the length of the reference sequence into ref_id_info
331 header.ref_id_info.emplace_back(l_ref, "");
332 // put the reference name into reference_ids
333 reference_ids.push_back(string_buffer);
334 // assign the reference name an ascending reference id (starts at index 0).
335 header.ref_dict.emplace(reference_ids.back(), reference_ids.size() - 1);
336 continue;
337 }
338 }
339
340 auto id_it = header.ref_dict.find(string_buffer);
341
342 // sanity checks of reference information to existing header object:
343 if (id_it == header.ref_dict.end()) // [unlikely]
344 {
345 throw format_error{detail::to_string("Unknown reference name '" + string_buffer
346 + "' found in BAM file header (header.ref_ids():",
347 header.ref_ids(),
348 ").")};
349 }
350 else if (id_it->second != ref_idx) // [unlikely]
351 {
352 throw format_error{detail::to_string("Reference id '",
353 string_buffer,
354 "' at position ",
355 ref_idx,
356 " does not correspond to the position ",
357 id_it->second,
358 " in the header (header.ref_ids():",
359 header.ref_ids(),
360 ").")};
361 }
362 else if (std::get<0>(header.ref_id_info[id_it->second]) != l_ref) // [unlikely]
363 {
364 throw format_error{"Provided reference has unequal length as specified in the header."};
365 }
366 }
367
368 header_was_read = true;
369
370 if (std::ranges::begin(stream_view) == std::ranges::end(stream_view)) // no records follow
371 return;
372 }
373
374 // read alignment record into buffer
375 // -------------------------------------------------------------------------------------------------------------
376 position_buffer = stream.tellg();
377
378 auto stream_it = detail::fast_istreambuf_iterator{*stream.rdbuf()};
379
380 alignment_record_core core;
381 std::string_view const core_str = stream_it.cache_bytes(sizeof(core));
382 std::ranges::copy(core_str, reinterpret_cast<char *>(&core));
383
384 if (core.refID >= static_cast<int32_t>(header.ref_ids().size()) || core.refID < -1) // [[unlikely]]
385 {
386 throw format_error{detail::to_string("Reference id index '",
387 core.refID,
388 "' is not in range of ",
389 "header.ref_ids(), which has size ",
390 header.ref_ids().size(),
391 ".")};
392 }
393 else if (core.refID > -1) // not unmapped
394 {
395 ref_id = core.refID; // field::ref_id
396 }
397
398 flag = core.flag; // field::flag
399 mapq = static_cast<uint8_t>(core.mapq); // field::mapq
400
401 if (core.pos > -1) // [[likely]]
402 ref_offset = core.pos; // field::ref_offset
403
404 if constexpr (!detail::decays_to_ignore_v<mate_type>) // field::mate
405 {
406 if (core.next_refID > -1)
407 get<0>(mate) = core.next_refID;
408
409 if (core.next_pos > -1) // [[likely]]
410 get<1>(mate) = core.next_pos;
411
412 get<2>(mate) = core.tlen;
413 }
414
415 // read id
416 // -------------------------------------------------------------------------------------------------------------
417 std::string_view record_str = stream_it.cache_bytes(core.block_size - (sizeof(alignment_record_core) - 4));
418 size_t considered_bytes{0};
419
420 if constexpr (!detail::decays_to_ignore_v<id_type>)
421 read_forward_range_field(record_str.substr(0, core.l_read_name - 1), id);
422
423 considered_bytes += core.l_read_name;
424
425 // read cigar string
426 // -------------------------------------------------------------------------------------------------------------
427 if constexpr (!detail::decays_to_ignore_v<cigar_type>)
428 cigar_vector = parse_binary_cigar(record_str.substr(considered_bytes, core.n_cigar_op * 4));
429
430 considered_bytes += core.n_cigar_op * 4;
431
432 // read sequence
433 // -------------------------------------------------------------------------------------------------------------
434 if constexpr (!detail::decays_to_ignore_v<seq_type>)
435 {
436 size_t const number_of_bytes = (core.l_seq + 1) / 2;
437 std::string_view const seq_str = record_str.substr(considered_bytes, number_of_bytes);
438
439 seq.resize(
440 core.l_seq
441 + 1 /* reserve one more in case size is uneven. will be corrected */); // TODO: .resize() is not generic
442
443 using alph_t = std::ranges::range_value_t<decltype(seq)>;
444 constexpr auto from_dna16 = detail::convert_through_char_representation<dna16sam, alph_t>;
445
446 // 1 byte encodes two sequence characters
447 for (size_t i = 0, j = 0; i < number_of_bytes; ++i, j += 2)
448 {
449 seq[j] = from_dna16[to_rank(dna16sam{}.assign_rank(std::min(15, static_cast<uint8_t>(seq_str[i]) >> 4)))];
450 seq[j + 1] =
451 from_dna16[to_rank(dna16sam{}.assign_rank(std::min(15, static_cast<uint8_t>(seq_str[i]) & 0x0f)))];
452 }
453
454 seq.resize(core.l_seq); // remove extra letter
455 }
456
457 considered_bytes += (core.l_seq + 1) / 2;
458
459 // read qual string
460 // -------------------------------------------------------------------------------------------------------------
461 if constexpr (!detail::decays_to_ignore_v<qual_type>)
462 {
463 std::string_view const qual_str = record_str.substr(considered_bytes, core.l_seq);
464 qual.resize(core.l_seq); // TODO: this is not generic
465
466 for (int32_t i = 0; i < core.l_seq; ++i)
467 qual[i] = assign_char_to(static_cast<char>(qual_str[i] + 33), std::ranges::range_value_t<qual_type>{});
468 }
469
470 considered_bytes += core.l_seq;
471
472 // All remaining optional fields if any: SAM tags dictionary
473 // -------------------------------------------------------------------------------------------------------------
474 if constexpr (!detail::decays_to_ignore_v<tag_dict_type>)
475 read_sam_dict(record_str.substr(considered_bytes), tag_dict);
476
477 // DONE READING - wrap up
478 // -------------------------------------------------------------------------------------------------------------
479 if constexpr (!detail::decays_to_ignore_v<cigar_type>)
480 {
481 int32_t const sc_front = soft_clipping_at_front(cigar_vector);
482
483 // Check cigar, if it matches ‘kSmN’, where ‘k’ equals lseq, ‘m’ is the reference sequence length in the
484 // alignment, and ‘S’ and ‘N’ are the soft-clipping and reference-clip, then the cigar string was larger
485 // than 65535 operations and is stored in the sam_tag_dictionary (tag CG).
486 if (core.l_seq != 0 && sc_front == core.l_seq)
487 {
488 if constexpr (detail::decays_to_ignore_v<tag_dict_type> | detail::decays_to_ignore_v<seq_type>)
489 { // maybe only throw in debug mode and otherwise return an empty alignment?
490 throw format_error{
491 detail::to_string("The cigar string '",
492 detail::get_cigar_string(cigar_vector),
493 "' suggests that the cigar string exceeded 65535 elements and was therefore ",
494 "stored in the optional field CG. You need to read in the field::tags and "
495 "field::seq in order to access this information.")};
496 }
497 else
498 {
499 auto it = tag_dict.find("CG"_tag);
500
501 if (it == tag_dict.end())
502 throw format_error{
503 detail::to_string("The cigar string '",
504 detail::get_cigar_string(cigar_vector),
505 "' suggests that the cigar string exceeded 65535 elements and was therefore ",
506 "stored in the optional field CG but this tag is not present in the given ",
507 "record.")};
508
509 cigar_vector = detail::parse_cigar(std::get<std::string>(it->second));
510 tag_dict.erase(it); // remove redundant information
511 }
512 }
513 }
514}
515
517template <typename stream_type,
518 typename header_type,
519 typename seq_type,
520 typename id_type,
521 typename ref_seq_type,
522 typename ref_id_type,
523 typename cigar_type,
524 typename qual_type,
525 typename mate_type,
526 typename tag_dict_type>
527inline void format_bam::write_alignment_record([[maybe_unused]] stream_type & stream,
528 [[maybe_unused]] sam_file_output_options const & options,
529 [[maybe_unused]] header_type && header,
530 [[maybe_unused]] seq_type && seq,
531 [[maybe_unused]] qual_type && qual,
532 [[maybe_unused]] id_type && id,
533 [[maybe_unused]] ref_seq_type && SEQAN3_DOXYGEN_ONLY(ref_seq),
534 [[maybe_unused]] ref_id_type && ref_id,
535 [[maybe_unused]] std::optional<int32_t> ref_offset,
536 [[maybe_unused]] cigar_type && cigar_vector,
537 [[maybe_unused]] sam_flag const flag,
538 [[maybe_unused]] uint8_t const mapq,
539 [[maybe_unused]] mate_type && mate,
540 [[maybe_unused]] tag_dict_type && tag_dict,
541 [[maybe_unused]] double SEQAN3_DOXYGEN_ONLY(e_value),
542 [[maybe_unused]] double SEQAN3_DOXYGEN_ONLY(bit_score))
543{
544 // ---------------------------------------------------------------------
545 // Type Requirements (as static asserts for user friendliness)
546 // ---------------------------------------------------------------------
547 static_assert((std::ranges::forward_range<seq_type> && alphabet<std::ranges::range_reference_t<seq_type>>),
548 "The seq object must be a std::ranges::forward_range over "
549 "letters that model seqan3::alphabet.");
550
551 static_assert((std::ranges::forward_range<id_type> && alphabet<std::ranges::range_reference_t<id_type>>),
552 "The id object must be a std::ranges::forward_range over "
553 "letters that model seqan3::alphabet.");
554
555 static_assert((std::ranges::forward_range<ref_seq_type> && alphabet<std::ranges::range_reference_t<ref_seq_type>>),
556 "The ref_seq object must be a std::ranges::forward_range "
557 "over letters that model seqan3::alphabet.");
558
559 if constexpr (!detail::decays_to_ignore_v<ref_id_type>)
560 {
561 static_assert((std::ranges::forward_range<ref_id_type> || std::integral<std::remove_reference_t<ref_id_type>>
562 || detail::is_type_specialisation_of_v<std::remove_cvref_t<ref_id_type>, std::optional>),
563 "The ref_id object must be a std::ranges::forward_range "
564 "over letters that model seqan3::alphabet or an integral or a std::optional<integral>.");
565 }
566
567 static_assert((std::ranges::forward_range<qual_type> && alphabet<std::ranges::range_reference_t<qual_type>>),
568 "The qual object must be a std::ranges::forward_range "
569 "over letters that model seqan3::alphabet.");
570
572 "The mate object must be a std::tuple of size 3 with "
573 "1) a std::ranges::forward_range with a value_type modelling seqan3::alphabet, "
574 "2) a std::integral or std::optional<std::integral>, and "
575 "3) a std::integral.");
576
577 static_assert(
578 ((std::ranges::forward_range<decltype(std::get<0>(mate))>
579 || std::integral<std::remove_cvref_t<decltype(std::get<0>(mate))>>
580 || detail::is_type_specialisation_of_v<std::remove_cvref_t<decltype(std::get<0>(mate))>, std::optional>)
581 && (std::integral<std::remove_cvref_t<decltype(std::get<1>(mate))>>
582 || detail::is_type_specialisation_of_v<std::remove_cvref_t<decltype(std::get<1>(mate))>, std::optional>)
583 && std::integral<std::remove_cvref_t<decltype(std::get<2>(mate))>>),
584 "The mate object must be a std::tuple of size 3 with "
585 "1) a std::ranges::forward_range with a value_type modelling seqan3::alphabet, "
586 "2) a std::integral or std::optional<std::integral>, and "
587 "3) a std::integral.");
588
589 static_assert(std::same_as<std::remove_cvref_t<tag_dict_type>, sam_tag_dictionary>,
590 "The tag_dict object must be of type seqan3::sam_tag_dictionary.");
591
592 if constexpr (detail::decays_to_ignore_v<header_type>)
593 {
594 throw format_error{"BAM can only be written with a header but you did not provide enough information! "
595 "You can either construct the output file with reference names and reference length "
596 "information and the header will be created for you, or you can access the `header` member "
597 "directly."};
598 }
599 else
600 {
601 // ---------------------------------------------------------------------
602 // logical Requirements
603 // ---------------------------------------------------------------------
604
605 if (ref_offset.has_value() && (ref_offset.value() + 1) < 0)
606 throw format_error{detail::to_string("The ref_offset object must be >= -1 but is: ", ref_offset)};
607
608 detail::fast_ostreambuf_iterator stream_it{*stream.rdbuf()};
609
610 // ---------------------------------------------------------------------
611 // Writing the BAM Header on first call
612 // ---------------------------------------------------------------------
613 if (!header_was_written)
614 {
615 write_header(stream, options, header);
616 header_was_written = true;
617 }
618
619 // ---------------------------------------------------------------------
620 // Writing the Record
621 // ---------------------------------------------------------------------
622 int32_t ref_length{};
623
624 // Compute the ref_length from given cigar_vector which is needed to fill field `bin`.
625 if (!std::ranges::empty(cigar_vector))
626 {
627 int32_t dummy_seq_length{};
628 for (auto & [count, operation] : cigar_vector)
629 detail::update_alignment_lengths(ref_length, dummy_seq_length, operation.to_char(), count);
630 }
631
632 if (cigar_vector.size() >= (1 << 16)) // must be written into the sam tag CG
633 {
634 tag_dict["CG"_tag] = detail::get_cigar_string(cigar_vector);
635 cigar_vector.resize(2);
636 cigar_vector[0] = cigar{static_cast<uint32_t>(std::ranges::distance(seq)), 'S'_cigar_operation};
637 cigar_vector[1] = cigar{static_cast<uint32_t>(ref_length), 'N'_cigar_operation};
638 }
639
640 std::string tag_dict_binary_str = get_tag_dict_str(tag_dict);
641
642 // Compute the value for the l_read_name field for the bam record.
643 // This value is stored including a trailing `0`, so at most 254 characters of the id can be stored, since
644 // the data type to store the value is uint8_t and 255 is the maximal size.
645 // If the id is empty a '*' is written instead, i.e. the written id is never an empty string and stores at least
646 // 2 bytes.
647 uint8_t read_name_size = std::min<uint8_t>(std::ranges::distance(id), 254) + 1;
648 read_name_size += static_cast<uint8_t>(read_name_size == 1); // need size two since empty id is stored as '*'.
649
650 alignment_record_core core{/* block_size */ 0, // will be initialised right after
651 /* refID */ -1, // will be initialised right after
652 /* pos */ ref_offset.value_or(-1),
653 /* l_read_name */ read_name_size,
654 /* mapq */ mapq,
655 /* bin */ reg2bin(ref_offset.value_or(-1), ref_length),
656 /* n_cigar_op */ static_cast<uint16_t>(cigar_vector.size()),
657 /* flag */ flag,
658 /* l_seq */ static_cast<int32_t>(std::ranges::distance(seq)),
659 /* next_refId */ -1, // will be initialised right after
660 /* next_pos */ get<1>(mate).value_or(-1),
661 /* tlen */ get<2>(mate)};
662
663 auto check_and_assign_id_to = [&header]([[maybe_unused]] auto & id_source, [[maybe_unused]] auto & id_target)
664 {
665 using id_t = std::remove_reference_t<decltype(id_source)>;
666
667 if constexpr (!detail::decays_to_ignore_v<id_t>)
668 {
669 if constexpr (std::integral<id_t>)
670 {
671 id_target = id_source;
672 }
673 else if constexpr (detail::is_type_specialisation_of_v<id_t, std::optional>)
674 {
675 id_target = id_source.value_or(-1);
676 }
677 else
678 {
679 if (!std::ranges::empty(id_source)) // otherwise default will remain (-1)
680 {
681 auto id_it = header.ref_dict.end();
682
683 if constexpr (std::ranges::contiguous_range<decltype(id_source)>
684 && std::ranges::sized_range<decltype(id_source)>
685 && std::ranges::borrowed_range<decltype(id_source)>)
686 {
687 id_it = header.ref_dict.find(
688 std::span{std::ranges::data(id_source), std::ranges::size(id_source)});
689 }
690 else
691 {
692 using header_ref_id_type = std::remove_reference_t<decltype(header.ref_ids()[0])>;
693
694 static_assert(
695 implicitly_convertible_to<decltype(id_source), header_ref_id_type>,
696 "The ref_id type is not convertible to the reference id information stored in the "
697 "reference dictionary of the header object.");
698
699 id_it = header.ref_dict.find(id_source);
700 }
701
702 if (id_it == header.ref_dict.end())
703 {
704 throw format_error{detail::to_string("Unknown reference name '",
705 id_source,
706 "' could "
707 "not be found in BAM header ref_dict: ",
708 header.ref_dict,
709 ".")};
710 }
711
712 id_target = id_it->second;
713 }
714 }
715 }
716 };
717
718 // initialise core.refID
719 check_and_assign_id_to(ref_id, core.refID);
720
721 // initialise core.next_refID
722 check_and_assign_id_to(get<0>(mate), core.next_refID);
723
724 // initialise core.block_size
725 core.block_size = sizeof(core) - 4 /*block_size excluded*/ + core.l_read_name + core.n_cigar_op * 4
726 + // each int32_t has 4 bytes
727 (core.l_seq + 1) / 2 + // bitcompressed seq
728 core.l_seq + // quality string
729 tag_dict_binary_str.size();
730
731 std::ranges::copy_n(reinterpret_cast<char *>(&core), sizeof(core), stream_it); // write core
732
733 if (std::ranges::empty(id)) // empty id is represented as * for backward compatibility
734 stream_it = '*';
735 else
736 std::ranges::copy_n(std::ranges::begin(id), core.l_read_name - 1, stream_it); // write read id
737 stream_it = '\0';
738
739 // write cigar
740 for (auto [cigar_count, op] : cigar_vector)
741 {
742 cigar_count = cigar_count << 4;
743 cigar_count |= static_cast<int32_t>(char_to_sam_rank[op.to_char()]);
744 std::ranges::copy_n(reinterpret_cast<char *>(&cigar_count), 4, stream_it);
745 }
746
747 // write seq (bit-compressed: dna16sam characters go into one byte)
748 using alph_t = std::ranges::range_value_t<seq_type>;
749 constexpr auto to_dna16 = detail::convert_through_char_representation<alph_t, dna16sam>;
750
751 auto sit = std::ranges::begin(seq);
752 for (int32_t sidx = 0; sidx < ((core.l_seq & 1) ? core.l_seq - 1 : core.l_seq); ++sidx, ++sit)
753 {
754 uint8_t compressed_chr = to_rank(to_dna16[to_rank(*sit)]) << 4;
755 ++sidx, ++sit;
756 compressed_chr |= to_rank(to_dna16[to_rank(*sit)]);
757 stream_it = static_cast<char>(compressed_chr);
758 }
759
760 if (core.l_seq & 1) // write one more
761 stream_it = static_cast<char>(to_rank(to_dna16[to_rank(*sit)]) << 4);
762
763 // write qual
764 if (std::ranges::empty(qual))
765 {
766 auto v = views::repeat_n(static_cast<char>(255), core.l_seq);
767 std::ranges::copy_n(v.begin(), core.l_seq, stream_it);
768 }
769 else
770 {
771 if (std::ranges::distance(qual) != core.l_seq)
772 throw format_error{detail::to_string("Expected quality of same length as sequence with size ",
773 core.l_seq,
774 ". Got quality with size ",
775 std::ranges::distance(qual),
776 " instead.")};
777
778 auto v = qual
779 | std::views::transform(
780 [](auto chr)
781 {
782 return static_cast<char>(to_rank(chr));
783 });
784 std::ranges::copy_n(v.begin(), core.l_seq, stream_it);
785 }
786
787 // write optional fields
788 stream << tag_dict_binary_str;
789 } // if constexpr (!detail::decays_to_ignore_v<header_type>)
790}
791
793template <typename stream_t, typename header_type>
794inline void format_bam::write_header(stream_t & stream, sam_file_output_options const & options, header_type & header)
795{
796 if constexpr (detail::decays_to_ignore_v<header_type>)
797 {
798 throw format_error{"BAM can only be written with a header but you did not provide enough information! "
799 "You can either construct the output file with reference names and reference length "
800 "information and the header will be created for you, or you can access the `header` member "
801 "directly."};
802 }
803 else
804 {
805 detail::fast_ostreambuf_iterator stream_it{*stream.rdbuf()};
806
807 std::ranges::copy_n("BAM\1", 4, stream_it); // Do not copy the null terminator
808
809 // write SAM header to temporary stream first to query its size.
811 detail::format_sam_base::write_header(os, options, header);
812#if SEQAN3_WORKAROUND_GCC_NO_CXX11_ABI
813 int32_t const l_text{static_cast<int32_t>(os.str().size())};
814#else
815 int32_t const l_text{static_cast<int32_t>(os.view().size())};
816#endif
817 std::ranges::copy_n(reinterpret_cast<char const *>(&l_text), 4, stream_it); // write text length
818
819#if SEQAN3_WORKAROUND_GCC_NO_CXX11_ABI
820 auto header_view = os.str();
821#else
822 auto header_view = os.view();
823#endif
824 std::ranges::copy(header_view, stream_it);
825
826 assert(header.ref_ids().size() < (1ull << 32));
827 int32_t const n_ref{static_cast<int32_t>(header.ref_ids().size())};
828 std::ranges::copy_n(reinterpret_cast<char const *>(&n_ref), 4, stream_it); // write number of references
829
830 for (int32_t ridx = 0; ridx < n_ref; ++ridx)
831 {
832 assert(header.ref_ids()[ridx].size() + 1 < (1ull << 32));
833 int32_t const l_name{static_cast<int32_t>(header.ref_ids()[ridx].size()) + 1}; // plus null character
834 std::ranges::copy_n(reinterpret_cast<char const *>(&l_name), 4, stream_it); // write l_name
835 // write reference name:
836 std::ranges::copy(header.ref_ids()[ridx], stream_it);
837 stream_it = '\0'; // ++ is not necessary for ostream_iterator
838 // write reference sequence length:
839 std::ranges::copy_n(reinterpret_cast<char *>(&get<0>(header.ref_id_info[ridx])), 4, stream_it);
840 }
841 }
842}
843
862template <typename value_type>
863inline int32_t format_bam::read_sam_dict_vector(seqan3::detail::sam_tag_variant & variant,
864 std::string_view const str,
865 value_type const & SEQAN3_DOXYGEN_ONLY(value))
866{
867 auto it = str.begin();
868
869 // Read vector size from string_view and advance `it`.
870 int32_t const vector_size = [&]()
871 {
872 int32_t size{};
873 read_integral_byte_field(std::string_view{it, str.end()}, size);
874 it += sizeof(size);
875 return size;
876 }();
877
878 int32_t bytes_left{vector_size};
879
880 std::vector<value_type> tmp_vector;
881 tmp_vector.reserve(vector_size);
882
883 value_type tmp{};
884
885 while (bytes_left > 0)
886 {
887 if constexpr (std::integral<value_type>)
888 read_integral_byte_field(std::string_view{it, str.end()}, tmp);
889 else if constexpr (std::same_as<value_type, float>)
890 read_float_byte_field(std::string_view{it, str.end()}, tmp);
891 else
892 static_assert(std::is_same_v<value_type, void>, "format_bam::read_sam_dict_vector: unsupported value_type");
893
894 it += sizeof(tmp);
895 tmp_vector.push_back(std::move(tmp));
896 --bytes_left;
897 }
898
899 variant = std::move(tmp_vector);
900
901 return vector_size;
902}
903
919inline void format_bam::read_sam_dict(std::string_view const tag_str, sam_tag_dictionary & target)
920{
921 /* Every BAM tag has the format "[TAG][TYPE_ID][VALUE]", where TAG is a two letter
922 name tag which is converted to a unique integer identifier and TYPE_ID is one character in [A,i,Z,H,B,f]
923 describing the type for the upcoming VALUES. If TYPE_ID=='B' it signals an array of
924 VALUE's and the inner value type is identified by the next character, one of [cCsSiIf], followed
925 by the length (int32_t) of the array, followed by the values.
926 */
927 auto it = tag_str.begin();
928
929 // Deduces int_t from passed argument.
930 auto parse_integer_into_target = [&]<std::integral int_t>(uint16_t const tag, int_t)
931 {
932 int_t tmp{};
933 read_integral_byte_field(std::string_view{it, tag_str.end()}, tmp);
934 target[tag] = static_cast<int32_t>(tmp); // readable sam format only allows int32_t
935 it += sizeof(tmp);
936 };
937
938 // Deduces array_value_t from passed argument.
939 auto parse_array_into_target = [&]<arithmetic array_value_t>(uint16_t const tag, array_value_t)
940 {
941 int32_t const count = read_sam_dict_vector(target[tag], std::string_view{it, tag_str.end()}, array_value_t{});
942 it += sizeof(int32_t) /*length is stored within the vector*/ + sizeof(array_value_t) * count;
943 };
944
945 // Read uint16_t from string_view and advance `it`.
946 auto parse_tag = [&]()
947 {
948 uint16_t tag = static_cast<uint16_t>(*it) << 8;
949 ++it; // skip char read before
950 tag |= static_cast<uint16_t>(*it);
951 ++it; // skip char read before
952 return tag;
953 };
954
955 while (it != tag_str.end())
956 {
957 uint16_t const tag = parse_tag();
958
959 char const type_id{*it};
960 ++it; // skip char read before
961
962 switch (type_id)
963 {
964 case 'A': // char
965 {
966 target[tag] = *it;
967 ++it; // skip char that has been read
968 break;
969 }
970 // all integer sizes are possible
971 case 'c': // int8_t
972 {
973 parse_integer_into_target(tag, int8_t{});
974 break;
975 }
976 case 'C': // uint8_t
977 {
978 parse_integer_into_target(tag, uint8_t{});
979 break;
980 }
981 case 's': // int16_t
982 {
983 parse_integer_into_target(tag, int16_t{});
984 break;
985 }
986 case 'S': // uint16_t
987 {
988 parse_integer_into_target(tag, uint16_t{});
989 break;
990 }
991 case 'i': // int32_t
992 {
993 parse_integer_into_target(tag, int32_t{});
994 break;
995 }
996 case 'I': // uint32_t
997 {
998 parse_integer_into_target(tag, uint32_t{});
999 break;
1000 }
1001 case 'f': // float
1002 {
1003 float tmp{};
1004 read_float_byte_field(std::string_view{it, tag_str.end()}, tmp);
1005 target[tag] = tmp;
1006 it += sizeof(float);
1007 break;
1008 }
1009 case 'Z': // string
1010 {
1011 std::string const v{static_cast<char const *>(it)}; // parses until '\0'
1012 it += v.size() + 1;
1013 target[tag] = std::move(v);
1014 break;
1015 }
1016 case 'H': // byte array, represented as null-terminated string; specification requires even number of bytes
1017 {
1018 std::string_view const str{static_cast<char const *>(it)}; // parses until '\0'
1019
1020 std::vector<std::byte> tmp_vector{};
1021 // std::from_chars cannot directly parse into a std::byte
1022 uint8_t dummy_byte{};
1023
1024 if (str.size() % 2 != 0)
1025 throw format_error{"[CORRUPTED BAM FILE] Hexadecimal tag must have even number of digits."};
1026
1027 // H encodes bytes in a hexadecimal format. Two hex values are stored for each byte as characters.
1028 // E.g., '1' and 'A' need one byte each and are read as `\x1A`, which is 27 in decimal.
1029 for (auto hex_begin = str.begin(), hex_end = str.begin() + 2; hex_begin != str.end();
1030 hex_begin += 2, hex_end += 2)
1031 {
1032 auto res = std::from_chars(hex_begin, hex_end, dummy_byte, 16);
1033
1034 if (res.ec == std::errc::invalid_argument)
1035 throw format_error{std::string("[CORRUPTED BAM FILE] The string '")
1036 + std::string(hex_begin, hex_end) + "' could not be cast into type uint8_t."};
1037
1038 if (res.ec == std::errc::result_out_of_range)
1039 throw format_error{std::string("[CORRUPTED BAM FILE] Casting '") + std::string(str)
1040 + "' into type uint8_t would cause an overflow."};
1041
1042 tmp_vector.push_back(std::byte{dummy_byte});
1043 }
1044
1045 target[tag] = std::move(tmp_vector);
1046
1047 it += str.size() + 1;
1048
1049 break;
1050 }
1051 case 'B': // Array. Value type depends on second char [cCsSiIf]
1052 {
1053 char array_value_type_id = *it;
1054 ++it; // skip char read before
1055
1056 switch (array_value_type_id)
1057 {
1058 case 'c': // int8_t
1059 parse_array_into_target(tag, int8_t{});
1060 break;
1061 case 'C': // uint8_t
1062 parse_array_into_target(tag, uint8_t{});
1063 break;
1064 case 's': // int16_t
1065 parse_array_into_target(tag, int16_t{});
1066 break;
1067 case 'S': // uint16_t
1068 parse_array_into_target(tag, uint16_t{});
1069 break;
1070 case 'i': // int32_t
1071 parse_array_into_target(tag, int32_t{});
1072 break;
1073 case 'I': // uint32_t
1074 parse_array_into_target(tag, uint32_t{});
1075 break;
1076 case 'f': // float
1077 parse_array_into_target(tag, float{});
1078 break;
1079 default:
1080 throw format_error{detail::to_string("The first character in the numerical id of a SAM tag ",
1081 "must be one of [cCsSiIf] but '",
1082 array_value_type_id,
1083 "' was given.")};
1084 }
1085 break;
1086 }
1087 default:
1088 throw format_error{detail::to_string("The second character in the numerical id of a "
1089 "SAM tag must be one of [A,i,Z,H,B,f] but '",
1090 type_id,
1091 "' was given.")};
1092 }
1093 }
1094}
1095
1101inline std::vector<cigar> format_bam::parse_binary_cigar(std::string_view const cigar_str) const
1102{
1103 // The cigar operation is encoded in 4 bits.
1104 constexpr std::array<char, 16>
1105 cigar_operation_mapping{'M', 'I', 'D', 'N', 'S', 'H', 'P', '=', 'X', '*', '*', '*', '*', '*', '*', '*'};
1106 // The rightmost 4 bits encode the operation, the other bits encode the count.
1107 constexpr uint32_t cigar_operation_mask = 0x0f; // rightmost 4 bits are set to one
1108
1109 std::vector<cigar> cigar_vector{};
1110 char operation{'\0'};
1111 uint32_t count{};
1112 uint32_t operation_and_count{}; // In BAM, operation and count values are stored within one 32 bit integer.
1113
1114 assert(cigar_str.size() % 4 == 0); // One cigar letter is stored in 4 bytes (uint32_t).
1115
1116 for (auto it = cigar_str.begin(); it != cigar_str.end(); it += sizeof(operation_and_count))
1117 {
1118 std::memcpy(&operation_and_count, it, sizeof(operation_and_count));
1119 operation = cigar_operation_mapping[operation_and_count & cigar_operation_mask];
1120 count = operation_and_count >> 4;
1121
1122 cigar_vector.emplace_back(count, seqan3::assign_char_strictly_to(operation, cigar::operation{}));
1123 }
1124
1125 return cigar_vector;
1126}
1127
1131inline std::string format_bam::get_tag_dict_str(sam_tag_dictionary const & tag_dict)
1132{
1133 std::string result{};
1134
1135 auto stream_variant_fn = [&result](auto && arg) // helper to print a std::variant
1136 {
1137 // T is either char, int32_t, float, std::string, or a std::vector<some int>
1138 using T = std::remove_cvref_t<decltype(arg)>;
1139
1140 if constexpr (std::same_as<T, int32_t>)
1141 {
1142 // always choose the smallest possible representation [cCsSiI]
1143 size_t const absolute_arg = std::abs(arg);
1144 auto n = std::countr_zero(std::bit_ceil(absolute_arg + 1u) >> 1u) / 8u;
1145 bool const negative = arg < 0;
1146 n = n * n + 2 * negative; // for switch case order
1147
1148 switch (n)
1149 {
1150 case 0:
1151 {
1152 result[result.size() - 1] = 'C';
1153 result.append(reinterpret_cast<char const *>(&arg), 1);
1154 break;
1155 }
1156 case 1:
1157 {
1158 result[result.size() - 1] = 'S';
1159 result.append(reinterpret_cast<char const *>(&arg), 2);
1160 break;
1161 }
1162 case 2:
1163 {
1164 result[result.size() - 1] = 'c';
1165 int8_t tmp = static_cast<int8_t>(arg);
1166 result.append(reinterpret_cast<char const *>(&tmp), 1);
1167 break;
1168 }
1169 case 3:
1170 {
1171 result[result.size() - 1] = 's';
1172 int16_t tmp = static_cast<int16_t>(arg);
1173 result.append(reinterpret_cast<char const *>(&tmp), 2);
1174 break;
1175 }
1176 default:
1177 {
1178 result.append(reinterpret_cast<char const *>(&arg), 4); // always i
1179 break;
1180 }
1181 }
1182 }
1183 else if constexpr (std::same_as<T, std::string>)
1184 {
1185 result.append(reinterpret_cast<char const *>(arg.data()), arg.size() + 1 /*+ null character*/);
1186 }
1187 else if constexpr (!std::ranges::range<T>) // char, float
1188 {
1189 result.append(reinterpret_cast<char const *>(&arg), sizeof(arg));
1190 }
1191 else // std::vector of some arithmetic_type type
1192 {
1193 int32_t sz{static_cast<int32_t>(arg.size())};
1194 result.append(reinterpret_cast<char *>(&sz), 4);
1195 result.append(reinterpret_cast<char const *>(arg.data()),
1196 arg.size() * sizeof(std::ranges::range_value_t<T>));
1197 }
1198 };
1199
1200 for (auto & [tag, variant] : tag_dict)
1201 {
1202 result.push_back(static_cast<char>(tag / 256));
1203 result.push_back(static_cast<char>(tag % 256));
1204
1205 result.push_back(detail::sam_tag_type_char[variant.index()]);
1206
1207 if (!is_char<'\0'>(detail::sam_tag_type_char_extra[variant.index()]))
1208 result.push_back(detail::sam_tag_type_char_extra[variant.index()]);
1209
1210 std::visit(stream_variant_fn, variant);
1211 }
1212
1213 return result;
1214}
1215
1216} // namespace seqan3
T begin(T... args)
T bit_ceil(T... args)
constexpr derived_type & assign_rank(rank_type const c) noexcept
Assign from a numeric value.
Definition alphabet_base.hpp:184
The seqan3::cigar semialphabet pairs a counter with a seqan3::cigar::operation letter.
Definition alphabet/cigar/cigar.hpp:57
exposition_only::cigar_operation operation
The (extended) cigar operation alphabet of M,D,I,H,N,P,S,X,=.
Definition alphabet/cigar/cigar.hpp:93
A 16 letter DNA alphabet, containing all IUPAC symbols minus the gap and plus an equality sign ('=').
Definition dna16sam.hpp:45
The BAM format.
Definition format_bam.hpp:49
void read_alignment_record(stream_type &stream, sam_file_input_options< seq_legal_alph_type > const &options, ref_seqs_type &ref_seqs, sam_file_header< ref_ids_type > &header, stream_pos_type &position_buffer, seq_type &seq, qual_type &qual, id_type &id, ref_seq_type &ref_seq, ref_id_type &ref_id, ref_offset_type &ref_offset, cigar_type &cigar_vector, flag_type &flag, mapq_type &mapq, mate_type &mate, tag_dict_type &tag_dict, e_value_type &e_value, bit_score_type &bit_score)
Read from the specified stream and back-insert into the given field buffers.
Definition format_bam.hpp:261
format_bam()=default
Defaulted.
void write_alignment_record(stream_type &stream, sam_file_output_options const &options, header_type &&header, seq_type &&seq, qual_type &&qual, id_type &&id, ref_seq_type &&ref_seq, ref_id_type &&ref_id, std::optional< int32_t > ref_offset, cigar_type &&cigar_vector, sam_flag const flag, uint8_t const mapq, mate_type &&mate, tag_dict_type &&tag_dict, double e_value, double bit_score)
Write the given fields to the specified stream.
Definition format_bam.hpp:527
format_bam & operator=(format_bam &&)=default
Defaulted.
format_bam & operator=(format_bam const &)=default
Defaulted.
format_bam(format_bam &&)=default
Defaulted.
~format_bam()=default
Defaulted.
format_bam(format_bam const &)=default
Defaulted.
static std::vector< std::string > file_extensions
The valid file extensions for this format; note that you can modify this value.
Definition format_bam.hpp:65
Stores the header information of SAM/BAM files.
Definition header.hpp:47
ref_ids_type & ref_ids()
The range of reference ids.
Definition header.hpp:124
std::unordered_map< key_type, int32_t, key_hasher, detail::view_equality_fn > ref_dict
The mapping of reference id to position in the ref_ids() range and the ref_id_info range.
Definition header.hpp:163
std::vector< std::tuple< int32_t, std::string > > ref_id_info
The reference information. (used by the SAM/BAM format)
Definition header.hpp:160
The SAM tag dictionary class that stores all optional SAM fields.
Definition sam_tag_dictionary.hpp:327
T copy(T... args)
T copy_n(T... args)
T countr_zero(T... args)
T data(T... args)
Provides seqan3::dna16sam.
T emplace_back(T... args)
T end(T... args)
T equal(T... args)
Provides seqan3::detail::fast_ostreambuf_iterator.
Provides the seqan3::format_sam_base that can be inherited from.
T from_chars(T... args)
constexpr auto assign_char_to
Assign a character to an alphabet object.
Definition alphabet/concept.hpp:517
constexpr auto assign_char_strictly_to
Assign a character to an alphabet object, throw if the character is not valid.
Definition alphabet/concept.hpp:721
constexpr auto to_rank
Return the rank representation of a (semi-)alphabet object.
Definition alphabet/concept.hpp:152
sam_flag
An enum flag that describes the properties of an aligned read (given as a SAM record).
Definition sam_flag.hpp:73
@ flag
The alignment flag (bit information), uint16_t value.
@ ref_offset
Sequence (seqan3::field::ref_seq) relative start position (0-based), unsigned value.
@ ref_seq
The (reference) "sequence" information, usually a range of nucleotides or amino acids.
@ mapq
The mapping quality of the seqan3::field::seq alignment, usually a Phred-scaled score.
@ bit_score
The bit score (statistical significance indicator), unsigned value.
@ mate
The mate pair information given as a std::tuple of reference name, offset and template length.
@ ref_id
The identifier of the (reference) sequence that seqan3::field::seq was aligned to.
@ id
The identifier, usually a string.
@ seq
The "sequence", usually a range of nucleotides or amino acids.
@ qual
The qualities, usually in Phred score notation.
constexpr auto is_char
Checks whether a given letter is the same as the template non-type argument.
Definition predicate.hpp:60
constexpr ptrdiff_t count
Count the occurrences of a type in a pack.
Definition type_pack/traits.hpp:161
constexpr size_t size
The size of a type pack.
Definition type_pack/traits.hpp:143
constexpr auto repeat_n
A view factory that repeats a given value n times.
Definition repeat_n.hpp:88
Provides the seqan3::sam_file_header class.
T index(T... args)
The generic alphabet concept that covers most data types used in ranges.
A type that satisfies std::is_arithmetic_v<t>.
Checks whether from can be implicitly converted to to.
Whether a type behaves like a tuple.
Auxiliary functions for the SAM IO.
Provides seqan3::detail::istreambuf.
T memcpy(T... args)
T min(T... args)
The main SeqAn3 namespace.
Definition aligned_sequence_concept.hpp:26
Provides seqan3::debug_stream and related types.
T push_back(T... args)
T reserve(T... args)
T resize(T... args)
Provides seqan3::sam_file_input_options.
Provides helper data structures for the seqan3::sam_file_output.
Provides the seqan3::sam_tag_dictionary class and auxiliaries.
T size(T... args)
Provides seqan3::views::slice.
T str(T... args)
Thrown if information given to output format didn't match expectations.
Definition io/exception.hpp:88
The options type defines various option members that influence the behaviour of all or some formats.
Definition sam_file/input_options.hpp:26
The options type defines various option members that influence the behavior of all or some formats.
Definition sam_file/output_options.hpp:23
T substr(T... args)
Provides seqan3::views::take_exactly and seqan3::views::take_exactly_or_throw.
Provides seqan3::debug_stream and related types.
T visit(T... args)
Hide me