SeqAn3  3.0.2
The Modern C++ library for sequence analysis.
format_bam.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2020, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2020, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
13 #pragma once
14 
15 #include <iterator>
16 #include <string>
17 #include <vector>
18 
19 #include <seqan3/alphabet/detail/convert.hpp>
43 #include <seqan3/std/concepts>
44 #include <seqan3/std/ranges>
45 
46 namespace seqan3
47 {
48 
59 class format_bam : private detail::format_sam_base
60 {
61 public:
65  format_bam() noexcept = default;
66  format_bam(format_bam const &) noexcept = default;
67  format_bam & operator=(format_bam const &) noexcept = default;
68  format_bam(format_bam &&) noexcept = default;
69  format_bam & operator=(format_bam &&) noexcept = default;
70  ~format_bam() noexcept = default;
71 
74  static inline std::vector<std::string> file_extensions
75  {
76  { "bam" }
77  };
78 
79 protected:
80  template <typename stream_type, // constraints checked by file
81  typename seq_legal_alph_type,
82  typename ref_seqs_type,
83  typename ref_ids_type,
84  typename seq_type,
85  typename id_type,
86  typename offset_type,
87  typename ref_seq_type,
88  typename ref_id_type,
89  typename ref_offset_type,
90  typename align_type,
91  typename cigar_type,
92  typename flag_type,
93  typename mapq_type,
94  typename qual_type,
95  typename mate_type,
96  typename tag_dict_type,
97  typename e_value_type,
98  typename bit_score_type>
99  void read_alignment_record(stream_type & stream,
100  alignment_file_input_options<seq_legal_alph_type> const & SEQAN3_DOXYGEN_ONLY(options),
101  ref_seqs_type & ref_seqs,
103  seq_type & seq,
104  qual_type & qual,
105  id_type & id,
106  offset_type & offset,
107  ref_seq_type & SEQAN3_DOXYGEN_ONLY(ref_seq),
108  ref_id_type & ref_id,
109  ref_offset_type & ref_offset,
110  align_type & align,
111  cigar_type & cigar_vector,
112  flag_type & flag,
113  mapq_type & mapq,
114  mate_type & mate,
115  tag_dict_type & tag_dict,
116  e_value_type & SEQAN3_DOXYGEN_ONLY(e_value),
117  bit_score_type & SEQAN3_DOXYGEN_ONLY(bit_score));
118 
119  template <typename stream_type,
120  typename header_type,
121  typename seq_type,
122  typename id_type,
123  typename ref_seq_type,
124  typename ref_id_type,
125  typename align_type,
126  typename cigar_type,
127  typename qual_type,
128  typename mate_type,
129  typename tag_dict_type>
130  void write_alignment_record([[maybe_unused]] stream_type & stream,
131  [[maybe_unused]] alignment_file_output_options const & options,
132  [[maybe_unused]] header_type && header,
133  [[maybe_unused]] seq_type && seq,
134  [[maybe_unused]] qual_type && qual,
135  [[maybe_unused]] id_type && id,
136  [[maybe_unused]] int32_t const offset,
137  [[maybe_unused]] ref_seq_type && SEQAN3_DOXYGEN_ONLY(ref_seq),
138  [[maybe_unused]] ref_id_type && ref_id,
139  [[maybe_unused]] std::optional<int32_t> ref_offset,
140  [[maybe_unused]] align_type && align,
141  [[maybe_unused]] cigar_type && cigar_vector,
142  [[maybe_unused]] sam_flag const flag,
143  [[maybe_unused]] uint8_t const mapq,
144  [[maybe_unused]] mate_type && mate,
145  [[maybe_unused]] tag_dict_type && tag_dict,
146  [[maybe_unused]] double SEQAN3_DOXYGEN_ONLY(e_value),
147  [[maybe_unused]] double SEQAN3_DOXYGEN_ONLY(bit_score));
148 
149 private:
151  bool header_was_read{false};
152 
154  std::string string_buffer{};
155 
157  struct alignment_record_core
158  { // naming corresponds to official SAM/BAM specifications
159  int32_t block_size;
160  int32_t refID;
161  int32_t pos;
162  uint32_t l_read_name:8;
163  uint32_t mapq:8;
164  uint32_t bin:16;
165  uint32_t n_cigar_op:16;
166  sam_flag flag;
167  int32_t l_seq;
168  int32_t next_refID;
169  int32_t next_pos;
170  int32_t tlen;
171  };
172 
174  static constexpr std::array<uint8_t, 256> char_to_sam_rank
175  {
176  [] () constexpr
177  {
179 
180  using index_t = std::make_unsigned_t<char>;
181 
182  // ret['M'] = 0; set anyway by initialization
183  ret[static_cast<index_t>('I')] = 1;
184  ret[static_cast<index_t>('D')] = 2;
185  ret[static_cast<index_t>('N')] = 3;
186  ret[static_cast<index_t>('S')] = 4;
187  ret[static_cast<index_t>('H')] = 5;
188  ret[static_cast<index_t>('P')] = 6;
189  ret[static_cast<index_t>('=')] = 7;
190  ret[static_cast<index_t>('X')] = 8;
191 
192  return ret;
193  }()
194  };
195 
197  static uint16_t reg2bin(int32_t beg, int32_t end) noexcept
198  {
199  --end;
200  if (beg >> 14 == end >> 14) return ((1 << 15) - 1) / 7 + (beg >> 14);
201  if (beg >> 17 == end >> 17) return ((1 << 12) - 1) / 7 + (beg >> 17);
202  if (beg >> 20 == end >> 20) return ((1 << 9) - 1) / 7 + (beg >> 20);
203  if (beg >> 23 == end >> 23) return ((1 << 6) - 1) / 7 + (beg >> 23);
204  if (beg >> 26 == end >> 26) return ((1 << 3) - 1) / 7 + (beg >> 26);
205  return 0;
206  }
207 
208  using format_sam_base::read_field; // inherit read_field functions from format_base explicitly
209 
216  template <typename stream_view_type, std::integral number_type>
217  void read_field(stream_view_type && stream_view, number_type & target)
218  {
219  std::ranges::copy_n(std::ranges::begin(stream_view), sizeof(target), reinterpret_cast<char *>(&target));
220  }
221 
227  template <typename stream_view_type>
228  void read_field(stream_view_type && stream_view, float & target)
229  {
230  std::ranges::copy_n(std::ranges::begin(stream_view), sizeof(int32_t), reinterpret_cast<char *>(&target));
231  }
232 
243  template <typename stream_view_type, typename optional_value_type>
244  void read_field(stream_view_type && stream_view, std::optional<optional_value_type> & target)
245  {
246  optional_value_type tmp;
247  read_field(std::forward<stream_view_type>(stream_view), tmp);
248  target = tmp;
249  }
250 
251  template <typename stream_view_type, typename value_type>
252  void read_sam_dict_vector(seqan3::detail::sam_tag_variant & variant,
253  stream_view_type && stream_view,
254  value_type const & SEQAN3_DOXYGEN_ONLY(value));
255 
256  template <typename stream_view_type>
257  void read_field(stream_view_type && stream_view, sam_tag_dictionary & target);
258 
259  template <typename cigar_input_type>
260  auto parse_binary_cigar(cigar_input_type && cigar_input, uint16_t n_cigar_op) const;
261 
262  static std::string get_tag_dict_str(sam_tag_dictionary const & tag_dict);
263 };
264 
266 template <typename stream_type, // constraints checked by file
267  typename seq_legal_alph_type,
268  typename ref_seqs_type,
269  typename ref_ids_type,
270  typename seq_type,
271  typename id_type,
272  typename offset_type,
273  typename ref_seq_type,
274  typename ref_id_type,
275  typename ref_offset_type,
276  typename align_type,
277  typename cigar_type,
278  typename flag_type,
279  typename mapq_type,
280  typename qual_type,
281  typename mate_type,
282  typename tag_dict_type,
283  typename e_value_type,
284  typename bit_score_type>
285 inline void format_bam::read_alignment_record(stream_type & stream,
286  alignment_file_input_options<seq_legal_alph_type> const & SEQAN3_DOXYGEN_ONLY(options),
287  ref_seqs_type & ref_seqs,
289  seq_type & seq,
290  qual_type & qual,
291  id_type & id,
292  offset_type & offset,
293  ref_seq_type & SEQAN3_DOXYGEN_ONLY(ref_seq),
294  ref_id_type & ref_id,
295  ref_offset_type & ref_offset,
296  align_type & align,
297  cigar_type & cigar_vector,
298  flag_type & flag,
299  mapq_type & mapq,
300  mate_type & mate,
301  tag_dict_type & tag_dict,
302  e_value_type & SEQAN3_DOXYGEN_ONLY(e_value),
303  bit_score_type & SEQAN3_DOXYGEN_ONLY(bit_score))
304 {
305  static_assert(detail::decays_to_ignore_v<ref_offset_type> ||
306  detail::is_type_specialisation_of_v<ref_offset_type, std::optional>,
307  "The ref_offset must be a specialisation of std::optional.");
308 
309  static_assert(detail::decays_to_ignore_v<mapq_type> || std::same_as<mapq_type, uint8_t>,
310  "The type of field::mapq must be uint8_t.");
311 
312  static_assert(detail::decays_to_ignore_v<flag_type> || std::same_as<flag_type, sam_flag>,
313  "The type of field::flag must be seqan3::sam_flag.");
314 
316  auto stream_view = std::ranges::subrange<decltype(stream_buf_t{stream}), decltype(stream_buf_t{})>
317  {stream_buf_t{stream}, stream_buf_t{}};
318 
319  // these variables need to be stored to compute the ALIGNMENT
320  [[maybe_unused]] int32_t offset_tmp{};
321  [[maybe_unused]] int32_t soft_clipping_end{};
322  [[maybe_unused]] std::vector<cigar> tmp_cigar_vector{};
323  [[maybe_unused]] int32_t ref_length{0}, seq_length{0}; // length of aligned part for ref and query
324 
325  // Header
326  // -------------------------------------------------------------------------------------------------------------
327  if (!header_was_read)
328  {
329  // magic BAM string
330  if (!std::ranges::equal(stream_view | views::take_exactly_or_throw(4), std::string_view{"BAM\1"}))
331  throw format_error{"File is not in BAM format."};
332 
333  int32_t tmp32{};
334  read_field(stream_view, tmp32);
335 
336  if (tmp32 > 0) // header text is present
337  read_header(stream_view | views::take_exactly_or_throw(tmp32), header, ref_seqs);
338 
339  int32_t n_ref;
340  read_field(stream_view, n_ref);
341 
342  for (int32_t ref_idx = 0; ref_idx < n_ref; ++ref_idx)
343  {
344  read_field(stream_view, tmp32); // l_name (length of reference name including \0 character)
345 
346  string_buffer.resize(tmp32 - 1);
347  std::ranges::copy_n(std::ranges::begin(stream_view), tmp32 - 1, string_buffer.data()); // copy without \0 character
348  std::ranges::next(std::ranges::begin(stream_view)); // skip \0 character
349 
350  read_field(stream_view, tmp32); // l_ref (length of reference sequence)
351 
352  auto id_it = header.ref_dict.find(string_buffer);
353 
354  // sanity checks of reference information to existing header object:
355  if (id_it == header.ref_dict.end()) // [unlikely]
356  {
357  throw format_error{detail::to_string("Unknown reference name '" + string_buffer +
358  "' found in BAM file header (header.ref_ids():",
359  header.ref_ids(), ").")};
360  }
361  else if (id_it->second != ref_idx) // [unlikely]
362  {
363  throw format_error{detail::to_string("Reference id '", string_buffer, "' at position ", ref_idx,
364  " does not correspond to the position ", id_it->second,
365  " in the header (header.ref_ids():", header.ref_ids(), ").")};
366  }
367  else if (std::get<0>(header.ref_id_info[id_it->second]) != tmp32) // [unlikely]
368  {
369  throw format_error{"Provided reference has unequal length as specified in the header."};
370  }
371  }
372 
373  header_was_read = true;
374 
375  if (stream_buf_t{stream} == stream_buf_t{}) // no records follow
376  return;
377  }
378 
379  // read alignment record into buffer
380  // -------------------------------------------------------------------------------------------------------------
381  alignment_record_core core;
382  std::ranges::copy(stream_view | views::take_exactly_or_throw(sizeof(core)), reinterpret_cast<char *>(&core));
383 
384  if (core.refID >= static_cast<int32_t>(header.ref_ids().size()) || core.refID < -1) // [[unlikely]]
385  {
386  throw format_error{detail::to_string("Reference id index '", core.refID, "' is not in range of ",
387  "header.ref_ids(), which has size ", header.ref_ids().size(), ".")};
388  }
389  else if (core.refID > -1) // not unmapped
390  {
391  ref_id = core.refID; // field::ref_id
392  }
393 
394  flag = core.flag; // field::flag
395  mapq = core.mapq; // field::mapq
396 
397  if (core.pos > -1) // [[likely]]
398  ref_offset = core.pos; // field::ref_offset
399 
400  if constexpr (!detail::decays_to_ignore_v<mate_type>) // field::mate
401  {
402  if (core.next_refID > -1)
403  get<0>(mate) = core.next_refID;
404 
405  if (core.next_pos > -1) // [[likely]]
406  get<1>(mate) = core.next_pos;
407 
408  get<2>(mate) = core.tlen;
409  }
410 
411  // read id
412  // -------------------------------------------------------------------------------------------------------------
413  read_field(stream_view | views::take_exactly_or_throw(core.l_read_name - 1), id); // field::id
414  std::ranges::next(std::ranges::begin(stream_view)); // skip '\0'
415 
416  // read cigar string
417  // -------------------------------------------------------------------------------------------------------------
418  if constexpr (!detail::decays_to_ignore_v<align_type> || !detail::decays_to_ignore_v<cigar_type>)
419  {
420  std::tie(tmp_cigar_vector, ref_length, seq_length) = parse_binary_cigar(stream_view, core.n_cigar_op);
421  transfer_soft_clipping_to(tmp_cigar_vector, offset_tmp, soft_clipping_end);
422  // the actual cigar_vector is swapped with tmp_cigar_vector at the end to avoid copying
423  }
424  else
425  {
426  detail::consume(stream_view | views::take_exactly_or_throw(core.n_cigar_op * 4));
427  }
428 
429  offset = offset_tmp;
430 
431  // read sequence
432  // -------------------------------------------------------------------------------------------------------------
433  if (core.l_seq > 0) // sequence information is given
434  {
435  auto seq_stream = stream_view
436  | views::take_exactly_or_throw(core.l_seq / 2) // one too short if uneven
438  {
439  return {sam_dna16{}.assign_rank(std::min(15, static_cast<uint8_t>(c) >> 4)),
440  sam_dna16{}.assign_rank(std::min(15, static_cast<uint8_t>(c) & 0x0f))};
441  });
442 
443  if constexpr (detail::decays_to_ignore_v<seq_type>)
444  {
445  if constexpr (!detail::decays_to_ignore_v<align_type>)
446  {
447  static_assert(sequence_container<std::remove_reference_t<decltype(get<1>(align))>>,
448  "If you want to read ALIGNMENT but not SEQ, the alignment"
449  " object must store a sequence container at the second (query) position.");
450 
451  if (!tmp_cigar_vector.empty()) // only parse alignment if cigar information was given
452  {
453  assert(core.l_seq == (seq_length + offset_tmp + soft_clipping_end)); // sanity check
454  using alph_t = std::ranges::range_value_t<decltype(get<1>(align))>;
455  constexpr auto from_dna16 = detail::convert_through_char_representation<alph_t, sam_dna16>;
456 
457  get<1>(align).reserve(seq_length);
458 
459  auto tmp_iter = std::ranges::begin(seq_stream);
460  std::ranges::advance(tmp_iter, offset_tmp / 2); // skip soft clipped bases at the beginning
461 
462  if (offset_tmp & 1)
463  {
464  get<1>(align).push_back(from_dna16[to_rank(get<1>(*tmp_iter))]);
465  ++tmp_iter;
466  --seq_length;
467  }
468 
469  for (size_t i = (seq_length / 2); i > 0; --i)
470  {
471  get<1>(align).push_back(from_dna16[to_rank(get<0>(*tmp_iter))]);
472  get<1>(align).push_back(from_dna16[to_rank(get<1>(*tmp_iter))]);
473  ++tmp_iter;
474  }
475 
476  if (seq_length & 1)
477  {
478  get<1>(align).push_back(from_dna16[to_rank(get<0>(*tmp_iter))]);
479  ++tmp_iter;
480  --soft_clipping_end;
481  }
482 
483  std::ranges::advance(tmp_iter, (soft_clipping_end + !(seq_length & 1)) / 2);
484  }
485  else
486  {
487  get<1>(align) = std::remove_reference_t<decltype(get<1>(align))>{}; // assign empty container
488  }
489  }
490  else
491  {
492  detail::consume(seq_stream);
493  if (core.l_seq & 1)
494  std::ranges::next(std::ranges::begin(stream_view));
495  }
496  }
497  else
498  {
499  using alph_t = std::ranges::range_value_t<decltype(seq)>;
500  constexpr auto from_dna16 = detail::convert_through_char_representation<alph_t, sam_dna16>;
501 
502  for (auto [d1, d2] : seq_stream)
503  {
504  seq.push_back(from_dna16[to_rank(d1)]);
505  seq.push_back(from_dna16[to_rank(d2)]);
506  }
507 
508  if (core.l_seq & 1)
509  {
510  sam_dna16 d = sam_dna16{}.assign_rank(std::min(15, static_cast<uint8_t>(*std::ranges::begin(stream_view)) >> 4));
511  seq.push_back(from_dna16[to_rank(d)]);
512  std::ranges::next(std::ranges::begin(stream_view));
513  }
514 
515  if constexpr (!detail::decays_to_ignore_v<align_type>)
516  {
517  assign_unaligned(get<1>(align),
518  seq | views::slice(static_cast<std::ranges::range_difference_t<seq_type>>(offset_tmp),
519  std::ranges::distance(seq) - soft_clipping_end));
520  }
521  }
522  }
523 
524  // read qual string
525  // -------------------------------------------------------------------------------------------------------------
526  read_field(stream_view | views::take_exactly_or_throw(core.l_seq)
527  | std::views::transform([] (char chr) { return static_cast<char>(chr + 33); }), qual);
528 
529  // All remaining optional fields if any: SAM tags dictionary
530  // -------------------------------------------------------------------------------------------------------------
531  int32_t remaining_bytes = core.block_size - (sizeof(alignment_record_core) - 4/*block_size excluded*/) -
532  core.l_read_name - core.n_cigar_op * 4 - (core.l_seq + 1) / 2 - core.l_seq;
533  assert(remaining_bytes >= 0);
534  auto tags_view = stream_view | views::take_exactly_or_throw(remaining_bytes);
535 
536  while (tags_view.size() > 0)
537  read_field(tags_view, tag_dict);
538 
539  // DONE READING - wrap up
540  // -------------------------------------------------------------------------------------------------------------
541  if constexpr (!detail::decays_to_ignore_v<align_type> || !detail::decays_to_ignore_v<cigar_type>)
542  {
543  // Check cigar, if it matches ‘kSmN’, where ‘k’ equals lseq, ‘m’ is the reference sequence length in the
544  // alignment, and ‘S’ and ‘N’ are the soft-clipping and reference-clip, then the cigar string was larger
545  // than 65535 operations and is stored in the sam_tag_dictionary (tag GC).
546  if (core.l_seq != 0 && offset_tmp == core.l_seq)
547  {
548  if constexpr (detail::decays_to_ignore_v<tag_dict_type> | detail::decays_to_ignore_v<seq_type>)
549  { // maybe only throw in debug mode and otherwise return an empty alignment?
550  throw format_error{detail::to_string("The cigar string '", offset_tmp, "S", ref_length,
551  "N' suggests that the cigar string exceeded 65535 elements and was therefore ",
552  "stored in the optional field CG. You need to read in the field::tags and "
553  "field::seq in order to access this information.")};
554  }
555  else
556  {
557  auto it = tag_dict.find("CG"_tag);
558 
559  if (it == tag_dict.end())
560  throw format_error{detail::to_string("The cigar string '", offset_tmp, "S", ref_length,
561  "N' suggests that the cigar string exceeded 65535 elements and was therefore ",
562  "stored in the optional field CG but this tag is not present in the given ",
563  "record.")};
564 
565  auto cigar_view = std::views::all(std::get<std::string>(it->second));
566  std::tie(tmp_cigar_vector, ref_length, seq_length) = parse_cigar(cigar_view);
567  offset_tmp = soft_clipping_end = 0;
568  transfer_soft_clipping_to(tmp_cigar_vector, offset_tmp, soft_clipping_end);
569  tag_dict.erase(it); // remove redundant information
570 
571  if constexpr (!detail::decays_to_ignore_v<align_type>)
572  {
573  assign_unaligned(get<1>(align),
574  seq | views::slice(static_cast<std::ranges::range_difference_t<seq_type>>(offset_tmp),
575  std::ranges::distance(seq) - soft_clipping_end));
576  }
577  }
578  }
579  }
580 
581  // Alignment object construction
582  if constexpr (!detail::decays_to_ignore_v<align_type>)
583  construct_alignment(align, tmp_cigar_vector, core.refID, ref_seqs, core.pos, ref_length); // inherited from SAM
584 
585  if constexpr (!detail::decays_to_ignore_v<cigar_type>)
586  std::swap(cigar_vector, tmp_cigar_vector);
587 }
588 
590 template <typename stream_type,
591  typename header_type,
592  typename seq_type,
593  typename id_type,
594  typename ref_seq_type,
595  typename ref_id_type,
596  typename align_type,
597  typename cigar_type,
598  typename qual_type,
599  typename mate_type,
600  typename tag_dict_type>
601 inline void format_bam::write_alignment_record([[maybe_unused]] stream_type & stream,
602  [[maybe_unused]] alignment_file_output_options const & options,
603  [[maybe_unused]] header_type && header,
604  [[maybe_unused]] seq_type && seq,
605  [[maybe_unused]] qual_type && qual,
606  [[maybe_unused]] id_type && id,
607  [[maybe_unused]] int32_t const offset,
608  [[maybe_unused]] ref_seq_type && SEQAN3_DOXYGEN_ONLY(ref_seq),
609  [[maybe_unused]] ref_id_type && ref_id,
610  [[maybe_unused]] std::optional<int32_t> ref_offset,
611  [[maybe_unused]] align_type && align,
612  [[maybe_unused]] cigar_type && cigar_vector,
613  [[maybe_unused]] sam_flag const flag,
614  [[maybe_unused]] uint8_t const mapq,
615  [[maybe_unused]] mate_type && mate,
616  [[maybe_unused]] tag_dict_type && tag_dict,
617  [[maybe_unused]] double SEQAN3_DOXYGEN_ONLY(e_value),
618  [[maybe_unused]] double SEQAN3_DOXYGEN_ONLY(bit_score))
619 {
620  // ---------------------------------------------------------------------
621  // Type Requirements (as static asserts for user friendliness)
622  // ---------------------------------------------------------------------
623  static_assert((std::ranges::forward_range<seq_type> &&
624  alphabet<std::ranges::range_reference_t<seq_type>>),
625  "The seq object must be a std::ranges::forward_range over "
626  "letters that model seqan3::alphabet.");
627 
628  static_assert((std::ranges::forward_range<id_type> &&
629  alphabet<std::ranges::range_reference_t<id_type>>),
630  "The id object must be a std::ranges::forward_range over "
631  "letters that model seqan3::alphabet.");
632 
633  static_assert((std::ranges::forward_range<ref_seq_type> &&
634  alphabet<std::ranges::range_reference_t<ref_seq_type>>),
635  "The ref_seq object must be a std::ranges::forward_range "
636  "over letters that model seqan3::alphabet.");
637 
638  if constexpr (!detail::decays_to_ignore_v<ref_id_type>)
639  {
640  static_assert((std::ranges::forward_range<ref_id_type> ||
641  std::integral<std::remove_reference_t<ref_id_type>> ||
642  detail::is_type_specialisation_of_v<std::remove_cvref_t<ref_id_type>, std::optional>),
643  "The ref_id object must be a std::ranges::forward_range "
644  "over letters that model seqan3::alphabet or an integral or a std::optional<integral>.");
645  }
646 
648  "The align object must be a std::pair of two ranges whose "
649  "value_type is comparable to seqan3::gap");
650 
651  static_assert((std::tuple_size_v<std::remove_cvref_t<align_type>> == 2 &&
652  std::equality_comparable_with<gap, std::ranges::range_reference_t<decltype(std::get<0>(align))>> &&
653  std::equality_comparable_with<gap, std::ranges::range_reference_t<decltype(std::get<1>(align))>>),
654  "The align object must be a std::pair of two ranges whose "
655  "value_type is comparable to seqan3::gap");
656 
657  static_assert((std::ranges::forward_range<qual_type> &&
658  alphabet<std::ranges::range_reference_t<qual_type>>),
659  "The qual object must be a std::ranges::forward_range "
660  "over letters that model seqan3::alphabet.");
661 
663  "The mate object must be a std::tuple of size 3 with "
664  "1) a std::ranges::forward_range with a value_type modelling seqan3::alphabet, "
665  "2) a std::integral or std::optional<std::integral>, and "
666  "3) a std::integral.");
667 
668  static_assert(((std::ranges::forward_range<decltype(std::get<0>(mate))> ||
669  std::integral<std::remove_cvref_t<decltype(std::get<0>(mate))>> ||
670  detail::is_type_specialisation_of_v<std::remove_cvref_t<decltype(std::get<0>(mate))>, std::optional>) &&
671  (std::integral<std::remove_cvref_t<decltype(std::get<1>(mate))>> ||
672  detail::is_type_specialisation_of_v<std::remove_cvref_t<decltype(std::get<1>(mate))>, std::optional>) &&
673  std::integral<std::remove_cvref_t<decltype(std::get<2>(mate))>>),
674  "The mate object must be a std::tuple of size 3 with "
675  "1) a std::ranges::forward_range with a value_type modelling seqan3::alphabet, "
676  "2) a std::integral or std::optional<std::integral>, and "
677  "3) a std::integral.");
678 
679  static_assert(std::same_as<std::remove_cvref_t<tag_dict_type>, sam_tag_dictionary>,
680  "The tag_dict object must be of type seqan3::sam_tag_dictionary.");
681 
682  if constexpr (detail::decays_to_ignore_v<header_type>)
683  {
684  throw format_error{"BAM can only be written with a header but you did not provide enough information! "
685  "You can either construct the output file with ref_ids and ref_seqs information and "
686  "the header will be created for you, or you can access the `header` member directly."};
687  }
688  else
689  {
690  // ---------------------------------------------------------------------
691  // logical Requirements
692  // ---------------------------------------------------------------------
693 
694  if (ref_offset.has_value() && (ref_offset.value() + 1) < 0)
695  throw format_error{detail::to_string("The ref_offset object must be >= -1 but is: ", ref_offset)};
696 
697  std::cpp20::ostreambuf_iterator stream_it{stream};
698 
699  // ---------------------------------------------------------------------
700  // Writing the BAM Header on first call
701  // ---------------------------------------------------------------------
702  if (!header_was_written)
703  {
704  stream << "BAM\1";
706  write_header(os, options, header); // write SAM header to temporary stream to query the size.
707  int32_t l_text{static_cast<int32_t>(os.str().size())};
708  std::ranges::copy_n(reinterpret_cast<char *>(&l_text), 4, stream_it); // write read id
709 
710  stream << os.str();
711 
712  int32_t n_ref{static_cast<int32_t>(header.ref_ids().size())};
713  std::ranges::copy_n(reinterpret_cast<char *>(&n_ref), 4, stream_it); // write read id
714 
715  for (int32_t ridx = 0; ridx < n_ref; ++ridx)
716  {
717  int32_t l_name{static_cast<int32_t>(header.ref_ids()[ridx].size()) + 1}; // plus null character
718  std::ranges::copy_n(reinterpret_cast<char *>(&l_name), 4, stream_it); // write l_name
719  // write reference name:
720  std::ranges::copy(header.ref_ids()[ridx].begin(), header.ref_ids()[ridx].end(), stream_it);
721  stream_it = '\0';
722  // write reference sequence length:
723  std::ranges::copy_n(reinterpret_cast<char *>(&get<0>(header.ref_id_info[ridx])), 4, stream_it);
724  }
725 
726  header_was_written = true;
727  }
728 
729  // ---------------------------------------------------------------------
730  // Writing the Record
731  // ---------------------------------------------------------------------
732  int32_t ref_length{};
733 
734  // if alignment is non-empty, replace cigar_vector.
735  // else, compute the ref_length from given cigar_vector which is needed to fill field `bin`.
736  if (!std::ranges::empty(cigar_vector))
737  {
738  int32_t dummy_seq_length{};
739  for (auto & [count, operation] : cigar_vector)
740  update_alignment_lengths(ref_length, dummy_seq_length, operation.to_char(), count);
741  }
742  else if (!std::ranges::empty(get<0>(align)) && !std::ranges::empty(get<1>(align)))
743  {
744  ref_length = std::ranges::distance(get<1>(align));
745 
746  // compute possible distance from alignment end to sequence end
747  // which indicates soft clipping at the end.
748  // This should be replaced by a free count_gaps function for
749  // aligned sequences which is more efficient if possible.
750  int32_t off_end{static_cast<int32_t>(std::ranges::distance(seq)) - offset};
751 
752  for (auto chr : get<1>(align))
753  if (chr == gap{})
754  ++off_end;
755 
756  off_end -= ref_length;
757  cigar_vector = detail::get_cigar_vector(align, offset, off_end);
758  }
759 
760  if (cigar_vector.size() >= (1 << 16)) // must be written into the sam tag CG
761  {
762  tag_dict["CG"_tag] = detail::get_cigar_string(cigar_vector);
763  cigar_vector.resize(2);
764  cigar_vector[0] = cigar{static_cast<uint32_t>(std::ranges::distance(seq)), 'S'_cigar_op};
765  cigar_vector[1] = cigar{static_cast<uint32_t>(std::ranges::distance(get<1>(align))), 'N'_cigar_op};
766  }
767 
768  std::string tag_dict_binary_str = get_tag_dict_str(tag_dict);
769 
770  // Compute the value for the l_read_name field for the bam record.
771  // This value is stored including a trailing `0`, so at most 254 characters of the id can be stored, since
772  // the data type to store the value is uint8_t and 255 is the maximal size.
773  // If the id is empty a '*' is written instead, i.e. the written id is never an empty string and stores at least
774  // 2 bytes.
775  uint8_t read_name_size = std::min<uint8_t>(std::ranges::distance(id), 254) + 1;
776  read_name_size += static_cast<uint8_t>(read_name_size == 1); // need size two since empty id is stored as '*'.
777 
778  alignment_record_core core
779  {
780  /* block_size */ 0, // will be initialised right after
781  /* refID */ -1, // will be initialised right after
782  /* pos */ ref_offset.value_or(-1),
783  /* l_read_name */ read_name_size,
784  /* mapq */ mapq,
785  /* bin */ reg2bin(ref_offset.value_or(-1), ref_length),
786  /* n_cigar_op */ static_cast<uint16_t>(cigar_vector.size()),
787  /* flag */ flag,
788  /* l_seq */ static_cast<int32_t>(std::ranges::distance(seq)),
789  /* next_refId */ -1, // will be initialised right after
790  /* next_pos */ get<1>(mate).value_or(-1),
791  /* tlen */ get<2>(mate)
792  };
793 
794  auto check_and_assign_id_to = [&header] ([[maybe_unused]] auto & id_source,
795  [[maybe_unused]] auto & id_target)
796  {
797  using id_t = std::remove_reference_t<decltype(id_source)>;
798 
799  if constexpr (!detail::decays_to_ignore_v<id_t>)
800  {
801  if constexpr (std::integral<id_t>)
802  {
803  id_target = id_source;
804  }
805  else if constexpr (detail::is_type_specialisation_of_v<id_t, std::optional>)
806  {
807  id_target = id_source.value_or(-1);
808  }
809  else
810  {
811  if (!std::ranges::empty(id_source)) // otherwise default will remain (-1)
812  {
813  auto id_it = header.ref_dict.end();
814 
815  if constexpr (std::ranges::contiguous_range<decltype(id_source)> &&
816  std::ranges::sized_range<decltype(id_source)> &&
817  std::ranges::borrowed_range<decltype(id_source)>)
818  {
819  id_it = header.ref_dict.find(std::span{std::ranges::data(id_source),
820  std::ranges::size(id_source)});
821  }
822  else
823  {
824  using header_ref_id_type = std::remove_reference_t<decltype(header.ref_ids()[0])>;
825 
826  static_assert(implicitly_convertible_to<decltype(id_source), header_ref_id_type>,
827  "The ref_id type is not convertible to the reference id information stored in the "
828  "reference dictionary of the header object.");
829 
830  id_it = header.ref_dict.find(id_source);
831  }
832 
833  if (id_it == header.ref_dict.end())
834  {
835  throw format_error{detail::to_string("Unknown reference name '", id_source, "' could "
836  "not be found in BAM header ref_dict: ",
837  header.ref_dict, ".")};
838  }
839 
840  id_target = id_it->second;
841  }
842  }
843  }
844  };
845 
846  // initialise core.refID
847  check_and_assign_id_to(ref_id, core.refID);
848 
849  // initialise core.next_refID
850  check_and_assign_id_to(get<0>(mate), core.next_refID);
851 
852  // initialise core.block_size
853  core.block_size = sizeof(core) - 4/*block_size excluded*/ +
854  core.l_read_name +
855  core.n_cigar_op * 4 + // each int32_t has 4 bytes
856  (core.l_seq + 1) / 2 + // bitcompressed seq
857  core.l_seq + // quality string
858  tag_dict_binary_str.size();
859 
860  std::ranges::copy_n(reinterpret_cast<char *>(&core), sizeof(core), stream_it); // write core
861 
862  if (std::ranges::empty(id)) // empty id is represented as * for backward compatibility
863  stream_it = '*';
864  else
865  std::ranges::copy_n(std::ranges::begin(id), core.l_read_name - 1, stream_it); // write read id
866  stream_it = '\0';
867 
868  // write cigar
869  for (auto [cigar_count, op] : cigar_vector)
870  {
871  cigar_count = cigar_count << 4;
872  cigar_count |= static_cast<int32_t>(char_to_sam_rank[op.to_char()]);
873  std::ranges::copy_n(reinterpret_cast<char *>(&cigar_count), 4, stream_it);
874  }
875 
876  // write seq (bit-compressed: sam_dna16 characters go into one byte)
877  using alph_t = std::ranges::range_value_t<seq_type>;
878  constexpr auto to_dna16 = detail::convert_through_char_representation<sam_dna16, alph_t>;
879 
880  auto sit = std::ranges::begin(seq);
881  for (int32_t sidx = 0; sidx < ((core.l_seq & 1) ? core.l_seq - 1 : core.l_seq); ++sidx, ++sit)
882  {
883  uint8_t compressed_chr = to_rank(to_dna16[to_rank(*sit)]) << 4;
884  ++sidx, ++sit;
885  compressed_chr |= to_rank(to_dna16[to_rank(*sit)]);
886  stream_it = static_cast<char>(compressed_chr);
887  }
888 
889  if (core.l_seq & 1) // write one more
890  stream_it = static_cast<char>(to_rank(to_dna16[to_rank(*sit)]) << 4);
891 
892  // write qual
893  if (std::ranges::empty(qual))
894  {
895  auto v = views::repeat_n(static_cast<char>(255), core.l_seq);
896  std::ranges::copy_n(v.begin(), core.l_seq, stream_it);
897  }
898  else
899  {
900  if (std::ranges::distance(qual) != core.l_seq)
901  throw format_error{detail::to_string("Expected quality of same length as sequence with size ",
902  core.l_seq, ". Got quality with size ",
903  std::ranges::distance(qual), " instead.")};
904 
905  auto v = qual | std::views::transform([] (auto chr) { return static_cast<char>(to_rank(chr)); });
906  std::ranges::copy_n(v.begin(), core.l_seq, stream_it);
907  }
908 
909  // write optional fields
910  stream << tag_dict_binary_str;
911  } // if constexpr (!detail::decays_to_ignore_v<header_type>)
912 }
913 
915 template <typename stream_view_type, typename value_type>
916 inline void format_bam::read_sam_dict_vector(seqan3::detail::sam_tag_variant & variant,
917  stream_view_type && stream_view,
918  value_type const & SEQAN3_DOXYGEN_ONLY(value))
919 {
920  int32_t count;
921  read_field(stream_view, count); // read length of vector
922  std::vector<value_type> tmp_vector;
923  tmp_vector.reserve(count);
924 
925  while (count > 0)
926  {
927  value_type tmp{};
928  read_field(stream_view, tmp);
929  tmp_vector.push_back(std::move(tmp));
930  --count;
931  }
932  variant = std::move(tmp_vector);
933 }
934 
952 template <typename stream_view_type>
953 inline void format_bam::read_field(stream_view_type && stream_view, sam_tag_dictionary & target)
954 {
955  /* Every BA< tag has the format "[TAG][TYPE_ID][VALUE]", where TAG is a two letter
956  name tag which is converted to a unique integer identifier and TYPE_ID is one character in [A,i,Z,H,B,f]
957  describing the type for the upcoming VALUES. If TYPE_ID=='B' it signals an array of
958  VALUE's and the inner value type is identified by the next character, one of [cCsSiIf], followed
959  by the length (int32_t) of the array, followed by the values.
960  */
961  uint16_t tag = static_cast<uint16_t>(*std::ranges::begin(stream_view)) << 8;
962  std::ranges::next(std::ranges::begin(stream_view)); // skip char read before
963  tag += static_cast<uint16_t>(*std::ranges::begin(stream_view));
964  std::ranges::next(std::ranges::begin(stream_view)); // skip char read before
965  char type_id = static_cast<char>(*std::ranges::begin(stream_view));
966  std::ranges::next(std::ranges::begin(stream_view)); // skip char read before
967 
968  switch (type_id)
969  {
970  case 'A' : // char
971  {
972  target[tag] = static_cast<char>(*std::ranges::begin(stream_view));
973  std::ranges::next(std::ranges::begin(stream_view)); // skip char that has been read
974  break;
975  }
976  // all integer sizes are possible
977  case 'c' : // int8_t
978  {
979  int8_t tmp;
980  read_field(stream_view, tmp);
981  target[tag] = static_cast<int32_t>(tmp); // readable sam format only allows int32_t
982  break;
983  }
984  case 'C' : // uint8_t
985  {
986  uint8_t tmp;
987  read_field(stream_view, tmp);
988  target[tag] = static_cast<int32_t>(tmp); // readable sam format only allows int32_t
989  break;
990  }
991  case 's' : // int16_t
992  {
993  int16_t tmp;
994  read_field(stream_view, tmp);
995  target[tag] = static_cast<int32_t>(tmp); // readable sam format only allows int32_t
996  break;
997  }
998  case 'S' : // uint16_t
999  {
1000  uint16_t tmp;
1001  read_field(stream_view, tmp);
1002  target[tag] = static_cast<int32_t>(tmp); // readable sam format only allows int32_t
1003  break;
1004  }
1005  case 'i' : // int32_t
1006  {
1007  int32_t tmp;
1008  read_field(stream_view, tmp);
1009  target[tag] = std::move(tmp); // readable sam format only allows int32_t
1010  break;
1011  }
1012  case 'I' : // uint32_t
1013  {
1014  uint32_t tmp;
1015  read_field(stream_view, tmp);
1016  target[tag] = static_cast<int32_t>(tmp); // readable sam format only allows int32_t
1017  break;
1018  }
1019  case 'f' : // float
1020  {
1021  float tmp;
1022  read_field(stream_view, tmp);
1023  target[tag] = tmp;
1024  break;
1025  }
1026  case 'Z' : // string
1027  {
1028  string_buffer.clear();
1029  while (!is_char<'\0'>(*std::ranges::begin(stream_view)))
1030  {
1031  string_buffer.push_back(*std::ranges::begin(stream_view));
1032  std::ranges::next(std::ranges::begin(stream_view));
1033  }
1034  std::ranges::next(std::ranges::begin(stream_view)); // skip \0
1035  target[tag] = string_buffer;
1036  break;
1037  }
1038  case 'H' :
1039  {
1040  // TODO
1041  break;
1042  }
1043  case 'B' : // Array. Value type depends on second char [cCsSiIf]
1044  {
1045  char array_value_type_id = *std::ranges::begin(stream_view);
1046  std::ranges::next(std::ranges::begin(stream_view)); // skip char read before
1047 
1048  switch (array_value_type_id)
1049  {
1050  case 'c' : // int8_t
1051  read_sam_dict_vector(target[tag], stream_view, int8_t{});
1052  break;
1053  case 'C' : // uint8_t
1054  read_sam_dict_vector(target[tag], stream_view, uint8_t{});
1055  break;
1056  case 's' : // int16_t
1057  read_sam_dict_vector(target[tag], stream_view, int16_t{});
1058  break;
1059  case 'S' : // uint16_t
1060  read_sam_dict_vector(target[tag], stream_view, uint16_t{});
1061  break;
1062  case 'i' : // int32_t
1063  read_sam_dict_vector(target[tag], stream_view, int32_t{});
1064  break;
1065  case 'I' : // uint32_t
1066  read_sam_dict_vector(target[tag], stream_view, uint32_t{});
1067  break;
1068  case 'f' : // float
1069  read_sam_dict_vector(target[tag], stream_view, float{});
1070  break;
1071  default:
1072  throw format_error{detail::to_string("The first character in the numerical id of a SAM tag ",
1073  "must be one of [cCsSiIf] but '", array_value_type_id, "' was given.")};
1074  }
1075  break;
1076  }
1077  default:
1078  throw format_error{detail::to_string("The second character in the numerical id of a "
1079  "SAM tag must be one of [A,i,Z,H,B,f] but '", type_id, "' was given.")};
1080  }
1081 }
1082 
1097 template <typename cigar_input_type>
1098 inline auto format_bam::parse_binary_cigar(cigar_input_type && cigar_input, uint16_t n_cigar_op) const
1099 {
1100  std::vector<cigar> operations{};
1101  char operation{'\0'};
1102  uint32_t count{};
1103  int32_t ref_length{}, seq_length{};
1104  uint32_t operation_and_count{}; // in BAM operation and count values are stored within one 32 bit integer
1105  constexpr char const * cigar_mapping = "MIDNSHP=X*******";
1106  constexpr uint32_t cigar_mask = 0x0f; // 0000000000001111
1107 
1108  if (n_cigar_op == 0) // [[unlikely]]
1109  return std::tuple{operations, ref_length, seq_length};
1110 
1111  // parse the rest of the cigar
1112  // -------------------------------------------------------------------------------------------------------------
1113  while (n_cigar_op > 0) // until stream is not empty
1114  {
1115  std::ranges::copy_n(std::ranges::begin(cigar_input),
1116  sizeof(operation_and_count),
1117  reinterpret_cast<char*>(&operation_and_count));
1118  operation = cigar_mapping[operation_and_count & cigar_mask];
1119  count = operation_and_count >> 4;
1120 
1121  update_alignment_lengths(ref_length, seq_length, operation, count);
1122  operations.emplace_back(count, cigar_op{}.assign_char(operation));
1123  --n_cigar_op;
1124  }
1125 
1126  return std::tuple{operations, ref_length, seq_length};
1127 }
1128 
1132 inline std::string format_bam::get_tag_dict_str(sam_tag_dictionary const & tag_dict)
1133 {
1134  std::string result{};
1135 
1136  auto stream_variant_fn = [&result] (auto && arg) // helper to print an std::variant
1137  {
1138  // T is either char, int32_t, float, std::string, or a std::vector<some int>
1139  using T = std::remove_cvref_t<decltype(arg)>;
1140 
1141  if constexpr (std::same_as<T, int32_t>)
1142  {
1143  // always choose the smallest possible representation [cCsSiI]
1144  bool negative = arg < 0;
1145  auto n = __builtin_ctz(detail::next_power_of_two(((negative) ? arg * -1 : arg) + 1) >> 1) / 8;
1146  n = n * n + 2 * negative; // for switch case order
1147 
1148  switch (n)
1149  {
1150  case 0:
1151  {
1152  result[result.size() - 1] = 'C';
1153  result.append(reinterpret_cast<char const *>(&arg), 1);
1154  break;
1155  }
1156  case 1:
1157  {
1158  result[result.size() - 1] = 'S';
1159  result.append(reinterpret_cast<char const *>(&arg), 2);
1160  break;
1161  }
1162  case 2:
1163  {
1164  result[result.size() - 1] = 'c';
1165  int8_t tmp = static_cast<int8_t>(arg);
1166  result.append(reinterpret_cast<char const *>(&tmp), 1);
1167  break;
1168  }
1169  case 3:
1170  {
1171  result[result.size() - 1] = 's';
1172  int16_t tmp = static_cast<int16_t>(arg);
1173  result.append(reinterpret_cast<char const *>(&tmp), 2);
1174  break;
1175  }
1176  default:
1177  {
1178  result.append(reinterpret_cast<char const *>(&arg), 4); // always i
1179  break;
1180  }
1181  }
1182  }
1183  else if constexpr (std::same_as<T, std::string>)
1184  {
1185  result.append(reinterpret_cast<char const *>(arg.data()), arg.size() + 1/*+ null character*/);
1186  }
1187  else if constexpr (!std::ranges::range<T>) // char, float
1188  {
1189  result.append(reinterpret_cast<char const *>(&arg), sizeof(arg));
1190  }
1191  else // std::vector of some arithmetic_type type
1192  {
1193  int32_t sz{static_cast<int32_t>(arg.size())};
1194  result.append(reinterpret_cast<char *>(&sz), 4);
1195  result.append(reinterpret_cast<char const *>(arg.data()),
1196  arg.size() * sizeof(std::ranges::range_value_t<T>));
1197  }
1198  };
1199 
1200  for (auto & [tag, variant] : tag_dict)
1201  {
1202  result.push_back(static_cast<char>(tag / 256));
1203  result.push_back(static_cast<char>(tag % 256));
1204 
1205  result.push_back(detail::sam_tag_type_char[variant.index()]);
1206 
1207  if (!is_char<'\0'>(detail::sam_tag_type_char_extra[variant.index()]))
1208  result.push_back(detail::sam_tag_type_char_extra[variant.index()]);
1209 
1210  std::visit(stream_variant_fn, variant);
1211  }
1212 
1213  return result;
1214 }
1215 
1216 } // namespace seqan3
seqan3::format_bam::file_extensions
static std::vector< std::string > file_extensions
The valid file extensions for this format; note that you can modify this value.
Definition: format_bam.hpp:75
seqan3::views::take_exactly_or_throw
constexpr auto take_exactly_or_throw
A view adaptor that returns the first size elements from the underlying range and also exposes size i...
Definition: take_exactly.hpp:91
seqan3::gap
The alphabet of a gap character '-'.
Definition: gap.hpp:37
seqan3::alignment_file_output_options
The options type defines various option members that influence the behavior of all or some formats.
Definition: output_options.hpp:23
std::string::resize
T resize(T... args)
misc.hpp
Provides helper data structures for the seqan3::alignment_file_output.
bit_manipulation.hpp
Provides utility functions for bit twiddling.
take_until.hpp
Provides seqan3::views::take_until and seqan3::views::take_until_or_throw.
std::span
std::string
sequence_container
A more refined container concept than seqan3::container.
std::string_view
tuple.hpp
Provides seqan3::tuple_like.
sam_tag_dictionary.hpp
Provides the seqan3::sam_tag_dictionary class and auxiliaries.
std::pair
std::vector::reserve
T reserve(T... args)
seqan3::format_error
Thrown if information given to output format didn't match expectations.
Definition: exception.hpp:85
vector
std::string::size
T size(T... args)
seqan3::format_bam::read_alignment_record
void read_alignment_record(stream_type &stream, alignment_file_input_options< seq_legal_alph_type > const &options, ref_seqs_type &ref_seqs, alignment_file_header< ref_ids_type > &header, seq_type &seq, qual_type &qual, id_type &id, offset_type &offset, ref_seq_type &ref_seq, ref_id_type &ref_id, ref_offset_type &ref_offset, align_type &align, cigar_type &cigar_vector, flag_type &flag, mapq_type &mapq, mate_type &mate, tag_dict_type &tag_dict, e_value_type &e_value, bit_score_type &bit_score)
Read from the specified stream and back-insert into the given field buffers.
Definition: format_bam.hpp:285
template_inspection.hpp
Provides seqan3::type_list and auxiliary type traits.
seqan3::sam_flag
sam_flag
An enum flag that describes the properties of an aligned read (given as a SAM record).
Definition: misc.hpp:71
seqan3::to_rank
constexpr auto to_rank
Return the rank representation of a (semi-)alphabet object.
Definition: concept.hpp:143
std::tuple
seqan3::alignment_file_input_options
The options type defines various option members that influence the behaviour of all or some formats.
Definition: input_options.hpp:24
seqan3::alignment_file_header::ref_ids
ref_ids_type & ref_ids()
The range of reference ids.
Definition: header.hpp:119
seqan3::format_bam::format_bam
format_bam() noexcept=default
Defaulted.
std::string::clear
T clear(T... args)
concepts
The Concepts library.
input_format_concept.hpp
Provides seqan3::alignment_file_input_format and auxiliary classes.
std::tie
T tie(T... args)
std::vector::push_back
T push_back(T... args)
seqan3::seq
constexpr sequenced_policy seq
Global execution policy object for sequenced execution policy.
Definition: execution.hpp:54
seqan3::value_type
Exposes the value_type of another type.
Definition: pre.hpp:58
slice.hpp
Provides seqan3::views::slice.
seqan3::sam_dna16
A 16 letter DNA alphabet, containing all IUPAC symbols minus the gap and plus an equality sign ('=').
Definition: sam_dna16.hpp:46
seqan3::views::move
auto const move
A view that turns lvalue-references into rvalue-references.
Definition: move.hpp:68
seqan3::alignment_file_header
Stores the header information of alignment files.
Definition: header.hpp:33
range.hpp
Provides various transformation traits used by the range module.
core_language.hpp
Provides concepts for core language types and relations that don't have concepts in C++20 (yet).
seqan3::alignment_file_header::ref_dict
std::unordered_map< key_type, int32_t, std::hash< key_type >, detail::view_equality_fn > ref_dict
The mapping of reference id to position in the ref_ids() range and the ref_id_info range.
Definition: header.hpp:158
seqan3::cigar
The cigar semialphabet pairs a counter with a seqan3::cigar_op letter.
Definition: cigar.hpp:55
header.hpp
Provides the seqan3::alignment_file_header class.
std::array< uint8_t, 256 >
seqan3
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:29
detail.hpp
Auxiliary functions for the alignment IO.
std::istreambuf_iterator
output_options.hpp
Provides seqan3::alignment_file_output_options.
seqan3::pack_traits::size
constexpr size_t size
The size of a type pack.
Definition: traits.hpp:116
tuple_like
Whether a type behaves like a tuple.
std::swap
T swap(T... args)
std::min
T min(T... args)
std::ostringstream
std::variant::index
T index(T... args)
ranges
Adaptations of concepts from the Ranges TS.
seqan3::sam_tag_dictionary
The SAM tag dictionary class that stores all optional SAM fields.
Definition: sam_tag_dictionary.hpp:325
std::remove_reference_t
seqan3::format_bam
The BAM format.
Definition: format_bam.hpp:60
take_exactly.hpp
Provides seqan3::views::take_exactly and seqan3::views::take_exactly_or_throw.
seqan3::alignment_file_header::ref_id_info
std::vector< std::tuple< int32_t, std::string > > ref_id_info
The reference information. (used by the SAM/BAM format)
Definition: header.hpp:155
alphabet
The generic alphabet concept that covers most data types used in ranges.
std::ranges::begin
T begin(T... args)
predicate.hpp
Provides character predicates for tokenisation.
std
SeqAn specific customisations in the standard namespace.
seqan3::views::slice
constexpr auto slice
A view adaptor that returns a half-open interval on the underlying range.
Definition: slice.hpp:141
ignore_output_iterator.hpp
Provides seqan3::detail::ignore_output_iterator for writing to null stream.
std::visit
T visit(T... args)
output_format_concept.hpp
Provides seqan3::alignment_file_output_format and auxiliary classes.
std::remove_cvref_t
sam_dna16.hpp
Provides seqan3::sam_dna16.
implicitly_convertible_to
Resolves to std::ranges::implicitly_convertible_to<type1, type2>().
std::optional
to_string.hpp
Auxiliary for pretty printing of exception messages.
std::ostringstream::str
T str(T... args)
seqan3::alphabet_base::assign_rank
constexpr derived_type & assign_rank(rank_type const c) noexcept
Assign from a numeric value.
Definition: alphabet_base.hpp:167
seqan3::format_bam::write_alignment_record
void write_alignment_record([[maybe_unused]] stream_type &stream, [[maybe_unused]] alignment_file_output_options const &options, [[maybe_unused]] header_type &&header, [[maybe_unused]] seq_type &&seq, [[maybe_unused]] qual_type &&qual, [[maybe_unused]] id_type &&id, [[maybe_unused]] int32_t const offset, [[maybe_unused]] ref_seq_type &&ref_seq, [[maybe_unused]] ref_id_type &&ref_id, [[maybe_unused]] std::optional< int32_t > ref_offset, [[maybe_unused]] align_type &&align, [[maybe_unused]] cigar_type &&cigar_vector, [[maybe_unused]] sam_flag const flag, [[maybe_unused]] uint8_t const mapq, [[maybe_unused]] mate_type &&mate, [[maybe_unused]] tag_dict_type &&tag_dict, [[maybe_unused]] double e_value, [[maybe_unused]] double bit_score)
Write the given fields to the specified stream.
Definition: format_bam.hpp:601
seqan3::views::repeat_n
constexpr auto repeat_n
A view factory that repeats a given value n times.
Definition: repeat_n.hpp:94
std::end
T end(T... args)
seqan3::pack_traits::transform
seqan3::type_list< trait_t< pack_t >... > transform
Apply a transformation trait to every type in the pack and return a seqan3::type_list of the results.
Definition: traits.hpp:307
misc.hpp
Provides various utility functions.
input_options.hpp
Provides seqan3::alignment_file_input_options.
format_sam_base.hpp
Provides the seqan3::format_sam_base that can be inherited from.
misc.hpp
Provides various utility functions.
std::string::data
T data(T... args)
seqan3::pack_traits::count
constexpr ptrdiff_t count
Count the occurrences of a type in a pack.
Definition: traits.hpp:134
std::make_unsigned_t
std::tuple_size_v
T tuple_size_v
std::variant
string