SeqAn3  3.0.3
The Modern C++ library for sequence analysis.
format_sam_base.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2021, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2021, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
14 #pragma once
15 
16 #include <seqan3/std/ranges>
17 #include <string>
18 #include <vector>
19 
31 
32 namespace seqan3::detail
33 {
34 
43 class format_sam_base
44 {
45 protected:
49  format_sam_base() = default;
50  format_sam_base(format_sam_base const &) = default;
51  format_sam_base & operator=(format_sam_base const &) = default;
52  format_sam_base(format_sam_base &&) = default;
53  format_sam_base & operator=(format_sam_base &&) = default;
54  ~format_sam_base() = default;
55 
57 
59  static constexpr std::array format_version{'1', '.', '6'};
60 
62  std::array<char, 316> arithmetic_buffer{}; // Doubles can be up to 316 characters
63 
65  bool header_was_written{false};
66 
68  bool ref_info_present_in_header{false};
69 
70  template <typename ref_id_type,
71  typename ref_id_tmp_type,
72  typename header_type,
73  typename ref_seqs_type>
74  void check_and_assign_ref_id(ref_id_type & ref_id,
75  ref_id_tmp_type & ref_id_tmp,
76  header_type & header,
77  ref_seqs_type & /*tag*/);
78 
79  template <typename align_type, typename ref_seqs_type>
80  void construct_alignment(align_type & align,
81  std::vector<cigar> & cigar_vector,
82  [[maybe_unused]] int32_t rid,
83  [[maybe_unused]] ref_seqs_type & ref_seqs,
84  [[maybe_unused]] int32_t ref_start,
85  size_t ref_length);
86 
87  void transfer_soft_clipping_to(std::vector<cigar> const & cigar_vector, int32_t & sc_begin, int32_t & sc_end) const;
88 
89  template <typename stream_view_type>
90  void read_field(stream_view_type && stream_view, detail::ignore_t const & SEQAN3_DOXYGEN_ONLY(target));
91 
92  template <typename stream_view_t>
93  void read_field(stream_view_t && stream_view, std::byte & byte_target);
94 
95  template <typename stream_view_type, std::ranges::forward_range target_range_type>
96  void read_field(stream_view_type && stream_view, target_range_type & target);
97 
98  template <typename stream_view_t, arithmetic arithmetic_target_type>
99  void read_field(stream_view_t && stream_view, arithmetic_target_type & arithmetic_target);
100 
101  template <typename stream_view_type, typename optional_value_type>
102  void read_field(stream_view_type && stream_view, std::optional<optional_value_type> & target);
103 
104  template <typename stream_view_type, typename ref_ids_type, typename ref_seqs_type>
105  void read_header(stream_view_type && stream_view,
106  sam_file_header<ref_ids_type> & hdr,
107  ref_seqs_type & /*ref_id_to_pos_map*/);
108 
109  template <typename stream_t, typename ref_ids_type>
110  void write_header(stream_t & stream,
111  sam_file_output_options const & options,
112  sam_file_header<ref_ids_type> & header);
113 };
114 
125 template <typename ref_id_type,
126  typename ref_id_tmp_type,
127  typename header_type,
128  typename ref_seqs_type>
129 inline void format_sam_base::check_and_assign_ref_id(ref_id_type & ref_id,
130  ref_id_tmp_type & ref_id_tmp,
131  header_type & header,
132  ref_seqs_type & /*tag*/)
133 {
134  if (!std::ranges::empty(ref_id_tmp)) // otherwise the std::optional will not be filled
135  {
136  auto search = header.ref_dict.find(ref_id_tmp);
137 
138  if (search == header.ref_dict.end())
139  {
140  if constexpr(detail::decays_to_ignore_v<ref_seqs_type>) // no reference information given
141  {
142  if (ref_info_present_in_header)
143  {
144  throw format_error{"Unknown reference id found in record which is not present in the header."};
145  }
146  else
147  {
148  header.ref_ids().push_back(ref_id_tmp);
149  auto pos = std::ranges::size(header.ref_ids()) - 1;
150  header.ref_dict[header.ref_ids()[pos]] = pos;
151  ref_id = pos;
152  }
153  }
154  else
155  {
156  throw format_error{"Unknown reference id found in record which is not present in the given ids."};
157  }
158  }
159  else
160  {
161  ref_id = search->second;
162  }
163  }
164 }
165 
171 inline void format_sam_base::transfer_soft_clipping_to(std::vector<cigar> const & cigar_vector,
172  int32_t & sc_begin,
173  int32_t & sc_end) const
174 {
175  // Checks if the given index in the cigar vector is a soft clip.
176  auto soft_clipping_at = [&] (size_t const index) { return cigar_vector[index] == 'S'_cigar_operation; };
177  // Checks if the given index in the cigar vector is a hard clip.
178  auto hard_clipping_at = [&] (size_t const index) { return cigar_vector[index] == 'H'_cigar_operation; };
179  // Checks if the given cigar vector as at least min_size many elements.
180  auto vector_size_at_least = [&] (size_t const min_size) { return cigar_vector.size() >= min_size; };
181  // Returns the cigar count of the ith cigar element in the given cigar vector.
182  auto cigar_count_at = [&] (size_t const index) { return get<0>(cigar_vector[index]); };
183 
184  // check for soft clipping at the first two positions
185  if (vector_size_at_least(1) && soft_clipping_at(0))
186  sc_begin = cigar_count_at(0);
187  else if (vector_size_at_least(2) && hard_clipping_at(0) && soft_clipping_at(1))
188  sc_begin = cigar_count_at(1);
189 
190  // Check for soft clipping at the last two positions. But only if the vector size has at least 2, respectively
191  // 3 elements. Accordingly, if the following arithmetics overflow they are protected by the corresponding
192  // if expressions below.
193  auto last_index = cigar_vector.size() - 1;
194  auto second_last_index = last_index - 1;
195 
196  if (vector_size_at_least(2) && soft_clipping_at(last_index))
197  sc_end = cigar_count_at(last_index);
198  else if (vector_size_at_least(3) && hard_clipping_at(last_index) && soft_clipping_at(second_last_index))
199  sc_end = cigar_count_at(second_last_index);
200 }
201 
212 template <typename align_type, typename ref_seqs_type>
213 inline void format_sam_base::construct_alignment(align_type & align,
214  std::vector<cigar> & cigar_vector,
215  [[maybe_unused]] int32_t rid,
216  [[maybe_unused]] ref_seqs_type & ref_seqs,
217  [[maybe_unused]] int32_t ref_start,
218  size_t ref_length)
219 {
220  if (rid > -1 && ref_start > -1 && // read is mapped
221  !cigar_vector.empty() && // alignment field was not empty
222  !std::ranges::empty(get<1>(align))) // seq field was not empty
223  {
224  if constexpr (!detail::decays_to_ignore_v<ref_seqs_type>)
225  {
226  assert(static_cast<size_t>(ref_start + ref_length) <= std::ranges::size(ref_seqs[rid]));
227  // copy over unaligned reference sequence part
228  assign_unaligned(get<0>(align), ref_seqs[rid] | views::slice(ref_start, ref_start + ref_length));
229  }
230  else
231  {
233  auto dummy_seq = views::repeat_n(std::ranges::range_value_t<unaligned_t>{}, ref_length)
234  | std::views::transform(detail::access_restrictor_fn{});
235  static_assert(std::same_as<unaligned_t, decltype(dummy_seq)>,
236  "No reference information was given so the type of the first alignment tuple position"
237  "must have an unaligned sequence type of a dummy sequence ("
238  "views::repeat_n(dna5{}, size_t{}) | "
239  "std::views::transform(detail::access_restrictor_fn{}))");
240 
241  assign_unaligned(get<0>(align), dummy_seq); // assign dummy sequence
242  }
243 
244  // insert gaps according to the cigar information
245  detail::alignment_from_cigar(align, cigar_vector);
246  }
247  else // not enough information for an alignment, assign an empty view/dummy_sequence
248  {
249  if constexpr (!detail::decays_to_ignore_v<ref_seqs_type>) // reference info given
250  {
251  assert(std::ranges::size(ref_seqs) > 0); // we assume that the given ref info is not empty
252  assign_unaligned(get<0>(align), ref_seqs[0] | views::slice(0, 0));
253  }
254  else
255  {
257  assign_unaligned(get<0>(align), views::repeat_n(std::ranges::range_value_t<unaligned_t>{}, 0)
258  | std::views::transform(detail::access_restrictor_fn{}));
259  }
260  }
261 }
262 
269 template <typename stream_view_type>
270 inline void format_sam_base::read_field(stream_view_type && stream_view,
271  detail::ignore_t const & SEQAN3_DOXYGEN_ONLY(target))
272 {
273  detail::consume(stream_view);
274 }
275 
285 template <typename stream_view_t>
286 inline void format_sam_base::read_field(stream_view_t && stream_view, std::byte & byte_target)
287 {
288  // unfortunately std::from_chars only accepts char const * so we need a buffer.
289  auto [ignore, end] = std::ranges::copy(stream_view, arithmetic_buffer.data());
290  (void) ignore;
291 
292  uint8_t byte{};
293  // std::from_chars cannot directly parse into a std::byte
294  std::from_chars_result res = std::from_chars(arithmetic_buffer.begin(), end, byte, 16);
295 
296  if (res.ec == std::errc::invalid_argument || res.ptr != end)
297  throw format_error{std::string("[CORRUPTED SAM FILE] The string '") +
298  std::string(arithmetic_buffer.begin(), end) +
299  "' could not be cast into type uint8_t."};
300 
301  if (res.ec == std::errc::result_out_of_range)
302  throw format_error{std::string("[CORRUPTED SAM FILE] Casting '") + std::string(arithmetic_buffer.begin(), end) +
303  "' into type uint8_t would cause an overflow."};
304  byte_target = std::byte{byte};
305 }
306 
314 template <typename stream_view_type, std::ranges::forward_range target_range_type>
315 inline void format_sam_base::read_field(stream_view_type && stream_view, target_range_type & target)
316 {
317  using target_range_value_t = std::ranges::range_value_t<target_range_type>;
318  using begin_iterator_t = std::ranges::iterator_t<stream_view_type>;
319  using end_iterator_t = std::ranges::sentinel_t<stream_view_type>;
320 
321  // Note that we need to cache the begin iterator since the stream_view is an input range that may be consuming
322  // and in that case might read `past-the-end` on a second call of std::ranges::begin.
323  if (auto it = std::ranges::begin(stream_view); it != std::ranges::end(stream_view))
324  {
325  // Write to target if field does not represent an empty string, denoted as single '*' character.
326  if (char c = *it; !(++it == std::ranges::end(stream_view) && c == '*'))
327  {
328  target.push_back(seqan3::assign_char_to(c, target_range_value_t{}));
329  std::ranges::copy(std::ranges::subrange<begin_iterator_t, end_iterator_t>{it, std::ranges::end(stream_view)}
330  | views::char_to<target_range_value_t>,
331  std::cpp20::back_inserter(target));
332  }
333  }
334 }
335 
346 template <typename stream_view_t, arithmetic arithmetic_target_type>
347 inline void format_sam_base::read_field(stream_view_t && stream_view, arithmetic_target_type & arithmetic_target)
348 {
349  // unfortunately std::from_chars only accepts char const * so we need a buffer.
350  auto [ignore, end] = std::ranges::copy(stream_view, arithmetic_buffer.data());
351  (void) ignore;
352  std::from_chars_result res = std::from_chars(arithmetic_buffer.begin(), end, arithmetic_target);
353 
354  if (res.ec == std::errc::invalid_argument || res.ptr != end)
355  throw format_error{std::string("[CORRUPTED SAM FILE] The string '") +
356  std::string(arithmetic_buffer.begin(), end) +
357  "' could not be cast into type " +
358  detail::type_name_as_string<arithmetic_target_type>};
359 
360  if (res.ec == std::errc::result_out_of_range)
361  throw format_error{std::string("[CORRUPTED SAM FILE] Casting '") + std::string(arithmetic_buffer.begin(), end) +
362  "' into type " + detail::type_name_as_string<arithmetic_target_type> +
363  " would cause an overflow."};
364 }
365 
376 template <typename stream_view_type, typename optional_value_type>
377 inline void format_sam_base::read_field(stream_view_type && stream_view, std::optional<optional_value_type> & target)
378 {
379  optional_value_type tmp;
380  read_field(std::forward<stream_view_type>(stream_view), tmp);
381  target = tmp;
382 }
383 
400 template <typename stream_view_type, typename ref_ids_type, typename ref_seqs_type>
401 inline void format_sam_base::read_header(stream_view_type && stream_view,
402  sam_file_header<ref_ids_type> & hdr,
403  ref_seqs_type & /*ref_id_to_pos_map*/)
404 {
405  auto it = std::ranges::begin(stream_view);
406  auto end = std::ranges::end(stream_view);
407  std::vector<char> string_buffer{};
408 
409  auto make_tag = [] (uint8_t char1, uint8_t char2) constexpr
410  {
411  return static_cast<uint16_t>(char1) | (static_cast<uint16_t>(char2) << CHAR_BIT);
412  };
413 
414  std::array<char, 2> raw_tag{};
415 
416  auto parse_and_make_tag = [&] ()
417  {
418  raw_tag[0] = *it;
419  ++it;
420  raw_tag[1] = *it;
421  ++it;
422  return make_tag(raw_tag[0], raw_tag[1]);
423  };
424 
425  auto take_until_predicate = [&it, &string_buffer] (auto const & predicate)
426  {
427  string_buffer.clear();
428  while (!predicate(*it))
429  {
430  string_buffer.push_back(*it);
431  ++it;
432  }
433  };
434 
435  auto skip_until_predicate = [&it] (auto const & predicate)
436  {
437  while (!predicate(*it))
438  ++it;
439  };
440 
441  auto parse_tag_value = [&] (auto & value) // helper function to parse the next tag value
442  {
443  skip_until_predicate(is_char<':'>);
444  ++it; // skip :
445  take_until_predicate(is_char<'\t'> || is_char<'\n'>);
446  read_field(string_buffer, value);
447  };
448 
449  // Some tags are not parsed individually. Instead, these are simply copied into a std::string.
450  // Multiple tags must be separated by a `\t`, hence we prepend a tab to the string, except the first time.
451  // Alternatively, we could always append a `\t`, but this would have the side effect that we might need to trim a
452  // trailing tab after parsing all tags via `pop_back()`.
453  // Unfortunately, we do not know when we are parsing the last tag (and in this case just not append a tab),
454  // because even though we can check if the line ends in a `\n`, it is not guaranteed that the last tag of the
455  // line is passed to this lambda. For example, the line might end with a tag that is properly parsed, such as `ID`.
456  auto parse_and_append_unhandled_tag_to_string = [&] (std::string & value, std::array<char, 2> raw_tag)
457  {
458  take_until_predicate(is_char<'\t'> || is_char<'\n'>);
459  if (!value.empty())
460  value.push_back('\t');
461  value.push_back(raw_tag[0]);
462  value.push_back(raw_tag[1]);
463  read_field(string_buffer, value);
464  };
465 
466  auto print_cerr_of_unspported_tag = [&it] (char const * const header_tag, std::array<char, 2> raw_tag)
467  {
468  std::cerr << "Unsupported SAM header tag in @" << header_tag << ": " << raw_tag[0] << raw_tag[1] << '\n';
469  };
470 
471  while (it != end && is_char<'@'>(*it))
472  {
473  ++it; // skip @
474 
475  switch (parse_and_make_tag())
476  {
477  case make_tag('H', 'D'): // HD (header) tag
478  {
479  // All tags can appear in any order, VN is the only required tag
480  while (is_char<'\t'>(*it))
481  {
482  ++it; // skip tab
483  std::string * header_entry{nullptr};
484 
485  switch (parse_and_make_tag())
486  {
487  case make_tag('V', 'N'): // parse required VN (version) tag
488  {
489  header_entry = std::addressof(hdr.format_version);
490  break;
491  }
492  case make_tag('S', 'O'): // SO (sorting) tag
493  {
494  header_entry = std::addressof(hdr.sorting);
495  break;
496  }
497  case make_tag('S', 'S'): // SS (sub-order) tag
498  {
499  header_entry = std::addressof(hdr.subsorting);
500  break;
501  }
502  case make_tag('G', 'O'): // GO (grouping) tag
503  {
504  header_entry = std::addressof(hdr.grouping);
505  break;
506  }
507  default: // unsupported header tag
508  {
509  print_cerr_of_unspported_tag("HD", raw_tag);
510  }
511  }
512 
513  if (header_entry != nullptr)
514  parse_tag_value(*header_entry);
515  else
516  skip_until_predicate(is_char<'\t'> || is_char<'\n'>);
517  }
518  ++it; // skip newline
519 
520  if (hdr.format_version.empty())
521  throw format_error{std::string{"The required VN tag in @HD is missing."}};
522 
523  break;
524  }
525 
526  case make_tag('S', 'Q'): // SQ (sequence dictionary) tag
527  {
528  ref_info_present_in_header = true;
529  std::ranges::range_value_t<decltype(hdr.ref_ids())> id;
530  std::optional<int32_t> sequence_length{};
532 
533  // All tags can appear in any order, SN and LN are required tags
534  while (is_char<'\t'>(*it))
535  {
536  ++it; // skip tab
537 
538  switch (parse_and_make_tag())
539  {
540  case make_tag('S', 'N'): // parse required SN (sequence name) tag
541  {
542  parse_tag_value(id);
543  break;
544  }
545  case make_tag('L', 'N'): // parse required LN (length) tag
546  {
547  parse_tag_value(sequence_length);
548  break;
549  }
550  default: // Any other tag
551  {
552  parse_and_append_unhandled_tag_to_string(get<1>(info), raw_tag);
553  }
554  }
555  }
556  ++it; // skip newline
557 
558  if (id.empty())
559  throw format_error{std::string{"The required SN tag in @SQ is missing."}};
560  if (!sequence_length.has_value())
561  throw format_error{std::string{"The required LN tag in @SQ is missing."}};
562  if (sequence_length.value() <= 0)
563  throw format_error{std::string{"The value of LN in @SQ must be positive."}};
564 
565  get<0>(info) = sequence_length.value();
566  // If reference information was given, the ids exist and we can fill ref_dict directly.
567  // If not, we need to update the ids first and fill the reference dictionary afterwards.
568  if constexpr (!detail::decays_to_ignore_v<ref_seqs_type>) // reference information given
569  {
570  auto id_it = hdr.ref_dict.find(id);
571 
572  if (id_it == hdr.ref_dict.end())
573  throw format_error{detail::to_string("Unknown reference name '", id, "' found in SAM header ",
574  "(header.ref_ids(): ", hdr.ref_ids(), ").")};
575 
576  auto & given_ref_info = hdr.ref_id_info[id_it->second];
577 
578  if (std::get<0>(given_ref_info) != std::get<0>(info))
579  throw format_error{"Provided and header-based reference length differ."};
580 
581  hdr.ref_id_info[id_it->second] = std::move(info);
582  }
583  else
584  {
585  static_assert(!detail::is_type_specialisation_of_v<decltype(hdr.ref_ids()), std::deque>,
586  "The range over reference ids must be of type std::deque such that pointers are not "
587  "invalidated.");
588 
589  hdr.ref_ids().push_back(id);
590  hdr.ref_id_info.push_back(info);
591  hdr.ref_dict[(hdr.ref_ids())[(hdr.ref_ids()).size() - 1]] = (hdr.ref_ids()).size() - 1;
592  }
593  break;
594  }
595 
596  case make_tag('R', 'G'): // RG (read group) tag
597  {
599 
600  // All tags can appear in any order, SN and LN are required tags
601  while (is_char<'\t'>(*it))
602  {
603  ++it; // skip tab
604 
605  switch (parse_and_make_tag())
606  {
607  case make_tag('I', 'D'): // parse required ID tag
608  {
609  parse_tag_value(get<0>(tmp));
610  break;
611  }
612  default: // Any other tag
613  {
614  parse_and_append_unhandled_tag_to_string(get<1>(tmp), raw_tag);
615  }
616  }
617  }
618  ++it; // skip newline
619 
620  if (get<0>(tmp).empty())
621  throw format_error{std::string{"The required ID tag in @RG is missing."}};
622 
623  hdr.read_groups.emplace_back(std::move(tmp));
624  break;
625  }
626 
627  case make_tag('P', 'G'): // PG (program) tag
628  {
629  typename sam_file_header<ref_ids_type>::program_info_t tmp{};
630 
631  // All tags can appear in any order, ID is the only required tag
632  while (is_char<'\t'>(*it))
633  {
634  ++it; // skip tab
635  std::string * program_info_entry{nullptr};
636 
637  switch (parse_and_make_tag())
638  {
639  case make_tag('I', 'D'): // read required ID tag
640  {
641  program_info_entry = std::addressof(tmp.id);
642  break;
643  }
644  case make_tag('P', 'N'): // PN (program name) tag
645  {
646  program_info_entry = std::addressof(tmp.name);
647  break;
648  }
649  case make_tag('P', 'P'): // PP (previous program) tag
650  {
651  program_info_entry = std::addressof(tmp.previous);
652  break;
653  }
654  case make_tag('C', 'L'): // CL (command line) tag
655  {
656  program_info_entry = std::addressof(tmp.command_line_call);
657  break;
658  }
659  case make_tag('D', 'S'): // DS (description) tag
660  {
661  program_info_entry = std::addressof(tmp.description);
662  break;
663  }
664  case make_tag('V', 'N'): // VN (version) tag
665  {
666  program_info_entry = std::addressof(tmp.version);
667  break;
668  }
669  default: // unsupported header tag
670  {
671  print_cerr_of_unspported_tag("PG", raw_tag);
672  }
673  }
674 
675  if (program_info_entry != nullptr)
676  parse_tag_value(*program_info_entry);
677  else
678  skip_until_predicate(is_char<'\t'> || is_char<'\n'>);
679  }
680  ++it; // skip newline
681 
682  if (tmp.id.empty())
683  throw format_error{std::string{"The required ID tag in @PG is missing."}};
684 
685  hdr.program_infos.emplace_back(std::move(tmp));
686  break;
687  }
688 
689  case make_tag('C', 'O'): // CO (comment) tag
690  {
691  ++it; // skip tab
692  std::string tmp;
693  take_until_predicate(is_char<'\n'>);
694  read_field(string_buffer, tmp);
695  ++it; // skip newline
696  hdr.comments.emplace_back(std::move(tmp));
697  break;
698  }
699 
700  default:
701  throw format_error{std::string{"Illegal SAM header tag starting with:"} + *it};
702  }
703  }
704 }
705 
722 template <typename stream_t, typename ref_ids_type>
723 inline void format_sam_base::write_header(stream_t & stream,
724  sam_file_output_options const & options,
725  sam_file_header<ref_ids_type> & header)
726 {
727  // -----------------------------------------------------------------
728  // Check Header
729  // -----------------------------------------------------------------
730 
731  // (@HD) Check header line
732  // The format version string will be taken from the local member variable
733  if (!header.sorting.empty() &&
734  !(header.sorting == "unknown" ||
735  header.sorting == "unsorted" ||
736  header.sorting == "queryname" ||
737  header.sorting == "coordinate" ))
738  throw format_error{"SAM format error: The header.sorting member must be "
739  "one of [unknown, unsorted, queryname, coordinate]."};
740 
741  if (!header.grouping.empty() &&
742  !(header.grouping == "none" ||
743  header.grouping == "query" ||
744  header.grouping == "reference"))
745  throw format_error{"SAM format error: The header.grouping member must be "
746  "one of [none, query, reference]."};
747 
748  // (@SQ) Check Reference Sequence Dictionary lines
749 
750  // TODO
751 
752  // - sorting order be one of ...
753  // - grouping can be one of ...
754  // - reference names must be unique
755  // - ids of read groups must be unique
756  // - program ids need to be unique
757  // many more small semantic things, like fits REGEX
758 
759  // -----------------------------------------------------------------
760  // Write Header
761  // -----------------------------------------------------------------
762  std::cpp20::ostreambuf_iterator stream_it{stream};
763 
764  // (@HD) Write header line [required].
765  stream << "@HD\tVN:";
766  std::ranges::copy(format_version, stream_it);
767 
768  if (!header.sorting.empty())
769  stream << "\tSO:" << header.sorting;
770 
771  if (!header.subsorting.empty())
772  stream << "\tSS:" << header.subsorting;
773 
774  if (!header.grouping.empty())
775  stream << "\tGO:" << header.grouping;
776 
777  detail::write_eol(stream_it, options.add_carriage_return);
778 
779  // (@SQ) Write Reference Sequence Dictionary lines [required].
780  for (auto const & [ref_name, ref_info] : views::zip(header.ref_ids(), header.ref_id_info))
781  {
782  stream << "@SQ\tSN:";
783 
784  std::ranges::copy(ref_name, stream_it);
785 
786  stream << "\tLN:" << get<0>(ref_info);
787 
788  if (!get<1>(ref_info).empty())
789  stream << "\t" << get<1>(ref_info);
790 
791  detail::write_eol(stream_it, options.add_carriage_return);
792  }
793 
794  // Write read group (@RG) lines if specified.
795  for (auto const & read_group : header.read_groups)
796  {
797  stream << "@RG"
798  << "\tID:" << get<0>(read_group);
799 
800  if (!get<1>(read_group).empty())
801  stream << "\t" << get<1>(read_group);
802 
803  detail::write_eol(stream_it, options.add_carriage_return);
804  }
805 
806  // Write program (@PG) lines if specified.
807  for (auto const & program : header.program_infos)
808  {
809  stream << "@PG"
810  << "\tID:" << program.id;
811 
812  if (!program.name.empty())
813  stream << "\tPN:" << program.name;
814 
815  if (!program.command_line_call.empty())
816  stream << "\tCL:" << program.command_line_call;
817 
818  if (!program.previous.empty())
819  stream << "\tPP:" << program.previous;
820 
821  if (!program.description.empty())
822  stream << "\tDS:" << program.description;
823 
824  if (!program.version.empty())
825  stream << "\tVN:" << program.version;
826 
827  detail::write_eol(stream_it, options.add_carriage_return);
828  }
829 
830  // Write comment (@CO) lines if specified.
831  for (auto const & comment : header.comments)
832  {
833  stream << "@CO\t" << comment;
834  detail::write_eol(stream_it, options.add_carriage_return);
835  }
836 }
837 
838 } // namespace seqan3::detail
T addressof(T... args)
Provides seqan3::views::char_to.
T begin(T... args)
Provides various utility functions.
Provides seqan3::debug_stream and related types.
T empty(T... args)
T end(T... args)
T find(T... args)
T from_chars(T... args)
constexpr auto assign_char_to
Assign a character to an alphabet object.
Definition: concept.hpp:523
@ comment
Comment field of arbitrary content, usually a string.
@ ref_id
The identifier of the (reference) sequence that seqan3::field::seq was aligned to.
@ id
The identifier, usually a string.
auto search(queries_t &&queries, index_t const &index, configuration_t const &cfg=search_cfg::default_configuration)
Search a query or a range of queries in an index.
Definition: search.hpp:108
decltype(detail::transform< trait_t >(list_t{})) transform
Apply a transformation trait to every type in the list and return a seqan3::type_list of the results.
Definition: traits.hpp:471
constexpr size_t size
The size of a type pack.
Definition: traits.hpp:151
constexpr auto slice
A view adaptor that returns a half-open interval on the underlying range.
Definition: slice.hpp:189
constexpr auto zip
A zip view.
Definition: zip.hpp:29
constexpr auto repeat_n
A view factory that repeats a given value n times.
Definition: repeat_n.hpp:95
auto const move
A view that turns lvalue-references into rvalue-references.
Definition: move.hpp:74
Provides various utility functions.
Auxiliary functions for the alignment IO.
T push_back(T... args)
Adaptations of concepts from the Ranges TS.
Provides the seqan3::sam_file_header class.
Provides seqan3::sam_file_output_format and auxiliary classes.
T size(T... args)
Provides traits to inspect some information of a type, for example its name.
Provides seqan3::views::repeat_n.
Provides seqan3::views::slice.
Provides seqan3::views::zip.