69 template <
typename ref_
id_type,
typename ref_
id_tmp_type,
typename header_type,
typename ref_seqs_type>
71 ref_id_tmp_type & ref_id_tmp,
77 template <
typename stream_view_type, std::ranges::forward_range target_range_type>
80 template <std::ranges::forward_range target_range_type>
83 template <arithmetic arithmetic_target_type>
86 template <
typename stream_view_type,
typename ref_
ids_type,
typename ref_seqs_type,
typename seq_legal_alph_type>
92 template <
typename stream_t,
typename header_type>
106template <
typename ref_
id_type,
typename ref_
id_tmp_type,
typename header_type,
typename ref_seqs_type>
108 ref_id_tmp_type & ref_id_tmp,
109 header_type & header,
112 if (!std::ranges::empty(ref_id_tmp))
114 auto search = header.ref_dict.find(ref_id_tmp);
116 if (
search == header.ref_dict.end())
118 if constexpr (detail::decays_to_ignore_v<ref_seqs_type>)
122 throw format_error{
"Unknown reference id found in record which is not present in the header."};
126 header.ref_ids().push_back(ref_id_tmp);
127 auto pos = std::ranges::size(header.ref_ids()) - 1;
128 header.ref_dict[header.ref_ids()[pos]] = pos;
134 throw format_error{
"Unknown reference id found in record which is not present in the given ids."};
152 auto soft_clipping_at = [&](
size_t const index)
154 return cigar_vector[index] ==
'S'_cigar_operation;
157 auto hard_clipping_at = [&](
size_t const index)
159 return cigar_vector[index] ==
'H'_cigar_operation;
162 auto vector_size_at_least = [&](
size_t const min_size)
164 return cigar_vector.
size() >= min_size;
167 auto cigar_count_at = [&](
size_t const index)
169 return get<0>(cigar_vector[index]);
173 if (vector_size_at_least(1) && soft_clipping_at(0))
174 sc_front = cigar_count_at(0);
175 else if (vector_size_at_least(2) && hard_clipping_at(0) && soft_clipping_at(1))
176 sc_front = cigar_count_at(1);
188template <
typename stream_view_type, std::ranges::forward_range target_range_type>
191 using target_range_value_t = std::ranges::range_value_t<target_range_type>;
192 using begin_iterator_t = std::ranges::iterator_t<stream_view_type>;
193 using end_iterator_t = std::ranges::sentinel_t<stream_view_type>;
200 if (
char c = *it; !(++it == std::ranges::end(stream_view) && c ==
'*'))
203 std::ranges::copy(std::ranges::subrange<begin_iterator_t, end_iterator_t>{it, std::ranges::end(stream_view)}
204 | views::char_to<target_range_value_t>,
216template <std::ranges::forward_range target_range_type>
219 if (str.
size() == 1 && str[0] ==
'*')
222 if constexpr (std::assignable_from<target_range_type, std::string_view>)
228 target.resize(str.
size());
229 for (
size_t i = 0; i < str.
size(); ++i)
230 target[i] =
assign_char_to(str[i], std::ranges::range_value_t<target_range_type>{});
243template <arithmetic arithmetic_target_type>
245 arithmetic_target_type & arithmetic_target)
249 if (res.ec == std::errc::invalid_argument || res.ptr != str.
end())
251 +
"' could not be cast into type " + detail::type_name_as_string<arithmetic_target_type>};
253 if (res.ec == std::errc::result_out_of_range)
255 +
"' into type " + detail::type_name_as_string<arithmetic_target_type>
256 +
" would cause an overflow."};
278template <
typename stream_view_type,
typename ref_
ids_type,
typename ref_seqs_type,
typename seq_legal_alph_type>
285 auto end = std::ranges::end(stream_view);
288 auto make_tag = [](uint8_t char1, uint8_t char2)
constexpr
290 return static_cast<uint16_t
>(char1) | (
static_cast<uint16_t
>(char2) << CHAR_BIT);
295 auto parse_and_make_tag = [&]()
301 return make_tag(raw_tag[0], raw_tag[1]);
304 auto take_until_predicate = [&it, &string_buffer](
auto const & predicate)
306 string_buffer.clear();
307 while (!predicate(*it))
309 string_buffer.push_back(*it);
314 auto skip_until_predicate = [&it](
auto const & predicate)
316 while (!predicate(*it))
320 auto copy_next_tag_value_into_buffer = [&]()
322 skip_until_predicate(
is_char<
':'>);
344 auto print_cerr_of_unspported_tag = [&options](
char const *
const header_tag,
std::array<char, 2> raw_tag)
349 *options.
stream_warnings_to <<
"Unsupported SAM header tag in @" << header_tag <<
": " << raw_tag[0]
350 << raw_tag[1] <<
'\n';
353 while (it != end &&
is_char<
'@'>(*it))
357 switch (parse_and_make_tag())
359 case make_tag(
'H',
'D'):
367 switch (parse_and_make_tag())
369 case make_tag(
'V',
'N'):
374 case make_tag(
'S',
'O'):
379 case make_tag(
'S',
'S'):
384 case make_tag(
'G',
'O'):
391 print_cerr_of_unspported_tag(
"HD", raw_tag);
395 if (header_entry !=
nullptr)
397 copy_next_tag_value_into_buffer();
411 case make_tag(
'S',
'Q'):
414 std::ranges::range_value_t<
decltype(hdr.
ref_ids())>
id;
423 switch (parse_and_make_tag())
425 case make_tag(
'S',
'N'):
427 copy_next_tag_value_into_buffer();
431 case make_tag(
'L',
'N'):
433 int32_t sequence_length_tmp{};
434 copy_next_tag_value_into_buffer();
436 sequence_length = sequence_length_tmp;
441 parse_and_append_unhandled_tag_to_string(get<1>(info), raw_tag);
449 if (!sequence_length.has_value())
451 if (sequence_length.value() <= 0)
454 get<0>(info) = sequence_length.value();
457 if constexpr (!detail::decays_to_ignore_v<ref_seqs_type>)
464 "' found in SAM header ",
465 "(header.ref_ids(): ",
469 auto & given_ref_info = hdr.
ref_id_info[id_it->second];
471 if (std::get<0>(given_ref_info) != std::get<0>(info))
472 throw format_error{
"Provided and header-based reference length differ."};
478 static_assert(!detail::is_type_specialisation_of_v<
decltype(hdr.
ref_ids()),
std::deque>,
479 "The range over reference ids must be of type std::deque such that pointers are not "
489 case make_tag(
'R',
'G'):
498 switch (parse_and_make_tag())
500 case make_tag(
'I',
'D'):
502 copy_next_tag_value_into_buffer();
508 parse_and_append_unhandled_tag_to_string(get<1>(tmp), raw_tag);
514 if (get<0>(tmp).empty())
521 case make_tag(
'P',
'G'):
531 switch (parse_and_make_tag())
533 case make_tag(
'I',
'D'):
538 case make_tag(
'P',
'N'):
543 case make_tag(
'P',
'P'):
548 case make_tag(
'C',
'L'):
553 case make_tag(
'D',
'S'):
558 case make_tag(
'V',
'N'):
565 print_cerr_of_unspported_tag(
"PG", raw_tag);
569 if (program_info_entry !=
nullptr)
571 copy_next_tag_value_into_buffer();
586 case make_tag(
'C',
'O'):
590 take_until_predicate(
is_char<
'\n'>);
619template <
typename stream_t,
typename header_type>
623 if constexpr (!detail::decays_to_ignore_v<header_type>)
631 if (!header.sorting.empty()
632 && !(header.sorting ==
"unknown" || header.sorting ==
"unsorted" || header.sorting ==
"queryname"
633 || header.sorting ==
"coordinate"))
634 throw format_error{
"SAM format error: The header.sorting member must be "
635 "one of [unknown, unsorted, queryname, coordinate]."};
637 if (!header.grouping.empty()
638 && !(header.grouping ==
"none" || header.grouping ==
"query" || header.grouping ==
"reference"))
639 throw format_error{
"SAM format error: The header.grouping member must be "
640 "one of [none, query, reference]."};
659 stream <<
"@HD\tVN:";
662 if (!header.sorting.empty())
663 stream <<
"\tSO:" << header.sorting;
665 if (!header.subsorting.empty())
666 stream <<
"\tSS:" << header.subsorting;
668 if (!header.grouping.empty())
669 stream <<
"\tGO:" << header.grouping;
674 for (
auto const & [ref_name, ref_info] :
views::zip(header.ref_ids(), header.ref_id_info))
676 stream <<
"@SQ\tSN:";
680 stream <<
"\tLN:" << get<0>(ref_info);
682 if (!get<1>(ref_info).empty())
683 stream <<
"\t" << get<1>(ref_info);
689 for (
auto const & read_group : header.read_groups)
692 <<
"\tID:" << get<0>(read_group);
694 if (!get<1>(read_group).empty())
695 stream <<
"\t" << get<1>(read_group);
701 for (
auto const & program : header.program_infos)
704 <<
"\tID:" << program.id;
706 if (!program.name.empty())
707 stream <<
"\tPN:" << program.name;
709 if (!program.command_line_call.empty())
710 stream <<
"\tCL:" << program.command_line_call;
712 if (!program.previous.empty())
713 stream <<
"\tPP:" << program.previous;
715 if (!program.description.empty())
716 stream <<
"\tDS:" << program.description;
718 if (!program.version.empty())
719 stream <<
"\tVN:" << program.version;
725 for (
auto const &
comment : header.comments)
T back_inserter(T... args)
Provides seqan3::views::char_to.
Provides various utility functions.
T emplace_back(T... args)
constexpr auto assign_char_to
Assign a character to an alphabet object.
Definition alphabet/concept.hpp:521
constexpr void write_eol(it_t &it, bool const add_cr)
Write "\n" or "\r\n" to the stream iterator, depending on arguments.
Definition io/detail/misc.hpp:46
@ comment
Comment field of arbitrary content, usually a string.
@ ref_id
The identifier of the (reference) sequence that seqan3::field::seq was aligned to.
constexpr auto is_char
Checks whether a given letter is the same as the template non-type argument.
Definition predicate.hpp:60
seqan::stl::views::zip zip
A view adaptor that takes several views and returns tuple-like values from every i-th element of each view.
Definition zip.hpp:24
Provides various utility functions.
Auxiliary functions for the SAM IO.
The internal SeqAn3 namespace.
Definition aligned_sequence_concept.hpp:26
std::string to_string(value_type &&... values)
Streams all parameters via the seqan3::debug_stream and returns a concatenated string.
Definition to_string.hpp:26
Provides seqan3::debug_stream and related types.
Provides seqan3::views::repeat_n.
Provides seqan3::views::slice.
Object storing information for a search (of a search scheme).
Definition search_scheme_precomputed.hpp:25
The options type defines various option members that influence the behavior of all or some formats.
Definition sam_file/output_options.hpp:23
bool add_carriage_return
The default plain text line-ending is "\n", but on Windows an additional carriage return is recommended ("\r\n" for line-ending).
Definition sam_file/output_options.hpp:27
Stores information of the program/tool that was used to create a SAM/BAM file.
Definition header.hpp:30
Provides traits to inspect some information of a type, for example its name.
Provides seqan3::views::zip.