33namespace seqan3::detail
50 format_sam_base() =
default;
51 format_sam_base(format_sam_base
const &) =
default;
52 format_sam_base & operator=(format_sam_base
const &) =
default;
53 format_sam_base(format_sam_base &&) =
default;
54 format_sam_base & operator=(format_sam_base &&) =
default;
55 ~format_sam_base() =
default;
60 static constexpr std::array format_version{
'1',
'.',
'6'};
66 bool header_was_written{
false};
69 bool ref_info_present_in_header{
false};
71 template <
typename ref_
id_type,
typename ref_
id_tmp_type,
typename header_type,
typename ref_seqs_type>
72 void check_and_assign_ref_id(ref_id_type &
ref_id,
73 ref_id_tmp_type & ref_id_tmp,
79 template <
typename stream_view_type, std::ranges::forward_range target_range_type>
80 void read_forward_range_field(stream_view_type && stream_view, target_range_type & target);
82 template <std::ranges::forward_range target_range_type>
83 void read_forward_range_field(
std::string_view const str, target_range_type & target);
85 template <arithmetic arithmetic_target_type>
86 void read_arithmetic_field(
std::string_view const & str, arithmetic_target_type & arithmetic_target);
88 template <
typename stream_view_type,
typename ref_
ids_type,
typename ref_seqs_type>
89 void read_header(stream_view_type && stream_view,
90 sam_file_header<ref_ids_type> & hdr,
93 template <
typename stream_t,
typename header_type>
94 void write_header(stream_t & stream, sam_file_output_options
const & options, header_type & header);
107template <
typename ref_
id_type,
typename ref_
id_tmp_type,
typename header_type,
typename ref_seqs_type>
108inline void format_sam_base::check_and_assign_ref_id(ref_id_type &
ref_id,
109 ref_id_tmp_type & ref_id_tmp,
110 header_type & header,
113 if (!std::ranges::empty(ref_id_tmp))
115 auto search = header.ref_dict.find(ref_id_tmp);
117 if (search == header.ref_dict.end())
119 if constexpr (detail::decays_to_ignore_v<ref_seqs_type>)
121 if (ref_info_present_in_header)
123 throw format_error{
"Unknown reference id found in record which is not present in the header."};
127 header.ref_ids().push_back(ref_id_tmp);
128 auto pos = std::ranges::size(header.ref_ids()) - 1;
129 header.ref_dict[header.ref_ids()[pos]] = pos;
135 throw format_error{
"Unknown reference id found in record which is not present in the given ids."};
148inline int32_t format_sam_base::soft_clipping_at_front(
std::vector<cigar> const & cigar_vector)
const
153 auto soft_clipping_at = [&](
size_t const index)
155 return cigar_vector[index] ==
'S'_cigar_operation;
158 auto hard_clipping_at = [&](
size_t const index)
160 return cigar_vector[index] ==
'H'_cigar_operation;
163 auto vector_size_at_least = [&](
size_t const min_size)
165 return cigar_vector.
size() >= min_size;
168 auto cigar_count_at = [&](
size_t const index)
170 return get<0>(cigar_vector[index]);
174 if (vector_size_at_least(1) && soft_clipping_at(0))
175 sc_front = cigar_count_at(0);
176 else if (vector_size_at_least(2) && hard_clipping_at(0) && soft_clipping_at(1))
177 sc_front = cigar_count_at(1);
189template <
typename stream_view_type, std::ranges::forward_range target_range_type>
190inline void format_sam_base::read_forward_range_field(stream_view_type && stream_view, target_range_type & target)
192 using target_range_value_t = std::ranges::range_value_t<target_range_type>;
193 using begin_iterator_t = std::ranges::iterator_t<stream_view_type>;
194 using end_iterator_t = std::ranges::sentinel_t<stream_view_type>;
201 if (
char c = *it; !(++it == std::ranges::end(stream_view) && c ==
'*'))
204 std::ranges::copy(std::ranges::subrange<begin_iterator_t, end_iterator_t>{it, std::ranges::end(stream_view)}
205 | views::char_to<target_range_value_t>,
217template <std::ranges::forward_range target_range_type>
218inline void format_sam_base::read_forward_range_field(
std::string_view const str, target_range_type & target)
220 if (str.
size() == 1 && str[0] ==
'*')
223 if constexpr (std::assignable_from<target_range_type, std::string_view>)
229 target.resize(str.
size());
230 for (
size_t i = 0; i < str.
size(); ++i)
231 target[i] =
assign_char_to(str[i], std::ranges::range_value_t<target_range_type>{});
244template <arithmetic arithmetic_target_type>
245inline void format_sam_base::read_arithmetic_field(
std::string_view const & str,
246 arithmetic_target_type & arithmetic_target)
250 if (res.ec == std::errc::invalid_argument || res.ptr != str.
end())
252 +
"' could not be cast into type " + detail::type_name_as_string<arithmetic_target_type>};
254 if (res.ec == std::errc::result_out_of_range)
256 +
"' into type " + detail::type_name_as_string<arithmetic_target_type>
257 +
" would cause an overflow."};
276template <
typename stream_view_type,
typename ref_
ids_type,
typename ref_seqs_type>
277inline void format_sam_base::read_header(stream_view_type && stream_view,
278 sam_file_header<ref_ids_type> & hdr,
282 auto end = std::ranges::end(stream_view);
285 auto make_tag = [](uint8_t char1, uint8_t char2)
constexpr
287 return static_cast<uint16_t
>(char1) | (
static_cast<uint16_t
>(char2) << CHAR_BIT);
292 auto parse_and_make_tag = [&]()
298 return make_tag(raw_tag[0], raw_tag[1]);
301 auto take_until_predicate = [&it, &string_buffer](
auto const & predicate)
303 string_buffer.clear();
304 while (!predicate(*it))
306 string_buffer.push_back(*it);
311 auto skip_until_predicate = [&it](
auto const & predicate)
313 while (!predicate(*it))
317 auto copy_next_tag_value_into_buffer = [&]()
319 skip_until_predicate(
is_char<
':'>);
338 read_forward_range_field(string_buffer, value);
341 auto print_cerr_of_unspported_tag = [&it](
char const *
const header_tag,
std::array<char, 2> raw_tag)
343 std::cerr <<
"Unsupported SAM header tag in @" << header_tag <<
": " << raw_tag[0] << raw_tag[1] <<
'\n';
346 while (it != end &&
is_char<
'@'>(*it))
350 switch (parse_and_make_tag())
352 case make_tag(
'H',
'D'):
360 switch (parse_and_make_tag())
362 case make_tag(
'V',
'N'):
367 case make_tag(
'S',
'O'):
372 case make_tag(
'S',
'S'):
377 case make_tag(
'G',
'O'):
384 print_cerr_of_unspported_tag(
"HD", raw_tag);
388 if (header_entry !=
nullptr)
390 copy_next_tag_value_into_buffer();
391 read_forward_range_field(string_buffer, *header_entry);
398 if (hdr.format_version.empty())
399 throw format_error{
std::string{
"The required VN tag in @HD is missing."}};
404 case make_tag(
'S',
'Q'):
406 ref_info_present_in_header =
true;
407 std::ranges::range_value_t<
decltype(hdr.ref_ids())>
id;
416 switch (parse_and_make_tag())
418 case make_tag(
'S',
'N'):
420 copy_next_tag_value_into_buffer();
421 read_forward_range_field(string_buffer,
id);
424 case make_tag(
'L',
'N'):
426 int32_t sequence_length_tmp{};
427 copy_next_tag_value_into_buffer();
428 read_arithmetic_field(string_buffer, sequence_length_tmp);
429 sequence_length = sequence_length_tmp;
434 parse_and_append_unhandled_tag_to_string(get<1>(info), raw_tag);
441 throw format_error{
std::string{
"The required SN tag in @SQ is missing."}};
442 if (!sequence_length.has_value())
443 throw format_error{
std::string{
"The required LN tag in @SQ is missing."}};
444 if (sequence_length.value() <= 0)
445 throw format_error{
std::string{
"The value of LN in @SQ must be positive."}};
447 get<0>(info) = sequence_length.value();
450 if constexpr (!detail::decays_to_ignore_v<ref_seqs_type>)
452 auto id_it = hdr.ref_dict.
find(
id);
454 if (id_it == hdr.ref_dict.end())
455 throw format_error{detail::to_string(
"Unknown reference name '",
457 "' found in SAM header ",
458 "(header.ref_ids(): ",
462 auto & given_ref_info = hdr.ref_id_info[id_it->second];
464 if (std::get<0>(given_ref_info) != std::get<0>(info))
465 throw format_error{
"Provided and header-based reference length differ."};
467 hdr.ref_id_info[id_it->second] = std::move(info);
471 static_assert(!detail::is_type_specialisation_of_v<
decltype(hdr.ref_ids()),
std::deque>,
472 "The range over reference ids must be of type std::deque such that pointers are not "
475 hdr.ref_ids().push_back(
id);
476 hdr.ref_id_info.push_back(info);
477 hdr.ref_dict[(hdr.ref_ids())[(hdr.ref_ids()).
size() - 1]] = (hdr.ref_ids()).
size() - 1;
482 case make_tag(
'R',
'G'):
491 switch (parse_and_make_tag())
493 case make_tag(
'I',
'D'):
495 copy_next_tag_value_into_buffer();
496 read_forward_range_field(string_buffer, get<0>(tmp));
501 parse_and_append_unhandled_tag_to_string(get<1>(tmp), raw_tag);
507 if (get<0>(tmp).
empty())
508 throw format_error{
std::string{
"The required ID tag in @RG is missing."}};
510 hdr.read_groups.emplace_back(std::move(tmp));
514 case make_tag(
'P',
'G'):
524 switch (parse_and_make_tag())
526 case make_tag(
'I',
'D'):
531 case make_tag(
'P',
'N'):
536 case make_tag(
'P',
'P'):
541 case make_tag(
'C',
'L'):
546 case make_tag(
'D',
'S'):
551 case make_tag(
'V',
'N'):
558 print_cerr_of_unspported_tag(
"PG", raw_tag);
562 if (program_info_entry !=
nullptr)
564 copy_next_tag_value_into_buffer();
565 read_forward_range_field(string_buffer, *program_info_entry);
573 throw format_error{
std::string{
"The required ID tag in @PG is missing."}};
575 hdr.program_infos.emplace_back(std::move(tmp));
579 case make_tag(
'C',
'O'):
583 take_until_predicate(
is_char<
'\n'>);
584 read_forward_range_field(string_buffer, tmp);
586 hdr.comments.emplace_back(std::move(tmp));
591 throw format_error{
std::string{
"Illegal SAM header tag starting with:"} + *it};
612template <
typename stream_t,
typename header_type>
614format_sam_base::write_header(stream_t & stream, sam_file_output_options
const & options, header_type & header)
616 if constexpr (!detail::decays_to_ignore_v<header_type>)
624 if (!header.sorting.empty()
625 && !(header.sorting ==
"unknown" || header.sorting ==
"unsorted" || header.sorting ==
"queryname"
626 || header.sorting ==
"coordinate"))
627 throw format_error{
"SAM format error: The header.sorting member must be "
628 "one of [unknown, unsorted, queryname, coordinate]."};
630 if (!header.grouping.empty()
631 && !(header.grouping ==
"none" || header.grouping ==
"query" || header.grouping ==
"reference"))
632 throw format_error{
"SAM format error: The header.grouping member must be "
633 "one of [none, query, reference]."};
652 stream <<
"@HD\tVN:";
655 if (!header.sorting.empty())
656 stream <<
"\tSO:" << header.sorting;
658 if (!header.subsorting.empty())
659 stream <<
"\tSS:" << header.subsorting;
661 if (!header.grouping.empty())
662 stream <<
"\tGO:" << header.grouping;
664 detail::write_eol(stream_it, options.add_carriage_return);
667 for (
auto const & [ref_name, ref_info] :
views::zip(header.ref_ids(), header.ref_id_info))
669 stream <<
"@SQ\tSN:";
673 stream <<
"\tLN:" << get<0>(ref_info);
675 if (!get<1>(ref_info).
empty())
676 stream <<
"\t" << get<1>(ref_info);
678 detail::write_eol(stream_it, options.add_carriage_return);
682 for (
auto const & read_group : header.read_groups)
685 <<
"\tID:" << get<0>(read_group);
687 if (!get<1>(read_group).
empty())
688 stream <<
"\t" << get<1>(read_group);
690 detail::write_eol(stream_it, options.add_carriage_return);
694 for (
auto const & program : header.program_infos)
697 <<
"\tID:" << program.id;
699 if (!program.name.empty())
700 stream <<
"\tPN:" << program.name;
702 if (!program.command_line_call.empty())
703 stream <<
"\tCL:" << program.command_line_call;
705 if (!program.previous.empty())
706 stream <<
"\tPP:" << program.previous;
708 if (!program.description.empty())
709 stream <<
"\tDS:" << program.description;
711 if (!program.version.empty())
712 stream <<
"\tVN:" << program.version;
714 detail::write_eol(stream_it, options.add_carriage_return);
718 for (
auto const &
comment : header.comments)
721 detail::write_eol(stream_it, options.add_carriage_return);
T back_inserter(T... args)
Provides seqan3::views::char_to.
Provides various utility functions.
constexpr auto assign_char_to
Assign a character to an alphabet object.
Definition: alphabet/concept.hpp:524
@ comment
Comment field of arbitrary content, usually a string.
@ ref_id
The identifier of the (reference) sequence that seqan3::field::seq was aligned to.
constexpr auto is_char
Checks whether a given letter is the same as the template non-type argument.
Definition: predicate.hpp:63
constexpr size_t size
The size of a type pack.
Definition: type_pack/traits.hpp:146
seqan::std::views::zip zip
A view adaptor that takes several views and returns tuple-like values from every i-th element of each...
Definition: zip.hpp:27
Provides various utility functions.
Auxiliary functions for the SAM IO.
Provides seqan3::debug_stream and related types.
Provides seqan3::views::repeat_n.
Provides seqan3::views::slice.
Provides traits to inspect some information of a type, for example its name.
Provides seqan3::views::zip.