30namespace seqan3::detail
47 format_sam_base() =
default;
48 format_sam_base(format_sam_base
const &) =
default;
49 format_sam_base & operator=(format_sam_base
const &) =
default;
50 format_sam_base(format_sam_base &&) =
default;
51 format_sam_base & operator=(format_sam_base &&) =
default;
52 ~format_sam_base() =
default;
57 static constexpr std::array format_version{
'1',
'.',
'6'};
63 bool header_was_written{
false};
66 bool ref_info_present_in_header{
false};
68 template <
typename ref_
id_type,
typename ref_
id_tmp_type,
typename header_type,
typename ref_seqs_type>
69 void check_and_assign_ref_id(ref_id_type &
ref_id,
70 ref_id_tmp_type & ref_id_tmp,
76 template <
typename stream_view_type, std::ranges::forward_range target_range_type>
77 void read_forward_range_field(stream_view_type && stream_view, target_range_type & target);
79 template <std::ranges::forward_range target_range_type>
80 void read_forward_range_field(
std::string_view const str, target_range_type & target);
82 template <arithmetic arithmetic_target_type>
83 void read_arithmetic_field(
std::string_view const & str, arithmetic_target_type & arithmetic_target);
85 template <
typename stream_view_type,
typename ref_
ids_type,
typename ref_seqs_type>
86 void read_header(stream_view_type && stream_view,
87 sam_file_header<ref_ids_type> & hdr,
90 template <
typename stream_t,
typename header_type>
91 void write_header(stream_t & stream, sam_file_output_options
const & options, header_type & header);
104template <
typename ref_
id_type,
typename ref_
id_tmp_type,
typename header_type,
typename ref_seqs_type>
105inline void format_sam_base::check_and_assign_ref_id(ref_id_type &
ref_id,
106 ref_id_tmp_type & ref_id_tmp,
107 header_type & header,
110 if (!std::ranges::empty(ref_id_tmp))
112 auto search = header.ref_dict.find(ref_id_tmp);
114 if (search == header.ref_dict.end())
116 if constexpr (detail::decays_to_ignore_v<ref_seqs_type>)
118 if (ref_info_present_in_header)
120 throw format_error{
"Unknown reference id found in record which is not present in the header."};
124 header.ref_ids().push_back(ref_id_tmp);
125 auto pos = std::ranges::size(header.ref_ids()) - 1;
126 header.ref_dict[header.ref_ids()[pos]] = pos;
132 throw format_error{
"Unknown reference id found in record which is not present in the given ids."};
145inline int32_t format_sam_base::soft_clipping_at_front(
std::vector<cigar> const & cigar_vector)
const
150 auto soft_clipping_at = [&](
size_t const index)
152 return cigar_vector[index] ==
'S'_cigar_operation;
155 auto hard_clipping_at = [&](
size_t const index)
157 return cigar_vector[index] ==
'H'_cigar_operation;
160 auto vector_size_at_least = [&](
size_t const min_size)
162 return cigar_vector.
size() >= min_size;
165 auto cigar_count_at = [&](
size_t const index)
167 return get<0>(cigar_vector[index]);
171 if (vector_size_at_least(1) && soft_clipping_at(0))
172 sc_front = cigar_count_at(0);
173 else if (vector_size_at_least(2) && hard_clipping_at(0) && soft_clipping_at(1))
174 sc_front = cigar_count_at(1);
186template <
typename stream_view_type, std::ranges::forward_range target_range_type>
187inline void format_sam_base::read_forward_range_field(stream_view_type && stream_view, target_range_type & target)
189 using target_range_value_t = std::ranges::range_value_t<target_range_type>;
190 using begin_iterator_t = std::ranges::iterator_t<stream_view_type>;
191 using end_iterator_t = std::ranges::sentinel_t<stream_view_type>;
198 if (
char c = *it; !(++it == std::ranges::end(stream_view) && c ==
'*'))
201 std::ranges::copy(std::ranges::subrange<begin_iterator_t, end_iterator_t>{it, std::ranges::end(stream_view)}
202 | views::char_to<target_range_value_t>,
214template <std::ranges::forward_range target_range_type>
215inline void format_sam_base::read_forward_range_field(
std::string_view const str, target_range_type & target)
217 if (str.
size() == 1 && str[0] ==
'*')
220 if constexpr (std::assignable_from<target_range_type, std::string_view>)
226 target.resize(str.
size());
227 for (
size_t i = 0; i < str.
size(); ++i)
228 target[i] =
assign_char_to(str[i], std::ranges::range_value_t<target_range_type>{});
241template <arithmetic arithmetic_target_type>
242inline void format_sam_base::read_arithmetic_field(
std::string_view const & str,
243 arithmetic_target_type & arithmetic_target)
247 if (res.ec == std::errc::invalid_argument || res.ptr != str.
end())
249 +
"' could not be cast into type " + detail::type_name_as_string<arithmetic_target_type>};
251 if (res.ec == std::errc::result_out_of_range)
253 +
"' into type " + detail::type_name_as_string<arithmetic_target_type>
254 +
" would cause an overflow."};
278template <
typename stream_view_type,
typename ref_
ids_type,
typename ref_seqs_type>
279inline void format_sam_base::read_header(stream_view_type && stream_view,
280 sam_file_header<ref_ids_type> & hdr,
284 auto end = std::ranges::end(stream_view);
287 auto make_tag = [](uint8_t char1, uint8_t char2)
constexpr
289 return static_cast<uint16_t
>(char1) | (
static_cast<uint16_t
>(char2) << CHAR_BIT);
294 auto parse_and_make_tag = [&]()
300 return make_tag(raw_tag[0], raw_tag[1]);
303 auto take_until_predicate = [&it, &string_buffer](
auto const & predicate)
305 string_buffer.clear();
306 while (!predicate(*it))
308 string_buffer.push_back(*it);
313 auto skip_until_predicate = [&it](
auto const & predicate)
315 while (!predicate(*it))
319 auto copy_next_tag_value_into_buffer = [&]()
321 skip_until_predicate(
is_char<
':'>);
340 read_forward_range_field(string_buffer, value);
343 while (it != end &&
is_char<
'@'>(*it))
347 switch (parse_and_make_tag())
349 case make_tag(
'H',
'D'):
357 switch (parse_and_make_tag())
359 case make_tag(
'V',
'N'):
364 case make_tag(
'S',
'O'):
369 case make_tag(
'S',
'S'):
374 case make_tag(
'G',
'O'):
381 parse_and_append_unhandled_tag_to_string(hdr.user_tags, raw_tag);
385 if (header_entry !=
nullptr)
387 copy_next_tag_value_into_buffer();
388 read_forward_range_field(string_buffer, *header_entry);
393 if (hdr.format_version.empty())
394 throw format_error{
std::string{
"The required VN tag in @HD is missing."}};
399 case make_tag(
'S',
'Q'):
401 ref_info_present_in_header =
true;
402 std::ranges::range_value_t<
decltype(hdr.ref_ids())>
id;
411 switch (parse_and_make_tag())
413 case make_tag(
'S',
'N'):
415 copy_next_tag_value_into_buffer();
416 read_forward_range_field(string_buffer,
id);
419 case make_tag(
'L',
'N'):
421 int32_t sequence_length_tmp{};
422 copy_next_tag_value_into_buffer();
423 read_arithmetic_field(string_buffer, sequence_length_tmp);
424 sequence_length = sequence_length_tmp;
429 parse_and_append_unhandled_tag_to_string(get<1>(info), raw_tag);
436 throw format_error{
std::string{
"The required SN tag in @SQ is missing."}};
437 if (!sequence_length.has_value())
438 throw format_error{
std::string{
"The required LN tag in @SQ is missing."}};
439 if (sequence_length.value() <= 0)
440 throw format_error{
std::string{
"The value of LN in @SQ must be positive."}};
442 get<0>(info) = sequence_length.value();
445 if constexpr (!detail::decays_to_ignore_v<ref_seqs_type>)
447 auto id_it = hdr.ref_dict.
find(
id);
449 if (id_it == hdr.ref_dict.end())
450 throw format_error{detail::to_string(
"Unknown reference name '",
452 "' found in SAM header ",
453 "(header.ref_ids(): ",
457 auto & given_ref_info = hdr.ref_id_info[id_it->second];
459 if (std::get<0>(given_ref_info) != std::get<0>(info))
460 throw format_error{
"Provided and header-based reference length differ."};
462 hdr.ref_id_info[id_it->second] = std::move(info);
466 static_assert(!detail::is_type_specialisation_of_v<
decltype(hdr.ref_ids()),
std::deque>,
467 "The range over reference ids must be of type std::deque such that pointers are not "
470 hdr.ref_ids().push_back(
id);
471 hdr.ref_id_info.push_back(info);
472 hdr.ref_dict[(hdr.ref_ids())[(hdr.ref_ids()).
size() - 1]] = (hdr.ref_ids()).
size() - 1;
477 case make_tag(
'R',
'G'):
486 switch (parse_and_make_tag())
488 case make_tag(
'I',
'D'):
490 copy_next_tag_value_into_buffer();
491 read_forward_range_field(string_buffer, get<0>(tmp));
496 parse_and_append_unhandled_tag_to_string(get<1>(tmp), raw_tag);
502 if (get<0>(tmp).
empty())
503 throw format_error{
std::string{
"The required ID tag in @RG is missing."}};
505 hdr.read_groups.emplace_back(std::move(tmp));
509 case make_tag(
'P',
'G'):
519 switch (parse_and_make_tag())
521 case make_tag(
'I',
'D'):
526 case make_tag(
'P',
'N'):
531 case make_tag(
'P',
'P'):
536 case make_tag(
'C',
'L'):
541 case make_tag(
'D',
'S'):
546 case make_tag(
'V',
'N'):
553 parse_and_append_unhandled_tag_to_string(tmp.user_tags, raw_tag);
557 if (program_info_entry !=
nullptr)
559 copy_next_tag_value_into_buffer();
560 read_forward_range_field(string_buffer, *program_info_entry);
566 throw format_error{
std::string{
"The required ID tag in @PG is missing."}};
568 hdr.program_infos.emplace_back(std::move(tmp));
572 case make_tag(
'C',
'O'):
576 take_until_predicate(
is_char<
'\n'>);
577 read_forward_range_field(string_buffer, tmp);
579 hdr.comments.emplace_back(std::move(tmp));
584 throw format_error{
std::string{
"Illegal SAM header tag starting with:"} + *it};
605template <
typename stream_t,
typename header_type>
607format_sam_base::write_header(stream_t & stream, sam_file_output_options
const & options, header_type & header)
609 if constexpr (!detail::decays_to_ignore_v<header_type>)
617 if (!header.sorting.empty()
618 && !(header.sorting ==
"unknown" || header.sorting ==
"unsorted" || header.sorting ==
"queryname"
619 || header.sorting ==
"coordinate"))
620 throw format_error{
"SAM format error: The header.sorting member must be "
621 "one of [unknown, unsorted, queryname, coordinate]."};
623 if (!header.grouping.empty()
624 && !(header.grouping ==
"none" || header.grouping ==
"query" || header.grouping ==
"reference"))
625 throw format_error{
"SAM format error: The header.grouping member must be "
626 "one of [none, query, reference]."};
645 stream <<
"@HD\tVN:";
648 if (!header.sorting.empty())
649 stream <<
"\tSO:" << header.sorting;
651 if (!header.subsorting.empty())
652 stream <<
"\tSS:" << header.subsorting;
654 if (!header.grouping.empty())
655 stream <<
"\tGO:" << header.grouping;
657 if (!header.user_tags.empty())
658 stream <<
'\t' << header.user_tags;
660 detail::write_eol(stream_it, options.add_carriage_return);
663 for (
auto const & [ref_name, ref_info] : views::
zip(header.ref_ids(), header.ref_id_info))
665 stream <<
"@SQ\tSN:";
669 stream <<
"\tLN:" << get<0>(ref_info);
671 if (!get<1>(ref_info).
empty())
672 stream <<
"\t" << get<1>(ref_info);
674 detail::write_eol(stream_it, options.add_carriage_return);
678 for (
auto const & read_group : header.read_groups)
681 <<
"\tID:" << get<0>(read_group);
683 if (!get<1>(read_group).
empty())
684 stream <<
"\t" << get<1>(read_group);
686 detail::write_eol(stream_it, options.add_carriage_return);
690 for (
auto const & program : header.program_infos)
693 <<
"\tID:" << program.id;
695 if (!program.name.empty())
696 stream <<
"\tPN:" << program.name;
698 if (!program.command_line_call.empty())
699 stream <<
"\tCL:" << program.command_line_call;
701 if (!program.previous.empty())
702 stream <<
"\tPP:" << program.previous;
704 if (!program.description.empty())
705 stream <<
"\tDS:" << program.description;
707 if (!program.version.empty())
708 stream <<
"\tVN:" << program.version;
710 if (!program.user_tags.empty())
711 stream <<
'\t' << program.user_tags;
713 detail::write_eol(stream_it, options.add_carriage_return);
717 for (
auto const &
comment : header.comments)
720 detail::write_eol(stream_it, options.add_carriage_return);
T back_inserter(T... args)
Provides seqan3::views::char_to.
Provides various utility functions.
constexpr auto assign_char_to
Assign a character to an alphabet object.
Definition alphabet/concept.hpp:517
@ comment
Comment field of arbitrary content, usually a string.
@ ref_id
The identifier of the (reference) sequence that seqan3::field::seq was aligned to.
constexpr auto is_char
Checks whether a given letter is the same as the template non-type argument.
Definition predicate.hpp:60
constexpr size_t size
The size of a type pack.
Definition type_pack/traits.hpp:143
seqan::stl::views::zip zip
A view adaptor that takes several views and returns tuple-like values from every i-th element of each...
Definition zip.hpp:24
Provides various utility functions.
Auxiliary functions for the SAM IO.
Provides seqan3::debug_stream and related types.
Provides seqan3::views::repeat_n.
Provides seqan3::views::slice.
Provides traits to inspect some information of a type, for example its name.
Provides seqan3::views::zip.