147 template <
typename stream_type,
148 typename seq_legal_alph_type,
bool seq_qual_combined,
156 qual_type & qualities);
158 template <
typename stream_type,
166 qual_type && qualities);
168 template <
typename stream_type,
169 typename seq_legal_alph_type,
170 typename ref_seqs_type,
171 typename ref_ids_type,
174 typename offset_type,
175 typename ref_seq_type,
176 typename ref_id_type,
177 typename ref_offset_type,
184 typename tag_dict_type,
185 typename e_value_type,
186 typename bit_score_type>
189 ref_seqs_type & ref_seqs,
194 offset_type & offset,
195 ref_seq_type & SEQAN3_DOXYGEN_ONLY(ref_seq),
196 ref_id_type & ref_id,
197 ref_offset_type & ref_offset,
199 cigar_type & cigar_vector,
203 tag_dict_type & tag_dict,
204 e_value_type & SEQAN3_DOXYGEN_ONLY(e_value),
205 bit_score_type & SEQAN3_DOXYGEN_ONLY(bit_score));
207 template <
typename stream_type,
208 typename header_type,
211 typename ref_seq_type,
212 typename ref_id_type,
216 typename tag_dict_type,
217 typename e_value_type,
218 typename bit_score_type>
221 header_type && header,
225 int32_t
const offset,
226 ref_seq_type && SEQAN3_DOXYGEN_ONLY(ref_seq),
227 ref_id_type && ref_id,
234 tag_dict_type && tag_dict,
235 e_value_type && SEQAN3_DOXYGEN_ONLY(e_value),
236 bit_score_type && SEQAN3_DOXYGEN_ONLY(bit_score));
246 alignment_file_header<> default_header{};
249 bool ref_info_present_in_header{
false};
258 template <
typename t>
259 decltype(
auto) default_or(t && v) const noexcept
261 return std::forward<t>(v);
264 using format_sam_base::read_field;
266 template <
typename stream_view_type,
typename value_type>
268 stream_view_type && stream_view,
271 template <
typename stream_view_type>
272 void read_field(stream_view_type && stream_view, sam_tag_dictionary & target);
274 template <
typename stream_it_t, std::ranges::forward_range field_type>
275 void write_range(stream_it_t & stream_it, field_type && field_value);
277 template <
typename stream_it_t>
278 void write_range(stream_it_t & stream_it,
char const *
const field_value);
280 template <
typename stream_t, arithmetic field_type>
281 void write_field(stream_t & stream, field_type field_value);
283 template <
typename stream_t>
284 void write_tag_fields(stream_t & stream, sam_tag_dictionary
const & tag_dict,
char const separator);
288 template <
typename stream_type,
289 typename seq_legal_alph_type,
bool seq_qual_combined,
297 qual_type & qualities)
301 if constexpr (seq_qual_combined)
305 std::ignore, std::ignore, std::ignore, std::ignore, std::ignore, std::ignore,
306 std::ignore, std::ignore, std::ignore, std::ignore, std::ignore, std::ignore);
308 for (
auto sit = tmp_qual.
begin(), dit = std::ranges::begin(
sequence); sit != tmp_qual.
end(); ++sit, ++dit)
309 get<1>(*dit).assign_char(*sit);
314 std::ignore, std::ignore, std::ignore, std::ignore, std::ignore, std::ignore,
315 std::ignore, std::ignore, std::ignore, std::ignore, std::ignore, std::ignore);
318 if constexpr (!detail::decays_to_ignore_v<seq_type>)
319 if (std::ranges::distance(
sequence) == 0)
320 throw parse_error{
"The sequence information must not be empty."};
321 if constexpr (!detail::decays_to_ignore_v<id_type>)
322 if (std::ranges::distance(
id) == 0)
323 throw parse_error{
"The id information must not be empty."};
330 template <
typename stream_type,
338 qual_type && qualities)
349 default_or(qualities),
366 template <
typename stream_type,
367 typename seq_legal_alph_type,
368 typename ref_seqs_type,
369 typename ref_ids_type,
372 typename offset_type,
373 typename ref_seq_type,
374 typename ref_id_type,
375 typename ref_offset_type,
382 typename tag_dict_type,
383 typename e_value_type,
384 typename bit_score_type>
387 ref_seqs_type & ref_seqs,
392 offset_type & offset,
393 ref_seq_type & SEQAN3_DOXYGEN_ONLY(ref_seq),
394 ref_id_type & ref_id,
395 ref_offset_type & ref_offset,
397 cigar_type & cigar_vector,
401 tag_dict_type & tag_dict,
402 e_value_type & SEQAN3_DOXYGEN_ONLY(e_value),
403 bit_score_type & SEQAN3_DOXYGEN_ONLY(bit_score))
405 static_assert(detail::decays_to_ignore_v<ref_offset_type> ||
406 detail::is_type_specialisation_of_v<ref_offset_type, std::optional>,
407 "The ref_offset must be a specialisation of std::optional.");
413 int32_t ref_offset_tmp{};
414 std::ranges::range_value_t<decltype(header.
ref_ids())> ref_id_tmp{};
415 [[maybe_unused]] int32_t offset_tmp{};
416 [[maybe_unused]] int32_t soft_clipping_end{};
418 [[maybe_unused]] int32_t ref_length{0}, seq_length{0};
422 if (is_char<'@'>(*std::ranges::begin(stream_view)))
424 read_header(stream_view, header, ref_seqs);
426 if (std::ranges::begin(stream_view) == std::ranges::end(stream_view))
432 read_field(field_view,
id);
434 uint16_t flag_integral{};
435 read_field(field_view, flag_integral);
438 read_field(field_view, ref_id_tmp);
439 check_and_assign_ref_id(ref_id, ref_id_tmp, header, ref_seqs);
441 read_field(field_view, ref_offset_tmp);
444 if (ref_offset_tmp == -1)
445 ref_offset = std::nullopt;
446 else if (ref_offset_tmp > -1)
447 ref_offset = ref_offset_tmp;
448 else if (ref_offset_tmp < -1)
449 throw format_error{
"No negative values are allowed for field::ref_offset."};
451 read_field(field_view, mapq);
455 if constexpr (!detail::decays_to_ignore_v<align_type> || !detail::decays_to_ignore_v<cigar_type>)
457 if (!is_char<'*'>(*std::ranges::begin(stream_view)))
459 std::tie(tmp_cigar_vector, ref_length, seq_length) = parse_cigar(field_view);
460 transfer_soft_clipping_to(tmp_cigar_vector, offset_tmp, soft_clipping_end);
465 std::ranges::next(std::ranges::begin(field_view));
470 detail::consume(field_view);
477 if constexpr (!detail::decays_to_ignore_v<mate_type>)
479 std::ranges::range_value_t<decltype(header.
ref_ids())> tmp_mate_ref_id{};
480 read_field(field_view, tmp_mate_ref_id);
482 if (tmp_mate_ref_id ==
"=")
484 if constexpr (!detail::decays_to_ignore_v<ref_id_type>)
485 get<0>(mate) = ref_id;
487 check_and_assign_ref_id(get<0>(mate), ref_id_tmp, header, ref_seqs);
491 check_and_assign_ref_id(get<0>(mate), tmp_mate_ref_id, header, ref_seqs);
495 read_field(field_view, tmp_pnext);
498 get<1>(mate) = --tmp_pnext;
499 else if (tmp_pnext < 0)
500 throw format_error{
"No negative values are allowed at the mate mapping position."};
503 read_field(field_view, get<2>(mate));
507 for (
size_t i = 0; i < 3u; ++i)
509 detail::consume(field_view);
515 if (!is_char<'*'>(*std::ranges::begin(stream_view)))
517 auto constexpr is_legal_alph = is_in_alphabet<seq_legal_alph_type>;
520 if (!is_legal_alph(c))
523 " evaluated to false on " +
524 detail::make_printable(c)};
528 if constexpr (detail::decays_to_ignore_v<seq_type>)
530 if constexpr (!detail::decays_to_ignore_v<align_type>)
533 "If you want to read ALIGNMENT but not SEQ, the alignment"
534 " object must store a sequence container at the second (query) position.");
536 if (!tmp_cigar_vector.empty())
539 auto tmp_iter = std::ranges::begin(seq_stream);
540 std::ranges::advance(tmp_iter, offset_tmp);
542 for (; seq_length > 0; --seq_length)
544 get<1>(align).push_back(std::ranges::range_value_t<decltype(get<1>(align))>{}.assign_char(*tmp_iter));
548 std::ranges::advance(tmp_iter, soft_clipping_end);
557 detail::consume(seq_stream);
562 read_field(seq_stream,
seq);
564 if constexpr (!detail::decays_to_ignore_v<align_type>)
566 if (!tmp_cigar_vector.empty())
568 assign_unaligned(get<1>(align),
577 std::ranges::next(std::ranges::begin(field_view));
582 auto const tab_or_end = is_char<'\t'> || is_char<'\r'> || is_char<'\n'>;
585 if constexpr (!detail::decays_to_ignore_v<seq_type> && !detail::decays_to_ignore_v<qual_type>)
587 if (std::ranges::distance(
seq) != 0 && std::ranges::distance(qual) != 0 &&
588 std::ranges::distance(
seq) != std::ranges::distance(qual))
590 throw format_error{detail::to_string(
"Sequence length (", std::ranges::distance(
seq),
591 ") and quality length (", std::ranges::distance(qual),
592 ") must be the same.")};
598 while (is_char<'\t'>(*std::ranges::begin(stream_view)))
600 std::ranges::next(std::ranges::begin(stream_view));
604 detail::consume(stream_view |
views::take_until(!(is_char<'\r'> || is_char<'\n'>)));
610 if constexpr (!detail::decays_to_ignore_v<align_type>)
612 int32_t ref_idx{(ref_id_tmp.empty()) ? -1 : 0};
614 if constexpr (!detail::decays_to_ignore_v<ref_seqs_type>)
616 if (!ref_id_tmp.empty())
618 assert(header.
ref_dict.count(ref_id_tmp) != 0);
619 ref_idx = header.
ref_dict[ref_id_tmp];
623 construct_alignment(align, tmp_cigar_vector, ref_idx, ref_seqs, ref_offset_tmp, ref_length);
626 if constexpr (!detail::decays_to_ignore_v<cigar_type>)
627 std::swap(cigar_vector, tmp_cigar_vector);
631 template <
typename stream_type,
632 typename header_type,
635 typename ref_seq_type,
636 typename ref_id_type,
640 typename tag_dict_type,
641 typename e_value_type,
642 typename bit_score_type>
645 header_type && header,
649 int32_t
const offset,
650 ref_seq_type && SEQAN3_DOXYGEN_ONLY(ref_seq),
651 ref_id_type && ref_id,
658 tag_dict_type && tag_dict,
659 e_value_type && SEQAN3_DOXYGEN_ONLY(e_value),
660 bit_score_type && SEQAN3_DOXYGEN_ONLY(bit_score))
678 static_assert((std::ranges::forward_range<seq_type> &&
679 alphabet<std::ranges::range_reference_t<seq_type>>),
680 "The seq object must be a std::ranges::forward_range over "
681 "letters that model seqan3::alphabet.");
683 static_assert((std::ranges::forward_range<id_type> &&
684 alphabet<std::ranges::range_reference_t<id_type>>),
685 "The id object must be a std::ranges::forward_range over "
686 "letters that model seqan3::alphabet.");
688 if constexpr (!detail::decays_to_ignore_v<ref_id_type>)
690 static_assert((std::ranges::forward_range<ref_id_type> ||
693 "The ref_id object must be a std::ranges::forward_range "
694 "over letters that model seqan3::alphabet.");
698 static_assert(!detail::decays_to_ignore_v<header_type>,
699 "If you give indices as reference id information the header must also be present.");
703 "The align object must be a std::pair of two ranges whose "
704 "value_type is comparable to seqan3::gap");
707 std::equality_comparable_with<
gap, std::ranges::range_reference_t<decltype(std::get<0>(align))>> &&
708 std::equality_comparable_with<
gap, std::ranges::range_reference_t<decltype(std::get<1>(align))>>),
709 "The align object must be a std::pair of two ranges whose "
710 "value_type is comparable to seqan3::gap");
712 static_assert((std::ranges::forward_range<qual_type> &&
713 alphabet<std::ranges::range_reference_t<qual_type>>),
714 "The qual object must be a std::ranges::forward_range "
715 "over letters that model seqan3::alphabet.");
718 "The mate object must be a std::tuple of size 3 with "
719 "1) a std::ranges::forward_range with a value_type modelling seqan3::alphabet, "
720 "2) a std::integral or std::optional<std::integral>, and "
721 "3) a std::integral.");
723 static_assert(((std::ranges::forward_range<decltype(std::get<0>(mate))> ||
729 "The mate object must be a std::tuple of size 3 with "
730 "1) a std::ranges::forward_range with a value_type modelling seqan3::alphabet, "
731 "2) a std::integral or std::optional<std::integral>, and "
732 "3) a std::integral.");
736 static_assert(!detail::decays_to_ignore_v<header_type>,
737 "If you give indices as mate reference id information the header must also be present.");
740 "The tag_dict object must be of type seqan3::sam_tag_dictionary.");
745 if constexpr (!detail::decays_to_ignore_v<header_type> &&
746 !detail::decays_to_ignore_v<ref_id_type> &&
755 if constexpr (std::ranges::contiguous_range<decltype(ref_id)> &&
756 std::ranges::sized_range<decltype(ref_id)> &&
757 std::ranges::borrowed_range<decltype(ref_id)>)
766 "The ref_id type is not convertible to the reference id information stored in the "
767 "reference dictionary of the header object.");
769 id_it = header.
ref_dict.find(ref_id);
773 throw format_error{detail::to_string(
"The ref_id '", ref_id,
"' was not in the list of references:",
779 throw format_error{
"The ref_offset object must be an std::integral >= 0."};
784 if constexpr (!detail::decays_to_ignore_v<header_type>)
788 write_header(stream, options, header);
789 header_was_written =
true;
796 std::cpp20::ostreambuf_iterator stream_it{stream};
797 char const separator{
'\t'};
799 write_range(stream_it, std::forward<id_type>(
id));
803 stream << static_cast<uint16_t>(flag) << separator;
805 if constexpr (!detail::decays_to_ignore_v<ref_id_type>)
809 write_range(stream_it, (header.
ref_ids())[ref_id]);
813 if (ref_id.has_value())
814 write_range(stream_it, (header.
ref_ids())[ref_id.value()]);
820 write_range(stream_it, std::forward<ref_id_type>(ref_id));
831 stream << (ref_offset.
value_or(-1) + 1) << separator;
833 stream << static_cast<unsigned>(mapq) << separator;
835 if (!std::ranges::empty(cigar_vector))
837 for (
auto & c : cigar_vector)
838 stream << c.to_string();
840 else if (!std::ranges::empty(get<0>(align)) && !std::ranges::empty(get<1>(align)))
847 for (
auto chr : get<1>(align))
852 write_range(stream_it, detail::get_cigar_string(std::forward<align_type>(align), offset, off_end));
863 write_range(stream_it, (header.
ref_ids())[get<0>(mate)]);
867 if (get<0>(mate).has_value())
870 write_range(stream_it, header.
ref_ids()[get<0>(mate).value_or(0)]);
876 write_range(stream_it, get<0>(mate));
884 stream << (get<1>(mate).value_or(-1) + 1) << separator;
888 stream << get<1>(mate) << separator;
891 stream << get<2>(mate) << separator;
893 write_range(stream_it, std::forward<seq_type>(
seq));
897 write_range(stream_it, std::forward<qual_type>(qual));
899 write_tag_fields(stream, std::forward<tag_dict_type>(tag_dict), separator);
922 template <
typename stream_view_type,
typename value_type>
924 stream_view_type && stream_view,
928 while (std::ranges::begin(stream_view) != ranges::end(stream_view))
933 if (is_char<','>(*std::ranges::begin(stream_view)))
934 std::ranges::next(std::ranges::begin(stream_view));
956 template <
typename stream_view_type>
957 inline void format_sam::read_field(stream_view_type && stream_view, sam_tag_dictionary & target)
965 std::ranges::next(std::ranges::begin(stream_view));
967 std::ranges::next(std::ranges::begin(stream_view));
968 std::ranges::next(std::ranges::begin(stream_view));
970 std::ranges::next(std::ranges::begin(stream_view));
971 std::ranges::next(std::ranges::begin(stream_view));
978 std::ranges::next(std::ranges::begin(stream_view));
984 read_field(stream_view, tmp);
991 read_field(stream_view, tmp);
997 target[tag] = stream_view | views::to<std::string>;
1008 std::ranges::next(std::ranges::begin(stream_view));
1009 std::ranges::next(std::ranges::begin(stream_view));
1011 switch (array_value_type_id)
1014 read_sam_dict_vector(target[tag], stream_view, int8_t{});
1017 read_sam_dict_vector(target[tag], stream_view, uint8_t{});
1020 read_sam_dict_vector(target[tag], stream_view, int16_t{});
1023 read_sam_dict_vector(target[tag], stream_view, uint16_t{});
1026 read_sam_dict_vector(target[tag], stream_view, int32_t{});
1029 read_sam_dict_vector(target[tag], stream_view, uint32_t{});
1032 read_sam_dict_vector(target[tag], stream_view,
float{});
1035 throw format_error{
std::string(
"The first character in the numerical ") +
1036 "id of a SAM tag must be one of [cCsSiIf] but '" + array_value_type_id +
1042 throw format_error{
std::string(
"The second character in the numerical id of a "
1043 "SAM tag must be one of [A,i,Z,H,B,f] but '") + type_id +
"' was given."};
1054 template <
typename stream_it_t, std::ranges::forward_range field_type>
1055 inline void format_sam::write_range(stream_it_t & stream_it, field_type && field_value)
1057 if (std::ranges::empty(field_value))
1069 template <
typename stream_it_t>
1070 inline void format_sam::write_range(stream_it_t & stream_it,
char const *
const field_value)
1080 template <
typename stream_t, arithmetic field_type>
1081 inline void format_sam::write_field(stream_t & stream, field_type field_value)
1084 if constexpr (std::same_as<field_type, int8_t> || std::same_as<field_type, uint8_t>)
1085 stream << static_cast<int16_t>(field_value);
1087 stream << field_value;
1097 template <
typename stream_t>
1098 inline void format_sam::write_tag_fields(stream_t & stream, sam_tag_dictionary
const & tag_dict,
char const separator)
1100 auto stream_variant_fn = [
this, &stream] (
auto && arg)
1104 if constexpr (!
container<T> || std::same_as<T, std::string>)
1110 if (arg.begin() != arg.end())
1112 for (
auto it = arg.begin(); it != (arg.end() - 1); ++it)
1114 write_field(stream, *it);
1118 write_field(stream, *(arg.end() - 1));
1123 for (
auto & [tag, variant] : tag_dict)
1125 stream << separator;
1127 char char0 = tag / 256;
1128 char char1 = tag % 256;
1130 stream << char0 << char1 <<
':' << detail::sam_tag_type_char[variant.
index()] <<
':';
1132 if (detail::sam_tag_type_char_extra[variant.
index()] !=
'\0')
1133 stream << detail::sam_tag_type_char_extra[variant.
index()] <<
',';