146 template <
typename stream_type,
147 typename seq_legal_alph_type,
bool seq_qual_combined,
155 qual_type & qualities);
157 template <
typename stream_type,
163 seq_type && sequence,
165 qual_type && qualities);
167 template <
typename stream_type,
168 typename seq_legal_alph_type,
169 typename ref_seqs_type,
170 typename ref_ids_type,
173 typename offset_type,
174 typename ref_seq_type,
175 typename ref_id_type,
176 typename ref_offset_type,
183 typename tag_dict_type,
184 typename e_value_type,
185 typename bit_score_type>
188 ref_seqs_type & ref_seqs,
194 ref_seq_type & SEQAN3_DOXYGEN_ONLY(
ref_seq),
198 cigar_type & cigar_vector,
202 tag_dict_type & tag_dict,
203 e_value_type & SEQAN3_DOXYGEN_ONLY(e_value),
204 bit_score_type & SEQAN3_DOXYGEN_ONLY(
bit_score));
206 template <
typename stream_type,
207 typename header_type,
210 typename ref_seq_type,
211 typename ref_id_type,
215 typename tag_dict_type,
216 typename e_value_type,
217 typename bit_score_type>
220 header_type && header,
225 ref_seq_type && SEQAN3_DOXYGEN_ONLY(
ref_seq),
233 tag_dict_type && tag_dict,
234 e_value_type && SEQAN3_DOXYGEN_ONLY(e_value),
235 bit_score_type && SEQAN3_DOXYGEN_ONLY(
bit_score));
245 alignment_file_header<> default_header{};
248 bool ref_info_present_in_header{
false};
257 template <
typename t>
258 decltype(
auto) default_or(t && v) const noexcept
260 return std::forward<t>(v);
263 using format_sam_base::read_field;
265 template <
typename stream_view_type,
typename value_type>
267 stream_view_type && stream_view,
270 template <
typename stream_view_type>
271 void read_field(stream_view_type && stream_view, sam_tag_dictionary & target);
273 template <
typename stream_it_t, std::ranges::forward_range field_type>
274 void write_range(stream_it_t & stream_it, field_type && field_value);
276 template <
typename stream_it_t>
277 void write_range(stream_it_t & stream_it,
char const *
const field_value);
279 template <
typename stream_t, arithmetic field_type>
280 void write_field(stream_t & stream, field_type field_value);
282 template <
typename stream_t>
283 void write_tag_fields(stream_t & stream, sam_tag_dictionary
const & tag_dict,
char const separator);
287 template <
typename stream_type,
288 typename seq_legal_alph_type,
bool seq_qual_combined,
296 qual_type & qualities)
300 if constexpr (seq_qual_combined)
304 std::ignore, std::ignore, std::ignore, std::ignore, std::ignore, std::ignore,
305 std::ignore, std::ignore, std::ignore, std::ignore, std::ignore, std::ignore);
307 for (
auto sit = tmp_qual.
begin(), dit = std::ranges::begin(sequence); sit != tmp_qual.
end(); ++sit, ++dit)
308 get<1>(*dit).assign_char(*sit);
313 std::ignore, std::ignore, std::ignore, std::ignore, std::ignore, std::ignore,
314 std::ignore, std::ignore, std::ignore, std::ignore, std::ignore, std::ignore);
317 if constexpr (!detail::decays_to_ignore_v<seq_type>)
318 if (std::ranges::distance(sequence) == 0)
319 throw parse_error{
"The sequence information must not be empty."};
320 if constexpr (!detail::decays_to_ignore_v<id_type>)
321 if (std::ranges::distance(
id) == 0)
322 throw parse_error{
"The id information must not be empty."};
329 template <
typename stream_type,
335 seq_type && sequence,
337 qual_type && qualities)
347 default_or(sequence),
348 default_or(qualities),
365 template <
typename stream_type,
366 typename seq_legal_alph_type,
367 typename ref_seqs_type,
368 typename ref_ids_type,
371 typename offset_type,
372 typename ref_seq_type,
373 typename ref_id_type,
374 typename ref_offset_type,
381 typename tag_dict_type,
382 typename e_value_type,
383 typename bit_score_type>
386 ref_seqs_type & ref_seqs,
391 offset_type & offset,
392 ref_seq_type & SEQAN3_DOXYGEN_ONLY(ref_seq),
393 ref_id_type & ref_id,
394 ref_offset_type & ref_offset,
396 cigar_type & cigar_vector,
400 tag_dict_type & tag_dict,
401 e_value_type & SEQAN3_DOXYGEN_ONLY(e_value),
402 bit_score_type & SEQAN3_DOXYGEN_ONLY(bit_score))
404 static_assert(detail::decays_to_ignore_v<ref_offset_type> ||
405 detail::is_type_specialisation_of_v<ref_offset_type, std::optional>,
406 "The ref_offset must be a specialisation of std::optional.");
412 int32_t ref_offset_tmp{};
414 [[maybe_unused]] int32_t offset_tmp{};
415 [[maybe_unused]] int32_t soft_clipping_end{};
417 [[maybe_unused]] int32_t ref_length{0}, seq_length{0};
421 if (is_char<'@'>(*std::ranges::begin(stream_view)))
423 read_header(stream_view, header, ref_seqs);
425 if (std::ranges::begin(stream_view) == std::ranges::end(stream_view))
431 read_field(field_view,
id);
433 uint16_t flag_integral{};
434 read_field(field_view, flag_integral);
437 read_field(field_view, ref_id_tmp);
438 check_and_assign_ref_id(
ref_id, ref_id_tmp, header, ref_seqs);
440 read_field(field_view, ref_offset_tmp);
443 if (ref_offset_tmp == -1)
445 else if (ref_offset_tmp > -1)
447 else if (ref_offset_tmp < -1)
448 throw format_error{
"No negative values are allowed for field::ref_offset."};
450 read_field(field_view,
mapq);
454 if constexpr (!detail::decays_to_ignore_v<align_type> || !detail::decays_to_ignore_v<cigar_type>)
456 if (!is_char<'*'>(*std::ranges::begin(stream_view)))
458 std::tie(tmp_cigar_vector, ref_length, seq_length) = parse_cigar(field_view);
459 transfer_soft_clipping_to(tmp_cigar_vector, offset_tmp, soft_clipping_end);
464 std::ranges::next(std::ranges::begin(field_view));
469 detail::consume(field_view);
476 if constexpr (!detail::decays_to_ignore_v<mate_type>)
479 read_field(field_view, tmp_mate_ref_id);
481 if (tmp_mate_ref_id ==
"=")
483 if constexpr (!detail::decays_to_ignore_v<ref_id_type>)
486 check_and_assign_ref_id(get<0>(
mate), ref_id_tmp, header, ref_seqs);
490 check_and_assign_ref_id(get<0>(
mate), tmp_mate_ref_id, header, ref_seqs);
494 read_field(field_view, tmp_pnext);
497 get<1>(
mate) = --tmp_pnext;
498 else if (tmp_pnext < 0)
499 throw format_error{
"No negative values are allowed at the mate mapping position."};
502 read_field(field_view, get<2>(
mate));
506 for (
size_t i = 0; i < 3u; ++i)
508 detail::consume(field_view);
514 if (!is_char<'*'>(*std::ranges::begin(stream_view)))
516 auto constexpr is_legal_alph = is_in_alphabet<seq_legal_alph_type>;
519 if (!is_legal_alph(c))
522 " evaluated to false on " +
523 detail::make_printable(c)};
527 if constexpr (detail::decays_to_ignore_v<seq_type>)
529 if constexpr (!detail::decays_to_ignore_v<align_type>)
532 "If you want to read ALIGNMENT but not SEQ, the alignment"
533 " object must store a sequence container at the second (query) position.");
535 if (!tmp_cigar_vector.empty())
538 auto tmp_iter = std::ranges::begin(seq_stream);
539 std::ranges::advance(tmp_iter, offset_tmp);
541 for (; seq_length > 0; --seq_length)
543 get<1>(align).push_back(
value_type_t<decltype(get<1>(align))>{}.assign_char(*tmp_iter));
547 std::ranges::advance(tmp_iter, soft_clipping_end);
556 detail::consume(seq_stream);
561 read_field(seq_stream,
seq);
563 if constexpr (!detail::decays_to_ignore_v<align_type>)
565 if (!tmp_cigar_vector.empty())
576 std::ranges::next(std::ranges::begin(field_view));
581 auto const tab_or_end = is_char<'\t'> || is_char<'\r'> || is_char<'\n'>;
584 if constexpr (!detail::decays_to_ignore_v<seq_type> && !detail::decays_to_ignore_v<qual_type>)
586 if (std::ranges::distance(
seq) != 0 && std::ranges::distance(
qual) != 0 &&
587 std::ranges::distance(
seq) != std::ranges::distance(
qual))
589 throw format_error{detail::to_string(
"Sequence length (", std::ranges::distance(
seq),
590 ") and quality length (", std::ranges::distance(
qual),
591 ") must be the same.")};
597 while (is_char<'\t'>(*std::ranges::begin(stream_view)))
599 std::ranges::next(std::ranges::begin(stream_view));
603 detail::consume(stream_view |
views::take_until(!(is_char<'\r'> || is_char<'\n'>)));
609 if constexpr (!detail::decays_to_ignore_v<align_type>)
611 int32_t ref_idx{(ref_id_tmp.empty()) ? -1 : 0};
613 if constexpr (!detail::decays_to_ignore_v<ref_seqs_type>)
615 if (!ref_id_tmp.empty())
617 assert(header.
ref_dict.count(ref_id_tmp) != 0);
618 ref_idx = header.
ref_dict[ref_id_tmp];
622 construct_alignment(align, tmp_cigar_vector, ref_idx, ref_seqs, ref_offset_tmp, ref_length);
625 if constexpr (!detail::decays_to_ignore_v<cigar_type>)
626 std::swap(cigar_vector, tmp_cigar_vector);
630 template <
typename stream_type,
631 typename header_type,
634 typename ref_seq_type,
635 typename ref_id_type,
639 typename tag_dict_type,
640 typename e_value_type,
641 typename bit_score_type>
644 header_type && header,
648 int32_t
const offset,
649 ref_seq_type && SEQAN3_DOXYGEN_ONLY(ref_seq),
650 ref_id_type && ref_id,
657 tag_dict_type && tag_dict,
658 e_value_type && SEQAN3_DOXYGEN_ONLY(e_value),
659 bit_score_type && SEQAN3_DOXYGEN_ONLY(bit_score))
677 static_assert((std::ranges::forward_range<seq_type> &&
679 "The seq object must be a std::ranges::forward_range over "
680 "letters that model seqan3::alphabet.");
682 static_assert((std::ranges::forward_range<id_type> &&
684 "The id object must be a std::ranges::forward_range over "
685 "letters that model seqan3::alphabet.");
687 if constexpr (!detail::decays_to_ignore_v<ref_id_type>)
689 static_assert((std::ranges::forward_range<ref_id_type> ||
692 "The ref_id object must be a std::ranges::forward_range "
693 "over letters that model seqan3::alphabet.");
697 static_assert(!detail::decays_to_ignore_v<header_type>,
698 "If you give indices as reference id information the header must also be present.");
702 "The align object must be a std::pair of two ranges whose "
703 "value_type is comparable to seqan3::gap");
708 "The align object must be a std::pair of two ranges whose "
709 "value_type is comparable to seqan3::gap");
711 static_assert((std::ranges::forward_range<qual_type> &&
713 "The qual object must be a std::ranges::forward_range "
714 "over letters that model seqan3::alphabet.");
717 "The mate object must be a std::tuple of size 3 with "
718 "1) a std::ranges::forward_range with a value_type modelling seqan3::alphabet, "
719 "2) a std::integral or std::optional<std::integral>, and "
720 "3) a std::integral.");
722 static_assert(((std::ranges::forward_range<decltype(std::get<0>(
mate))> ||
728 "The mate object must be a std::tuple of size 3 with "
729 "1) a std::ranges::forward_range with a value_type modelling seqan3::alphabet, "
730 "2) a std::integral or std::optional<std::integral>, and "
731 "3) a std::integral.");
735 static_assert(!detail::decays_to_ignore_v<header_type>,
736 "If you give indices as mate reference id information the header must also be present.");
739 "The tag_dict object must be of type seqan3::sam_tag_dictionary.");
744 if constexpr (!detail::decays_to_ignore_v<header_type> &&
745 !detail::decays_to_ignore_v<ref_id_type> &&
754 if constexpr (std::ranges::contiguous_range<decltype(
ref_id)> &&
755 std::ranges::sized_range<decltype(
ref_id)> &&
765 "The ref_id type is not convertible to the reference id information stored in the "
766 "reference dictionary of the header object.");
772 throw format_error{detail::to_string(
"The ref_id '",
ref_id,
"' was not in the list of references:",
778 throw format_error{
"The ref_offset object must be an std::integral >= 0."};
783 if constexpr (!detail::decays_to_ignore_v<header_type>)
787 write_header(stream, options, header);
788 header_was_written =
true;
796 char const separator{
'\t'};
798 write_range(stream_it, std::forward<id_type>(
id));
802 stream << static_cast<uint16_t>(
flag) << separator;
804 if constexpr (!detail::decays_to_ignore_v<ref_id_type>)
819 write_range(stream_it, std::forward<ref_id_type>(
ref_id));
830 stream << (
ref_offset.value_or(-1) + 1) << separator;
832 stream << static_cast<unsigned>(
mapq) << separator;
834 if (!std::ranges::empty(get<0>(align)) && !std::ranges::empty(get<1>(align)))
841 for (
auto chr : get<1>(align))
846 write_range(stream_it, detail::get_cigar_string(std::forward<align_type>(align),
offset, off_end));
848 else if (!cigar_vector.
empty())
850 for (
auto & c : cigar_vector)
851 stream << c.to_string();
862 write_range(stream_it, (header.
ref_ids())[get<0>(
mate)]);
866 if (get<0>(
mate).has_value())
869 write_range(stream_it, header.
ref_ids()[get<0>(
mate).value_or(0)]);
875 write_range(stream_it, get<0>(
mate));
883 stream << (get<1>(
mate).value_or(-1) + 1) << separator;
887 stream << get<1>(
mate) << separator;
890 stream << get<2>(
mate) << separator;
892 write_range(stream_it, std::forward<seq_type>(
seq));
896 write_range(stream_it, std::forward<qual_type>(
qual));
898 write_tag_fields(stream, std::forward<tag_dict_type>(tag_dict), separator);
921 template <
typename stream_view_type,
typename value_type>
923 stream_view_type && stream_view,
927 while (std::ranges::begin(stream_view) != ranges::end(stream_view))
932 if (is_char<','>(*std::ranges::begin(stream_view)))
933 std::ranges::next(std::ranges::begin(stream_view));
955 template <
typename stream_view_type>
956 inline void format_sam::read_field(stream_view_type && stream_view, sam_tag_dictionary & target)
963 uint16_t tag = static_cast<uint16_t>(*std::ranges::begin(stream_view)) << 8;
964 std::ranges::next(std::ranges::begin(stream_view));
965 tag += static_cast<uint16_t>(*std::ranges::begin(stream_view));
966 std::ranges::next(std::ranges::begin(stream_view));
967 std::ranges::next(std::ranges::begin(stream_view));
969 std::ranges::next(std::ranges::begin(stream_view));
970 std::ranges::next(std::ranges::begin(stream_view));
976 target[tag] = static_cast<char>(*std::ranges::begin(stream_view));
977 std::ranges::next(std::ranges::begin(stream_view));
983 read_field(stream_view, tmp);
990 read_field(stream_view, tmp);
996 target[tag] = stream_view | views::to<std::string>;
1007 std::ranges::next(std::ranges::begin(stream_view));
1008 std::ranges::next(std::ranges::begin(stream_view));
1010 switch (array_value_type_id)
1013 read_sam_dict_vector(target[tag], stream_view, int8_t{});
1016 read_sam_dict_vector(target[tag], stream_view, uint8_t{});
1019 read_sam_dict_vector(target[tag], stream_view, int16_t{});
1022 read_sam_dict_vector(target[tag], stream_view, uint16_t{});
1025 read_sam_dict_vector(target[tag], stream_view, int32_t{});
1028 read_sam_dict_vector(target[tag], stream_view, uint32_t{});
1031 read_sam_dict_vector(target[tag], stream_view,
float{});
1034 throw format_error{
std::string(
"The first character in the numerical ") +
1035 "id of a SAM tag must be one of [cCsSiIf] but '" + array_value_type_id +
1041 throw format_error{
std::string(
"The second character in the numerical id of a "
1042 "SAM tag must be one of [A,i,Z,H,B,f] but '") + type_id +
"' was given."};
1053 template <
typename stream_it_t, std::ranges::forward_range field_type>
1054 inline void format_sam::write_range(stream_it_t & stream_it, field_type && field_value)
1056 if (std::ranges::empty(field_value))
1068 template <
typename stream_it_t>
1069 inline void format_sam::write_range(stream_it_t & stream_it,
char const *
const field_value)
1079 template <
typename stream_t, arithmetic field_type>
1080 inline void format_sam::write_field(stream_t & stream, field_type field_value)
1084 stream << static_cast<int16_t>(field_value);
1086 stream << field_value;
1096 template <
typename stream_t>
1097 inline void format_sam::write_tag_fields(stream_t & stream, sam_tag_dictionary
const & tag_dict,
char const separator)
1099 auto stream_variant_fn = [
this, &stream] (
auto && arg)
1109 if (arg.begin() != arg.end())
1111 for (
auto it = arg.begin(); it != (arg.end() - 1); ++it)
1113 write_field(stream, *it);
1117 write_field(stream, *(arg.end() - 1));
1122 for (
auto & [tag, variant] : tag_dict)
1124 stream << separator;
1126 char char0 = tag / 256;
1127 char char1 = tag % 256;
1129 stream << char0 << char1 <<
':' << detail::sam_tag_type_char[variant.
index()] <<
':';
1131 if (detail::sam_tag_type_char_extra[variant.
index()] !=
'\0')
1132 stream << detail::sam_tag_type_char_extra[variant.
index()] <<
',';