137 #ifdef SEQAN3_DEPRECATED_310
138 template <
typename stream_type,
139 typename seq_legal_alph_type,
bool seq_qual_combined,
147 qual_type & qualities);
149 template <
typename stream_type,
150 typename seq_legal_alph_type,
158 qual_type & qualities);
161 template <
typename stream_type,
169 qual_type && qualities);
171 template <
typename stream_type,
172 typename seq_legal_alph_type,
173 typename ref_seqs_type,
174 typename ref_ids_type,
177 typename offset_type,
178 typename ref_seq_type,
179 typename ref_id_type,
180 typename ref_offset_type,
187 typename tag_dict_type,
188 typename e_value_type,
189 typename bit_score_type>
192 ref_seqs_type & ref_seqs,
197 offset_type & offset,
198 ref_seq_type & SEQAN3_DOXYGEN_ONLY(ref_seq),
199 ref_id_type & ref_id,
200 ref_offset_type & ref_offset,
202 cigar_type & cigar_vector,
206 tag_dict_type & tag_dict,
207 e_value_type & SEQAN3_DOXYGEN_ONLY(e_value),
208 bit_score_type & SEQAN3_DOXYGEN_ONLY(bit_score));
210 template <
typename stream_type,
211 typename header_type,
214 typename ref_seq_type,
215 typename ref_id_type,
219 typename tag_dict_type,
220 typename e_value_type,
221 typename bit_score_type>
224 header_type && header,
228 int32_t
const offset,
229 ref_seq_type && SEQAN3_DOXYGEN_ONLY(ref_seq),
230 ref_id_type && ref_id,
237 tag_dict_type && tag_dict,
238 e_value_type && SEQAN3_DOXYGEN_ONLY(e_value),
239 bit_score_type && SEQAN3_DOXYGEN_ONLY(bit_score));
// Default-constructed SAM header owned by the format object. The sequence-record reader
// in this file passes it as the header argument of read_alignment_record.
249 sam_file_header<> default_header{};
// Flag that starts out false.
// NOTE(review): presumably records whether the parsed header carried reference
// information; the code that sets or reads it is not visible here - confirm.
252 bool ref_info_present_in_header{
false};
// Returns the argument unchanged: v is perfect-forwarded and, because the return type is
// decltype(auto), the caller gets it back with its original value category.
// Never throws (declared noexcept) and does not modify the format object (const).
261 template <
typename t>
262 decltype(
auto) default_or(t && v) const noexcept
264 return std::forward<t>(v);
// Make the read_field overloads of format_sam_base visible in this class; without this
// using-declaration the read_field overloads declared below would hide them.
267 using format_sam_base::read_field;
269 template <
typename stream_view_type,
typename value_type>
271 stream_view_type && stream_view,
274 template <
typename stream_view_type>
276 stream_view_type && stream_view);
// Reads one optional SAM tag field from stream_view and stores the parsed value in
// target[tag]. The out-of-class definition throws format_error when the value type id is
// not one of [A,i,Z,H,B,f], or when an array element type id is not one of [cCsSiIf].
278 template <
typename stream_view_type>
279 void read_field(stream_view_type && stream_view, sam_tag_dictionary & target);
// Writes field_value to stream_it. The definition first tests std::ranges::empty(field_value);
// ranges whose elements are char are written as-is, all other ranges are written through
// views::to_char.
// NOTE(review): the empty branch is presumably where the '*' placeholder is emitted (going
// by the function name) - confirm against the full definition.
281 template <
typename stream_it_t, std::ranges::forward_range field_type>
282 void write_range_or_asterisk(stream_it_t & stream_it, field_type && field_value);
// Overload of write_range_or_asterisk that takes a pointer to const char instead of a
// forward range.
// NOTE(review): presumably expects a null-terminated string; the body is not visible
// here - confirm.
284 template <
typename stream_it_t>
285 void write_range_or_asterisk(stream_it_t & stream_it,
char const *
const field_value);
// Writes every entry of tag_dict to the output iterator (named stream here, stream_it in
// the definition), each entry preceded by separator. For each entry the definition splits
// the numeric tag into two chars (tag / 256 and tag % 256) and writes the type character
// looked up in detail::sam_tag_type_char by the variant index, followed by the character
// from detail::sam_tag_type_char_extra when that one is not '\0'.
287 template <
typename stream_it_t>
288 void write_tag_fields(stream_it_t & stream, sam_tag_dictionary
const & tag_dict,
char const separator);
292 #ifdef SEQAN3_DEPRECATED_310
293 template <
typename stream_type,
294 typename seq_legal_alph_type,
bool seq_qual_combined,
302 qual_type & qualities)
304 template <
typename stream_type,
305 typename seq_legal_alph_type,
313 qual_type & qualities)
318 #ifdef SEQAN3_DEPRECATED_310
319 if constexpr (seq_qual_combined)
322 read_alignment_record(stream, align_options, std::ignore, default_header,
sequence, tmp_qual,
id,
323 std::ignore, std::ignore, std::ignore, std::ignore, std::ignore, std::ignore,
324 std::ignore, std::ignore, std::ignore, std::ignore, std::ignore, std::ignore);
326 for (
auto sit = tmp_qual.begin(), dit = std::ranges::begin(
sequence); sit != tmp_qual.end(); ++sit, ++dit)
327 get<1>(*dit).assign_char(*sit);
332 read_alignment_record(stream, align_options, std::ignore, default_header,
sequence, qualities,
id,
333 std::ignore, std::ignore, std::ignore, std::ignore, std::ignore, std::ignore,
334 std::ignore, std::ignore, std::ignore, std::ignore, std::ignore, std::ignore);
337 if constexpr (!detail::decays_to_ignore_v<seq_type>)
338 if (std::ranges::distance(
sequence) == 0)
339 throw parse_error{
"The sequence information must not be empty."};
340 if constexpr (!detail::decays_to_ignore_v<id_type>)
341 if (std::ranges::distance(
id) == 0)
342 throw parse_error{
"The id information must not be empty."};
345 id =
id | detail::take_until_and_consume(
is_space) | views::to<id_type>;
349 template <
typename stream_type,
357 qual_type && qualities)
368 default_or(qualities),
385 template <
typename stream_type,
386 typename seq_legal_alph_type,
387 typename ref_seqs_type,
388 typename ref_ids_type,
391 typename offset_type,
392 typename ref_seq_type,
393 typename ref_id_type,
394 typename ref_offset_type,
401 typename tag_dict_type,
402 typename e_value_type,
403 typename bit_score_type>
406 ref_seqs_type & ref_seqs,
411 offset_type & offset,
412 ref_seq_type & SEQAN3_DOXYGEN_ONLY(ref_seq),
413 ref_id_type & ref_id,
414 ref_offset_type & ref_offset,
416 cigar_type & cigar_vector,
420 tag_dict_type & tag_dict,
421 e_value_type & SEQAN3_DOXYGEN_ONLY(e_value),
422 bit_score_type & SEQAN3_DOXYGEN_ONLY(bit_score))
424 static_assert(detail::decays_to_ignore_v<ref_offset_type> ||
425 detail::is_type_specialisation_of_v<ref_offset_type, std::optional>,
426 "The ref_offset must be a specialisation of std::optional.");
428 auto stream_view = detail::istreambuf(stream);
429 auto field_view = stream_view | detail::take_until_or_throw_and_consume(is_char<'\t'>);
432 int32_t ref_offset_tmp{};
433 std::ranges::range_value_t<decltype(header.
ref_ids())> ref_id_tmp{};
434 [[maybe_unused]] int32_t offset_tmp{};
435 [[maybe_unused]] int32_t soft_clipping_end{};
437 [[maybe_unused]] int32_t ref_length{0}, seq_length{0};
441 if (is_char<'@'>(*std::ranges::begin(stream_view)))
443 read_header(stream_view, header, ref_seqs);
445 if (std::ranges::begin(stream_view) == std::ranges::end(stream_view))
451 read_field(field_view,
id);
453 uint16_t flag_integral{};
454 read_field(field_view, flag_integral);
457 read_field(field_view, ref_id_tmp);
458 check_and_assign_ref_id(
ref_id, ref_id_tmp, header, ref_seqs);
460 read_field(field_view, ref_offset_tmp);
463 if (ref_offset_tmp == -1)
465 else if (ref_offset_tmp > -1)
467 else if (ref_offset_tmp < -1)
468 throw format_error{
"No negative values are allowed for field::ref_offset."};
470 read_field(field_view,
mapq);
474 if constexpr (!detail::decays_to_ignore_v<align_type> || !detail::decays_to_ignore_v<cigar_type>)
476 if (!is_char<'*'>(*std::ranges::begin(stream_view)))
478 std::tie(tmp_cigar_vector, ref_length, seq_length) = detail::parse_cigar(field_view);
479 transfer_soft_clipping_to(tmp_cigar_vector, offset_tmp, soft_clipping_end);
484 std::ranges::next(std::ranges::begin(field_view));
489 detail::consume(field_view);
496 if constexpr (!detail::decays_to_ignore_v<mate_type>)
498 std::ranges::range_value_t<decltype(header.
ref_ids())> tmp_mate_ref_id{};
499 read_field(field_view, tmp_mate_ref_id);
501 if (tmp_mate_ref_id ==
"=")
503 if constexpr (!detail::decays_to_ignore_v<ref_id_type>)
506 check_and_assign_ref_id(get<0>(
mate), ref_id_tmp, header, ref_seqs);
510 check_and_assign_ref_id(get<0>(
mate), tmp_mate_ref_id, header, ref_seqs);
514 read_field(field_view, tmp_pnext);
517 get<1>(
mate) = --tmp_pnext;
518 else if (tmp_pnext < 0)
519 throw format_error{
"No negative values are allowed at the mate mapping position."};
522 read_field(field_view, get<2>(
mate));
526 for (
size_t i = 0; i < 3u; ++i)
528 detail::consume(field_view);
534 if (!is_char<'*'>(*std::ranges::begin(stream_view)))
536 auto constexpr is_legal_alph = char_is_valid_for<seq_legal_alph_type>;
539 if (!is_legal_alph(c))
541 "char_is_valid_for<" +
542 detail::type_name_as_string<seq_legal_alph_type> +
543 "> evaluated to false on " +
544 detail::make_printable(c)};
548 if constexpr (detail::decays_to_ignore_v<seq_type>)
550 if constexpr (!detail::decays_to_ignore_v<align_type>)
553 "If you want to read ALIGNMENT but not SEQ, the alignment"
554 " object must store a sequence container at the second (query) position.");
556 if (!tmp_cigar_vector.empty())
559 auto tmp_iter = std::ranges::begin(seq_stream);
560 std::ranges::advance(tmp_iter, offset_tmp);
562 for (; seq_length > 0; --seq_length)
564 get<1>(align).push_back(std::ranges::range_value_t<decltype(get<1>(align))>{}.assign_char(*tmp_iter));
568 std::ranges::advance(tmp_iter, soft_clipping_end);
577 detail::consume(seq_stream);
582 read_field(seq_stream,
seq);
584 if constexpr (!detail::decays_to_ignore_v<align_type>)
586 if (!tmp_cigar_vector.empty())
588 assign_unaligned(get<1>(align),
597 std::ranges::next(std::ranges::begin(field_view));
602 auto const tab_or_end = is_char<'\t'> || is_char<'\r'> || is_char<'\n'>;
603 read_field(stream_view | detail::take_until_or_throw(tab_or_end),
qual);
605 if constexpr (!detail::decays_to_ignore_v<seq_type> && !detail::decays_to_ignore_v<qual_type>)
607 if (std::ranges::distance(
seq) != 0 && std::ranges::distance(
qual) != 0 &&
608 std::ranges::distance(
seq) != std::ranges::distance(
qual))
610 throw format_error{detail::to_string(
"Sequence length (", std::ranges::distance(
seq),
611 ") and quality length (", std::ranges::distance(
qual),
612 ") must be the same.")};
618 while (is_char<'\t'>(*std::ranges::begin(stream_view)))
620 std::ranges::next(std::ranges::begin(stream_view));
621 read_field(stream_view | detail::take_until_or_throw(tab_or_end), tag_dict);
624 detail::consume(stream_view | detail::take_until(!(is_char<'\r'> || is_char<'\n'>)));
630 if constexpr (!detail::decays_to_ignore_v<align_type>)
632 int32_t ref_idx{(ref_id_tmp.empty()) ? -1 : 0};
634 if constexpr (!detail::decays_to_ignore_v<ref_seqs_type>)
636 if (!ref_id_tmp.empty())
638 assert(header.
ref_dict.count(ref_id_tmp) != 0);
639 ref_idx = header.
ref_dict[ref_id_tmp];
643 construct_alignment(align, tmp_cigar_vector, ref_idx, ref_seqs, ref_offset_tmp, ref_length);
646 if constexpr (!detail::decays_to_ignore_v<cigar_type>)
647 std::swap(cigar_vector, tmp_cigar_vector);
651 template <
typename stream_type,
652 typename header_type,
655 typename ref_seq_type,
656 typename ref_id_type,
660 typename tag_dict_type,
661 typename e_value_type,
662 typename bit_score_type>
665 header_type && header,
669 int32_t
const offset,
670 ref_seq_type && SEQAN3_DOXYGEN_ONLY(ref_seq),
671 ref_id_type && ref_id,
678 tag_dict_type && tag_dict,
679 e_value_type && SEQAN3_DOXYGEN_ONLY(e_value),
680 bit_score_type && SEQAN3_DOXYGEN_ONLY(bit_score))
698 static_assert((std::ranges::forward_range<seq_type> &&
699 alphabet<std::ranges::range_reference_t<seq_type>>),
700 "The seq object must be a std::ranges::forward_range over "
701 "letters that model seqan3::alphabet.");
703 static_assert((std::ranges::forward_range<id_type> &&
704 alphabet<std::ranges::range_reference_t<id_type>>),
705 "The id object must be a std::ranges::forward_range over "
706 "letters that model seqan3::alphabet.");
708 if constexpr (!detail::decays_to_ignore_v<ref_id_type>)
710 static_assert((std::ranges::forward_range<ref_id_type> ||
713 "The ref_id object must be a std::ranges::forward_range "
714 "over letters that model seqan3::alphabet.");
718 static_assert(!detail::decays_to_ignore_v<header_type>,
719 "If you give indices as reference id information the header must also be present.");
723 "The align object must be a std::pair of two ranges whose "
724 "value_type is comparable to seqan3::gap");
727 std::equality_comparable_with<
gap, std::ranges::range_reference_t<decltype(std::get<0>(align))>> &&
728 std::equality_comparable_with<
gap, std::ranges::range_reference_t<decltype(std::get<1>(align))>>),
729 "The align object must be a std::pair of two ranges whose "
730 "value_type is comparable to seqan3::gap");
732 static_assert((std::ranges::forward_range<qual_type> &&
733 alphabet<std::ranges::range_reference_t<qual_type>>),
734 "The qual object must be a std::ranges::forward_range "
735 "over letters that model seqan3::alphabet.");
738 "The mate object must be a std::tuple of size 3 with "
739 "1) a std::ranges::forward_range with a value_type modelling seqan3::alphabet, "
740 "2) a std::integral or std::optional<std::integral>, and "
741 "3) a std::integral.");
743 static_assert(((std::ranges::forward_range<decltype(std::get<0>(
mate))> ||
749 "The mate object must be a std::tuple of size 3 with "
750 "1) a std::ranges::forward_range with a value_type modelling seqan3::alphabet, "
751 "2) a std::integral or std::optional<std::integral>, and "
752 "3) a std::integral.");
756 static_assert(!detail::decays_to_ignore_v<header_type>,
757 "If you give indices as mate reference id information the header must also be present.");
760 "The tag_dict object must be of type seqan3::sam_tag_dictionary.");
765 if constexpr (!detail::decays_to_ignore_v<header_type> &&
766 !detail::decays_to_ignore_v<ref_id_type> &&
775 if constexpr (std::ranges::contiguous_range<decltype(
ref_id)> &&
776 std::ranges::sized_range<decltype(
ref_id)> &&
777 std::ranges::borrowed_range<decltype(
ref_id)>)
786 "The ref_id type is not convertible to the reference id information stored in the "
787 "reference dictionary of the header object.");
793 throw format_error{detail::to_string(
"The ref_id '",
ref_id,
"' was not in the list of references:",
799 throw format_error{
"The ref_offset object must be an std::integral >= 0."};
804 if constexpr (!detail::decays_to_ignore_v<header_type>)
808 write_header(stream, options, header);
809 header_was_written =
true;
817 detail::fast_ostreambuf_iterator stream_it{*stream.rdbuf()};
818 constexpr
char separator{
'\t'};
820 write_range_or_asterisk(stream_it,
id);
821 *stream_it = separator;
823 stream_it.write_number(
static_cast<uint16_t
>(
flag));
824 *stream_it = separator;
826 if constexpr (!detail::decays_to_ignore_v<ref_id_type>)
830 write_range_or_asterisk(stream_it, (header.
ref_ids())[
ref_id]);
835 write_range_or_asterisk(stream_it, (header.
ref_ids())[
ref_id.value()]);
841 write_range_or_asterisk(stream_it,
ref_id);
849 *stream_it = separator;
852 stream_it.write_number(
ref_offset.value_or(-1) + 1);
853 *stream_it = separator;
855 stream_it.write_number(
static_cast<unsigned>(
mapq));
856 *stream_it = separator;
858 if (!std::ranges::empty(cigar_vector))
860 for (
auto & c : cigar_vector)
861 stream_it.write_range(c.to_string());
863 else if (!std::ranges::empty(get<0>(align)) && !std::ranges::empty(get<1>(align)))
870 for (
auto chr : get<1>(align))
878 write_range_or_asterisk(stream_it, detail::get_cigar_string(align,
offset, off_end));
885 *stream_it = separator;
889 write_range_or_asterisk(stream_it, (header.
ref_ids())[get<0>(
mate)]);
893 if (get<0>(
mate).has_value())
896 write_range_or_asterisk(stream_it, header.
ref_ids()[get<0>(
mate).value_or(0)]);
902 write_range_or_asterisk(stream_it, get<0>(
mate));
905 *stream_it = separator;
910 stream_it.write_number(get<1>(
mate).value_or(-1) + 1);
911 *stream_it = separator;
915 stream_it.write_number(get<1>(
mate));
916 *stream_it = separator;
919 stream_it.write_number(get<2>(
mate));
920 *stream_it = separator;
922 write_range_or_asterisk(stream_it,
seq);
923 *stream_it = separator;
925 write_range_or_asterisk(stream_it,
qual);
927 write_tag_fields(stream_it, tag_dict, separator);
950 template <
typename stream_view_type,
typename value_type>
952 stream_view_type && stream_view,
956 while (std::ranges::begin(stream_view) != ranges::end(stream_view))
958 read_field(stream_view | detail::take_until(is_char<','>), value);
961 if (is_char<','>(*std::ranges::begin(stream_view)))
962 std::ranges::next(std::ranges::begin(stream_view));
980 template <
typename stream_view_type>
982 stream_view_type && stream_view)
987 while (std::ranges::begin(stream_view) != ranges::end(stream_view))
991 read_field(stream_view | detail::take_exactly_or_throw(2), value);
995 throw format_error{
"Hexadecimal tag has an uneven number of digits!"};
1021 template <
typename stream_view_type>
1022 inline void format_sam::read_field(stream_view_type && stream_view, sam_tag_dictionary & target)
1030 std::ranges::next(std::ranges::begin(stream_view));
1032 std::ranges::next(std::ranges::begin(stream_view));
1033 std::ranges::next(std::ranges::begin(stream_view));
1035 std::ranges::next(std::ranges::begin(stream_view));
1036 std::ranges::next(std::ranges::begin(stream_view));
1043 std::ranges::next(std::ranges::begin(stream_view));
1049 read_field(stream_view, tmp);
1056 read_field(stream_view, tmp);
1062 target[tag] = stream_view | views::to<std::string>;
1067 read_sam_byte_vector(target[tag], stream_view);
1073 std::ranges::next(std::ranges::begin(stream_view));
1074 std::ranges::next(std::ranges::begin(stream_view));
1076 switch (array_value_type_id)
1079 read_sam_dict_vector(target[tag], stream_view, int8_t{});
1082 read_sam_dict_vector(target[tag], stream_view, uint8_t{});
1085 read_sam_dict_vector(target[tag], stream_view, int16_t{});
1088 read_sam_dict_vector(target[tag], stream_view, uint16_t{});
1091 read_sam_dict_vector(target[tag], stream_view, int32_t{});
1094 read_sam_dict_vector(target[tag], stream_view, uint32_t{});
1097 read_sam_dict_vector(target[tag], stream_view,
float{});
1100 throw format_error{
std::string(
"The first character in the numerical ") +
1101 "id of a SAM tag must be one of [cCsSiIf] but '" + array_value_type_id +
1107 throw format_error{
std::string(
"The second character in the numerical id of a "
1108 "SAM tag must be one of [A,i,Z,H,B,f] but '") + type_id +
"' was given."};
1119 template <
typename stream_it_t, std::ranges::forward_range field_type>
1120 inline void format_sam::write_range_or_asterisk(stream_it_t & stream_it, field_type && field_value)
1122 if (std::ranges::empty(field_value))
1128 if constexpr (std::same_as<
std::remove_cvref_t<std::ranges::range_reference_t<field_type>>,
char>)
1129 stream_it.write_range(field_value);
1131 stream_it.write_range(field_value | views::
to_char);
1141 template <typename stream_it_t>
1142 inline
void format_sam::write_range_or_asterisk(stream_it_t & stream_it,
char const * const field_value)
1154 template <
typename stream_it_t>
1155 inline void format_sam::write_tag_fields(stream_it_t & stream_it, sam_tag_dictionary
const & tag_dict,
char const separator)
1157 auto const stream_variant_fn = [&stream_it] (
auto && arg)
1161 if constexpr (std::ranges::input_range<T>)
1165 stream_it.write_range(arg);
1169 if (!std::ranges::empty(arg))
1171 stream_it.write_number(std::to_integer<uint8_t>(*std::ranges::begin(arg)));
1176 stream_it.write_number(std::to_integer<uint8_t>(elem));
1182 if (!std::ranges::empty(arg))
1184 stream_it.write_number(*std::ranges::begin(arg));
1189 stream_it.write_number(elem);
1200 stream_it.write_number(arg);
1204 for (
auto & [tag, variant] : tag_dict)
1206 *stream_it = separator;
1208 char const char0 = tag / 256;
1209 char const char1 = tag % 256;
1214 *stream_it = detail::sam_tag_type_char[variant.
index()];
1217 if (detail::sam_tag_type_char_extra[variant.
index()] !=
'\0')
1219 *stream_it = detail::sam_tag_type_char_extra[variant.
index()];
Core alphabet concept and free function/type trait wrappers.
Provides seqan3::views::to_char.
The alphabet of a gap character '-'.
Definition: gap.hpp:39
The SAM tag dictionary class that stores all optional SAM fields.
Definition: sam_tag_dictionary.hpp:332
Provides seqan3::detail::fast_ostreambuf_iterator.
constexpr auto to_char
Return the char representation of an alphabet object.
Definition: concept.hpp:384
constexpr auto is_space
Checks whether c is a space character.
Definition: predicate.hpp:146
sam_flag
An enum flag that describes the properties of an aligned read (given as a SAM record).
Definition: sam_flag.hpp:73
@ none
None of the flags below are set.
@ flag
The alignment flag (bit information), uint16_t value.
@ ref_offset
Sequence (seqan3::field::ref_seq) relative start position (0-based), unsigned value.
@ mapq
The mapping quality of the seqan3::field::seq alignment, usually a Phred-scaled score.
@ offset
Sequence (seqan3::field::seq) relative start position (0-based), unsigned value.
@ mate
The mate pair information given as a std::tuple of reference name, offset and template length.
@ ref_id
The identifier of the (reference) sequence that seqan3::field::seq was aligned to.
@ seq
The "sequence", usually a range of nucleotides or amino acids.
@ qual
The qualities, usually in Phred score notation.
typename decltype(detail::split_after< i >(list_t{}))::second_type drop
Return a seqan3::type_list of the types in the input type list, except the first n.
Definition: traits.hpp:388
decltype(detail::transform< trait_t >(list_t{})) transform
Apply a transformation trait to every type in the list and return a seqan3::type_list of the results.
Definition: traits.hpp:471
constexpr size_t size
The size of a type pack.
Definition: traits.hpp:151
constexpr auto slice
A view adaptor that returns a half-open interval on the underlying range.
Definition: slice.hpp:189
auto const move
A view that turns lvalue-references into rvalue-references.
Definition: move.hpp:74
The generic alphabet concept that covers most data types used in ranges.
Resolves to std::ranges::implicitly_convertible_to<type1, type2>(). This entity is not part of the SeqAn API. Do not rely on it in your applications.
A more refined container concept than seqan3::container.
The generic concept for a (biological) sequence.
Whether a type behaves like a tuple.
Auxiliary functions for the alignment IO.
Provides seqan3::detail::istreambuf.
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:29
Adaptations of concepts from the Ranges TS.
Provides seqan3::sam_file_output_options.
Provides the seqan3::sam_tag_dictionary class and auxiliaries.
Provides helper data structures for the seqan3::sam_file_output.
Provides seqan3::sequence_file_output_options.
Thrown if there is a parse error, such as reading an unexpected character from an input stream.
Definition: exception.hpp:48
The options type defines various option members that influence the behavior of all or some formats.
Definition: output_options.hpp:23
bool add_carriage_return
The default plain text line-ending is "\n", but on Windows an additional carriage return is recommended ("\r\n" for line-ending).
Definition: output_options.hpp:27
bool sam_require_header
Whether to require a header for SAM files.
Definition: output_options.hpp:41
The options type defines various option members that influence the behaviour of all or some formats.
Definition: output_options.hpp:22
Exposes the value_type of another type.
Definition: pre.hpp:58
Provides seqan3::views::take_until and seqan3::views::take_until_or_throw.
Provides traits to inspect some information of a type, for example its name.
Provides seqan3::tuple_like.
Provides seqan3::views::slice.
Provides seqan3::views::to.