126 template <
typename stream_type,
127 typename seq_legal_alph_type,
128 typename stream_pos_type,
134 stream_pos_type & position_buffer,
137 qual_type & qualities);
139 template <
typename stream_type,
147 qual_type && qualities);
149 template <
typename stream_type,
150 typename seq_legal_alph_type,
151 typename ref_seqs_type,
152 typename ref_ids_type,
153 typename stream_pos_type,
156 typename ref_seq_type,
157 typename ref_id_type,
158 typename ref_offset_type,
164 typename tag_dict_type,
165 typename e_value_type,
166 typename bit_score_type>
169 ref_seqs_type & ref_seqs,
171 stream_pos_type & position_buffer,
175 ref_seq_type & SEQAN3_DOXYGEN_ONLY(
ref_seq),
178 cigar_type & cigar_vector,
182 tag_dict_type & tag_dict,
183 e_value_type & SEQAN3_DOXYGEN_ONLY(e_value),
184 bit_score_type & SEQAN3_DOXYGEN_ONLY(
bit_score));
186 template <
typename stream_type,
187 typename header_type,
190 typename ref_seq_type,
191 typename ref_id_type,
194 typename tag_dict_type,
195 typename e_value_type,
196 typename bit_score_type>
199 header_type && header,
203 ref_seq_type && SEQAN3_DOXYGEN_ONLY(
ref_seq),
210 tag_dict_type && tag_dict,
211 e_value_type && SEQAN3_DOXYGEN_ONLY(e_value),
212 bit_score_type && SEQAN3_DOXYGEN_ONLY(
bit_score));
222 sam_file_header<> default_header{};
234 template <
typename t>
235 decltype(
auto) default_or(t && v)
const noexcept
237 return std::forward<t>(v);
240 template <arithmetic value_type>
245 void read_sam_dict(
std::string_view const tag_str, sam_tag_dictionary & target);
247 template <
typename stream_it_t, std::ranges::forward_range field_type>
248 void write_range_or_asterisk(stream_it_t & stream_it, field_type && field_value);
250 template <
typename stream_it_t>
251 void write_range_or_asterisk(stream_it_t & stream_it,
char const *
const field_value);
253 template <
typename stream_it_t>
254 void write_tag_fields(stream_it_t & stream, sam_tag_dictionary
const & tag_dict,
char const separator);
258template <
typename stream_type,
259 typename seq_legal_alph_type,
260 typename stream_pos_type,
266 stream_pos_type & position_buffer,
269 qual_type & qualities)
294 if constexpr (!detail::decays_to_ignore_v<seq_type>)
295 if (std::ranges::distance(
sequence) == 0)
296 throw parse_error{
"The sequence information must not be empty."};
297 if constexpr (!detail::decays_to_ignore_v<id_type>)
299 if (std::ranges::distance(
id) == 0)
300 throw parse_error{
"The id information must not be empty."};
307template <
typename stream_type,
315 qual_type && qualities)
325 default_or(qualities),
340template <
typename stream_type,
341 typename seq_legal_alph_type,
342 typename ref_seqs_type,
343 typename ref_ids_type,
344 typename stream_pos_type,
347 typename ref_seq_type,
348 typename ref_id_type,
349 typename ref_offset_type,
355 typename tag_dict_type,
356 typename e_value_type,
357 typename bit_score_type>
361 ref_seqs_type & ref_seqs,
363 stream_pos_type & position_buffer,
367 ref_seq_type & SEQAN3_DOXYGEN_ONLY(
ref_seq),
370 cigar_type & cigar_vector,
374 tag_dict_type & tag_dict,
375 e_value_type & SEQAN3_DOXYGEN_ONLY(e_value),
376 bit_score_type & SEQAN3_DOXYGEN_ONLY(
bit_score))
378 static_assert(detail::decays_to_ignore_v<ref_offset_type>
379 || detail::is_type_specialisation_of_v<ref_offset_type, std::optional>,
380 "The ref_offset must be a specialisation of std::optional.");
382 auto stream_it = detail::fast_istreambuf_iterator{*stream.rdbuf()};
384 auto stream_view = detail::istreambuf(stream);
386 int32_t ref_offset_tmp{};
387 std::ranges::range_value_t<
decltype(header.
ref_ids())> ref_id_tmp{};
393 read_header(stream_view, header, ref_seqs);
400 position_buffer = stream.tellg();
405 stream_it.cache_record_into(
'\n',
'\t', raw_record);
409 if constexpr (!detail::decays_to_ignore_v<id_type>)
410 read_forward_range_field(raw_record[0],
id);
412 uint16_t flag_integral{};
413 read_arithmetic_field(raw_record[1], flag_integral);
416 read_forward_range_field(raw_record[2], ref_id_tmp);
417 check_and_assign_ref_id(
ref_id, ref_id_tmp, header, ref_seqs);
419 read_arithmetic_field(raw_record[3], ref_offset_tmp);
422 if (ref_offset_tmp == -1)
424 else if (ref_offset_tmp > -1)
426 else if (ref_offset_tmp < -1)
427 throw format_error{
"No negative values are allowed for field::ref_offset."};
429 if constexpr (!detail::decays_to_ignore_v<mapq_type>)
430 read_arithmetic_field(raw_record[4],
mapq);
434 if constexpr (!detail::decays_to_ignore_v<cigar_type>)
435 cigar_vector = detail::parse_cigar(raw_record[5]);
439 if constexpr (!detail::decays_to_ignore_v<mate_type>)
441 std::ranges::range_value_t<
decltype(header.
ref_ids())> tmp_mate_ref_id{};
442 read_forward_range_field(raw_record[6], tmp_mate_ref_id);
444 if (tmp_mate_ref_id ==
"=")
446 if constexpr (!detail::decays_to_ignore_v<ref_id_type>)
449 check_and_assign_ref_id(get<0>(
mate), ref_id_tmp, header, ref_seqs);
453 check_and_assign_ref_id(get<0>(
mate), tmp_mate_ref_id, header, ref_seqs);
457 read_arithmetic_field(raw_record[7], tmp_pnext);
460 get<1>(
mate) = --tmp_pnext;
461 else if (tmp_pnext < 0)
462 throw format_error{
"No negative values are allowed at the mate mapping position."};
465 read_arithmetic_field(raw_record[8], get<2>(
mate));
470 if constexpr (!detail::decays_to_ignore_v<seq_type>)
477 constexpr auto is_legal_alph = char_is_valid_for<seq_legal_alph_type>;
479 for (
size_t i = 0; i < seq_str.
size(); ++i)
481 if (!is_legal_alph(seq_str[i]))
483 + detail::type_name_as_string<seq_legal_alph_type>
484 +
"> evaluated to false on " + detail::make_printable(seq_str[i])};
496 size_t tag_begin_pos = raw_record[10].find(
'\t');
499 (tag_begin_pos == std::string_view::npos) ? raw_record[10] : raw_record[10].substr(0, tag_begin_pos);
501 if constexpr (!detail::decays_to_ignore_v<qual_type>)
502 read_forward_range_field(qualities,
qual);
504 if constexpr (!detail::decays_to_ignore_v<seq_type> && !detail::decays_to_ignore_v<qual_type>)
506 if (std::ranges::distance(
seq) != 0 && std::ranges::distance(
qual) != 0
507 && std::ranges::distance(
seq) != std::ranges::distance(
qual))
509 throw format_error{detail::to_string(
"Sequence length (",
510 std::ranges::distance(
seq),
511 ") and quality length (",
512 std::ranges::distance(
qual),
513 ") must be the same.")};
519 if constexpr (!detail::decays_to_ignore_v<tag_dict_type>)
521 while (tag_begin_pos != std::string_view::npos)
524 size_t const tag_end_pos = raw_record[10].find(
'\t', tag_begin_pos);
526 char const * tag_begin = raw_record[10].
begin() + tag_begin_pos;
527 char const * tag_end =
528 (tag_end_pos == std::string_view::npos) ? raw_record[10].end() : raw_record[10].
begin() + tag_end_pos;
532 tag_begin_pos = tag_end_pos;
536 assert(stream_it == std::default_sentinel_t{} || *stream_it ==
'\n');
541template <
typename stream_type,
542 typename header_type,
545 typename ref_seq_type,
546 typename ref_id_type,
549 typename tag_dict_type,
550 typename e_value_type,
551 typename bit_score_type>
554 header_type && header,
558 ref_seq_type && SEQAN3_DOXYGEN_ONLY(
ref_seq),
565 tag_dict_type && tag_dict,
566 e_value_type && SEQAN3_DOXYGEN_ONLY(e_value),
567 bit_score_type && SEQAN3_DOXYGEN_ONLY(
bit_score))
586 "The seq object must be a std::ranges::forward_range over "
587 "letters that model seqan3::alphabet.");
590 "The id object must be a std::ranges::forward_range over "
591 "letters that model seqan3::alphabet.");
593 if constexpr (!detail::decays_to_ignore_v<ref_id_type>)
595 static_assert((std::ranges::forward_range<ref_id_type> || std::integral<std::remove_reference_t<ref_id_type>>
596 || detail::is_type_specialisation_of_v<std::remove_cvref_t<ref_id_type>,
std::optional>),
597 "The ref_id object must be a std::ranges::forward_range "
598 "over letters that model seqan3::alphabet.");
600 if constexpr (std::integral<std::remove_cvref_t<ref_id_type>>
601 || detail::is_type_specialisation_of_v<std::remove_cvref_t<ref_id_type>,
std::optional>)
602 static_assert(!detail::decays_to_ignore_v<header_type>,
603 "If you give indices as reference id information the header must also be present.");
607 "The qual object must be a std::ranges::forward_range "
608 "over letters that model seqan3::alphabet.");
611 "The mate object must be a std::tuple of size 3 with "
612 "1) a std::ranges::forward_range with a value_type modelling seqan3::alphabet, "
613 "2) a std::integral or std::optional<std::integral>, and "
614 "3) a std::integral.");
617 ((std::ranges::forward_range<decltype(std::get<0>(
mate))>
620 && (std::integral<std::remove_cvref_t<decltype(std::get<1>(
mate))>>
622 && std::integral<std::remove_cvref_t<decltype(std::get<2>(
mate))>>),
623 "The mate object must be a std::tuple of size 3 with "
624 "1) a std::ranges::forward_range with a value_type modelling seqan3::alphabet, "
625 "2) a std::integral or std::optional<std::integral>, and "
626 "3) a std::integral.");
628 if constexpr (std::integral<std::remove_cvref_t<decltype(std::get<0>(
mate))>>
631 static_assert(!detail::decays_to_ignore_v<header_type>,
632 "If you give indices as mate reference id information the header must also be present.");
635 "The tag_dict object must be of type seqan3::sam_tag_dictionary.");
640 if constexpr (!detail::decays_to_ignore_v<header_type> && !detail::decays_to_ignore_v<ref_id_type>
641 && !std::integral<std::remove_reference_t<ref_id_type>>
642 && !detail::is_type_specialisation_of_v<std::remove_reference_t<ref_id_type>,
std::optional>)
649 if constexpr (std::ranges::contiguous_range<
decltype(
ref_id)> && std::ranges::sized_range<
decltype(
ref_id)>
650 && std::ranges::borrowed_range<
decltype(
ref_id)>)
659 "The ref_id type is not convertible to the reference id information stored in the "
660 "reference dictionary of the header object.");
668 "' was not in the list of references:",
674 throw format_error{
"The ref_offset object must be a std::integral >= 0."};
679 if constexpr (!detail::decays_to_ignore_v<header_type>)
683 write_header(stream, options, header);
684 header_was_written =
true;
692 detail::fast_ostreambuf_iterator stream_it{*stream.rdbuf()};
693 constexpr char separator{
'\t'};
695 write_range_or_asterisk(stream_it,
id);
696 *stream_it = separator;
698 stream_it.write_number(
static_cast<uint16_t
>(
flag));
699 *stream_it = separator;
701 if constexpr (!detail::decays_to_ignore_v<ref_id_type>)
703 if constexpr (std::integral<std::remove_reference_t<ref_id_type>>)
705 write_range_or_asterisk(stream_it, (header.
ref_ids())[
ref_id]);
707 else if constexpr (detail::is_type_specialisation_of_v<std::remove_reference_t<ref_id_type>,
std::optional>)
710 write_range_or_asterisk(stream_it, (header.
ref_ids())[
ref_id.value()]);
716 write_range_or_asterisk(stream_it,
ref_id);
724 *stream_it = separator;
727 stream_it.write_number(
ref_offset.value_or(-1) + 1);
728 *stream_it = separator;
730 stream_it.write_number(
static_cast<unsigned>(
mapq));
731 *stream_it = separator;
733 if (!std::ranges::empty(cigar_vector))
735 for (
auto & c : cigar_vector)
736 stream_it.write_range(c.to_string());
743 *stream_it = separator;
745 if constexpr (std::integral<std::remove_reference_t<decltype(get<0>(
mate))>>)
747 write_range_or_asterisk(stream_it, (header.
ref_ids())[get<0>(
mate)]);
749 else if constexpr (detail::is_type_specialisation_of_v<std::remove_reference_t<decltype(get<0>(
mate))>,
752 if (get<0>(
mate).has_value())
753 write_range_or_asterisk(stream_it, header.
ref_ids()[get<0>(
mate).value()]);
759 write_range_or_asterisk(stream_it, get<0>(
mate));
762 *stream_it = separator;
764 if constexpr (detail::is_type_specialisation_of_v<std::remove_cvref_t<decltype(get<1>(
mate))>,
std::optional>)
767 stream_it.write_number(get<1>(
mate).value_or(-1) + 1);
768 *stream_it = separator;
772 stream_it.write_number(get<1>(
mate));
773 *stream_it = separator;
776 stream_it.write_number(get<2>(
mate));
777 *stream_it = separator;
779 write_range_or_asterisk(stream_it,
seq);
780 *stream_it = separator;
782 write_range_or_asterisk(stream_it,
qual);
784 write_tag_fields(stream_it, tag_dict, separator);
805template <arithmetic value_type>
814 while (start_pos != std::string_view::npos)
816 end_pos = str.
find(
',', start_pos);
817 auto end = (end_pos == std::string_view::npos) ? str.
end() : str.
begin() + end_pos;
819 tmp_vector.push_back(value);
821 start_pos = (end_pos == std::string_view::npos) ? end_pos : end_pos + 1;
823 variant = std::move(tmp_vector);
841 uint8_t dummy_byte{};
843 if (str.
size() % 2 != 0)
844 throw format_error{
"[CORRUPTED SAM FILE] Hexadecimal tag must have even number of digits."};
848 for (
auto hex_begin = str.
begin(), hex_end = str.
begin() + 2; hex_begin != str.
end(); hex_begin += 2, hex_end += 2)
852 if (res.ec == std::errc::invalid_argument)
854 +
"' could not be cast into type uint8_t."};
856 if (res.ec == std::errc::result_out_of_range)
858 +
"' into type uint8_t would cause an overflow."};
863 variant = std::move(tmp_vector);
881inline void format_sam::read_sam_dict(
std::string_view const tag_str, sam_tag_dictionary & target)
888 assert(tag_str.
size() > 5);
890 uint16_t tag =
static_cast<uint16_t
>(tag_str[0]) << 8;
891 tag +=
static_cast<uint16_t
>(tag_str[1]);
893 char type_id = tag_str[3];
899 assert(tag_str.
size() == 6);
900 target[tag] = tag_str[5];
906 read_arithmetic_field(tag_str.
substr(5), tmp);
913 read_arithmetic_field(tag_str.
substr(5), tmp);
924 read_sam_byte_vector(target[tag], tag_str.
substr(5));
929 assert(tag_str.
size() > 6);
930 char array_value_type_id = tag_str[5];
932 switch (array_value_type_id)
935 read_sam_dict_vector(target[tag], tag_str.
substr(7), int8_t{});
938 read_sam_dict_vector(target[tag], tag_str.
substr(7), uint8_t{});
941 read_sam_dict_vector(target[tag], tag_str.
substr(7), int16_t{});
944 read_sam_dict_vector(target[tag], tag_str.
substr(7), uint16_t{});
947 read_sam_dict_vector(target[tag], tag_str.
substr(7), int32_t{});
950 read_sam_dict_vector(target[tag], tag_str.
substr(7), uint32_t{});
953 read_sam_dict_vector(target[tag], tag_str.
substr(7),
float{});
956 throw format_error{
std::string(
"The first character in the numerical ")
957 +
"id of a SAM tag must be one of [cCsSiIf] but '" + array_value_type_id
963 throw format_error{
std::string(
"The second character in the numerical id of a "
964 "SAM tag ([TAG]:[TYPE_ID]:[VALUE]) must be one of [A,i,Z,H,B,f] but '")
965 + type_id +
"' was given."};
976template <
typename stream_it_t, std::ranges::forward_range field_type>
977inline void format_sam::write_range_or_asterisk(stream_it_t & stream_it, field_type && field_value)
979 if (std::ranges::empty(field_value))
985 if constexpr (std::same_as<std::remove_cvref_t<std::ranges::range_reference_t<field_type>>,
char>)
986 stream_it.write_range(field_value);
998template <
typename stream_it_t>
999inline void format_sam::write_range_or_asterisk(stream_it_t & stream_it,
char const *
const field_value)
1011template <
typename stream_it_t>
1013format_sam::write_tag_fields(stream_it_t & stream_it, sam_tag_dictionary
const & tag_dict,
char const separator)
1015 auto const stream_variant_fn = [&stream_it](
auto && arg)
1019 if constexpr (std::ranges::input_range<T>)
1021 if constexpr (std::same_as<std::remove_cvref_t<std::ranges::range_reference_t<T>>,
char>)
1023 stream_it.write_range(arg);
1025 else if constexpr (std::same_as<std::remove_cvref_t<std::ranges::range_reference_t<T>>,
std::byte>)
1027 if (!std::ranges::empty(arg))
1031 for (
auto && elem : arg |
std::views::
drop(1))
1034 stream_it.write_number(std::to_integer<uint8_t>(elem));
1040 if (!std::ranges::empty(arg))
1044 for (
auto && elem : arg |
std::views::
drop(1))
1047 stream_it.write_number(elem);
1052 else if constexpr (std::same_as<std::remove_cvref_t<T>,
char>)
1058 stream_it.write_number(arg);
1062 for (
auto & [tag, variant] : tag_dict)
1064 *stream_it = separator;
1066 char const char0 = tag / 256;
1067 char const char1 = tag % 256;
1072 *stream_it = detail::sam_tag_type_char[variant.
index()];
1075 if (detail::sam_tag_type_char_extra[variant.
index()] !=
'\0')
1077 *stream_it = detail::sam_tag_type_char_extra[variant.
index()];
Core alphabet concept and free function/type trait wrappers.
The SAM tag dictionary class that stores all optional SAM fields.
Definition sam_tag_dictionary.hpp:327
Provides seqan3::detail::fast_ostreambuf_iterator.
auto const to_char
A view that calls seqan3::to_char() on each element in the input range.
Definition to_char.hpp:60
constexpr auto assign_char_to
Assign a character to an alphabet object.
Definition alphabet/concept.hpp:517
sam_flag
An enum flag that describes the properties of an aligned read (given as a SAM record).
Definition sam_flag.hpp:73
@ none
None of the flags below are set.
@ flag
The alignment flag (bit information), uint16_t value.
@ ref_offset
Sequence (seqan3::field::ref_seq) relative start position (0-based), unsigned value.
@ ref_seq
The (reference) "sequence" information, usually a range of nucleotides or amino acids.
@ mapq
The mapping quality of the seqan3::field::seq alignment, usually a Phred-scaled score.
@ bit_score
The bit score (statistical significance indicator), unsigned value.
@ mate
The mate pair information given as a std::tuple of reference name, offset and template length.
@ ref_id
The identifier of the (reference) sequence that seqan3::field::seq was aligned to.
@ seq
The "sequence", usually a range of nucleotides or amino acids.
@ qual
The qualities, usually in Phred score notation.
constexpr auto is_char
Checks whether a given letter is the same as the template non-type argument.
Definition predicate.hpp:60
constexpr auto is_space
Checks whether c is a space character.
Definition predicate.hpp:122
seqan::stl::ranges::to to
Converts a range to a container. This entity is not part of the SeqAn API....
Definition to.hpp:23
typename decltype(detail::split_after< i >(list_t{}))::second_type drop
Return a seqan3::type_list of the types in the input type list, except the first n.
Definition type_list/traits.hpp:392
The generic alphabet concept that covers most data types used in ranges.
Checks whether `from` can be implicitly converted to `to`.
The generic concept for a (biological) sequence.
Whether a type behaves like a tuple.
Auxiliary functions for the SAM IO.
Provides seqan3::detail::istreambuf.
The main SeqAn3 namespace.
Definition aligned_sequence_concept.hpp:26
SeqAn specific customisations in the standard namespace.
Provides seqan3::sam_file_output_options.
Provides helper data structures for the seqan3::sam_file_output.
Provides the seqan3::sam_tag_dictionary class and auxiliaries.
Provides seqan3::sequence_file_output_options.
Provides seqan3::views::slice.
Thrown if there is a parse error, such as reading an unexpected character from an input stream.
Definition io/exception.hpp:45
The options type defines various option members that influence the behavior of all or some formats.
Definition sam_file/output_options.hpp:23
bool add_carriage_return
The default plain text line-ending is "\n", but on Windows an additional carriage return is recommended ("\r\n").
Definition sam_file/output_options.hpp:27
bool sam_require_header
Whether to require a header for SAM files.
Definition sam_file/output_options.hpp:41
The options type defines various option members that influence the behaviour of all or some formats.
Definition sequence_file/output_options.hpp:23
Provides seqan3::views::take_until and seqan3::views::take_until_or_throw.
Provides seqan3::ranges::to.
Provides seqan3::views::to_char.
Provides traits to inspect some information of a type, for example its name.
Provides seqan3::tuple_like.