157 typename ref_id_type,
158 typename ref_offset_type,
187 typename header_type,
191 typename ref_id_type,
199 header_type && header,
// Value-initialised SAM file header object.
// NOTE(review): presumably the fallback header used when the caller supplies none — confirm against the code that reads it.
222 sam_file_header<> default_header{};
234 template <
typename t>
235 decltype(
auto) default_or(t && v)
const noexcept
237 return std::forward<t>(v);
240 template <arithmetic value_type>
// Parses a single optional SAM tag field in textual form ([TAG]:[TYPE_ID]:[VALUE]) and stores it in `target`.
//
// tag_str: the complete field text. The definition builds the 16-bit key from characters 0 and 1, reads the type id
//          from character 3 and takes the value from position 5 onwards; for array tags, character 5 is the element
//          type id and the values start at position 7.
// target:  dictionary that receives the parsed value under the computed key.
//
// The definition throws format_error when the type id (or the array element type id) is not a recognised one.
245 void read_sam_dict(
std::string_view const tag_str, sam_tag_dictionary & target);
// Writes a forward-range field through the output stream iterator.
//
// stream_it:   output iterator to write to.
// field_value: the range to write. The definition first tests std::ranges::empty(field_value); presumably an empty
//              field is emitted as '*' (per the function name) — confirm in the definition. A range whose elements
//              are `char` is handed to stream_it.write_range() directly.
247 template <
typename stream_it_t, std::ranges::forward_range field_type>
248 void write_range_or_asterisk(stream_it_t & stream_it, field_type && field_value);
// Overload of write_range_or_asterisk for a `char const *` field value.
// NOTE(review): the body is not visible here; presumably the pointer is treated as a null-terminated C string and
// forwarded to the range overload — confirm in the definition.
250 template <
typename stream_it_t>
251 void write_range_or_asterisk(stream_it_t & stream_it,
char const *
const field_value);
// Writes the entries of a SAM tag dictionary to the output.
//
// stream:    output iterator to write to (named `stream_it` in the definition).
// tag_dict:  the dictionary whose (tag, variant) entries are iterated.
// separator: character written before each entry.
//
// For each entry the definition writes `separator`, derives the two tag characters from tag / 256 and tag % 256,
// and writes the type character looked up in detail::sam_tag_type_char by variant.index(), followed by the
// corresponding detail::sam_tag_type_char_extra character when that one is not '\0'. A local lambda
// (stream_variant_fn) formats char ranges, std::byte ranges, other ranges, single chars and numbers.
253 template <
typename stream_it_t>
254 void write_tag_fields(stream_it_t & stream, sam_tag_dictionary
const & tag_dict,
char const separator);
258template <
typename stream_type,
259 typename seq_legal_alph_type,
260 typename stream_pos_type,
294 if constexpr (!detail::decays_to_ignore_v<seq_type>)
295 if (std::ranges::distance(
sequence) == 0)
296 throw parse_error{
"The sequence information must not be empty."};
297 if constexpr (!detail::decays_to_ignore_v<id_type>)
299 if (std::ranges::distance(
id) == 0)
300 throw parse_error{
"The id information must not be empty."};
301 if (options.truncate_ids)
348 typename ref_id_type,
349 typename ref_offset_type,
378 static_assert(detail::decays_to_ignore_v<ref_offset_type>
379 || detail::is_type_specialisation_of_v<ref_offset_type, std::optional>,
380 "The ref_offset must be a specialisation of std::optional.");
382 auto stream_it = detail::fast_istreambuf_iterator{*stream.rdbuf()};
387 std::ranges::range_value_t<
decltype(header.ref_ids())>
ref_id_tmp{};
400 position_buffer = stream.tellg();
405 stream_it.cache_record_into(
'\n',
'\t', raw_record);
409 if constexpr (!detail::decays_to_ignore_v<id_type>)
410 read_forward_range_field(raw_record[0],
id);
416 read_forward_range_field(raw_record[2],
ref_id_tmp);
427 throw format_error{
"No negative values are allowed for field::ref_offset."};
429 if constexpr (!detail::decays_to_ignore_v<mapq_type>)
430 read_arithmetic_field(raw_record[4],
mapq);
434 if constexpr (!detail::decays_to_ignore_v<cigar_type>)
439 if constexpr (!detail::decays_to_ignore_v<mate_type>)
441 std::ranges::range_value_t<
decltype(header.ref_ids())>
tmp_mate_ref_id{};
446 if constexpr (!detail::decays_to_ignore_v<ref_id_type>)
457 read_arithmetic_field(raw_record[7],
tmp_pnext);
462 throw format_error{
"No negative values are allowed at the mate mapping position."};
465 read_arithmetic_field(raw_record[8],
get<2>(
mate));
470 if constexpr (!detail::decays_to_ignore_v<seq_type>)
483 + detail::type_name_as_string<seq_legal_alph_type>
484 +
"> evaluated to false on " + detail::make_printable(
seq_str[
i])};
501 if constexpr (!detail::decays_to_ignore_v<qual_type>)
504 if constexpr (!detail::decays_to_ignore_v<seq_type> && !detail::decays_to_ignore_v<qual_type>)
506 if (std::ranges::distance(
seq) != 0 && std::ranges::distance(
qual) != 0
507 && std::ranges::distance(
seq) != std::ranges::distance(
qual))
509 throw format_error{detail::to_string(
"Sequence length (",
510 std::ranges::distance(
seq),
511 ") and quality length (",
512 std::ranges::distance(
qual),
513 ") must be the same.")};
519 if constexpr (!detail::decays_to_ignore_v<tag_dict_type>)
542 typename header_type,
546 typename ref_id_type,
554 header_type && header,
586 "The seq object must be a std::ranges::forward_range over "
587 "letters that model seqan3::alphabet.");
590 "The id object must be a std::ranges::forward_range over "
591 "letters that model seqan3::alphabet.");
593 if constexpr (!detail::decays_to_ignore_v<ref_id_type>)
595 static_assert((std::ranges::forward_range<ref_id_type> || std::integral<std::remove_reference_t<ref_id_type>>
596 || detail::is_type_specialisation_of_v<std::remove_cvref_t<ref_id_type>,
std::optional>),
597 "The ref_id object must be a std::ranges::forward_range "
598 "over letters that model seqan3::alphabet.");
600 if constexpr (std::integral<std::remove_cvref_t<ref_id_type>>
601 || detail::is_type_specialisation_of_v<std::remove_cvref_t<ref_id_type>,
std::optional>)
602 static_assert(!detail::decays_to_ignore_v<header_type>,
603 "If you give indices as reference id information the header must also be present.");
607 "The qual object must be a std::ranges::forward_range "
608 "over letters that model seqan3::alphabet.");
611 "The mate object must be a std::tuple of size 3 with "
612 "1) a std::ranges::forward_range with a value_type modelling seqan3::alphabet, "
613 "2) a std::integral or std::optional<std::integral>, and "
614 "3) a std::integral.");
617 ((std::ranges::forward_range<decltype(std::get<0>(
mate))>
620 && (std::integral<std::remove_cvref_t<decltype(std::get<1>(
mate))>>
622 && std::integral<std::remove_cvref_t<decltype(std::get<2>(
mate))>>),
623 "The mate object must be a std::tuple of size 3 with "
624 "1) a std::ranges::forward_range with a value_type modelling seqan3::alphabet, "
625 "2) a std::integral or std::optional<std::integral>, and "
626 "3) a std::integral.");
628 if constexpr (std::integral<std::remove_cvref_t<decltype(std::get<0>(
mate))>>
631 static_assert(!detail::decays_to_ignore_v<header_type>,
632 "If you give indices as mate reference id information the header must also be present.");
635 "The tag_dict object must be of type seqan3::sam_tag_dictionary.");
640 if constexpr (!detail::decays_to_ignore_v<header_type> && !detail::decays_to_ignore_v<ref_id_type>
641 && !std::integral<std::remove_reference_t<ref_id_type>>
642 && !detail::is_type_specialisation_of_v<std::remove_reference_t<ref_id_type>,
std::optional>)
647 auto id_it = header.ref_dict.end();
649 if constexpr (std::ranges::contiguous_range<
decltype(
ref_id)> && std::ranges::sized_range<
decltype(
ref_id)>
650 && std::ranges::borrowed_range<
decltype(
ref_id)>)
659 "The ref_id type is not convertible to the reference id information stored in the "
660 "reference dictionary of the header object.");
665 if (
id_it == header.ref_dict.end())
668 "' was not in the list of references:",
674 throw format_error{
"The ref_offset object must be a std::integral >= 0."};
679 if constexpr (!detail::decays_to_ignore_v<header_type>)
683 write_header(stream, options, header);
684 header_was_written =
true;
692 detail::fast_ostreambuf_iterator
stream_it{*stream.rdbuf()};
701 if constexpr (!detail::decays_to_ignore_v<ref_id_type>)
703 if constexpr (std::integral<std::remove_reference_t<ref_id_type>>)
707 else if constexpr (detail::is_type_specialisation_of_v<std::remove_reference_t<ref_id_type>,
std::optional>)
710 write_range_or_asterisk(
stream_it, (header.ref_ids())[
ref_id.value()]);
745 if constexpr (std::integral<std::remove_reference_t<decltype(get<0>(
mate))>>)
749 else if constexpr (detail::is_type_specialisation_of_v<std::remove_reference_t<decltype(get<0>(
mate))>,
764 if constexpr (detail::is_type_specialisation_of_v<std::remove_cvref_t<decltype(get<1>(
mate))>,
std::optional>)
805template <arithmetic value_type>
814 while (start_pos != std::string_view::npos)
816 end_pos = str.
find(
',', start_pos);
817 auto end = (end_pos == std::string_view::npos) ? str.
end() : str.
begin() + end_pos;
819 tmp_vector.push_back(value);
821 start_pos = (end_pos == std::string_view::npos) ? end_pos : end_pos + 1;
823 variant = std::move(tmp_vector);
841 uint8_t dummy_byte{};
843 if (str.
size() % 2 != 0)
844 throw format_error{
"[CORRUPTED SAM FILE] Hexadecimal tag must have even number of digits."};
848 for (
auto hex_begin = str.
begin(), hex_end = str.
begin() + 2; hex_begin != str.
end(); hex_begin += 2, hex_end += 2)
852 if (res.ec == std::errc::invalid_argument)
854 +
"' could not be cast into type uint8_t."};
856 if (res.ec == std::errc::result_out_of_range)
858 +
"' into type uint8_t would cause an overflow."};
863 variant = std::move(tmp_vector);
881inline void format_sam::read_sam_dict(
std::string_view const tag_str, sam_tag_dictionary & target)
888 assert(tag_str.
size() > 5);
890 uint16_t tag =
static_cast<uint16_t
>(tag_str[0]) << 8;
891 tag +=
static_cast<uint16_t
>(tag_str[1]);
893 char type_id = tag_str[3];
899 assert(tag_str.
size() == 6);
900 target[tag] = tag_str[5];
906 read_arithmetic_field(tag_str.
substr(5), tmp);
913 read_arithmetic_field(tag_str.
substr(5), tmp);
924 read_sam_byte_vector(target[tag], tag_str.
substr(5));
929 assert(tag_str.
size() > 6);
930 char array_value_type_id = tag_str[5];
932 switch (array_value_type_id)
935 read_sam_dict_vector(target[tag], tag_str.
substr(7), int8_t{});
938 read_sam_dict_vector(target[tag], tag_str.
substr(7), uint8_t{});
941 read_sam_dict_vector(target[tag], tag_str.
substr(7), int16_t{});
944 read_sam_dict_vector(target[tag], tag_str.
substr(7), uint16_t{});
947 read_sam_dict_vector(target[tag], tag_str.
substr(7), int32_t{});
950 read_sam_dict_vector(target[tag], tag_str.
substr(7), uint32_t{});
953 read_sam_dict_vector(target[tag], tag_str.
substr(7),
float{});
956 throw format_error{
std::string(
"The first character in the numerical ")
957 +
"id of a SAM tag must be one of [cCsSiIf] but '" + array_value_type_id
963 throw format_error{
std::string(
"The second character in the numerical id of a "
964 "SAM tag ([TAG]:[TYPE_ID]:[VALUE]) must be one of [A,i,Z,H,B,f] but '")
965 + type_id +
"' was given."};
976template <
typename stream_it_t, std::ranges::forward_range field_type>
977inline void format_sam::write_range_or_asterisk(stream_it_t & stream_it, field_type && field_value)
979 if (std::ranges::empty(field_value))
985 if constexpr (std::same_as<std::remove_cvref_t<std::ranges::range_reference_t<field_type>>,
char>)
986 stream_it.write_range(field_value);
998template <
typename stream_it_t>
999inline void format_sam::write_range_or_asterisk(stream_it_t & stream_it,
char const *
const field_value)
1011template <
typename stream_it_t>
1013format_sam::write_tag_fields(stream_it_t & stream_it, sam_tag_dictionary
const & tag_dict,
char const separator)
1015 auto const stream_variant_fn = [&stream_it](
auto && arg)
1019 if constexpr (std::ranges::input_range<T>)
1021 if constexpr (std::same_as<std::remove_cvref_t<std::ranges::range_reference_t<T>>,
char>)
1023 stream_it.write_range(arg);
1025 else if constexpr (std::same_as<std::remove_cvref_t<std::ranges::range_reference_t<T>>,
std::byte>)
1027 if (!std::ranges::empty(arg))
1031 for (
auto && elem : arg |
std::views::
drop(1))
1034 stream_it.write_number(std::to_integer<uint8_t>(elem));
1040 if (!std::ranges::empty(arg))
1044 for (
auto && elem : arg |
std::views::
drop(1))
1047 stream_it.write_number(elem);
1052 else if constexpr (std::same_as<std::remove_cvref_t<T>,
char>)
1058 stream_it.write_number(arg);
1062 for (
auto & [tag, variant] : tag_dict)
1064 *stream_it = separator;
1066 char const char0 = tag / 256;
1067 char const char1 = tag % 256;
1072 *stream_it = detail::sam_tag_type_char[variant.
index()];
1075 if (detail::sam_tag_type_char_extra[variant.
index()] !=
'\0')
1077 *stream_it = detail::sam_tag_type_char_extra[variant.
index()];
Core alphabet concept and free function/type trait wrappers.
A "pretty printer" for most SeqAn data structures and related types.
Definition debug_stream_type.hpp:79
The SAM tag dictionary class that stores all optional SAM fields.
Definition sam_tag_dictionary.hpp:327
Provides seqan3::detail::fast_ostreambuf_iterator.
auto const to_char
A view that calls seqan3::to_char() on each element in the input range.
Definition to_char.hpp:60
constexpr auto assign_char_to
Assign a character to an alphabet object.
Definition alphabet/concept.hpp:517
sam_flag
An enum flag that describes the properties of an aligned read (given as a SAM record).
Definition sam_flag.hpp:73
@ none
None of the flags below are set.
@ flag
The alignment flag (bit information), uint16_t value.
@ ref_offset
Sequence (seqan3::field::ref_seq) relative start position (0-based), unsigned value.
@ ref_seq
The (reference) "sequence" information, usually a range of nucleotides or amino acids.
@ mapq
The mapping quality of the seqan3::field::seq alignment, usually a Phred-scaled score.
@ bit_score
The bit score (statistical significance indicator), unsigned value.
@ mate
The mate pair information given as a std::tuple of reference name, offset and template length.
@ ref_id
The identifier of the (reference) sequence that seqan3::field::seq was aligned to.
@ seq
The "sequence", usually a range of nucleotides or amino acids.
@ qual
The qualities, usually in Phred score notation.
constexpr auto is_char
Checks whether a given letter is the same as the template non-type argument.
Definition predicate.hpp:60
constexpr auto is_space
Checks whether c is a space character.
Definition predicate.hpp:122
seqan::stl::ranges::to to
Converts a range to a container. This entity is not part of the SeqAn API.
Definition to.hpp:23
typename decltype(detail::split_after< i >(list_t{}))::second_type drop
Return a seqan3::type_list of the types in the input type list, except the first n.
Definition type_list/traits.hpp:392
The generic concept for a (biological) sequence.
Auxiliary functions for the SAM IO.
Provides seqan3::detail::istreambuf.
The main SeqAn3 namespace.
Definition aligned_sequence_concept.hpp:26
SeqAn specific customisations in the standard namespace.
Provides seqan3::sam_file_output_options.
Provides helper data structures for the seqan3::sam_file_output.
Provides the seqan3::sam_tag_dictionary class and auxiliaries.
Provides seqan3::sequence_file_output_options.
Provides seqan3::views::slice.
Thrown if there is a parse error, such as reading an unexpected character from an input stream.
Definition io/exception.hpp:45
The options type defines various option members that influence the behaviour of all or some formats.
Definition sam_file/output_options.hpp:23
bool add_carriage_return
The default plain text line-ending is "\n", but on Windows an additional carriage return is recommended ("\r\n").
Definition sam_file/output_options.hpp:27
bool sam_require_header
Whether to require a header for SAM files.
Definition sam_file/output_options.hpp:41
The options type defines various option members that influence the behaviour of all or some formats.
Definition sequence_file/output_options.hpp:23
Provides seqan3::views::take_until and seqan3::views::take_until_or_throw.
Provides seqan3::ranges::to.
Provides seqan3::views::to_char.
Provides traits to inspect some information of a type, for example its name.
Provides seqan3::tuple_like.