69 template <
typename stream_type,
70 typename seq_legal_alph_type,
71 typename ref_seqs_type,
72 typename ref_ids_type,
73 typename stream_pos_type,
77 typename ref_seq_type,
79 typename ref_offset_type,
86 typename tag_dict_type,
87 typename e_value_type,
88 typename bit_score_type>
91 ref_seqs_type & ref_seqs,
93 stream_pos_type & position_buffer,
98 ref_seq_type & SEQAN3_DOXYGEN_ONLY(ref_seq),
100 ref_offset_type & ref_offset,
102 cigar_type & cigar_vector,
106 tag_dict_type & tag_dict,
107 e_value_type & SEQAN3_DOXYGEN_ONLY(e_value),
108 bit_score_type & SEQAN3_DOXYGEN_ONLY(bit_score));
110 template <
typename stream_type,
111 typename header_type,
114 typename ref_seq_type,
115 typename ref_id_type,
120 typename tag_dict_type>
123 [[maybe_unused]] header_type && header,
124 [[maybe_unused]] seq_type && seq,
125 [[maybe_unused]] qual_type && qual,
126 [[maybe_unused]] id_type &&
id,
127 [[maybe_unused]] int32_t
const offset,
128 [[maybe_unused]] ref_seq_type && SEQAN3_DOXYGEN_ONLY(ref_seq),
129 [[maybe_unused]] ref_id_type && ref_id,
131 [[maybe_unused]] align_type && align,
132 [[maybe_unused]] cigar_type && cigar_vector,
133 [[maybe_unused]]
sam_flag const flag,
134 [[maybe_unused]] uint8_t
const mapq,
135 [[maybe_unused]] mate_type && mate,
136 [[maybe_unused]] tag_dict_type && tag_dict,
137 [[maybe_unused]]
double SEQAN3_DOXYGEN_ONLY(e_value),
138 [[maybe_unused]]
double SEQAN3_DOXYGEN_ONLY(bit_score));
142 bool header_was_read{
false};
148 struct alignment_record_core
153 uint32_t l_read_name : 8;
156 uint32_t n_cigar_op : 16;
170 ret[
static_cast<index_t
>(
'I')] = 1;
171 ret[
static_cast<index_t
>(
'D')] = 2;
172 ret[
static_cast<index_t
>(
'N')] = 3;
173 ret[
static_cast<index_t
>(
'S')] = 4;
174 ret[
static_cast<index_t
>(
'H')] = 5;
175 ret[
static_cast<index_t
>(
'P')] = 6;
176 ret[
static_cast<index_t
>(
'=')] = 7;
177 ret[
static_cast<index_t
>(
'X')] = 8;
184static uint16_t reg2bin(int32_t beg, int32_t end)
noexcept
187 if (beg >> 14 == end >> 14)
188 return ((1 << 15) - 1) / 7 + (beg >> 14);
189 if (beg >> 17 == end >> 17)
190 return ((1 << 12) - 1) / 7 + (beg >> 17);
191 if (beg >> 20 == end >> 20)
192 return ((1 << 9) - 1) / 7 + (beg >> 20);
193 if (beg >> 23 == end >> 23)
194 return ((1 << 6) - 1) / 7 + (beg >> 23);
195 if (beg >> 26 == end >> 26)
196 return ((1 << 3) - 1) / 7 + (beg >> 26);
206template <
typename stream_view_type, std::
integral number_type>
207void read_integral_byte_field(stream_view_type && stream_view, number_type & target)
217template <
typename stream_view_type>
218void read_float_byte_field(stream_view_type && stream_view,
float & target)
223template <
typename stream_view_type,
typename value_type>
225 stream_view_type && stream_view,
226 value_type
const & SEQAN3_DOXYGEN_ONLY(value));
228template <
typename stream_view_type>
229void read_sam_dict_field(stream_view_type && stream_view, sam_tag_dictionary & target);
231template <
typename cigar_input_type>
232auto parse_binary_cigar(cigar_input_type && cigar_input, uint16_t n_cigar_op)
const;
234static std::string get_tag_dict_str(sam_tag_dictionary
const & tag_dict);
239template <
typename stream_type,
240 typename seq_legal_alph_type,
241 typename ref_seqs_type,
242 typename ref_ids_type,
243 typename stream_pos_type,
246 typename offset_type,
247 typename ref_seq_type,
248 typename ref_id_type,
249 typename ref_offset_type,
256 typename tag_dict_type,
257 typename e_value_type,
258 typename bit_score_type>
262 ref_seqs_type & ref_seqs,
264 stream_pos_type & position_buffer,
268 offset_type & offset,
269 ref_seq_type & SEQAN3_DOXYGEN_ONLY(ref_seq),
270 ref_id_type & ref_id,
271 ref_offset_type & ref_offset,
273 cigar_type & cigar_vector,
277 tag_dict_type & tag_dict,
278 e_value_type & SEQAN3_DOXYGEN_ONLY(e_value),
279 bit_score_type & SEQAN3_DOXYGEN_ONLY(bit_score))
281 static_assert(detail::decays_to_ignore_v<ref_offset_type>
282 || detail::is_type_specialisation_of_v<ref_offset_type, std::optional>,
283 "The ref_offset must be a specialisation of std::optional.");
285 static_assert(detail::decays_to_ignore_v<mapq_type> || std::same_as<mapq_type, uint8_t>,
286 "The type of field::mapq must be uint8_t.");
288 static_assert(detail::decays_to_ignore_v<flag_type> || std::same_as<flag_type, sam_flag>,
289 "The type of field::flag must be seqan3::sam_flag.");
291 auto stream_view = seqan3::detail::istreambuf(stream);
294 [[maybe_unused]] int32_t offset_tmp{};
295 [[maybe_unused]] int32_t soft_clipping_end{};
297 [[maybe_unused]] int32_t ref_length{0}, seq_length{0};
301 if (!header_was_read)
312 read_integral_byte_field(stream_view, l_text);
315 read_header(stream_view | detail::take_exactly_or_throw(l_text), header, ref_seqs);
317 read_integral_byte_field(stream_view, n_ref);
319 for (int32_t ref_idx = 0; ref_idx < n_ref; ++ref_idx)
321 read_integral_byte_field(stream_view, l_name);
323 string_buffer.
resize(l_name - 1);
326 string_buffer.
data());
329 read_integral_byte_field(stream_view, l_ref);
331 if constexpr (detail::decays_to_ignore_v<ref_seqs_type>)
336 auto & reference_ids = header.
ref_ids();
340 reference_ids.push_back(string_buffer);
342 header.
ref_dict.emplace(reference_ids.back(), reference_ids.size() - 1);
347 auto id_it = header.
ref_dict.find(string_buffer);
352 throw format_error{detail::to_string(
"Unknown reference name '" + string_buffer
353 +
"' found in BAM file header (header.ref_ids():",
357 else if (id_it->second != ref_idx)
363 " does not correspond to the position ",
365 " in the header (header.ref_ids():",
369 else if (std::get<0>(header.
ref_id_info[id_it->second]) != l_ref)
371 throw format_error{
"Provided reference has unequal length as specified in the header."};
375 header_was_read =
true;
383 position_buffer = stream.tellg();
384 alignment_record_core core;
385 std::ranges::copy(stream_view | detail::take_exactly_or_throw(
sizeof(core)),
reinterpret_cast<char *
>(&core));
387 if (core.refID >=
static_cast<int32_t
>(header.
ref_ids().size()) || core.refID < -1)
389 throw format_error{detail::to_string(
"Reference id index '",
391 "' is not in range of ",
392 "header.ref_ids(), which has size ",
396 else if (core.refID > -1)
407 if constexpr (!detail::decays_to_ignore_v<mate_type>)
409 if (core.next_refID > -1)
410 get<0>(
mate) = core.next_refID;
412 if (core.next_pos > -1)
413 get<1>(
mate) = core.next_pos;
415 get<2>(
mate) = core.tlen;
420 auto id_view = stream_view | detail::take_exactly_or_throw(core.l_read_name - 1);
421 if constexpr (!detail::decays_to_ignore_v<id_type>)
422 read_forward_range_field(id_view,
id);
424 detail::consume(id_view);
429 if constexpr (!detail::decays_to_ignore_v<align_type> || !detail::decays_to_ignore_v<cigar_type>)
431 std::tie(tmp_cigar_vector, ref_length, seq_length) = parse_binary_cigar(stream_view, core.n_cigar_op);
432 transfer_soft_clipping_to(tmp_cigar_vector, offset_tmp, soft_clipping_end);
437 detail::consume(stream_view | detail::take_exactly_or_throw(core.n_cigar_op * 4));
446 auto seq_stream = stream_view | detail::take_exactly_or_throw(core.l_seq / 2)
454 if constexpr (detail::decays_to_ignore_v<seq_type>)
456 auto skip_sequence_bytes = [&]()
458 detail::consume(seq_stream);
463 if constexpr (!detail::decays_to_ignore_v<align_type>)
466 "If you want to read ALIGNMENT but not SEQ, the alignment"
467 " object must store a sequence container at the second (query) position.");
469 if (!tmp_cigar_vector.empty())
471 assert(core.l_seq == (seq_length + offset_tmp + soft_clipping_end));
472 using alph_t = std::ranges::range_value_t<decltype(get<1>(align))>;
473 constexpr auto from_dna16 = detail::convert_through_char_representation<dna16sam, alph_t>;
475 get<1>(align).reserve(seq_length);
478 std::ranges::advance(tmp_iter, offset_tmp / 2);
482 get<1>(align).push_back(from_dna16[
to_rank(get<1>(*tmp_iter))]);
487 for (
size_t i = (seq_length / 2); i > 0; --i)
489 get<1>(align).push_back(from_dna16[
to_rank(get<0>(*tmp_iter))]);
490 get<1>(align).push_back(from_dna16[
to_rank(get<1>(*tmp_iter))]);
496 get<1>(align).push_back(from_dna16[
to_rank(get<0>(*tmp_iter))]);
501 std::ranges::advance(tmp_iter, (soft_clipping_end + !(seq_length & 1)) / 2);
505 skip_sequence_bytes();
511 skip_sequence_bytes();
516 using alph_t = std::ranges::range_value_t<
decltype(
seq)>;
517 constexpr auto from_dna16 = detail::convert_through_char_representation<dna16sam, alph_t>;
519 for (
auto [d1, d2] : seq_stream)
533 if constexpr (!detail::decays_to_ignore_v<align_type>)
535 assign_unaligned(get<1>(align),
537 |
views::slice(
static_cast<std::ranges::range_difference_t<seq_type>
>(offset_tmp),
538 std::ranges::distance(
seq) - soft_clipping_end));
545 auto qual_view = stream_view | detail::take_exactly_or_throw(core.l_seq)
549 return static_cast<char>(chr + 33);
551 if constexpr (!detail::decays_to_ignore_v<qual_type>)
552 read_forward_range_field(qual_view,
qual);
554 detail::consume(qual_view);
558 int32_t remaining_bytes = core.block_size - (
sizeof(alignment_record_core) - 4 )
559 - core.l_read_name - core.n_cigar_op * 4 - (core.l_seq + 1) / 2 - core.l_seq;
560 assert(remaining_bytes >= 0);
561 auto tags_view = stream_view | detail::take_exactly_or_throw(remaining_bytes);
563 while (tags_view.size() > 0)
565 if constexpr (!detail::decays_to_ignore_v<tag_dict_type>)
566 read_sam_dict_field(tags_view, tag_dict);
568 detail::consume(tags_view);
573 if constexpr (!detail::decays_to_ignore_v<align_type> || !detail::decays_to_ignore_v<cigar_type>)
578 if (core.l_seq != 0 && offset_tmp == core.l_seq)
580 if constexpr (detail::decays_to_ignore_v<tag_dict_type> | detail::decays_to_ignore_v<seq_type>)
583 detail::to_string(
"The cigar string '",
587 "N' suggests that the cigar string exceeded 65535 elements and was therefore ",
588 "stored in the optional field CG. You need to read in the field::tags and "
589 "field::seq in order to access this information.")};
593 auto it = tag_dict.find(
"CG"_tag);
595 if (it == tag_dict.end())
597 "The cigar string '",
601 "N' suggests that the cigar string exceeded 65535 elements and was therefore ",
602 "stored in the optional field CG but this tag is not present in the given ",
605 auto cigar_view = std::views::all(std::get<std::string>(it->second));
606 std::tie(tmp_cigar_vector, ref_length, seq_length) = detail::parse_cigar(cigar_view);
607 offset_tmp = soft_clipping_end = 0;
608 transfer_soft_clipping_to(tmp_cigar_vector, offset_tmp, soft_clipping_end);
611 if constexpr (!detail::decays_to_ignore_v<align_type>)
616 |
views::slice(
static_cast<std::ranges::range_difference_t<seq_type>
>(offset_tmp),
617 std::ranges::distance(
seq) - soft_clipping_end));
624 if constexpr (!detail::decays_to_ignore_v<align_type>)
625 construct_alignment(align, tmp_cigar_vector, core.refID, ref_seqs, core.pos, ref_length);
627 if constexpr (!detail::decays_to_ignore_v<cigar_type>)
628 std::swap(cigar_vector, tmp_cigar_vector);
632template <
typename stream_type,
633 typename header_type,
636 typename ref_seq_type,
637 typename ref_id_type,
642 typename tag_dict_type>
645 [[maybe_unused]] header_type && header,
646 [[maybe_unused]] seq_type && seq,
647 [[maybe_unused]] qual_type && qual,
648 [[maybe_unused]] id_type &&
id,
649 [[maybe_unused]] int32_t
const offset,
650 [[maybe_unused]] ref_seq_type && SEQAN3_DOXYGEN_ONLY(ref_seq),
651 [[maybe_unused]] ref_id_type && ref_id,
653 [[maybe_unused]] align_type && align,
654 [[maybe_unused]] cigar_type && cigar_vector,
655 [[maybe_unused]]
sam_flag const flag,
656 [[maybe_unused]] uint8_t
const mapq,
657 [[maybe_unused]] mate_type && mate,
658 [[maybe_unused]] tag_dict_type && tag_dict,
659 [[maybe_unused]]
double SEQAN3_DOXYGEN_ONLY(e_value),
660 [[maybe_unused]]
double SEQAN3_DOXYGEN_ONLY(bit_score))
666 "The seq object must be a std::ranges::forward_range over "
667 "letters that model seqan3::alphabet.");
670 "The id object must be a std::ranges::forward_range over "
671 "letters that model seqan3::alphabet.");
674 "The ref_seq object must be a std::ranges::forward_range "
675 "over letters that model seqan3::alphabet.");
677 if constexpr (!detail::decays_to_ignore_v<ref_id_type>)
679 static_assert((std::ranges::forward_range<ref_id_type> || std::integral<std::remove_reference_t<ref_id_type>>
680 || detail::is_type_specialisation_of_v<std::remove_cvref_t<ref_id_type>,
std::optional>),
681 "The ref_id object must be a std::ranges::forward_range "
682 "over letters that model seqan3::alphabet or an integral or a std::optional<integral>.");
686 "The align object must be a std::pair of two ranges whose "
687 "value_type is comparable to seqan3::gap");
689 static_assert((std::tuple_size_v<std::remove_cvref_t<align_type>> == 2
690 && std::equality_comparable_with<gap, std::ranges::range_reference_t<decltype(std::get<0>(align))>>
691 && std::equality_comparable_with<
gap, std::ranges::range_reference_t<
decltype(std::get<1>(align))>>),
692 "The align object must be a std::pair of two ranges whose "
693 "value_type is comparable to seqan3::gap");
696 "The qual object must be a std::ranges::forward_range "
697 "over letters that model seqan3::alphabet.");
700 "The mate object must be a std::tuple of size 3 with "
701 "1) a std::ranges::forward_range with a value_type modelling seqan3::alphabet, "
702 "2) a std::integral or std::optional<std::integral>, and "
703 "3) a std::integral.");
706 ((std::ranges::forward_range<decltype(std::get<0>(
mate))>
708 || detail::is_type_specialisation_of_v<
710 std::optional>)&&(std::integral<std::remove_cvref_t<decltype(std::get<1>(
mate))>>
711 || detail::is_type_specialisation_of_v<
713 std::optional>)&&std::integral<std::remove_cvref_t<decltype(std::get<2>(
mate))>>),
714 "The mate object must be a std::tuple of size 3 with "
715 "1) a std::ranges::forward_range with a value_type modelling seqan3::alphabet, "
716 "2) a std::integral or std::optional<std::integral>, and "
717 "3) a std::integral.");
720 "The tag_dict object must be of type seqan3::sam_tag_dictionary.");
722 if constexpr (detail::decays_to_ignore_v<header_type>)
724 throw format_error{
"BAM can only be written with a header but you did not provide enough information! "
725 "You can either construct the output file with ref_ids and ref_seqs information and "
726 "the header will be created for you, or you can access the `header` member directly."};
737 detail::fast_ostreambuf_iterator stream_it{*stream.rdbuf()};
742 if (!header_was_written)
746 write_header(os, options, header);
747 int32_t l_text{
static_cast<int32_t
>(os.
str().size())};
752 int32_t n_ref{
static_cast<int32_t
>(header.
ref_ids().size())};
755 for (int32_t ridx = 0; ridx < n_ref; ++ridx)
757 int32_t l_name{
static_cast<int32_t
>(header.
ref_ids()[ridx].size()) + 1};
766 header_was_written =
true;
772 int32_t ref_length{};
776 if (!std::ranges::empty(cigar_vector))
778 int32_t dummy_seq_length{};
779 for (
auto & [
count, operation] : cigar_vector)
780 detail::update_alignment_lengths(ref_length, dummy_seq_length, operation.to_char(),
count);
782 else if (!std::ranges::empty(get<0>(align)) && !std::ranges::empty(get<1>(align)))
784 ref_length = std::ranges::distance(get<1>(align));
790 int32_t off_end{
static_cast<int32_t
>(std::ranges::distance(
seq)) -
offset};
792 for (
auto chr : get<1>(align))
796 off_end -= ref_length;
797 cigar_vector = detail::get_cigar_vector(align,
offset, off_end);
800 if (cigar_vector.size() >= (1 << 16))
802 tag_dict[
"CG"_tag] = detail::get_cigar_string(cigar_vector);
803 cigar_vector.resize(2);
804 cigar_vector[0] =
cigar{
static_cast<uint32_t
>(std::ranges::distance(
seq)),
'S'_cigar_operation};
805 cigar_vector[1] =
cigar{
static_cast<uint32_t
>(std::ranges::distance(get<1>(align))),
'N'_cigar_operation};
808 std::string tag_dict_binary_str = get_tag_dict_str(tag_dict);
815 uint8_t read_name_size = std::min<uint8_t>(std::ranges::distance(
id), 254) + 1;
816 read_name_size +=
static_cast<uint8_t
>(read_name_size == 1);
818 alignment_record_core core{ 0,
824 static_cast<uint16_t
>(cigar_vector.size()),
826 static_cast<int32_t
>(std::ranges::distance(
seq)),
828 get<1>(
mate).value_or(-1),
831 auto check_and_assign_id_to = [&header]([[maybe_unused]]
auto & id_source, [[maybe_unused]]
auto & id_target)
835 if constexpr (!detail::decays_to_ignore_v<id_t>)
837 if constexpr (std::integral<id_t>)
839 id_target = id_source;
841 else if constexpr (detail::is_type_specialisation_of_v<id_t, std::optional>)
843 id_target = id_source.value_or(-1);
847 if (!std::ranges::empty(id_source))
851 if constexpr (std::ranges::contiguous_range<
decltype(id_source)>
852 && std::ranges::sized_range<
decltype(id_source)>
853 && std::ranges::borrowed_range<
decltype(id_source)>)
864 "The ref_id type is not convertible to the reference id information stored in the "
865 "reference dictionary of the header object.");
867 id_it = header.
ref_dict.find(id_source);
872 throw format_error{detail::to_string(
"Unknown reference name '",
875 "not be found in BAM header ref_dict: ",
880 id_target = id_it->second;
887 check_and_assign_id_to(
ref_id, core.refID);
890 check_and_assign_id_to(get<0>(
mate), core.next_refID);
893 core.block_size =
sizeof(core) - 4 + core.l_read_name + core.n_cigar_op * 4
895 (core.l_seq + 1) / 2 +
897 tag_dict_binary_str.
size();
901 if (std::ranges::empty(
id))
908 for (
auto [cigar_count, op] : cigar_vector)
910 cigar_count = cigar_count << 4;
911 cigar_count |=
static_cast<int32_t
>(char_to_sam_rank[op.to_char()]);
916 using alph_t = std::ranges::range_value_t<seq_type>;
917 constexpr auto to_dna16 = detail::convert_through_char_representation<alph_t, dna16sam>;
920 for (int32_t sidx = 0; sidx < ((core.l_seq & 1) ? core.l_seq - 1 : core.l_seq); ++sidx, ++sit)
925 stream_it =
static_cast<char>(compressed_chr);
929 stream_it =
static_cast<char>(
to_rank(to_dna16[
to_rank(*sit)]) << 4);
932 if (std::ranges::empty(
qual))
939 if (std::ranges::distance(
qual) != core.l_seq)
940 throw format_error{detail::to_string(
"Expected quality of same length as sequence with size ",
942 ". Got quality with size ",
943 std::ranges::distance(
qual),
950 return static_cast<char>(
to_rank(chr));
956 stream << tag_dict_binary_str;
961template <
typename stream_view_type,
typename value_type>
963 stream_view_type && stream_view,
964 value_type
const & SEQAN3_DOXYGEN_ONLY(value))
967 read_integral_byte_field(stream_view,
count);
974 if constexpr (std::integral<value_type>)
976 read_integral_byte_field(stream_view, tmp);
978 else if constexpr (std::same_as<value_type, float>)
980 read_float_byte_field(stream_view, tmp);
984 constexpr bool always_false = std::is_same_v<value_type, void>;
985 static_assert(always_false,
"format_bam::read_sam_dict_vector: unsupported value_type");
990 variant = std::move(tmp_vector);
1010template <
typename stream_view_type>
1011inline void format_bam::read_sam_dict_field(stream_view_type && stream_view, sam_tag_dictionary & target)
1020 uint16_t tag =
static_cast<uint16_t
>(*it) << 8;
1023 tag +=
static_cast<uint16_t
>(*it);
1041 read_integral_byte_field(stream_view, tmp);
1042 target[tag] =
static_cast<int32_t
>(tmp);
1048 read_integral_byte_field(stream_view, tmp);
1049 target[tag] =
static_cast<int32_t
>(tmp);
1055 read_integral_byte_field(stream_view, tmp);
1056 target[tag] =
static_cast<int32_t
>(tmp);
1062 read_integral_byte_field(stream_view, tmp);
1063 target[tag] =
static_cast<int32_t
>(tmp);
1069 read_integral_byte_field(stream_view, tmp);
1070 target[tag] = std::move(tmp);
1076 read_integral_byte_field(stream_view, tmp);
1077 target[tag] =
static_cast<int32_t
>(tmp);
1083 read_float_byte_field(stream_view, tmp);
1089 string_buffer.
clear();
1090 while (!is_char<'\0'>(*it))
1096 target[tag] = string_buffer;
1103 while (!is_char<'\0'>(*it))
1105 string_buffer.
clear();
1110 throw format_error{
"Hexadecimal tag has an uneven number of digits!"};
1114 read_byte_field(string_buffer, value);
1118 target[tag] = byte_array;
1123 char array_value_type_id = *it;
1126 switch (array_value_type_id)
1129 read_sam_dict_vector(target[tag], stream_view, int8_t{});
1132 read_sam_dict_vector(target[tag], stream_view, uint8_t{});
1135 read_sam_dict_vector(target[tag], stream_view, int16_t{});
1138 read_sam_dict_vector(target[tag], stream_view, uint16_t{});
1141 read_sam_dict_vector(target[tag], stream_view, int32_t{});
1144 read_sam_dict_vector(target[tag], stream_view, uint32_t{});
1147 read_sam_dict_vector(target[tag], stream_view,
float{});
1150 throw format_error{detail::to_string(
"The first character in the numerical id of a SAM tag ",
1151 "must be one of [cCsSiIf] but '",
1152 array_value_type_id,
1158 throw format_error{detail::to_string(
"The second character in the numerical id of a "
1159 "SAM tag must be one of [A,i,Z,H,B,f] but '",
1179template <
typename cigar_input_type>
1180inline auto format_bam::parse_binary_cigar(cigar_input_type && cigar_input, uint16_t n_cigar_op)
const
1183 char operation{
'\0'};
1185 int32_t ref_length{}, seq_length{};
1186 uint32_t operation_and_count{};
1187 constexpr char const * cigar_mapping =
"MIDNSHP=X*******";
1188 constexpr uint32_t cigar_mask = 0x0f;
1190 if (n_cigar_op == 0)
1191 return std::tuple{operations, ref_length, seq_length};
1195 while (n_cigar_op > 0)
1198 sizeof(operation_and_count),
1199 reinterpret_cast<char *
>(&operation_and_count));
1200 operation = cigar_mapping[operation_and_count & cigar_mask];
1201 count = operation_and_count >> 4;
1203 detail::update_alignment_lengths(ref_length, seq_length, operation,
count);
1208 return std::tuple{operations, ref_length, seq_length};
1214inline std::string format_bam::get_tag_dict_str(sam_tag_dictionary
const & tag_dict)
1218 auto stream_variant_fn = [&result](
auto && arg)
1223 if constexpr (std::same_as<T, int32_t>)
1226 size_t const absolute_arg = std::abs(arg);
1228 bool const negative = arg < 0;
1229 n = n * n + 2 * negative;
1235 result[result.size() - 1] =
'C';
1236 result.append(
reinterpret_cast<char const *
>(&arg), 1);
1241 result[result.size() - 1] =
'S';
1242 result.append(
reinterpret_cast<char const *
>(&arg), 2);
1247 result[result.size() - 1] =
'c';
1248 int8_t tmp =
static_cast<int8_t
>(arg);
1249 result.append(
reinterpret_cast<char const *
>(&tmp), 1);
1254 result[result.size() - 1] =
's';
1255 int16_t tmp =
static_cast<int16_t
>(arg);
1256 result.append(
reinterpret_cast<char const *
>(&tmp), 2);
1261 result.append(
reinterpret_cast<char const *
>(&arg), 4);
1266 else if constexpr (std::same_as<T, std::string>)
1268 result.append(
reinterpret_cast<char const *
>(arg.data()), arg.size() + 1 );
1270 else if constexpr (!std::ranges::range<T>)
1272 result.append(
reinterpret_cast<char const *
>(&arg),
sizeof(arg));
1276 int32_t sz{
static_cast<int32_t
>(arg.size())};
1277 result.append(
reinterpret_cast<char *
>(&sz), 4);
1278 result.append(
reinterpret_cast<char const *
>(arg.data()),
1279 arg.size() *
sizeof(std::ranges::range_value_t<T>));
1283 for (
auto & [tag, variant] : tag_dict)
1285 result.push_back(
static_cast<char>(tag / 256));
1286 result.push_back(
static_cast<char>(tag % 256));
1288 result.push_back(detail::sam_tag_type_char[variant.
index()]);
1290 if (!is_char<'\0'>(detail::sam_tag_type_char_extra[variant.
index()]))
1291 result.push_back(detail::sam_tag_type_char_extra[variant.
index()]);
constexpr derived_type & assign_char(char_type const chr) noexcept
Assign from a character, implicitly converts invalid characters.
Definition: alphabet_base.hpp:163
constexpr derived_type & assign_rank(rank_type const c) noexcept
Assign from a numeric value.
Definition: alphabet_base.hpp:187
The seqan3::cigar semialphabet pairs a counter with a seqan3::cigar::operation letter.
Definition: cigar.hpp:60
exposition_only::cigar_operation operation
The (extended) cigar operation alphabet of M,D,I,H,N,P,S,X,=.
Definition: cigar.hpp:98
A 16 letter DNA alphabet, containing all IUPAC symbols minus the gap and plus an equality sign ('=')....
Definition: dna16sam.hpp:48
The alphabet of a gap character '-'.
Definition: gap.hpp:39
The SAM tag dictionary class that stores all optional SAM fields.
Definition: sam_tag_dictionary.hpp:343
Provides seqan3::dna16sam.
T emplace_back(T... args)
Provides seqan3::detail::fast_ostreambuf_iterator.
constexpr auto to_rank
Return the rank representation of a (semi-)alphabet object.
Definition: concept.hpp:155
sam_flag
An enum flag that describes the properties of an aligned read (given as a SAM record).
Definition: sam_flag.hpp:76
@ flag
The alignment flag (bit information), uint16_t value.
@ ref_offset
Sequence (seqan3::field::ref_seq) relative start position (0-based), unsigned value.
@ mapq
The mapping quality of the seqan3::field::seq alignment, usually a Phred-scaled score.
@ offset
Sequence (seqan3::field::seq) relative start position (0-based), unsigned value.
@ mate
The mate pair information given as a std::tuple of reference name, offset and template length.
@ ref_id
The identifier of the (reference) sequence that seqan3::field::seq was aligned to.
@ seq
The "sequence", usually a range of nucleotides or amino acids.
@ qual
The qualities, usually in Phred score notation.
decltype(detail::transform< trait_t >(list_t{})) transform
Apply a transformation trait to every type in the list and return a seqan3::type_list of the results.
Definition: traits.hpp:470
constexpr ptrdiff_t count
Count the occurrences of a type in a pack.
Definition: traits.hpp:164
constexpr size_t size
The size of a type pack.
Definition: traits.hpp:146
constexpr auto slice
A view adaptor that returns a half-open interval on the underlying range.
Definition: slice.hpp:178
constexpr auto repeat_n
A view factory that repeats a given value n times.
Definition: repeat_n.hpp:91
The generic alphabet concept that covers most data types used in ranges.
Checks whether from can be implicityly converted to to.
A more refined container concept than seqan3::container.
Whether a type behaves like a tuple.
Auxiliary functions for the alignment IO.
Provides seqan3::detail::istreambuf.
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:29
Provides seqan3::debug_stream and related types.
Provides helper data structures for the seqan3::sam_file_output.
Provides the seqan3::sam_tag_dictionary class and auxiliaries.
Provides seqan3::views::slice.
The options type defines various option members that influence the behavior of all or some formats.
Definition: output_options.hpp:26
Provides seqan3::views::take_exactly and seqan3::views::take_exactly_or_throw.