SeqAn3  3.0.1
The Modern C++ library for sequence analysis.
detail.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2020, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2020, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
13 #pragma once
14 
15 #include <sstream>
16 
24 #include <seqan3/std/algorithm>
25 #include <seqan3/std/charconv>
26 #include <seqan3/std/concepts>
27 #include <seqan3/std/ranges>
28 
29 #include <range/v3/view/zip.hpp>
30 
31 namespace seqan3::detail
32 {
34 struct view_equality_fn
35 {
37  template <std::ranges::forward_range rng1_type, std::ranges::forward_range rng2_type>
38  constexpr bool operator()(rng1_type && rng1, rng2_type && rng2) const
39  {
40  return std::ranges::equal(rng1, rng2);
41  }
42 };
43 
83 template <typename reference_char_type, typename query_char_type>
84 [[nodiscard]] constexpr cigar_op map_aligned_values_to_cigar_op(reference_char_type const reference_char,
85  query_char_type const query_char,
86  bool const extended_cigar)
88  requires seqan3::detail::weakly_equality_comparable_with<reference_char_type, gap> &&
89  seqan3::detail::weakly_equality_comparable_with<query_char_type, gap>
91 {
92  constexpr std::array<char, 6> operators{'M', 'D', 'I', 'P', 'X', '='}; // contains the possible cigar operators.
93  uint8_t key = (static_cast<uint8_t>(reference_char == gap{}) << 1) | static_cast<uint8_t>(query_char == gap{});
94  if (extended_cigar && (key == 0)) // in extended format refine the substitution operator to match/mismatch.
95  key |= ((1 << 2) | static_cast<uint8_t>(query_char == reference_char)); // maps to [4, 5].
96 
97  return assign_char_to(operators[key], cigar_op{});
98 }
99 
137 template <tuple_like alignment_type>
138 [[nodiscard]] inline std::vector<cigar> get_cigar_vector(alignment_type && alignment,
139  uint32_t const query_start_pos = 0,
140  uint32_t const query_end_pos = 0,
141  bool const extended_cigar = false)
143  requires std::tuple_size_v<remove_cvref_t<alignment_type>> == 2
145 {
146  using std::get;
147 
148  auto & ref_seq = get<0>(alignment);
149  auto & query_seq = get<1>(alignment);
150 
151  if (ref_seq.size() != query_seq.size())
152  throw std::logic_error{"The aligned sequences must have the same length."};
153 
154  std::vector<cigar> result{};
155 
156  if (!ref_seq.size())
157  return result; // return empty string if sequences are empty
158 
159  // Add (S)oft-clipping at the start of the read
160  if (query_start_pos)
161  result.emplace_back(query_start_pos, 'S'_cigar_op);
162 
163  // Create cigar string from alignment
164  // -------------------------------------------------------------------------
165  // initialize first operation and count value:
166  cigar_op operation{map_aligned_values_to_cigar_op(ref_seq[0], query_seq[0], extended_cigar)};
167  uint32_t count{0};
168 
169  // go through alignment columns
170  for (auto column : views::zip(ref_seq, query_seq))
171  {
172  cigar_op next_op = map_aligned_values_to_cigar_op(std::get<0>(column), std::get<1>(column), extended_cigar);
173 
174  if (operation == next_op)
175  {
176  ++count;
177  }
178  else
179  {
180  result.emplace_back(count, operation);
181  operation = next_op;
182  count = 1;
183  }
184  }
185 
186  // append last cigar element
187  result.emplace_back(count, operation);
188 
189  // Add (S)oft-clipping at the end of the read
190  if (query_end_pos)
191  result.emplace_back(query_end_pos, 'S'_cigar_op);
192 
193  return result;
194 }
195 
205 [[nodiscard]] inline std::string get_cigar_string(std::vector<cigar> const & cigar_vector)
206 {
207  std::string result{};
208  std::ranges::for_each(cigar_vector, [&result] (auto & cig) { result.append(cig.to_string()); });
209  return result;
210 }
211 
246 template <tuple_like alignment_type>
247 [[nodiscard]] inline std::string get_cigar_string(alignment_type && alignment,
248  uint32_t const query_start_pos = 0,
249  uint32_t const query_end_pos = 0,
250  bool const extended_cigar = false)
252  requires std::tuple_size_v<remove_cvref_t<alignment_type>> == 2
254 {
255  return get_cigar_string(get_cigar_vector(alignment, query_start_pos, query_end_pos, extended_cigar));
256 }
257 
294 template <std::ranges::forward_range ref_seq_type, std::ranges::forward_range query_seq_type>
295 [[nodiscard]] inline std::string get_cigar_string(ref_seq_type && ref_seq,
296  query_seq_type && query_seq,
297  uint32_t const query_start_pos = 0,
298  uint32_t const query_end_pos = 0,
299  bool const extended_cigar = false)
301  requires std::detail::weakly_equality_comparable_with<gap, reference_t<ref_seq_type>> &&
302  std::detail::weakly_equality_comparable_with<gap, reference_t<query_seq_type>>
304 {
305  return get_cigar_string(std::tie(ref_seq, query_seq), query_start_pos, query_end_pos, extended_cigar);
306 }
307 
331 template <tuple_like alignment_type>
333  requires std::tuple_size_v<remove_cvref_t<alignment_type>> == 2 &&
334  detail::all_satisfy_aligned_seq<detail::tuple_type_list_t<alignment_type>>
336 inline void alignment_from_cigar(alignment_type & alignment, std::vector<cigar> const & cigar_vector)
337 {
338  using std::get;
339  auto current_ref_pos = std::ranges::begin(get<0>(alignment));
340  auto current_read_pos = std::ranges::begin(get<1>(alignment));
341 
342  for (auto [cigar_count, cigar_operation] : cigar_vector)
343  {
344  // ignore since alignment shall contain sliced sequences
345  if (('S'_cigar_op == cigar_operation) || ('H'_cigar_op == cigar_operation))
346  continue;
347 
348  assert(('M'_cigar_op == cigar_operation) || ('='_cigar_op == cigar_operation) ||
349  ('X'_cigar_op == cigar_operation) || ('D'_cigar_op == cigar_operation) ||
350  ('N'_cigar_op == cigar_operation) || ('I'_cigar_op == cigar_operation) ||
351  ('P'_cigar_op == cigar_operation)); // checked during IO
352 
353  if (('M'_cigar_op == cigar_operation) || ('='_cigar_op == cigar_operation) || ('X'_cigar_op == cigar_operation))
354  {
355  std::ranges::advance(current_ref_pos , cigar_count);
356  std::ranges::advance(current_read_pos, cigar_count);
357  }
358  else if (('D'_cigar_op == cigar_operation) || ('N'_cigar_op == cigar_operation)) // insert gaps into read
359  {
360  assert(std::distance(current_read_pos, std::ranges::end(get<1>(alignment))) >= 0);
361  current_read_pos = insert_gap(get<1>(alignment), current_read_pos, cigar_count);
362  ++current_read_pos;
363  std::ranges::advance(current_ref_pos , cigar_count);
364  }
365  else if (('I'_cigar_op == cigar_operation)) // Insert gaps into ref
366  {
367  assert(std::ranges::distance(current_ref_pos, std::ranges::end(get<0>(alignment))) >= 0);
368  current_ref_pos = insert_gap(get<0>(alignment), current_ref_pos, cigar_count);
369  ++current_ref_pos;
370  std::ranges::advance(current_read_pos, cigar_count);
371  }
372  else if (('P'_cigar_op == cigar_operation)) // skip padding
373  {
374  current_ref_pos = insert_gap(get<0>(alignment), current_ref_pos, cigar_count);
375  ++current_ref_pos;
376 
377  current_read_pos = insert_gap(get<1>(alignment), current_read_pos, cigar_count);
378  ++current_read_pos;
379  }
380  }
381 }
382 
384 struct access_restrictor_fn
385 {
387  template <typename chr_t>
388  [[noreturn]] chr_t operator()(chr_t) const
389  {
390  throw std::logic_error{"Access is not allowed because there is no sequence information."};
391  }
392 };
393 
394 } // namespace seqan3::detail
sstream
seqan3::remove_cvref_t
std::remove_cv_t< std::remove_reference_t< t > > remove_cvref_t
Return the input type with const, volatile and references removed (type trait).
Definition: basic.hpp:35
zip.hpp
Provides seqan3::views::zip.
single_pass_input.hpp
Provides seqan3::single_pass_input_view.
take_until.hpp
Provides seqan3::views::take_until and seqan3::views::take_until_or_throw.
std::string
tuple.hpp
Provides seqan3::tuple_like.
seqan3::reference_t
typename reference< t >::type reference_t
Shortcut for seqan3::reference (transformation_trait shortcut).
Definition: pre.hpp:77
charconv
Provides std::from_chars and std::to_chars if not defined in the stl <charconv> header.
seqan3::insert_gap
aligned_seq_t::iterator insert_gap(aligned_seq_t &aligned_seq, typename aligned_seq_t::const_iterator pos_it)
An implementation of seqan3::aligned_sequence::insert_gap for sequence containers.
Definition: aligned_sequence_concept.hpp:248
std::vector
std::distance
T distance(T... args)
seqan3::field::ref_seq
The (reference) "sequence" information, usually a range of nucleotides or amino acids.
algorithm
Adaptations of algorithms from the Ranges TS.
concepts
The Concepts library.
std::tie
T tie(T... args)
seqan3::views::get
const auto get
A view calling std::get on each element in a range.
Definition: get.hpp:65
std::array
std::logic_error
aligned_sequence_concept.hpp
Includes the aligned_sequence and the related insert_gap and erase_gap functions to enable stl contai...
ranges
Adaptations of concepts from the Ranges TS.
std::ranges::begin
T begin(T... args)
std
SeqAn specific customisations in the standard namespace.
cigar.hpp
Provides the seqan3::cigar alphabet.
seqan3::assign_char_to
constexpr auto assign_char_to
Assign a character to an alphabet object.
Definition: concept.hpp:416
to_string.hpp
Auxiliary for pretty printing of exception messages.
seqan3::pack_traits::count
constexpr ptrdiff_t count
Count the occurrences of a type in a pack.
Definition: traits.hpp:134
seqan3::field::alignment
The (pairwise) alignment stored in an seqan3::alignment object.