SeqAn3  3.0.1
The Modern C++ library for sequence analysis.
debug_matrix.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2020, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2020, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
13 #pragma once
14 
15 #include <iomanip>
16 
25 
26 namespace seqan3::detail
27 {
28 
60 template <matrix matrix_t, typename first_sequence_t = std::nullopt_t, typename second_sequence_t = std::nullopt_t>
61 class debug_matrix
62 {
63 protected:
65  static constexpr bool has_first_sequence = !std::is_same_v<std::decay_t<first_sequence_t>, std::nullopt_t>;
67  static constexpr bool has_second_sequence = !std::is_same_v<std::decay_t<second_sequence_t>, std::nullopt_t>;
69  using entry_t = typename std::remove_reference_t<matrix_t>::value_type;
71  static constexpr bool is_traceback_matrix = std::is_same_v<std::decay_t<entry_t>, trace_directions>;
74  static constexpr bool is_optional_score = is_type_specialisation_of_v<entry_t, std::optional>;
75 public:
76 
80  using value_type = std::conditional_t<is_traceback_matrix || is_optional_score,
82  entry_t,
85  using reference = value_type;
87  using const_reference = reference;
89  using size_type = typename std::remove_reference_t<matrix_t>::size_type;
91 
// Construction, copy, move, assignment and destruction are all compiler-generated.
95  debug_matrix() = default;
96  debug_matrix(debug_matrix const &) = default;
97  debug_matrix(debug_matrix &&) = default;
98  debug_matrix & operator=(debug_matrix const &) = default;
99  debug_matrix & operator=(debug_matrix &&) = default;
100  ~debug_matrix() = default;
101 
//!\brief Construct from a matrix only.
//!\param matrix The matrix to wrap.
// Delegates to the three-argument constructor, passing std::nullopt for both sequences.
105  debug_matrix(matrix_t matrix)
106  : debug_matrix(std::forward<matrix_t>(matrix), std::nullopt, std::nullopt)
107  {}
108 
114  debug_matrix(matrix_t matrix, first_sequence_t first_sequence, second_sequence_t second_sequence)
115  : _matrix{std::forward<matrix_t>(matrix)},
116  _first_sequence{std::forward<first_sequence_t>(first_sequence)},
117  _second_sequence{std::forward<second_sequence_t>(second_sequence)}
118  {
119  if constexpr(has_first_sequence)
120  {
121  assert(_matrix.cols() <= _first_sequence.size() + 1u);
122  }
123 
124  if constexpr(has_second_sequence)
125  {
126  assert(_matrix.rows() <= _second_sequence.size() + 1u);
127  }
128  }
130 
132  size_t rows() const noexcept
133  {
134  if (!_transpose)
135  return _rows.value_or(_matrix.rows());
136  else
137  return _cols.value_or(_matrix.cols());
138  }
139 
141  size_t cols() const noexcept
142  {
143  if (!_transpose)
144  return _cols.value_or(_matrix.cols());
145  else
146  return _rows.value_or(_matrix.rows());
147  }
148 
150  first_sequence_t const & first_sequence() const noexcept
151  {
152  if (!_transpose)
153  return _first_sequence;
154  else
155  return _second_sequence;
156  }
157 
159  second_sequence_t const & second_sequence() const noexcept
160  {
161  if (!_transpose)
162  return _second_sequence;
163  else
164  return _first_sequence;
165  }
166 
168  const_reference at(matrix_coordinate const & coordinate) const noexcept
169  {
170  size_t row = coordinate.row;
171  size_t col = coordinate.col;
172 
173  assert(row < rows() && col < cols());
174 
175  row_index_type const _row{!_transpose ? row : col};
176  column_index_type const _col{!_transpose ? col : row};
177  row_index_type const _mask_row{_transpose == _transpose_mask ? row : col};
178  column_index_type const _mask_col{_transpose == _transpose_mask ? col : row};
179 
180  if (!_masking_matrix.has_value() || _masking_matrix.value().at({_mask_row, _mask_col}))
181  {
182  entry_t const & entry = _matrix.at({_row, _col});
183 
184  if (!is_traceback_matrix || !_transpose)
185  return entry;
186 
187  if constexpr(is_traceback_matrix)
188  {
189  trace_directions reverse{};
190  if ((entry & trace_directions::left) == trace_directions::left)
191  reverse |= trace_directions::up;
192  if ((entry & trace_directions::up) == trace_directions::up)
193  reverse |= trace_directions::left;
194  if ((entry & trace_directions::diagonal) == trace_directions::diagonal)
195  reverse |= trace_directions::diagonal;
196  return reverse;
197  }
198  }
199 
200  if constexpr(is_traceback_matrix)
201  return trace_directions::none;
202  else
203  return std::nullopt;
204  }
205 
212  debug_matrix & mask_matrix(row_wise_matrix<bool> masking_matrix) noexcept
213  {
214  assert(masking_matrix.rows() == rows());
215  assert(masking_matrix.cols() == cols());
216  _transpose_mask = _transpose;
217  _masking_matrix = std::move(masking_matrix);
218  return *this;
219  }
220 
225  debug_matrix & mask_matrix(std::vector<bool> masking_vector) noexcept
226  {
227  return mask_matrix(row_wise_matrix<bool>{number_rows{rows()},
228  number_cols{cols()},
229  std::move(masking_vector)});
230  }
231 
237  debug_matrix & sub_matrix(size_t const new_rows, size_t const new_cols) noexcept
238  {
239  assert(new_rows <= rows());
240  assert(new_cols <= cols());
241  if (!_transpose)
242  {
243  _rows = new_rows;
244  _cols = new_cols;
245  }
246  else
247  {
248  _rows = new_cols;
249  _cols = new_rows;
250  }
251  return *this;
252  }
253 
257  debug_matrix & transpose_matrix() noexcept
258  {
259  _transpose = !_transpose;
260  return *this;
261  }
262 
263 protected:
// The symbol table type; it is defined further down in this class and used by stream_matrix() and entry_at().
265  struct format_type; // forward declaration
267 
268 public:
278  template <typename ostream_t>
279  void stream_matrix(ostream_t & cout, fmtflags2 const flags) const noexcept
280  {
281  format_type const & symbols = (flags & fmtflags2::utf8) == fmtflags2::utf8 ? unicode : csv;
282  size_t const column_width = this->column_width.has_value() ?
283  this->column_width.value() : auto_column_width(flags);
284 
285  auto char_first_sequence = [&]([[maybe_unused]] size_t const i) -> std::string
286  {
287  if constexpr(!has_first_sequence)
288  return " ";
289  else
290  return as_string(first_sequence()[i], flags);
291  };
292 
293  auto char_second_sequence = [&]([[maybe_unused]] size_t const i) -> std::string
294  {
295  if constexpr(!has_second_sequence)
296  return " ";
297  else
298  return as_string(second_sequence()[i], flags);
299  };
300 
301  auto print_cell = [&](std::string const & symbol)
302  {
303  // deal with unicode chars that mess up std::setw
304  size_t const length_bytes = symbol.size();
305  size_t const length = unicode_str_length(symbol);
306  size_t const offset = length_bytes - length;
307 
308  cout << std::left
309  << std::setw(column_width + offset)
310  << symbol
311  << symbols.col_sep;
312  };
313 
314  auto print_first_cell = [&](std::string const & symbol)
315  {
316  cout << symbol << symbols.col_sep;
317  };
318 
319  // |_|d|a|t|a|b|a|s|e|
320  auto print_first_row = [&]
321  {
322  print_first_cell(" ");
323  print_cell(symbols.epsilon);
324 
325  for (size_t col = 0; col < cols() - 1; ++col)
326  print_cell(char_first_sequence(col));
327 
328  cout << "\n";
329  };
330 
331  // |-|-|-|-|-|-|-|-|-|
332  auto print_divider = [&]
333  {
334  cout << " " << symbols.row_col_sep;
335  for (size_t col = 0; col < cols(); ++col)
336  {
337  for (size_t i = 0; i < column_width; ++i)
338  cout << symbols.row_sep;
339 
340  cout << symbols.row_col_sep;
341  }
342  cout << "\n";
343  };
344 
345  print_first_row();
346  for (size_t row = 0; row < rows(); ++row)
347  {
348  if (symbols.row_sep[0] != '\0')
349  print_divider();
350 
351  // one query letter + one row of scores / traces
352  if (row == 0)
353  print_first_cell(symbols.epsilon);
354  else
355  print_first_cell(char_second_sequence(row - 1));
356 
357  for (size_t col = 0; col < cols(); ++col)
358  print_cell(entry_at({row_index_type{row}, column_index_type{col}}, flags));
359 
360  cout << "\n";
361  }
362  }
363 
365  size_t auto_column_width(fmtflags2 const flags) const noexcept
366  {
367  size_t col_width = 1;
368  for (size_t row = 0; row < rows(); ++row)
369  for (size_t col = 0; col < cols(); ++col)
370  col_width = std::max(col_width,
371  unicode_str_length(entry_at({row_index_type{row}, column_index_type{col}}, flags)));
372 
373  return col_width;
374  }
375 
376 protected:
378  std::string entry_at(matrix_coordinate const coordinate, fmtflags2 flags) const noexcept
379  {
380  format_type const & symbols = (flags & fmtflags2::utf8) == fmtflags2::utf8 ? unicode : csv;
381 
382  value_type const & entry = at(coordinate);
383  if (!is_traceback_matrix && entry == matrix_inf<value_type>)
384  return symbols.inf;
385 
386  return as_string(entry, flags);
387  }
388 
390  template <typename value_type>
391  static std::string as_string(value_type && entry, fmtflags2 const flags) noexcept
392  {
393  std::stringstream strstream;
394  debug_stream_type stream{strstream};
395  stream << flags << entry;
396  return strstream.str();
397  }
398 
//!\brief Count the visible characters (code points) of a UTF-8 encoded string.
//!\param str The UTF-8 encoded text.
//!\returns The number of code points; every byte that is not recognised as the lead byte of a multi-byte
//!         sequence counts as one character.
//!
//! The length of a sequence is derived from its lead byte only (110xxxxx: 2 bytes, 1110xxxx: 3 bytes,
//! 11110xxx: 4 bytes). A sequence that is cut off at the end of the string still counts as one character;
//! the position is clamped to the string size instead of being advanced beyond the end.
static size_t unicode_str_length(std::string const & str) noexcept
{
    size_t const size_bytes = str.size();
    size_t length = 0u;

    for (size_t pos = 0u; pos < size_bytes; ++length)
    {
        uint8_t const lead = static_cast<uint8_t>(str[pos]);

        size_t step = 1u;
        if ((lead & 0b11100000) == 0b11000000)
            step = 2u;
        else if ((lead & 0b11110000) == 0b11100000)
            step = 3u;
        else if ((lead & 0b11111000) == 0b11110000)
            step = 4u;

        // never step past the end, even if the last sequence is truncated
        pos = (size_bytes - pos < step) ? size_bytes : pos + step;
    }

    return length;
}
416 
//!\brief The set of symbols stream_matrix() and entry_at() use to render the matrix.
418  struct format_type
419  {
// Printed in the header row and as the label of the first matrix row.
421  char const * epsilon{};
// Printed after every cell.
423  char const * col_sep{};
// Repeated to draw a divider line; if it is the empty string, no divider lines are printed.
425  char const * row_sep{};
// Printed where a divider line meets a column boundary.
427  char const * row_col_sep{};
// Printed instead of a non-trace entry that equals matrix_inf<value_type>.
429  char const * inf{};
430  };
431 
// Plain symbol set: cells separated by ';', no divider lines, empty infinity symbol.
433  static constexpr format_type csv{" ", ";", "", "", ""};
// Symbol set selected when fmtflags2::utf8 is set: box-drawing separators, epsilon and infinity signs.
435  static constexpr format_type unicode{"ε", "║", "═", "╬", "∞"};
436 
437 public:
// Optional fixed column width for stream_matrix(); if unset, auto_column_width() is used instead.
439  std::optional<size_t> column_width{std::nullopt};
440 
441 protected:
// The wrapped matrix.
443  matrix_t _matrix;
// The first sequence as passed to the constructor (std::nullopt_t if none was given).
445  first_sequence_t _first_sequence;
// The second sequence as passed to the constructor (std::nullopt_t if none was given).
447  second_sequence_t _second_sequence;
// Cropped number of rows set by sub_matrix(), in the orientation of the wrapped matrix; empty if not cropped.
449  std::optional<size_t> _rows{};
// Cropped number of columns set by sub_matrix(), in the orientation of the wrapped matrix; empty if not cropped.
451  std::optional<size_t> _cols{};
// Mask set by mask_matrix(); at() returns "no value" for cells whose mask entry is false.
453  std::optional<row_wise_matrix<bool>> _masking_matrix{};
// Whether the view is currently transposed; toggled by transpose_matrix().
455  bool _transpose{};
// The value _transpose had when the mask was set.
457  bool _transpose_mask{};
458 };
459 
// Deduction guide: construction from a matrix alone yields a debug_matrix with both sequence types defaulted.
464 template <matrix matrix_t>
466 debug_matrix(matrix_t &&)
467  -> debug_matrix<matrix_t>;
468 
// Deduction guide: construction from a matrix and two sequences deduces all three template arguments
// from the forwarding references.
471 template <matrix matrix_t, typename first_sequence_t, typename second_sequence_t>
472 debug_matrix(matrix_t &&, first_sequence_t &&, second_sequence_t &&)
473  -> debug_matrix<matrix_t, first_sequence_t, second_sequence_t>;
475 
476 } // namespace seqan3::detail
477 
478 namespace seqan3
479 {
490 template <detail::matrix alignment_matrix_t, typename char_t>
491 inline debug_stream_type<char_t> & operator<<(debug_stream_type<char_t> & s, alignment_matrix_t && matrix)
492 {
493  detail::debug_matrix debug{std::forward<alignment_matrix_t>(matrix)};
494 
495  std::stringstream sstream{};
496  debug.stream_matrix(sstream, s.flags2());
497  s << sstream.str();
498  return s;
499 }
500 
//!\brief Overload for types that are both an input range and a matrix.
// The argument is wrapped in a detail::debug_matrix and streamed again, so it is printed as a matrix.
502 template <std::ranges::input_range alignment_matrix_t, typename char_t>
504  requires detail::debug_stream_range_guard<alignment_matrix_t> && detail::matrix<alignment_matrix_t>
506 inline debug_stream_type<char_t> & operator<<(debug_stream_type<char_t> & s, alignment_matrix_t && matrix)
507 {
508  return s << detail::debug_matrix{std::forward<alignment_matrix_t>(matrix)};
509 }
510 
511 } // namespace seqan3
debug_stream_optional.hpp
Provides seqan3::debug_stream and related types.
seqan3::utf8
Enables use of non-ASCII UTF8 characters in formatted output.
Definition: debug_stream_type.hpp:31
matrix_concept.hpp
Provides seqan3::detail::matrix.
std::string
seqan3::field::offset
Sequence (SEQ) relative start position (0-based), unsigned value.
std::vector
std::stringstream
seqan3::debug_stream_type::flags2
fmtflags2 flags2() const
Retrieve the format flags from the stream.
Definition: debug_stream_type.hpp:198
seqan3::views::move
const auto move
A view that turns lvalue-references into rvalue-references.
Definition: move.hpp:68
template_inspection.hpp
Provides seqan3::type_list and auxiliary type traits.
std::reverse
T reverse(T... args)
debug_stream_type.hpp
Provides seqan3::debug_stream and related types.
trace_directions.hpp
Provides the declaration of seqan3::detail::trace_directions.
seqan3::debug_stream_type
A "pretty printer" for most SeqAn data structures and related types.
Definition: debug_stream_type.hpp:70
std::forward
T forward(T... args)
seqan3
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:36
debug_stream_alphabet.hpp
Provides seqan3::debug_stream and related types.
debug_stream_range.hpp
Provides seqan3::debug_stream and related types.
seqan3::operator<<
debug_stream_type< char_t > & operator<<(debug_stream_type< char_t > &stream, tuple_t const &alignment)
Stream operator for alignments, which are represented as tuples of aligned sequences.
Definition: aligned_sequence_concept.hpp:559
std::remove_reference_t
std::nullopt_t
std::left
T left(T... args)
iomanip
std
SeqAn specific customisations in the standard namespace.
seqan3::fmtflags2
fmtflags2
Flags that change the behaviour of the seqan3::debug_stream.
Definition: debug_stream_type.hpp:28
std::optional
std::stringstream::str
T str(T... args)
std::setw
T setw(T... args)
std::conditional_t
std::max
T max(T... args)
seqan3::pack_traits::at
typename decltype(detail::at< idx, pack_t... >())::type at
Return the type at given index from the type pack.
Definition: traits.hpp:221
row_wise_matrix.hpp
Provides seqan3::detail::row_wise_matrix.