SeqAn3 3.1.0
The Modern C++ library for sequence analysis.
debug_matrix.hpp
Go to the documentation of this file.
1// -----------------------------------------------------------------------------------------------------
2// Copyright (c) 2006-2021, Knut Reinert & Freie Universität Berlin
3// Copyright (c) 2016-2021, Knut Reinert & MPI für molekulare Genetik
4// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6// -----------------------------------------------------------------------------------------------------
7
13#pragma once
14
15#include <iomanip>
16
25
26namespace seqan3::detail
27{
28
60template <matrix matrix_t, typename first_sequence_t = std::nullopt_t, typename second_sequence_t = std::nullopt_t>
61class debug_matrix
62{
63protected:
65 static constexpr bool has_first_sequence = !std::is_same_v<std::decay_t<first_sequence_t>, std::nullopt_t>;
67 static constexpr bool has_second_sequence = !std::is_same_v<std::decay_t<second_sequence_t>, std::nullopt_t>;
69 using entry_t = typename std::remove_reference_t<matrix_t>::value_type;
71 static constexpr bool is_traceback_matrix = std::is_same_v<std::decay_t<entry_t>, trace_directions>;
74 static constexpr bool is_optional_score = is_type_specialisation_of_v<entry_t, std::optional>;
75public:
76
81 using value_type = std::conditional_t<is_traceback_matrix || is_optional_score,
82 entry_t,
85 using reference = value_type;
87 using const_reference = reference;
89 using size_type = typename std::remove_reference_t<matrix_t>::size_type;
91
95 debug_matrix() = default;
96 debug_matrix(debug_matrix const &) = default;
97 debug_matrix(debug_matrix &&) = default;
98 debug_matrix & operator=(debug_matrix const &) = default;
99 debug_matrix & operator=(debug_matrix &&) = default;
100 ~debug_matrix() = default;
101
105 debug_matrix(matrix_t matrix)
106 : debug_matrix(std::forward<matrix_t>(matrix), std::nullopt, std::nullopt)
107 {}
108
114 debug_matrix(matrix_t matrix, first_sequence_t first_sequence, second_sequence_t second_sequence)
115 : _matrix{std::forward<matrix_t>(matrix)},
116 _first_sequence{std::forward<first_sequence_t>(first_sequence)},
117 _second_sequence{std::forward<second_sequence_t>(second_sequence)}
118 {
119 if constexpr(has_first_sequence)
120 {
121 assert(_matrix.cols() <= _first_sequence.size() + 1u);
122 }
123
124 if constexpr(has_second_sequence)
125 {
126 assert(_matrix.rows() <= _second_sequence.size() + 1u);
127 }
128 }
130
132 size_t rows() const noexcept
133 {
134 if (!_transpose)
135 return _rows.value_or(_matrix.rows());
136 else
137 return _cols.value_or(_matrix.cols());
138 }
139
141 size_t cols() const noexcept
142 {
143 if (!_transpose)
144 return _cols.value_or(_matrix.cols());
145 else
146 return _rows.value_or(_matrix.rows());
147 }
148
150 first_sequence_t const & first_sequence() const noexcept
151 {
152 if (!_transpose)
153 return _first_sequence;
154 else
155 return _second_sequence;
156 }
157
159 second_sequence_t const & second_sequence() const noexcept
160 {
161 if (!_transpose)
162 return _second_sequence;
163 else
164 return _first_sequence;
165 }
166
168 const_reference at(matrix_coordinate const & coordinate) const noexcept
169 {
170 size_t row = coordinate.row;
171 size_t col = coordinate.col;
172
173 assert(row < rows() && col < cols());
174
175 row_index_type const _row{!_transpose ? row : col};
176 column_index_type const _col{!_transpose ? col : row};
177 row_index_type const _mask_row{_transpose == _transpose_mask ? row : col};
178 column_index_type const _mask_col{_transpose == _transpose_mask ? col : row};
179
180 if (!_masking_matrix.has_value() || _masking_matrix.value().at({_mask_row, _mask_col}))
181 {
182 entry_t const & entry = _matrix.at({_row, _col});
183
184 if (!is_traceback_matrix || !_transpose)
185 return entry;
186
187 if constexpr(is_traceback_matrix)
188 {
189 trace_directions reverse{};
190 if ((entry & trace_directions::left) == trace_directions::left)
191 reverse |= trace_directions::up;
192 if ((entry & trace_directions::up) == trace_directions::up)
193 reverse |= trace_directions::left;
194 if ((entry & trace_directions::diagonal) == trace_directions::diagonal)
195 reverse |= trace_directions::diagonal;
196 return reverse;
197 }
198 }
199
200 if constexpr(is_traceback_matrix)
201 return trace_directions::none;
202 else
203 return std::nullopt;
204 }
205
212 debug_matrix & mask_matrix(row_wise_matrix<bool> masking_matrix) noexcept
213 {
214 assert(masking_matrix.rows() == rows());
215 assert(masking_matrix.cols() == cols());
216 _transpose_mask = _transpose;
217 _masking_matrix = std::move(masking_matrix);
218 return *this;
219 }
220
225 debug_matrix & mask_matrix(std::vector<bool> masking_vector) noexcept
226 {
227 return mask_matrix(row_wise_matrix<bool>{number_rows{rows()},
228 number_cols{cols()},
229 std::move(masking_vector)});
230 }
231
237 debug_matrix & sub_matrix(size_t const new_rows, size_t const new_cols) noexcept
238 {
239 assert(new_rows <= rows());
240 assert(new_cols <= cols());
241 if (!_transpose)
242 {
243 _rows = new_rows;
244 _cols = new_cols;
245 }
246 else
247 {
248 _rows = new_cols;
249 _cols = new_rows;
250 }
251 return *this;
252 }
253
257 debug_matrix & transpose_matrix() noexcept
258 {
259 _transpose = !_transpose;
260 return *this;
261 }
262
263protected:
//!\brief Forward declaration of the symbol set used when printing; needed by stream_matrix() and entry_at().
struct format_type;
267
268public:
278 template <typename ostream_t>
279 void stream_matrix(ostream_t & cout, fmtflags2 const flags) const noexcept
280 {
281 format_type const & symbols = (flags & fmtflags2::utf8) == fmtflags2::utf8 ? unicode : csv;
282 size_t const column_width = this->column_width.has_value() ?
283 this->column_width.value() : auto_column_width(flags);
284
285 auto char_first_sequence = [&]([[maybe_unused]] size_t const i) -> std::string
286 {
287 if constexpr(!has_first_sequence)
288 return " ";
289 else
290 return as_string(first_sequence()[i], flags);
291 };
292
293 auto char_second_sequence = [&]([[maybe_unused]] size_t const i) -> std::string
294 {
295 if constexpr(!has_second_sequence)
296 return " ";
297 else
298 return as_string(second_sequence()[i], flags);
299 };
300
301 auto print_cell = [&](std::string const & symbol)
302 {
303 // deal with unicode chars that mess up std::setw
304 size_t const length_bytes = symbol.size();
305 size_t const length = unicode_str_length(symbol);
306 size_t const offset = length_bytes - length;
307
308 cout << std::left
309 << std::setw(column_width + offset)
310 << symbol
311 << symbols.col_sep;
312 };
313
314 auto print_first_cell = [&](std::string const & symbol)
315 {
316 cout << symbol << symbols.col_sep;
317 };
318
319 // |_|d|a|t|a|b|a|s|e|
320 auto print_first_row = [&]
321 {
322 print_first_cell(" ");
323 print_cell(symbols.epsilon);
324
325 for (size_t col = 0; col < cols() - 1; ++col)
326 print_cell(char_first_sequence(col));
327
328 cout << "\n";
329 };
330
331 // |-|-|-|-|-|-|-|-|-|
332 auto print_divider = [&]
333 {
334 cout << " " << symbols.row_col_sep;
335 for (size_t col = 0; col < cols(); ++col)
336 {
337 for (size_t i = 0; i < column_width; ++i)
338 cout << symbols.row_sep;
339
340 cout << symbols.row_col_sep;
341 }
342 cout << "\n";
343 };
344
345 print_first_row();
346 for (size_t row = 0; row < rows(); ++row)
347 {
348 if (symbols.row_sep[0] != '\0')
349 print_divider();
350
351 // one query letter + one row of scores / traces
352 if (row == 0)
353 print_first_cell(symbols.epsilon);
354 else
355 print_first_cell(char_second_sequence(row - 1));
356
357 for (size_t col = 0; col < cols(); ++col)
358 print_cell(entry_at({row_index_type{row}, column_index_type{col}}, flags));
359
360 cout << "\n";
361 }
362 }
363
365 size_t auto_column_width(fmtflags2 const flags) const noexcept
366 {
367 size_t col_width = 1;
368 for (size_t row = 0; row < rows(); ++row)
369 for (size_t col = 0; col < cols(); ++col)
370 col_width = std::max(col_width,
371 unicode_str_length(entry_at({row_index_type{row}, column_index_type{col}}, flags)));
372
373 return col_width;
374 }
375
376protected:
378 std::string entry_at(matrix_coordinate const coordinate, fmtflags2 flags) const noexcept
379 {
380 format_type const & symbols = (flags & fmtflags2::utf8) == fmtflags2::utf8 ? unicode : csv;
381
382 value_type const & entry = at(coordinate);
383 if (!is_traceback_matrix && entry == matrix_inf<value_type>)
384 return symbols.inf;
385
386 return as_string(entry, flags);
387 }
388
390 template <typename value_type>
391 static std::string as_string(value_type && entry, fmtflags2 const flags) noexcept
392 {
393 std::stringstream strstream;
394 debug_stream_type stream{strstream};
395 stream << flags << entry;
396 return strstream.str();
397 }
398
//!\brief Counts the characters (code points) of a UTF-8 encoded string.
//!\param[in] str The string to measure.
//!\returns The number of lead/single bytes encountered: a byte starting with `110`, `1110` or `11110`
//!         consumes 2, 3 or 4 bytes respectively and counts once; every other byte counts once on its own.
//!\details No validation is done. A multi-byte sequence that is cut off at the end of the string still
//!         counts as one character; the position is index based, so it is never advanced through an
//!         iterator beyond `end()`.
static size_t unicode_str_length(std::string const & str) noexcept
{
    size_t const size = str.size();
    size_t length = 0u;

    for (size_t pos = 0u; pos < size; ++length)
    {
        auto const lead = static_cast<unsigned char>(str[pos]);

        size_t step = 1u;                                // ASCII or stray continuation byte
        if ((lead & 0b11100000u) == 0b11000000u)
            step = 2u;                                   // 110xxxxx: two byte sequence
        else if ((lead & 0b11110000u) == 0b11100000u)
            step = 3u;                                   // 1110xxxx: three byte sequence
        else if ((lead & 0b11111000u) == 0b11110000u)
            step = 4u;                                   // 11110xxx: four byte sequence

        pos += step;
    }

    return length;
}
416
//!\brief The set of symbols used to draw a matrix.
struct format_type
{
    char const * epsilon     = nullptr; //!< Label of the leading (empty-prefix) row and column.
    char const * col_sep     = nullptr; //!< Printed after every cell.
    char const * row_sep     = nullptr; //!< Fill symbol of divider lines; empty string disables dividers.
    char const * row_col_sep = nullptr; //!< Printed where a divider line crosses a column separator.
    char const * inf         = nullptr; //!< Printed for entries that equal infinity.
};

//!\brief Plain symbol set: semicolon separated cells, no divider lines.
static constexpr format_type csv{
    " ",
    ";",
    "",
    "",
    ""
};

//!\brief Symbol set that uses non-ASCII UTF-8 characters.
static constexpr format_type unicode{
    "ε",
    "║",
    "═",
    "╬",
    "∞"
};
436
437public:
//!\brief Fixed width of every printed column; if unset, stream_matrix() computes it via auto_column_width().
std::optional<size_t> column_width{};
440
441protected:
443 matrix_t _matrix;
445 first_sequence_t _first_sequence;
447 second_sequence_t _second_sequence;
449 std::optional<size_t> _rows{};
451 std::optional<size_t> _cols{};
453 std::optional<row_wise_matrix<bool>> _masking_matrix{};
455 bool _transpose{};
457 bool _transpose_mask{};
458};
459
465template <matrix matrix_t>
466debug_matrix(matrix_t &&)
467 -> debug_matrix<matrix_t>;
468
471template <matrix matrix_t, typename first_sequence_t, typename second_sequence_t>
472debug_matrix(matrix_t &&, first_sequence_t &&, second_sequence_t &&)
473 -> debug_matrix<matrix_t, first_sequence_t, second_sequence_t>;
475
476} // namespace seqan3::detail
477
478namespace seqan3
479{
490template <typename char_t, detail::matrix alignment_matrix_t>
491inline debug_stream_type<char_t> & operator<<(debug_stream_type<char_t> & s, alignment_matrix_t && matrix)
492{
493 detail::debug_matrix debug{std::forward<alignment_matrix_t>(matrix)};
494
495 std::stringstream sstream{};
496 debug.stream_matrix(sstream, s.flags2());
497 s << sstream.str();
498 return s;
499}
500
502template <typename char_t, std::ranges::input_range alignment_matrix_t>
504 requires detail::debug_stream_range_guard<alignment_matrix_t> && detail::matrix<alignment_matrix_t>
506inline debug_stream_type<char_t> & operator<<(debug_stream_type<char_t> & s, alignment_matrix_t && matrix)
507{
508 return s << detail::debug_matrix{std::forward<alignment_matrix_t>(matrix)};
509}
510
511} // namespace seqan3
Provides seqan3::debug_stream and related types.
Provides seqan3::debug_stream and related types.
T forward(T... args)
debug_stream_type< char_t > & operator<<(debug_stream_type< char_t > &stream, alignment_t &&alignment)
Stream operator for alignments, which are represented as tuples of aligned sequences.
Definition: debug_stream_alignment.hpp:101
fmtflags2
Flags that change the behaviour of the seqan3::debug_stream.
Definition: debug_stream_type.hpp:31
@ utf8
Enables use of non-ASCII UTF8 characters in formatted output.
Definition: debug_stream_type.hpp:33
@ offset
Sequence (seqan3::field::seq) relative start position (0-based), unsigned value.
typename decltype(detail::at< idx >(list_t{}))::type at
Return the type at given index from the type list.
Definition: traits.hpp:260
T left(T... args)
Provides seqan3::detail::matrix.
T max(T... args)
The main SeqAn3 namespace.
Definition: cigar_operation_table.hpp:2
SeqAn specific customisations in the standard namespace.
Provides seqan3::debug_stream and related types.
Provides seqan3::debug_stream and related types.
T reverse(T... args)
Provides seqan3::detail::row_wise_matrix.
T setw(T... args)
T str(T... args)
Provides type traits for working with templates.
Provides the declaration of seqan3::detail::trace_directions.