SeqAn3 3.4.0-rc.1
The Modern C++ library for sequence analysis.
Loading...
Searching...
No Matches
io/structure_file/output.hpp
Go to the documentation of this file.
1// SPDX-FileCopyrightText: 2006-2024 Knut Reinert & Freie Universität Berlin
2// SPDX-FileCopyrightText: 2016-2024 Knut Reinert & MPI für molekulare Genetik
3// SPDX-License-Identifier: BSD-3-Clause
4
10#pragma once
11
12#include <cassert>
13#include <filesystem>
14#include <fstream>
15#include <optional>
16#include <ranges>
17#include <string>
18#include <type_traits>
19#include <variant>
20#include <vector>
21
24#include <seqan3/io/detail/record.hpp>
27#include <seqan3/io/record.hpp>
37
38namespace seqan3
39{
40
41// ----------------------------------------------------------------------------
42// structure_file_output
43// ----------------------------------------------------------------------------
44
57template <detail::fields_specialisation selected_field_ids_ = fields<field::seq, field::id, field::structure>,
58 detail::type_list_of_structure_file_output_formats valid_formats_ = type_list<format_vienna>>
60{
61public:
67 using selected_field_ids = selected_field_ids_;
69 using valid_formats = valid_formats_;
71 using stream_char_type = char;
73
85
86 static_assert(
87 []() constexpr
88 {
89 for (field f : selected_field_ids::as_array)
90 if (!field_ids::contains(f))
91 return false;
92 return true;
93 }(),
94 "You selected a field that is not valid for structure files, please refer to the documentation "
95 "of structure_file_output::field_ids for the accepted values.");
96
97 static_assert(
98 []() constexpr
99 {
100 return !(selected_field_ids::contains(field::structured_seq)
101 && (selected_field_ids::contains(field::seq) || (selected_field_ids::contains(field::structure))));
102 }(),
103 "You may not select field::structured_seq and either of field::seq and field::structure "
104 "at the same time.");
105
112 using value_type = void;
114 using reference = void;
116 using const_reference = void;
118 using size_type = void;
122 using iterator = detail::out_file_iterator<structure_file_output>;
124 using const_iterator = void;
126 using sentinel = std::default_sentinel_t;
128
144
161 selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
162 primary_stream{new std::ofstream{}, stream_deleter_default}
163 {
164 primary_stream->rdbuf()->pubsetbuf(stream_buffer.data(), stream_buffer.size());
165 static_cast<std::basic_ofstream<char> *>(primary_stream.get())
166 ->open(filename, std::ios_base::out | std::ios::binary);
167
168 if (!primary_stream->good())
169 throw file_open_error{"Could not open file " + filename.string() + " for writing."};
170
171 // possibly add intermediate compression stream
172 secondary_stream = detail::make_secondary_ostream(*primary_stream, filename);
173
174 // initialise format handler or throw if format is not found
175 detail::set_format(format, filename);
176 }
177
194 template <output_stream stream_t, structure_file_output_format file_format>
195 requires std::same_as<typename std::remove_reference_t<stream_t>::char_type, char>
196 structure_file_output(stream_t & stream,
197 file_format const & SEQAN3_DOXYGEN_ONLY(format_tag),
198 selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
199 primary_stream{&stream, stream_deleter_noop},
200 secondary_stream{&stream, stream_deleter_noop},
201 format{detail::structure_file_output_format_exposer<file_format>{}}
202 {
203 static_assert(list_traits::contains<file_format, valid_formats>,
204 "You selected a format that is not in the valid_formats of this file.");
205 }
206
208 template <output_stream stream_t, structure_file_output_format file_format>
209 requires std::same_as<typename std::remove_reference_t<stream_t>::char_type, char>
210 structure_file_output(stream_t && stream,
211 file_format const & SEQAN3_DOXYGEN_ONLY(format_tag),
212 selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
213 primary_stream{new stream_t{std::move(stream)}, stream_deleter_default},
214 secondary_stream{&*primary_stream, stream_deleter_noop},
215 format{detail::structure_file_output_format_exposer<file_format>{}}
216 {
217 static_assert(list_traits::contains<file_format, valid_formats>,
218 "You selected a format that is not in the valid_formats of this file.");
219 }
221
243 iterator begin() noexcept
244 {
245 return {*this};
246 }
247
262 sentinel end() noexcept
263 {
264 return {};
265 }
266
285 template <typename record_t>
286 void push_back(record_t && r)
287 requires detail::record_like<record_t>
288 {
289 write_record(detail::get_or_ignore<field::seq>(r),
290 detail::get_or_ignore<field::id>(r),
291 detail::get_or_ignore<field::bpp>(r),
292 detail::get_or_ignore<field::structure>(r),
293 detail::get_or_ignore<field::structured_seq>(r),
294 detail::get_or_ignore<field::energy>(r),
295 detail::get_or_ignore<field::react>(r),
296 detail::get_or_ignore<field::react_err>(r),
297 detail::get_or_ignore<field::comment>(r),
298 detail::get_or_ignore<field::offset>(r));
299 }
300
322 template <typename tuple_t>
323 void push_back(tuple_t && t)
324 requires tuple_like<tuple_t> && (!detail::record_like<tuple_t>)
325 {
326 // index_of might return npos, but this will be handled well by get_or_ignore (and just return ignore)
327 write_record(detail::get_or_ignore<selected_field_ids::index_of(field::seq)>(t),
328 detail::get_or_ignore<selected_field_ids::index_of(field::id)>(t),
329 detail::get_or_ignore<selected_field_ids::index_of(field::bpp)>(t),
330 detail::get_or_ignore<selected_field_ids::index_of(field::structure)>(t),
331 detail::get_or_ignore<selected_field_ids::index_of(field::structured_seq)>(t),
332 detail::get_or_ignore<selected_field_ids::index_of(field::energy)>(t),
333 detail::get_or_ignore<selected_field_ids::index_of(field::react)>(t),
334 detail::get_or_ignore<selected_field_ids::index_of(field::react_err)>(t),
335 detail::get_or_ignore<selected_field_ids::index_of(field::comment)>(t),
336 detail::get_or_ignore<selected_field_ids::index_of(field::offset)>(t));
337 }
338
362 template <typename arg_t, typename... arg_types>
363 void emplace_back(arg_t && arg, arg_types &&... args)
364 {
365 push_back(std::tie(arg, args...));
366 }
367
389 template <std::ranges::input_range rng_t>
392 {
393 for (auto && record : range)
394 push_back(std::forward<decltype(record)>(record));
395 return *this;
396 }
397
425 template <std::ranges::input_range rng_t>
428 {
429 f = range;
430 return f;
431 }
432
434 template <std::ranges::input_range rng_t>
437 {
438 f = range;
439 return std::move(f);
440 }
442
445
450 {
451 return *secondary_stream;
452 }
454
455protected:
458 std::vector<char> stream_buffer{std::vector<char>(1'000'000)};
459
467 static void stream_deleter_noop(std::basic_ostream<stream_char_type> *)
468 {}
470 static void stream_deleter_default(std::basic_ostream<stream_char_type> * ptr)
471 {
472 delete ptr;
473 }
474
476 stream_ptr_t primary_stream{nullptr, stream_deleter_noop};
478 stream_ptr_t secondary_stream{nullptr, stream_deleter_noop};
479
481 using format_type =
482 typename detail::variant_from_tags<valid_formats, detail::structure_file_output_format_exposer>::type;
484 format_type format;
486
488 template <typename seq_type,
489 typename id_type,
490 typename bpp_type,
491 typename structure_type,
492 typename structured_seq_type,
493 typename energy_type,
494 typename react_type,
495 typename comment_type,
496 typename offset_type>
497 void write_record(seq_type && seq,
498 id_type && id,
499 bpp_type && bpp,
500 structure_type && structure,
501 structured_seq_type && structured_seq,
502 energy_type && energy,
503 react_type && react,
504 react_type && react_error,
505 comment_type && comment,
506 offset_type && offset)
507 {
508 static_assert(detail::decays_to_ignore_v<structured_seq_type>
509 || (detail::decays_to_ignore_v<seq_type> && detail::decays_to_ignore_v<structure_type>),
510 "You may not select field::structured_seq and either of field::seq and field::structure "
511 "at the same time.");
512
513 assert(!format.valueless_by_exception());
515 [&](auto & f)
516 {
517 if constexpr (!detail::decays_to_ignore_v<structured_seq_type>)
518 {
519 f.write_structure_record(*secondary_stream,
520 options,
521 structured_seq | views::elements<0>,
522 id,
523 bpp,
524 structured_seq | views::elements<1>,
525 energy,
526 react,
527 react_error,
528 comment,
529 offset);
530 }
531 else
532 {
533 f.write_structure_record(*secondary_stream,
534 options,
535 seq,
536 id,
537 bpp,
538 structure,
539 energy,
540 react,
541 react_error,
542 comment,
543 offset);
544 }
545 },
546 format);
547 }
548
550 friend iterator;
551};
552
559template <output_stream stream_t,
560 structure_file_output_format file_format,
561 detail::fields_specialisation selected_field_ids>
562structure_file_output(stream_t &&, file_format const &, selected_field_ids const &)
564
566template <output_stream stream_t,
568 detail::fields_specialisation selected_field_ids>
569structure_file_output(stream_t &, file_format const &, selected_field_ids const &)
572
573} // namespace seqan3
The generic concept for structure file out formats.
Definition structure_file/output_format_concept.hpp:138
A class for writing structured sequence files, e.g. Stockholm, Connect, Vienna, ViennaRNA bpp matrix ...
Definition io/structure_file/output.hpp:60
std::default_sentinel_t sentinel
The type returned by end().
Definition io/structure_file/output.hpp:126
void push_back(tuple_t &&t)
Write a record in form of a std::tuple to the file.
Definition io/structure_file/output.hpp:323
friend structure_file_output & operator|(rng_t &&range, structure_file_output &f)
Write a range of records (or tuples) to the file.
Definition io/structure_file/output.hpp:426
void const_reference
The const reference type (void).
Definition io/structure_file/output.hpp:116
friend structure_file_output operator|(rng_t &&range, structure_file_output &&f)
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition io/structure_file/output.hpp:435
detail::out_file_iterator< structure_file_output > iterator
The iterator type of this view (an output iterator).
Definition io/structure_file/output.hpp:122
char stream_char_type
Character type of the stream(s).
Definition io/structure_file/output.hpp:71
structure_file_output & operator=(structure_file_output &&)=default
Move assignment is defaulted.
structure_file_output_options options
The options are public and its members can be set directly.
Definition io/structure_file/output.hpp:444
selected_field_ids_ selected_field_ids
A seqan3::fields list with the fields selected for the record.
Definition io/structure_file/output.hpp:67
void push_back(record_t &&r)
Write a seqan3::record to the file.
Definition io/structure_file/output.hpp:286
structure_file_output()=delete
Default constructor is explicitly deleted, you need to give a stream or file name.
structure_file_output(std::filesystem::path filename, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from filename.
Definition io/structure_file/output.hpp:160
void emplace_back(arg_t &&arg, arg_types &&... args)
Write a record to the file by passing individual fields.
Definition io/structure_file/output.hpp:363
structure_file_output & operator=(rng_t &&range)
Write a range of records (or tuples) to the file.
Definition io/structure_file/output.hpp:390
void size_type
The size type (void).
Definition io/structure_file/output.hpp:118
structure_file_output(stream_t &&, file_format const &, selected_field_ids const &) -> structure_file_output< selected_field_ids, type_list< file_format > >
Deduction of the selected fields, the file format and the stream type.
void reference
The reference type (void).
Definition io/structure_file/output.hpp:114
iterator begin() noexcept
Returns an iterator to current position in the file.
Definition io/structure_file/output.hpp:243
sentinel end() noexcept
Returns a sentinel for comparison with iterator.
Definition io/structure_file/output.hpp:262
structure_file_output(stream_t &, file_format const &, selected_field_ids const &) -> structure_file_output< selected_field_ids, type_list< file_format > >
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
structure_file_output & operator=(structure_file_output const &)=delete
Copy assignment is explicitly deleted, because you can't have multiple access to the same file.
structure_file_output(structure_file_output &&)=default
Move construction is defaulted.
structure_file_output(stream_t &&stream, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition io/structure_file/output.hpp:210
void value_type
The value type (void).
Definition io/structure_file/output.hpp:112
valid_formats_ valid_formats
A seqan3::type_list with the possible formats.
Definition io/structure_file/output.hpp:69
structure_file_output(stream_t &stream, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from an existing stream and with specified format.
Definition io/structure_file/output.hpp:196
void const_iterator
The const iterator type is void, because files are not const-iterable.
Definition io/structure_file/output.hpp:124
~structure_file_output()=default
Destructor is defaulted.
structure_file_output(structure_file_output const &)=delete
Copy construction is explicitly deleted, because you can't have multiple access to the same file.
T data(T... args)
Provides seqan3::views::elements.
Provides the seqan3::format_vienna.
T format(T... args)
T forward(T... args)
T get(T... args)
field
An enumerator for the fields used in file formats.
Definition record.hpp:60
@ energy
Energy of a folded sequence, represented by one float number.
@ comment
Comment field of arbitrary content, usually a string.
@ structure
Fixed interactions, usually a string of structure alphabet characters.
@ bpp
Base pair probability matrix of interactions, usually a matrix of float numbers.
@ react
Reactivity values of the sequence characters given in a vector of float numbers.
@ react_err
Reactivity error values given in a vector corresponding to seqan3::field::react.
@ offset
Sequence (seqan3::field::seq) relative start position (0-based), unsigned value.
@ structured_seq
Sequence and fixed interactions combined in one range.
@ id
The identifier, usually a string.
@ seq
The "sequence", usually a range of nucleotides or amino acids.
Whether a type behaves like a tuple.
Provides exceptions used in the I/O module.
Stream concepts.
Provides various utility functions required only for output.
The main SeqAn3 namespace.
Definition aligned_sequence_concept.hpp:26
SeqAn specific customisations in the standard namespace.
Provides the seqan3::detail::out_file_iterator class template.
Provides the seqan3::record template and the seqan3::field enum.
Provides seqan3::detail::record_like.
T size(T... args)
A class template that holds a choice of seqan3::field.
Definition record.hpp:125
Thrown if there is an unspecified filesystem or stream error while opening, e.g. permission problem.
Definition io/exception.hpp:36
The class template that file records are based on; behaves like a std::tuple.
Definition record.hpp:190
The options type defines various option members that influence the behaviour of all or some formats.
Definition structure_file/output_options.hpp:23
Provides seqan3::structure_file_output_format and auxiliary classes.
Provides seqan3::structure_file_output_options.
T tie(T... args)
Provides traits for seqan3::type_list.
Provides seqan3::tuple_like.
Provides seqan3::views::convert.
T visit(T... args)
Provides seqan3::views::zip.
Hide me