A class for writing alignment files, e.g. SAM, BAM, BLAST, ... More...
#include <seqan3/io/sam_file/output.hpp>
Public Types | |
using | field_ids = fields< field::seq, field::id, field::offset, field::ref_id, field::ref_offset, field::alignment, field::cigar, field::mapq, field::flag, field::qual, field::mate, field::tags, field::header_ptr > |
The subset of seqan3::field IDs that are valid for this file. | |
Range associated types | |
Most of the range associated types are void for output ranges. | |
using | value_type = void |
The value type (void). | |
using | reference = void |
The reference type (void). | |
using | const_reference = void |
The const reference type (void). | |
using | size_type = void |
The size type (void). | |
using | difference_type = std::ptrdiff_t |
A signed integer type, usually std::ptrdiff_t. | |
using | iterator = detail::out_file_iterator< sam_file_output > |
The iterator type of this view (an output iterator). | |
using | const_iterator = void |
The const iterator type is void, because files are not const-iterable. | |
using | sentinel = std::default_sentinel_t |
The type returned by end(). | |
Public Member Functions | |
auto & | header () |
Access the file's header. More... | |
Constructors, destructor and assignment | |
sam_file_output ()=delete | |
Default constructor is explicitly deleted, you need to give a stream or file name. | |
sam_file_output (sam_file_output const &)=delete | |
Copy construction is explicitly deleted, because you can't have multiple access to the same file. | |
sam_file_output & | operator= (sam_file_output const &)=delete |
Copy assignment is explicitly deleted, because you can't have multiple access to the same file. | |
sam_file_output (sam_file_output &&)=default | |
Move construction is defaulted. | |
sam_file_output & | operator= (sam_file_output &&)=default |
Move assignment is defaulted. | |
~sam_file_output ()=default | |
Destructor is defaulted. | |
sam_file_output (std::filesystem::path filename, selected_field_ids const &fields_tag=selected_field_ids{}) | |
Construct from filename. More... | |
template<output_stream stream_type, sam_file_output_format file_format> | |
sam_file_output (stream_type &stream, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{}) | |
Construct from an existing stream and with specified format. More... | |
template<output_stream stream_type, sam_file_output_format file_format> | |
sam_file_output (stream_type &&stream, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{}) | |
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts. | |
template<typename ref_ids_type_ , std::ranges::forward_range ref_lengths_type> | |
sam_file_output (std::filesystem::path const &filename, ref_ids_type_ &&ref_ids, ref_lengths_type &&ref_lengths, selected_field_ids const &fields_tag=selected_field_ids{}) | |
Construct from filename. More... | |
template<output_stream stream_type, sam_file_output_format file_format, typename ref_ids_type_ , std::ranges::forward_range ref_lengths_type> | |
sam_file_output (stream_type &&stream, ref_ids_type_ &&ref_ids, ref_lengths_type &&ref_lengths, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{}) | |
Construct from an existing stream and with specified format. More... | |
Public Attributes | |
sam_file_output_options | options |
The options are public and its members can be set directly. | |
Static Public Attributes | |
static constexpr bool | is_default_selected_field_ids = selected_field_ids::size == field_ids::size |
Does selected_field_ids contain all fields like in the default case? | |
Related Functions | |
(Note that these are not member functions.) | |
Type deduction guides | |
template<detail::fields_specialisation selected_field_ids> | |
sam_file_output (std::filesystem::path, selected_field_ids const &) -> sam_file_output< selected_field_ids, typename sam_file_output<>::valid_formats, ref_info_not_given > | |
Deduces selected_field_ids from input and sets sam_file_output::ref_ids_type to seqan3::detail::ref_info_not_given. valid_formats is set to the default. | |
template<output_stream stream_type, sam_file_output_format file_format, detail::fields_specialisation selected_field_ids> | |
sam_file_output (stream_type &&, file_format const &, selected_field_ids const &) -> sam_file_output< selected_field_ids, type_list< file_format >, ref_info_not_given > | |
Deduces selected_field_ids, and the valid format from input and sets sam_file_output::ref_ids_type to seqan3::detail::ref_info_not_given. | |
template<output_stream stream_type, sam_file_output_format file_format, detail::fields_specialisation selected_field_ids> | |
sam_file_output (stream_type &, file_format const &, selected_field_ids const &) -> sam_file_output< selected_field_ids, type_list< file_format >, ref_info_not_given > | |
Deduces selected_field_ids, and the valid format from input and sets sam_file_output::ref_ids_type to seqan3::detail::ref_info_not_given. | |
template<output_stream stream_type, sam_file_output_format file_format> | |
sam_file_output (stream_type &&, file_format const &) -> sam_file_output< typename sam_file_output<>::selected_field_ids, type_list< file_format >, ref_info_not_given > | |
Deduces the valid format from input and sets sam_file_output::ref_ids_type to seqan3::detail::ref_info_not_given. selected_field_ids is set to the default. | |
template<output_stream stream_type, sam_file_output_format file_format> | |
sam_file_output (stream_type &, file_format const &) -> sam_file_output< typename sam_file_output<>::selected_field_ids, type_list< file_format >, ref_info_not_given > | |
Deduces the valid format from input and sets sam_file_output::ref_ids_type to seqan3::detail::ref_info_not_given. selected_field_ids is set to the default. | |
template<detail::fields_specialisation selected_field_ids, std::ranges::forward_range ref_ids_type, std::ranges::forward_range ref_lengths_type> | |
sam_file_output (std::filesystem::path const &, ref_ids_type &&, ref_lengths_type &&, selected_field_ids const &) -> sam_file_output< selected_field_ids, typename sam_file_output<>::valid_formats, std::remove_reference_t< ref_ids_type >> | |
Deduces selected_field_ids and ref_ids_type from input. valid_formats is set to the default. | |
template<std::ranges::forward_range ref_ids_type, std::ranges::forward_range ref_lengths_type> | |
sam_file_output (std::filesystem::path const &, ref_ids_type &&, ref_lengths_type &&) -> sam_file_output< typename sam_file_output<>::selected_field_ids, typename sam_file_output<>::valid_formats, std::remove_reference_t< ref_ids_type >> | |
Deduces ref_ids_type from input. Valid formats, and selected_field_ids are set to the default. | |
template<output_stream stream_type, std::ranges::forward_range ref_ids_type, std::ranges::forward_range ref_lengths_type, sam_file_output_format file_format, detail::fields_specialisation selected_field_ids> | |
sam_file_output (stream_type &&, ref_ids_type &&, ref_lengths_type &&, file_format const &, selected_field_ids const &) -> sam_file_output< selected_field_ids, type_list< file_format >, std::remove_reference_t< ref_ids_type >> | |
Deduces selected_field_ids, the valid format, and the ref_ids_type from input. | |
template<output_stream stream_type, std::ranges::forward_range ref_ids_type, std::ranges::forward_range ref_lengths_type, sam_file_output_format file_format, detail::fields_specialisation selected_field_ids> | |
sam_file_output (stream_type &, ref_ids_type &&, ref_lengths_type &&, file_format const &, selected_field_ids const &) -> sam_file_output< selected_field_ids, type_list< file_format >, std::remove_reference_t< ref_ids_type >> | |
Deduces selected_field_ids, the valid format, and the ref_ids_type from input. | |
template<output_stream stream_type, std::ranges::forward_range ref_ids_type, std::ranges::forward_range ref_lengths_type, sam_file_output_format file_format> | |
sam_file_output (stream_type &&, ref_ids_type &&, ref_lengths_type &&, file_format const &) -> sam_file_output< typename sam_file_output<>::selected_field_ids, type_list< file_format >, std::remove_reference_t< ref_ids_type >> | |
Deduces the valid format, and the ref_ids_type from input. selected_field_ids set to the default. | |
template<output_stream stream_type, std::ranges::forward_range ref_ids_type, std::ranges::forward_range ref_lengths_type, sam_file_output_format file_format> | |
sam_file_output (stream_type &, ref_ids_type &&, ref_lengths_type &&, file_format const &) -> sam_file_output< typename sam_file_output<>::selected_field_ids, type_list< file_format >, std::remove_reference_t< ref_ids_type >> | |
Deduces the valid format, and the ref_ids_type from input. selected_field_ids set to the default. | |
Template arguments | |
using | selected_field_ids = selected_field_ids_ |
A seqan3::fields list with the fields selected for the record. | |
using | valid_formats = valid_formats_ |
A seqan3::type_list with the possible formats. | |
using | stream_char_type = char |
Character type of the stream(s). | |
Range interface | |
iterator | begin () noexcept |
Returns an iterator to current position in the file. More... | |
sentinel | end () noexcept |
Returns a sentinel for comparison with iterator. More... | |
template<typename record_t > | |
void | push_back (record_t &&r) |
Write a seqan3::record to the file. More... | |
template<typename tuple_t > | |
void | push_back (tuple_t &&t) |
Write a record in form of a std::tuple to the file. More... | |
template<typename arg_t , typename ... arg_types> | |
void | emplace_back (arg_t &&arg, arg_types &&... args) requires(!is_default_selected_field_ids) |
Write a record to the file by passing individual fields. More... | |
template<typename rng_t > | |
sam_file_output & | operator= (rng_t &&range) |
Write a range of records (or tuples) to the file. More... | |
template<typename rng_t > | |
sam_file_output & | operator| (rng_t &&range, sam_file_output &f) |
Write a range of records (or tuples) to the file. More... | |
template<typename rng_t > | |
sam_file_output | operator| (rng_t &&range, sam_file_output &&f) |
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts. | |
A class for writing alignment files, e.g. SAM, BAM, BLAST, ...
selected_field_ids | A seqan3::fields type with the list and order of fields IDs; only relevant if these can't be deduced. |
valid_formats | A seqan3::type_list of the selectable formats (each must model seqan3::sam_file_output_format). |
Alignment files are primarily used to store pairwise alignments of two biological sequences and often come with a lot of additional information. Well-known formats include the SAM/BAM format used to store read mapping data or the BLAST format that stores the results of a query search against a database.
The SAM file abstraction supports writing 12 different fields:
There exists one more field for SAM files, the seqan3::field::header_ptr, but this field is mostly used internally. Please see the seqan3::sam_file_output::header member function for details on how to access the seqan3::sam_file_header of the file.
All of these fields are retrieved by default (and in that order). Note that some of the fields are specific to the SAM format (e.g. seqan3::field::flag) while others are specific to BLAST format (e.g. seqan3::field::bit_score). Please see the corresponding formats for more details.
The member functions take any and either of these fields.
This class comes with two constructors, one for construction from a file name and one for construction from an existing stream and a known format. The first one automatically picks the format based on the extension of the file name. The second can be used if you have a non-file stream, like std::cout or std::ostringstream, that you want to write to and/or if you cannot use file-extension based detection, but know that your output file has a certain format.
In most cases the template parameters are deduced completely automatically:
Writing to std::cout:
Note that this is not the same as writing sam_file_output<>
(with angle brackets). In the latter case they are explicitly set to their default values, in the former case automatic deduction happens which chooses different parameters depending on the constructor arguments. For opening from file, sam_file_output<>
would have also worked, but for opening from stream it would not have.
The easiest way to write to an alignment file is to use the push_back() member functions. These work similarly to how they work on an std::vector. You may also use a tuple-like interface or the emplace_back() function but this is not recommended since one would have to keep track of the correct order of many fields (13 in total). For the record based interface using push_back please also see the seqan3::record documentation on how to specify a record with the correct field and type lists.
You may also use the output file's iterator for writing, however, this rarely provides an advantage.
If you want to omit non-required parameters or change the order of the parameters, you can pass a non-empty fields trait object to the seqan3::sam_file_output constructor to select the fields that are used for interpreting the arguments.
The following snippet demonstrates the usage of such a field_traits object.
A different way of passing custom fields to the file is to pass a seqan3::record – instead of a tuple – to push_back(). The seqan3::record clearly indicates which of its elements has which seqan3::field so the file will use that information instead of the template argument. This is especially handy when reading from one file and writing to another, because you don't have to configure the output file to match the input file, it will just work:
This will copy the FLAG and REF_OFFSET value into the new output file. Note that the other SAM columns in the output file will have a default value, so unless you specify to read all SAM columns (see seqan3::format_sam) the output file will not be equal to the input file.
You can write multiple records at once, by assigning to the file:
Record-wise writing in batches also works for writing from input files directly to output files, because input files are also input ranges in SeqAn:
This can be combined with file-based views to create I/O pipelines:
We currently support writing the following formats:
|
no-apiinline |
Construct from filename.
[in] | filename | Path to the file you wish to open. |
[in] | fields_tag | A seqan3::fields tag. [optional] |
In addition to the file name, you may specify a custom seqan3::fields type which may be easier than defining all the template parameters.
This constructor transparently applies a compression stream on top of the file stream in case the given file extension suggests the user wants this. See the section on compression and decompression for more information.
In most cases the template parameters are deduced completely automatically:
Writing with custom selected fields:
|
no-apiinline |
Construct from an existing stream and with specified format.
stream_type | The type of stream to write to; must model seqan3::output_stream. |
file_format | The format of the file in the stream, must satisfy seqan3::sam_file_output_format. |
[out] | stream | The stream to write to, must be derived from std::basic_ostream<stream_char_t>. |
[in] | format_tag | The file format tag. |
[in] | fields_tag | A seqan3::fields tag. [optional] |
This constructor does not apply compression transparently (because there is no way to know if the user wants this). However, you can just pass e.g. seqan3::contrib::gz_ostream to this constructor if you explicitly want compression. See the section on compression and decompression for more information.
|
no-apiinline |
Construct from filename.
ref_ids_type_ | The type of range over reference ids; must model std::forward_range. |
ref_lengths_type | The type of range over reference lengths; must model std::forward_range. |
[in] | filename | Path to the file you wish to open. |
[in] | ref_ids | A range over reference ids. |
[in] | ref_lengths | A range over lengths of reference sequences (same order as ref_ids). |
[in] | fields_tag | A seqan3::fields tag. [optional] |
In addition to the file name, you may specify a custom seqan3::fields type which may be easier than defining all the template parameters.
This constructor transparently applies a compression stream on top of the file stream in case the given file extension suggests the user wants this. See the section on compression and decompression for more information.
In most cases the template parameters are deduced completely automatically:
Writing with custom selected fields:
|
no-apiinline |
Construct from an existing stream and with specified format.
stream_type | The type of stream to write to; must model seqan3::output_stream. |
file_format | The format of the file in the stream, must model seqan3::sam_file_output_format. |
ref_ids_type_ | The type of range over reference ids; must model std::forward_range. |
ref_lengths_type | The type of range over reference lengths; must model std::forward_range. |
[in] | stream | The stream to operate on (this must be std::move'd in!). |
[in] | ref_ids | A range over reference ids. |
[in] | ref_lengths | A range over lengths of reference sequences (same order as ref_ids). |
[in] | format_tag | The file format tag. |
[in] | fields_tag | A seqan3::fields tag. [optional] |
This constructor does not apply compression transparently (because there is no way to know if the user wants this). However, you can just pass e.g. seqan3::contrib::gz_ostream to this constructor if you explicitly want compression. See the section on compression and decompression for more information.
|
no-apiinlinenoexcept |
Returns an iterator to current position in the file.
You can write to the file by assigning to the iterator, but using push_back() is usually more intuitive.
Constant.
No-throw guarantee.
|
no-apiinline |
Write a record to the file by passing individual fields.
arg_t | Type of the first field. |
arg_types | Types of further fields. |
[in] | arg | The first field to write. |
[in] | args | Further fields. |
The fields are assumed to correspond to the field IDs given in selected_field_ids, however passing fewer is accepted if the format does not require all of them.
Constant.
Basic exception safety.
|
no-apiinlinenoexcept |
Returns a sentinel for comparison with iterator.
This element acts as a placeholder; attempting to dereference it results in undefined behaviour. It always compares false against an iterator.
Constant.
No-throw guarantee.
|
no-apiinline |
Access the file's header.
|
no-apiinline |
Write a range of records (or tuples) to the file.
rng_t | Type of the range, must satisfy std::ranges::input_range and have a reference type that satisfies seqan3::tuple_like. |
[in] | range | The range to write. |
This function simply iterates over the argument and calls push_back() on each element.
Linear in the number of records.
Basic exception safety.
|
no-apiinline |
Write a seqan3::record to the file.
record_t | Type of the record, a specialisation of seqan3::record. |
[in] | r | The record to write. |
Constant.
Basic exception safety.
|
no-apiinline |
Write a record in form of a std::tuple to the file.
tuple_t | Type of the record, a specialisation of std::tuple. |
[in] | t | The record to write. |
The fields in the tuple are assumed to correspond to the field IDs given in selected_field_ids, however passing fewer is accepted if the format does not require all of them.
Constant.
Basic exception safety.
|
no-apifriend |
Write a range of records (or tuples) to the file.
rng_t | Type of the range, must satisfy std::ranges::input_range and have a reference type that satisfies seqan3::tuple_like. |
[in] | range | The range to write. |
[in] | f | The file being written to. |
This operator enables sam_file_output to be at the end of a piping operation. It just calls operator=() internally.
Linear in the number of records.
Basic exception safety.
This is especially useful in combination with file-based filters: