|
template<typename stream_type , typename seq_legal_alph_type , typename ref_seqs_type , typename ref_ids_type , typename stream_pos_type , typename seq_type , typename id_type , typename ref_seq_type , typename ref_id_type , typename ref_offset_type , typename cigar_type , typename flag_type , typename mapq_type , typename qual_type , typename mate_type , typename tag_dict_type , typename e_value_type , typename bit_score_type > |
void | read_alignment_record (stream_type &stream, sam_file_input_options< seq_legal_alph_type > const &options, ref_seqs_type &ref_seqs, sam_file_header< ref_ids_type > &header, stream_pos_type &position_buffer, seq_type &seq, qual_type &qual, id_type &id, ref_seq_type &ref_seq, ref_id_type &ref_id, ref_offset_type &ref_offset, cigar_type &cigar_vector, flag_type &flag, mapq_type &mapq, mate_type &mate, tag_dict_type &tag_dict, e_value_type &e_value, bit_score_type &bit_score) |
| Read from the specified stream and back-insert into the given field buffers.
|
|
template<typename stream_type , typename seq_legal_alph_type , typename stream_pos_type , typename seq_type , typename id_type , typename qual_type > |
void | read_sequence_record (stream_type &stream, sequence_file_input_options< seq_legal_alph_type > const &options, stream_pos_type &position_buffer, seq_type &sequence, id_type &id, qual_type &qualities) |
| Read from the specified stream and back-insert into the given field buffers.
|
|
template<typename stream_type , typename header_type , typename seq_type , typename id_type , typename ref_seq_type , typename ref_id_type , typename qual_type , typename mate_type , typename tag_dict_type , typename e_value_type , typename bit_score_type > |
void | write_alignment_record (stream_type &stream, sam_file_output_options const &options, header_type &&header, seq_type &&seq, qual_type &&qual, id_type &&id, ref_seq_type &&ref_seq, ref_id_type &&ref_id, std::optional< int32_t > ref_offset, std::vector< cigar > const &cigar_vector, sam_flag const flag, uint8_t const mapq, mate_type &&mate, tag_dict_type &&tag_dict, e_value_type &&e_value, bit_score_type &&bit_score) |
| Write the given fields to the specified stream.
|
|
template<typename stream_type , typename seq_type , typename id_type , typename qual_type > |
void | write_sequence_record (stream_type &stream, sequence_file_output_options const &options, seq_type &&sequence, id_type &&id, qual_type &&qualities) |
| Write the given fields to the specified stream.
|
|
The SAM format (tag).
Introduction
SAM is often used for storing alignments of several read sequences against one or more reference sequences. See the article on Wikipedia for an introduction to the format or look into the official SAM format specifications. SeqAn implements version 1.6 of the SAM specification.
Take a look at our tutorial SAM Input and Output in SeqAn for a walkthrough of how to read SAM/BAM files.
fields_specialisation
The SAM format provides the following fields: seqan3::field::seq, seqan3::field::qual, seqan3::field::id, seqan3::field::ref_seq, seqan3::field::ref_id, seqan3::field::ref_offset, seqan3::field::offset, seqan3::field::flag, seqan3::field::mapq and seqan3::field::mate. In addition there is the seqan3::field::header_ptr, which is usually only used internally to provide the range-based functionality of the file.
None of the fields are required when writing. If they are not given, a default value of '0' for numeric fields and '*' for other fields is used.
SAM format columns -> fields
Since many users will be accustomed to the columns of the SAM format, here is a mapping of the common SAM format columns to the SeqAn record fields:
Format Check
The format checks are implemented according to the official SAM format specifications in order to ensure correct SAM file output.
If a non-recoverable format violation is encountered on reading, or you specify invalid values/combinations when writing, seqan3::format_error is thrown.
Header implementation
The SAM header (if present) is read/written once in the beginning before the first record is read/written.
template<typename stream_type , typename seq_legal_alph_type , typename ref_seqs_type , typename ref_ids_type , typename stream_pos_type , typename seq_type , typename id_type , typename ref_seq_type , typename ref_id_type , typename ref_offset_type , typename cigar_type , typename flag_type , typename mapq_type , typename qual_type , typename mate_type , typename tag_dict_type , typename e_value_type , typename bit_score_type >
void seqan3::format_sam::read_alignment_record |
( |
stream_type & |
stream, |
|
|
sam_file_input_options< seq_legal_alph_type > const & |
options, |
|
|
ref_seqs_type & |
ref_seqs, |
|
|
sam_file_header< ref_ids_type > & |
header, |
|
|
stream_pos_type & |
position_buffer, |
|
|
seq_type & |
seq, |
|
|
qual_type & |
qual, |
|
|
id_type & |
id, |
|
|
ref_seq_type & |
ref_seq, |
|
|
ref_id_type & |
ref_id, |
|
|
ref_offset_type & |
ref_offset, |
|
|
cigar_type & |
cigar_vector, |
|
|
flag_type & |
flag, |
|
|
mapq_type & |
mapq, |
|
|
mate_type & |
mate, |
|
|
tag_dict_type & |
tag_dict, |
|
|
e_value_type & |
e_value, |
|
|
bit_score_type & |
bit_score |
|
) |
| |
|
inline protected |
Read from the specified stream and back-insert into the given field buffers.
- Template Parameters
-
- Parameters
-
Additional requirements
- The function must also accept std::ignore as parameter for any of the fields, except stream, options and header. [This is enforced by the concept checker!]
- In this case the data read for that field shall be discarded by the format.
template<typename stream_type , typename header_type , typename seq_type , typename id_type , typename ref_seq_type , typename ref_id_type , typename qual_type , typename mate_type , typename tag_dict_type , typename e_value_type , typename bit_score_type >
void seqan3::format_sam::write_alignment_record |
( |
stream_type & |
stream, |
|
|
sam_file_output_options const & |
options, |
|
|
header_type && |
header, |
|
|
seq_type && |
seq, |
|
|
qual_type && |
qual, |
|
|
id_type && |
id, |
|
|
ref_seq_type && |
ref_seq, |
|
|
ref_id_type && |
ref_id, |
|
|
std::optional< int32_t > |
ref_offset, |
|
|
std::vector< cigar > const & |
cigar_vector, |
|
|
sam_flag const |
flag, |
|
|
uint8_t const |
mapq, |
|
|
mate_type && |
mate, |
|
|
tag_dict_type && |
tag_dict, |
|
|
e_value_type && |
e_value, |
|
|
bit_score_type && |
bit_score |
|
) |
| |
|
inline protected |
Write the given fields to the specified stream.
- Template Parameters
-
- Parameters
-
[in,out] | stream | The output stream to write into. |
[in] | options | File specific options passed to the format. |
[in] | header | A pointer to the header object of the file. |
[in] | seq | The data for seqan3::field::seq, i.e. the query sequence. |
[in] | qual | The data for seqan3::field::qual, e.g. the query quality sequence. |
[in] | id | The data for seqan3::field::id, e.g. the read id. |
[in] | ref_seq | The data for seqan3::field::ref_seq, i.e. the reference sequence. |
[in] | ref_id | The data for seqan3::field::ref_id, e.g. the reference id. |
[in] | ref_offset | The data for seqan3::field::ref_offset, i.e. the start position of the alignment in ref_seq. |
[in] | cigar_vector | The data for seqan3::field::cigar, e.g. representing the alignment between query and ref. |
[in] | flag | The data for seqan3::field::flag, e.g. the SAM mapping flag value. |
[in] | mapq | The data for seqan3::field::mapq, e.g. the mapping quality value. |
[in] | mate | The data for seqan3::field::mate, e.g. the mate information of paired reads. |
[in] | tag_dict | The data for seqan3::field::tags, e.g. the optional SAM field tag dictionary. |
[in] | e_value | The data for seqan3::field::e_value, e.g. the e-value of the alignment (BLAST). |
[in] | bit_score | The data for seqan3::field::bit_score, e.g. the bit score of the alignment (BLAST). |