Raptor
A fast and space-efficient pre-filter
All Classes Namespaces Files Functions Variables Macros Pages Concepts
file_reader.hpp
Go to the documentation of this file.
// SPDX-FileCopyrightText: 2006-2024 Knut Reinert & Freie Universität Berlin
// SPDX-FileCopyrightText: 2016-2024 Knut Reinert & MPI für molekulare Genetik
// SPDX-License-Identifier: BSD-3-Clause
4
10#pragma once
11
12#include <seqan3/io/sequence_file/input.hpp>
13#include <seqan3/search/views/minimiser_hash.hpp>
14
17
18namespace raptor
19{
20
//! Selects which specialisation of file_reader is used.
enum class file_types
{
    sequence, //!< Input read via seqan3::sequence_file_input; hashes are computed from the records' sequences.
    minimiser //!< Input read as a binary stream of consecutive uint64_t values.
};
26
27template <file_types file_type>
29{};
30
31template <>
32class file_reader<file_types::sequence>
33{
34public:
35 file_reader() = default;
36 file_reader(file_reader const &) = default;
37 file_reader(file_reader &&) = default; // GCOVR_EXCL_LINE
38 file_reader & operator=(file_reader const &) = default;
39 file_reader & operator=(file_reader &&) = default;
40 ~file_reader() = default;
41
42 explicit file_reader(seqan3::shape const shape, uint32_t const window_size) :
43 minimiser_view{seqan3::views::minimiser_hash(shape,
44 seqan3::window_size{window_size},
45 seqan3::seed{adjust_seed(shape.count())})}
46 {}
47
48 template <std::output_iterator<uint64_t> it_t>
49 void hash_into(std::vector<std::string> const & filenames, it_t target) const
50 {
51 for (auto && filename : filenames)
52 hash_into(filename, target);
53 }
54
55 template <std::output_iterator<uint64_t> it_t>
56 void hash_into(std::string const & filename, it_t target) const
57 {
58 sequence_file_t fin{filename};
59 for (auto && record : fin)
60 std::ranges::copy(record.sequence() | minimiser_view, target);
61 }
62
63 template <std::output_iterator<uint64_t> it_t>
64 void hash_into_if(std::vector<std::string> const & filenames, it_t target, auto && pred) const
65 {
66 for (auto && filename : filenames)
67 hash_into_if(filename, target, pred);
68 }
69
70 template <std::output_iterator<uint64_t> it_t>
71 void hash_into_if(std::string const & filename, it_t target, auto && pred) const
72 {
73 sequence_file_t fin{filename};
74 for (auto && record : fin)
75 std::ranges::copy_if(record.sequence() | minimiser_view, target, pred);
76 }
77
78 void on_hash(std::vector<std::string> const & filenames, auto && callback) const
79 {
80 for (auto && filename : filenames)
81 on_hash(filename, callback);
82 }
83
84 void on_hash(std::string const & filename, auto && callback) const
85 {
86 sequence_file_t fin{filename};
87 for (auto && record : fin)
88 callback(record.sequence() | minimiser_view);
89 }
90
91 void for_each_hash(std::vector<std::string> const & filenames, auto && callback) const
92 {
93 for (auto && filename : filenames)
94 for_each_hash(filename, callback);
95 }
96
97 void for_each_hash(std::string const & filename, auto && callback) const
98 {
99 sequence_file_t fin{filename};
100 for (auto && record : fin)
101 std::ranges::for_each(record.sequence() | minimiser_view, callback);
102 }
103
104private:
105 using sequence_file_t = seqan3::sequence_file_input<dna4_traits, seqan3::fields<seqan3::field::seq>>;
106 using view_t = decltype(seqan3::views::minimiser_hash(seqan3::shape{}, seqan3::window_size{}, seqan3::seed{}));
107 view_t minimiser_view = seqan3::views::minimiser_hash(seqan3::shape{}, seqan3::window_size{}, seqan3::seed{});
108};
109
110template <>
111class file_reader<file_types::minimiser>
112{
113public:
114 file_reader() = default;
115 file_reader(file_reader const &) = default;
116 file_reader(file_reader &&) = default;
117 file_reader & operator=(file_reader const &) = default;
118 file_reader & operator=(file_reader &&) = default;
119 ~file_reader() = default;
120
121 explicit file_reader(seqan3::shape const, uint32_t const)
122 {}
123
124 template <std::output_iterator<uint64_t> it_t>
125 void hash_into(std::vector<std::string> const & filenames, it_t target) const
126 {
127 for (auto && filename : filenames)
128 hash_into(filename, target);
129 }
130
131 template <std::output_iterator<uint64_t> it_t>
132 void hash_into(std::string const & filename, it_t target) const
133 {
134 std::ifstream fin{filename, std::ios::binary};
135 uint64_t value;
136 while (fin.read(reinterpret_cast<char *>(&value), sizeof(value)))
137 {
138 *target = value;
139 ++target;
140 }
141 }
142
143 template <std::output_iterator<uint64_t> it_t>
144 void hash_into_if(std::vector<std::string> const & filenames, it_t target, auto && pred) const
145 {
146 for (auto && filename : filenames)
147 hash_into_if(filename, target, pred);
148 }
149
150 template <std::output_iterator<uint64_t> it_t>
151 void hash_into_if(std::string const & filename, it_t target, auto && pred) const
152 {
153 std::ifstream fin{filename, std::ios::binary};
154 uint64_t value;
155 while (fin.read(reinterpret_cast<char *>(&value), sizeof(value)))
156 if (pred(value))
157 {
158 *target = value;
159 ++target;
160 }
161 }
162
163 void for_each_hash(std::vector<std::string> const & filenames, auto && callback) const
164 {
165 for (auto && filename : filenames)
166 for_each_hash(filename, callback);
167 }
168
169 void for_each_hash(std::string const & filename, auto && callback) const
170 {
171 std::ifstream fin{filename, std::ios::binary};
172 uint64_t value;
173 while (fin.read(reinterpret_cast<char *>(&value), sizeof(value)))
174 callback(value);
175 }
176};
177
178} // namespace raptor
Provides raptor::adjust_seed.
Definition file_reader.hpp:29
T copy(T... args)
Provides raptor::dna4_traits.
T for_each(T... args)
Hide me