Raptor
A fast and space-efficient pre-filter
All Classes Namespaces Files Functions Variables Macros Pages Concepts
validators.hpp
Go to the documentation of this file.
1// SPDX-FileCopyrightText: 2006-2024 Knut Reinert & Freie Universität Berlin
2// SPDX-FileCopyrightText: 2016-2024 Knut Reinert & MPI für molekulare Genetik
3// SPDX-License-Identifier: BSD-3-Clause
4
10#pragma once
11
12#include <sharg/parser.hpp>
13
14#include <seqan3/io/sequence_file/input.hpp>
15
17
18namespace raptor::detail
19{
20
21static inline std::vector<std::string> sequence_extensions{
22 seqan3::detail::valid_file_extensions<typename seqan3::sequence_file_input<>::valid_formats>()};
23
// Compression suffixes, filled once by an immediately-invoked lambda:
// "bz2" is added when SEQAN3_HAS_BZIP2 is defined, "gz" and "bgzf" when SEQAN3_HAS_ZLIB is defined.
// With neither macro defined nothing is pushed.
// NOTE(review): the declaration of `result` is not visible in this listing; presumably a local
// std::vector<std::string> declared at the top of the lambda — confirm against the original header.
24static inline std::vector<std::string> compression_extensions{[]()
25 {
27#ifdef SEQAN3_HAS_BZIP2
28 result.push_back("bz2");
29#endif
30#ifdef SEQAN3_HAS_ZLIB
31 result.push_back("gz");
32 result.push_back("bgzf");
33#endif
34 return result;
35 }()}; // GCOVR_EXCL_LINE
36
// Every sequence extension on its own plus every "<sequence>.<compression>" combination,
// built once by an immediately-invoked lambda.
// NOTE(review): the declaration of `result` is not visible in this listing; presumably a local
// std::vector<std::string> declared before the loop — confirm against the original header.
37static inline std::vector<std::string> combined_extensions{
38 []()
39 {
// Without any compression extension the plain sequence extensions are returned unchanged.
40 if (compression_extensions.empty())
41 return sequence_extensions; // GCOVR_EXCL_LINE
43 for (auto && sequence_extension : sequence_extensions)
44 {
// The bare extension comes first, followed by its compressed variants.
45 result.push_back(sequence_extension);
46 for (auto && compression_extension : compression_extensions)
47 result.push_back(sequence_extension + std::string{'.'} + compression_extension);
48 }
49 return result;
50 }()};
51
52} // namespace raptor::detail
53
54namespace raptor
55{
56
58{
59 using option_value_type = size_t;
60
61 void operator()(option_value_type const & val) const
62 {
63 if (!std::has_single_bit(val))
64 throw sharg::validation_error{"The value must be a power of two."};
65 }
66
//!\brief Returns the fixed description of the requirement checked by this validator.
static std::string get_help_page_message()
{
    return std::string{"Value must be a power of two."};
}
71};
72
74{
75public:
76 using option_value_type = size_t;
77
// Copy assignment is explicitly defaulted, i.e. it uses the compiler-generated member-wise copy.
80 positive_integer_validator & operator=(positive_integer_validator const &) = default;
84
85 explicit positive_integer_validator(bool const is_zero_positive_) : is_zero_positive{is_zero_positive_}
86 {}
87
88 void operator()(option_value_type const & val) const
89 {
90 if (!is_zero_positive && !val)
91 throw sharg::validation_error{"The value must be a positive integer."};
92 }
93
94 std::string get_help_page_message() const
95 {
96 if (is_zero_positive)
97 return "Value must be a positive integer or 0.";
98 else
99 return "Value must be a positive integer.";
100 }
101
102private:
103 bool is_zero_positive{false};
104};
105
107{
108public:
110
// Default construction, copy/move construction, copy/move assignment and destruction
// are all explicitly defaulted.
111 size_validator() = default;
112 size_validator(size_validator const &) = default;
113 size_validator & operator=(size_validator const &) = default;
114 size_validator(size_validator &&) = default;
115 size_validator & operator=(size_validator &&) = default;
116 ~size_validator() = default;
117
118 explicit size_validator(std::string const & pattern) : expression{pattern}
119 {}
120
121 void operator()(option_value_type const & cmp) const
122 {
123 if (!std::regex_match(cmp, expression))
124 throw sharg::validation_error{
125 seqan3::detail::to_string("Value ",
126 cmp,
127 " must be an integer followed by [k,m,g,t] (case insensitive).")};
128 }
129
// Range overload: invokes this validator's call operator on every element of `v`,
// so the first element that fails makes the call throw.
// NOTE(review): a line between the template header and the signature is not visible in this
// listing (possibly a constraint on the range's value type) — confirm against the original header.
130 template <std::ranges::forward_range range_type>
132 void operator()(range_type const & v) const
133 {
134 std::for_each(v.begin(),
135 v.end(),
// The lambda takes each element by value, i.e. every element is copied before it is checked.
136 [&](auto cmp)
137 {
138 (*this)(cmp);
139 });
140 }
141
142 std::string get_help_page_message() const
143 {
144 return "Must be an integer followed by [k,m,g,t] (case insensitive).";
145 }
146
147private:
148 std::regex expression;
149};
150
152{
153public:
155
// Default construction, copy/move construction, copy/move assignment and destruction
// are all explicitly defaulted.
156 bin_validator() = default;
157 bin_validator(bin_validator const &) = default;
158 bin_validator & operator=(bin_validator const &) = default;
159 bin_validator(bin_validator &&) = default;
160 bin_validator & operator=(bin_validator &&) = default;
161 ~bin_validator() = default;
162
163 void operator()(option_value_type const & values) const
164 {
165 if (values.empty())
166 throw sharg::validation_error{"The list of input files cannot be empty."};
167
168 bool const is_minimiser_input = std::filesystem::path{values[0][0]}.extension() == ".minimiser";
169
170 for (std::vector<std::string> const & vector_of_paths : values)
171 {
172 for (std::string const & value : vector_of_paths)
173 {
174 std::filesystem::path const file_path{value};
175
176 if (is_minimiser_input && (file_path.extension() != ".minimiser"))
177 throw sharg::validation_error{"You cannot mix sequence and minimiser files as input."};
178 if (!std::filesystem::exists(file_path))
179 throw sharg::validation_error{"The file " + value + " does not exist."};
180 if (std::filesystem::file_size(file_path) == 0u)
181 throw sharg::validation_error{"The file " + value + " is empty."};
182
183 if (is_minimiser_input)
184 minimiser_file_validator(file_path);
185 else
186 sequence_file_validator(file_path);
187 }
188 }
189 }
190
191 std::string get_help_page_message() const
192 {
193 return seqan3::detail::to_string("The file must contain at least one file path per line, with multiple paths "
194 "being separated by a whitespace. Each line in the file corresponds to one "
195 "bin. Valid extensions for the paths in the file are [minimiser] when "
196 " using preprocessed input from \fBraptor prepare\fP, and ",
197 raptor::detail::sequence_extensions,
198#if defined(SEQAN3_HAS_BZIP2) || defined(SEQAN3_HAS_ZLIB)
199 ", possibly followed by ",
200 raptor::detail::compression_extensions,
201#endif
202 ". ");
203 }
204
205private:
206 sharg::input_file_validator minimiser_file_validator{{"minimiser"}};
207
208public:
209 sharg::input_file_validator sequence_file_validator{raptor::detail::combined_extensions};
210};
211
213{
214public:
216
// Default construction, copy assignment and destruction are explicitly defaulted.
217 output_directory_validator() = default;
219 output_directory_validator & operator=(output_directory_validator const &) = default;
222 ~output_directory_validator() = default;
223
// Validates an output directory path: reports a pending error code as sharg::validation_error and
// then hands the path to the wrapped `validator` member.
// NOTE(review): nothing visible here assigns `ec` between its construction and the check;
// presumably a directory-creation call taking `ec` belongs in between — confirm against the
// original header.
// NOTE(review): the literal "Failed to create directory\"" has no space before the opening quote,
// so the message reads `directory"<path>"`; this looks unintended.
224 void operator()(option_value_type const & value) const
225 {
226 std::filesystem::path const out_dir{value};
227 std::error_code ec{};
229 if (ec)
230 // GCOVR_EXCL_START
231 throw sharg::validation_error{
232 sharg::detail::to_string("Failed to create directory\"", out_dir.c_str(), "\": ", ec.message())};
233 // GCOVR_EXCL_STOP
234
// Remaining checks are delegated to the wrapped validator.
235 validator(out_dir);
236 }
237
238 std::string get_help_page_message() const
239 {
240 return "A valid path for the output directory.";
241 }
242
243private:
244 sharg::output_directory_validator validator{};
245};
246
248{
249public:
251
// Default construction, copy/move assignment and destruction are explicitly defaulted.
252 output_file_validator() = default;
254 output_file_validator & operator=(output_file_validator const &) = default;
256 output_file_validator & operator=(output_file_validator &&) = default;
257 ~output_file_validator() = default;
258
// Validates an output file path: if the path has a non-empty parent directory, a pending error
// code is reported as sharg::validation_error; afterwards the path is handed to the wrapped
// `validator` member.
// NOTE(review): nothing visible here assigns `ec` between its construction and the check;
// presumably a directory-creation call for `out_dir` taking `ec` belongs in between — confirm
// against the original header.
259 void operator()(option_value_type const & value) const
260 {
261 std::filesystem::path const out_path{value};
262 std::filesystem::path const out_dir{out_path.parent_path()};
// A bare file name has an empty parent path; the directory handling is skipped in that case.
263 if (!out_dir.empty())
264 {
265 // GCOVR_EXCL_START
266 std::error_code ec{};
268 if (ec)
269 throw sharg::validation_error{
270 sharg::detail::to_string("Failed to create directory \"", out_dir.c_str(), "\": ", ec.message())};
271 // GCOVR_EXCL_STOP
272 }
273
// Remaining checks are delegated to the wrapped validator.
274 validator(out_path);
275 }
276
277 std::string get_help_page_message() const
278 {
279 return "A valid path for the output file. Write permissions must be granted.";
280 }
281
282private:
283 sharg::output_file_validator validator{sharg::output_file_open_options::open_or_create};
284};
285
286class sequence_file_validator : public sharg::input_file_validator
287{
288private:
289 using base_t = sharg::input_file_validator;
290
291public:
292 using base_t::base_t;
293
294 std::string get_help_page_message() const
295 {
296 return seqan3::detail::to_string(
297 "The input file must exist and read permissions must be granted. Valid file extensions are ",
298 raptor::detail::sequence_extensions,
299#if defined(SEQAN3_HAS_BZIP2) || defined(SEQAN3_HAS_ZLIB)
300 ", possibly followed by ",
301 raptor::detail::compression_extensions,
302#endif
303 ". ");
304 }
305};
306
307} // namespace raptor
Definition validators.hpp:152
Definition validators.hpp:213
Definition validators.hpp:248
Definition validators.hpp:74
Definition validators.hpp:287
Definition validators.hpp:107
T create_directories(T... args)
T empty(T... args)
T exists(T... args)
T file_size(T... args)
T for_each(T... args)
T has_single_bit(T... args)
T min(T... args)
T parent_path(T... args)
T push_back(T... args)
T regex_match(T... args)
Provides raptor::window.
Definition validators.hpp:58
Hide me