Raptor 3.0.0-rc.1
A fast and space-efficient pre-filter for querying very large collections of nucleotide sequences
 
validators.hpp
Go to the documentation of this file.
1// --------------------------------------------------------------------------------------------------
2// Copyright (c) 2006-2023, Knut Reinert & Freie Universität Berlin
3// Copyright (c) 2016-2023, Knut Reinert & MPI für molekulare Genetik
4// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5// shipped with this file and also available at: https://github.com/seqan/raptor/blob/main/LICENSE.md
6// --------------------------------------------------------------------------------------------------
7
13#pragma once
14
15#include <sharg/parser.hpp>
16
17#include <seqan3/io/sequence_file/input.hpp>
18
20
21namespace raptor::detail
22{
23
24static inline std::vector<std::string> sequence_extensions{
25 seqan3::detail::valid_file_extensions<typename seqan3::sequence_file_input<>::valid_formats>()};
26
/*!\brief The file extensions of the compression formats that are enabled at compile time.
 * \details
 * Contains "bz2" if `SEQAN3_HAS_BZIP2` is defined, and "gz" and "bgzf" if `SEQAN3_HAS_ZLIB` is defined.
 * The list is empty if neither macro is defined.
 */
static inline std::vector<std::string> compression_extensions = []()
{
    // The list that is filled depending on the available compression libraries.
    std::vector<std::string> result{};
#ifdef SEQAN3_HAS_BZIP2
    result.push_back("bz2");
#endif
#ifdef SEQAN3_HAS_ZLIB
    result.push_back("gz");
    result.push_back("bgzf");
#endif
    return result;
}(); // GCOVR_EXCL_LINE
39
40static inline std::vector<std::string> combined_extensions{
41 []()
42 {
43 if (compression_extensions.empty())
44 return sequence_extensions; // GCOVR_EXCL_LINE
46 for (auto && sequence_extension : sequence_extensions)
47 {
48 result.push_back(sequence_extension);
49 for (auto && compression_extension : compression_extensions)
50 result.push_back(sequence_extension + std::string{'.'} + compression_extension);
51 }
52 return result;
53 }()};
54
55} // namespace raptor::detail
56
57namespace raptor
58{
59
61{
62 using option_value_type = size_t;
63
64 void operator()(option_value_type const & val) const
65 {
66 if (!std::has_single_bit(val))
67 throw sharg::validation_error{"The value must be a power of two."};
68 }
69
//!\brief Returns the help page text that describes the requirement checked by this validator.
static std::string get_help_page_message()
{
    return std::string{"Value must be a power of two."};
}
74};
75
77{
78public:
79 using option_value_type = size_t;
80
83 positive_integer_validator & operator=(positive_integer_validator const &) = default;
87
88 explicit positive_integer_validator(bool const is_zero_positive_) : is_zero_positive{is_zero_positive_}
89 {}
90
91 void operator()(option_value_type const & val) const
92 {
93 if (!is_zero_positive && !val)
94 throw sharg::validation_error{"The value must be a positive integer."};
95 }
96
97 std::string get_help_page_message() const
98 {
99 if (is_zero_positive)
100 return "Value must be a positive integer or 0.";
101 else
102 return "Value must be a positive integer.";
103 }
104
105private:
106 bool is_zero_positive{false};
107};
108
110{
111public:
113
114 size_validator() = default;
115 size_validator(size_validator const &) = default;
116 size_validator & operator=(size_validator const &) = default;
117 size_validator(size_validator &&) = default;
118 size_validator & operator=(size_validator &&) = default;
119 ~size_validator() = default;
120
121 explicit size_validator(std::string const & pattern) : expression{pattern}
122 {}
123
124 void operator()(option_value_type const & cmp) const
125 {
126 if (!std::regex_match(cmp, expression))
127 throw sharg::validation_error{
128 seqan3::detail::to_string("Value ",
129 cmp,
130 " must be an integer followed by [k,m,g,t] (case insensitive).")};
131 }
132
133 template <std::ranges::forward_range range_type>
134 requires std::convertible_to<std::ranges::range_value_t<range_type>, option_value_type const &>
135 void operator()(range_type const & v) const
136 {
137 std::for_each(v.begin(),
138 v.end(),
139 [&](auto cmp)
140 {
141 (*this)(cmp);
142 });
143 }
144
145 std::string get_help_page_message() const
146 {
147 return "Must be an integer followed by [k,m,g,t] (case insensitive).";
148 }
149
150private:
151 std::regex expression;
152};
153
155{
156public:
158
159 bin_validator() = default;
160 bin_validator(bin_validator const &) = default;
161 bin_validator & operator=(bin_validator const &) = default;
162 bin_validator(bin_validator &&) = default;
163 bin_validator & operator=(bin_validator &&) = default;
164 ~bin_validator() = default;
165
166 void operator()(option_value_type const & values) const
167 {
168 if (values.empty())
169 throw sharg::validation_error{"The list of input files cannot be empty."};
170
171 bool const is_minimiser_input = std::filesystem::path{values[0][0]}.extension() == ".minimiser";
172
173 for (std::vector<std::string> const & vector_of_paths : values)
174 {
175 for (std::string const & value : vector_of_paths)
176 {
177 std::filesystem::path const file_path{value};
178
179 if (is_minimiser_input && (file_path.extension() != ".minimiser"))
180 throw sharg::validation_error{"You cannot mix sequence and minimiser files as input."};
181 if (std::filesystem::file_size(file_path) == 0u)
182 throw sharg::validation_error{"The file " + value + " is empty."};
183
184 if (is_minimiser_input)
185 minimiser_file_validator(file_path);
186 else
187 sequence_file_validator(file_path);
188 }
189 }
190 }
191
192 std::string get_help_page_message() const
193 {
194 return seqan3::detail::to_string("The file must contain at least one file path per line, with multiple paths "
195 "being separated by a whitespace. Each line in the file corresponds to one "
196 "bin. Valid extensions for the paths in the file are [minimiser] when "
197 " using preprocessed input from \\fBraptor prepare\\fP, and ",
198 raptor::detail::sequence_extensions,
199#if defined(SEQAN3_HAS_BZIP2) || defined(SEQAN3_HAS_ZLIB)
200 ", possibly followed by ",
201 raptor::detail::compression_extensions,
202#endif
203 ". ");
204 }
205
206private:
207 sharg::input_file_validator minimiser_file_validator{{"minimiser"}};
208
209public:
210 sharg::input_file_validator sequence_file_validator{raptor::detail::combined_extensions};
211};
212
214{
215public:
217
218 output_directory_validator() = default;
220 output_directory_validator & operator=(output_directory_validator const &) = default;
223 ~output_directory_validator() = default;
224
225 void operator()(option_value_type const & value) const
226 {
227 std::filesystem::path const out_dir{value};
228 std::error_code ec{};
230 if (ec)
231 // GCOVR_EXCL_START
232 throw sharg::validation_error{
233 sharg::detail::to_string("Failed to create directory\"", out_dir.c_str(), "\": ", ec.message())};
234 // GCOVR_EXCL_STOP
235
236 validator(out_dir);
237 }
238
239 std::string get_help_page_message() const
240 {
241 return "A valid path for the output directory.";
242 }
243
244private:
245 sharg::output_directory_validator validator{};
246};
247
249{
250public:
252
253 output_file_validator() = default;
255 output_file_validator & operator=(output_file_validator const &) = default;
257 output_file_validator & operator=(output_file_validator &&) = default;
258 ~output_file_validator() = default;
259
260 void operator()(option_value_type const & value) const
261 {
262 std::filesystem::path const out_path{value};
263 std::filesystem::path const out_dir{out_path.parent_path()};
264 if (!out_dir.empty())
265 {
266 std::error_code ec{};
268 if (ec)
269 // GCOVR_EXCL_START
270 throw sharg::validation_error{
271 sharg::detail::to_string("Failed to create directory \"", out_dir.c_str(), "\": ", ec.message())};
272 // GCOVR_EXCL_STOP
273 }
274
275 validator(out_path);
276 }
277
278 std::string get_help_page_message() const
279 {
280 return "A valid path for the output file. Write permissions must be granted.";
281 }
282
283private:
284 sharg::output_file_validator validator{sharg::output_file_open_options::open_or_create};
285};
286
287class sequence_file_validator : public sharg::input_file_validator
288{
289private:
290 using base_t = sharg::input_file_validator;
291
292public:
293 using base_t::base_t;
294
295 std::string get_help_page_message() const
296 {
297 return seqan3::detail::to_string(
298 "The input file must exist and read permissions must be granted. Valid file extensions are ",
299 raptor::detail::sequence_extensions,
300#if defined(SEQAN3_HAS_BZIP2) || defined(SEQAN3_HAS_ZLIB)
301 ", possibly followed by ",
302 raptor::detail::compression_extensions,
303#endif
304 ". ");
305 }
306};
307
308} // namespace raptor
Definition: validators.hpp:155
Definition: validators.hpp:214
Definition: validators.hpp:249
Definition: validators.hpp:77
Definition: validators.hpp:288
Definition: validators.hpp:110
T create_directories(T... args)
T empty(T... args)
T file_size(T... args)
T for_each(T... args)
T has_single_bit(T... args)
T parent_path(T... args)
T push_back(T... args)
T regex_match(T... args)
Provides raptor::window.
Definition: validators.hpp:61