15#include <seqan3/search/views/minimiser_hash.hpp>
17#include <hibf/contrib/std/chunk_view.hpp>
// NOTE(review): this listing appears to be a lossy extract of a rendered source page. Each
// statement carries its original line number as a prefix, and lines that held only braces,
// `else`, or certain declarations (e.g. of `records`, `minimiser`, `result_string`,
// `thresholder`, `is_ibf`) are not present. The comments below describe only what the
// visible lines show; anything else is marked as an assumption to confirm.
//
// Purpose: reads query records from `arguments.query_file` in chunks and, for every record,
// computes minimiser hashes, queries `index.ibf()` with them, and writes one result line
// through `sync_out`.
//
// Template parameter:
//   index_t   - type of the index; taken as a forwarding reference. The visible code calls
//               `index.ibf()` on it and passes it to `load_index`.
// Parameters:
//   arguments - search settings. Read here: query_file, shape, window_size, shape_weight,
//               threads, and several timers that are updated during the run.
//   index     - the index to load and query.
29template <
typename index_t>
30void search_singular_ibf(search_arguments
const & arguments, index_t && index)
// Index loading is dispatched through std::async with the std::launch::async policy; the
// visible callable body calls load_index(index, arguments).
// NOTE(review): the lambda head and the place where `cereal_future` is waited on are not
// visible in this listing.
34 auto cereal_future =
std::async(std::launch::async,
37 load_index(index, arguments);
// Query input restricted to the id and seq fields of each record.
40 seqan3::sequence_file_input<dna4_traits, seqan3::fields<seqan3::field::id, seqan3::field::seq>> fin{
41 arguments.query_file};
42 using record_type =
typename decltype(fin)::record_type;
// Output sink constructed from `arguments`; used below for the header and the result lines.
45 sync_out synced_out{arguments};
// Work item handed to do_parallel below: handles `extent` records beginning at offset
// `start` within `records` (see the std::span in the range-for further down).
49 auto worker = [&](
size_t const start,
size_t const extent)
// Timers local to one worker invocation; they are added to the timers in `arguments`
// at the end of the worker.
51 seqan::hibf::serial_timer local_compute_minimiser_timer{};
52 seqan::hibf::serial_timer local_query_ibf_timer{};
53 seqan::hibf::serial_timer local_generate_results_timer{};
// NOTE(review): two alternative heads for the same `counter` lambda appear back to back
// (original lines 56 and 58), differing only in whether `is_ibf` is captured. Presumably a
// preprocessor conditional selecting one of them was dropped from the listing - confirm
// against the real file.
56 auto counter = [&index]()
58 auto counter = [&index, is_ibf]()
// The two visible returns produce either a counting agent with uint16_t counts or a
// membership agent. The selecting condition is not visible; presumably `if constexpr (is_ibf)`.
62 return index.ibf().template counting_agent<uint16_t>();
64 return index.ibf().membership_agent();
// Minimiser-hash view adaptor configured from `arguments`: shape, window size, and a seed
// obtained by passing the shape weight through adjust_seed.
69 auto hash_adaptor = seqan3::views::minimiser_hash(arguments.shape,
70 seqan3::window_size{arguments.window_size},
71 seqan3::seed{adjust_seed(arguments.shape_weight)});
// Iterate over this worker's slice of `records`, destructuring each record into id and seq.
73 for (
auto && [
id, seq] :
std::span{records.
data() + start, extent})
// `result_string` is reused across records: it is cleared, then a tab is appended.
// NOTE(review): original line 76 is missing; presumably it appends the query id before the tab.
75 result_string.clear();
77 result_string +=
'\t';
// std::views::common makes begin() and end() the same type, which the iterator-pair
// assign() call below needs. Only the assign() call is timed as minimiser computation.
79 auto minimiser_view = seq | hash_adaptor | std::views::common;
80 local_compute_minimiser_timer.start();
81 minimiser.assign(minimiser_view.begin(), minimiser_view.end());
82 local_compute_minimiser_timer.stop();
// The hit threshold for this query is derived from its number of minimisers.
84 size_t const minimiser_count{minimiser.size()};
85 size_t const threshold = thresholder.get(minimiser_count);
// Counting path: bulk_count yields one count per bin; the loop tests each count against
// `threshold`. The body of the `if` and the update of `current_bin` are not visible.
89 local_query_ibf_timer.start();
90 auto & result = counter.bulk_count(minimiser);
91 local_query_ibf_timer.stop();
92 size_t current_bin{0};
93 local_generate_results_timer.start();
94 for (
auto && count : result)
96 if (count >= threshold)
// Membership path: the threshold is passed into membership_for and the loop iterates the
// returned result directly, appending ',' after each entry.
// NOTE(review): presumably this is the alternative branch to the counting path above
// (the branching statement is not visible), and the loop variable holds a bin identifier
// despite being named `count` - confirm.
106 local_query_ibf_timer.start();
107 auto & result = counter.membership_for(minimiser, threshold);
108 local_query_ibf_timer.stop();
109 local_generate_results_timer.start();
110 for (
auto && count : result)
113 result_string +=
',';
// Terminates the result line. The last character is inspected for a trailing ','.
// NOTE(review): the body of this `if` (original line 118) and the `else` are missing;
// presumably a trailing ',' is overwritten with '\n', otherwise '\n' is appended.
117 if (
auto & last_char = result_string.back(); last_char ==
',')
120 result_string +=
'\n';
// Emit the finished line; writing is counted as part of result generation time.
122 synced_out.write(result_string);
123 local_generate_results_timer.stop();
// Fold this worker's timers into the ones held by `arguments`.
// NOTE(review): `arguments` is a const reference, so these members are presumably declared
// `mutable`; whether `+=` is safe when several workers finish concurrently is not visible here.
126 arguments.compute_minimiser_timer += local_compute_minimiser_timer;
127 arguments.query_ibf_timer += local_query_ibf_timer;
128 arguments.generate_results_timer += local_generate_results_timer;
// Writes the output header, passing the hash function count. Depending on `is_ibf` the
// count is read from `index.ibf()` directly or from the first entry of its `ibf_vector`.
131 auto write_header = [&]()
133 if constexpr (is_ibf)
134 return synced_out.write_header(arguments, index.ibf().hash_function_count());
136 return synced_out.write_header(arguments, index.ibf().ibf_vector[0].hash_function_count());
// Process the query file in chunks of (1ULL << 20) * 10 = 10,485,760 records.
139 for (
auto && chunked_records : fin | seqan::stl::views::chunk((1ULL << 20) * 10))
// Times query file I/O.
// NOTE(review): the statements between start() and stop() (original lines 143-145) are
// missing; presumably they fill `records` from `chunked_records`.
142 arguments.query_file_io_timer.start();
146 arguments.query_file_io_timer.stop();
// Function-local static: write_header() runs only the first time this statement is reached,
// not once per chunk. The variable exists solely to trigger that one-time call, hence
// [[maybe_unused]].
149 [[maybe_unused]]
static bool header_written = write_header();
// Run `worker` over the records of this chunk, timed as the parallel search phase.
// NOTE(review): presumably do_parallel splits records.size() items across
// arguments.threads threads - confirm against its definition.
151 arguments.parallel_search_timer.start();
152 do_parallel(worker, records.size(), arguments.threads);
153 arguments.parallel_search_timer.stop();
Provides raptor::adjust_seed.
T back_inserter(T... args)
Definition threshold.hpp:19
Provides raptor::dna4_traits.
Provides raptor::do_parallel.
Provides raptor::load_index.
Provides raptor::sync_out.
Provides raptor::threshold::threshold.