15#include <seqan3/search/views/minimiser_hash.hpp>
18#include <raptor/contrib/std/chunk_view.hpp>
/*!\brief Searches the queries of `arguments.query_file` against an index that is loaded part by part.
 * \tparam index_t Type of the index; it must provide `ibf()` (see the calls below).
 * \param arguments Search configuration (query file, shape, window size, thread count, number of parts, timers).
 * \param index     Index object that `load_index` fills with one part at a time.
 *
 * NOTE(review): this listing omits lines (braces, the declarations of `records`, `results`, `minimiser`,
 * `result_string` and `thresholder`, and some loop bodies). The comments below describe only the statements
 * that are visible; anything depending on an elided line is marked as an assumption.
 */
28template <
typename index_t>
29void search_partitioned_hibf(search_arguments
const & arguments, index_t && index)
// Query input: only the ID and sequence fields of each record are read.
31 seqan3::sequence_file_input<dna4_traits, seqan3::fields<seqan3::field::id, seqan3::field::seq>> fin{
32 arguments.query_file};
33 using record_type =
typename decltype(fin)::record_type;
// Output sink constructed from the arguments; the worker sends every result line through it.
37 sync_out synced_out{arguments};
// Worker processing the records at positions [start, start + extent).
// It captures everything by reference, so `records`, `thresholder`, `synced_out`, `index` and `arguments`
// are shared with the enclosing function.
// NOTE(review): `output_results` is not used in any visible line - presumably it guards the output section
// below; confirm against the full source.
44 auto worker = [&](
size_t const start,
size_t const extent,
bool const output_results)
// Timers local to one worker invocation; they are added to the timers in `arguments` at the end (L55-L57).
46 seqan::hibf::serial_timer local_compute_minimiser_timer{};
47 seqan::hibf::serial_timer local_query_ibf_timer{};
48 seqan::hibf::serial_timer local_generate_results_timer{};
// Each worker invocation obtains its own membership agent from the index' IBF.
50 auto agent = index.ibf().membership_agent();
// Minimiser hash adaptor built from shape and window size; the seed is derived from the shape weight.
55 auto hash_adaptor = seqan3::views::minimiser_hash(arguments.shape,
56 seqan3::window_size{arguments.window_size},
57 seqan3::seed{adjust_seed(arguments.shape_weight)});
59 for (
size_t pos = start; pos < start + extent; ++pos)
61 auto const & seq = records[pos].sequence();
// Only the assignment into `minimiser` is timed; the view construction above the timer is not.
64 auto minimiser_view = seq | hash_adaptor | std::views::common;
65 local_compute_minimiser_timer.start();
66 minimiser.assign(minimiser_view.begin(), minimiser_view.end());
67 local_compute_minimiser_timer.stop();
// The threshold is looked up from the number of minimisers of this query.
69 size_t const minimiser_count{minimiser.size()};
70 size_t const threshold = thresholder.get(minimiser_count);
// Query the IBF with the minimisers of this query and the threshold; `result` is a reference to the agent's result.
72 local_query_ibf_timer.start();
73 auto & result = agent.membership_for(minimiser, threshold);
74 local_query_ibf_timer.stop();
75 local_generate_results_timer.start();
// NOTE(review): the body of this loop is not part of the listing - presumably it records each user bin ID
// (in `results[pos]` and/or `result_string`); confirm against the full source.
76 for (
auto && user_bin_id : result)
// The query ID is inserted at the front of the result string.
85 auto const &
id = records[pos].id();
86 result_string.
insert(result_string.
begin(),
id.begin(),
id.end());
// Checks whether the string ends with ','.
// NOTE(review): the branch body is elided - presumably it replaces the trailing comma; confirm.
88 if (
auto & last_char = result_string.
back(); last_char ==
',')
91 result_string +=
'\n';
// Write the finished line and clear the buffer for the next record.
93 synced_out.write(result_string);
94 result_string.
clear();
96 local_generate_results_timer.stop();
// Add this invocation's timings to the timers stored in `arguments`.
99 arguments.compute_minimiser_timer += local_compute_minimiser_timer;
100 arguments.query_ibf_timer += local_query_ibf_timer;
101 arguments.generate_results_timer += local_generate_results_timer;
// The query file is consumed in chunks of (1ULL << 20) * 10 = 10,485,760 records.
104 for (
auto && chunked_records : fin | seqan::stl::views::chunk((1ULL << 20) * 10))
// Part 0 of the index is loaded through std::async with the async launch policy.
// NOTE(review): the callable wrapping `load_index` and the point where the future is waited on are elided
// from this listing - presumably the load overlaps with reading the query chunk; confirm.
107 auto cereal_future =
std::async(std::launch::async,
110 load_index(index, arguments, 0);
// NOTE(review): the statement timed here is elided - presumably it moves `chunked_records` into `records`.
114 arguments.query_file_io_timer.start();
116 arguments.query_file_io_timer.stop();
// One result slot per record of the current chunk.
118 results.
resize(records.size());
// The header is written using the hash function count of the first entry of `ibf_vector`.
121 synced_out.write_header(arguments, index.ibf().ibf_vector[0].hash_function_count());
123 assert(arguments.parts > 0);
// All parts except the last one: run the workers with `false` as last argument (presumably forwarded to the
// worker as `output_results`), write and reset the timers, then load the next part.
124 for (
int part = 0; part < arguments.parts - 1; ++part)
126 do_parallel(worker, records.size(), arguments.threads,
false );
127 arguments.write_timings_to_file();
128 arguments.compute_minimiser_timer = {};
129 arguments.query_ibf_timer = {};
130 arguments.generate_results_timer = {};
131 arguments.load_index_timer = {};
132 load_index(index, arguments, part + 1);
// Last loaded part: run the workers once more with `true` as last argument, then write the timings.
135 do_parallel(worker, records.size(), arguments.threads,
true );
136 arguments.write_timings_to_file();
Provides raptor::adjust_seed.
T back_inserter(T... args)
Definition threshold.hpp:19
Provides raptor::dna4_traits.
Provides raptor::do_parallel.
Provides raptor::load_index.
Provides raptor::sync_out.
Provides raptor::threshold::threshold.