17#include <seqan3/search/dream_index/interleaved_bloom_filter.hpp>
19#ifndef RAPTOR_HIBF_HAS_COUNT
20# define RAPTOR_HIBF_HAS_COUNT 0
86template <seqan3::data_layout data_layout_mode_ = seqan3::data_layout::uncompressed>
96#if RAPTOR_HIBF_HAS_COUNT
98 template <std::
integral value_t>
106 using ibf_t = seqan3::interleaved_bloom_filter<data_layout_mode_>;
143#if RAPTOR_HIBF_HAS_COUNT
147 template <std::
integral value_t = u
int16_t>
162 template <seqan3::cereal_archive archive_t>
163 void CEREAL_SERIALIZE_FUNCTION_NAME(archive_t & archive)
174template <seqan3::data_layout data_layout_mode>
193 return user_bin_filenames.
size();
199 ibf_bin_to_filename_position.resize(size);
205 user_bin_filenames.
resize(size);
219 return ibf_bin_to_filename_position[idx];
233 return user_bin_filenames[idx];
239 return user_bin_filenames[ibf_bin_to_filename_position[index_pair.
first][index_pair.
second]];
247 return ibf_bin_to_filename_position[ibf_idx]
248 | std::views::transform(
254 return user_bin_filenames[i];
261 return ibf_bin_to_filename_position[ibf_idx][bin_idx];
269 template <
typename stream_t>
274 for (
auto const & filename : user_bin_filenames)
295 template <
typename archive_t>
298 archive(user_bin_filenames);
299 archive(ibf_bin_to_filename_position);
309template <seqan3::data_layout data_layout_mode>
320 template <std::ranges::forward_range value_range_t>
323 auto agent = hibf_ptr->ibf_vector[ibf_idx].template counting_agent<uint16_t>();
324 auto & result = agent.bulk_count(values);
328 for (
size_t bin{}; bin < result.size(); ++bin)
332 auto const current_filename_index = hibf_ptr->user_bins.filename_index(ibf_idx, bin);
334 if (current_filename_index < 0)
337 bulk_contains_impl(values, hibf_ptr->next_ibf_id[ibf_idx][bin], threshold);
340 else if (bin + 1u == result.size() ||
341 current_filename_index != hibf_ptr->user_bins.filename_index(ibf_idx, bin + 1))
343 if (sum >= threshold)
344 result_buffer.emplace_back(current_filename_index);
389 template <std::ranges::forward_range value_range_t>
392 assert(hibf_ptr !=
nullptr);
394 static_assert(std::ranges::forward_range<value_range_t>,
"The values must model forward_range.");
395 static_assert(std::unsigned_integral<std::ranges::range_value_t<value_range_t>>,
396 "An individual value must be an unsigned integral.");
398 result_buffer.
clear();
400 bulk_contains_impl(values, 0, threshold);
404 return result_buffer;
409 template <std::ranges::range value_range_t>
411 size_t const threshold) &&
noexcept =
delete;
415#if RAPTOR_HIBF_HAS_COUNT
418template <seqan3::data_layout data_layout_mode>
419template <std::
integral value_t>
430 template <std::ranges::forward_range value_range_t>
431 void bulk_count_impl(value_range_t && values, int64_t
const ibf_idx,
size_t const threshold)
433 auto agent = hibf_ptr->ibf_vector[ibf_idx].template counting_agent<value_t>();
434 auto & result = agent.bulk_count(values);
438 for (
size_t bin{}; bin < result.size(); ++bin)
441 auto const current_filename_index = hibf_ptr->user_bins.filename_index(ibf_idx, bin);
443 if (current_filename_index < 0)
445 if (sum >= threshold)
446 bulk_count_impl(values, hibf_ptr->next_ibf_id[ibf_idx][bin], threshold);
449 else if (bin + 1u == result.size() ||
450 current_filename_index != hibf_ptr->user_bins.filename_index(ibf_idx, bin + 1))
452 if (sum >= threshold)
453 result_buffer[current_filename_index] = sum;
475 hibf_ptr(
std::addressof(hibf)),
476 result_buffer(hibf_ptr->
user_bins.num_user_bins())
502 template <std::ranges::forward_range value_range_t>
503 [[nodiscard]] seqan3::counting_vector<value_t>
const &
bulk_count(value_range_t && values,
504 size_t const threshold = 1u) &
noexcept
506 assert(hibf_ptr !=
nullptr);
507 assert(threshold > 0u);
508 assert(result_buffer.size() == hibf_ptr->user_bins.num_user_bins());
510 static_assert(std::ranges::forward_range<value_range_t>,
"The values must model forward_range.");
511 static_assert(std::unsigned_integral<std::ranges::range_value_t<value_range_t>>,
512 "An individual value must be an unsigned integral.");
516 bulk_count_impl(values, 0, threshold);
518 return result_buffer;
523 template <std::ranges::range value_range_t>
524 [[nodiscard]] seqan3::counting_vector<value_t>
const & bulk_count(value_range_t && values,
525 size_t const threshold = 1u) &&
noexcept =
delete;
Manages counting ranges of values for the raptor::hierarchical_interleaved_bloom_filter.
Definition: hierarchical_interleaved_bloom_filter.hpp:421
counting_agent_type(hibf_t const &hibf)
Construct a counting_agent_type for an existing hierarchical_interleaved_bloom_filter.
Definition: hierarchical_interleaved_bloom_filter.hpp:474
counting_agent_type & operator=(counting_agent_type const &)=default
Defaulted.
counting_agent_type & operator=(counting_agent_type &&)=default
Defaulted.
void bulk_count_impl(value_range_t &&values, int64_t const ibf_idx, size_t const threshold)
Helper for recursive bulk counting.
Definition: hierarchical_interleaved_bloom_filter.hpp:431
counting_agent_type(counting_agent_type &&)=default
Defaulted.
seqan3::counting_vector< value_t > const & bulk_count(value_range_t &&values, size_t const threshold=1u) &noexcept
Counts the occurrences in each bin for all values in a range.
Definition: hierarchical_interleaved_bloom_filter.hpp:503
counting_agent_type(counting_agent_type const &)=default
Defaulted.
~counting_agent_type()=default
Defaulted.
counting_agent_type()=default
Defaulted.
seqan3::counting_vector< value_t > result_buffer
Stores the result of bulk_count().
Definition: hierarchical_interleaved_bloom_filter.hpp:481
Manages membership queries for the raptor::hierarchical_interleaved_bloom_filter.
Definition: hierarchical_interleaved_bloom_filter.hpp:311
membership_agent(hibf_t const &hibf)
Construct a membership_agent for an existing hierarchical_interleaved_bloom_filter.
Definition: hierarchical_interleaved_bloom_filter.hpp:365
void bulk_contains_impl(value_range_t &&values, int64_t const ibf_idx, size_t const threshold)
Helper for recursive membership querying.
Definition: hierarchical_interleaved_bloom_filter.hpp:321
membership_agent & operator=(membership_agent const &)=default
Defaulted.
membership_agent(membership_agent const &)=default
Defaulted.
membership_agent & operator=(membership_agent &&)=default
Defaulted.
std::vector< int64_t > const & bulk_contains(value_range_t &&values, size_t const threshold) &noexcept
Determines set membership of given values, and returns the user bin indices of occurrences.
Definition: hierarchical_interleaved_bloom_filter.hpp:390
~membership_agent()=default
Defaulted.
std::vector< int64_t > result_buffer
Stores the result of bulk_contains().
Definition: hierarchical_interleaved_bloom_filter.hpp:370
membership_agent()=default
Defaulted.
membership_agent(membership_agent &&)=default
Defaulted.
Bookkeeping for user and technical bins.
Definition: hierarchical_interleaved_bloom_filter.hpp:176
int64_t filename_index(size_t const ibf_idx, size_t const bin_idx) const
Returns the filename index of the ibf_idxth IBF for bin bin_idx.
Definition: hierarchical_interleaved_bloom_filter.hpp:259
void set_ibf_count(size_t const size)
Changes the number of managed IBFs.
Definition: hierarchical_interleaved_bloom_filter.hpp:197
void set_user_bin_count(size_t const size)
Changes the number of managed user bins.
Definition: hierarchical_interleaved_bloom_filter.hpp:203
auto operator[](size_t const ibf_idx) const
Returns a view over the user bin filenames for the ibf_idxth IBF. An empty string is returned for mer...
Definition: hierarchical_interleaved_bloom_filter.hpp:245
void write_filenames(stream_t &out_stream) const
Writes all filenames to a stream. Index and filename are tab-separated.
Definition: hierarchical_interleaved_bloom_filter.hpp:270
std::string const & operator[](std::pair< size_t, size_t > const &index_pair) const
For a pair (a,b), returns a const reference to the filename of the user bin at IBF a,...
Definition: hierarchical_interleaved_bloom_filter.hpp:237
std::string & filename_of_user_bin(size_t const idx)
Returns the filename of the idxth user bin.
Definition: hierarchical_interleaved_bloom_filter.hpp:231
void serialize(archive_t &archive)
Serialisation support function.
Definition: hierarchical_interleaved_bloom_filter.hpp:296
size_t num_user_bins() const noexcept
Returns the number of managed user bins.
Definition: hierarchical_interleaved_bloom_filter.hpp:191
std::vector< int64_t > & bin_indices_of_ibf(size_t const idx)
Returns a vector containing user bin indices for each bin in the idxth IBF.
Definition: hierarchical_interleaved_bloom_filter.hpp:217
std::vector< std::string > user_bin_filenames
Contains filenames of all user bins.
Definition: hierarchical_interleaved_bloom_filter.hpp:179
The HIBF binning directory. A data structure that efficiently answers set-membership queries for mult...
Definition: hierarchical_interleaved_bloom_filter.hpp:88
counting_agent_type< value_t > counting_agent() const
Returns a counting_agent_type to be used for counting.
Definition: hierarchical_interleaved_bloom_filter.hpp:148
hierarchical_interleaved_bloom_filter(hierarchical_interleaved_bloom_filter const &)=default
Defaulted.
~hierarchical_interleaved_bloom_filter()=default
Defaulted.
std::vector< ibf_t > ibf_vector
The individual interleaved Bloom filters.
Definition: hierarchical_interleaved_bloom_filter.hpp:123
hierarchical_interleaved_bloom_filter & operator=(hierarchical_interleaved_bloom_filter const &)=default
Defaulted.
membership_agent membership_agent() const
Returns a membership_agent to be used for counting.
Definition: hierarchical_interleaved_bloom_filter.hpp:138
static constexpr seqan3::data_layout data_layout_mode
Indicates whether the Interleaved Bloom Filter is compressed.
Definition: hierarchical_interleaved_bloom_filter.hpp:103
hierarchical_interleaved_bloom_filter & operator=(hierarchical_interleaved_bloom_filter &&)=default
Defaulted.
std::vector< std::vector< int64_t > > next_ibf_id
Stores for each bin in each IBF of the HIBF the ID of the next IBF.
Definition: hierarchical_interleaved_bloom_filter.hpp:132
hierarchical_interleaved_bloom_filter(hierarchical_interleaved_bloom_filter &&)=default
Defaulted.
seqan3::interleaved_bloom_filter< data_layout_mode_ > ibf_t
The type of an individual Bloom filter.
Definition: hierarchical_interleaved_bloom_filter.hpp:106
hierarchical_interleaved_bloom_filter()=default
Defaulted.
user_bins user_bins
The underlying user bins.
Definition: hierarchical_interleaved_bloom_filter.hpp:135