mirror of
https://gitee.com/milvus-io/milvus.git
synced 2024-11-29 18:38:44 +08:00
enhance: [bitset] multiple 'and' and 'or' in a single op (#33345)
issue #34117 * Refactoring * Added a capability to perform multiple bitwise `and` and `or` operations in a single op * AVX2, AVX512, ARM NEON, ARM SVE backed bitwise `and`, `op`, `xor` and `sub` ops * more unit tests for bitset * fixed a bug in `or_with_count` for certain bitset sizes * fixed a bug for certain offset values for inplace operations that take two bitsets Signed-off-by: Alexandr Guzhva <alexanderguzhva@gmail.com>
This commit is contained in:
parent
6bedc7e8c8
commit
5a1f752272
@ -23,6 +23,7 @@
|
||||
#include <type_traits>
|
||||
|
||||
#include "common.h"
|
||||
#include "detail/maybe_vector.h"
|
||||
|
||||
namespace milvus {
|
||||
namespace bitset {
|
||||
@ -109,7 +110,6 @@ class BitsetBase {
|
||||
public:
|
||||
using policy_type = PolicyT;
|
||||
using data_type = typename policy_type::data_type;
|
||||
using size_type = typename policy_type::size_type;
|
||||
using proxy_type = typename policy_type::proxy_type;
|
||||
using const_proxy_type = typename policy_type::const_proxy_type;
|
||||
|
||||
@ -128,21 +128,21 @@ class BitsetBase {
|
||||
}
|
||||
|
||||
// Return the number of bits we're working with.
|
||||
inline size_type
|
||||
inline size_t
|
||||
size() const {
|
||||
return as_derived().size_impl();
|
||||
}
|
||||
|
||||
// Return the number of bytes which is needed to
|
||||
// contain all our bits.
|
||||
inline size_type
|
||||
inline size_t
|
||||
size_in_bytes() const {
|
||||
return policy_type::get_required_size_in_bytes(this->size());
|
||||
}
|
||||
|
||||
// Return the number of elements which is needed to
|
||||
// contain all our bits.
|
||||
inline size_type
|
||||
inline size_t
|
||||
size_in_elements() const {
|
||||
return policy_type::get_required_size_in_elements(this->size());
|
||||
}
|
||||
@ -155,19 +155,19 @@ class BitsetBase {
|
||||
|
||||
//
|
||||
inline proxy_type
|
||||
operator[](const size_type bit_idx) {
|
||||
operator[](const size_t bit_idx) {
|
||||
range_checker::lt(bit_idx, this->size());
|
||||
|
||||
const size_type idx_v = bit_idx + this->offset();
|
||||
const size_t idx_v = bit_idx + this->offset();
|
||||
return policy_type::get_proxy(this->data(), idx_v);
|
||||
}
|
||||
|
||||
//
|
||||
inline bool
|
||||
operator[](const size_type bit_idx) const {
|
||||
operator[](const size_t bit_idx) const {
|
||||
range_checker::lt(bit_idx, this->size());
|
||||
|
||||
const size_type idx_v = bit_idx + this->offset();
|
||||
const size_t idx_v = bit_idx + this->offset();
|
||||
const auto proxy = policy_type::get_proxy(this->data(), idx_v);
|
||||
return proxy.operator bool();
|
||||
}
|
||||
@ -180,10 +180,21 @@ class BitsetBase {
|
||||
|
||||
// Set a given bit to a given value.
|
||||
inline void
|
||||
set(const size_type bit_idx, const bool value = true) {
|
||||
set(const size_t bit_idx, const bool value = true) {
|
||||
this->operator[](bit_idx) = value;
|
||||
}
|
||||
|
||||
// Set a given range of [a, b) bits to a given value.
|
||||
inline void
|
||||
set(const size_t bit_idx_start,
|
||||
const size_t size,
|
||||
const bool value = true) {
|
||||
range_checker::le(bit_idx_start + size, this->size());
|
||||
|
||||
policy_type::op_fill(
|
||||
this->data(), this->offset() + bit_idx_start, size, value);
|
||||
}
|
||||
|
||||
// Set all bits to false.
|
||||
inline void
|
||||
reset() {
|
||||
@ -192,10 +203,16 @@ class BitsetBase {
|
||||
|
||||
// Set a given bit to false.
|
||||
inline void
|
||||
reset(const size_type bit_idx) {
|
||||
reset(const size_t bit_idx) {
|
||||
this->operator[](bit_idx) = false;
|
||||
}
|
||||
|
||||
// Set a given range of [a, b) bits to false.
|
||||
inline void
|
||||
reset(const size_t bit_idx_start, const size_t size) {
|
||||
this->set(bit_idx_start, size, false);
|
||||
}
|
||||
|
||||
// Return whether all bits are set to true.
|
||||
inline bool
|
||||
all() const {
|
||||
@ -217,7 +234,7 @@ class BitsetBase {
|
||||
// Inplace and.
|
||||
template <typename I, bool R>
|
||||
inline void
|
||||
inplace_and(const BitsetBase<PolicyT, I, R>& other, const size_type size) {
|
||||
inplace_and(const BitsetBase<PolicyT, I, R>& other, const size_t size) {
|
||||
range_checker::le(size, this->size());
|
||||
range_checker::le(size, other.size());
|
||||
|
||||
@ -225,6 +242,74 @@ class BitsetBase {
|
||||
this->data(), other.data(), this->offset(), other.offset(), size);
|
||||
}
|
||||
|
||||
template <bool R>
|
||||
inline void
|
||||
inplace_and(const BitsetView<PolicyT, R>* const others,
|
||||
const size_t n_others,
|
||||
const size_t size) {
|
||||
range_checker::le(size, this->size());
|
||||
for (size_t i = 0; i < n_others; i++) {
|
||||
range_checker::le(size, others[i].size());
|
||||
}
|
||||
|
||||
// pick buffers
|
||||
detail::MaybeVector<const data_type*> tmp_data(n_others);
|
||||
detail::MaybeVector<size_t> tmp_offset(n_others);
|
||||
|
||||
for (size_t i = 0; i < n_others; i++) {
|
||||
tmp_data[i] = others[i].data();
|
||||
tmp_offset[i] = others[i].offset();
|
||||
}
|
||||
|
||||
policy_type::op_and_multiple(this->data(),
|
||||
tmp_data.data(),
|
||||
this->offset(),
|
||||
tmp_offset.data(),
|
||||
n_others,
|
||||
size);
|
||||
}
|
||||
|
||||
template <bool R>
|
||||
inline void
|
||||
inplace_and(const BitsetView<PolicyT, R>* const others,
|
||||
const size_t n_others) {
|
||||
this->inplace_and(others, n_others, this->size());
|
||||
}
|
||||
|
||||
template <typename ContainerT, bool R>
|
||||
inline void
|
||||
inplace_and(const Bitset<PolicyT, ContainerT, R>* const others,
|
||||
const size_t n_others,
|
||||
const size_t size) {
|
||||
range_checker::le(size, this->size());
|
||||
for (size_t i = 0; i < n_others; i++) {
|
||||
range_checker::le(size, others[i].size());
|
||||
}
|
||||
|
||||
// pick buffers
|
||||
detail::MaybeVector<const data_type*> tmp_data(n_others);
|
||||
detail::MaybeVector<size_t> tmp_offset(n_others);
|
||||
|
||||
for (size_t i = 0; i < n_others; i++) {
|
||||
tmp_data[i] = others[i].data();
|
||||
tmp_offset[i] = others[i].offset();
|
||||
}
|
||||
|
||||
policy_type::op_and_multiple(this->data(),
|
||||
tmp_data.data(),
|
||||
this->offset(),
|
||||
tmp_offset.data(),
|
||||
n_others,
|
||||
size);
|
||||
}
|
||||
|
||||
template <typename ContainerT, bool R>
|
||||
inline void
|
||||
inplace_and(const Bitset<PolicyT, ContainerT, R>* const others,
|
||||
const size_t n_others) {
|
||||
this->inplace_and(others, n_others, this->size());
|
||||
}
|
||||
|
||||
// Inplace and. A given bitset / bitset view is expected to have the same size.
|
||||
template <typename I, bool R>
|
||||
inline ImplT&
|
||||
@ -238,7 +323,7 @@ class BitsetBase {
|
||||
// Inplace or.
|
||||
template <typename I, bool R>
|
||||
inline void
|
||||
inplace_or(const BitsetBase<PolicyT, I, R>& other, const size_type size) {
|
||||
inplace_or(const BitsetBase<PolicyT, I, R>& other, const size_t size) {
|
||||
range_checker::le(size, this->size());
|
||||
range_checker::le(size, other.size());
|
||||
|
||||
@ -246,6 +331,74 @@ class BitsetBase {
|
||||
this->data(), other.data(), this->offset(), other.offset(), size);
|
||||
}
|
||||
|
||||
template <bool R>
|
||||
inline void
|
||||
inplace_or(const BitsetView<PolicyT, R>* const others,
|
||||
const size_t n_others,
|
||||
const size_t size) {
|
||||
range_checker::le(size, this->size());
|
||||
for (size_t i = 0; i < n_others; i++) {
|
||||
range_checker::le(size, others[i].size());
|
||||
}
|
||||
|
||||
// pick buffers
|
||||
detail::MaybeVector<const data_type*> tmp_data(n_others);
|
||||
detail::MaybeVector<size_t> tmp_offset(n_others);
|
||||
|
||||
for (size_t i = 0; i < n_others; i++) {
|
||||
tmp_data[i] = others[i].data();
|
||||
tmp_offset[i] = others[i].offset();
|
||||
}
|
||||
|
||||
policy_type::op_or_multiple(this->data(),
|
||||
tmp_data.data(),
|
||||
this->offset(),
|
||||
tmp_offset.data(),
|
||||
n_others,
|
||||
size);
|
||||
}
|
||||
|
||||
template <bool R>
|
||||
inline void
|
||||
inplace_or(const BitsetView<PolicyT, R>* const others,
|
||||
const size_t n_others) {
|
||||
this->inplace_or(others, n_others, this->size());
|
||||
}
|
||||
|
||||
template <typename ContainerT, bool R>
|
||||
inline void
|
||||
inplace_or(const Bitset<PolicyT, ContainerT, R>* const others,
|
||||
const size_t n_others,
|
||||
const size_t size) {
|
||||
range_checker::le(size, this->size());
|
||||
for (size_t i = 0; i < n_others; i++) {
|
||||
range_checker::le(size, others[i].size());
|
||||
}
|
||||
|
||||
// pick buffers
|
||||
detail::MaybeVector<const data_type*> tmp_data(n_others);
|
||||
detail::MaybeVector<size_t> tmp_offset(n_others);
|
||||
|
||||
for (size_t i = 0; i < n_others; i++) {
|
||||
tmp_data[i] = others[i].data();
|
||||
tmp_offset[i] = others[i].offset();
|
||||
}
|
||||
|
||||
policy_type::op_or_multiple(this->data(),
|
||||
tmp_data.data(),
|
||||
this->offset(),
|
||||
tmp_offset.data(),
|
||||
n_others,
|
||||
size);
|
||||
}
|
||||
|
||||
template <typename ContainerT, bool R>
|
||||
inline void
|
||||
inplace_or(const Bitset<PolicyT, ContainerT, R>* const others,
|
||||
const size_t n_others) {
|
||||
this->inplace_or(others, n_others, this->size());
|
||||
}
|
||||
|
||||
// Inplace or. A given bitset / bitset view is expected to have the same size.
|
||||
template <typename I, bool R>
|
||||
inline ImplT&
|
||||
@ -264,13 +417,13 @@ class BitsetBase {
|
||||
|
||||
//
|
||||
inline BitsetView<PolicyT, IsRangeCheckEnabled>
|
||||
operator+(const size_type offset) {
|
||||
operator+(const size_t offset) {
|
||||
return this->view(offset);
|
||||
}
|
||||
|
||||
// Create a view of a given size from the given position.
|
||||
inline BitsetView<PolicyT, IsRangeCheckEnabled>
|
||||
view(const size_type offset, const size_type size) {
|
||||
view(const size_t offset, const size_t size) {
|
||||
range_checker::le(offset, this->size());
|
||||
range_checker::le(offset + size, this->size());
|
||||
|
||||
@ -280,7 +433,7 @@ class BitsetBase {
|
||||
|
||||
// Create a const view of a given size from the given position.
|
||||
inline BitsetView<PolicyT, IsRangeCheckEnabled>
|
||||
view(const size_type offset, const size_type size) const {
|
||||
view(const size_t offset, const size_t size) const {
|
||||
range_checker::le(offset, this->size());
|
||||
range_checker::le(offset + size, this->size());
|
||||
|
||||
@ -292,7 +445,7 @@ class BitsetBase {
|
||||
|
||||
// Create a view from the given position, which uses all available size.
|
||||
inline BitsetView<PolicyT, IsRangeCheckEnabled>
|
||||
view(const size_type offset) {
|
||||
view(const size_t offset) {
|
||||
range_checker::le(offset, this->size());
|
||||
|
||||
return BitsetView<PolicyT, IsRangeCheckEnabled>(
|
||||
@ -301,7 +454,7 @@ class BitsetBase {
|
||||
|
||||
// Create a const view from the given position, which uses all available size.
|
||||
inline const BitsetView<PolicyT, IsRangeCheckEnabled>
|
||||
view(const size_type offset) const {
|
||||
view(const size_t offset) const {
|
||||
range_checker::le(offset, this->size());
|
||||
|
||||
return BitsetView<PolicyT, IsRangeCheckEnabled>(
|
||||
@ -323,7 +476,7 @@ class BitsetBase {
|
||||
}
|
||||
|
||||
// Return the number of bits which are set to true.
|
||||
inline size_type
|
||||
inline size_t
|
||||
count() const {
|
||||
return policy_type::op_count(
|
||||
this->data(), this->offset(), this->size());
|
||||
@ -354,7 +507,7 @@ class BitsetBase {
|
||||
// Inplace xor.
|
||||
template <typename I, bool R>
|
||||
inline void
|
||||
inplace_xor(const BitsetBase<PolicyT, I, R>& other, const size_type size) {
|
||||
inplace_xor(const BitsetBase<PolicyT, I, R>& other, const size_t size) {
|
||||
range_checker::le(size, this->size());
|
||||
range_checker::le(size, other.size());
|
||||
|
||||
@ -375,7 +528,7 @@ class BitsetBase {
|
||||
// Inplace sub.
|
||||
template <typename I, bool R>
|
||||
inline void
|
||||
inplace_sub(const BitsetBase<PolicyT, I, R>& other, const size_type size) {
|
||||
inplace_sub(const BitsetBase<PolicyT, I, R>& other, const size_t size) {
|
||||
range_checker::le(size, this->size());
|
||||
range_checker::le(size, other.size());
|
||||
|
||||
@ -394,16 +547,16 @@ class BitsetBase {
|
||||
}
|
||||
|
||||
// Find the index of the first bit set to true.
|
||||
inline std::optional<size_type>
|
||||
inline std::optional<size_t>
|
||||
find_first() const {
|
||||
return policy_type::op_find(
|
||||
this->data(), this->offset(), this->size(), 0);
|
||||
}
|
||||
|
||||
// Find the index of the first bit set to true, starting from a given bit index.
|
||||
inline std::optional<size_type>
|
||||
find_next(const size_type starting_bit_idx) const {
|
||||
const size_type size_v = this->size();
|
||||
inline std::optional<size_t>
|
||||
find_next(const size_t starting_bit_idx) const {
|
||||
const size_t size_v = this->size();
|
||||
if (starting_bit_idx + 1 >= size_v) {
|
||||
return std::nullopt;
|
||||
}
|
||||
@ -414,7 +567,7 @@ class BitsetBase {
|
||||
|
||||
// Read multiple bits starting from a given bit index.
|
||||
inline data_type
|
||||
read(const size_type starting_bit_idx, const size_type nbits) {
|
||||
read(const size_t starting_bit_idx, const size_t nbits) {
|
||||
range_checker::le(nbits, sizeof(data_type));
|
||||
|
||||
return policy_type::op_read(
|
||||
@ -423,9 +576,9 @@ class BitsetBase {
|
||||
|
||||
// Write multiple bits starting from a given bit index.
|
||||
inline void
|
||||
write(const size_type starting_bit_idx,
|
||||
write(const size_t starting_bit_idx,
|
||||
const data_type value,
|
||||
const size_type nbits) {
|
||||
const size_t nbits) {
|
||||
range_checker::le(nbits, sizeof(data_type));
|
||||
|
||||
policy_type::op_write(
|
||||
@ -437,7 +590,7 @@ class BitsetBase {
|
||||
void
|
||||
inplace_compare_column(const T* const __restrict t,
|
||||
const U* const __restrict u,
|
||||
const size_type size,
|
||||
const size_t size,
|
||||
CompareOpType op) {
|
||||
if (op == CompareOpType::EQ) {
|
||||
this->inplace_compare_column<T, U, CompareOpType::EQ>(t, u, size);
|
||||
@ -460,7 +613,7 @@ class BitsetBase {
|
||||
void
|
||||
inplace_compare_column(const T* const __restrict t,
|
||||
const U* const __restrict u,
|
||||
const size_type size) {
|
||||
const size_t size) {
|
||||
range_checker::le(size, this->size());
|
||||
|
||||
policy_type::template op_compare_column<T, U, Op>(
|
||||
@ -471,7 +624,7 @@ class BitsetBase {
|
||||
template <typename T>
|
||||
void
|
||||
inplace_compare_val(const T* const __restrict t,
|
||||
const size_type size,
|
||||
const size_t size,
|
||||
const T& value,
|
||||
CompareOpType op) {
|
||||
if (op == CompareOpType::EQ) {
|
||||
@ -494,7 +647,7 @@ class BitsetBase {
|
||||
template <typename T, CompareOpType Op>
|
||||
void
|
||||
inplace_compare_val(const T* const __restrict t,
|
||||
const size_type size,
|
||||
const size_t size,
|
||||
const T& value) {
|
||||
range_checker::le(size, this->size());
|
||||
|
||||
@ -508,7 +661,7 @@ class BitsetBase {
|
||||
inplace_within_range_column(const T* const __restrict lower,
|
||||
const T* const __restrict upper,
|
||||
const T* const __restrict values,
|
||||
const size_type size,
|
||||
const size_t size,
|
||||
const RangeType op) {
|
||||
if (op == RangeType::IncInc) {
|
||||
this->inplace_within_range_column<T, RangeType::IncInc>(
|
||||
@ -532,7 +685,7 @@ class BitsetBase {
|
||||
inplace_within_range_column(const T* const __restrict lower,
|
||||
const T* const __restrict upper,
|
||||
const T* const __restrict values,
|
||||
const size_type size) {
|
||||
const size_t size) {
|
||||
range_checker::le(size, this->size());
|
||||
|
||||
policy_type::template op_within_range_column<T, Op>(
|
||||
@ -545,7 +698,7 @@ class BitsetBase {
|
||||
inplace_within_range_val(const T& lower,
|
||||
const T& upper,
|
||||
const T* const __restrict values,
|
||||
const size_type size,
|
||||
const size_t size,
|
||||
const RangeType op) {
|
||||
if (op == RangeType::IncInc) {
|
||||
this->inplace_within_range_val<T, RangeType::IncInc>(
|
||||
@ -569,7 +722,7 @@ class BitsetBase {
|
||||
inplace_within_range_val(const T& lower,
|
||||
const T& upper,
|
||||
const T* const __restrict values,
|
||||
const size_type size) {
|
||||
const size_t size) {
|
||||
range_checker::le(size, this->size());
|
||||
|
||||
policy_type::template op_within_range_val<T, Op>(
|
||||
@ -582,7 +735,7 @@ class BitsetBase {
|
||||
inplace_arith_compare(const T* const __restrict src,
|
||||
const ArithHighPrecisionType<T>& right_operand,
|
||||
const ArithHighPrecisionType<T>& value,
|
||||
const size_type size,
|
||||
const size_t size,
|
||||
const ArithOpType a_op,
|
||||
const CompareOpType cmp_op) {
|
||||
if (a_op == ArithOpType::Add) {
|
||||
@ -765,7 +918,7 @@ class BitsetBase {
|
||||
inplace_arith_compare(const T* const __restrict src,
|
||||
const ArithHighPrecisionType<T>& right_operand,
|
||||
const ArithHighPrecisionType<T>& value,
|
||||
const size_type size) {
|
||||
const size_t size) {
|
||||
range_checker::le(size, this->size());
|
||||
|
||||
policy_type::template op_arith_compare<T, AOp, CmpOp>(
|
||||
@ -775,9 +928,9 @@ class BitsetBase {
|
||||
//
|
||||
// Inplace and. Also, counts the number of active bits.
|
||||
template <typename I, bool R>
|
||||
inline size_type
|
||||
inline size_t
|
||||
inplace_and_with_count(const BitsetBase<PolicyT, I, R>& other,
|
||||
const size_type size) {
|
||||
const size_t size) {
|
||||
range_checker::le(size, this->size());
|
||||
range_checker::le(size, other.size());
|
||||
|
||||
@ -787,9 +940,9 @@ class BitsetBase {
|
||||
|
||||
// Inplace or. Also, counts the number of inactive bits.
|
||||
template <typename I, bool R>
|
||||
inline size_type
|
||||
inline size_t
|
||||
inplace_or_with_count(const BitsetBase<PolicyT, I, R>& other,
|
||||
const size_type size) {
|
||||
const size_t size) {
|
||||
range_checker::le(size, this->size());
|
||||
range_checker::le(size, other.size());
|
||||
|
||||
@ -798,7 +951,7 @@ class BitsetBase {
|
||||
}
|
||||
|
||||
// Return the starting bit offset in our container.
|
||||
inline size_type
|
||||
inline size_t
|
||||
offset() const {
|
||||
return as_derived().offset_impl();
|
||||
}
|
||||
@ -829,7 +982,6 @@ class BitsetView : public BitsetBase<PolicyT,
|
||||
public:
|
||||
using policy_type = PolicyT;
|
||||
using data_type = typename policy_type::data_type;
|
||||
using size_type = typename policy_type::size_type;
|
||||
using proxy_type = typename policy_type::proxy_type;
|
||||
using const_proxy_type = typename policy_type::const_proxy_type;
|
||||
|
||||
@ -849,11 +1001,11 @@ class BitsetView : public BitsetBase<PolicyT,
|
||||
: Data{bitset.data()}, Size{bitset.size()}, Offset{bitset.offset()} {
|
||||
}
|
||||
|
||||
BitsetView(void* data, const size_type size)
|
||||
BitsetView(void* data, const size_t size)
|
||||
: Data{reinterpret_cast<data_type*>(data)}, Size{size}, Offset{0} {
|
||||
}
|
||||
|
||||
BitsetView(void* data, const size_type offset, const size_type size)
|
||||
BitsetView(void* data, const size_t offset, const size_t size)
|
||||
: Data{reinterpret_cast<data_type*>(data)}, Size{size}, Offset{offset} {
|
||||
}
|
||||
|
||||
@ -861,9 +1013,9 @@ class BitsetView : public BitsetBase<PolicyT,
|
||||
// the referenced bits are [Offset, Offset + Size)
|
||||
data_type* Data = nullptr;
|
||||
// measured in bits
|
||||
size_type Size = 0;
|
||||
size_t Size = 0;
|
||||
// measured in bits
|
||||
size_type Offset = 0;
|
||||
size_t Offset = 0;
|
||||
|
||||
inline data_type*
|
||||
data_impl() {
|
||||
@ -873,11 +1025,11 @@ class BitsetView : public BitsetBase<PolicyT,
|
||||
data_impl() const {
|
||||
return Data;
|
||||
}
|
||||
inline size_type
|
||||
inline size_t
|
||||
size_impl() const {
|
||||
return Size;
|
||||
}
|
||||
inline size_type
|
||||
inline size_t
|
||||
offset_impl() const {
|
||||
return Offset;
|
||||
}
|
||||
@ -896,10 +1048,11 @@ class Bitset
|
||||
public:
|
||||
using policy_type = PolicyT;
|
||||
using data_type = typename policy_type::data_type;
|
||||
using size_type = typename policy_type::size_type;
|
||||
using proxy_type = typename policy_type::proxy_type;
|
||||
using const_proxy_type = typename policy_type::const_proxy_type;
|
||||
|
||||
using view_type = BitsetView<PolicyT, IsRangeCheckEnabled>;
|
||||
|
||||
// This is the container type.
|
||||
using container_type = ContainerT;
|
||||
// This is how the data is stored. For example, we may operate using
|
||||
@ -914,11 +1067,11 @@ class Bitset
|
||||
Bitset() {
|
||||
}
|
||||
// Allocate the given number of bits.
|
||||
Bitset(const size_type size)
|
||||
Bitset(const size_t size)
|
||||
: Data(get_required_size_in_container_elements(size)), Size{size} {
|
||||
}
|
||||
// Allocate the given number of bits, initialize with a given value.
|
||||
Bitset(const size_type size, const bool init)
|
||||
Bitset(const size_t size, const bool init)
|
||||
: Data(get_required_size_in_container_elements(size),
|
||||
init ? data_type(-1) : 0),
|
||||
Size{size} {
|
||||
@ -964,8 +1117,8 @@ class Bitset
|
||||
|
||||
// Resize.
|
||||
void
|
||||
resize(const size_type new_size) {
|
||||
const size_type new_size_in_container_elements =
|
||||
resize(const size_t new_size) {
|
||||
const size_t new_size_in_container_elements =
|
||||
get_required_size_in_container_elements(new_size);
|
||||
Data.resize(new_size_in_container_elements);
|
||||
Size = new_size;
|
||||
@ -973,8 +1126,8 @@ class Bitset
|
||||
|
||||
// Resize and initialize new bits with a given value if grown.
|
||||
void
|
||||
resize(const size_type new_size, const bool init) {
|
||||
const size_type old_size = this->size();
|
||||
resize(const size_t new_size, const bool init) {
|
||||
const size_t old_size = this->size();
|
||||
this->resize(new_size);
|
||||
|
||||
if (new_size > old_size) {
|
||||
@ -989,11 +1142,11 @@ class Bitset
|
||||
template <typename I, bool R>
|
||||
void
|
||||
append(const BitsetBase<PolicyT, I, R>& other,
|
||||
const size_type starting_bit_idx,
|
||||
const size_type count) {
|
||||
const size_t starting_bit_idx,
|
||||
const size_t count) {
|
||||
range_checker::le(starting_bit_idx, other.size());
|
||||
|
||||
const size_type old_size = this->size();
|
||||
const size_t old_size = this->size();
|
||||
this->resize(this->size() + count);
|
||||
|
||||
policy_type::op_copy(other.data(),
|
||||
@ -1020,8 +1173,8 @@ class Bitset
|
||||
|
||||
// Reserve
|
||||
inline void
|
||||
reserve(const size_type capacity) {
|
||||
const size_type capacity_in_container_elements =
|
||||
reserve(const size_t capacity) {
|
||||
const size_t capacity_in_container_elements =
|
||||
get_required_size_in_container_elements(capacity);
|
||||
Data.reserve(capacity_in_container_elements);
|
||||
}
|
||||
@ -1048,7 +1201,7 @@ class Bitset
|
||||
// the container
|
||||
container_type Data;
|
||||
// the actual number of bits
|
||||
size_type Size = 0;
|
||||
size_t Size = 0;
|
||||
|
||||
inline data_type*
|
||||
data_impl() {
|
||||
@ -1058,19 +1211,19 @@ class Bitset
|
||||
data_impl() const {
|
||||
return reinterpret_cast<const data_type*>(Data.data());
|
||||
}
|
||||
inline size_type
|
||||
inline size_t
|
||||
size_impl() const {
|
||||
return Size;
|
||||
}
|
||||
inline size_type
|
||||
inline size_t
|
||||
offset_impl() const {
|
||||
return 0;
|
||||
}
|
||||
|
||||
//
|
||||
static inline size_type
|
||||
static inline size_t
|
||||
get_required_size_in_container_elements(const size_t size) {
|
||||
const size_type size_in_bytes =
|
||||
const size_t size_in_bytes =
|
||||
policy_type::get_required_size_in_bytes(size);
|
||||
return (size_in_bytes + sizeof(container_data_type) - 1) /
|
||||
sizeof(container_data_type);
|
||||
|
@ -27,6 +27,19 @@ namespace bitset {
|
||||
// this option is only somewhat supported
|
||||
// #define BITSET_HEADER_ONLY
|
||||
|
||||
// `always inline` hint.
|
||||
// It is introduced to deal with clang's behavior to reuse
|
||||
// once generated code. But if it is needed to generate
|
||||
// different machine code for multiple platforms based on
|
||||
// a single template, then such a behavior is undesired.
|
||||
// `always inline` is applied for PolicyT methods. It is fine,
|
||||
// because they are not used directly and are wrapped
|
||||
// in BitsetBase methods. So, a compiler may decide whether
|
||||
// to really inline them, but it forces a compiler to
|
||||
// generate specialized code for every hardward platform.
|
||||
// todo: MSVC has its own way to define `always inline`.
|
||||
#define BITSET_ALWAYS_INLINE __attribute__((always_inline))
|
||||
|
||||
// a supporting utility
|
||||
template <class>
|
||||
inline constexpr bool always_false_v = false;
|
||||
|
@ -32,55 +32,53 @@ namespace detail {
|
||||
template <typename ElementT>
|
||||
struct BitWiseBitsetPolicy {
|
||||
using data_type = ElementT;
|
||||
constexpr static auto data_bits = sizeof(data_type) * 8;
|
||||
|
||||
using size_type = size_t;
|
||||
constexpr static size_t data_bits = sizeof(data_type) * 8;
|
||||
|
||||
using self_type = BitWiseBitsetPolicy<ElementT>;
|
||||
|
||||
using proxy_type = Proxy<self_type>;
|
||||
using const_proxy_type = ConstProxy<self_type>;
|
||||
|
||||
static inline size_type
|
||||
static inline size_t
|
||||
get_element(const size_t idx) {
|
||||
return idx / data_bits;
|
||||
}
|
||||
|
||||
static inline size_type
|
||||
static inline size_t
|
||||
get_shift(const size_t idx) {
|
||||
return idx % data_bits;
|
||||
}
|
||||
|
||||
static inline size_type
|
||||
static inline size_t
|
||||
get_required_size_in_elements(const size_t size) {
|
||||
return (size + data_bits - 1) / data_bits;
|
||||
}
|
||||
|
||||
static inline size_type
|
||||
static inline size_t
|
||||
get_required_size_in_bytes(const size_t size) {
|
||||
return get_required_size_in_elements(size) * sizeof(data_type);
|
||||
}
|
||||
|
||||
static inline proxy_type
|
||||
get_proxy(data_type* const __restrict data, const size_type idx) {
|
||||
get_proxy(data_type* const __restrict data, const size_t idx) {
|
||||
data_type& element = data[get_element(idx)];
|
||||
const size_type shift = get_shift(idx);
|
||||
const size_t shift = get_shift(idx);
|
||||
return proxy_type{element, shift};
|
||||
}
|
||||
|
||||
static inline const_proxy_type
|
||||
get_proxy(const data_type* const __restrict data, const size_type idx) {
|
||||
get_proxy(const data_type* const __restrict data, const size_t idx) {
|
||||
const data_type& element = data[get_element(idx)];
|
||||
const size_type shift = get_shift(idx);
|
||||
const size_t shift = get_shift(idx);
|
||||
return const_proxy_type{element, shift};
|
||||
}
|
||||
|
||||
static inline data_type
|
||||
op_read(const data_type* const data,
|
||||
const size_type start,
|
||||
const size_type nbits) {
|
||||
const size_t start,
|
||||
const size_t nbits) {
|
||||
data_type value = 0;
|
||||
for (size_type i = 0; i < nbits; i++) {
|
||||
for (size_t i = 0; i < nbits; i++) {
|
||||
const auto proxy = get_proxy(data, start + i);
|
||||
value += proxy ? (data_type(1) << i) : 0;
|
||||
}
|
||||
@ -90,10 +88,10 @@ struct BitWiseBitsetPolicy {
|
||||
|
||||
static void
|
||||
op_write(data_type* const data,
|
||||
const size_type start,
|
||||
const size_type nbits,
|
||||
const size_t start,
|
||||
const size_t nbits,
|
||||
const data_type value) {
|
||||
for (size_type i = 0; i < nbits; i++) {
|
||||
for (size_t i = 0; i < nbits; i++) {
|
||||
auto proxy = get_proxy(data, start + i);
|
||||
data_type mask = data_type(1) << i;
|
||||
if ((value & mask) == mask) {
|
||||
@ -105,10 +103,8 @@ struct BitWiseBitsetPolicy {
|
||||
}
|
||||
|
||||
static inline void
|
||||
op_flip(data_type* const data,
|
||||
const size_type start,
|
||||
const size_type size) {
|
||||
for (size_type i = 0; i < size; i++) {
|
||||
op_flip(data_type* const data, const size_t start, const size_t size) {
|
||||
for (size_t i = 0; i < size; i++) {
|
||||
auto proxy = get_proxy(data, start + i);
|
||||
proxy.flip();
|
||||
}
|
||||
@ -122,7 +118,7 @@ struct BitWiseBitsetPolicy {
|
||||
const size_t size) {
|
||||
// todo: check if intersect
|
||||
|
||||
for (size_type i = 0; i < size; i++) {
|
||||
for (size_t i = 0; i < size; i++) {
|
||||
auto proxy_left = get_proxy(left, start_left + i);
|
||||
auto proxy_right = get_proxy(right, start_right + i);
|
||||
|
||||
@ -130,6 +126,27 @@ struct BitWiseBitsetPolicy {
|
||||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
op_and_multiple(data_type* const left,
|
||||
const data_type* const* const rights,
|
||||
const size_t start_left,
|
||||
const size_t* const __restrict start_rights,
|
||||
const size_t n_rights,
|
||||
const size_t size) {
|
||||
for (size_t i = 0; i < size; i++) {
|
||||
auto proxy_left = get_proxy(left, start_left + i);
|
||||
|
||||
bool value = proxy_left;
|
||||
for (size_t j = 0; j < n_rights; j++) {
|
||||
auto proxy_right = get_proxy(rights[j], start_rights[j] + i);
|
||||
|
||||
value &= proxy_right;
|
||||
}
|
||||
|
||||
proxy_left = value;
|
||||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
op_or(data_type* const left,
|
||||
const data_type* const right,
|
||||
@ -138,7 +155,7 @@ struct BitWiseBitsetPolicy {
|
||||
const size_t size) {
|
||||
// todo: check if intersect
|
||||
|
||||
for (size_type i = 0; i < size; i++) {
|
||||
for (size_t i = 0; i < size; i++) {
|
||||
auto proxy_left = get_proxy(left, start_left + i);
|
||||
auto proxy_right = get_proxy(right, start_right + i);
|
||||
|
||||
@ -147,26 +164,43 @@ struct BitWiseBitsetPolicy {
|
||||
}
|
||||
|
||||
static inline void
|
||||
op_set(data_type* const data, const size_type start, const size_type size) {
|
||||
for (size_type i = 0; i < size; i++) {
|
||||
op_or_multiple(data_type* const left,
|
||||
const data_type* const* const rights,
|
||||
const size_t start_left,
|
||||
const size_t* const __restrict start_rights,
|
||||
const size_t n_rights,
|
||||
const size_t size) {
|
||||
for (size_t i = 0; i < size; i++) {
|
||||
auto proxy_left = get_proxy(left, start_left + i);
|
||||
|
||||
bool value = proxy_left;
|
||||
for (size_t j = 0; j < n_rights; j++) {
|
||||
auto proxy_right = get_proxy(rights[j], start_rights[j] + i);
|
||||
|
||||
value |= proxy_right;
|
||||
}
|
||||
|
||||
proxy_left = value;
|
||||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
op_set(data_type* const data, const size_t start, const size_t size) {
|
||||
for (size_t i = 0; i < size; i++) {
|
||||
get_proxy(data, start + i) = true;
|
||||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
op_reset(data_type* const data,
|
||||
const size_type start,
|
||||
const size_type size) {
|
||||
for (size_type i = 0; i < size; i++) {
|
||||
op_reset(data_type* const data, const size_t start, const size_t size) {
|
||||
for (size_t i = 0; i < size; i++) {
|
||||
get_proxy(data, start + i) = false;
|
||||
}
|
||||
}
|
||||
|
||||
static inline bool
|
||||
op_all(const data_type* const data,
|
||||
const size_type start,
|
||||
const size_type size) {
|
||||
for (size_type i = 0; i < size; i++) {
|
||||
op_all(const data_type* const data, const size_t start, const size_t size) {
|
||||
for (size_t i = 0; i < size; i++) {
|
||||
if (!get_proxy(data, start + i)) {
|
||||
return false;
|
||||
}
|
||||
@ -177,9 +211,9 @@ struct BitWiseBitsetPolicy {
|
||||
|
||||
static inline bool
|
||||
op_none(const data_type* const data,
|
||||
const size_type start,
|
||||
const size_type size) {
|
||||
for (size_type i = 0; i < size; i++) {
|
||||
const size_t start,
|
||||
const size_t size) {
|
||||
for (size_t i = 0; i < size; i++) {
|
||||
if (get_proxy(data, start + i)) {
|
||||
return false;
|
||||
}
|
||||
@ -190,11 +224,11 @@ struct BitWiseBitsetPolicy {
|
||||
|
||||
static void
|
||||
op_copy(const data_type* const src,
|
||||
const size_type start_src,
|
||||
const size_t start_src,
|
||||
data_type* const dst,
|
||||
const size_type start_dst,
|
||||
const size_type size) {
|
||||
for (size_type i = 0; i < size; i++) {
|
||||
const size_t start_dst,
|
||||
const size_t size) {
|
||||
for (size_t i = 0; i < size; i++) {
|
||||
const auto src_p = get_proxy(src, start_src + i);
|
||||
auto dst_p = get_proxy(dst, start_dst + i);
|
||||
dst_p = src_p.operator bool();
|
||||
@ -203,22 +237,22 @@ struct BitWiseBitsetPolicy {
|
||||
|
||||
static void
|
||||
op_fill(data_type* const dst,
|
||||
const size_type start_dst,
|
||||
const size_type size,
|
||||
const size_t start_dst,
|
||||
const size_t size,
|
||||
const bool value) {
|
||||
for (size_type i = 0; i < size; i++) {
|
||||
for (size_t i = 0; i < size; i++) {
|
||||
auto dst_p = get_proxy(dst, start_dst + i);
|
||||
dst_p = value;
|
||||
}
|
||||
}
|
||||
|
||||
static inline size_type
|
||||
static inline size_t
|
||||
op_count(const data_type* const data,
|
||||
const size_type start,
|
||||
const size_type size) {
|
||||
size_type count = 0;
|
||||
const size_t start,
|
||||
const size_t size) {
|
||||
size_t count = 0;
|
||||
|
||||
for (size_type i = 0; i < size; i++) {
|
||||
for (size_t i = 0; i < size; i++) {
|
||||
auto proxy = get_proxy(data, start + i);
|
||||
count += (proxy) ? 1 : 0;
|
||||
}
|
||||
@ -232,7 +266,7 @@ struct BitWiseBitsetPolicy {
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
for (size_type i = 0; i < size; i++) {
|
||||
for (size_t i = 0; i < size; i++) {
|
||||
const auto proxy_left = get_proxy(left, start_left + i);
|
||||
const auto proxy_right = get_proxy(right, start_right + i);
|
||||
|
||||
@ -252,7 +286,7 @@ struct BitWiseBitsetPolicy {
|
||||
const size_t size) {
|
||||
// todo: check if intersect
|
||||
|
||||
for (size_type i = 0; i < size; i++) {
|
||||
for (size_t i = 0; i < size; i++) {
|
||||
auto proxy_left = get_proxy(left, start_left + i);
|
||||
const auto proxy_right = get_proxy(right, start_right + i);
|
||||
|
||||
@ -268,7 +302,7 @@ struct BitWiseBitsetPolicy {
|
||||
const size_t size) {
|
||||
// todo: check if intersect
|
||||
|
||||
for (size_type i = 0; i < size; i++) {
|
||||
for (size_t i = 0; i < size; i++) {
|
||||
auto proxy_left = get_proxy(left, start_left + i);
|
||||
const auto proxy_right = get_proxy(right, start_right + i);
|
||||
|
||||
@ -277,12 +311,12 @@ struct BitWiseBitsetPolicy {
|
||||
}
|
||||
|
||||
//
|
||||
static inline std::optional<size_type>
|
||||
static inline std::optional<size_t>
|
||||
op_find(const data_type* const data,
|
||||
const size_type start,
|
||||
const size_type size,
|
||||
const size_type starting_idx) {
|
||||
for (size_type i = starting_idx; i < size; i++) {
|
||||
const size_t start,
|
||||
const size_t size,
|
||||
const size_t starting_idx) {
|
||||
for (size_t i = starting_idx; i < size; i++) {
|
||||
const auto proxy = get_proxy(data, start + i);
|
||||
if (proxy) {
|
||||
return i;
|
||||
@ -296,11 +330,11 @@ struct BitWiseBitsetPolicy {
|
||||
template <typename T, typename U, CompareOpType Op>
|
||||
static inline void
|
||||
op_compare_column(data_type* const __restrict data,
|
||||
const size_type start,
|
||||
const size_t start,
|
||||
const T* const __restrict t,
|
||||
const U* const __restrict u,
|
||||
const size_type size) {
|
||||
for (size_type i = 0; i < size; i++) {
|
||||
const size_t size) {
|
||||
for (size_t i = 0; i < size; i++) {
|
||||
get_proxy(data, start + i) =
|
||||
CompareOperator<Op>::compare(t[i], u[i]);
|
||||
}
|
||||
@ -310,11 +344,11 @@ struct BitWiseBitsetPolicy {
|
||||
template <typename T, CompareOpType Op>
|
||||
static inline void
|
||||
op_compare_val(data_type* const __restrict data,
|
||||
const size_type start,
|
||||
const size_t start,
|
||||
const T* const __restrict t,
|
||||
const size_type size,
|
||||
const size_t size,
|
||||
const T& value) {
|
||||
for (size_type i = 0; i < size; i++) {
|
||||
for (size_t i = 0; i < size; i++) {
|
||||
get_proxy(data, start + i) =
|
||||
CompareOperator<Op>::compare(t[i], value);
|
||||
}
|
||||
@ -323,12 +357,12 @@ struct BitWiseBitsetPolicy {
|
||||
template <typename T, RangeType Op>
|
||||
static inline void
|
||||
op_within_range_column(data_type* const __restrict data,
|
||||
const size_type start,
|
||||
const size_t start,
|
||||
const T* const __restrict lower,
|
||||
const T* const __restrict upper,
|
||||
const T* const __restrict values,
|
||||
const size_type size) {
|
||||
for (size_type i = 0; i < size; i++) {
|
||||
const size_t size) {
|
||||
for (size_t i = 0; i < size; i++) {
|
||||
get_proxy(data, start + i) =
|
||||
RangeOperator<Op>::within_range(lower[i], upper[i], values[i]);
|
||||
}
|
||||
@ -338,12 +372,12 @@ struct BitWiseBitsetPolicy {
|
||||
template <typename T, RangeType Op>
|
||||
static inline void
|
||||
op_within_range_val(data_type* const __restrict data,
|
||||
const size_type start,
|
||||
const size_t start,
|
||||
const T& lower,
|
||||
const T& upper,
|
||||
const T* const __restrict values,
|
||||
const size_type size) {
|
||||
for (size_type i = 0; i < size; i++) {
|
||||
const size_t size) {
|
||||
for (size_t i = 0; i < size; i++) {
|
||||
get_proxy(data, start + i) =
|
||||
RangeOperator<Op>::within_range(lower, upper, values[i]);
|
||||
}
|
||||
@ -353,12 +387,12 @@ struct BitWiseBitsetPolicy {
|
||||
template <typename T, ArithOpType AOp, CompareOpType CmpOp>
|
||||
static inline void
|
||||
op_arith_compare(data_type* const __restrict data,
|
||||
const size_type start,
|
||||
const size_t start,
|
||||
const T* const __restrict src,
|
||||
const ArithHighPrecisionType<T>& right_operand,
|
||||
const ArithHighPrecisionType<T>& value,
|
||||
const size_type size) {
|
||||
for (size_type i = 0; i < size; i++) {
|
||||
const size_t size) {
|
||||
for (size_t i = 0; i < size; i++) {
|
||||
get_proxy(data, start + i) =
|
||||
ArithCompareOperator<AOp, CmpOp>::compare(
|
||||
src[i], right_operand, value);
|
||||
@ -375,7 +409,7 @@ struct BitWiseBitsetPolicy {
|
||||
// todo: check if intersect
|
||||
|
||||
size_t active = 0;
|
||||
for (size_type i = 0; i < size; i++) {
|
||||
for (size_t i = 0; i < size; i++) {
|
||||
auto proxy_left = get_proxy(left, start_left + i);
|
||||
auto proxy_right = get_proxy(right, start_right + i);
|
||||
|
||||
@ -397,7 +431,7 @@ struct BitWiseBitsetPolicy {
|
||||
// todo: check if intersect
|
||||
|
||||
size_t inactive = 0;
|
||||
for (size_type i = 0; i < size; i++) {
|
||||
for (size_t i = 0; i < size; i++) {
|
||||
auto proxy_left = get_proxy(left, start_left + i);
|
||||
auto proxy_right = get_proxy(right, start_right + i);
|
||||
|
||||
|
@ -32,53 +32,49 @@ namespace detail {
|
||||
template <typename ElementT, typename VectorizedT>
|
||||
struct VectorizedElementWiseBitsetPolicy {
|
||||
using data_type = ElementT;
|
||||
constexpr static auto data_bits = sizeof(data_type) * 8;
|
||||
|
||||
using size_type = size_t;
|
||||
constexpr static size_t data_bits = sizeof(data_type) * 8;
|
||||
|
||||
using self_type = VectorizedElementWiseBitsetPolicy<ElementT, VectorizedT>;
|
||||
|
||||
using proxy_type = Proxy<self_type>;
|
||||
using const_proxy_type = ConstProxy<self_type>;
|
||||
|
||||
static inline size_type
|
||||
static inline size_t
|
||||
get_element(const size_t idx) {
|
||||
return idx / data_bits;
|
||||
}
|
||||
|
||||
static inline size_type
|
||||
static inline size_t
|
||||
get_shift(const size_t idx) {
|
||||
return idx % data_bits;
|
||||
}
|
||||
|
||||
static inline size_type
|
||||
static inline size_t
|
||||
get_required_size_in_elements(const size_t size) {
|
||||
return (size + data_bits - 1) / data_bits;
|
||||
}
|
||||
|
||||
static inline size_type
|
||||
static inline size_t
|
||||
get_required_size_in_bytes(const size_t size) {
|
||||
return get_required_size_in_elements(size) * sizeof(data_type);
|
||||
}
|
||||
|
||||
static inline proxy_type
|
||||
get_proxy(data_type* const __restrict data, const size_type idx) {
|
||||
get_proxy(data_type* const __restrict data, const size_t idx) {
|
||||
data_type& element = data[get_element(idx)];
|
||||
const size_type shift = get_shift(idx);
|
||||
const size_t shift = get_shift(idx);
|
||||
return proxy_type{element, shift};
|
||||
}
|
||||
|
||||
static inline const_proxy_type
|
||||
get_proxy(const data_type* const __restrict data, const size_type idx) {
|
||||
get_proxy(const data_type* const __restrict data, const size_t idx) {
|
||||
const data_type& element = data[get_element(idx)];
|
||||
const size_type shift = get_shift(idx);
|
||||
const size_t shift = get_shift(idx);
|
||||
return const_proxy_type{element, shift};
|
||||
}
|
||||
|
||||
static inline void
|
||||
op_flip(data_type* const data,
|
||||
const size_type start,
|
||||
const size_type size) {
|
||||
op_flip(data_type* const data, const size_t start, const size_t size) {
|
||||
ElementWiseBitsetPolicy<ElementT>::op_flip(data, start, size);
|
||||
}
|
||||
|
||||
@ -88,8 +84,25 @@ struct VectorizedElementWiseBitsetPolicy {
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
ElementWiseBitsetPolicy<ElementT>::op_and(
|
||||
left, right, start_left, start_right, size);
|
||||
if (!VectorizedT::template forward_op_and<ElementT>(
|
||||
left, right, start_left, start_right, size)) {
|
||||
ElementWiseBitsetPolicy<ElementT>::op_and(
|
||||
left, right, start_left, start_right, size);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
op_and_multiple(data_type* const left,
|
||||
const data_type* const* const rights,
|
||||
const size_t start_left,
|
||||
const size_t* const __restrict start_rights,
|
||||
const size_t n_rights,
|
||||
const size_t size) {
|
||||
if (!VectorizedT::template forward_op_and_multiple<ElementT>(
|
||||
left, rights, start_left, start_rights, n_rights, size)) {
|
||||
ElementWiseBitsetPolicy<ElementT>::op_and_multiple(
|
||||
left, rights, start_left, start_rights, n_rights, size);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
@ -98,59 +111,72 @@ struct VectorizedElementWiseBitsetPolicy {
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
ElementWiseBitsetPolicy<ElementT>::op_or(
|
||||
left, right, start_left, start_right, size);
|
||||
if (!VectorizedT::template forward_op_or<ElementT>(
|
||||
left, right, start_left, start_right, size)) {
|
||||
ElementWiseBitsetPolicy<ElementT>::op_or(
|
||||
left, right, start_left, start_right, size);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
op_set(data_type* const data, const size_type start, const size_type size) {
|
||||
op_or_multiple(data_type* const left,
|
||||
const data_type* const* const rights,
|
||||
const size_t start_left,
|
||||
const size_t* const __restrict start_rights,
|
||||
const size_t n_rights,
|
||||
const size_t size) {
|
||||
if (!VectorizedT::template forward_op_or_multiple<ElementT>(
|
||||
left, rights, start_left, start_rights, n_rights, size)) {
|
||||
ElementWiseBitsetPolicy<ElementT>::op_or_multiple(
|
||||
left, rights, start_left, start_rights, n_rights, size);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
op_set(data_type* const data, const size_t start, const size_t size) {
|
||||
ElementWiseBitsetPolicy<ElementT>::op_set(data, start, size);
|
||||
}
|
||||
|
||||
static inline void
|
||||
op_reset(data_type* const data,
|
||||
const size_type start,
|
||||
const size_type size) {
|
||||
op_reset(data_type* const data, const size_t start, const size_t size) {
|
||||
ElementWiseBitsetPolicy<ElementT>::op_reset(data, start, size);
|
||||
}
|
||||
|
||||
static inline bool
|
||||
op_all(const data_type* const data,
|
||||
const size_type start,
|
||||
const size_type size) {
|
||||
op_all(const data_type* const data, const size_t start, const size_t size) {
|
||||
return ElementWiseBitsetPolicy<ElementT>::op_all(data, start, size);
|
||||
}
|
||||
|
||||
static inline bool
|
||||
op_none(const data_type* const data,
|
||||
const size_type start,
|
||||
const size_type size) {
|
||||
const size_t start,
|
||||
const size_t size) {
|
||||
return ElementWiseBitsetPolicy<ElementT>::op_none(data, start, size);
|
||||
}
|
||||
|
||||
static void
|
||||
op_copy(const data_type* const src,
|
||||
const size_type start_src,
|
||||
const size_t start_src,
|
||||
data_type* const dst,
|
||||
const size_type start_dst,
|
||||
const size_type size) {
|
||||
const size_t start_dst,
|
||||
const size_t size) {
|
||||
ElementWiseBitsetPolicy<ElementT>::op_copy(
|
||||
src, start_src, dst, start_dst, size);
|
||||
}
|
||||
|
||||
static inline size_type
|
||||
static inline size_t
|
||||
op_count(const data_type* const data,
|
||||
const size_type start,
|
||||
const size_type size) {
|
||||
const size_t start,
|
||||
const size_t size) {
|
||||
return ElementWiseBitsetPolicy<ElementT>::op_count(data, start, size);
|
||||
}
|
||||
|
||||
static inline bool
|
||||
op_eq(const data_type* const left,
|
||||
const data_type* const right,
|
||||
const size_type start_left,
|
||||
const size_type start_right,
|
||||
const size_type size) {
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
return ElementWiseBitsetPolicy<ElementT>::op_eq(
|
||||
left, right, start_left, start_right, size);
|
||||
}
|
||||
@ -161,8 +187,11 @@ struct VectorizedElementWiseBitsetPolicy {
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
ElementWiseBitsetPolicy<ElementT>::op_xor(
|
||||
left, right, start_left, start_right, size);
|
||||
if (!VectorizedT::template forward_op_xor<ElementT>(
|
||||
left, right, start_left, start_right, size)) {
|
||||
ElementWiseBitsetPolicy<ElementT>::op_xor(
|
||||
left, right, start_left, start_right, size);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
@ -171,24 +200,27 @@ struct VectorizedElementWiseBitsetPolicy {
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
ElementWiseBitsetPolicy<ElementT>::op_sub(
|
||||
left, right, start_left, start_right, size);
|
||||
if (!VectorizedT::template forward_op_sub<ElementT>(
|
||||
left, right, start_left, start_right, size)) {
|
||||
ElementWiseBitsetPolicy<ElementT>::op_sub(
|
||||
left, right, start_left, start_right, size);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
op_fill(data_type* const data,
|
||||
const size_type start,
|
||||
const size_type size,
|
||||
const size_t start,
|
||||
const size_t size,
|
||||
const bool value) {
|
||||
ElementWiseBitsetPolicy<ElementT>::op_fill(data, start, size, value);
|
||||
}
|
||||
|
||||
//
|
||||
static inline std::optional<size_type>
|
||||
static inline std::optional<size_t>
|
||||
op_find(const data_type* const data,
|
||||
const size_type start,
|
||||
const size_type size,
|
||||
const size_type starting_idx) {
|
||||
const size_t start,
|
||||
const size_t size,
|
||||
const size_t starting_idx) {
|
||||
return ElementWiseBitsetPolicy<ElementT>::op_find(
|
||||
data, start, size, starting_idx);
|
||||
}
|
||||
@ -197,16 +229,16 @@ struct VectorizedElementWiseBitsetPolicy {
|
||||
template <typename T, typename U, CompareOpType Op>
|
||||
static inline void
|
||||
op_compare_column(data_type* const __restrict data,
|
||||
const size_type start,
|
||||
const size_t start,
|
||||
const T* const __restrict t,
|
||||
const U* const __restrict u,
|
||||
const size_type size) {
|
||||
const size_t size) {
|
||||
op_func(
|
||||
start,
|
||||
size,
|
||||
[data, t, u](const size_type starting_bit,
|
||||
const size_type ptr_offset,
|
||||
const size_type nbits) {
|
||||
[data, t, u](const size_t starting_bit,
|
||||
const size_t ptr_offset,
|
||||
const size_t nbits) {
|
||||
ElementWiseBitsetPolicy<ElementT>::
|
||||
template op_compare_column<T, U, Op>(data,
|
||||
starting_bit,
|
||||
@ -214,9 +246,9 @@ struct VectorizedElementWiseBitsetPolicy {
|
||||
u + ptr_offset,
|
||||
nbits);
|
||||
},
|
||||
[data, t, u](const size_type starting_element,
|
||||
const size_type ptr_offset,
|
||||
const size_type nbits) {
|
||||
[data, t, u](const size_t starting_element,
|
||||
const size_t ptr_offset,
|
||||
const size_t nbits) {
|
||||
return VectorizedT::template op_compare_column<T, U, Op>(
|
||||
reinterpret_cast<uint8_t*>(data + starting_element),
|
||||
t + ptr_offset,
|
||||
@ -229,23 +261,23 @@ struct VectorizedElementWiseBitsetPolicy {
|
||||
template <typename T, CompareOpType Op>
|
||||
static inline void
|
||||
op_compare_val(data_type* const __restrict data,
|
||||
const size_type start,
|
||||
const size_t start,
|
||||
const T* const __restrict t,
|
||||
const size_type size,
|
||||
const size_t size,
|
||||
const T& value) {
|
||||
op_func(
|
||||
start,
|
||||
size,
|
||||
[data, t, value](const size_type starting_bit,
|
||||
const size_type ptr_offset,
|
||||
const size_type nbits) {
|
||||
[data, t, value](const size_t starting_bit,
|
||||
const size_t ptr_offset,
|
||||
const size_t nbits) {
|
||||
ElementWiseBitsetPolicy<ElementT>::template op_compare_val<T,
|
||||
Op>(
|
||||
data, starting_bit, t + ptr_offset, nbits, value);
|
||||
},
|
||||
[data, t, value](const size_type starting_element,
|
||||
const size_type ptr_offset,
|
||||
const size_type nbits) {
|
||||
[data, t, value](const size_t starting_element,
|
||||
const size_t ptr_offset,
|
||||
const size_t nbits) {
|
||||
return VectorizedT::template op_compare_val<T, Op>(
|
||||
reinterpret_cast<uint8_t*>(data + starting_element),
|
||||
t + ptr_offset,
|
||||
@ -258,17 +290,17 @@ struct VectorizedElementWiseBitsetPolicy {
|
||||
template <typename T, RangeType Op>
|
||||
static inline void
|
||||
op_within_range_column(data_type* const __restrict data,
|
||||
const size_type start,
|
||||
const size_t start,
|
||||
const T* const __restrict lower,
|
||||
const T* const __restrict upper,
|
||||
const T* const __restrict values,
|
||||
const size_type size) {
|
||||
const size_t size) {
|
||||
op_func(
|
||||
start,
|
||||
size,
|
||||
[data, lower, upper, values](const size_type starting_bit,
|
||||
const size_type ptr_offset,
|
||||
const size_type nbits) {
|
||||
[data, lower, upper, values](const size_t starting_bit,
|
||||
const size_t ptr_offset,
|
||||
const size_t nbits) {
|
||||
ElementWiseBitsetPolicy<ElementT>::
|
||||
template op_within_range_column<T, Op>(data,
|
||||
starting_bit,
|
||||
@ -277,9 +309,9 @@ struct VectorizedElementWiseBitsetPolicy {
|
||||
values + ptr_offset,
|
||||
nbits);
|
||||
},
|
||||
[data, lower, upper, values](const size_type starting_element,
|
||||
const size_type ptr_offset,
|
||||
const size_type nbits) {
|
||||
[data, lower, upper, values](const size_t starting_element,
|
||||
const size_t ptr_offset,
|
||||
const size_t nbits) {
|
||||
return VectorizedT::template op_within_range_column<T, Op>(
|
||||
reinterpret_cast<uint8_t*>(data + starting_element),
|
||||
lower + ptr_offset,
|
||||
@ -293,17 +325,17 @@ struct VectorizedElementWiseBitsetPolicy {
|
||||
template <typename T, RangeType Op>
|
||||
static inline void
|
||||
op_within_range_val(data_type* const __restrict data,
|
||||
const size_type start,
|
||||
const size_t start,
|
||||
const T& lower,
|
||||
const T& upper,
|
||||
const T* const __restrict values,
|
||||
const size_type size) {
|
||||
const size_t size) {
|
||||
op_func(
|
||||
start,
|
||||
size,
|
||||
[data, lower, upper, values](const size_type starting_bit,
|
||||
const size_type ptr_offset,
|
||||
const size_type nbits) {
|
||||
[data, lower, upper, values](const size_t starting_bit,
|
||||
const size_t ptr_offset,
|
||||
const size_t nbits) {
|
||||
ElementWiseBitsetPolicy<ElementT>::
|
||||
template op_within_range_val<T, Op>(data,
|
||||
starting_bit,
|
||||
@ -312,9 +344,9 @@ struct VectorizedElementWiseBitsetPolicy {
|
||||
values + ptr_offset,
|
||||
nbits);
|
||||
},
|
||||
[data, lower, upper, values](const size_type starting_element,
|
||||
const size_type ptr_offset,
|
||||
const size_type nbits) {
|
||||
[data, lower, upper, values](const size_t starting_element,
|
||||
const size_t ptr_offset,
|
||||
const size_t nbits) {
|
||||
return VectorizedT::template op_within_range_val<T, Op>(
|
||||
reinterpret_cast<uint8_t*>(data + starting_element),
|
||||
lower,
|
||||
@ -328,17 +360,17 @@ struct VectorizedElementWiseBitsetPolicy {
|
||||
template <typename T, ArithOpType AOp, CompareOpType CmpOp>
|
||||
static inline void
|
||||
op_arith_compare(data_type* const __restrict data,
|
||||
const size_type start,
|
||||
const size_t start,
|
||||
const T* const __restrict src,
|
||||
const ArithHighPrecisionType<T>& right_operand,
|
||||
const ArithHighPrecisionType<T>& value,
|
||||
const size_type size) {
|
||||
const size_t size) {
|
||||
op_func(
|
||||
start,
|
||||
size,
|
||||
[data, src, right_operand, value](const size_type starting_bit,
|
||||
const size_type ptr_offset,
|
||||
const size_type nbits) {
|
||||
[data, src, right_operand, value](const size_t starting_bit,
|
||||
const size_t ptr_offset,
|
||||
const size_t nbits) {
|
||||
ElementWiseBitsetPolicy<ElementT>::
|
||||
template op_arith_compare<T, AOp, CmpOp>(data,
|
||||
starting_bit,
|
||||
@ -347,9 +379,9 @@ struct VectorizedElementWiseBitsetPolicy {
|
||||
value,
|
||||
nbits);
|
||||
},
|
||||
[data, src, right_operand, value](const size_type starting_element,
|
||||
const size_type ptr_offset,
|
||||
const size_type nbits) {
|
||||
[data, src, right_operand, value](const size_t starting_element,
|
||||
const size_t ptr_offset,
|
||||
const size_t nbits) {
|
||||
return VectorizedT::template op_arith_compare<T, AOp, CmpOp>(
|
||||
reinterpret_cast<uint8_t*>(data + starting_element),
|
||||
src + ptr_offset,
|
||||
@ -380,12 +412,12 @@ struct VectorizedElementWiseBitsetPolicy {
|
||||
left, right, start_left, start_right, size);
|
||||
}
|
||||
|
||||
// void FuncBaseline(const size_t starting_bit, const size_type ptr_offset, const size_type nbits)
|
||||
// bool FuncVectorized(const size_type starting_element, const size_type ptr_offset, const size_type nbits)
|
||||
// void FuncBaseline(const size_t starting_bit, const size_t ptr_offset, const size_t nbits)
|
||||
// bool FuncVectorized(const size_t starting_element, const size_t ptr_offset, const size_t nbits)
|
||||
template <typename FuncBaseline, typename FuncVectorized>
|
||||
static inline void
|
||||
op_func(const size_type start,
|
||||
const size_type size,
|
||||
op_func(const size_t start,
|
||||
const size_t size,
|
||||
FuncBaseline func_baseline,
|
||||
FuncVectorized func_vectorized) {
|
||||
if (size == 0) {
|
||||
|
@ -26,6 +26,8 @@
|
||||
#include "popcount.h"
|
||||
|
||||
#include "bitset/common.h"
|
||||
#include "maybe_vector.h"
|
||||
|
||||
namespace milvus {
|
||||
namespace bitset {
|
||||
namespace detail {
|
||||
@ -34,53 +36,51 @@ namespace detail {
|
||||
template <typename ElementT>
|
||||
struct ElementWiseBitsetPolicy {
|
||||
using data_type = ElementT;
|
||||
constexpr static auto data_bits = sizeof(data_type) * 8;
|
||||
|
||||
using size_type = size_t;
|
||||
constexpr static size_t data_bits = sizeof(data_type) * 8;
|
||||
|
||||
using self_type = ElementWiseBitsetPolicy<ElementT>;
|
||||
|
||||
using proxy_type = Proxy<self_type>;
|
||||
using const_proxy_type = ConstProxy<self_type>;
|
||||
|
||||
static inline size_type
|
||||
static inline size_t
|
||||
get_element(const size_t idx) {
|
||||
return idx / data_bits;
|
||||
}
|
||||
|
||||
static inline size_type
|
||||
static inline size_t
|
||||
get_shift(const size_t idx) {
|
||||
return idx % data_bits;
|
||||
}
|
||||
|
||||
static inline size_type
|
||||
static inline size_t
|
||||
get_required_size_in_elements(const size_t size) {
|
||||
return (size + data_bits - 1) / data_bits;
|
||||
}
|
||||
|
||||
static inline size_type
|
||||
static inline size_t
|
||||
get_required_size_in_bytes(const size_t size) {
|
||||
return get_required_size_in_elements(size) * sizeof(data_type);
|
||||
}
|
||||
|
||||
static inline proxy_type
|
||||
get_proxy(data_type* const __restrict data, const size_type idx) {
|
||||
get_proxy(data_type* const __restrict data, const size_t idx) {
|
||||
data_type& element = data[get_element(idx)];
|
||||
const size_type shift = get_shift(idx);
|
||||
const size_t shift = get_shift(idx);
|
||||
return proxy_type{element, shift};
|
||||
}
|
||||
|
||||
static inline const_proxy_type
|
||||
get_proxy(const data_type* const __restrict data, const size_type idx) {
|
||||
get_proxy(const data_type* const __restrict data, const size_t idx) {
|
||||
const data_type& element = data[get_element(idx)];
|
||||
const size_type shift = get_shift(idx);
|
||||
const size_t shift = get_shift(idx);
|
||||
return const_proxy_type{element, shift};
|
||||
}
|
||||
|
||||
static inline data_type
|
||||
op_read(const data_type* const data,
|
||||
const size_type start,
|
||||
const size_type nbits) {
|
||||
const size_t start,
|
||||
const size_t nbits) {
|
||||
if (nbits == 0) {
|
||||
return 0;
|
||||
}
|
||||
@ -121,8 +121,8 @@ struct ElementWiseBitsetPolicy {
|
||||
|
||||
static inline void
|
||||
op_write(data_type* const data,
|
||||
const size_type start,
|
||||
const size_type nbits,
|
||||
const size_t start,
|
||||
const size_t nbits,
|
||||
const data_type value) {
|
||||
if (nbits == 0) {
|
||||
return;
|
||||
@ -169,9 +169,7 @@ struct ElementWiseBitsetPolicy {
|
||||
}
|
||||
|
||||
static inline void
|
||||
op_flip(data_type* const data,
|
||||
const size_type start,
|
||||
const size_type size) {
|
||||
op_flip(data_type* const data, const size_t start, const size_t size) {
|
||||
if (size == 0) {
|
||||
return;
|
||||
}
|
||||
@ -211,7 +209,7 @@ struct ElementWiseBitsetPolicy {
|
||||
}
|
||||
|
||||
// process the middle
|
||||
for (size_type i = start_element; i < end_element; i++) {
|
||||
for (size_t i = start_element; i < end_element; i++) {
|
||||
data[i] = ~data[i];
|
||||
}
|
||||
|
||||
@ -228,7 +226,7 @@ struct ElementWiseBitsetPolicy {
|
||||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
static BITSET_ALWAYS_INLINE inline void
|
||||
op_and(data_type* const left,
|
||||
const data_type* const right,
|
||||
const size_t start_left,
|
||||
@ -244,7 +242,25 @@ struct ElementWiseBitsetPolicy {
|
||||
});
|
||||
}
|
||||
|
||||
static inline void
|
||||
static BITSET_ALWAYS_INLINE inline void
|
||||
op_and_multiple(data_type* const left,
|
||||
const data_type* const* const rights,
|
||||
const size_t start_left,
|
||||
const size_t* const __restrict start_rights,
|
||||
const size_t n_rights,
|
||||
const size_t size) {
|
||||
op_func(left,
|
||||
rights,
|
||||
start_left,
|
||||
start_rights,
|
||||
n_rights,
|
||||
size,
|
||||
[](const data_type left_v, const data_type right_v) {
|
||||
return left_v & right_v;
|
||||
});
|
||||
}
|
||||
|
||||
static BITSET_ALWAYS_INLINE inline void
|
||||
op_or(data_type* const left,
|
||||
const data_type* const right,
|
||||
const size_t start_left,
|
||||
@ -260,8 +276,26 @@ struct ElementWiseBitsetPolicy {
|
||||
});
|
||||
}
|
||||
|
||||
static BITSET_ALWAYS_INLINE inline void
|
||||
op_or_multiple(data_type* const left,
|
||||
const data_type* const* const rights,
|
||||
const size_t start_left,
|
||||
const size_t* const __restrict start_rights,
|
||||
const size_t n_rights,
|
||||
const size_t size) {
|
||||
op_func(left,
|
||||
rights,
|
||||
start_left,
|
||||
start_rights,
|
||||
n_rights,
|
||||
size,
|
||||
[](const data_type left_v, const data_type right_v) {
|
||||
return left_v | right_v;
|
||||
});
|
||||
}
|
||||
|
||||
static inline data_type
|
||||
get_shift_mask_begin(const size_type shift) {
|
||||
get_shift_mask_begin(const size_t shift) {
|
||||
// 0 -> 0b00000000
|
||||
// 1 -> 0b00000001
|
||||
// 2 -> 0b00000011
|
||||
@ -273,7 +307,7 @@ struct ElementWiseBitsetPolicy {
|
||||
}
|
||||
|
||||
static inline data_type
|
||||
get_shift_mask_end(const size_type shift) {
|
||||
get_shift_mask_end(const size_t shift) {
|
||||
// 0 -> 0b11111111
|
||||
// 1 -> 0b11111110
|
||||
// 2 -> 0b11111100
|
||||
@ -281,21 +315,17 @@ struct ElementWiseBitsetPolicy {
|
||||
}
|
||||
|
||||
static inline void
|
||||
op_set(data_type* const data, const size_type start, const size_type size) {
|
||||
op_set(data_type* const data, const size_t start, const size_t size) {
|
||||
op_fill(data, start, size, true);
|
||||
}
|
||||
|
||||
static inline void
|
||||
op_reset(data_type* const data,
|
||||
const size_type start,
|
||||
const size_type size) {
|
||||
op_reset(data_type* const data, const size_t start, const size_t size) {
|
||||
op_fill(data, start, size, false);
|
||||
}
|
||||
|
||||
static inline bool
|
||||
op_all(const data_type* const data,
|
||||
const size_type start,
|
||||
const size_type size) {
|
||||
op_all(const data_type* const data, const size_t start, const size_t size) {
|
||||
if (size == 0) {
|
||||
return true;
|
||||
}
|
||||
@ -329,7 +359,7 @@ struct ElementWiseBitsetPolicy {
|
||||
}
|
||||
|
||||
// process the middle
|
||||
for (size_type i = start_element; i < end_element; i++) {
|
||||
for (size_t i = start_element; i < end_element; i++) {
|
||||
if (data[i] != data_type(-1)) {
|
||||
return false;
|
||||
}
|
||||
@ -351,8 +381,8 @@ struct ElementWiseBitsetPolicy {
|
||||
|
||||
static inline bool
|
||||
op_none(const data_type* const data,
|
||||
const size_type start,
|
||||
const size_type size) {
|
||||
const size_t start,
|
||||
const size_t size) {
|
||||
if (size == 0) {
|
||||
return true;
|
||||
}
|
||||
@ -386,7 +416,7 @@ struct ElementWiseBitsetPolicy {
|
||||
}
|
||||
|
||||
// process the middle
|
||||
for (size_type i = start_element; i < end_element; i++) {
|
||||
for (size_t i = start_element; i < end_element; i++) {
|
||||
if (data[i] != data_type(0)) {
|
||||
return false;
|
||||
}
|
||||
@ -408,27 +438,27 @@ struct ElementWiseBitsetPolicy {
|
||||
|
||||
static void
|
||||
op_copy(const data_type* const src,
|
||||
const size_type start_src,
|
||||
const size_t start_src,
|
||||
data_type* const dst,
|
||||
const size_type start_dst,
|
||||
const size_type size) {
|
||||
const size_t start_dst,
|
||||
const size_t size) {
|
||||
if (size == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
// process big blocks
|
||||
const size_type size_b = (size / data_bits) * data_bits;
|
||||
const size_t size_b = (size / data_bits) * data_bits;
|
||||
|
||||
if ((start_src % data_bits) == 0) {
|
||||
if ((start_dst % data_bits) == 0) {
|
||||
// plain memcpy
|
||||
for (size_type i = 0; i < size_b; i += data_bits) {
|
||||
for (size_t i = 0; i < size_b; i += data_bits) {
|
||||
const data_type src_v = src[(start_src + i) / data_bits];
|
||||
dst[(start_dst + i) / data_bits] = src_v;
|
||||
}
|
||||
} else {
|
||||
// easier read
|
||||
for (size_type i = 0; i < size_b; i += data_bits) {
|
||||
for (size_t i = 0; i < size_b; i += data_bits) {
|
||||
const data_type src_v = src[(start_src + i) / data_bits];
|
||||
op_write(dst, start_dst + i, data_bits, src_v);
|
||||
}
|
||||
@ -436,14 +466,14 @@ struct ElementWiseBitsetPolicy {
|
||||
} else {
|
||||
if ((start_dst % data_bits) == 0) {
|
||||
// easier write
|
||||
for (size_type i = 0; i < size_b; i += data_bits) {
|
||||
for (size_t i = 0; i < size_b; i += data_bits) {
|
||||
const data_type src_v =
|
||||
op_read(src, start_src + i, data_bits);
|
||||
dst[(start_dst + i) / data_bits] = src_v;
|
||||
}
|
||||
} else {
|
||||
// general case
|
||||
for (size_type i = 0; i < size_b; i += data_bits) {
|
||||
for (size_t i = 0; i < size_b; i += data_bits) {
|
||||
const data_type src_v =
|
||||
op_read(src, start_src + i, data_bits);
|
||||
op_write(dst, start_dst + i, data_bits, src_v);
|
||||
@ -461,8 +491,8 @@ struct ElementWiseBitsetPolicy {
|
||||
|
||||
static void
|
||||
op_fill(data_type* const data,
|
||||
const size_type start,
|
||||
const size_type size,
|
||||
const size_t start,
|
||||
const size_t size,
|
||||
const bool value) {
|
||||
if (size == 0) {
|
||||
return;
|
||||
@ -504,7 +534,7 @@ struct ElementWiseBitsetPolicy {
|
||||
}
|
||||
|
||||
// process the middle
|
||||
for (size_type i = start_element; i < end_element; i++) {
|
||||
for (size_t i = start_element; i < end_element; i++) {
|
||||
data[i] = new_v;
|
||||
}
|
||||
|
||||
@ -520,15 +550,15 @@ struct ElementWiseBitsetPolicy {
|
||||
}
|
||||
}
|
||||
|
||||
static inline size_type
|
||||
static inline size_t
|
||||
op_count(const data_type* const data,
|
||||
const size_type start,
|
||||
const size_type size) {
|
||||
const size_t start,
|
||||
const size_t size) {
|
||||
if (size == 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
size_type count = 0;
|
||||
size_t count = 0;
|
||||
|
||||
auto start_element = get_element(start);
|
||||
const auto end_element = get_element(start + size);
|
||||
@ -558,7 +588,7 @@ struct ElementWiseBitsetPolicy {
|
||||
}
|
||||
|
||||
// process the middle
|
||||
for (size_type i = start_element; i < end_element; i++) {
|
||||
for (size_t i = start_element; i < end_element; i++) {
|
||||
count += PopCountHelper<data_type>::count(data[i]);
|
||||
}
|
||||
|
||||
@ -577,24 +607,23 @@ struct ElementWiseBitsetPolicy {
|
||||
static inline bool
|
||||
op_eq(const data_type* const left,
|
||||
const data_type* const right,
|
||||
const size_type start_left,
|
||||
const size_type start_right,
|
||||
const size_type size) {
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
if (size == 0) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// process big chunks
|
||||
const size_type size_b = (size / data_bits) * data_bits;
|
||||
const size_t size_b = (size / data_bits) * data_bits;
|
||||
|
||||
if ((start_left % data_bits) == 0) {
|
||||
if ((start_right % data_bits) == 0) {
|
||||
// plain "memcpy"
|
||||
size_type start_left_idx = start_left / data_bits;
|
||||
size_type start_right_idx = start_right / data_bits;
|
||||
size_t start_left_idx = start_left / data_bits;
|
||||
size_t start_right_idx = start_right / data_bits;
|
||||
|
||||
for (size_type i = 0, j = 0; i < size_b;
|
||||
i += data_bits, j += 1) {
|
||||
for (size_t i = 0, j = 0; i < size_b; i += data_bits, j += 1) {
|
||||
const data_type left_v = left[start_left_idx + j];
|
||||
const data_type right_v = right[start_right_idx + j];
|
||||
if (left_v != right_v) {
|
||||
@ -603,10 +632,9 @@ struct ElementWiseBitsetPolicy {
|
||||
}
|
||||
} else {
|
||||
// easier left
|
||||
size_type start_left_idx = start_left / data_bits;
|
||||
size_t start_left_idx = start_left / data_bits;
|
||||
|
||||
for (size_type i = 0, j = 0; i < size_b;
|
||||
i += data_bits, j += 1) {
|
||||
for (size_t i = 0, j = 0; i < size_b; i += data_bits, j += 1) {
|
||||
const data_type left_v = left[start_left_idx + j];
|
||||
const data_type right_v =
|
||||
op_read(right, start_right + i, data_bits);
|
||||
@ -618,10 +646,9 @@ struct ElementWiseBitsetPolicy {
|
||||
} else {
|
||||
if ((start_right % data_bits) == 0) {
|
||||
// easier right
|
||||
size_type start_right_idx = start_right / data_bits;
|
||||
size_t start_right_idx = start_right / data_bits;
|
||||
|
||||
for (size_type i = 0, j = 0; i < size_b;
|
||||
i += data_bits, j += 1) {
|
||||
for (size_t i = 0, j = 0; i < size_b; i += data_bits, j += 1) {
|
||||
const data_type left_v =
|
||||
op_read(left, start_left + i, data_bits);
|
||||
const data_type right_v = right[start_right_idx + j];
|
||||
@ -631,7 +658,7 @@ struct ElementWiseBitsetPolicy {
|
||||
}
|
||||
} else {
|
||||
// general case
|
||||
for (size_type i = 0; i < size_b; i += data_bits) {
|
||||
for (size_t i = 0; i < size_b; i += data_bits) {
|
||||
const data_type left_v =
|
||||
op_read(left, start_left + i, data_bits);
|
||||
const data_type right_v =
|
||||
@ -657,7 +684,7 @@ struct ElementWiseBitsetPolicy {
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline void
|
||||
static BITSET_ALWAYS_INLINE inline void
|
||||
op_xor(data_type* const left,
|
||||
const data_type* const right,
|
||||
const size_t start_left,
|
||||
@ -673,7 +700,7 @@ struct ElementWiseBitsetPolicy {
|
||||
});
|
||||
}
|
||||
|
||||
static inline void
|
||||
static BITSET_ALWAYS_INLINE inline void
|
||||
op_sub(data_type* const left,
|
||||
const data_type* const right,
|
||||
const size_t start_left,
|
||||
@ -690,11 +717,11 @@ struct ElementWiseBitsetPolicy {
|
||||
}
|
||||
|
||||
//
|
||||
static inline std::optional<size_type>
|
||||
static inline std::optional<size_t>
|
||||
op_find(const data_type* const data,
|
||||
const size_type start,
|
||||
const size_type size,
|
||||
const size_type starting_idx) {
|
||||
const size_t start,
|
||||
const size_t size,
|
||||
const size_t starting_idx) {
|
||||
if (size == 0) {
|
||||
return std::nullopt;
|
||||
}
|
||||
@ -706,7 +733,7 @@ struct ElementWiseBitsetPolicy {
|
||||
const auto start_shift = get_shift(start + starting_idx);
|
||||
const auto end_shift = get_shift(start + size);
|
||||
|
||||
size_type extra_offset = 0;
|
||||
size_t extra_offset = 0;
|
||||
|
||||
// same element?
|
||||
if (start_element == end_element) {
|
||||
@ -718,7 +745,7 @@ struct ElementWiseBitsetPolicy {
|
||||
const data_type value = existing_v & existing_mask;
|
||||
if (value != 0) {
|
||||
const auto ctz = CtzHelper<data_type>::ctz(value);
|
||||
return size_type(ctz) + start_element * data_bits - start;
|
||||
return size_t(ctz) + start_element * data_bits - start;
|
||||
} else {
|
||||
return std::nullopt;
|
||||
}
|
||||
@ -733,7 +760,7 @@ struct ElementWiseBitsetPolicy {
|
||||
if (value != 0) {
|
||||
const auto ctz = CtzHelper<data_type>::ctz(value) +
|
||||
start_element * data_bits - start;
|
||||
return size_type(ctz);
|
||||
return size_t(ctz);
|
||||
}
|
||||
|
||||
start_element += 1;
|
||||
@ -741,11 +768,11 @@ struct ElementWiseBitsetPolicy {
|
||||
}
|
||||
|
||||
// process the middle
|
||||
for (size_type i = start_element; i < end_element; i++) {
|
||||
for (size_t i = start_element; i < end_element; i++) {
|
||||
const data_type value = data[i];
|
||||
if (value != 0) {
|
||||
const auto ctz = CtzHelper<data_type>::ctz(value);
|
||||
return size_type(ctz) + i * data_bits - start;
|
||||
return size_t(ctz) + i * data_bits - start;
|
||||
}
|
||||
}
|
||||
|
||||
@ -757,7 +784,7 @@ struct ElementWiseBitsetPolicy {
|
||||
const data_type value = existing_v & existing_mask;
|
||||
if (value != 0) {
|
||||
const auto ctz = CtzHelper<data_type>::ctz(value);
|
||||
return size_type(ctz) + end_element * data_bits - start;
|
||||
return size_t(ctz) + end_element * data_bits - start;
|
||||
}
|
||||
}
|
||||
|
||||
@ -768,11 +795,11 @@ struct ElementWiseBitsetPolicy {
|
||||
template <typename T, typename U, CompareOpType Op>
|
||||
static inline void
|
||||
op_compare_column(data_type* const __restrict data,
|
||||
const size_type start,
|
||||
const size_t start,
|
||||
const T* const __restrict t,
|
||||
const U* const __restrict u,
|
||||
const size_type size) {
|
||||
op_func(data, start, size, [t, u](const size_type bit_idx) {
|
||||
const size_t size) {
|
||||
op_func(data, start, size, [t, u](const size_t bit_idx) {
|
||||
return CompareOperator<Op>::compare(t[bit_idx], u[bit_idx]);
|
||||
});
|
||||
}
|
||||
@ -781,11 +808,11 @@ struct ElementWiseBitsetPolicy {
|
||||
template <typename T, CompareOpType Op>
|
||||
static inline void
|
||||
op_compare_val(data_type* const __restrict data,
|
||||
const size_type start,
|
||||
const size_t start,
|
||||
const T* const __restrict t,
|
||||
const size_type size,
|
||||
const size_t size,
|
||||
const T& value) {
|
||||
op_func(data, start, size, [t, value](const size_type bit_idx) {
|
||||
op_func(data, start, size, [t, value](const size_t bit_idx) {
|
||||
return CompareOperator<Op>::compare(t[bit_idx], value);
|
||||
});
|
||||
}
|
||||
@ -794,13 +821,13 @@ struct ElementWiseBitsetPolicy {
|
||||
template <typename T, RangeType Op>
|
||||
static inline void
|
||||
op_within_range_column(data_type* const __restrict data,
|
||||
const size_type start,
|
||||
const size_t start,
|
||||
const T* const __restrict lower,
|
||||
const T* const __restrict upper,
|
||||
const T* const __restrict values,
|
||||
const size_type size) {
|
||||
const size_t size) {
|
||||
op_func(
|
||||
data, start, size, [lower, upper, values](const size_type bit_idx) {
|
||||
data, start, size, [lower, upper, values](const size_t bit_idx) {
|
||||
return RangeOperator<Op>::within_range(
|
||||
lower[bit_idx], upper[bit_idx], values[bit_idx]);
|
||||
});
|
||||
@ -810,13 +837,13 @@ struct ElementWiseBitsetPolicy {
|
||||
template <typename T, RangeType Op>
|
||||
static inline void
|
||||
op_within_range_val(data_type* const __restrict data,
|
||||
const size_type start,
|
||||
const size_t start,
|
||||
const T& lower,
|
||||
const T& upper,
|
||||
const T* const __restrict values,
|
||||
const size_type size) {
|
||||
const size_t size) {
|
||||
op_func(
|
||||
data, start, size, [lower, upper, values](const size_type bit_idx) {
|
||||
data, start, size, [lower, upper, values](const size_t bit_idx) {
|
||||
return RangeOperator<Op>::within_range(
|
||||
lower, upper, values[bit_idx]);
|
||||
});
|
||||
@ -826,15 +853,15 @@ struct ElementWiseBitsetPolicy {
|
||||
template <typename T, ArithOpType AOp, CompareOpType CmpOp>
|
||||
static inline void
|
||||
op_arith_compare(data_type* const __restrict data,
|
||||
const size_type start,
|
||||
const size_t start,
|
||||
const T* const __restrict src,
|
||||
const ArithHighPrecisionType<T>& right_operand,
|
||||
const ArithHighPrecisionType<T>& value,
|
||||
const size_type size) {
|
||||
const size_t size) {
|
||||
op_func(data,
|
||||
start,
|
||||
size,
|
||||
[src, right_operand, value](const size_type bit_idx) {
|
||||
[src, right_operand, value](const size_t bit_idx) {
|
||||
return ArithCompareOperator<AOp, CmpOp>::compare(
|
||||
src[bit_idx], right_operand, value);
|
||||
});
|
||||
@ -872,11 +899,14 @@ struct ElementWiseBitsetPolicy {
|
||||
const size_t size) {
|
||||
size_t inactive = 0;
|
||||
|
||||
const size_t size_b = (size / data_bits) * data_bits;
|
||||
|
||||
// process bulk
|
||||
op_func(left,
|
||||
right,
|
||||
start_left,
|
||||
start_right,
|
||||
size,
|
||||
size_b,
|
||||
[&inactive](const data_type left_v, const data_type right_v) {
|
||||
const data_type result = left_v | right_v;
|
||||
inactive +=
|
||||
@ -885,12 +915,25 @@ struct ElementWiseBitsetPolicy {
|
||||
return result;
|
||||
});
|
||||
|
||||
// process leftovers
|
||||
if (size != size_b) {
|
||||
const data_type left_v =
|
||||
op_read(left, start_left + size_b, size - size_b);
|
||||
const data_type right_v =
|
||||
op_read(right, start_right + size_b, size - size_b);
|
||||
|
||||
const data_type result_v = left_v | right_v;
|
||||
inactive +=
|
||||
(size - size_b - PopCountHelper<data_type>::count(result_v));
|
||||
op_write(left, start_left + size_b, size - size_b, result_v);
|
||||
}
|
||||
|
||||
return inactive;
|
||||
}
|
||||
|
||||
// data_type Func(const data_type left_v, const data_type right_v);
|
||||
template <typename Func>
|
||||
static inline void
|
||||
static BITSET_ALWAYS_INLINE inline void
|
||||
op_func(data_type* const left,
|
||||
const data_type* const right,
|
||||
const size_t start_left,
|
||||
@ -902,16 +945,15 @@ struct ElementWiseBitsetPolicy {
|
||||
}
|
||||
|
||||
// process big blocks
|
||||
const size_type size_b = (size / data_bits) * data_bits;
|
||||
const size_t size_b = (size / data_bits) * data_bits;
|
||||
if ((start_left % data_bits) == 0) {
|
||||
if ((start_right % data_bits) == 0) {
|
||||
// plain "memcpy".
|
||||
// A compiler auto-vectorization is expected.
|
||||
size_type start_left_idx = start_left / data_bits;
|
||||
size_type start_right_idx = start_right / data_bits;
|
||||
size_t start_left_idx = start_left / data_bits;
|
||||
size_t start_right_idx = start_right / data_bits;
|
||||
|
||||
for (size_type i = 0, j = 0; i < size_b;
|
||||
i += data_bits, j += 1) {
|
||||
for (size_t i = 0, j = 0; i < size_b; i += data_bits, j += 1) {
|
||||
data_type& left_v = left[start_left_idx + j];
|
||||
const data_type right_v = right[start_right_idx + j];
|
||||
|
||||
@ -920,25 +962,9 @@ struct ElementWiseBitsetPolicy {
|
||||
}
|
||||
} else {
|
||||
// easier read
|
||||
size_type start_right_idx = start_right / data_bits;
|
||||
size_t start_left_idx = start_left / data_bits;
|
||||
|
||||
for (size_type i = 0, j = 0; i < size_b;
|
||||
i += data_bits, j += 1) {
|
||||
const data_type left_v =
|
||||
op_read(left, start_left + i, data_bits);
|
||||
const data_type right_v = right[start_right_idx + j];
|
||||
|
||||
const data_type result_v = func(left_v, right_v);
|
||||
op_write(left, start_right + i, data_bits, result_v);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if ((start_right % data_bits) == 0) {
|
||||
// easier write
|
||||
size_type start_left_idx = start_left / data_bits;
|
||||
|
||||
for (size_type i = 0, j = 0; i < size_b;
|
||||
i += data_bits, j += 1) {
|
||||
for (size_t i = 0, j = 0; i < size_b; i += data_bits, j += 1) {
|
||||
data_type& left_v = left[start_left_idx + j];
|
||||
const data_type right_v =
|
||||
op_read(right, start_right + i, data_bits);
|
||||
@ -946,16 +972,30 @@ struct ElementWiseBitsetPolicy {
|
||||
const data_type result_v = func(left_v, right_v);
|
||||
left_v = result_v;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if ((start_right % data_bits) == 0) {
|
||||
// easier write
|
||||
size_t start_right_idx = start_right / data_bits;
|
||||
|
||||
for (size_t i = 0, j = 0; i < size_b; i += data_bits, j += 1) {
|
||||
const data_type left_v =
|
||||
op_read(left, start_left + i, data_bits);
|
||||
const data_type right_v = right[start_right_idx + j];
|
||||
|
||||
const data_type result_v = func(left_v, right_v);
|
||||
op_write(left, start_left + i, data_bits, result_v);
|
||||
}
|
||||
} else {
|
||||
// general case
|
||||
for (size_type i = 0; i < size_b; i += data_bits) {
|
||||
for (size_t i = 0; i < size_b; i += data_bits) {
|
||||
const data_type left_v =
|
||||
op_read(left, start_left + i, data_bits);
|
||||
const data_type right_v =
|
||||
op_read(right, start_right + i, data_bits);
|
||||
|
||||
const data_type result_v = func(left_v, right_v);
|
||||
op_write(left, start_right + i, data_bits, result_v);
|
||||
op_write(left, start_left + i, data_bits, result_v);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -972,11 +1012,145 @@ struct ElementWiseBitsetPolicy {
|
||||
}
|
||||
}
|
||||
|
||||
// bool Func(const size_type bit_idx);
|
||||
// data_type Func(const data_type left_v, const data_type right_v);
|
||||
template <typename Func>
|
||||
static inline void
|
||||
static BITSET_ALWAYS_INLINE inline void
|
||||
op_func(data_type* const left,
|
||||
const data_type* const* const rights,
|
||||
const size_t start_left,
|
||||
const size_t* const __restrict start_rights,
|
||||
const size_t n_rights,
|
||||
const size_t size,
|
||||
Func func) {
|
||||
if (size == 0 || n_rights == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (n_rights == 1) {
|
||||
op_func<Func>(
|
||||
left, rights[0], start_left, start_rights[0], size, func);
|
||||
return;
|
||||
}
|
||||
|
||||
// process big blocks
|
||||
const size_t size_b = (size / data_bits) * data_bits;
|
||||
|
||||
// check a specific case
|
||||
bool all_aligned = true;
|
||||
for (size_t i = 0; i < n_rights; i++) {
|
||||
if (start_rights[i] % data_bits != 0) {
|
||||
all_aligned = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// all are aligned
|
||||
if (all_aligned) {
|
||||
MaybeVector<const data_type*> tmp(n_rights);
|
||||
for (size_t i = 0; i < n_rights; i++) {
|
||||
tmp[i] = rights[i] + (start_rights[i] / data_bits);
|
||||
}
|
||||
|
||||
// plain "memcpy".
|
||||
// A compiler auto-vectorization is expected.
|
||||
const size_t start_left_idx = start_left / data_bits;
|
||||
data_type* left_ptr = left + start_left_idx;
|
||||
|
||||
auto unrolled = [left_ptr, &tmp, func, size_b](const size_t count) {
|
||||
for (size_t i = 0, j = 0; i < size_b; i += data_bits, j += 1) {
|
||||
data_type& left_v = left_ptr[j];
|
||||
data_type value = left_v;
|
||||
|
||||
for (size_t k = 0; k < count; k++) {
|
||||
const data_type right_v = tmp[k][j];
|
||||
|
||||
value = func(value, right_v);
|
||||
}
|
||||
|
||||
left_v = value;
|
||||
}
|
||||
};
|
||||
|
||||
switch (n_rights) {
|
||||
// case 1: unrolled(1); break;
|
||||
case 2:
|
||||
unrolled(2);
|
||||
break;
|
||||
case 3:
|
||||
unrolled(3);
|
||||
break;
|
||||
case 4:
|
||||
unrolled(4);
|
||||
break;
|
||||
case 5:
|
||||
unrolled(5);
|
||||
break;
|
||||
case 6:
|
||||
unrolled(6);
|
||||
break;
|
||||
case 7:
|
||||
unrolled(7);
|
||||
break;
|
||||
case 8:
|
||||
unrolled(8);
|
||||
break;
|
||||
default: {
|
||||
for (size_t i = 0, j = 0; i < size_b;
|
||||
i += data_bits, j += 1) {
|
||||
data_type& left_v = left_ptr[j];
|
||||
data_type value = left_v;
|
||||
|
||||
for (size_t k = 0; k < n_rights; k++) {
|
||||
const data_type right_v = tmp[k][j];
|
||||
|
||||
value = func(value, right_v);
|
||||
}
|
||||
|
||||
left_v = value;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
// general case. Unoptimized.
|
||||
for (size_t i = 0; i < size_b; i += data_bits) {
|
||||
const data_type left_v =
|
||||
op_read(left, start_left + i, data_bits);
|
||||
|
||||
data_type value = left_v;
|
||||
for (size_t k = 0; k < n_rights; k++) {
|
||||
const data_type right_v =
|
||||
op_read(rights[k], start_rights[k] + i, data_bits);
|
||||
|
||||
value = func(value, right_v);
|
||||
}
|
||||
|
||||
op_write(left, start_left + i, data_bits, value);
|
||||
}
|
||||
}
|
||||
|
||||
// process leftovers
|
||||
if (size_b != size) {
|
||||
const data_type left_v =
|
||||
op_read(left, start_left + size_b, size - size_b);
|
||||
|
||||
data_type value = left_v;
|
||||
for (size_t k = 0; k < n_rights; k++) {
|
||||
const data_type right_v =
|
||||
op_read(rights[k], start_rights[k] + size_b, size - size_b);
|
||||
|
||||
value = func(value, right_v);
|
||||
}
|
||||
|
||||
op_write(left, start_left + size_b, size - size_b, value);
|
||||
}
|
||||
}
|
||||
|
||||
// bool Func(const size_t bit_idx);
|
||||
template <typename Func>
|
||||
static BITSET_ALWAYS_INLINE inline void
|
||||
op_func(data_type* const __restrict data,
|
||||
const size_type start,
|
||||
const size_t start,
|
||||
const size_t size,
|
||||
Func func) {
|
||||
if (size == 0) {
|
||||
@ -991,7 +1165,7 @@ struct ElementWiseBitsetPolicy {
|
||||
|
||||
if (start_element == end_element) {
|
||||
data_type bits = 0;
|
||||
for (size_type j = 0; j < size; j++) {
|
||||
for (size_t j = 0; j < size; j++) {
|
||||
const bool bit = func(j);
|
||||
// // a curious example where the compiler does not optimize the code properly
|
||||
// bits |= (bit ? (data_type(1) << j) : 0);
|
||||
@ -1009,10 +1183,10 @@ struct ElementWiseBitsetPolicy {
|
||||
|
||||
// process the first element
|
||||
if (start_shift != 0) {
|
||||
const size_type n_bits = data_bits - start_shift;
|
||||
const size_t n_bits = data_bits - start_shift;
|
||||
|
||||
data_type bits = 0;
|
||||
for (size_type j = 0; j < n_bits; j++) {
|
||||
for (size_t j = 0; j < n_bits; j++) {
|
||||
const bool bit = func(j);
|
||||
bits |= (data_type(bit ? 1 : 0) << j);
|
||||
}
|
||||
@ -1026,9 +1200,9 @@ struct ElementWiseBitsetPolicy {
|
||||
|
||||
// process the middle
|
||||
{
|
||||
for (size_type i = start_element; i < end_element; i++) {
|
||||
for (size_t i = start_element; i < end_element; i++) {
|
||||
data_type bits = 0;
|
||||
for (size_type j = 0; j < data_bits; j++) {
|
||||
for (size_t j = 0; j < data_bits; j++) {
|
||||
const bool bit = func(ptr_offset + j);
|
||||
bits |= (data_type(bit ? 1 : 0) << j);
|
||||
}
|
||||
@ -1041,7 +1215,7 @@ struct ElementWiseBitsetPolicy {
|
||||
// process the last element
|
||||
if (end_shift != 0) {
|
||||
data_type bits = 0;
|
||||
for (size_type j = 0; j < end_shift; j++) {
|
||||
for (size_t j = 0; j < end_shift; j++) {
|
||||
const bool bit = func(ptr_offset + j);
|
||||
bits |= (data_type(bit ? 1 : 0) << j);
|
||||
}
|
||||
|
91
internal/core/src/bitset/detail/maybe_vector.h
Normal file
91
internal/core/src/bitset/detail/maybe_vector.h
Normal file
@ -0,0 +1,91 @@
|
||||
// Licensed to the LF AI & Data foundation under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <memory>
|
||||
#include <type_traits>
|
||||
|
||||
namespace milvus {
|
||||
namespace bitset {
|
||||
namespace detail {
|
||||
|
||||
// A structure that allocates an array of elements.
|
||||
// No ownership is implied.
|
||||
// If the number of elements is small,
|
||||
// then an allocation will be done on the stack.
|
||||
// If the number of elements is large,
|
||||
// then an allocation will be done on the heap.
|
||||
template <typename T>
|
||||
struct MaybeVector {
|
||||
public:
|
||||
static_assert(std::is_scalar_v<T>);
|
||||
|
||||
static constexpr size_t num_array_elements = 64;
|
||||
|
||||
std::unique_ptr<T[]> maybe_memory;
|
||||
std::array<T, num_array_elements> maybe_array;
|
||||
|
||||
MaybeVector(const size_t n_elements) {
|
||||
m_size = n_elements;
|
||||
|
||||
if (n_elements < num_array_elements) {
|
||||
m_data = maybe_array.data();
|
||||
} else {
|
||||
maybe_memory = std::make_unique<T[]>(m_size);
|
||||
m_data = maybe_memory.get();
|
||||
}
|
||||
}
|
||||
|
||||
MaybeVector(const MaybeVector&) = delete;
|
||||
MaybeVector(MaybeVector&&) = delete;
|
||||
MaybeVector&
|
||||
operator=(const MaybeVector&) = delete;
|
||||
MaybeVector&
|
||||
operator=(MaybeVector&&) = delete;
|
||||
|
||||
inline size_t
|
||||
size() const {
|
||||
return m_size;
|
||||
}
|
||||
inline T*
|
||||
data() {
|
||||
return m_data;
|
||||
}
|
||||
inline const T*
|
||||
data() const {
|
||||
return m_data;
|
||||
}
|
||||
|
||||
inline T&
|
||||
operator[](const size_t idx) {
|
||||
return m_data[idx];
|
||||
}
|
||||
inline const T&
|
||||
operator[](const size_t idx) const {
|
||||
return m_data[idx];
|
||||
}
|
||||
|
||||
private:
|
||||
size_t m_size = 0;
|
||||
|
||||
T* m_data = nullptr;
|
||||
};
|
||||
|
||||
} // namespace detail
|
||||
} // namespace bitset
|
||||
} // namespace milvus
|
@ -39,6 +39,11 @@ namespace neon {
|
||||
FUNC(float); \
|
||||
FUNC(double);
|
||||
|
||||
// a facility to run through all acceptable forward types
|
||||
#define ALL_FORWARD_TYPES_1(FUNC) \
|
||||
FUNC(uint8_t); \
|
||||
FUNC(uint64_t);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// the default implementation does nothing
|
||||
@ -192,7 +197,122 @@ ALL_DATATYPES_1(DECLARE_PARTIAL_OP_ARITH_COMPARE)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// forward ops
|
||||
template <typename ElementT>
|
||||
struct ForwardOpsImpl {
|
||||
static inline bool
|
||||
op_and(ElementT* const left,
|
||||
const ElementT* const right,
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
op_and_multiple(ElementT* const left,
|
||||
const ElementT* const* const rights,
|
||||
const size_t start_left,
|
||||
const size_t* const __restrict start_rights,
|
||||
const size_t n_rights,
|
||||
const size_t size) {
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
op_or(ElementT* const left,
|
||||
const ElementT* const right,
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
op_or_multiple(ElementT* const left,
|
||||
const ElementT* const* const rights,
|
||||
const size_t start_left,
|
||||
const size_t* const __restrict start_rights,
|
||||
const size_t n_rights,
|
||||
const size_t size) {
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
op_xor(ElementT* const left,
|
||||
const ElementT* const right,
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
op_sub(ElementT* const left,
|
||||
const ElementT* const right,
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
#define DECLARE_PARTIAL_FORWARD_OPS(ELEMENTTYPE) \
|
||||
template <> \
|
||||
struct ForwardOpsImpl<ELEMENTTYPE> { \
|
||||
static bool \
|
||||
op_and(ELEMENTTYPE* const left, \
|
||||
const ELEMENTTYPE* const right, \
|
||||
const size_t start_left, \
|
||||
const size_t start_right, \
|
||||
const size_t size); \
|
||||
\
|
||||
static bool \
|
||||
op_and_multiple(ELEMENTTYPE* const left, \
|
||||
const ELEMENTTYPE* const* const rights, \
|
||||
const size_t start_left, \
|
||||
const size_t* const __restrict start_rights, \
|
||||
const size_t n_rights, \
|
||||
const size_t size); \
|
||||
\
|
||||
static bool \
|
||||
op_or(ELEMENTTYPE* const left, \
|
||||
const ELEMENTTYPE* const right, \
|
||||
const size_t start_left, \
|
||||
const size_t start_right, \
|
||||
const size_t size); \
|
||||
\
|
||||
static bool \
|
||||
op_or_multiple(ELEMENTTYPE* const left, \
|
||||
const ELEMENTTYPE* const* const rights, \
|
||||
const size_t start_left, \
|
||||
const size_t* const __restrict start_rights, \
|
||||
const size_t n_rights, \
|
||||
const size_t size); \
|
||||
\
|
||||
static bool \
|
||||
op_sub(ELEMENTTYPE* const left, \
|
||||
const ELEMENTTYPE* const right, \
|
||||
const size_t start_left, \
|
||||
const size_t start_right, \
|
||||
const size_t size); \
|
||||
\
|
||||
static bool \
|
||||
op_xor(ELEMENTTYPE* const left, \
|
||||
const ELEMENTTYPE* const right, \
|
||||
const size_t start_left, \
|
||||
const size_t start_right, \
|
||||
const size_t size); \
|
||||
};
|
||||
|
||||
ALL_FORWARD_TYPES_1(DECLARE_PARTIAL_FORWARD_OPS)
|
||||
|
||||
#undef DECLARE_PARTIAL_FORWARD_OPS
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#undef ALL_DATATYPES_1
|
||||
#undef ALL_FORWARD_TYPES_1
|
||||
|
||||
} // namespace neon
|
||||
} // namespace arm
|
||||
|
@ -28,6 +28,7 @@
|
||||
#include "neon-decl.h"
|
||||
|
||||
#include "bitset/common.h"
|
||||
#include "bitset/detail/element_wise.h"
|
||||
|
||||
namespace milvus {
|
||||
namespace bitset {
|
||||
@ -1810,6 +1811,151 @@ OpArithCompareImpl<double, AOp, CmpOp>::op_arith_compare(
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// forward ops
|
||||
|
||||
//
|
||||
bool
|
||||
ForwardOpsImpl<uint8_t>::op_and(uint8_t* const left,
|
||||
const uint8_t* const right,
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
ElementWiseBitsetPolicy<uint8_t>::op_and(
|
||||
left, right, start_left, start_right, size);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
ForwardOpsImpl<uint8_t>::op_and_multiple(
|
||||
uint8_t* const left,
|
||||
const uint8_t* const* const rights,
|
||||
const size_t start_left,
|
||||
const size_t* const __restrict start_rights,
|
||||
const size_t n_rights,
|
||||
const size_t size) {
|
||||
ElementWiseBitsetPolicy<uint8_t>::op_and_multiple(
|
||||
left, rights, start_left, start_rights, n_rights, size);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
ForwardOpsImpl<uint8_t>::op_or(uint8_t* const left,
|
||||
const uint8_t* const right,
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
ElementWiseBitsetPolicy<uint8_t>::op_or(
|
||||
left, right, start_left, start_right, size);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
ForwardOpsImpl<uint8_t>::op_or_multiple(
|
||||
uint8_t* const left,
|
||||
const uint8_t* const* const rights,
|
||||
const size_t start_left,
|
||||
const size_t* const __restrict start_rights,
|
||||
const size_t n_rights,
|
||||
const size_t size) {
|
||||
ElementWiseBitsetPolicy<uint8_t>::op_or_multiple(
|
||||
left, rights, start_left, start_rights, n_rights, size);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
ForwardOpsImpl<uint8_t>::op_xor(uint8_t* const left,
|
||||
const uint8_t* const right,
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
ElementWiseBitsetPolicy<uint8_t>::op_xor(
|
||||
left, right, start_left, start_right, size);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
ForwardOpsImpl<uint8_t>::op_sub(uint8_t* const left,
|
||||
const uint8_t* const right,
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
ElementWiseBitsetPolicy<uint8_t>::op_sub(
|
||||
left, right, start_left, start_right, size);
|
||||
return true;
|
||||
}
|
||||
|
||||
//
|
||||
bool
|
||||
ForwardOpsImpl<uint64_t>::op_and(uint64_t* const left,
|
||||
const uint64_t* const right,
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
ElementWiseBitsetPolicy<uint64_t>::op_and(
|
||||
left, right, start_left, start_right, size);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
ForwardOpsImpl<uint64_t>::op_and_multiple(
|
||||
uint64_t* const left,
|
||||
const uint64_t* const* const rights,
|
||||
const size_t start_left,
|
||||
const size_t* const __restrict start_rights,
|
||||
const size_t n_rights,
|
||||
const size_t size) {
|
||||
ElementWiseBitsetPolicy<uint64_t>::op_and_multiple(
|
||||
left, rights, start_left, start_rights, n_rights, size);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
ForwardOpsImpl<uint64_t>::op_or(uint64_t* const left,
|
||||
const uint64_t* const right,
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
ElementWiseBitsetPolicy<uint64_t>::op_or(
|
||||
left, right, start_left, start_right, size);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
ForwardOpsImpl<uint64_t>::op_or_multiple(
|
||||
uint64_t* const left,
|
||||
const uint64_t* const* const rights,
|
||||
const size_t start_left,
|
||||
const size_t* const __restrict start_rights,
|
||||
const size_t n_rights,
|
||||
const size_t size) {
|
||||
ElementWiseBitsetPolicy<uint64_t>::op_or_multiple(
|
||||
left, rights, start_left, start_rights, n_rights, size);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
ForwardOpsImpl<uint64_t>::op_xor(uint64_t* const left,
|
||||
const uint64_t* const right,
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
ElementWiseBitsetPolicy<uint64_t>::op_xor(
|
||||
left, right, start_left, start_right, size);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
ForwardOpsImpl<uint64_t>::op_sub(uint64_t* const left,
|
||||
const uint64_t* const right,
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
ElementWiseBitsetPolicy<uint64_t>::op_sub(
|
||||
left, right, start_left, start_right, size);
|
||||
return true;
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
} // namespace neon
|
||||
|
@ -55,6 +55,30 @@ struct VectorizedNeon {
|
||||
template <typename T, ArithOpType AOp, CompareOpType CmpOp>
|
||||
static constexpr inline auto op_arith_compare =
|
||||
neon::OpArithCompareImpl<T, AOp, CmpOp>::op_arith_compare;
|
||||
|
||||
template <typename ElementT>
|
||||
static constexpr inline auto forward_op_and =
|
||||
neon::ForwardOpsImpl<ElementT>::op_and;
|
||||
|
||||
template <typename ElementT>
|
||||
static constexpr inline auto forward_op_and_multiple =
|
||||
neon::ForwardOpsImpl<ElementT>::op_and_multiple;
|
||||
|
||||
template <typename ElementT>
|
||||
static constexpr inline auto forward_op_or =
|
||||
neon::ForwardOpsImpl<ElementT>::op_or;
|
||||
|
||||
template <typename ElementT>
|
||||
static constexpr inline auto forward_op_or_multiple =
|
||||
neon::ForwardOpsImpl<ElementT>::op_or_multiple;
|
||||
|
||||
template <typename ElementT>
|
||||
static constexpr inline auto forward_op_xor =
|
||||
neon::ForwardOpsImpl<ElementT>::op_xor;
|
||||
|
||||
template <typename ElementT>
|
||||
static constexpr inline auto forward_op_sub =
|
||||
neon::ForwardOpsImpl<ElementT>::op_sub;
|
||||
};
|
||||
|
||||
} // namespace arm
|
||||
|
@ -39,6 +39,11 @@ namespace sve {
|
||||
FUNC(float); \
|
||||
FUNC(double);
|
||||
|
||||
// a facility to run through all acceptable forward types
|
||||
#define ALL_FORWARD_TYPES_1(FUNC) \
|
||||
FUNC(uint8_t); \
|
||||
FUNC(uint64_t);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// the default implementation does nothing
|
||||
@ -192,7 +197,122 @@ ALL_DATATYPES_1(DECLARE_PARTIAL_OP_ARITH_COMPARE)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// forward ops
|
||||
template <typename ElementT>
|
||||
struct ForwardOpsImpl {
|
||||
static inline bool
|
||||
op_and(ElementT* const left,
|
||||
const ElementT* const right,
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
op_and_multiple(ElementT* const left,
|
||||
const ElementT* const* const rights,
|
||||
const size_t start_left,
|
||||
const size_t* const __restrict start_rights,
|
||||
const size_t n_rights,
|
||||
const size_t size) {
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
op_or(ElementT* const left,
|
||||
const ElementT* const right,
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
op_or_multiple(ElementT* const left,
|
||||
const ElementT* const* const rights,
|
||||
const size_t start_left,
|
||||
const size_t* const __restrict start_rights,
|
||||
const size_t n_rights,
|
||||
const size_t size) {
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
op_xor(ElementT* const left,
|
||||
const ElementT* const right,
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
op_sub(ElementT* const left,
|
||||
const ElementT* const right,
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
#define DECLARE_PARTIAL_FORWARD_OPS(ELEMENTTYPE) \
|
||||
template <> \
|
||||
struct ForwardOpsImpl<ELEMENTTYPE> { \
|
||||
static bool \
|
||||
op_and(ELEMENTTYPE* const left, \
|
||||
const ELEMENTTYPE* const right, \
|
||||
const size_t start_left, \
|
||||
const size_t start_right, \
|
||||
const size_t size); \
|
||||
\
|
||||
static bool \
|
||||
op_and_multiple(ELEMENTTYPE* const left, \
|
||||
const ELEMENTTYPE* const* const rights, \
|
||||
const size_t start_left, \
|
||||
const size_t* const __restrict start_rights, \
|
||||
const size_t n_rights, \
|
||||
const size_t size); \
|
||||
\
|
||||
static bool \
|
||||
op_or(ELEMENTTYPE* const left, \
|
||||
const ELEMENTTYPE* const right, \
|
||||
const size_t start_left, \
|
||||
const size_t start_right, \
|
||||
const size_t size); \
|
||||
\
|
||||
static bool \
|
||||
op_or_multiple(ELEMENTTYPE* const left, \
|
||||
const ELEMENTTYPE* const* const rights, \
|
||||
const size_t start_left, \
|
||||
const size_t* const __restrict start_rights, \
|
||||
const size_t n_rights, \
|
||||
const size_t size); \
|
||||
\
|
||||
static bool \
|
||||
op_sub(ELEMENTTYPE* const left, \
|
||||
const ELEMENTTYPE* const right, \
|
||||
const size_t start_left, \
|
||||
const size_t start_right, \
|
||||
const size_t size); \
|
||||
\
|
||||
static bool \
|
||||
op_xor(ELEMENTTYPE* const left, \
|
||||
const ELEMENTTYPE* const right, \
|
||||
const size_t start_left, \
|
||||
const size_t start_right, \
|
||||
const size_t size); \
|
||||
};
|
||||
|
||||
ALL_FORWARD_TYPES_1(DECLARE_PARTIAL_FORWARD_OPS)
|
||||
|
||||
#undef DECLARE_PARTIAL_FORWARD_OPS
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#undef ALL_DATATYPES_1
|
||||
#undef ALL_FORWARD_TYPES_1
|
||||
|
||||
} // namespace sve
|
||||
} // namespace arm
|
||||
|
@ -28,8 +28,7 @@
|
||||
#include "sve-decl.h"
|
||||
|
||||
#include "bitset/common.h"
|
||||
|
||||
// #include <stdio.h>
|
||||
#include "bitset/detail/element_wise.h"
|
||||
|
||||
namespace milvus {
|
||||
namespace bitset {
|
||||
@ -1623,6 +1622,151 @@ OpArithCompareImpl<double, AOp, CmpOp>::op_arith_compare(
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// forward ops
|
||||
|
||||
//
|
||||
bool
|
||||
ForwardOpsImpl<uint8_t>::op_and(uint8_t* const left,
|
||||
const uint8_t* const right,
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
ElementWiseBitsetPolicy<uint8_t>::op_and(
|
||||
left, right, start_left, start_right, size);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
ForwardOpsImpl<uint8_t>::op_and_multiple(
|
||||
uint8_t* const left,
|
||||
const uint8_t* const* const rights,
|
||||
const size_t start_left,
|
||||
const size_t* const __restrict start_rights,
|
||||
const size_t n_rights,
|
||||
const size_t size) {
|
||||
ElementWiseBitsetPolicy<uint8_t>::op_and_multiple(
|
||||
left, rights, start_left, start_rights, n_rights, size);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
ForwardOpsImpl<uint8_t>::op_or(uint8_t* const left,
|
||||
const uint8_t* const right,
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
ElementWiseBitsetPolicy<uint8_t>::op_or(
|
||||
left, right, start_left, start_right, size);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
ForwardOpsImpl<uint8_t>::op_or_multiple(
|
||||
uint8_t* const left,
|
||||
const uint8_t* const* const rights,
|
||||
const size_t start_left,
|
||||
const size_t* const __restrict start_rights,
|
||||
const size_t n_rights,
|
||||
const size_t size) {
|
||||
ElementWiseBitsetPolicy<uint8_t>::op_or_multiple(
|
||||
left, rights, start_left, start_rights, n_rights, size);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
ForwardOpsImpl<uint8_t>::op_xor(uint8_t* const left,
|
||||
const uint8_t* const right,
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
ElementWiseBitsetPolicy<uint8_t>::op_xor(
|
||||
left, right, start_left, start_right, size);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
ForwardOpsImpl<uint8_t>::op_sub(uint8_t* const left,
|
||||
const uint8_t* const right,
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
ElementWiseBitsetPolicy<uint8_t>::op_sub(
|
||||
left, right, start_left, start_right, size);
|
||||
return true;
|
||||
}
|
||||
|
||||
//
|
||||
bool
|
||||
ForwardOpsImpl<uint64_t>::op_and(uint64_t* const left,
|
||||
const uint64_t* const right,
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
ElementWiseBitsetPolicy<uint64_t>::op_and(
|
||||
left, right, start_left, start_right, size);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
ForwardOpsImpl<uint64_t>::op_and_multiple(
|
||||
uint64_t* const left,
|
||||
const uint64_t* const* const rights,
|
||||
const size_t start_left,
|
||||
const size_t* const __restrict start_rights,
|
||||
const size_t n_rights,
|
||||
const size_t size) {
|
||||
ElementWiseBitsetPolicy<uint64_t>::op_and_multiple(
|
||||
left, rights, start_left, start_rights, n_rights, size);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
ForwardOpsImpl<uint64_t>::op_or(uint64_t* const left,
|
||||
const uint64_t* const right,
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
ElementWiseBitsetPolicy<uint64_t>::op_or(
|
||||
left, right, start_left, start_right, size);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
ForwardOpsImpl<uint64_t>::op_or_multiple(
|
||||
uint64_t* const left,
|
||||
const uint64_t* const* const rights,
|
||||
const size_t start_left,
|
||||
const size_t* const __restrict start_rights,
|
||||
const size_t n_rights,
|
||||
const size_t size) {
|
||||
ElementWiseBitsetPolicy<uint64_t>::op_or_multiple(
|
||||
left, rights, start_left, start_rights, n_rights, size);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
ForwardOpsImpl<uint64_t>::op_xor(uint64_t* const left,
|
||||
const uint64_t* const right,
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
ElementWiseBitsetPolicy<uint64_t>::op_xor(
|
||||
left, right, start_left, start_right, size);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
ForwardOpsImpl<uint64_t>::op_sub(uint64_t* const left,
|
||||
const uint64_t* const right,
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
ElementWiseBitsetPolicy<uint64_t>::op_sub(
|
||||
left, right, start_left, start_right, size);
|
||||
return true;
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
} // namespace sve
|
||||
|
@ -55,6 +55,30 @@ struct VectorizedSve {
|
||||
template <typename T, ArithOpType AOp, CompareOpType CmpOp>
|
||||
static constexpr inline auto op_arith_compare =
|
||||
sve::OpArithCompareImpl<T, AOp, CmpOp>::op_arith_compare;
|
||||
|
||||
template <typename ElementT>
|
||||
static constexpr inline auto forward_op_and =
|
||||
sve::ForwardOpsImpl<ElementT>::op_and;
|
||||
|
||||
template <typename ElementT>
|
||||
static constexpr inline auto forward_op_and_multiple =
|
||||
sve::ForwardOpsImpl<ElementT>::op_and_multiple;
|
||||
|
||||
template <typename ElementT>
|
||||
static constexpr inline auto forward_op_or =
|
||||
sve::ForwardOpsImpl<ElementT>::op_or;
|
||||
|
||||
template <typename ElementT>
|
||||
static constexpr inline auto forward_op_or_multiple =
|
||||
sve::ForwardOpsImpl<ElementT>::op_or_multiple;
|
||||
|
||||
template <typename ElementT>
|
||||
static constexpr inline auto forward_op_xor =
|
||||
sve::ForwardOpsImpl<ElementT>::op_xor;
|
||||
|
||||
template <typename ElementT>
|
||||
static constexpr inline auto forward_op_sub =
|
||||
sve::ForwardOpsImpl<ElementT>::op_sub;
|
||||
};
|
||||
|
||||
} // namespace arm
|
||||
|
@ -88,6 +88,11 @@ using namespace milvus::bitset::detail::arm;
|
||||
FUNC(__VA_ARGS__, Mod, LT); \
|
||||
FUNC(__VA_ARGS__, Mod, NE);
|
||||
|
||||
// a facility to run through all possible forward ElementT
|
||||
#define ALL_FORWARD_OPS(FUNC) \
|
||||
FUNC(uint8_t); \
|
||||
FUNC(uint64_t);
|
||||
|
||||
//
|
||||
namespace milvus {
|
||||
namespace bitset {
|
||||
@ -235,6 +240,7 @@ ALL_RANGE_OPS(DISPATCH_OP_WITHIN_RANGE_COLUMN_IMPL, float)
|
||||
ALL_RANGE_OPS(DISPATCH_OP_WITHIN_RANGE_COLUMN_IMPL, double)
|
||||
|
||||
#undef DISPATCH_OP_WITHIN_RANGE_COLUMN_IMPL
|
||||
|
||||
} // namespace dynamic
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
@ -282,6 +288,8 @@ ALL_RANGE_OPS(DISPATCH_OP_WITHIN_RANGE_VAL_IMPL, int64_t)
|
||||
ALL_RANGE_OPS(DISPATCH_OP_WITHIN_RANGE_VAL_IMPL, float)
|
||||
ALL_RANGE_OPS(DISPATCH_OP_WITHIN_RANGE_VAL_IMPL, double)
|
||||
|
||||
#undef DISPATCH_OP_WITHIN_RANGE_VAL_IMPL
|
||||
|
||||
} // namespace dynamic
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
@ -332,6 +340,108 @@ ALL_ARITH_CMP_OPS(DISPATCH_OP_ARITH_COMPARE, int64_t)
|
||||
ALL_ARITH_CMP_OPS(DISPATCH_OP_ARITH_COMPARE, float)
|
||||
ALL_ARITH_CMP_OPS(DISPATCH_OP_ARITH_COMPARE, double)
|
||||
|
||||
#undef DISPATCH_OP_ARITH_COMPARE
|
||||
|
||||
} // namespace dynamic
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
// forward_ops
|
||||
|
||||
template <typename ElementT>
|
||||
using ForwardOpsOp2 = bool (*)(ElementT* const left,
|
||||
const ElementT* const right,
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size);
|
||||
|
||||
template <typename ElementT>
|
||||
using ForwardOpsOpMultiple2 =
|
||||
bool (*)(ElementT* const left,
|
||||
const ElementT* const* const rights,
|
||||
const size_t start_left,
|
||||
const size_t* const __restrict start_rights,
|
||||
const size_t n_rights,
|
||||
const size_t size);
|
||||
|
||||
#define DECLARE_FORWARD_OPS_OP2(ELEMENTTYPE) \
|
||||
ForwardOpsOp2<ELEMENTTYPE> forward_op_and_##ELEMENTTYPE = \
|
||||
VectorizedRef::template forward_op_and<ELEMENTTYPE>; \
|
||||
ForwardOpsOpMultiple2<ELEMENTTYPE> forward_op_and_multiple_##ELEMENTTYPE = \
|
||||
VectorizedRef::template forward_op_and_multiple<ELEMENTTYPE>; \
|
||||
ForwardOpsOp2<ELEMENTTYPE> forward_op_or_##ELEMENTTYPE = \
|
||||
VectorizedRef::template forward_op_or<ELEMENTTYPE>; \
|
||||
ForwardOpsOpMultiple2<ELEMENTTYPE> forward_op_or_multiple_##ELEMENTTYPE = \
|
||||
VectorizedRef::template forward_op_or_multiple<ELEMENTTYPE>; \
|
||||
ForwardOpsOp2<ELEMENTTYPE> forward_op_xor_##ELEMENTTYPE = \
|
||||
VectorizedRef::template forward_op_xor<ELEMENTTYPE>; \
|
||||
ForwardOpsOp2<ELEMENTTYPE> forward_op_sub_##ELEMENTTYPE = \
|
||||
VectorizedRef::template forward_op_sub<ELEMENTTYPE>;
|
||||
|
||||
ALL_FORWARD_OPS(DECLARE_FORWARD_OPS_OP2)
|
||||
|
||||
#undef DECLARE_FORWARD_OPS_OP2
|
||||
|
||||
//
|
||||
namespace dynamic {
|
||||
|
||||
#define DISPATCH_FORWARD_OPS_OP_AND(ELEMENTTYPE) \
|
||||
bool ForwardOpsImpl<ELEMENTTYPE>::op_and(ELEMENTTYPE* const left, \
|
||||
const ELEMENTTYPE* const right, \
|
||||
const size_t start_left, \
|
||||
const size_t start_right, \
|
||||
const size_t size) { \
|
||||
return forward_op_and_##ELEMENTTYPE( \
|
||||
left, right, start_left, start_right, size); \
|
||||
} \
|
||||
bool ForwardOpsImpl<ELEMENTTYPE>::op_and_multiple( \
|
||||
ELEMENTTYPE* const left, \
|
||||
const ELEMENTTYPE* const* const rights, \
|
||||
const size_t start_left, \
|
||||
const size_t* const __restrict start_rights, \
|
||||
const size_t n_rights, \
|
||||
const size_t size) { \
|
||||
return forward_op_and_multiple_##ELEMENTTYPE( \
|
||||
left, rights, start_left, start_rights, n_rights, size); \
|
||||
} \
|
||||
bool ForwardOpsImpl<ELEMENTTYPE>::op_or(ELEMENTTYPE* const left, \
|
||||
const ELEMENTTYPE* const right, \
|
||||
const size_t start_left, \
|
||||
const size_t start_right, \
|
||||
const size_t size) { \
|
||||
return forward_op_or_##ELEMENTTYPE( \
|
||||
left, right, start_left, start_right, size); \
|
||||
} \
|
||||
bool ForwardOpsImpl<ELEMENTTYPE>::op_or_multiple( \
|
||||
ELEMENTTYPE* const left, \
|
||||
const ELEMENTTYPE* const* const rights, \
|
||||
const size_t start_left, \
|
||||
const size_t* const __restrict start_rights, \
|
||||
const size_t n_rights, \
|
||||
const size_t size) { \
|
||||
return forward_op_or_multiple_##ELEMENTTYPE( \
|
||||
left, rights, start_left, start_rights, n_rights, size); \
|
||||
} \
|
||||
bool ForwardOpsImpl<ELEMENTTYPE>::op_xor(ELEMENTTYPE* const left, \
|
||||
const ELEMENTTYPE* const right, \
|
||||
const size_t start_left, \
|
||||
const size_t start_right, \
|
||||
const size_t size) { \
|
||||
return forward_op_xor_##ELEMENTTYPE( \
|
||||
left, right, start_left, start_right, size); \
|
||||
} \
|
||||
bool ForwardOpsImpl<ELEMENTTYPE>::op_sub(ELEMENTTYPE* const left, \
|
||||
const ELEMENTTYPE* const right, \
|
||||
const size_t start_left, \
|
||||
const size_t start_right, \
|
||||
const size_t size) { \
|
||||
return forward_op_sub_##ELEMENTTYPE( \
|
||||
left, right, start_left, start_right, size); \
|
||||
}
|
||||
|
||||
ALL_FORWARD_OPS(DISPATCH_FORWARD_OPS_OP_AND)
|
||||
|
||||
#undef DISPATCH_FORWARD_OPS_OP_AND
|
||||
|
||||
} // namespace dynamic
|
||||
|
||||
} // namespace detail
|
||||
@ -402,11 +512,28 @@ init_dynamic_hook() {
|
||||
ALL_ARITH_CMP_OPS(SET_ARITH_COMPARE_AVX512, float)
|
||||
ALL_ARITH_CMP_OPS(SET_ARITH_COMPARE_AVX512, double)
|
||||
|
||||
#define SET_FORWARD_OPS_AVX512(ELEMENTTYPE) \
|
||||
forward_op_and_##ELEMENTTYPE = \
|
||||
VectorizedAvx512::template forward_op_and<ELEMENTTYPE>; \
|
||||
forward_op_and_multiple_##ELEMENTTYPE = \
|
||||
VectorizedAvx512::template forward_op_and_multiple<ELEMENTTYPE>; \
|
||||
forward_op_or_##ELEMENTTYPE = \
|
||||
VectorizedAvx512::template forward_op_or<ELEMENTTYPE>; \
|
||||
forward_op_or_multiple_##ELEMENTTYPE = \
|
||||
VectorizedAvx512::template forward_op_or_multiple<ELEMENTTYPE>; \
|
||||
forward_op_xor_##ELEMENTTYPE = \
|
||||
VectorizedAvx512::template forward_op_xor<ELEMENTTYPE>; \
|
||||
forward_op_sub_##ELEMENTTYPE = \
|
||||
VectorizedAvx512::template forward_op_sub<ELEMENTTYPE>;
|
||||
|
||||
ALL_FORWARD_OPS(SET_FORWARD_OPS_AVX512)
|
||||
|
||||
#undef SET_OP_COMPARE_COLUMN_AVX512
|
||||
#undef SET_OP_COMPARE_VAL_AVX512
|
||||
#undef SET_OP_WITHIN_RANGE_COLUMN_AVX512
|
||||
#undef SET_OP_WITHIN_RANGE_VAL_AVX512
|
||||
#undef SET_ARITH_COMPARE_AVX512
|
||||
#undef SET_FORWARD_OPS_AVX512
|
||||
|
||||
return;
|
||||
}
|
||||
@ -467,11 +594,28 @@ init_dynamic_hook() {
|
||||
ALL_ARITH_CMP_OPS(SET_ARITH_COMPARE_AVX2, float)
|
||||
ALL_ARITH_CMP_OPS(SET_ARITH_COMPARE_AVX2, double)
|
||||
|
||||
#define SET_FORWARD_OPS_AVX2(ELEMENTTYPE) \
|
||||
forward_op_and_##ELEMENTTYPE = \
|
||||
VectorizedAvx2::template forward_op_and<ELEMENTTYPE>; \
|
||||
forward_op_and_multiple_##ELEMENTTYPE = \
|
||||
VectorizedAvx2::template forward_op_and_multiple<ELEMENTTYPE>; \
|
||||
forward_op_or_##ELEMENTTYPE = \
|
||||
VectorizedAvx2::template forward_op_or<ELEMENTTYPE>; \
|
||||
forward_op_or_multiple_##ELEMENTTYPE = \
|
||||
VectorizedAvx2::template forward_op_or_multiple<ELEMENTTYPE>; \
|
||||
forward_op_xor_##ELEMENTTYPE = \
|
||||
VectorizedAvx2::template forward_op_xor<ELEMENTTYPE>; \
|
||||
forward_op_sub_##ELEMENTTYPE = \
|
||||
VectorizedAvx2::template forward_op_sub<ELEMENTTYPE>;
|
||||
|
||||
ALL_FORWARD_OPS(SET_FORWARD_OPS_AVX2)
|
||||
|
||||
#undef SET_OP_COMPARE_COLUMN_AVX2
|
||||
#undef SET_OP_COMPARE_VAL_AVX2
|
||||
#undef SET_OP_WITHIN_RANGE_COLUMN_AVX2
|
||||
#undef SET_OP_WITHIN_RANGE_VAL_AVX2
|
||||
#undef SET_ARITH_COMPARE_AVX2
|
||||
#undef SET_FORWARD_OPS_AVX2
|
||||
|
||||
return;
|
||||
}
|
||||
@ -535,15 +679,33 @@ init_dynamic_hook() {
|
||||
ALL_ARITH_CMP_OPS(SET_ARITH_COMPARE_SVE, float)
|
||||
ALL_ARITH_CMP_OPS(SET_ARITH_COMPARE_SVE, double)
|
||||
|
||||
#define SET_FORWARD_OPS_SVE(ELEMENTTYPE) \
|
||||
forward_op_and_##ELEMENTTYPE = \
|
||||
VectorizedSve::template forward_op_and<ELEMENTTYPE>; \
|
||||
forward_op_and_multiple_##ELEMENTTYPE = \
|
||||
VectorizedSve::template forward_op_and_multiple<ELEMENTTYPE>; \
|
||||
forward_op_or_##ELEMENTTYPE = \
|
||||
VectorizedSve::template forward_op_or<ELEMENTTYPE>; \
|
||||
forward_op_or_multiple_##ELEMENTTYPE = \
|
||||
VectorizedSve::template forward_op_or_multiple<ELEMENTTYPE>; \
|
||||
forward_op_xor_##ELEMENTTYPE = \
|
||||
VectorizedSve::template forward_op_xor<ELEMENTTYPE>; \
|
||||
forward_op_sub_##ELEMENTTYPE = \
|
||||
VectorizedSve::template forward_op_sub<ELEMENTTYPE>;
|
||||
|
||||
ALL_FORWARD_OPS(SET_FORWARD_OPS_SVE)
|
||||
|
||||
#undef SET_OP_COMPARE_COLUMN_SVE
|
||||
#undef SET_OP_COMPARE_VAL_SVE
|
||||
#undef SET_OP_WITHIN_RANGE_COLUMN_SVE
|
||||
#undef SET_OP_WITHIN_RANGE_VAL_SVE
|
||||
#undef SET_ARITH_COMPARE_SVE
|
||||
#undef SET_FORWARD_OPS_SVE
|
||||
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
// neon ?
|
||||
{
|
||||
#define SET_OP_COMPARE_COLUMN_NEON(TTYPE, UTYPE, OP) \
|
||||
@ -600,11 +762,28 @@ init_dynamic_hook() {
|
||||
ALL_ARITH_CMP_OPS(SET_ARITH_COMPARE_NEON, float)
|
||||
ALL_ARITH_CMP_OPS(SET_ARITH_COMPARE_NEON, double)
|
||||
|
||||
#define SET_FORWARD_OPS_NEON(ELEMENTTYPE) \
|
||||
forward_op_and_##ELEMENTTYPE = \
|
||||
VectorizedNeon::template forward_op_and<ELEMENTTYPE>; \
|
||||
forward_op_and_multiple_##ELEMENTTYPE = \
|
||||
VectorizedNeon::template forward_op_and_multiple<ELEMENTTYPE>; \
|
||||
forward_op_or_##ELEMENTTYPE = \
|
||||
VectorizedNeon::template forward_op_or<ELEMENTTYPE>; \
|
||||
forward_op_or_multiple_##ELEMENTTYPE = \
|
||||
VectorizedNeon::template forward_op_or_multiple<ELEMENTTYPE>; \
|
||||
forward_op_xor_##ELEMENTTYPE = \
|
||||
VectorizedNeon::template forward_op_xor<ELEMENTTYPE>; \
|
||||
forward_op_sub_##ELEMENTTYPE = \
|
||||
VectorizedNeon::template forward_op_sub<ELEMENTTYPE>;
|
||||
|
||||
ALL_FORWARD_OPS(SET_FORWARD_OPS_NEON)
|
||||
|
||||
#undef SET_OP_COMPARE_COLUMN_NEON
|
||||
#undef SET_OP_COMPARE_VAL_NEON
|
||||
#undef SET_OP_WITHIN_RANGE_COLUMN_NEON
|
||||
#undef SET_OP_WITHIN_RANGE_VAL_NEON
|
||||
#undef SET_ARITH_COMPARE_NEON
|
||||
#undef SET_FORWARD_OPS_NEON
|
||||
|
||||
return;
|
||||
}
|
||||
@ -616,6 +795,7 @@ init_dynamic_hook() {
|
||||
#undef ALL_COMPARE_OPS
|
||||
#undef ALL_RANGE_OPS
|
||||
#undef ALL_ARITH_CMP_OPS
|
||||
#undef ALL_FORWARD_OPS
|
||||
|
||||
//
|
||||
static int init_dynamic_ = []() {
|
||||
|
@ -37,6 +37,11 @@ namespace dynamic {
|
||||
FUNC(float); \
|
||||
FUNC(double);
|
||||
|
||||
// a facility to run through all acceptable forward types
|
||||
#define ALL_FORWARD_TYPES_1(FUNC) \
|
||||
FUNC(uint8_t); \
|
||||
FUNC(uint64_t);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// the default implementation
|
||||
template <typename T, typename U, CompareOpType Op>
|
||||
@ -176,11 +181,125 @@ struct OpArithCompareImpl {
|
||||
|
||||
ALL_DATATYPES_1(DECLARE_PARTIAL_OP_ARITH_COMPARE)
|
||||
|
||||
//
|
||||
#undef DECLARE_PARTIAL_OP_ARITH_COMPARE
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// the default implementation
|
||||
template <typename ElementT>
|
||||
struct ForwardOpsImpl {
|
||||
static inline bool
|
||||
op_and(ElementT* const left,
|
||||
const ElementT* const right,
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
op_and_multiple(ElementT* const left,
|
||||
const ElementT* const* const rights,
|
||||
const size_t start_left,
|
||||
const size_t* const __restrict start_rights,
|
||||
const size_t n_rights,
|
||||
const size_t size) {
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
op_or(ElementT* const left,
|
||||
const ElementT* const right,
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
op_or_multiple(ElementT* const left,
|
||||
const ElementT* const* const rights,
|
||||
const size_t start_left,
|
||||
const size_t* const __restrict start_rights,
|
||||
const size_t n_rights,
|
||||
const size_t size) {
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
op_xor(ElementT* const left,
|
||||
const ElementT* const right,
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
op_sub(ElementT* const left,
|
||||
const ElementT* const right,
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
#define DECLARE_PARTIAL_FORWARD_OPS(ELEMENTTYPE) \
|
||||
template <> \
|
||||
struct ForwardOpsImpl<ELEMENTTYPE> { \
|
||||
static bool \
|
||||
op_and(ELEMENTTYPE* const left, \
|
||||
const ELEMENTTYPE* const right, \
|
||||
const size_t start_left, \
|
||||
const size_t start_right, \
|
||||
const size_t size); \
|
||||
\
|
||||
static bool \
|
||||
op_and_multiple(ELEMENTTYPE* const left, \
|
||||
const ELEMENTTYPE* const* const rights, \
|
||||
const size_t start_left, \
|
||||
const size_t* const __restrict start_rights, \
|
||||
const size_t n_rights, \
|
||||
const size_t size); \
|
||||
\
|
||||
static bool \
|
||||
op_or(ELEMENTTYPE* const left, \
|
||||
const ELEMENTTYPE* const right, \
|
||||
const size_t start_left, \
|
||||
const size_t start_right, \
|
||||
const size_t size); \
|
||||
\
|
||||
static bool \
|
||||
op_or_multiple(ELEMENTTYPE* const left, \
|
||||
const ELEMENTTYPE* const* const rights, \
|
||||
const size_t start_left, \
|
||||
const size_t* const __restrict start_rights, \
|
||||
const size_t n_rights, \
|
||||
const size_t size); \
|
||||
\
|
||||
static bool \
|
||||
op_sub(ELEMENTTYPE* const left, \
|
||||
const ELEMENTTYPE* const right, \
|
||||
const size_t start_left, \
|
||||
const size_t start_right, \
|
||||
const size_t size); \
|
||||
\
|
||||
static bool \
|
||||
op_xor(ELEMENTTYPE* const left, \
|
||||
const ELEMENTTYPE* const right, \
|
||||
const size_t start_left, \
|
||||
const size_t start_right, \
|
||||
const size_t size); \
|
||||
};
|
||||
|
||||
ALL_FORWARD_TYPES_1(DECLARE_PARTIAL_FORWARD_OPS)
|
||||
|
||||
#undef DECLARE_PARTIAL_FORWARD_OPS
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#undef ALL_DATATYPES_1
|
||||
#undef ALL_FORWARD_TYPES_1
|
||||
|
||||
} // namespace dynamic
|
||||
|
||||
@ -248,6 +367,77 @@ struct VectorizedDynamic {
|
||||
return dynamic::OpArithCompareImpl<T, AOp, CmpOp>::op_arith_compare(
|
||||
bitmask, src, right_operand, value, size);
|
||||
}
|
||||
|
||||
// The following functions just forward parameters to the reference code,
|
||||
// generated for a particular platform.
|
||||
|
||||
template <typename ElementT>
|
||||
static inline bool
|
||||
forward_op_and(ElementT* const left,
|
||||
const ElementT* const right,
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
return dynamic::ForwardOpsImpl<ElementT>::op_and(
|
||||
left, right, start_left, start_right, size);
|
||||
}
|
||||
|
||||
template <typename ElementT>
|
||||
static inline bool
|
||||
forward_op_and_multiple(ElementT* const left,
|
||||
const ElementT* const* const rights,
|
||||
const size_t start_left,
|
||||
const size_t* const __restrict start_rights,
|
||||
const size_t n_rights,
|
||||
const size_t size) {
|
||||
return dynamic::ForwardOpsImpl<ElementT>::op_and_multiple(
|
||||
left, rights, start_left, start_rights, n_rights, size);
|
||||
}
|
||||
|
||||
template <typename ElementT>
|
||||
static inline bool
|
||||
forward_op_or(ElementT* const left,
|
||||
const ElementT* const right,
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
return dynamic::ForwardOpsImpl<ElementT>::op_or(
|
||||
left, right, start_left, start_right, size);
|
||||
}
|
||||
|
||||
template <typename ElementT>
|
||||
static inline bool
|
||||
forward_op_or_multiple(ElementT* const left,
|
||||
const ElementT* const* const rights,
|
||||
const size_t start_left,
|
||||
const size_t* const __restrict start_rights,
|
||||
const size_t n_rights,
|
||||
const size_t size) {
|
||||
return dynamic::ForwardOpsImpl<ElementT>::op_or_multiple(
|
||||
left, rights, start_left, start_rights, n_rights, size);
|
||||
}
|
||||
|
||||
template <typename ElementT>
|
||||
static inline bool
|
||||
forward_op_xor(ElementT* const left,
|
||||
const ElementT* const right,
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
return dynamic::ForwardOpsImpl<ElementT>::op_xor(
|
||||
left, right, start_left, start_right, size);
|
||||
}
|
||||
|
||||
template <typename ElementT>
|
||||
static inline bool
|
||||
forward_op_sub(ElementT* const left,
|
||||
const ElementT* const right,
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
return dynamic::ForwardOpsImpl<ElementT>::op_sub(
|
||||
left, right, start_left, start_right, size);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace detail
|
||||
|
@ -27,9 +27,13 @@ namespace bitset {
|
||||
namespace detail {
|
||||
|
||||
// The default reference vectorizer.
|
||||
// Its every function returns a boolean value whether a vectorized implementation
|
||||
// Functions return a boolean value whether a vectorized implementation
|
||||
// exists and was invoked. If not, then the caller code will use a default
|
||||
// non-vectorized implementation.
|
||||
// Certain functions just forward the parameters to the platform code. Basically,
|
||||
// sometimes compiler can do a good job on its own, we just need to make sure
|
||||
// that it uses available appropriate hardware instructions. No specialized
|
||||
// implementation is used under the hood.
|
||||
// The default vectorizer provides no vectorized implementation, forcing the
|
||||
// caller to use a defaut non-vectorized implementation every time.
|
||||
struct VectorizedRef {
|
||||
@ -88,6 +92,72 @@ struct VectorizedRef {
|
||||
const size_t size) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// The following functions just forward parameters to the reference code,
|
||||
// generated for a particular platform.
|
||||
// The reference 'platform' is just a default platform.
|
||||
|
||||
template <typename ElementT>
|
||||
static inline bool
|
||||
forward_op_and(ElementT* const left,
|
||||
const ElementT* const right,
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename ElementT>
|
||||
static inline bool
|
||||
forward_op_and_multiple(ElementT* const left,
|
||||
const ElementT* const* const rights,
|
||||
const size_t start_left,
|
||||
const size_t* const __restrict start_rights,
|
||||
const size_t n_rights,
|
||||
const size_t size) {
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename ElementT>
|
||||
static inline bool
|
||||
forward_op_or(ElementT* const left,
|
||||
const ElementT* const right,
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename ElementT>
|
||||
static inline bool
|
||||
forward_op_or_multiple(ElementT* const left,
|
||||
const ElementT* const* const rights,
|
||||
const size_t start_left,
|
||||
const size_t* const __restrict start_rights,
|
||||
const size_t n_rights,
|
||||
const size_t size) {
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename ElementT>
|
||||
static inline bool
|
||||
forward_op_xor(ElementT* const left,
|
||||
const ElementT* const right,
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename ElementT>
|
||||
static inline bool
|
||||
forward_op_sub(ElementT* const left,
|
||||
const ElementT* const right,
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace detail
|
||||
|
@ -39,6 +39,11 @@ namespace avx2 {
|
||||
FUNC(float); \
|
||||
FUNC(double);
|
||||
|
||||
// a facility to run through all acceptable forward types
|
||||
#define ALL_FORWARD_TYPES_1(FUNC) \
|
||||
FUNC(uint8_t); \
|
||||
FUNC(uint64_t);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// the default implementation does nothing
|
||||
@ -192,7 +197,122 @@ ALL_DATATYPES_1(DECLARE_PARTIAL_OP_ARITH_COMPARE)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// forward ops
|
||||
template <typename ElementT>
|
||||
struct ForwardOpsImpl {
|
||||
static inline bool
|
||||
op_and(ElementT* const left,
|
||||
const ElementT* const right,
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
op_and_multiple(ElementT* const left,
|
||||
const ElementT* const* const rights,
|
||||
const size_t start_left,
|
||||
const size_t* const __restrict start_rights,
|
||||
const size_t n_rights,
|
||||
const size_t size) {
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
op_or(ElementT* const left,
|
||||
const ElementT* const right,
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
op_or_multiple(ElementT* const left,
|
||||
const ElementT* const* const rights,
|
||||
const size_t start_left,
|
||||
const size_t* const __restrict start_rights,
|
||||
const size_t n_rights,
|
||||
const size_t size) {
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
op_xor(ElementT* const left,
|
||||
const ElementT* const right,
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
op_sub(ElementT* const left,
|
||||
const ElementT* const right,
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
#define DECLARE_PARTIAL_FORWARD_OPS(ELEMENTTYPE) \
|
||||
template <> \
|
||||
struct ForwardOpsImpl<ELEMENTTYPE> { \
|
||||
static bool \
|
||||
op_and(ELEMENTTYPE* const left, \
|
||||
const ELEMENTTYPE* const right, \
|
||||
const size_t start_left, \
|
||||
const size_t start_right, \
|
||||
const size_t size); \
|
||||
\
|
||||
static bool \
|
||||
op_and_multiple(ELEMENTTYPE* const left, \
|
||||
const ELEMENTTYPE* const* const rights, \
|
||||
const size_t start_left, \
|
||||
const size_t* const __restrict start_rights, \
|
||||
const size_t n_rights, \
|
||||
const size_t size); \
|
||||
\
|
||||
static bool \
|
||||
op_or(ELEMENTTYPE* const left, \
|
||||
const ELEMENTTYPE* const right, \
|
||||
const size_t start_left, \
|
||||
const size_t start_right, \
|
||||
const size_t size); \
|
||||
\
|
||||
static bool \
|
||||
op_or_multiple(ELEMENTTYPE* const left, \
|
||||
const ELEMENTTYPE* const* const rights, \
|
||||
const size_t start_left, \
|
||||
const size_t* const __restrict start_rights, \
|
||||
const size_t n_rights, \
|
||||
const size_t size); \
|
||||
\
|
||||
static bool \
|
||||
op_sub(ELEMENTTYPE* const left, \
|
||||
const ELEMENTTYPE* const right, \
|
||||
const size_t start_left, \
|
||||
const size_t start_right, \
|
||||
const size_t size); \
|
||||
\
|
||||
static bool \
|
||||
op_xor(ELEMENTTYPE* const left, \
|
||||
const ELEMENTTYPE* const right, \
|
||||
const size_t start_left, \
|
||||
const size_t start_right, \
|
||||
const size_t size); \
|
||||
};
|
||||
|
||||
ALL_FORWARD_TYPES_1(DECLARE_PARTIAL_FORWARD_OPS)
|
||||
|
||||
#undef DECLARE_PARTIAL_FORWARD_OPS
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#undef ALL_DATATYPES_1
|
||||
#undef ALL_FORWARD_TYPES_1
|
||||
|
||||
} // namespace avx2
|
||||
} // namespace x86
|
||||
|
@ -28,6 +28,7 @@
|
||||
#include "avx2-decl.h"
|
||||
|
||||
#include "bitset/common.h"
|
||||
#include "bitset/detail/element_wise.h"
|
||||
#include "common.h"
|
||||
|
||||
namespace milvus {
|
||||
@ -1649,6 +1650,151 @@ OpArithCompareImpl<double, AOp, CmpOp>::op_arith_compare(
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// forward ops
|
||||
|
||||
//
|
||||
bool
|
||||
ForwardOpsImpl<uint8_t>::op_and(uint8_t* const left,
|
||||
const uint8_t* const right,
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
ElementWiseBitsetPolicy<uint8_t>::op_and(
|
||||
left, right, start_left, start_right, size);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
ForwardOpsImpl<uint8_t>::op_and_multiple(
|
||||
uint8_t* const left,
|
||||
const uint8_t* const* const rights,
|
||||
const size_t start_left,
|
||||
const size_t* const __restrict start_rights,
|
||||
const size_t n_rights,
|
||||
const size_t size) {
|
||||
ElementWiseBitsetPolicy<uint8_t>::op_and_multiple(
|
||||
left, rights, start_left, start_rights, n_rights, size);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
ForwardOpsImpl<uint8_t>::op_or(uint8_t* const left,
|
||||
const uint8_t* const right,
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
ElementWiseBitsetPolicy<uint8_t>::op_or(
|
||||
left, right, start_left, start_right, size);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
ForwardOpsImpl<uint8_t>::op_or_multiple(
|
||||
uint8_t* const left,
|
||||
const uint8_t* const* const rights,
|
||||
const size_t start_left,
|
||||
const size_t* const __restrict start_rights,
|
||||
const size_t n_rights,
|
||||
const size_t size) {
|
||||
ElementWiseBitsetPolicy<uint8_t>::op_or_multiple(
|
||||
left, rights, start_left, start_rights, n_rights, size);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
ForwardOpsImpl<uint8_t>::op_xor(uint8_t* const left,
|
||||
const uint8_t* const right,
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
ElementWiseBitsetPolicy<uint8_t>::op_xor(
|
||||
left, right, start_left, start_right, size);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
ForwardOpsImpl<uint8_t>::op_sub(uint8_t* const left,
|
||||
const uint8_t* const right,
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
ElementWiseBitsetPolicy<uint8_t>::op_sub(
|
||||
left, right, start_left, start_right, size);
|
||||
return true;
|
||||
}
|
||||
|
||||
//
|
||||
bool
|
||||
ForwardOpsImpl<uint64_t>::op_and(uint64_t* const left,
|
||||
const uint64_t* const right,
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
ElementWiseBitsetPolicy<uint64_t>::op_and(
|
||||
left, right, start_left, start_right, size);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
ForwardOpsImpl<uint64_t>::op_and_multiple(
|
||||
uint64_t* const left,
|
||||
const uint64_t* const* const rights,
|
||||
const size_t start_left,
|
||||
const size_t* const __restrict start_rights,
|
||||
const size_t n_rights,
|
||||
const size_t size) {
|
||||
ElementWiseBitsetPolicy<uint64_t>::op_and_multiple(
|
||||
left, rights, start_left, start_rights, n_rights, size);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
ForwardOpsImpl<uint64_t>::op_or(uint64_t* const left,
|
||||
const uint64_t* const right,
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
ElementWiseBitsetPolicy<uint64_t>::op_or(
|
||||
left, right, start_left, start_right, size);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
ForwardOpsImpl<uint64_t>::op_or_multiple(
|
||||
uint64_t* const left,
|
||||
const uint64_t* const* const rights,
|
||||
const size_t start_left,
|
||||
const size_t* const __restrict start_rights,
|
||||
const size_t n_rights,
|
||||
const size_t size) {
|
||||
ElementWiseBitsetPolicy<uint64_t>::op_or_multiple(
|
||||
left, rights, start_left, start_rights, n_rights, size);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
ForwardOpsImpl<uint64_t>::op_xor(uint64_t* const left,
|
||||
const uint64_t* const right,
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
ElementWiseBitsetPolicy<uint64_t>::op_xor(
|
||||
left, right, start_left, start_right, size);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
ForwardOpsImpl<uint64_t>::op_sub(uint64_t* const left,
|
||||
const uint64_t* const right,
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
ElementWiseBitsetPolicy<uint64_t>::op_sub(
|
||||
left, right, start_left, start_right, size);
|
||||
return true;
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
} // namespace avx2
|
||||
|
@ -55,6 +55,30 @@ struct VectorizedAvx2 {
|
||||
template <typename T, ArithOpType AOp, CompareOpType CmpOp>
|
||||
static constexpr inline auto op_arith_compare =
|
||||
avx2::OpArithCompareImpl<T, AOp, CmpOp>::op_arith_compare;
|
||||
|
||||
template <typename ElementT>
|
||||
static constexpr inline auto forward_op_and =
|
||||
avx2::ForwardOpsImpl<ElementT>::op_and;
|
||||
|
||||
template <typename ElementT>
|
||||
static constexpr inline auto forward_op_and_multiple =
|
||||
avx2::ForwardOpsImpl<ElementT>::op_and_multiple;
|
||||
|
||||
template <typename ElementT>
|
||||
static constexpr inline auto forward_op_or =
|
||||
avx2::ForwardOpsImpl<ElementT>::op_or;
|
||||
|
||||
template <typename ElementT>
|
||||
static constexpr inline auto forward_op_or_multiple =
|
||||
avx2::ForwardOpsImpl<ElementT>::op_or_multiple;
|
||||
|
||||
template <typename ElementT>
|
||||
static constexpr inline auto forward_op_xor =
|
||||
avx2::ForwardOpsImpl<ElementT>::op_xor;
|
||||
|
||||
template <typename ElementT>
|
||||
static constexpr inline auto forward_op_sub =
|
||||
avx2::ForwardOpsImpl<ElementT>::op_sub;
|
||||
};
|
||||
|
||||
} // namespace x86
|
||||
|
@ -39,6 +39,11 @@ namespace avx512 {
|
||||
FUNC(float); \
|
||||
FUNC(double);
|
||||
|
||||
// a facility to run through all acceptable forward types
|
||||
#define ALL_FORWARD_TYPES_1(FUNC) \
|
||||
FUNC(uint8_t); \
|
||||
FUNC(uint64_t);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// the default implementation does nothing
|
||||
@ -192,7 +197,122 @@ ALL_DATATYPES_1(DECLARE_PARTIAL_OP_ARITH_COMPARE)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// forward ops
|
||||
template <typename ElementT>
|
||||
struct ForwardOpsImpl {
|
||||
static inline bool
|
||||
op_and(ElementT* const left,
|
||||
const ElementT* const right,
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
op_and_multiple(ElementT* const left,
|
||||
const ElementT* const* const rights,
|
||||
const size_t start_left,
|
||||
const size_t* const __restrict start_rights,
|
||||
const size_t n_rights,
|
||||
const size_t size) {
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
op_or(ElementT* const left,
|
||||
const ElementT* const right,
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
op_or_multiple(ElementT* const left,
|
||||
const ElementT* const* const rights,
|
||||
const size_t start_left,
|
||||
const size_t* const __restrict start_rights,
|
||||
const size_t n_rights,
|
||||
const size_t size) {
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
op_xor(ElementT* const left,
|
||||
const ElementT* const right,
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
op_sub(ElementT* const left,
|
||||
const ElementT* const right,
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
#define DECLARE_PARTIAL_FORWARD_OPS(ELEMENTTYPE) \
|
||||
template <> \
|
||||
struct ForwardOpsImpl<ELEMENTTYPE> { \
|
||||
static bool \
|
||||
op_and(ELEMENTTYPE* const left, \
|
||||
const ELEMENTTYPE* const right, \
|
||||
const size_t start_left, \
|
||||
const size_t start_right, \
|
||||
const size_t size); \
|
||||
\
|
||||
static bool \
|
||||
op_and_multiple(ELEMENTTYPE* const left, \
|
||||
const ELEMENTTYPE* const* const rights, \
|
||||
const size_t start_left, \
|
||||
const size_t* const __restrict start_rights, \
|
||||
const size_t n_rights, \
|
||||
const size_t size); \
|
||||
\
|
||||
static bool \
|
||||
op_or(ELEMENTTYPE* const left, \
|
||||
const ELEMENTTYPE* const right, \
|
||||
const size_t start_left, \
|
||||
const size_t start_right, \
|
||||
const size_t size); \
|
||||
\
|
||||
static bool \
|
||||
op_or_multiple(ELEMENTTYPE* const left, \
|
||||
const ELEMENTTYPE* const* const rights, \
|
||||
const size_t start_left, \
|
||||
const size_t* const __restrict start_rights, \
|
||||
const size_t n_rights, \
|
||||
const size_t size); \
|
||||
\
|
||||
static bool \
|
||||
op_sub(ELEMENTTYPE* const left, \
|
||||
const ELEMENTTYPE* const right, \
|
||||
const size_t start_left, \
|
||||
const size_t start_right, \
|
||||
const size_t size); \
|
||||
\
|
||||
static bool \
|
||||
op_xor(ELEMENTTYPE* const left, \
|
||||
const ELEMENTTYPE* const right, \
|
||||
const size_t start_left, \
|
||||
const size_t start_right, \
|
||||
const size_t size); \
|
||||
};
|
||||
|
||||
ALL_FORWARD_TYPES_1(DECLARE_PARTIAL_FORWARD_OPS)
|
||||
|
||||
#undef DECLARE_PARTIAL_FORWARD_OPS
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#undef ALL_DATATYPES_1
|
||||
#undef ALL_FORWARD_TYPES_1
|
||||
|
||||
} // namespace avx512
|
||||
} // namespace x86
|
||||
|
@ -28,6 +28,7 @@
|
||||
#include "avx512-decl.h"
|
||||
|
||||
#include "bitset/common.h"
|
||||
#include "bitset/detail/element_wise.h"
|
||||
#include "common.h"
|
||||
|
||||
namespace milvus {
|
||||
@ -1871,6 +1872,151 @@ OpArithCompareImpl<double, AOp, CmpOp>::op_arith_compare(
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// forward ops
|
||||
|
||||
//
|
||||
bool
|
||||
ForwardOpsImpl<uint8_t>::op_and(uint8_t* const left,
|
||||
const uint8_t* const right,
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
ElementWiseBitsetPolicy<uint8_t>::op_and(
|
||||
left, right, start_left, start_right, size);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
ForwardOpsImpl<uint8_t>::op_and_multiple(
|
||||
uint8_t* const left,
|
||||
const uint8_t* const* const rights,
|
||||
const size_t start_left,
|
||||
const size_t* const __restrict start_rights,
|
||||
const size_t n_rights,
|
||||
const size_t size) {
|
||||
ElementWiseBitsetPolicy<uint8_t>::op_and_multiple(
|
||||
left, rights, start_left, start_rights, n_rights, size);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
ForwardOpsImpl<uint8_t>::op_or(uint8_t* const left,
|
||||
const uint8_t* const right,
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
ElementWiseBitsetPolicy<uint8_t>::op_or(
|
||||
left, right, start_left, start_right, size);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
ForwardOpsImpl<uint8_t>::op_or_multiple(
|
||||
uint8_t* const left,
|
||||
const uint8_t* const* const rights,
|
||||
const size_t start_left,
|
||||
const size_t* const __restrict start_rights,
|
||||
const size_t n_rights,
|
||||
const size_t size) {
|
||||
ElementWiseBitsetPolicy<uint8_t>::op_or_multiple(
|
||||
left, rights, start_left, start_rights, n_rights, size);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
ForwardOpsImpl<uint8_t>::op_xor(uint8_t* const left,
|
||||
const uint8_t* const right,
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
ElementWiseBitsetPolicy<uint8_t>::op_xor(
|
||||
left, right, start_left, start_right, size);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
ForwardOpsImpl<uint8_t>::op_sub(uint8_t* const left,
|
||||
const uint8_t* const right,
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
ElementWiseBitsetPolicy<uint8_t>::op_sub(
|
||||
left, right, start_left, start_right, size);
|
||||
return true;
|
||||
}
|
||||
|
||||
//
|
||||
bool
|
||||
ForwardOpsImpl<uint64_t>::op_and(uint64_t* const left,
|
||||
const uint64_t* const right,
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
ElementWiseBitsetPolicy<uint64_t>::op_and(
|
||||
left, right, start_left, start_right, size);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
ForwardOpsImpl<uint64_t>::op_and_multiple(
|
||||
uint64_t* const left,
|
||||
const uint64_t* const* const rights,
|
||||
const size_t start_left,
|
||||
const size_t* const __restrict start_rights,
|
||||
const size_t n_rights,
|
||||
const size_t size) {
|
||||
ElementWiseBitsetPolicy<uint64_t>::op_and_multiple(
|
||||
left, rights, start_left, start_rights, n_rights, size);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
ForwardOpsImpl<uint64_t>::op_or(uint64_t* const left,
|
||||
const uint64_t* const right,
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
ElementWiseBitsetPolicy<uint64_t>::op_or(
|
||||
left, right, start_left, start_right, size);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
ForwardOpsImpl<uint64_t>::op_or_multiple(
|
||||
uint64_t* const left,
|
||||
const uint64_t* const* const rights,
|
||||
const size_t start_left,
|
||||
const size_t* const __restrict start_rights,
|
||||
const size_t n_rights,
|
||||
const size_t size) {
|
||||
ElementWiseBitsetPolicy<uint64_t>::op_or_multiple(
|
||||
left, rights, start_left, start_rights, n_rights, size);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
ForwardOpsImpl<uint64_t>::op_xor(uint64_t* const left,
|
||||
const uint64_t* const right,
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
ElementWiseBitsetPolicy<uint64_t>::op_xor(
|
||||
left, right, start_left, start_right, size);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
ForwardOpsImpl<uint64_t>::op_sub(uint64_t* const left,
|
||||
const uint64_t* const right,
|
||||
const size_t start_left,
|
||||
const size_t start_right,
|
||||
const size_t size) {
|
||||
ElementWiseBitsetPolicy<uint64_t>::op_sub(
|
||||
left, right, start_left, start_right, size);
|
||||
return true;
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
} // namespace avx512
|
||||
|
@ -55,6 +55,30 @@ struct VectorizedAvx512 {
|
||||
template <typename T, ArithOpType AOp, CompareOpType CmpOp>
|
||||
static constexpr inline auto op_arith_compare =
|
||||
avx512::OpArithCompareImpl<T, AOp, CmpOp>::op_arith_compare;
|
||||
|
||||
template <typename ElementT>
|
||||
static constexpr inline auto forward_op_and =
|
||||
avx512::ForwardOpsImpl<ElementT>::op_and;
|
||||
|
||||
template <typename ElementT>
|
||||
static constexpr inline auto forward_op_and_multiple =
|
||||
avx512::ForwardOpsImpl<ElementT>::op_and_multiple;
|
||||
|
||||
template <typename ElementT>
|
||||
static constexpr inline auto forward_op_or =
|
||||
avx512::ForwardOpsImpl<ElementT>::op_or;
|
||||
|
||||
template <typename ElementT>
|
||||
static constexpr inline auto forward_op_or_multiple =
|
||||
avx512::ForwardOpsImpl<ElementT>::op_or_multiple;
|
||||
|
||||
template <typename ElementT>
|
||||
static constexpr inline auto forward_op_xor =
|
||||
avx512::ForwardOpsImpl<ElementT>::op_xor;
|
||||
|
||||
template <typename ElementT>
|
||||
static constexpr inline auto forward_op_sub =
|
||||
avx512::ForwardOpsImpl<ElementT>::op_sub;
|
||||
};
|
||||
|
||||
} // namespace x86
|
||||
|
@ -23,14 +23,13 @@ namespace detail {
|
||||
template <typename PolicyT>
|
||||
struct ConstProxy {
|
||||
using policy_type = PolicyT;
|
||||
using size_type = typename policy_type::size_type;
|
||||
using data_type = typename policy_type::data_type;
|
||||
using self_type = ConstProxy;
|
||||
|
||||
const data_type& element;
|
||||
data_type mask;
|
||||
|
||||
inline ConstProxy(const data_type& _element, const size_type _shift)
|
||||
inline ConstProxy(const data_type& _element, const size_t _shift)
|
||||
: element{_element} {
|
||||
mask = (data_type(1) << _shift);
|
||||
}
|
||||
@ -47,15 +46,13 @@ struct ConstProxy {
|
||||
template <typename PolicyT>
|
||||
struct Proxy {
|
||||
using policy_type = PolicyT;
|
||||
using size_type = typename policy_type::size_type;
|
||||
using data_type = typename policy_type::data_type;
|
||||
using self_type = Proxy;
|
||||
|
||||
data_type& element;
|
||||
data_type mask;
|
||||
|
||||
inline Proxy(data_type& _element, const size_type _shift)
|
||||
: element{_element} {
|
||||
inline Proxy(data_type& _element, const size_t _shift) : element{_element} {
|
||||
mask = (data_type(1) << _shift);
|
||||
}
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user