AVX-512 enabled bloom filter
From
Bonita Montero@3:633/10 to
All on Thu Apr 30 09:03:09 2026
This is a AVX-512 enabled bloom filter.
#include <array>
#include <string_view>
#include <span>
#include <bit>
#include <vector>
#include <atomic>
#include <algorithm>
#include <random>
#include <cassert>
#if defined(_MSC_VER) && defined(_M_X64)
#include <intrin.h>
#elif (defined(__GNUC__) || defined(__clang__)) && defined(__x86_64__)
#include <x86intrin.h>
#endif
#include "inline.hpp"
#include "unroll.hpp"
#include "pct.hpp"
#include "cpuid.hpp"
#if defined(_MSC_VER)
#pragma warning(push)
#pragma warning(disable: 26495)
#elif defined(__GNUC__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wignored-attributes"
#endif
#if defined(_M_X64) || defined(__AVX512F__) && defined(__AVX512DQ__)
#define FNV_BLOOM_AVX512
#endif
template<size_t MaxHashes, bool Atomic = false>
struct fnv_bloom
{
static constexpr float DefaultFpr = (float)1.0_percent;
using ranges = std::span<const std::string_view>;
struct match_result { bool matched; size_t hash; };
fnv_bloom();
fnv_bloom( size_t nKeys, size_t nHashes = MaxHashes, float fpr = DefaultFpr );
fnv_bloom( const fnv_bloom & ) = default;
fnv_bloom( fnv_bloom && ) noexcept;
fnv_bloom &operator =( const fnv_bloom & ) = default;
fnv_bloom &operator =( fnv_bloom && ) noexcept;
size_t add( std::string_view rng ) noexcept;
size_t add( ranges rngs ) noexcept;
match_result match( std::string_view rng ) const noexcept;
match_result match( ranges rngs ) const noexcept;
void resize( size_t nKeys, size_t nHashes = MaxHashes, float fpr = DefaultFpr );
float usage() const noexcept;
size_t n_keys() const noexcept;
size_t n_bits() const noexcept;
double fpr() const noexcept;
static size_t n_bits( size_t nKeys, size_t nHashes = MaxHashes, float fpr = DefaultFpr ) noexcept;
static float fpr( size_t nBits, size_t nKeys, size_t nHashes ) noexcept;
private:
static constexpr size_t
StSize = sizeof( size_t ),
StBits = StSize * 8;
static constexpr bool SixtyFour = StSize == 8;
static_assert( StSize == 4 || SixtyFour );
static constexpr size_t Prime = (size_t)(SixtyFour ? 0x00000100000001b3
: 0x01000193);
using add_fn = size_t (fnv_bloom<MaxHashes, Atomic>::*)( ranges ) noexcept;
using match_fn = match_result (fnv_bloom<MaxHashes, Atomic>::*)( ranges
) const noexcept;
using bitmap_word = std::conditional_t<Atomic, std::atomic<size_t>, size_t>;
std::vector<bitmap_word> m_bits;
size_t
m_iFn,
m_nKeys;
template<size_t NHashes, bool Enhanced = false>
size_t addRanges( ranges rngs ) noexcept;
template<size_t NHashes, bool Enhanced = false>
match_result matchRanges( ranges rngs ) const noexcept;
template<size_t NHashes, bool Enhanced= false>
static void makeHashes( std::array<size_t, NHashes> &hashes, ranges rngs ) noexcept;
inline static struct init
{
init() noexcept;
std::array<add_fn, MaxHashes> m_addFns;
std::array<match_fn, MaxHashes> m_matchFns;
std::array<size_t, MaxHashes> m_inits;
#if defined(FNV_BLOOM_AVX512)
static constexpr size_t NAvx = (MaxHashes + 7) / 8;
alignas( 64 ) std::array<__m512i, NAvx> m_initChunks;
#endif
} global;
};
template<size_t MaxHashes, bool Atomic>
fnv_bloom<MaxHashes, Atomic>::fnv_bloom() :
m_bits(),
m_iFn( 0 ),
m_nKeys( 0 )
{
}
template<size_t MaxHashes, bool Atomic>
fnv_bloom<MaxHashes, Atomic>::fnv_bloom( fnv_bloom &&other ) noexcept
{
using namespace std;
m_bits = move( other.m_bits );
m_iFn = other.m_iFn;
other.m_nKeys = 0;
}
template<size_t MaxHashes, bool Atomic>
auto fnv_bloom<MaxHashes, Atomic>::operator =( fnv_bloom &&other )
noexcept -> fnv_bloom &
{
using namespace std;
m_bits = move( other.m_bits );
m_iFn = other.m_iFn;
other.m_nKeys = 0;
return *this;
}
template<size_t MaxHashes, bool Atomic>
fnv_bloom<MaxHashes, Atomic>::fnv_bloom( size_t nKeys, size_t nHashes,
float fpr )
{
resize( nKeys, nHashes, fpr );
}
template<size_t MaxHashes, bool Atomic>
FORCEINLINE size_t fnv_bloom<MaxHashes, Atomic>::add( std::string_view
rng ) noexcept
{
return add( ranges( &rng, 1 ) );
}
template<size_t MaxHashes, bool Atomic>
FORCEINLINE size_t fnv_bloom<MaxHashes, Atomic>::add( ranges rngs ) noexcept
{
return (this->*global.m_addFns[m_iFn])( rngs );
}
template<size_t MaxHashes, bool Atomic>
FORCEINLINE auto fnv_bloom<MaxHashes, Atomic>::match( std::string_view
rng ) const noexcept -> match_result
{
return match( ranges( &rng, 1 ) );
}
template<size_t MaxHashes, bool Atomic>
FORCEINLINE auto fnv_bloom<MaxHashes, Atomic>::match( ranges rngs )
const noexcept -> match_result
{
return (this->*global.m_matchFns[m_iFn])( rngs );
}
template<size_t MaxHashes, bool Atomic>
void fnv_bloom<MaxHashes, Atomic>::resize( size_t nKeys, size_t nHashes,
float fpr )
{
using namespace std;
// resize bitmap, thereby erasing everything
size_t nBits = n_bits( nKeys, nHashes, fpr );
if( m_bits.size() * StBits != nBits )
m_bits = vector<bitmap_word>( nBits / StBits );
else
if constexpr( Atomic )
for( size_t a = nBits / StBits; a--; )
m_bits[a].store( 0, memory_order_relaxed );
else
fill( m_bits.begin(), m_bits.end(), 0 );
m_iFn = (nHashes <= MaxHashes ? nHashes : MaxHashes) - 1;
m_nKeys = 0;
}
template<size_t MaxHashes, bool Atomic>
float fnv_bloom<MaxHashes, Atomic>::usage() const noexcept
{
using namespace std;
// return the number of set one-bits
size_t nSet = 0;
for( const bitmap_word &word : m_bits )
if constexpr( Atomic )
nSet += popcount( word.load( memory_order_relaxed ) );
else
nSet += popcount( word );
return (float)((double)nSet / (double)n_bits());
}
template<size_t MaxHashes, bool Atomic>
double fnv_bloom<MaxHashes, Atomic>::fpr() const noexcept
{
return fpr( n_bits(), m_nKeys, m_iFn + 1 );
}
template<size_t MaxHashes, bool Atomic>
inline size_t fnv_bloom<MaxHashes, Atomic>::n_keys() const noexcept
{
return m_nKeys;
}
template<size_t MaxHashes, bool Atomic>
inline size_t fnv_bloom<MaxHashes, Atomic>::n_bits() const noexcept
{
return m_bits.size() * StBits;
}
template<size_t MaxHashes, bool Atomic>
size_t fnv_bloom<MaxHashes, Atomic>::n_bits( size_t nKeys, size_t
nHashes, float fpr ) noexcept
{
using namespace std;
// calculate the number of necessary bits according to n keys, n hashes
and false positive rate
if( !nKeys )
return 0;
static constexpr float MinFpr = (float)1.0_permill;
constexpr size_t MaxBits = numeric_limits<ptrdiff_t>::min();
fpr = !isnan( fpr ) && !isinf( fpr ) ? fpr : DefaultFpr;
fpr = max( min( fpr, 1.0f ), MinFpr );
nHashes += (size_t)!nHashes;
double
dKeys = (double)nKeys,
dHashes = (double)nHashes,
dBits;
dBits = 1.0 - pow( fpr, 1.0 / dHashes );
dBits = 1.0 - pow( dBits, 1.0 / (dHashes * dKeys) );
dBits = 1.0 / dBits;
assert(dBits >= 0.0);
size_t nBits = (size_t)min( dBits, (double)MaxBits );
return bit_ceil( max( nBits, StBits ) );
}
template<size_t MaxHashes, bool Atomic>
float fnv_bloom<MaxHashes, Atomic>::fpr( size_t nBits, size_t nKeys,
size_t nHashes ) noexcept
{
using namespace std;
// calculate false positive rate given for a bits in bitmap, b keys and
c hashes
constexpr size_t MaxBits = numeric_limits<ptrdiff_t>::min();
if( nBits < MaxBits )
nBits = bit_ceil( nBits );
else
nBits = MaxBits;
double
dBits = (double)nBits,
dKeys = (double)nKeys,
dHashes = (double)nHashes,
fpr;
fpr = 1.0 - 1.0 / dBits;
fpr = 1.0 - pow( fpr, dHashes * dKeys );
return (float)pow( fpr, dHashes );
}
template<size_t MaxHashes, bool Atomic>
template<size_t NHashes, bool Enhanced>
size_t fnv_bloom<MaxHashes, Atomic>::addRanges( ranges rngs ) noexcept
{
using namespace std;
size_t hMask = n_bits() - 1;
array<size_t, NHashes> hashes;
// calculate hashes
makeHashes<NHashes, Enhanced>( hashes, rngs );
unroll<NHashes>( [&]<size_t H>() L_FORCEINLINE
{
// set bits at position hashes[H]
size_t
bit = hashes[H] & hMask,
mask = 1ull << (bit % StBits);
if constexpr( Atomic )
m_bits[bit / StBits].fetch_or( mask, memory_order_relaxed );
else
m_bits[bit / StBits] |= mask;
} );
++m_nKeys;
size_t hash = hashes[0];
unroll<NHashes - 1>( [&]<size_t H>() L_FORCEINLINE { hash ^= hashes[H +
1]; } );
return hash;
}
template<size_t MaxHashes, bool Atomic>
template<size_t NHashes, bool Enhanced>
auto fnv_bloom<MaxHashes, Atomic>::matchRanges( ranges rngs ) const
noexcept -> match_result
{
using namespace std;
size_t hMask = n_bits() - 1;
array<size_t, NHashes> hashes;
// calculate hashes
makeHashes<NHashes, Enhanced>( hashes, rngs );
bool matched = unroll<NHashes>( [&]<size_t H>() L_FORCEINLINE
{
// match bit at position hashes[H]
size_t
bit = hashes[H] & hMask,
mask = (size_t)1 << (bit % StBits),
word;
if constexpr( Atomic )
word = m_bits[bit / StBits].load( memory_order_relaxed );
else
word = m_bits[bit / StBits];
return (bool)(word & mask);
} );
if( !matched )
return match_result( false, 0 );
// return single hash XORed from hashes[N] for further usage
size_t hash = hashes[0];
unroll<NHashes - 1>( [&]<size_t H>() L_FORCEINLINE { hash ^= hashes[H +
1]; } );
return match_result( true, hash );
}
template<size_t MaxHashes, bool Atomic>
template<size_t NHashes, bool Enhanced>
FORCEINLINE void fnv_bloom<MaxHashes, Atomic>::makeHashes(
std::array<size_t, NHashes> &hashes, ranges rngs ) noexcept
{
using namespace std;
if constexpr( !Enhanced )
{
// initialize hashes with starting value
unroll<NHashes>( [&]<size_t H>() L_FORCEINLINE { hashes[H] = global.m_inits[H]; } );
for( string_view rng : rngs )
for( unsigned char c : rng )
// calculate NHashes time hash[H] interleaved
unroll<NHashes>( [&]<size_t H>() L_FORCEINLINE { hashes[H] =
(hashes[H] ^ c) * Prime; } );
}
else
{
#if defined(FNV_BLOOM_AVX512)
// round number of hashes up to the next avx-boundary (eight times a
qword)
constexpr size_t NHashesAvx = (NHashes + 7) / 8;
// have as many hash-chunks, although less hashes may be evaluated
array<__m512i, NHashesAvx> hashChunks;
// initialize hashes with starting value
unroll<NHashesAvx>( [&]<size_t IAvx>() L_FORCEINLINE { hashChunks[IAvx] = global.m_initChunks[IAvx]; } );
// eight times the FNV prime
__m512i primeChunk = _mm512_set1_epi64( Prime );
for( string_view rng : rngs )
for( unsigned char c : rng )
{
// broadcast current byte in to lower bytes of eight * qword
__m512i broadcasted = _mm512_cvtepu8_epi64( _mm_set1_epi8( c ) );
unroll<init::NAvx>( [&]<size_t IAvx>() L_FORCEINLINE
{
// (eight times hash ^ eight times same byte) * prime
hashChunks[IAvx] = _mm512_xor_epi64( hashChunks[IAvx], broadcasted );
hashChunks[IAvx] = _mm512_mullo_epi64( hashChunks[IAvx], primeChunk );
// split up to eight hashes into hashes[IQw]
unroll<8>( [&]<size_t IQw>() L_FORCEINLINE
{
// hash index
constexpr size_t IHash = 8 * IAvx + IQw;
// do nothing is hash beyond the unrounded number of hahses
if constexpr( IHash >= NHashes )
return;
__m512i
// broadcast IQw to all qwords
iQw2All = _mm512_set1_epi64( IQw ),
// mirror qword IQw hash chunk to all qwords
qwI2All = _mm512_permutexvar_epi64( iQw2All, hashChunks[IAvx] );
// extract qword IQw to hash IHash
hashes[IHash] = _mm_cvtsi128_si64( _mm512_castsi512_si128(
qwI2All ) );
} );
} );
}
#endif
}
}
template<size_t MaxHashes, bool Atomic>
fnv_bloom<MaxHashes, Atomic>::init::init() noexcept
{
using namespace std;
bool enhanced = false;
#if defined(FNV_BLOOM_AVX512)
// CPUID Leaf vor AVX-512
constexpr unsigned LeafAvx512 = 0x7;
// registers
array<unsigned, 4> regs;
// has AVX-412 leaf
if( cpuid_x86<0>( 0, regs ) >= LeafAvx512 )
{
// get AVX-512 flags in EBX
cpuid_x86<LeafAvx512>( 0, regs );
// test of AVX-512 and AVX-512DQ are available
constexpr unsigned
Avx512F = 1u << 16,
Avx512DQ = 1u << 17,
Requested = Avx512F | Avx512DQ;
enhanced = (bool)(regs[1] & Requested);
}
#endif
// install MaxHashes add- and match-functionss in the table
unroll<MaxHashes>( [&]<size_t IHasher>() L_FORCEINLINE
{
constexpr size_t NHashes = IHasher + 1;
if( enhanced )
{
m_addFns[IHasher] = &fnv_bloom::addRanges<NHashes, true>;
m_matchFns[IHasher] = &fnv_bloom::matchRanges<NHashes, true>;
}
else
{
m_addFns[IHasher] = &fnv_bloom::addRanges<NHashes, false>;
m_matchFns[IHasher] = &fnv_bloom::matchRanges<NHashes, false>;
}
} );
// intial value for FNV qwords
constexpr size_t Base = (size_t)(SixtyFour ? 0xcbf29ce484222325 : 0x811c9dc5);
mt19937 mt;
// calculate hashes like originated from a byte-stream
for( size_t &hash : m_inits )
{
hash = Base;
for( size_t b = StSize; b; --b )
hash = (hash ^ (unsigned char)mt()) * Prime;
}
#if defined(FNV_BLOOM_AVX512)
// rounded up intial value for FNV qwords as AVX-512 qwords
for( __m512i &chunk : m_initChunks )
{
// eight hashes per chunk
array<uint64_t, 8> hashes;
for( uint64_t &hash : hashes )
{
hash = Base;
for( unsigned b = 0; b < 8; ++b )
hash = (hash ^ (unsigned char)mt()) * Prime;
}
// fill AVX-512 chunk with eight hashes calculated recently
for( size_t b = 0; b < 8; ++b )
chunk = _mm512_mask_set1_epi64( chunk, 1u << b, hashes[b] );
}
#endif
}
#if defined(_MSC_VER)
#pragma warning(pop)
#elif defined(__GNUC__)
#pragma GCC diagnostic pop
#endif
--- PyGate Linux v1.5.14
* Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)