• AVX-512 enabled bloom filter

    From Bonita Montero@3:633/10 to All on Thu Apr 30 09:03:09 2026
    This is a AVX-512 enabled bloom filter.

    #include <array>
    #include <string_view>
    #include <span>
    #include <bit>
    #include <vector>
    #include <atomic>
    #include <algorithm>
    #include <random>
    #include <cassert>
    #if defined(_MSC_VER) && defined(_M_X64)
    #include <intrin.h>
    #elif (defined(__GNUC__) || defined(__clang__)) && defined(__x86_64__)
    #include <x86intrin.h>
    #endif
    #include "inline.hpp"
    #include "unroll.hpp"
    #include "pct.hpp"
    #include "cpuid.hpp"

    #if defined(_MSC_VER)
    #pragma warning(push)
    #pragma warning(disable: 26495)
    #elif defined(__GNUC__)
    #pragma GCC diagnostic push
    #pragma GCC diagnostic ignored "-Wignored-attributes"
    #endif

    #if defined(_M_X64) || defined(__AVX512F__) && defined(__AVX512DQ__)
    #define FNV_BLOOM_AVX512
    #endif

    template<size_t MaxHashes, bool Atomic = false>
    struct fnv_bloom
    {
    static constexpr float DefaultFpr = (float)1.0_percent;
    using ranges = std::span<const std::string_view>;
    struct match_result { bool matched; size_t hash; };
    fnv_bloom();
    fnv_bloom( size_t nKeys, size_t nHashes = MaxHashes, float fpr = DefaultFpr );
    fnv_bloom( const fnv_bloom & ) = default;
    fnv_bloom( fnv_bloom && ) noexcept;
    fnv_bloom &operator =( const fnv_bloom & ) = default;
    fnv_bloom &operator =( fnv_bloom && ) noexcept;
    size_t add( std::string_view rng ) noexcept;
    size_t add( ranges rngs ) noexcept;
    match_result match( std::string_view rng ) const noexcept;
    match_result match( ranges rngs ) const noexcept;
    void resize( size_t nKeys, size_t nHashes = MaxHashes, float fpr = DefaultFpr );
    float usage() const noexcept;
    size_t n_keys() const noexcept;
    size_t n_bits() const noexcept;
    double fpr() const noexcept;
    static size_t n_bits( size_t nKeys, size_t nHashes = MaxHashes, float fpr = DefaultFpr ) noexcept;
    static float fpr( size_t nBits, size_t nKeys, size_t nHashes ) noexcept;
    private:
    static constexpr size_t
    StSize = sizeof( size_t ),
    StBits = StSize * 8;
    static constexpr bool SixtyFour = StSize == 8;
    static_assert( StSize == 4 || SixtyFour );
    static constexpr size_t Prime = (size_t)(SixtyFour ? 0x00000100000001b3
    : 0x01000193);
    using add_fn = size_t (fnv_bloom<MaxHashes, Atomic>::*)( ranges ) noexcept;
    using match_fn = match_result (fnv_bloom<MaxHashes, Atomic>::*)( ranges
    ) const noexcept;
    using bitmap_word = std::conditional_t<Atomic, std::atomic<size_t>, size_t>;
    std::vector<bitmap_word> m_bits;
    size_t
    m_iFn,
    m_nKeys;
    template<size_t NHashes, bool Enhanced = false>
    size_t addRanges( ranges rngs ) noexcept;
    template<size_t NHashes, bool Enhanced = false>
    match_result matchRanges( ranges rngs ) const noexcept;
    template<size_t NHashes, bool Enhanced= false>
    static void makeHashes( std::array<size_t, NHashes> &hashes, ranges rngs ) noexcept;
    inline static struct init
    {
    init() noexcept;
    std::array<add_fn, MaxHashes> m_addFns;
    std::array<match_fn, MaxHashes> m_matchFns;
    std::array<size_t, MaxHashes> m_inits;
    #if defined(FNV_BLOOM_AVX512)
    static constexpr size_t NAvx = (MaxHashes + 7) / 8;
    alignas( 64 ) std::array<__m512i, NAvx> m_initChunks;
    #endif
    } global;
    };

    template<size_t MaxHashes, bool Atomic>
    fnv_bloom<MaxHashes, Atomic>::fnv_bloom() :
    m_bits(),
    m_iFn( 0 ),
    m_nKeys( 0 )
    {
    }

    template<size_t MaxHashes, bool Atomic>
    fnv_bloom<MaxHashes, Atomic>::fnv_bloom( fnv_bloom &&other ) noexcept
    {
    using namespace std;
    m_bits = move( other.m_bits );
    m_iFn = other.m_iFn;
    other.m_nKeys = 0;
    }

    template<size_t MaxHashes, bool Atomic>
    auto fnv_bloom<MaxHashes, Atomic>::operator =( fnv_bloom &&other )
    noexcept -> fnv_bloom &
    {
    using namespace std;
    m_bits = move( other.m_bits );
    m_iFn = other.m_iFn;
    other.m_nKeys = 0;
    return *this;
    }

    template<size_t MaxHashes, bool Atomic>
    fnv_bloom<MaxHashes, Atomic>::fnv_bloom( size_t nKeys, size_t nHashes,
    float fpr )
    {
    resize( nKeys, nHashes, fpr );
    }

    template<size_t MaxHashes, bool Atomic>
    FORCEINLINE size_t fnv_bloom<MaxHashes, Atomic>::add( std::string_view
    rng ) noexcept
    {
    return add( ranges( &rng, 1 ) );
    }

    template<size_t MaxHashes, bool Atomic>
    FORCEINLINE size_t fnv_bloom<MaxHashes, Atomic>::add( ranges rngs ) noexcept
    {

    return (this->*global.m_addFns[m_iFn])( rngs );
    }

    template<size_t MaxHashes, bool Atomic>
    FORCEINLINE auto fnv_bloom<MaxHashes, Atomic>::match( std::string_view
    rng ) const noexcept -> match_result
    {
    return match( ranges( &rng, 1 ) );
    }

    template<size_t MaxHashes, bool Atomic>
    FORCEINLINE auto fnv_bloom<MaxHashes, Atomic>::match( ranges rngs )
    const noexcept -> match_result
    {
    return (this->*global.m_matchFns[m_iFn])( rngs );
    }

    template<size_t MaxHashes, bool Atomic>
    void fnv_bloom<MaxHashes, Atomic>::resize( size_t nKeys, size_t nHashes,
    float fpr )
    {
    using namespace std;
    // resize bitmap, thereby erasing everything
    size_t nBits = n_bits( nKeys, nHashes, fpr );
    if( m_bits.size() * StBits != nBits )
    m_bits = vector<bitmap_word>( nBits / StBits );
    else
    if constexpr( Atomic )
    for( size_t a = nBits / StBits; a--; )
    m_bits[a].store( 0, memory_order_relaxed );
    else
    fill( m_bits.begin(), m_bits.end(), 0 );
    m_iFn = (nHashes <= MaxHashes ? nHashes : MaxHashes) - 1;
    m_nKeys = 0;
    }

    template<size_t MaxHashes, bool Atomic>
    float fnv_bloom<MaxHashes, Atomic>::usage() const noexcept
    {
    using namespace std;
    // return the number of set one-bits
    size_t nSet = 0;
    for( const bitmap_word &word : m_bits )
    if constexpr( Atomic )
    nSet += popcount( word.load( memory_order_relaxed ) );
    else
    nSet += popcount( word );
    return (float)((double)nSet / (double)n_bits());
    }

    template<size_t MaxHashes, bool Atomic>
    double fnv_bloom<MaxHashes, Atomic>::fpr() const noexcept
    {
    return fpr( n_bits(), m_nKeys, m_iFn + 1 );
    }

    template<size_t MaxHashes, bool Atomic>
    inline size_t fnv_bloom<MaxHashes, Atomic>::n_keys() const noexcept
    {
    return m_nKeys;
    }

    template<size_t MaxHashes, bool Atomic>
    inline size_t fnv_bloom<MaxHashes, Atomic>::n_bits() const noexcept
    {
    return m_bits.size() * StBits;
    }

    template<size_t MaxHashes, bool Atomic>
    size_t fnv_bloom<MaxHashes, Atomic>::n_bits( size_t nKeys, size_t
    nHashes, float fpr ) noexcept
    {
    using namespace std;
    // calculate the number of necessary bits according to n keys, n hashes
    and false positive rate
    if( !nKeys )
    return 0;
    static constexpr float MinFpr = (float)1.0_permill;
    constexpr size_t MaxBits = numeric_limits<ptrdiff_t>::min();
    fpr = !isnan( fpr ) && !isinf( fpr ) ? fpr : DefaultFpr;
    fpr = max( min( fpr, 1.0f ), MinFpr );
    nHashes += (size_t)!nHashes;
    double
    dKeys = (double)nKeys,
    dHashes = (double)nHashes,
    dBits;
    dBits = 1.0 - pow( fpr, 1.0 / dHashes );
    dBits = 1.0 - pow( dBits, 1.0 / (dHashes * dKeys) );
    dBits = 1.0 / dBits;
    assert(dBits >= 0.0);
    size_t nBits = (size_t)min( dBits, (double)MaxBits );
    return bit_ceil( max( nBits, StBits ) );
    }

    template<size_t MaxHashes, bool Atomic>
    float fnv_bloom<MaxHashes, Atomic>::fpr( size_t nBits, size_t nKeys,
    size_t nHashes ) noexcept
    {
    using namespace std;
    // calculate false positive rate given for a bits in bitmap, b keys and
    c hashes
    constexpr size_t MaxBits = numeric_limits<ptrdiff_t>::min();
    if( nBits < MaxBits )
    nBits = bit_ceil( nBits );
    else
    nBits = MaxBits;
    double
    dBits = (double)nBits,
    dKeys = (double)nKeys,
    dHashes = (double)nHashes,
    fpr;
    fpr = 1.0 - 1.0 / dBits;
    fpr = 1.0 - pow( fpr, dHashes * dKeys );
    return (float)pow( fpr, dHashes );
    }

    template<size_t MaxHashes, bool Atomic>
    template<size_t NHashes, bool Enhanced>
    size_t fnv_bloom<MaxHashes, Atomic>::addRanges( ranges rngs ) noexcept
    {
    using namespace std;
    size_t hMask = n_bits() - 1;
    array<size_t, NHashes> hashes;
    // calculate hashes
    makeHashes<NHashes, Enhanced>( hashes, rngs );
    unroll<NHashes>( [&]<size_t H>() L_FORCEINLINE
    {
    // set bits at position hashes[H]
    size_t
    bit = hashes[H] & hMask,
    mask = 1ull << (bit % StBits);
    if constexpr( Atomic )
    m_bits[bit / StBits].fetch_or( mask, memory_order_relaxed );
    else
    m_bits[bit / StBits] |= mask;
    } );
    ++m_nKeys;
    size_t hash = hashes[0];
    unroll<NHashes - 1>( [&]<size_t H>() L_FORCEINLINE { hash ^= hashes[H +
    1]; } );
    return hash;
    }

    template<size_t MaxHashes, bool Atomic>
    template<size_t NHashes, bool Enhanced>
    auto fnv_bloom<MaxHashes, Atomic>::matchRanges( ranges rngs ) const
    noexcept -> match_result
    {
    using namespace std;
    size_t hMask = n_bits() - 1;
    array<size_t, NHashes> hashes;
    // calculate hashes
    makeHashes<NHashes, Enhanced>( hashes, rngs );
    bool matched = unroll<NHashes>( [&]<size_t H>() L_FORCEINLINE
    {
    // match bit at position hashes[H]
    size_t
    bit = hashes[H] & hMask,
    mask = (size_t)1 << (bit % StBits),
    word;
    if constexpr( Atomic )
    word = m_bits[bit / StBits].load( memory_order_relaxed );
    else
    word = m_bits[bit / StBits];
    return (bool)(word & mask);
    } );
    if( !matched )
    return match_result( false, 0 );
    // return single hash XORed from hashes[N] for further usage
    size_t hash = hashes[0];
    unroll<NHashes - 1>( [&]<size_t H>() L_FORCEINLINE { hash ^= hashes[H +
    1]; } );
    return match_result( true, hash );
    }

    template<size_t MaxHashes, bool Atomic>
    template<size_t NHashes, bool Enhanced>
    FORCEINLINE void fnv_bloom<MaxHashes, Atomic>::makeHashes(
    std::array<size_t, NHashes> &hashes, ranges rngs ) noexcept
    {
    using namespace std;
    if constexpr( !Enhanced )
    {
    // initialize hashes with starting value
    unroll<NHashes>( [&]<size_t H>() L_FORCEINLINE { hashes[H] = global.m_inits[H]; } );
    for( string_view rng : rngs )
    for( unsigned char c : rng )
    // calculate NHashes time hash[H] interleaved
    unroll<NHashes>( [&]<size_t H>() L_FORCEINLINE { hashes[H] =
    (hashes[H] ^ c) * Prime; } );
    }
    else
    {
    #if defined(FNV_BLOOM_AVX512)
    // round number of hashes up to the next avx-boundary (eight times a
    qword)
    constexpr size_t NHashesAvx = (NHashes + 7) / 8;
    // have as many hash-chunks, although less hashes may be evaluated
    array<__m512i, NHashesAvx> hashChunks;
    // initialize hashes with starting value
    unroll<NHashesAvx>( [&]<size_t IAvx>() L_FORCEINLINE { hashChunks[IAvx] = global.m_initChunks[IAvx]; } );
    // eight times the FNV prime
    __m512i primeChunk = _mm512_set1_epi64( Prime );
    for( string_view rng : rngs )
    for( unsigned char c : rng )
    {
    // broadcast current byte in to lower bytes of eight * qword
    __m512i broadcasted = _mm512_cvtepu8_epi64( _mm_set1_epi8( c ) );
    unroll<init::NAvx>( [&]<size_t IAvx>() L_FORCEINLINE
    {
    // (eight times hash ^ eight times same byte) * prime
    hashChunks[IAvx] = _mm512_xor_epi64( hashChunks[IAvx], broadcasted );
    hashChunks[IAvx] = _mm512_mullo_epi64( hashChunks[IAvx], primeChunk );
    // split up to eight hashes into hashes[IQw]
    unroll<8>( [&]<size_t IQw>() L_FORCEINLINE
    {
    // hash index
    constexpr size_t IHash = 8 * IAvx + IQw;
    // do nothing is hash beyond the unrounded number of hahses
    if constexpr( IHash >= NHashes )
    return;
    __m512i
    // broadcast IQw to all qwords
    iQw2All = _mm512_set1_epi64( IQw ),
    // mirror qword IQw hash chunk to all qwords
    qwI2All = _mm512_permutexvar_epi64( iQw2All, hashChunks[IAvx] );
    // extract qword IQw to hash IHash
    hashes[IHash] = _mm_cvtsi128_si64( _mm512_castsi512_si128(
    qwI2All ) );
    } );
    } );
    }
    #endif
    }
    }

    template<size_t MaxHashes, bool Atomic>
    fnv_bloom<MaxHashes, Atomic>::init::init() noexcept
    {
    using namespace std;
    bool enhanced = false;
    #if defined(FNV_BLOOM_AVX512)
    // CPUID Leaf vor AVX-512
    constexpr unsigned LeafAvx512 = 0x7;
    // registers
    array<unsigned, 4> regs;
    // has AVX-412 leaf
    if( cpuid_x86<0>( 0, regs ) >= LeafAvx512 )
    {
    // get AVX-512 flags in EBX
    cpuid_x86<LeafAvx512>( 0, regs );
    // test of AVX-512 and AVX-512DQ are available
    constexpr unsigned
    Avx512F = 1u << 16,
    Avx512DQ = 1u << 17,
    Requested = Avx512F | Avx512DQ;
    enhanced = (bool)(regs[1] & Requested);
    }
    #endif
    // install MaxHashes add- and match-functionss in the table
    unroll<MaxHashes>( [&]<size_t IHasher>() L_FORCEINLINE
    {
    constexpr size_t NHashes = IHasher + 1;
    if( enhanced )
    {
    m_addFns[IHasher] = &fnv_bloom::addRanges<NHashes, true>;
    m_matchFns[IHasher] = &fnv_bloom::matchRanges<NHashes, true>;
    }
    else
    {
    m_addFns[IHasher] = &fnv_bloom::addRanges<NHashes, false>;
    m_matchFns[IHasher] = &fnv_bloom::matchRanges<NHashes, false>;
    }
    } );
    // intial value for FNV qwords
    constexpr size_t Base = (size_t)(SixtyFour ? 0xcbf29ce484222325 : 0x811c9dc5);
    mt19937 mt;
    // calculate hashes like originated from a byte-stream
    for( size_t &hash : m_inits )
    {
    hash = Base;
    for( size_t b = StSize; b; --b )
    hash = (hash ^ (unsigned char)mt()) * Prime;
    }
    #if defined(FNV_BLOOM_AVX512)
    // rounded up intial value for FNV qwords as AVX-512 qwords
    for( __m512i &chunk : m_initChunks )
    {
    // eight hashes per chunk
    array<uint64_t, 8> hashes;
    for( uint64_t &hash : hashes )
    {
    hash = Base;
    for( unsigned b = 0; b < 8; ++b )
    hash = (hash ^ (unsigned char)mt()) * Prime;
    }
    // fill AVX-512 chunk with eight hashes calculated recently
    for( size_t b = 0; b < 8; ++b )
    chunk = _mm512_mask_set1_epi64( chunk, 1u << b, hashes[b] );
    }
    #endif
    }

    #if defined(_MSC_VER)
    #pragma warning(pop)
    #elif defined(__GNUC__)
    #pragma GCC diagnostic pop
    #endif

    --- PyGate Linux v1.5.14
    * Origin: Dragon's Lair, PyGate NNTP<>Fido Gate (3:633/10)