A. look at which function we need to add python module.
/*
* Copyright (c) 2012-2017, Jyri J. Virkki
* All rights reserved.
*
* This file is under BSD license. See LICENSE file.
*/
#ifndef _BLOOM_H
#define _BLOOM_H
#ifdef __cplusplus
extern "C" {
#endif
/** ***************************************************************************
* Structure to keep track of one bloom filter. Caller needs to
* allocate this and pass it to the functions below. First call for
* every struct must be to bloom_init().
*
*/
struct bloom
{
// These fields are part of the public interface of this structure.
// Client code may read these values if desired. Client code MUST NOT
// modify any of these.
int entries;
double error;
int bits;
int bytes;
int hashes;
// Fields below are private to the implementation. These may go away or
// change incompatibly at any moment. Client code MUST NOT access or rely
// on these.
double bpe;
unsigned char * bf;
int ready;
};
/** ***************************************************************************
* Initialize the bloom filter for use.
*
* The filter is initialized with a bit field and number of hash functions
* according to the computations from the wikipedia entry:
* http://en.wikipedia.org/wiki/Bloom_filter
*
* Optimal number of bits is:
* bits = (entries * ln(error)) / ln(2)^2
*
* Optimal number of hash functions is:
* hashes = bpe * ln(2)
*
* Parameters:
* -----------
* bloom - Pointer to an allocated struct bloom (see above).
* entries - The expected number of entries which will be inserted.
* Must be at least 1000 (in practice, likely much larger).
* error - Probability of collision (as long as entries are not
* exceeded).
*
* Return:
* -------
* 0 - on success
* 1 - on failure
*
*/
int bloom_init(struct bloom * bloom, int entries, double error);
/** ***************************************************************************
* Deprecated, use bloom_init()
*
*/
int bloom_init_size(struct bloom * bloom, int entries, double error,
unsigned int cache_size);
/** ***************************************************************************
* Check if the given element is in the bloom filter. Remember this may
* return false positive if a collision occured.
*
* Parameters:
* -----------
* bloom - Pointer to an allocated struct bloom (see above).
* buffer - Pointer to buffer containing element to check.
* len - Size of 'buffer'.
*
* Return:
* -------
* 0 - element is not present
* 1 - element is present (or false positive due to collision)
* -1 - bloom not initialized
*
*/
int bloom_check(struct bloom * bloom, const void * buffer, int len);
/** ***************************************************************************
* Add the given element to the bloom filter.
* The return code indicates if the element (or a collision) was already in,
* so for the common check+add use case, no need to call check separately.
*
* Parameters:
* -----------
* bloom - Pointer to an allocated struct bloom (see above).
* buffer - Pointer to buffer containing element to add.
* len - Size of 'buffer'.
*
* Return:
* -------
* 0 - element was not present and was added
* 1 - element (or a collision) had already been added previously
* -1 - bloom not initialized
*
*/
int bloom_add(struct bloom * bloom, const void * buffer, int len);
/** ***************************************************************************
* Print (to stdout) info about this bloom filter. Debugging aid.
*
*/
void bloom_print(struct bloom * bloom);
/** ***************************************************************************
* Deallocate internal storage.
*
* Upon return, the bloom struct is no longer usable. You may call bloom_init
* again on the same struct to reinitialize it again.
*
* Parameters:
* -----------
* bloom - Pointer to an allocated struct bloom (see above).
*
* Return: none
*
*/
void bloom_free(struct bloom * bloom);
/** ***************************************************************************
* Returns version string compiled into library.
*
* Return: version string
*
*/
const char * bloom_version();
#ifdef __cplusplus
}
#endif
#endif
according to the bloom.h file.
I only want to export the following functions for users:
1. bloom_init
2. bloom_check
3. bloom_free
4. bloom_add
B. write .pxd file to interface with c code
cdef extern from "../bloom.h":
struct bloom:
pass
int bloom_init(bloom * bloom, int entries, double error)
int bloom_check(bloom * bloom, const void * buffer, int len)
int bloom_add(bloom * bloom, const void * buffer, int len)
void bloom_free(bloom * bloom)
C. write .pyx file to interface with python code
# distutils: libraries="bloom"
# distutils: library_dirs="."
cimport cbloom
cdef class Bloom:
cdef cbloom.bloom b
cdef cbloom.bloom * pb
def __cinit__(self, n, error):
self.pb = &self.b
result = cbloom.bloom_init(self.pb, n, error)
if result == 1:
raise Exception('bloom_init failed.')
def __dealloc__(self):
cbloom.bloom_free(self.pb)
def add(self, bytes item):
result = cbloom.bloom_add(self.pb, <void*>item, len(item))
if result == 1:
raise Exception('This item has already been add ed!')
def __contains__(self, bytes item):
result = cbloom.bloom_check(self.pb, <void*>item, len(item))
return result==1
if u encountered import error, do not forget to add the path of .so file into your LD_LIBRARY_PATH
and source it.