APG v2.1.0
This commit is contained in:
129
bloom.c
129
bloom.c
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
** Copyright (c) 2001
|
||||
** Copyright (c) 2001, 2002
|
||||
** Adel I. Mirzazhanov. All rights reserved
|
||||
**
|
||||
** Redistribution and use in source and binary forms, with or without
|
||||
@@ -38,8 +38,8 @@
|
||||
** open_filter - open APG Bloom filter file
|
||||
** get_filtersize - get APG Bloom filter size
|
||||
** count_words - count words in plain dictionary file
|
||||
***************************************************************************
|
||||
** hash2bit - generates 4 values (should be 4 values of independent
|
||||
**=============================================================
|
||||
** hash2bit - generates 5 values (should be 5 values of independent
|
||||
** hash functions) from input string.
|
||||
** getbit - get the bit value from file.
|
||||
** putbit - put the bit in the file.
|
||||
@@ -57,7 +57,7 @@ int putbit(FILE * f, h_val bitnum);
|
||||
** char *word - word to insert in the filter
|
||||
** FILE *file - filter file descriptor
|
||||
** h_val filter_size - filter size in bits
|
||||
** RETURN:
|
||||
** OUTPUT:
|
||||
** int
|
||||
** 0 - everything OK
|
||||
** -1 - something wrong
|
||||
@@ -65,14 +65,14 @@ int putbit(FILE * f, h_val bitnum);
|
||||
int
|
||||
insert_word(char *word, FILE *file, h_val filter_size)
|
||||
{
|
||||
h_val h[4];
|
||||
int i = 0;
|
||||
h_val h[H_NUM];
|
||||
int i = 0;
|
||||
|
||||
hash2bit (word, &h[0]);
|
||||
for(i = 0; i < 4; i++)
|
||||
if (putbit (file, h[i] % filter_size)== -1)
|
||||
return (-1);
|
||||
return(0);
|
||||
hash2bit (word, &h[0]);
|
||||
for(i = 0; i < H_NUM; i++)
|
||||
if (putbit (file, h[i] % filter_size)== -1)
|
||||
return (-1);
|
||||
return(0);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -81,7 +81,7 @@ return(0);
|
||||
** char *word - word to check
|
||||
** FILE *file - filter file descriptor
|
||||
** h_val filter_size - filter size in bits
|
||||
** RETURN:
|
||||
** OUTPUT:
|
||||
** int
|
||||
** 0 - word is not in dictionary
|
||||
** 1 - word is in dictionary
|
||||
@@ -90,11 +90,11 @@ return(0);
|
||||
int
|
||||
check_word(char *word, FILE *file, h_val filter_size)
|
||||
{
|
||||
h_val h[4];
|
||||
h_val h[H_NUM];
|
||||
int i = 0;
|
||||
|
||||
hash2bit (word, &h[0]);
|
||||
for(i = 0; i < 4; i++)
|
||||
for(i = 0; i < H_NUM; i++)
|
||||
{
|
||||
switch(getbit(file, h[i] % filter_size))
|
||||
{
|
||||
@@ -116,24 +116,25 @@ check_word(char *word, FILE *file, h_val filter_size)
|
||||
** open filter file and check whether it is a real Bloom filter file
|
||||
** INPUT:
|
||||
** char * f_name - filter filename
|
||||
** RETURN:
|
||||
** const char *mode - "r" or "r+"
|
||||
** OUTPUT:
|
||||
** FILE * - file pointer
|
||||
** NULL - something wrong.
|
||||
*/
|
||||
FILE *
|
||||
open_filter(char * f_name)
|
||||
open_filter(char * f_name, const char *mode)
|
||||
{
|
||||
FILE *f;
|
||||
struct apg_bf_hdr bf_hdr;
|
||||
if ((f = fopen (f_name, "r+")) == NULL)
|
||||
if ((f = fopen (f_name, mode)) == NULL)
|
||||
return(NULL);
|
||||
if(fread ( (void *)&bf_hdr, APGBFHDRSIZE, 1, f) < APGBFHDRSIZE)
|
||||
if (fread ( (void *)&bf_hdr, APGBFHDRSIZE, 1, f) < APGBFHDRSIZE)
|
||||
if (ferror (f) != 0)
|
||||
return(NULL);
|
||||
if( (bf_hdr.id[0] != 'A') || (bf_hdr.id[1] != 'P') ||
|
||||
if ((bf_hdr.id[0] != 'A') || (bf_hdr.id[1] != 'P') ||
|
||||
(bf_hdr.id[2] != 'G') || (bf_hdr.id[3] != 'B') ||
|
||||
(bf_hdr.id[4] != 'F') || (bf_hdr.id[5] != '1') ||
|
||||
(bf_hdr.id[6] != '0') || (bf_hdr.id[7] != '0') ) return (NULL);
|
||||
(bf_hdr.id[6] != '0') || (bf_hdr.id[7] != '1') ) return (NULL);
|
||||
else
|
||||
{
|
||||
if (fseek (f, 0, SEEK_SET) == -1)
|
||||
@@ -142,47 +143,64 @@ open_filter(char * f_name)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
** close_filter - close APG Bloom filter file
|
||||
** close filter file
|
||||
** INPUT:
|
||||
** FILE * f_dsk - filter file pointer
|
||||
** OUTPUT:
|
||||
** int - same as fclose() return value
|
||||
*/
|
||||
int
|
||||
close_filter(FILE *f_dsk)
|
||||
{
|
||||
return(fclose(f_dsk));
|
||||
}
|
||||
|
||||
/*
|
||||
** get_filtersize - get APG Bloom filter size
|
||||
** INPUT:
|
||||
** FILE *f - filter file descriptor
|
||||
** RETURN:
|
||||
** h_val - size of APG Bloom filter.
|
||||
** OUTPUT:
|
||||
** h_val - size of APG Bloom filter.
|
||||
** 0 - something wrong
|
||||
*/
|
||||
h_val
|
||||
get_filtersize(FILE * f)
|
||||
{
|
||||
struct apg_bf_hdr bf_hdr;
|
||||
fread ( (void *)&bf_hdr, APGBFHDRSIZE, 1, f);
|
||||
fseek (f, 0, SEEK_SET);
|
||||
if (fread ( (void *)&bf_hdr, APGBFHDRSIZE, 1, f) < APGBFHDRSIZE)
|
||||
if (ferror (f) != 0)
|
||||
return(0);
|
||||
if (fseek (f, 0, SEEK_SET) == -1)
|
||||
return(0);
|
||||
return( (h_val)bf_hdr.fs);
|
||||
}
|
||||
|
||||
/*
|
||||
** create_filter - create initial(empty) filter file
|
||||
** 4 - number of hash functions
|
||||
** 5 - number of hash functions
|
||||
** 0.0001 (0.01%) - probability of false positives
|
||||
** INPUT:
|
||||
** char * f_name - filter filename
|
||||
** unsigned long int n_words - number of words in filter
|
||||
** RETURN:
|
||||
** OUTPUT:
|
||||
** FILE * - filter file descriptor
|
||||
** NULL - something wrong
|
||||
***********************************************************
|
||||
** n - number of words in the filter
|
||||
** N - size of filter(?)
|
||||
** NOTES:
|
||||
** n - number of words in the filter
|
||||
** N - size of filter(?)
|
||||
**
|
||||
** a=(1-(4/N))^n
|
||||
** 0.0001=(1-a)^4 ==> 1-a=0.1 ==> a=0.9 ==>
|
||||
** 0.9=(1-(4/N))^n ==> 0.9^(1/n)=1-(4/N) ==>
|
||||
** a=(1-(5/N))^n
|
||||
** 0.0001=(1-a)^5 ==> 1-a=0.15849... ==> a=0.84151068 ==>
|
||||
** 0.84151068=(1-(5/N))^n ==> 0.84151068^(1/n)=1-(5/N) ==>
|
||||
**
|
||||
** N=4/(1-[0.9^(1/n)])
|
||||
** N=5/(1-[0.84151068^(1/n)])
|
||||
**
|
||||
** 4
|
||||
** N = ----------
|
||||
** 1/n
|
||||
** 1 - 0.9
|
||||
** 5
|
||||
** N = -----------------
|
||||
** 1/n
|
||||
** 1 - 0.84151068
|
||||
*/
|
||||
FILE *
|
||||
create_filter(char * f_name, unsigned long int n_words)
|
||||
@@ -199,7 +217,7 @@ create_filter(char * f_name, unsigned long int n_words)
|
||||
bf_hdr.id[4] = 'F';
|
||||
bf_hdr.id[5] = '1';
|
||||
bf_hdr.id[6] = '0';
|
||||
bf_hdr.id[7] = '0';
|
||||
bf_hdr.id[7] = '1';
|
||||
bf_hdr.fs = FSIZE_BIT(n_words);
|
||||
|
||||
if ((f = fopen (f_name, "w+")) == NULL)
|
||||
@@ -223,18 +241,25 @@ create_filter(char * f_name, unsigned long int n_words)
|
||||
** count_words - count words in plain dictionary file
|
||||
** INPUT:
|
||||
** FILE *dict_file - plain dictionary file descriptor
|
||||
** RETURN:
|
||||
** OUTPUT:
|
||||
** h_val - amount of words in dictionary file
|
||||
** 0 - something wrong
|
||||
*/
|
||||
h_val
|
||||
count_words(FILE *dict_file)
|
||||
{
|
||||
h_val i = 0L; /* word counter */
|
||||
char *string; /* temp string holder */
|
||||
string = (char *) calloc(1,MAX_DICT_STRLEN);
|
||||
char *tmp; /* just tmp char pointer and nothing more it has no memory assigned */
|
||||
if ((string = (char *) calloc(1,MAX_DICT_STRLEN)) == NULL)
|
||||
return(0);
|
||||
while ((fgets(string, MAX_DICT_STRLEN, dict_file) != NULL))
|
||||
i++;
|
||||
fseek (dict_file, 0, SEEK_SET);
|
||||
{
|
||||
tmp = (char *)strtok (string," \t\n\0");
|
||||
if (tmp != NULL) i++;
|
||||
}
|
||||
if (fseek (dict_file, 0, SEEK_SET) == -1)
|
||||
return (0);
|
||||
free ((void *) string);
|
||||
return (i);
|
||||
}
|
||||
@@ -245,19 +270,19 @@ count_words(FILE *dict_file)
|
||||
** INPUT:
|
||||
** char *word - word to hash
|
||||
** h_val *b - pointer to bitnumber array
|
||||
** RETURN
|
||||
** OUTPUT:
|
||||
** h_val * - pointer to bitnumber array
|
||||
*/
|
||||
h_val *
|
||||
hash2bit(char * word, h_val *b)
|
||||
{
|
||||
struct apg_MD5Context context;
|
||||
unsigned char cs[16];
|
||||
apg_SHA_INFO context;
|
||||
BYTE cs[SHA_DIGESTSIZE];
|
||||
|
||||
apg_MD5Init (&context);
|
||||
apg_MD5Update (&context, word, strlen(word));
|
||||
apg_MD5Final (cs, &context);
|
||||
return ( (h_val *)memcpy( (void *)b, (void *)&cs[0], 16));
|
||||
apg_shaInit (&context);
|
||||
apg_shaUpdate (&context, (BYTE *)word, strlen(word));
|
||||
apg_shaFinal (&context, cs);
|
||||
return ( (h_val *)memcpy( (void *)b, (void *)&cs[0], SHA_DIGESTSIZE));
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -265,7 +290,7 @@ hash2bit(char * word, h_val *b)
|
||||
** INPUT:
|
||||
** FILE *f - file descriptor
|
||||
** h_val bitnum - bit number
|
||||
** RETURN:
|
||||
** OUTPUT:
|
||||
** int
|
||||
** 0,1 - bit value
|
||||
** -1 - something wrong
|
||||
@@ -297,7 +322,7 @@ getbit(FILE * f, h_val bitnum)
|
||||
** INPUT:
|
||||
** FILE *f - file descriptor
|
||||
** h_val bitnum - bit number
|
||||
** RETURN:
|
||||
** OUTPUT:
|
||||
** int
|
||||
** 0 - everything OK
|
||||
** -1 - something wrong
|
||||
@@ -327,4 +352,4 @@ putbit(FILE * f, h_val bitnum)
|
||||
return (-1);
|
||||
return (0);
|
||||
}
|
||||
/* END OF bloom.c file */
|
||||
/* END OF bloom.c file */
|
||||
|
||||
Reference in New Issue
Block a user