Skip to content

Commit 5a1fe21

Browse files
pjsgChristian Krämer
authored andcommitted
Addition of a Bloom Filter object (nodemcu#2176)
* Initial checkin * Add bloom.md into mkdocs * Added reset and improved info * Update bloom.c * Update bloom.md * Add Wikipedia link
1 parent 513a782 commit 5a1fe21

File tree

4 files changed

+297
-0
lines changed

4 files changed

+297
-0
lines changed

app/include/user_modules.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
//#define LUA_USE_MODULES_AM2320
2424
//#define LUA_USE_MODULES_APA102
2525
#define LUA_USE_MODULES_BIT
26+
//#define LUA_USE_MODULES_BLOOM
2627
//#define LUA_USE_MODULES_BMP085
2728
//#define LUA_USE_MODULES_BME280
2829
//#define LUA_USE_MODULES_BME680

app/modules/bloom.c

Lines changed: 192 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,192 @@
1+
/*
2+
* Module for bloom filters
3+
*
4+
* Philip Gladstone, N1DQ
5+
*/
6+
7+
#include "module.h"
8+
#include "lauxlib.h"
9+
#include "c_types.h"
10+
#include "../crypto/sha2.h"
11+
12+
#if defined(LUA_USE_MODULES_BLOOM) && !defined(SHA2_ENABLE)
13+
#error Must have SHA2_ENABLE set for BLOOM module
14+
#endif
15+
16+
typedef struct {
17+
uint8 fns;
18+
uint16 size;
19+
uint32 occupancy;
20+
uint32 buf[];
21+
} bloom_t;
22+
23+
/*
 * Hash `buf` once with SHA-256 and use successive slices of the digest as the
 * filter's hash functions, either testing or setting the selected bits.
 *
 *   buf, len - the bytes to hash
 *   filter   - the filter to probe; buf[] holds filter->size 32-bit words
 *   add      - true:  set every probed bit that is still clear and count it
 *                     in filter->occupancy
 *              false: stop at the first clear bit, leaving the filter untouched
 *
 * Returns true only if every probed bit was already set beforehand.
 */
static bool add_or_check(const uint8 *buf, size_t len, bloom_t *filter, bool add) {
  SHA256_CTX ctx;
  SHA256_Init(&ctx);
  SHA256_Update(&ctx, buf, len);

  // The digest is raw bytes: keep it as uint8 so it matches the uint8 cursor
  // used below instead of relying on a char* -> uint8* conversion.
  uint8 hash[32];
  SHA256_Final(hash, &ctx);

  int i;
  uint32 bits = filter->size << 5;   // 32 bits per word
  const uint8 *h = hash;
  bool prev = true;

  // Each probe consumes 3 digest bytes. Up to 10 probes advance by 3 bytes
  // (last read ends at byte 29); 11..15 probes advance by only 2 so that the
  // final read (bytes 28..30) still lies inside the 32 byte digest.
  int hstep = filter->fns > 10 ? 2 : 3;

  for (i = 0; i < filter->fns; i++) {
    uint32 val = ((((uint32) h[0] << 8) + h[1]) << 8) + h[2];
    h += hstep;
    val = val % bits;

    uint32 offset = val >> 5;
    // Unsigned shift: `1 << 31` on a signed int is undefined behaviour.
    uint32 bit = (uint32) 1 << (val & 31);

    if (!(filter->buf[offset] & bit)) {
      prev = false;
      if (add) {
        filter->buf[offset] |= bit;
        filter->occupancy++;
      } else {
        // A single clear bit proves the string was never added.
        break;
      }
    }
  }

  return prev;
}
57+
58+
/*
 * Lua: present = filter:check(string)
 *
 * Probes the filter for `string` without modifying it and pushes the
 * boolean result of add_or_check().
 */
static int bloom_filter_check(lua_State *L) {
  bloom_t *self = (bloom_t *) luaL_checkudata(L, 1, "bloom.filter");

  size_t nbytes;
  const char *text = luaL_checklstring(L, 2, &nbytes);

  lua_pushboolean(L, add_or_check((const uint8 *) text, nbytes, self, false));
  return 1;
}
68+
69+
/*
 * Lua: seen = filter:add(string)
 *
 * Sets the filter bits selected by `string` and pushes the boolean result of
 * add_or_check(): true when all of those bits were already set.
 */
static int bloom_filter_add(lua_State *L) {
  bloom_t *self = (bloom_t *) luaL_checkudata(L, 1, "bloom.filter");

  size_t nbytes;
  const char *text = luaL_checklstring(L, 2, &nbytes);

  lua_pushboolean(L, add_or_check((const uint8 *) text, nbytes, self, true));
  return 1;
}
79+
80+
/*
 * Lua: filter:reset()
 *
 * Clears every bit of the filter and zeroes the occupancy counter.
 * Returns nothing to Lua.
 */
static int bloom_filter_reset(lua_State *L) {
  bloom_t *self = (bloom_t *) luaL_checkudata(L, 1, "bloom.filter");

  // size counts 32-bit words, so the byte length is size * 4.
  self->occupancy = 0;
  memset(self->buf, 0, self->size * sizeof(self->buf[0]));

  return 0;
}
88+
89+
/*
 * Lua: bits, fns, occupancy, fprate = filter:info()
 *
 * Pushes the filter size in bits, the number of hash functions, the number
 * of set bits, and an estimate of the false positive rate expressed as the
 * n of a 1-in-n chance, capped at 1,000,000.
 */
static int bloom_filter_info(lua_State *L) {
  enum { FP_LIMIT = 1000000 };

  bloom_t *self = (bloom_t *) luaL_checkudata(L, 1, "bloom.filter");

  lua_pushinteger(L, self->size << 5);
  lua_pushinteger(L, self->fns);
  lua_pushinteger(L, self->occupancy);

  // An empty filter reports the cap; otherwise the estimate is
  // (bits / occupancy) raised to the power fns.
  uint64 inverse = FP_LIMIT;

  if (self->occupancy != 0) {
    unsigned int ratio = (self->size << 5) / self->occupancy;
    int round = 1;

    // First pass: plain integer ratio, stopping once the cap is reached.
    inverse = ratio;
    while (round < self->fns && inverse < FP_LIMIT) {
      inverse *= ratio;
      round++;
    }

    if (inverse < FP_LIMIT) {
      // The integer ratio lost its fractional part, so redo the power with
      // the ratio scaled by 256 and rescale after every multiplication.
      unsigned int ratio_fx = (self->size << 13) / self->occupancy;
      uint64 inverse_fx = ratio_fx;

      for (round = 1; round < self->fns && inverse_fx < 256000000; round++) {
        inverse_fx = (inverse_fx * ratio_fx) >> 8;
      }

      inverse = inverse_fx >> 8;
    }
  }

  if (inverse > FP_LIMIT) {
    inverse = FP_LIMIT;
  }
  lua_pushinteger(L, (int) inverse);

  return 4;
}
126+
127+
/*
 * Lua: filter = bloom.create(elements, errorrate)
 *
 *   elements  - expected number of strings to be added; must be positive
 *   errorrate - n, where the wanted false positive rate is 1-in-n
 *
 * Sizes the bit array at roughly 1.5 * elements * (bit length of errorrate)
 * bits, rounded up to whole 32-bit words with a minimum of 256 bits, chooses
 * between 2 and 15 hash functions, and returns a zeroed "bloom.filter"
 * userdata.
 *
 * Raises a Lua error if `elements` is out of range or if the resulting filter
 * would need more words than the uint16 `size` field of bloom_t can hold.
 */
static int bloom_create(lua_State *L) {
  int items = luaL_checkinteger(L, 1);
  int error = luaL_checkinteger(L, 2);

  // bloom_t stores the word count in a uint16, so this is the largest
  // representable filter, in bits.
  const int max_bits = 0xFFFF << 5;

  // items == 0 would divide by zero below. Bounding items by max_bits also
  // keeps the signed arithmetic below far away from overflow.
  luaL_argcheck(L, items > 0 && items <= max_bits, 1, "out of range");

  // logp becomes minus the number of significant bits in `error`,
  // i.e. about -(log2(error) + 1). It stays 0 when error <= 0.
  int n = error;
  int logp = 0;
  while (n > 0) {
    n = n >> 1;
    logp--;
  }

  int bits = -items * logp;
  bits += bits >> 1;

  // Round up to a whole number of 32-bit words.
  bits = (bits + 31) & ~31;

  if (bits < 256) {
    bits = 256;
  }

  if (bits > max_bits) {
    // Storing the word count would silently truncate it (possibly to 0,
    // which makes add/check take a modulus by zero).
    return luaL_error(L, "filter too large");
  }

  int size = bits >> 3;   // bytes of bit array

  // About two thirds of the bits-per-item figure.
  int fns = bits / items;
  fns = (fns >> 1) + fns / 6;

  if (fns < 2) {
    fns = 2;
  }
  if (fns > 15) {
    // add_or_check() can take at most 15 slices from one SHA-256 digest.
    fns = 15;
  }

  bloom_t *filter = (bloom_t *) lua_newuserdata(L, sizeof(bloom_t) + size);

  // Associate its metatable
  luaL_getmetatable(L, "bloom.filter");
  lua_setmetatable(L, -2);

  memset(filter, 0, sizeof(bloom_t) + size);
  filter->size = size >> 2;   // bytes -> 32-bit words
  filter->fns = fns;

  return 1;
}
171+
172+
static const LUA_REG_TYPE bloom_filter_map[] = {
173+
{ LSTRKEY( "add" ), LFUNCVAL( bloom_filter_add ) },
174+
{ LSTRKEY( "check" ), LFUNCVAL( bloom_filter_check ) },
175+
{ LSTRKEY( "reset" ), LFUNCVAL( bloom_filter_reset ) },
176+
{ LSTRKEY( "info" ), LFUNCVAL( bloom_filter_info ) },
177+
{ LSTRKEY( "__index" ), LROVAL( bloom_filter_map ) },
178+
{ LNILKEY, LNILVAL }
179+
};
180+
181+
// Module function map
182+
static const LUA_REG_TYPE bloom_map[] = {
183+
{ LSTRKEY( "create" ), LFUNCVAL( bloom_create ) },
184+
{ LNILKEY, LNILVAL }
185+
};
186+
187+
LUALIB_API int bloom_open(lua_State *L) {
188+
luaL_rometatable(L, "bloom.filter", (void *)bloom_filter_map);
189+
return 1;
190+
}
191+
192+
NODEMCU_MODULE(BLOOM, "bloom", bloom_map, bloom_open);

docs/en/modules/bloom.md

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
# Bloom Module
2+
| Since | Origin / Contributor | Maintainer | Source |
3+
| :----- | :-------------------- | :---------- | :------ |
4+
| 2017-11-13 | [Philip Gladstone](https://github.com/pjsg) | [Philip Gladstone](https://github.com/pjsg) | [bloom.c](../../../app/modules/bloom.c)|
5+
6+
7+
This module implements a [Bloom filter](https://en.wikipedia.org/wiki/Bloom_filter). This is a probabilistic data structure that is used to test for set membership. There are two operations -- `add` and `check` -- that allow
8+
arbitrary strings to be added to the set or tested for set membership. Since this is a probabilistic data structure, the answer returned can be incorrect. However,
9+
if the string *is* a member of the set, then the `check` operation will always return `true`.
10+
11+
## bloom.create()
12+
Create a filter object.
13+
14+
#### Syntax
15+
`bloom.create(elements, errorrate)`
16+
17+
#### Parameters
18+
- `elements` The largest number of elements to be added to the filter.
19+
- `errorrate` The error rate (the false positive rate). This is represented as `n` where the false positive rate is `1 / n`. This is the maximum rate of `check` returning true when the string is *not* in the set.
20+
21+
#### Returns
22+
A `filter` object.
23+
24+
#### Example
25+
26+
```
27+
filter = bloom.create(10000, 100) -- this will use around 13kB of memory
28+
```
29+
30+
## filter:add()
31+
Adds a string to the set and returns an indication of whether the string was already present.
32+
33+
#### Syntax
34+
`filter:add(string)`
35+
36+
#### Parameters
37+
- `string` The string to be added to the filter set.
38+
39+
#### Returns
40+
`true` if the string was already present in the filter. `false` otherwise.
41+
42+
#### Example
43+
44+
```
45+
if filter:add("apple") then
46+
print ("Seen an apple before!")
47+
else
48+
print ("Noted that the first apple has been seen")
49+
end
50+
```
51+
52+
## filter:check()
53+
Checks to see if a string is present in the filter set.
54+
55+
#### Syntax
56+
`present = filter:check(string)`
57+
58+
#### Parameters
59+
- `string` The string to be checked for membership in the set.
60+
61+
#### Returns
62+
`true` if the string is probably present in the filter (this can be a false positive). `false` if the string has definitely not been added.
63+
64+
#### Example
65+
66+
```
67+
if filter:check("apple") then
68+
print ("Seen an apple before!")
69+
end
70+
```
71+
72+
73+
## filter:reset()
74+
Empties the filter.
75+
76+
#### Syntax
77+
`filter:reset()`
78+
79+
#### Returns
80+
Nothing
81+
82+
#### Example
83+
```
84+
filter:reset()
85+
```
86+
87+
## filter:info()
88+
Get some status information on the filter.
89+
90+
#### Syntax
91+
`bits, fns, occupancy, fprate = filter:info()`
92+
93+
#### Returns
94+
- `bits` The number of bits in the filter.
95+
- `fns` The number of hash functions in use.
96+
- `occupancy` The number of bits set in the filter.
97+
- `fprate` The approximate chance that the next `check` will return `true` when it should return `false`. This is represented as the inverse of the probability -- i.e. as the n in 1-in-n chance. This value is limited to 1,000,000.
98+
99+
#### Example
100+
```
101+
bits, fns, occupancy, fprate = filter:info()
102+
```
103+

mkdocs.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ pages:
4141
- 'am2320': 'en/modules/am2320.md'
4242
- 'apa102': 'en/modules/apa102.md'
4343
- 'bit': 'en/modules/bit.md'
44+
- 'bloom' : 'en/modules/bloom.md'
4445
- 'bme280': 'en/modules/bme280.md'
4546
- 'bmp085': 'en/modules/bmp085.md'
4647
- 'cjson': 'en/modules/cjson.md'

0 commit comments

Comments
 (0)