Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions app/include/user_modules.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
//#define LUA_USE_MODULES_AM2320
//#define LUA_USE_MODULES_APA102
#define LUA_USE_MODULES_BIT
//#define LUA_USE_MODULES_BLOOM
//#define LUA_USE_MODULES_BMP085
//#define LUA_USE_MODULES_BME280
//#define LUA_USE_MODULES_COAP
Expand Down
192 changes: 192 additions & 0 deletions app/modules/bloom.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
/*
* Module for bloom filters
*
* Philip Gladstone, N1DQ
*/

#include "module.h"
#include "lauxlib.h"
#include "c_types.h"
#include "../crypto/sha2.h"

#if defined(LUA_USE_MODULES_BLOOM) && !defined(SHA2_ENABLE)
#error Must have SHA2_ENABLE set for BLOOM module
#endif

// In-memory representation of one filter; allocated as a Lua userdata with
// the bit array stored inline after the header.
typedef struct {
// Number of bit positions probed per key (number of hash functions).
uint8 fns;
// Length of buf in 32-bit words (bit count is size << 5, byte count is size << 2).
uint16 size;
// Number of bits currently set in buf; incremented each time add sets a new bit.
uint32 occupancy;
// The bit array itself (flexible array member).
uint32 buf[];
} bloom_t;

/*
 * Hash a key and either insert it into the filter or test it for membership.
 *
 * The key is hashed once with SHA-256; filter->fns bit positions are then
 * derived from (overlapping) 24-bit windows of the 32-byte digest.
 *
 * buf, len  the key bytes
 * filter    the filter to consult or update
 * add       true:  set every probed bit that is not yet set and count it in
 *                  filter->occupancy
 *           false: stop at the first unset bit; the filter is not modified
 *
 * Returns true if every probed bit was already set (the key was probably
 * seen before), false otherwise.
 */
static bool add_or_check(const uint8 *buf, size_t len, bloom_t *filter, bool add) {
  SHA256_CTX ctx;
  SHA256_Init(&ctx);
  SHA256_Update(&ctx, buf, len);

  // Digest bytes are kept unsigned so that combining them below can never
  // sign-extend.
  uint8 hash[32];
  SHA256_Final(hash, &ctx);

  uint32 bits = filter->size << 5;
  if (bits == 0) {
    // Nothing to probe; also avoids the modulo by zero below.
    return false;
  }

  const uint8 *h = hash;
  bool prev = true;
  // Each probe consumes a 3-byte window. With up to 10 probes the windows can
  // be disjoint (10 * 3 <= 32); beyond that they advance by 2 bytes so that
  // 15 windows still fit in the digest (last window starts at byte 28).
  int hstep = filter->fns > 10 ? 2 : 3;
  int i;

  for (i = 0; i < filter->fns; i++) {
    uint32 val = (((uint32) h[0] << 16) | ((uint32) h[1] << 8) | h[2]) % bits;
    h += hstep;

    uint32 offset = val >> 5;
    // Unsigned literal: shifting a signed 1 into bit 31 is undefined behaviour.
    uint32 bit = (uint32) 1 << (val & 31);

    if (!(filter->buf[offset] & bit)) {
      prev = false;
      if (!add) {
        // A single missing bit proves the key was never added.
        break;
      }
      filter->buf[offset] |= bit;
      filter->occupancy++;
    }
  }

  return prev;
}

/*
 * Lua: present = filter:check(string)
 *
 * Tests whether the string is (probably) in the filter without modifying it.
 * Pushes one boolean.
 */
static int bloom_filter_check(lua_State *L) {
  size_t key_len;
  bloom_t *bf = (bloom_t *)luaL_checkudata(L, 1, "bloom.filter");
  const uint8 *key = (const uint8 *) luaL_checklstring(L, 2, &key_len);

  lua_pushboolean(L, add_or_check(key, key_len, bf, false));
  return 1;
}

/*
 * Lua: seen = filter:add(string)
 *
 * Inserts the string into the filter. Pushes one boolean: true when all of
 * the string's bits were already set before this call.
 */
static int bloom_filter_add(lua_State *L) {
  size_t key_len;
  bloom_t *bf = (bloom_t *)luaL_checkudata(L, 1, "bloom.filter");
  const uint8 *key = (const uint8 *) luaL_checklstring(L, 2, &key_len);

  lua_pushboolean(L, add_or_check(key, key_len, bf, true));
  return 1;
}

/*
 * Lua: filter:reset()
 *
 * Clears every bit of the filter and zeroes the occupancy counter.
 * Returns nothing to Lua.
 */
static int bloom_filter_reset(lua_State *L) {
  bloom_t *bf = (bloom_t *)luaL_checkudata(L, 1, "bloom.filter");

  bf->occupancy = 0;
  // size counts 32-bit words, so this clears the whole bit array.
  memset(bf->buf, 0, bf->size * sizeof(bf->buf[0]));

  return 0;
}

/*
 * Lua: bits, fns, occupancy, fprate = filter:info()
 *
 * Pushes four integers: the number of bits in the filter, the number of hash
 * functions, the number of bits currently set, and the estimated false
 * positive rate expressed as n in "1 in n" (capped at 1,000,000).
 *
 * The estimate is (bits / occupancy) ^ fns, evaluated in integer arithmetic.
 */
static int bloom_filter_info(lua_State *L) {
  const uint64 limit = 1000000;
  bloom_t *bf = (bloom_t *)luaL_checkudata(L, 1, "bloom.filter");

  lua_pushinteger(L, bf->size << 5);
  lua_pushinteger(L, bf->fns);
  lua_pushinteger(L, bf->occupancy);

  // An empty filter can never report a false positive: report the cap.
  uint64 inverse = limit;

  if (bf->occupancy > 0) {
    // First pass: whole-number ratio, stopping as soon as the cap is reached.
    unsigned int whole = (bf->size << 5) / bf->occupancy;
    int k = 1;

    inverse = whole;
    while (k < bf->fns && inverse < limit) {
      inverse *= whole;
      k++;
    }

    if (inverse < limit) {
      // The truncated ratio loses too much precision when it is small, so
      // redo the power with 8 fractional bits (everything scaled by 256).
      unsigned int scaled = (bf->size << 13) / bf->occupancy;
      uint64 acc = scaled;

      k = 1;
      while (k < bf->fns && acc < 256000000) {
        acc = (acc * scaled) >> 8;
        k++;
      }

      inverse = acc >> 8;
    }
  }

  if (inverse > limit) {
    inverse = limit;
  }
  lua_pushinteger(L, (int) inverse);

  return 4;
}

/*
 * Lua: filter = bloom.create(elements, errorrate)
 *
 * elements   the largest number of strings expected to be added (must be > 0)
 * errorrate  the acceptable false positive rate expressed as n in "1 in n"
 *
 * Sizes the bit array at roughly 1.5 * elements * log2(errorrate) bits
 * (rounded up to a whole number of 32-bit words, minimum 256 bits) and picks
 * about two thirds of bits-per-element as the number of hash functions,
 * clamped to 2..15.
 *
 * Raises a Lua error if elements is not positive or if the resulting filter
 * would not fit in the 16-bit word count kept in bloom_t.
 *
 * Returns one value: the new, empty filter userdata.
 */
static int bloom_create(lua_State *L) {
  // bloom_t.size is a uint16 count of 32-bit words.
  const int max_bits = 0xFFFF << 5;

  int items = luaL_checkinteger(L, 1);
  int error = luaL_checkinteger(L, 2);

  // items is used as a divisor below, so zero (or a negative count) is invalid.
  luaL_argcheck(L, items > 0, 1, "must be positive");

  // Count the significant bits of error: floor(log2(error)) + 1, or 0 if error <= 0.
  int bits_per_item = 0;
  int n = error;
  while (n > 0) {
    n = n >> 1;
    bits_per_item++;
  }

  // Reject before multiplying so that the product cannot overflow an int.
  if (bits_per_item > 0 && items > max_bits / bits_per_item) {
    return luaL_error(L, "filter too large");
  }

  int bits = items * bits_per_item;
  bits += bits >> 1;

  // Round up to a whole number of 32-bit words.
  bits = (bits + 31) & ~31;

  if (bits < 256) {
    bits = 256;
  }
  if (bits > max_bits) {
    // Storing the word count in a uint16 would silently truncate it.
    return luaL_error(L, "filter too large");
  }

  int size = bits >> 3;

  // fns = (1/2 + 1/6) * bits / items
  int fns = bits / items;
  fns = (fns >> 1) + fns / 6;

  if (fns < 2) {
    fns = 2;
  }
  if (fns > 15) {
    // add_or_check can derive at most 15 probes from one SHA-256 digest.
    fns = 15;
  }

  bloom_t *filter = (bloom_t *) lua_newuserdata(L, sizeof(bloom_t) + size);

  // Associate its metatable
  luaL_getmetatable(L, "bloom.filter");
  lua_setmetatable(L, -2);

  memset(filter, 0, sizeof(bloom_t) + size);
  filter->size = size >> 2;
  filter->fns = fns;

  return 1;
}

// Method table for filter objects. The "__index" entry refers back to this
// same table, so the table can serve as the metatable through which
// filter:add(), filter:check(), filter:reset() and filter:info() are found.
static const LUA_REG_TYPE bloom_filter_map[] = {
{ LSTRKEY( "add" ), LFUNCVAL( bloom_filter_add ) },
{ LSTRKEY( "check" ), LFUNCVAL( bloom_filter_check ) },
{ LSTRKEY( "reset" ), LFUNCVAL( bloom_filter_reset ) },
{ LSTRKEY( "info" ), LFUNCVAL( bloom_filter_info ) },
{ LSTRKEY( "__index" ), LROVAL( bloom_filter_map ) },
{ LNILKEY, LNILVAL }
};

// Module function map: the functions exposed on the `bloom` module itself.
// Currently only bloom.create(); the list is terminated by the nil entry.
static const LUA_REG_TYPE bloom_map[] = {
{ LSTRKEY( "create" ), LFUNCVAL( bloom_create ) },
{ LNILKEY, LNILVAL }
};

// Module initialisation hook (passed to NODEMCU_MODULE below).
// Registers bloom_filter_map under the name "bloom.filter" -- the same name
// that bloom_create passes to luaL_getmetatable and that the filter methods
// pass to luaL_checkudata.
// NOTE(review): returns 1; confirm that luaL_rometatable leaves exactly one
// value on the stack, otherwise this should probably return 0.
LUALIB_API int bloom_open(lua_State *L) {
luaL_rometatable(L, "bloom.filter", (void *)bloom_filter_map);
return 1;
}

// Register the module with the firmware: configuration tag BLOOM, Lua name
// "bloom", function table bloom_map, init hook bloom_open.
// NOTE(review): presumably gated on LUA_USE_MODULES_BLOOM by the macro -- confirm in module.h.
NODEMCU_MODULE(BLOOM, "bloom", bloom_map, bloom_open);
103 changes: 103 additions & 0 deletions docs/en/modules/bloom.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
# Bloom Module
| Since | Origin / Contributor | Maintainer | Source |
| :----- | :-------------------- | :---------- | :------ |
| 2017-11-13 | [Philip Gladstone](https://github.com/pjsg) | [Philip Gladstone](https://github.com/pjsg) | [bloom.c](../../../app/modules/bloom.c)|


This module implements a [Bloom filter](https://en.wikipedia.org/wiki/Bloom_filter). This is a probabilistic data structure that is used to test for set membership. There are two operations -- `add` and `check` -- that allow
arbitrary strings to be added to the set or tested for set membership. Since this is a probabilistic data structure, the answer returned can be incorrect: `check` may occasionally return `true` for a string that was never added (a false positive).
However, if the string *is* a member of the set, then the `check` operation will always return `true`.

## bloom.create()
Create a filter object.

#### Syntax
`bloom.create(elements, errorrate)`

#### Parameters
- `elements` The largest number of elements to be added to the filter.
- `errorrate` The error rate (the false positive rate). This is represented as `n` where the false positive rate is `1 / n`. This is the maximum rate of `check` returning true when the string is *not* in the set.

#### Returns
A `filter` object.

#### Example

```
filter = bloom.create(10000, 100) -- this will use around 13kB of memory
```

## filter:add()
Adds a string to the set and returns an indication of whether the string was already present.

#### Syntax
`filter:add(string)`

#### Parameters
- `string` The string to be added to the filter set.

#### Returns
`true` if the string was already present in the filter. `false` otherwise.

#### Example

```
if filter:add("apple") then
print ("Seen an apple before!")
else
print ("Noted that the first apple has been seen")
end
```

## filter:check()
Checks to see if a string is present in the filter set.

#### Syntax
`present = filter:check(string)`

#### Parameters
- `string` The string to be checked for membership in the set.

#### Returns
`true` if the string was already present in the filter. `false` otherwise.

#### Example

```
if filter:check("apple") then
print ("Seen an apple before!")
end
```


## filter:reset()
Empties the filter.

#### Syntax
`filter:reset()`

#### Returns
Nothing

#### Example
```
filter:reset()
```

## filter:info()
Get some status information on the filter.

#### Syntax
`bits, fns, occupancy, fprate = filter:info()`

#### Returns
- `bits` The number of bits in the filter.
- `fns` The number of hash functions in use.
- `occupancy` The number of bits set in the filter.
- `fprate` The approximate chance that the next `check` will return `true` when it should return `false`. This is represented as the inverse of the probability -- i.e. as the n in 1-in-n chance. This value is limited to 1,000,000.

#### Example
```
bits, fns, occupancy, fprate = filter:info()
```

1 change: 1 addition & 0 deletions mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ pages:
- 'am2320': 'en/modules/am2320.md'
- 'apa102': 'en/modules/apa102.md'
- 'bit': 'en/modules/bit.md'
- 'bloom' : 'en/modules/bloom.md'
- 'bme280': 'en/modules/bme280.md'
- 'bmp085': 'en/modules/bmp085.md'
- 'cjson': 'en/modules/cjson.md'
Expand Down