Commit 49bb40e9 authored by Rusty Russell's avatar Rusty Russell

invbloom: new module for IBLTs.

Signed-off-by: default avatarRusty Russell <rusty@rustcorp.com.au>
parent 5d34cc6b
......@@ -59,6 +59,7 @@ MODS_WITH_SRC := antithread \
htable \
idtree \
ilog \
invbloom \
io \
isaac \
iscsi \
......
../../licenses/BSD-MIT
\ No newline at end of file
#include "config.h"
#include <stdio.h>
#include <string.h>
/**
* invbloom - implementation of invertible bloom lookup tables.
*
* This code implements a subset of invertible bloom lookup tables
* as described in[1].
*
* [1] Goodrich, Michael T., and Michael Mitzenmacher. "Invertible bloom
* lookup tables." Communication, Control, and Computing (Allerton), 2011
* 49th Annual Allerton Conference on. IEEE, 2011.
* http://arxiv.org/pdf/1101.2245
*
* License: BSD-MIT
*
* Example:
* #include <ccan/invbloom/invbloom.h>
* #include <stdio.h>
*
* int main(int argc, char *argv[])
* {
* unsigned int i, n;
* struct invbloom *ib = invbloom_new(NULL, char *, 16, 0);
*
* for (i = 1; i < argc; i++)
* invbloom_insert(ib, &argv[i]);
*
* n = 0;
* for (i = 1; i < argc; i++)
* n += invbloom_get(ib, &argv[i]);
*
* printf("%u out of %u are found\n", n, argc);
*
* n = 0;
* for (i = 1; i < argc; i++) {
* unsigned int j;
* char **p = invbloom_extract(NULL, ib);
*
* for (j = 1; j < argc; j++) {
* if (p == &argv[j])
* n++;
* }
* tal_free(p);
* }
* printf("%u out of %u were extracted\n", n, argc);
* return 0;
* }
*/
int main(int argc, char *argv[])
{
/* Expect exactly one argument */
if (argc != 2)
return 1;
if (strcmp(argv[1], "depends") == 0) {
printf("ccan/endian\n");
printf("ccan/hash\n");
printf("ccan/short_types\n");
printf("ccan/tal\n");
return 0;
}
return 1;
}
/* Licensed under BSD-MIT - see LICENSE file for details */
#include "invbloom.h"
#include <ccan/hash/hash.h>
#include <ccan/endian/endian.h>
#include <assert.h>
/* "We will show that hash_count values of 3 or 4 work well in practice"
From:
Eppstein, David, et al. "What's the difference?: efficient set reconciliation without prior context." ACM SIGCOMM Computer Communication Review. Vol. 41. No. 4. ACM, 2011. http://conferences.sigcomm.org/sigcomm/2011/papers/sigcomm/p218.pdf
*/
#define NUM_HASHES 4
struct invbloom *invbloom_new_(const tal_t *ctx,
size_t id_size,
size_t n_elems,
u32 salt)
{
struct invbloom *ib = tal(ctx, struct invbloom);
if (ib) {
ib->n_elems = n_elems;
ib->id_size = id_size;
ib->salt = salt;
ib->count = tal_arrz(ib, s32, n_elems);
ib->idsum = tal_arrz(ib, u8, id_size * n_elems);
if (!ib->count || !ib->idsum)
ib = tal_free(ib);
}
return ib;
}
static size_t hash_bucket(const struct invbloom *ib, const void *id, size_t i)
{
return hash((const char *)id, ib->id_size, ib->salt+i*7) % ib->n_elems;
}
static u8 *idsum_ptr(const struct invbloom *ib, size_t bucket)
{
return (u8 *)ib->idsum + bucket * ib->id_size;
}
static void add_to_bucket(struct invbloom *ib, size_t n, const u8 *id)
{
size_t i;
u8 *idsum = idsum_ptr(ib, n);
ib->count[n]++;
for (i = 0; i < ib->id_size; i++)
idsum[i] ^= id[i];
}
static void remove_from_bucket(struct invbloom *ib, size_t n, const u8 *id)
{
size_t i;
u8 *idsum = idsum_ptr(ib, n);
ib->count[n]--;
for (i = 0; i < ib->id_size; i++)
idsum[i] ^= id[i];
}
void invbloom_insert(struct invbloom *ib, const void *id)
{
unsigned int i;
for (i = 0; i < NUM_HASHES; i++)
add_to_bucket(ib, hash_bucket(ib, id, i), id);
}
void invbloom_delete(struct invbloom *ib, const void *id)
{
unsigned int i;
for (i = 0; i < NUM_HASHES; i++)
remove_from_bucket(ib, hash_bucket(ib, id, i), id);
}
static bool all_zero(const u8 *mem, size_t size)
{
unsigned int i;
for (i = 0; i < size; i++)
if (mem[i])
return false;
return true;
}
bool invbloom_get(const struct invbloom *ib, const void *id)
{
unsigned int i;
for (i = 0; i < NUM_HASHES; i++) {
size_t h = hash_bucket(ib, id, i);
u8 *idsum = idsum_ptr(ib, h);
if (ib->count[h] == 0 && all_zero(idsum, ib->id_size))
return false;
if (ib->count[h] == 1)
return (memcmp(idsum, id, ib->id_size) == 0);
}
return false;
}
static void *extract(const tal_t *ctx, struct invbloom *ib, int count)
{
size_t i;
/* FIXME: this makes full extraction O(n^2). */
for (i = 0; i < ib->n_elems; i++) {
void *id;
if (ib->count[i] != count)
continue;
id = tal_dup(ctx, u8, idsum_ptr(ib, i), ib->id_size, 0);
return id;
}
return NULL;
}
void *invbloom_extract(const tal_t *ctx, struct invbloom *ib)
{
void *id;
id = extract(ctx, ib, 1);
if (id)
invbloom_delete(ib, id);
return id;
}
void *invbloom_extract_negative(const tal_t *ctx, struct invbloom *ib)
{
void *id;
id = extract(ctx, ib, -1);
if (id)
invbloom_insert(ib, id);
return id;
}
void invbloom_subtract(struct invbloom *ib1, const struct invbloom *ib2)
{
size_t i;
assert(ib1->n_elems == ib2->n_elems);
assert(ib1->id_size == ib2->id_size);
assert(ib1->salt == ib2->salt);
for (i = 0; i < ib1->n_elems; i++)
ib1->count[i] -= ib2->count[i];
for (i = 0; i < ib1->n_elems * ib1->id_size; i++)
ib1->idsum[i] ^= ib2->idsum[i];
}
bool invbloom_empty(const struct invbloom *ib)
{
size_t i;
for (i = 0; i < ib->n_elems; i++) {
if (ib->count[i])
return false;
if (!all_zero(idsum_ptr(ib, i), ib->id_size))
return false;
}
return true;
}
/* Licensed under BSD-MIT - see LICENSE file for details */
#ifndef CCAN_INVBLOOM_H
#define CCAN_INVBLOOM_H
#include <ccan/short_types/short_types.h>
#include <ccan/tal/tal.h>
struct invbloom {
size_t n_elems;
size_t id_size;
u32 hashsum_bytes; /* 0 - 4. */
u32 salt;
s32 *count; /* [n_elems] */
u8 *idsum; /* [n_elems][id_size] */
};
/**
* invbloom_new - create a new invertable bloom lookup table
* @ctx: context to tal() from, or NULL.
* @type: type to place into the buckets (must not contain padding)
* @n_elems: number of entries in table
* @salt: 32 bit seed for table
*
* Returns a new table, which can be freed with tal_free().
*/
#define invbloom_new(ctx, type, n_elems, salt) \
invbloom_new_((ctx), sizeof(type), (n_elems), (salt))
struct invbloom *invbloom_new_(const tal_t *ctx,
size_t id_size,
size_t n_elems, u32 salt);
/**
* invbloom_insert - add a new element
* @ib: the invertable bloom lookup table.
* @elem: the element
*
* This is guaranteed to be the inverse of invbloom_delete.
*/
void invbloom_insert(struct invbloom *ib, const void *elem);
/**
* invbloom_delete - remove an element
* @ib: the invertable bloom lookup table.
* @elem: the element
*
* Note that this doesn't check the element was previously added (as
* that can not be done in general anyway).
*
* This is guaranteed to be the inverse of invbloom_delete.
*/
void invbloom_delete(struct invbloom *ib, const void *elem);
/**
* invbloom_get - check if an element is (probably) in the table.
* @ib: the invertable bloom lookup table.
* @elem: the element
*
* This may return a false negative if the table is too full.
*
* It will only return a false positive if deletions and insertions
* don't match, and have managed to produce a result which matches the
* element. This is much less likely.
*/
bool invbloom_get(const struct invbloom *ib, const void *elem);
/**
* invbloom_extract - try to recover an entry added to a bloom lookup table.
* @ctx: the context to tal() the return value from.
* @ib: the invertable bloom lookup table.
*
* This may be able to recover (and delete) an element which was
* invbloom_insert()ed into the table (and not deleted). This will
* not work if the table is too full.
*
* It may return a bogus element if deletions and insertions don't
* match.
*/
void *invbloom_extract(const tal_t *ctx, struct invbloom *ib);
/**
* invbloom_extract_negative - try to recover an entry deleted from a bloom lookup table.
* @ctx: the context to tal() the return value from.
* @ib: the invertable bloom lookup table.
*
* This may be able to recover (and insert/undo) an element which was
* invbloom_delete()ed from the table (and not inserted). This will
* not work if the table is too full.
*
* It may return a bogus element if deletions and insertions don't
* match.
*/
void *invbloom_extract_negative(const tal_t *ctx, struct invbloom *ib);
/**
* invbloom_subtract - return the differences of two IBLTs.
* @ib1: the invertable bloom lookup table to alter
* @ib2: the invertable bloom lookup table to subtract.
*
* This produces exactly the same result as if a new table had all the
* elements only in @ib1 inserted, then all the elements onlt in @ib2
* deleted.
*
* ie. if @ib1 and @ib2 are similar, the result may be a usable by
* invbloom_extract and invbloom_extract_negative.
*/
void invbloom_subtract(struct invbloom *ib1, const struct invbloom *ib2);
/**
* invbloom_empty - is an invertable bloom lookup table completely clean?
* @ib: the invertable bloom lookup table
*
* This is always true if @ib has had the same elements inserted and
* deleted. It is far less likely to be true if different ones were
* deleted than inserted.
*/
bool invbloom_empty(const struct invbloom *ib);
#endif /* CCAN_INVBLOOM_H */
#include <ccan/invbloom/invbloom.h>
/* Include the C files directly. */
#include <ccan/invbloom/invbloom.c>
#include <ccan/tap/tap.h>
int main(void)
{
struct invbloom *ib1, *ib2;
const tal_t *ctx = tal(NULL, char);
int val = 1, val2 = 2, *ip;
/* This is how many tests you plan to run */
plan_tests(8);
ib1 = invbloom_new(ctx, int, 1024, 0);
ib2 = invbloom_new(ctx, int, 1024, 0);
invbloom_insert(ib1, &val);
invbloom_insert(ib2, &val2);
invbloom_subtract(ib1, ib2);
ip = invbloom_extract(ctx, ib1);
ok1(ip);
ok1(tal_parent(ip) == ctx);
ok1(*ip == val);
ip = invbloom_extract(ctx, ib1);
ok1(!ip);
ip = invbloom_extract_negative(ctx, ib1);
ok1(ip);
ok1(tal_parent(ip) == ctx);
ok1(*ip == val2);
ip = invbloom_extract_negative(ctx, ib1);
ok1(!ip);
tal_free(ctx);
/* This exits depending on whether all tests passed */
return exit_status();
}
#include <ccan/invbloom/invbloom.h>
/* Include the C files directly. */
#include <ccan/invbloom/invbloom.c>
#include <ccan/tap/tap.h>
int main(void)
{
struct invbloom *ib;
const tal_t *ctx = tal(NULL, char);
int val, val2, *ip, *ip2, i;
/* This is how many tests you plan to run */
plan_tests(127);
ib = invbloom_new(ctx, int, 1, 100);
ok1(tal_parent(ib) == ctx);
ok1(ib->id_size == sizeof(int));
ok1(ib->salt == 100);
ok1(ib->n_elems == 1);
ok1(invbloom_empty(ib));
val = 0;
invbloom_insert(ib, &val);
ok1(ib->count[0] == NUM_HASHES);
ok1(!invbloom_empty(ib));
invbloom_delete(ib, &val);
ok1(invbloom_empty(ib));
val2 = 2;
invbloom_insert(ib, &val);
invbloom_insert(ib, &val2);
ok1(!invbloom_empty(ib));
invbloom_delete(ib, &val);
ok1(!invbloom_empty(ib));
invbloom_delete(ib, &val2);
ok1(invbloom_empty(ib));
tal_free(ib);
ib = invbloom_new(ctx, int, 1, 100);
ok1(tal_parent(ib) == ctx);
ok1(ib->id_size == sizeof(int));
ok1(ib->salt == 100);
ok1(ib->n_elems == 1);
ok1(invbloom_empty(ib));
val = 0;
invbloom_insert(ib, &val);
ok1(ib->count[0] == NUM_HASHES);
ok1(!invbloom_empty(ib));
invbloom_delete(ib, &val);
ok1(invbloom_empty(ib));
val2 = 2;
invbloom_insert(ib, &val);
invbloom_insert(ib, &val2);
ok1(!invbloom_empty(ib));
invbloom_delete(ib, &val);
ok1(!invbloom_empty(ib));
invbloom_delete(ib, &val2);
ok1(invbloom_empty(ib));
tal_free(ib);
/* Now, a more realistic test. */
for (i = 0; i < 5; i++) {
ib = invbloom_new(ctx, int, 1024, i);
invbloom_insert(ib, &val);
invbloom_insert(ib, &val2);
ok1(invbloom_get(ib, &val));
ok1(invbloom_get(ib, &val2));
ip = invbloom_extract_negative(ctx, ib);
ok1(!ip);
ip = invbloom_extract(ctx, ib);
ok1(ip);
ok1(tal_parent(ip) == ctx);
ok1(*ip == val || *ip == val2);
ip2 = invbloom_extract(ctx, ib);
ok1(ip2);
ok1(tal_parent(ip2) == ctx);
ok1(*ip2 == val || *ip2 == val2);
ok1(*ip2 != *ip);
ok1(invbloom_extract(ctx, ib) == NULL);
invbloom_delete(ib, &val);
invbloom_delete(ib, &val2);
ip = invbloom_extract(ctx, ib);
ok1(!ip);
ip = invbloom_extract_negative(ctx, ib);
ok1(ip);
ok1(tal_parent(ip) == ctx);
ok1(*ip == val || *ip == val2);
ip2 = invbloom_extract_negative(ctx, ib);
ok1(ip2);
ok1(tal_parent(ip2) == ctx);
ok1(*ip2 == val || *ip2 == val2);
ok1(*ip2 != *ip);
ok1(invbloom_extract_negative(ctx, ib) == NULL);
ok1(invbloom_empty(ib));
tal_free(ib);
}
tal_free(ctx);
/* This exits depending on whether all tests passed */
return exit_status();
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment