Commit 94ff9fc8 authored by Rusty Russell's avatar Rusty Russell

crc sync module.

parent c6a86ac7
#include <stdio.h>
#include <string.h>
#include "config.h"
/**
* crcsync - routines to use crc for an rsync-like protocol.
*
* This is a complete library for synchronization using a variant of the
* rsync protocol.
*
* Example:
* // Calculate checksums of file (3-arg mode)
* // Or print differences between file and checksums (4+ arg mode)
* #include <ccan/crcsync/crcsync.h>
* #include <ccan/grab_file/grab_file.h>
* #include <stdio.h>
* #include <stdlib.h>
* #include <err.h>
*
* static void print_result(long result)
* {
* if (result < 0)
* printf("MATCHED CRC %ld\n", -result - 1);
* else if (result > 0)
* printf("%ld literal bytes\n", result);
* }
*
* int main(int argc, char *argv[])
* {
* size_t len, used, blocksize;
* char *file;
* struct crc_context *ctx;
* uint32_t *crcs;
* long res, i;
*
* if (argc < 3 || (blocksize = atoi(argv[1])) == 0)
* errx(1, "Usage: %s <blocksize> <file> <crc>...\n"
* "OR: %s <blocksize> <file>", argv[0], argv[0]);
*
* file = grab_file(NULL, argv[2], &len);
* if (!file)
* err(1, "Opening file %s", argv[2]);
*
* if (argc == 3) {
* // Short form prints CRCs of file for use in long form.
* used = (len + blocksize - 1) / blocksize;
* crcs = malloc(used * sizeof(uint32_t));
* crc_of_blocks(file, len, blocksize, 32, crcs);
* for (i = 0; i < used; i++)
* printf("%i ", crcs[i]);
* printf("\n");
* return 0;
* }
*
* crcs = malloc((argc - 3) * sizeof(uint32_t));
* for (i = 0; i < argc-3; i++)
* crcs[i] = atoi(argv[3+i]);
*
* ctx = crc_context_new(blocksize, 32, crcs, argc-3);
* for (used = 0; used < len; ) {
* used += crc_read_block(ctx, &res, file+used, len-used);
* print_result(res);
* }
* while ((res = crc_read_flush(ctx)) != 0)
* print_result(res);
*
* return 0;
* }
*
* Licence: LGPL (v2 or any later version)
*/
/* Module metadata entry point: answers the "depends" query by printing the
 * modules this one needs.  Any other invocation exits with status 1. */
int main(int argc, char *argv[])
{
	const int is_depends_query =
		argc == 2 && strcmp(argv[1], "depends") == 0;

	if (!is_depends_query)
		return 1;

	printf("ccan/crc\n");
	return 0;
}
#include "crcsync.h"
#include <ccan/crc/crc.h>
#include <string.h>
#include <assert.h>
/*
 * crc_of_blocks - CRC each block_size-sized block of a buffer.
 * @data: bytes to checksum
 * @len: number of bytes at @data
 * @block_size: size of each block; a trailing partial block is CRC'd too
 * @crcbits: how many low bits of each CRC to keep (32 or more keeps all)
 * @crc: output array, one entry per (possibly partial) block
 *
 * Does nothing when @block_size is 0, since no block could ever be consumed.
 */
void crc_of_blocks(const void *data, size_t len, unsigned int block_size,
		   unsigned int crcbits, uint32_t crc[])
{
	const uint8_t *buf = data;
	/* Shift an unsigned 32-bit one: "1 << 31" overflows a signed int,
	 * and shifting by 32 or more is undefined. */
	const uint32_t crcmask = crcbits < 32
		? ((uint32_t)1 << crcbits) - 1
		: 0xFFFFFFFF;
	size_t i = 0;

	/* A zero block size would loop forever, writing past the end of crc[]. */
	if (block_size == 0)
		return;

	while (len >= block_size) {
		crc[i++] = crc32c(0, buf, block_size) & crcmask;
		buf += block_size;
		len -= block_size;
	}

	/* Short final block, if the length is not a multiple of block_size. */
	if (len)
		crc[i] = crc32c(0, buf, len) & crcmask;
}
/* State carried between crc_read_block() and crc_read_flush() calls. */
struct crc_context {
	/* Size in bytes of the blocks being matched. */
	size_t block_size;
	/* Mask applied to running_crc before it is compared against crc[]. */
	uint32_t crcmask;

	/* Saved old buffer bytes (block_size bytes).  Declared as a byte
	 * pointer so that offset arithmetic on it is standard C instead of
	 * depending on the GNU void-pointer arithmetic extension. */
	uint8_t *buffer;
	/* The saved bytes in use are buffer[buffer_start .. buffer_end). */
	size_t buffer_start, buffer_end;

	/* Progress so far. */
	uint32_t running_crc;
	/* Bytes read but not yet reported as literal or matched. */
	size_t literal_bytes;
	/* Count of all bytes consumed by crc_read_block(). */
	size_t total_bytes;
	/* Index of a match still to be reported, or -1 if none. */
	int have_match;

	/* Uncrc tab. */
	uint32_t uncrc_tab[256];

	/* Number of entries in crc[], followed by the block CRCs themselves. */
	unsigned int num_crcs;
	uint32_t crc[];
};
/* Continue @crc over @count zero bytes, feeding them from a small fixed
 * buffer so no block-sized scratch array is needed. */
static uint32_t crc_of_zeros(uint32_t crc, size_t count)
{
	static const uint8_t zeros[256];

	while (count) {
		size_t chunk = count < sizeof(zeros) ? count : sizeof(zeros);

		crc = crc32c(crc, zeros, chunk);
		count -= chunk;
	}
	return crc;
}

/*
 * Calculate how the crc changes when we take a given char out of the
 * crc'd area.
 *
 * @uncrc_tab: 256-entry table to fill
 * @wsize: window (block) size in bytes
 *
 * Entry b becomes crc(b followed by wsize-1 zeros) ^ crc(wsize-1 zeros);
 * crc_remove_byte() XORs it into a running crc to cancel leading byte b.
 *
 * The window is streamed through crc32c() in chunks rather than built in a
 * wsize-byte stack array, so large block sizes cannot overflow the stack.
 * This relies on crc32c() being chainable through its start value, which
 * crc_add_byte() depends on as well.
 */
static void init_uncrc_tab(uint32_t uncrc_tab[], unsigned int wsize)
{
	unsigned int i;
	uint32_t part_crc;

	/* No window, nothing to remove: avoid the wsize - 1 wrap-around. */
	if (wsize == 0) {
		memset(uncrc_tab, 0, 256 * sizeof(uncrc_tab[0]));
		return;
	}

	/* Calculate the crc of the trailing zeros once, since it doesn't change. */
	part_crc = crc_of_zeros(0, wsize - 1);

	for (i = 0; i < 256; i++) {
		const uint8_t first = i;

		uncrc_tab[i] = crc_of_zeros(crc32c(0, &first, 1), wsize - 1)
			^ part_crc;
	}
}
/*
 * crc_context_new - allocate and initialize state for crc_read_block.
 * @block_size: size of each block in bytes (must be non-zero)
 * @crcbits: number of valid low bits in each CRC (32 or more means all)
 * @crc: array of block CRCs; copied into the context
 * @num_crcs: number of entries in @crc
 *
 * Returns the new context, or NULL if @block_size is 0, the requested size
 * would overflow, or an allocation fails.
 */
struct crc_context *crc_context_new(size_t block_size, unsigned crcbits,
				    const uint32_t crc[], unsigned num_crcs)
{
	struct crc_context *ctx;

	/* A zero-sized window cannot be rolled. */
	if (block_size == 0)
		return NULL;

	/* Refuse sizes whose byte count would wrap around size_t. */
	if (num_crcs > (SIZE_MAX - sizeof(*ctx)) / sizeof(crc[0]))
		return NULL;

	ctx = malloc(sizeof(*ctx) + sizeof(crc[0]) * num_crcs);
	if (!ctx)
		return NULL;

	ctx->buffer = malloc(block_size);
	if (!ctx->buffer) {
		free(ctx);
		return NULL;
	}

	ctx->block_size = block_size;
	/* Shifting by 32 is undefined, and "1 << 31" overflows a signed
	 * int, so shift an unsigned 32-bit one instead. */
	if (crcbits >= 32)
		ctx->crcmask = 0xFFFFFFFF;
	else
		ctx->crcmask = ((uint32_t)1 << crcbits) - 1;

	ctx->num_crcs = num_crcs;
	/* memcpy() must not be handed a NULL source, even for zero bytes. */
	if (num_crcs)
		memcpy(ctx->crc, crc, sizeof(crc[0]) * num_crcs);

	ctx->buffer_end = 0;
	ctx->buffer_start = 0;
	ctx->running_crc = 0;
	ctx->literal_bytes = 0;
	ctx->total_bytes = 0;
	ctx->have_match = -1;
	init_uncrc_tab(ctx->uncrc_tab, block_size);

	return ctx;
}
/* Look the current (masked) running crc up in the list of block crcs.
 * Returns the index of the first equal entry, or -1 if there is none or
 * if fewer than block_size literal bytes have been gathered. */
static int crc_matches(const struct crc_context *ctx)
{
	uint32_t masked;
	unsigned int idx = 0;

	if (ctx->literal_bytes < ctx->block_size)
		return -1;

	masked = ctx->running_crc & ctx->crcmask;
	while (idx < ctx->num_crcs) {
		if (ctx->crc[idx] == masked)
			return idx;
		idx++;
	}
	return -1;
}
/* Extend @crc by one trailing byte. */
static uint32_t crc_add_byte(uint32_t crc, uint8_t newbyte)
{
	const uint8_t tail[1] = { newbyte };

	return crc32c(crc, tail, sizeof(tail));
}
/* Cancel the contribution of @oldbyte from @crc by XORing in the table
 * entry for that byte value. */
static uint32_t crc_remove_byte(uint32_t crc, uint8_t oldbyte,
				const uint32_t uncrc_tab[])
{
	const uint32_t contribution = uncrc_tab[oldbyte];

	return crc ^ contribution;
}
/* Slide the crc window one byte: drop @oldbyte from the front, then
 * append @newbyte at the end. */
static uint32_t crc_roll(uint32_t crc, uint8_t oldbyte, uint8_t newbyte,
			 const uint32_t uncrc_tab[])
{
	const uint32_t without_old = crc_remove_byte(crc, oldbyte, uncrc_tab);

	return crc_add_byte(without_old, newbyte);
}
/* Number of saved bytes currently held between buffer_start and buffer_end. */
static size_t buffer_size(const struct crc_context *ctx)
{
	const size_t first = ctx->buffer_start;
	const size_t past_last = ctx->buffer_end;

	return past_last - first;
}
/*
 * crc_read_block - run more data through the rolling crc, looking for a
 * block whose crc is in the context's list.
 * @ctx: state from crc_context_new()
 * @result: set to a positive count of literal bytes, to 0, or to
 *          -(index of matching crc) - 1
 * @buf: new bytes to examine
 * @buflen: number of bytes available at @buf
 *
 * Returns the number of bytes of @buf consumed by this call.  This is 0
 * when the call only reports a match that the previous call found but
 * deferred in order to report the preceding literal bytes first.
 */
size_t crc_read_block(struct crc_context *ctx, long *result,
const void *buf, size_t buflen)
{
size_t consumed = 0, len;
int crcmatch = -1;
/* old: the byte about to leave the crc window (NULL until a whole block
 * is available); p: the next unread byte of buf. */
const uint8_t *old, *p = buf;
/* Simple optimization, if we found a match last time. */
if (ctx->have_match >= 0) {
crcmatch = ctx->have_match;
goto have_match;
}
/* With a full block already saved, rolling starts from the saved bytes. */
if (buffer_size(ctx) >= ctx->block_size)
old = ctx->buffer + ctx->buffer_start;
else
old = NULL;
/* Consume bytes until the running crc matches or the input runs out. */
while (!old || (crcmatch = crc_matches(ctx)) < 0) {
if (consumed == buflen)
break;
/* Update crc. */
if (old) {
ctx->running_crc = crc_roll(ctx->running_crc,
*old, *p,
ctx->uncrc_tab);
old++;
/* Ran off the end of the saved bytes: continue with the caller's data. */
if (old == ctx->buffer + ctx->buffer_end)
old = buf;
} else {
ctx->running_crc = crc_add_byte(ctx->running_crc, *p);
/* NOTE(review): this starts rolling once block_size bytes of buf have
 * been read, and does not appear to account for bytes already saved
 * in ctx->buffer by an earlier call - confirm. */
if (p == (uint8_t *)buf + ctx->block_size - 1)
old = buf;
}
ctx->literal_bytes++;
ctx->total_bytes++;
consumed++;
p++;
}
/* Make sure we have a copy of the last block_size bytes.
 * First, copy down the old data. */
if (buffer_size(ctx)) {
memmove(ctx->buffer, ctx->buffer + ctx->buffer_start,
buffer_size(ctx));
ctx->buffer_end -= ctx->buffer_start;
ctx->buffer_start = 0;
}
if (crcmatch >= 0) {
/* We have a match! */
if (ctx->literal_bytes > ctx->block_size) {
/* Output literal first. */
*result = ctx->literal_bytes - ctx->block_size;
ctx->literal_bytes = ctx->block_size;
/* Remember for next time! */
ctx->have_match = crcmatch;
} else {
/* Also entered by the goto at the top of the function, to report a
 * match deferred by the previous call. */
have_match:
*result = -crcmatch-1;
ctx->literal_bytes -= ctx->block_size;
assert(ctx->literal_bytes == 0);
ctx->have_match = -1;
ctx->running_crc = 0;
}
} else {
/* Output literal if it's more than 1 block ago. */
if (ctx->literal_bytes > ctx->block_size) {
*result = ctx->literal_bytes - ctx->block_size;
ctx->literal_bytes = ctx->block_size;
} else
*result = 0;
/* Now save any literal bytes we'll need in future. */
len = ctx->literal_bytes - buffer_size(ctx);
/* NOTE(review): this copies the first len bytes of buf; when more than
 * len bytes were consumed, the bytes needed later would presumably be
 * the last len consumed (ending at p) - confirm. */
memcpy(ctx->buffer + ctx->buffer_end, buf, len);
ctx->buffer_end += len;
assert(buffer_size(ctx) <= ctx->block_size);
}
return consumed;
}
/* Try to match the data left at the end against the final block crc.
 *
 * Checksumming everything that remains and comparing would only catch the
 * exact-match case.  Instead the crc is built up one byte at a time from
 * the start of the saved data and compared after every byte, which also
 * catches the case where extra data was appended after the final block.
 *
 * Returns the length of the matching prefix, or 0 if nothing matched.
 */
static size_t final_block_match(struct crc_context *ctx)
{
	const uint8_t *saved;
	uint32_t wanted, crc = 0;
	size_t avail, n;

	if (ctx->num_crcs == 0)
		return 0;

	wanted = ctx->crc[ctx->num_crcs - 1];
	avail = buffer_size(ctx);
	saved = ctx->buffer;

	for (n = 0; n < avail; n++) {
		crc = crc_add_byte(crc, saved[ctx->buffer_start + n]);
		if ((crc & ctx->crcmask) == wanted)
			return n + 1;
	}
	return 0;
}
/* Flush whatever crc_read_block() left behind.  Returns a negative value
 * (-(crc index) - 1) for a matched block, a positive count of literal
 * bytes, or 0 once nothing remains. */
long crc_read_flush(struct crc_context *ctx)
{
	long answer;

	/* No deferred whole-block match: try the final (maybe short) block. */
	if (ctx->have_match == -1) {
		size_t matched = final_block_match(ctx);

		if (matched == 0) {
			/* Everything still saved is reported as literal data. */
			answer = buffer_size(ctx);
			ctx->literal_bytes -= answer;
			ctx->buffer_start += answer;
			return answer;
		}

		ctx->buffer_start += matched;
		ctx->have_match = ctx->num_crcs - 1;
	}

	/* It might be a partial block match, so the literal count is simply
	 * reset rather than asserted on. */
	answer = -ctx->have_match - 1;
	ctx->have_match = -1;
	ctx->literal_bytes = 0;
	return answer;
}
/**
 * crc_context_free - free a context returned from crc_context_new.
 * @ctx: the context returned from crc_context_new, or NULL.
 *
 * Releases the saved-byte buffer and the context itself.  Passing NULL is
 * a no-op.
 */
void crc_context_free(struct crc_context *ctx)
{
	/* The contract allows NULL, so don't dereference it. */
	if (!ctx)
		return;

	free(ctx->buffer);
	free(ctx);
}
#ifndef CCAN_CRCSYNC_H
#define CCAN_CRCSYNC_H
#include <stdint.h>
#include <stddef.h>
/**
 * crc_of_blocks - calculate the crc of the blocks.
 * @data: pointer to the buffer to CRC
 * @len: length of the buffer
 * @blocksize: size of each block to CRC (final block may be shorter)
 * @crcbits: the number of bits of crc you want (currently 32 maximum)
 * @crc: the crcs (array will have (len + blocksize-1)/blocksize entries).
 *
 * Calculates the CRC of each block, and outputs the lower @crcbits to
 * the @crc array.
 */
void crc_of_blocks(const void *data, size_t len, unsigned int blocksize,
unsigned int crcbits, uint32_t crc[]);
/**
 * crc_context_new - allocate and initialize state for crc_read_block
 * @blocksize: the size of each block
 * @crcbits: the bits valid in the CRCs (<= 32)
 * @crc: array of block crcs
 * @num_crcs: number of block crcs
 *
 * Returns an allocated pointer to the structure for crc_read_block,
 * or NULL on failure.  Makes a copy of @crc and @num_crcs.
 */
struct crc_context *crc_context_new(size_t blocksize, unsigned crcbits,
const uint32_t crc[], unsigned num_crcs);
/**
 * crc_read_block - search for block matches in the buffer.
 * @ctx: struct crc_context from crc_context_new.
 * @result: unmatched bytecount, or crc which matched.
 * @buf: pointer to bytes
 * @buflen: length of buffer
 *
 * Returns the number of bytes of the buffer which have been digested,
 * and sets @result either to a negative number (== -crc_number - 1)
 * to show that a block matched a crc, or zero or more to represent a
 * length of unmatched data.
 *
 * Note that multiple lengths of unmatched data might be returned in a row:
 * you'll probably want to merge them yourself.
 */
size_t crc_read_block(struct crc_context *ctx, long *result,
const void *buf, size_t buflen);
/**
 * crc_read_flush - flush any remaining data from crc_read_block.
 * @ctx: the context passed to crc_read_block.
 *
 * Matches the final data. This can be used to create a boundary, or
 * simply flush the final data. Keep calling it until it returns 0.
 *
 * The return value uses the same encoding as crc_read_block's @result:
 * negative (== -crc_number - 1) for a matched block, positive for a
 * length of unmatched data.
 */
long crc_read_flush(struct crc_context *ctx);
/**
 * crc_context_free - free a context returned from crc_context_new.
 * @ctx: the context returned from crc_context_new, or NULL.
 *
 * Releases the context together with the buffer it allocated internally.
 */
void crc_context_free(struct crc_context *ctx);
#endif /* CCAN_CRCSYNC_H */
#include "crcsync/crcsync.h"
#include "crcsync/crcsync.c"
#include "tap/tap.h"
#include <stdlib.h>
#include <stdbool.h>
/* FIXME: ccanize. */
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
/* One expected output item of a sync run: either a run of literal bytes
 * or a matched block. */
struct result {
enum {
LITERAL, BLOCK
} type;
/* Block number, or length of literal. */
size_t val;
};
/* Number of block_size-sized blocks needed to cover len bytes, counting a
 * trailing partial block.  block_size must be non-zero.
 *
 * Written as quotient plus a remainder test so that it cannot wrap around
 * the way "len + block_size - 1" does for very large len. */
static inline size_t num_blocks(size_t len, size_t block_size)
{
	return len / block_size + (len % block_size != 0);
}
/* Compare an accumulated run of literal bytes against the next expected
 * result and step past it.  Nothing is checked when the run is empty. */
static void check_finalized_result(size_t curr_literal,
				   const struct result results[],
				   size_t num_results,
				   size_t *curr_result)
{
	if (curr_literal != 0) {
		ok1(*curr_result < num_results);
		ok1(results[*curr_result].type == LITERAL);
		ok1(results[*curr_result].val == curr_literal);
		++*curr_result;
	}
}
/* Handle one value produced by crc_read_block()/crc_read_flush().
 * Non-negative values are literal lengths and are only accumulated;
 * a negative value is a block match, checked against the expected results
 * after any pending literal run has been checked. */
static void check_result(long result,
			 size_t *curr_literal,
			 const struct result results[], size_t num_results,
			 size_t *curr_result)
{
	if (result < 0) {
		/* A block ends the current literal run: check that first. */
		if (*curr_literal != 0) {
			check_finalized_result(*curr_literal, results,
					       num_results, curr_result);
			*curr_literal = 0;
		}

		ok1(*curr_result < num_results);
		ok1(results[*curr_result].type == BLOCK);
		ok1(results[*curr_result].val == -result - 1);
		++*curr_result;
		return;
	}

	/* Consecutive literals are merged into one. */
	*curr_literal += result;
}
/* Take the block crcs of buffer1, run buffer2 through the matcher, and
 * check that the stream of literals and block matches equals results[]. */
static void test_sync(const char *buffer1, size_t len1,
		      const char *buffer2, size_t len2,
		      size_t block_size,
		      const struct result results[], size_t num_results)
{
	uint32_t crcs[num_blocks(len1, block_size)];
	struct crc_context *ctx;
	size_t used = 0, i = 0, curr_literal = 0;
	long result;

	crc_of_blocks(buffer1, len1, block_size, 32, crcs);
	ctx = crc_context_new(block_size, 32, crcs, ARRAY_SIZE(crcs));

	/* Feed all of buffer2, checking each result as it is produced. */
	while (used < len2) {
		used += crc_read_block(ctx, &result,
				       buffer2 + used, len2 - used);
		check_result(result, &curr_literal, results, num_results, &i);
	}

	/* Drain whatever the context is still holding. */
	for (;;) {
		result = crc_read_flush(ctx);
		if (result == 0)
			break;
		check_result(result, &curr_literal, results, num_results, &i);
	}
	check_finalized_result(curr_literal, results, num_results, &i);

	/* We must have achieved everything we expected. */
	ok1(i == num_results);
	crc_context_free(ctx);
}
/*
 * Test driver: checks crc_of_blocks() (truncated final block, bit masking)
 * and then runs test_sync() over identical data, data with one byte
 * appended, data with one block modified, and data with one byte removed
 * from a block.
 */
int main(int argc, char *argv[])
{
char *buffer1, *buffer2;
unsigned int i;
uint32_t crcs1[12], crcs2[12];
/* The count must equal the number of ok1()/ok() calls executed below. */
plan_tests(733);
/* NOTE(review): the calloc results are used without a NULL check. */
buffer1 = calloc(1024, 1);
buffer2 = calloc(1024, 1);
/* Truncated end block test. */
/* 1024 bytes in 100-byte blocks fill entries 0-10, so the sentinel in
 * entry 11 must survive. */
crcs1[11] = 0xdeadbeef;
crc_of_blocks(buffer1, 1024, 100, 32, crcs1);
ok1(crcs1[11] == 0xdeadbeef);
crc_of_blocks(buffer2, 1024, 100, 32, crcs2);
ok1(memcmp(crcs1, crcs2, sizeof(crcs1[0])*11) == 0);
/* Fill with non-zero pattern, retest. */
for (i = 0; i < 1024; i++)
buffer1[i] = buffer2[i] = i + i/128;
crcs1[11] = 0xdeadbeef;
crc_of_blocks(buffer1, 1024, 100, 32, crcs1);
ok1(crcs1[11] == 0xdeadbeef);
crc_of_blocks(buffer2, 1024, 100, 32, crcs2);
ok1(memcmp(crcs1, crcs2, sizeof(crcs1[0])*11) == 0);
/* Check that it correctly masks bits. */
crc_of_blocks(buffer1, 1024, 128, 32, crcs1);
crc_of_blocks(buffer2, 1024, 128, 8, crcs2);
for (i = 0; i < 1024/128; i++)
ok1(crcs2[i] == (crcs1[i] & 0xFF));
/* Now test the "exact match" "round blocks" case. */
{
struct result res[] = {
{ BLOCK, 0 },
{ BLOCK, 1 },
{ BLOCK, 2 },
{ BLOCK, 3 },
{ BLOCK, 4 },
{ BLOCK, 5 },
{ BLOCK, 6 },
{ BLOCK, 7 } };
test_sync(buffer1, 1024, buffer2, 1024, 128,
res, ARRAY_SIZE(res));
}
/* Now test the "exact match" with end block case. */
{
struct result res[] = {
{ BLOCK, 0 },
{ BLOCK, 1 },
{ BLOCK, 2 },
{ BLOCK, 3 },
{ BLOCK, 4 },
{ BLOCK, 5 },
{ BLOCK, 6 },
{ BLOCK, 7 },
{ BLOCK, 8 },
{ BLOCK, 9 },
{ BLOCK, 10 } };
test_sync(buffer1, 1024, buffer2, 1024, 100,
res, ARRAY_SIZE(res));
}
/* Now test the "one byte append" "round blocks" case. */
{
struct result res[] = {
{ BLOCK, 0 },
{ BLOCK, 1 },
{ BLOCK, 2 },
{ BLOCK, 3 },
{ BLOCK, 4 },
{ BLOCK, 5 },
{ BLOCK, 6 },
{ LITERAL, 1 } };
test_sync(buffer1, 1024-128, buffer2, 1024-127, 128,
res, ARRAY_SIZE(res));
}
/* Now test the "one byte append" with end block case. */
{
struct result res[] = {
{ BLOCK, 0 },
{ BLOCK, 1 },
{ BLOCK, 2 },
{ BLOCK, 3 },
{ BLOCK, 4 },
{ BLOCK, 5 },
{ BLOCK, 6 },
{ BLOCK, 7 },
{ BLOCK, 8 },
{ BLOCK, 9 },
{ BLOCK, 10 },
{ LITERAL, 1 } };
test_sync(buffer1, 1023, buffer2, 1024, 100,
res, ARRAY_SIZE(res));
}
/* Now try changing one block at a time, check we get right results. */
for (i = 0; i < 1024/128; i++) {
unsigned int j;
struct result res[8];
/* Mess with block. */
memcpy(buffer2, buffer1, 1024);
buffer2[i * 128]++;
/* Block i is expected back as 128 literal bytes, the others as matches. */
for (j = 0; j < ARRAY_SIZE(res); j++) {
if (j == i) {
res[j].type = LITERAL;
res[j].val = 128;
} else {
res[j].type = BLOCK;
res[j].val = j;
}
}
test_sync(buffer1, 1024, buffer2, 1024, 128,
res, ARRAY_SIZE(res));
}
/* Now try shrinking one block at a time, check we get right results. */
for (i = 0; i < 1024/128; i++) {
unsigned int j;
struct result res[8];
/* Shrink block. */
/* Copy buffer1 while skipping the byte at offset 64 of block i. */
memcpy(buffer2, buffer1, i * 128 + 64);
memcpy(buffer2 + i * 128 + 64, buffer1 + i * 128 + 65,
1024 - (i * 128 + 65));
/* The shortened block i is expected back as 127 literal bytes. */
for (j = 0; j < ARRAY_SIZE(res); j++) {
if (j == i) {
res[j].type = LITERAL;
res[j].val = 127;
} else {
res[j].type = BLOCK;
res[j].val = j;
}
}
test_sync(buffer1, 1024, buffer2, 1023, 128,
res, ARRAY_SIZE(res));
}
/* Now try shrinking one block at a time, check we get right results. */
/* NOTE(review): this loop repeats the previous shrink test statement for
 * statement; possibly a different case (e.g. growing a block) was
 * intended.  Removing or changing it requires adjusting plan_tests(). */
for (i = 0; i < 1024/128; i++) {
unsigned int j;
struct result res[8];
/* Shrink block. */
memcpy(buffer2, buffer1, i * 128 + 64);
memcpy(buffer2 + i * 128 + 64, buffer1 + i * 128 + 65,
1024 - (i * 128 + 65));
for (j = 0; j < ARRAY_SIZE(res); j++) {
if (j == i) {
res[j].type = LITERAL;
res[j].val = 127;
} else {
res[j].type = BLOCK;
res[j].val = j;
}
}
test_sync(buffer1, 1024, buffer2, 1023, 128,
res, ARRAY_SIZE(res));
}
return exit_status();
}
#include "crcsync/crcsync.h"
#include "crcsync/crcsync.c"
#include "tap/tap.h"
/*
 * Test driver for crc_of_blocks(): checks the crc of all-zero data, then
 * compares every block crc against a direct crc32c() call while the buffer
 * is filled in one byte at a time.
 */
int main(int argc, char *argv[])
{
char buffer[1024];
unsigned int i, j;
/* Non-zero starting values, overwritten by the crc_of_blocks() calls below. */
uint32_t crcs[12] = { 0xFFFFF, 0xdeadf00d };
/* 3 fixed checks, plus 8 blocks checked on each of the 1024 iterations. */
plan_tests(3 + 8192);
/* Simple test (we know currently crc of 0s is 0) */
memset(buffer, 0, sizeof(buffer));
crc_of_blocks(buffer, sizeof(buffer), sizeof(buffer), 32, crcs);
ok1(crcs[0] == 0);
crc_of_blocks(buffer, sizeof(buffer), sizeof(buffer)/2, 32, crcs);
ok1(crcs[0] == 0);
ok1(crcs[1] == 0);
/* We know they're using crc32c. */
for (i = 0; i < sizeof(buffer); i++) {
buffer[i] = i;
crc_of_blocks(buffer, sizeof(buffer), sizeof(buffer)/7,
32, crcs);
/* sizeof(buffer)/7 is 146, which does not divide 1024: the last block
 * is short, so its length is clamped below. */
for (j = 0; j < sizeof(buffer); j += sizeof(buffer)/7) {
unsigned int len = sizeof(buffer)/7;
if (j + len > sizeof(buffer))
len = sizeof(buffer) - j;
ok1(crc32c(0, buffer + j, len) == crcs[j/(sizeof(buffer)/7)]);
}
}
return exit_status();
}
#include "crcsync/crcsync.h"
#include "crcsync/crcsync.c"
#include "tap/tap.h"
#include <stdlib.h>
#include <stdbool.h>
/* FIXME: ccanize. */
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
/* Check that rolling a crc forward by one byte gives the same value as
 * computing the crc of the shifted window from scratch, for every window
 * position in 2 * wsize bytes of random() data. */
static void test_roll(unsigned int wsize)
{
	uint8_t data[wsize * 2];
	uint32_t uncrc_tab[256];
	unsigned int pos;

	init_uncrc_tab(uncrc_tab, wsize);

	for (pos = 0; pos < ARRAY_SIZE(data); pos++)
		data[pos] = random();

	for (pos = 1; pos < ARRAY_SIZE(data) - wsize; pos++) {
		/* crc of the window one byte back, then of the window itself. */
		const uint32_t previous = crc32c(0, data + pos - 1, wsize);
		const uint32_t expected = crc32c(0, data + pos, wsize);
		const uint32_t rolled = crc_roll(previous, data[pos - 1],
						 data[pos + wsize - 1],
						 uncrc_tab);

		ok(expected == rolled, "wsize %u, i %u", wsize, pos);
	}
}
/* Run the rolling-crc check for window sizes 100 and 128.  test_roll()
 * performs wsize - 1 checks per window size, which gives the plan count. */
int main(int argc, char *argv[])
{
	static const unsigned int window_sizes[] = { 100, 128 };
	unsigned int n, planned = 0;

	for (n = 0; n < ARRAY_SIZE(window_sizes); n++)
		planned += window_sizes[n] - 1;
	plan_tests(planned);

	for (n = 0; n < ARRAY_SIZE(window_sizes); n++)
		test_roll(window_sizes[n]);

	return exit_status();
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment