Merge.

3c81225f · Rusty Russell · a57ff6cb · 33480b45 · 3c81225f · 3c81225f
Commit 3c81225f authored Jul 18, 2009 by Rusty Russell
28 changed files
--- a/ccan/block_pool/_info
+++ b/ccan/block_pool/_info
+#include <stdio.h>
+#include <string.h>
+#include "config.h"
+
+/**
+ * block_pool - An efficient allocator for blocks that don't need to be
+ * resized or freed.
+ *
+ * block_pool allocates blocks by packing them into buffers, making the
+ * overhead per block virtually zero.  Because of this, you cannot resize or
+ * free individual blocks, but you can free the entire block_pool.
+ *
+ * The rationale behind block_pool is that talloc uses a lot of bytes per
+ * block (48 on 32-bit, 80 on 64-bit).  Nevertheless, talloc is an excellent
+ * tool for C programmers of all ages.  Because a block_pool is a talloc
+ * context, it can be useful in talloc-based applications where many small
+ * blocks need to be allocated.
+ *
+ * Example:
+ *
+ * #include <ccan/block_pool/block_pool.h>
+ *
+ * int main(void) {
+ *      struct block_pool *bp = block_pool_new(NULL);
+ *
+ *      void *buffer = block_pool_alloc(bp, 4096);
+ *      char *string = block_pool_strdup(bp, "A string");
+ *
+ *      int array[] = {0,1,1,2,3,5,8,13,21,34};
+ *      int *array_copy = block_pool_memdup(bp, array, sizeof(array));
+ *
+ *      block_pool_free(bp);
+ *    return 0;
+ * }
+ *
+ *	Author: Joey Adams
+ *	License: BSD
+ */
+int main(int argc, char *argv[])
+{
+	/* The only supported invocation is a single "depends" argument,
+	 * which prints this module's dependency list. */
+	if (argc == 2 && strcmp(argv[1], "depends") == 0) {
+		printf("ccan/talloc\n");
+		return 0;
+	}
+
+	/* Wrong argument count or unknown request */
+	return 1;
+}
--- a/ccan/block_pool/block_pool.c
+++ b/ccan/block_pool/block_pool.c
+/*
+        Copyright (c) 2009  Joseph A. Adams
+        All rights reserved.
+        
+        Redistribution and use in source and binary forms, with or without
+        modification, are permitted provided that the following conditions
+        are met:
+        1. Redistributions of source code must retain the above copyright
+           notice, this list of conditions and the following disclaimer.
+        2. Redistributions in binary form must reproduce the above copyright
+           notice, this list of conditions and the following disclaimer in the
+           documentation and/or other materials provided with the distribution.
+        3. The name of the author may not be used to endorse or promote products
+           derived from this software without specific prior written permission.
+        
+        THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+        IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+        OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+        IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+        INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+        NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+        DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+        THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+        (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+        THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "block_pool.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+//must be a power of 2
+#define BLOCK_SIZE 4096
+
+struct block { //one malloc'd buffer that allocations are carved out of
+	size_t remaining; //bytes not yet handed out (the tail end of data)
+	size_t size; //total number of bytes malloc'd for data
+	char *data; //the buffer itself; free'd by destructor()
+};
+
+struct block_pool {
+	size_t count; //number of entries of block[] currently in use
+	size_t alloc; //2^n - 1, where n is an integer > 1 (capacity of block[])
+	struct block *block; //malloc'd array of blocks; block[0] is the heap root
+	
+	//blocks are arranged in a max-heap by the .remaining field
+	// (except the root block does not percolate down until it is filled)
+};
+
+//talloc destructor for a block_pool: releases the buffer of every block in
+//use, then the block array itself.  Always returns 0.
+static int destructor(struct block_pool *bp) {
+	size_t i;
+	
+	for (i = 0; i < bp->count; i++)
+		free(bp->block[i].data);
+	free(bp->block);
+	
+	return 0;
+}
+
+//Create an empty block pool as a talloc child of ctx (ctx may be NULL).
+//The block array starts with room for 7 blocks; no buffer is allocated until
+//the first allocation request.
+//Returns the new pool, or NULL if memory could not be obtained.
+struct block_pool *block_pool_new(void *ctx) {
+	struct block_pool *bp = talloc(ctx, struct block_pool);
+	
+	if (!bp)
+		return NULL;
+	
+	bp->count = 0;
+	bp->alloc = 7;
+	bp->block = malloc(bp->alloc * sizeof(struct block));
+	if (!bp->block) {
+		//no destructor installed yet, so this only releases bp itself
+		talloc_free(bp);
+		return NULL;
+	}
+	
+	//install the destructor only once the pool is fully initialised
+	talloc_set_destructor(bp, destructor);
+	
+	return bp;
+}
+
+//Set up *b as a fresh block and hand out its first `needed` bytes.
+//The buffer size is `needed` rounded up to a multiple of BLOCK_SIZE.
+//Returns the start of the buffer, i.e. whatever malloc returned.
+static void *new_block(struct block *b, size_t needed) {
+	const size_t mask = BLOCK_SIZE-1;
+	const size_t rounded = (needed + mask) & ~mask;
+	
+	b->size = rounded;
+	b->remaining = rounded - needed;
+	b->data = malloc(rounded);
+	return b->data;
+}
+
+//for the first block, keep the memory usage low in case it's the only block:
+//requests under 256 bytes get a 256-byte buffer, anything larger is rounded
+//up to a multiple of BLOCK_SIZE.  Returns the start of the buffer, i.e.
+//whatever malloc returned.
+static void *new_block_tiny(struct block *b, size_t needed) {
+	size_t total = 256;
+	
+	if (needed >= 256)
+		total = (needed+(BLOCK_SIZE-1)) & ~(size_t)(BLOCK_SIZE-1);
+	
+	b->size = total;
+	b->remaining = total - needed;
+	b->data = malloc(total);
+	return b->data;
+}
+
+//Try to carve `size` bytes out of the unused tail of block b.
+//`align` is the requested alignment MINUS ONE, i.e. a bit mask (0, 1, 3, 7...).
+//The offset is aligned relative to the start of b->data.
+//On success returns the allocation and updates b->remaining; returns NULL
+//and leaves b untouched if the request does not fit.
+static void *try_block(struct block *b, size_t size, size_t align) {
+	size_t offset = b->size - b->remaining;
+	offset = (offset+align) & ~align;
+	
+	//Rounding up can move offset past the end of the buffer (e.g. an
+	//alignment larger than the block).  Without this check b->size-offset
+	//would wrap around and the size test below would wrongly succeed.
+	if (offset > b->size)
+		return NULL;
+	
+	if (b->size-offset >= size) {
+		//good, we can use this block
+		void *ret = b->data + offset;
+		b->remaining = b->size-offset-size;
+		
+		return ret;
+	}
+	
+	return NULL;
+}
+
+//Index arithmetic for the implicit binary heap: left child, right child and
+//parent of a node.  Arguments are parenthesised so that an expression
+//argument expands correctly; note that L and R evaluate it twice.
+#define L(node) ((node)+(node)+1)
+#define R(node) ((node)+(node)+2)
+#define P(node) (((node)-1)>>1)
+
+//heap key of a node; expects a `struct block_pool *bp` to be in scope
+#define V(node) (bp->block[node].remaining)
+
+//Sink the block at `node` until neither of its children has more bytes
+//remaining than it does (or until it becomes a leaf).
+static void percolate_down(struct block_pool *bp, size_t node) {
+	for (;;) {
+		size_t biggest = L(node);
+		struct block held;
+		
+		//no children at all: nothing left to do
+		if (biggest >= bp->count)
+			return;
+		
+		//pick whichever child has more room
+		if (biggest+1 < bp->count && V(biggest+1) > V(biggest))
+			biggest++;
+		
+		//heap property already holds here
+		if (V(biggest) <= V(node))
+			return;
+		
+		held = bp->block[node];
+		bp->block[node] = bp->block[biggest];
+		bp->block[biggest] = held;
+		
+		node = biggest;
+	}
+}
+
+//Raise the block at `node` while it has more bytes remaining than its parent.
+//note:  percolates up to either 1 or 2 as a root (nodes 0, 1 and 2 are
+//never moved by this function)
+static void percolate_up(struct block_pool *bp, size_t node) {
+	while (node >= 3) {
+		size_t parent = P(node);
+		struct block held;
+		
+		if (V(parent) >= V(node))
+			break;
+		
+		held = bp->block[node];
+		bp->block[node] = bp->block[parent];
+		bp->block[parent] = held;
+		
+		node = parent;
+	}
+}
+
+//Allocate `size` bytes from the pool.
+//`align` must be a power of two (0 and 1 both mean "no alignment"); offsets
+//inside an existing block are rounded up to a multiple of it.
+//Returns the allocation, or NULL if memory could not be obtained, in which
+//case the pool is left unchanged and still usable.
+void *block_pool_alloc_align(struct block_pool *bp, size_t size, size_t align) {
+	void *ret;
+	
+	//turn the alignment into a bit mask for try_block
+	if (align)
+		align--;
+	
+	//if there aren't any blocks, make a new one
+	if (!bp->count) {
+		ret = new_block_tiny(bp->block, size);
+		if (ret)
+			bp->count = 1; //only count the block if its buffer exists
+		return ret;
+	}
+	
+	//try the root block
+	ret = try_block(bp->block, size, align);
+	if (ret)
+		return ret;
+	
+	//root block is filled, percolate down and try the biggest one
+	percolate_down(bp, 0);
+	ret = try_block(bp->block, size, align);
+	if (ret)
+		return ret;
+	
+	//the biggest wasn't big enough; we need a new block
+	if (bp->count >= bp->alloc) {
+		//make room for another block (capacity goes 2^n-1 -> 2^(n+1)-1);
+		//keep the old array until realloc is known to have succeeded
+		size_t grown_alloc = bp->alloc + bp->alloc + 1;
+		struct block *grown = realloc(bp->block, grown_alloc * sizeof(struct block));
+		
+		if (!grown)
+			return NULL;
+		bp->block = grown;
+		bp->alloc = grown_alloc;
+	}
+	ret = new_block(bp->block+bp->count, size);
+	if (!ret)
+		return NULL; //slot stays unused; count is not incremented
+	bp->count++;
+	
+	//fix the heap after adding the new block
+	percolate_up(bp, bp->count-1);
+	
+	return ret;
+}
+
+#undef L
+#undef R
+#undef P
+#undef V
+
+//Copy the NUL-terminated string str (terminator included) into the pool.
+//Strings are allocated without alignment.
+//Returns the copy, or NULL if the allocation returned NULL.
+char *block_pool_strdup(struct block_pool *bp, const char *str) {
+	size_t size = strlen(str)+1;
+	char *ret = block_pool_alloc_align(bp, size, 1);
+	
+	if (ret)
+		memcpy(ret, str, size);
+	return ret;
+}
--- a/ccan/block_pool/block_pool.h
+++ b/ccan/block_pool/block_pool.h
+/*
+        Copyright (c) 2009  Joseph A. Adams
+        All rights reserved.
+        
+        Redistribution and use in source and binary forms, with or without
+        modification, are permitted provided that the following conditions
+        are met:
+        1. Redistributions of source code must retain the above copyright
+           notice, this list of conditions and the following disclaimer.
+        2. Redistributions in binary form must reproduce the above copyright
+           notice, this list of conditions and the following disclaimer in the
+           documentation and/or other materials provided with the distribution.
+        3. The name of the author may not be used to endorse or promote products
+           derived from this software without specific prior written permission.
+        
+        THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+        IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+        OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+        IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+        INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+        NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+        DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+        THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+        (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+        THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef CCAN_BLOCK_POOL
+#define CCAN_BLOCK_POOL
+
+#include <ccan/talloc/talloc.h>
+#include <string.h>
+
+struct block_pool;
+
+/* Construct a new block pool.
+   ctx is a talloc context (or NULL if you don't know what talloc is ;) ) */
+struct block_pool *block_pool_new(void *ctx);
+
+/* Same as block_pool_alloc, but allows you to manually specify alignment.
+   For instance, strings need not be aligned, so set align=1 for them.
+   align must be a power of two. */
+void *block_pool_alloc_align(struct block_pool *bp, size_t size, size_t align);
+
+/* Allocate a block of a given size.  The returned pointer will remain valid
+   for the life of the block_pool.  The block cannot be resized or
+   freed individually.
+   The alignment is derived from the size: the greatest power of two that
+   divides it, capped at 16. */
+static inline void *block_pool_alloc(struct block_pool *bp, size_t size) {
+	//lowest set bit of size (0 when size is 0)
+	size_t align = size & (~size + 1);
+	
+	return block_pool_alloc_align(bp, size, align > 16 ? 16 : align);
+}
+
+/* Free the whole pool.  A block_pool is a talloc context, so this simply
+   hands it to talloc_free; the result of talloc_free is not reported. */
+static inline void block_pool_free(struct block_pool *bp) {
+	(void)talloc_free(bp);
+}
+
+
+char *block_pool_strdup(struct block_pool *bp, const char *str);
+
+/* Copy size bytes starting at src into a new allocation from the pool
+   (aligned as block_pool_alloc would align a block of that size).
+   Returns the copy, or NULL if the allocation returned NULL. */
+static inline void *block_pool_memdup(struct block_pool *bp, const void *src, size_t size) {
+	void *ret = block_pool_alloc(bp, size);
+	if (ret)
+		memcpy(ret, src, size);
+	return ret;
+}
+
+#endif
--- a/ccan/block_pool/test/run.c
+++ b/ccan/block_pool/test/run.c
+#include "block_pool/block_pool.h"
+#include "block_pool/block_pool.c"
+#include "tap/tap.h"
+
+struct alloc_record { //one allocation made by test_block_pool
+	size_t size; //number of bytes that were requested
+	char *ptr; //pointer returned by block_pool_alloc
+};
+
+//qsort comparator: orders alloc_records by ascending ptr.
+//Returns -1, 0 or 1.
+static int compar_alloc_record_by_ptr(const void *ap, const void *bp) {
+	const struct alloc_record *lhs = ap;
+	const struct alloc_record *rhs = bp;
+	
+	return (lhs->ptr > rhs->ptr) - (lhs->ptr < rhs->ptr);
+}
+
+//Pick a pseudo-random block size, biased towards small blocks:
+//5 times in 11 below 25 bytes, 3 in 11 below 100, 2 in 11 below 1000 and
+//1 in 11 below 10000.
+static size_t random_block_size(void) {
+	int scale = random() % 11;
+	switch (scale) {
+		case 0:
+		case 1:
+		case 2:
+		case 3:
+		case 4: return random() % 25;
+		case 5:
+		case 6:
+		case 7: return random() % 100;
+		case 8:
+		case 9: return random() % 1000;
+		case 10: return random() % 10000;
+		default:
+			//cannot happen; the message names the modulus actually used above
+			fprintf(stderr, "random() %% 11 returned %d somehow!\n", scale);
+			exit(EXIT_FAILURE);
+	}
+}
+
+//Index arithmetic for the implicit binary heap (left child, right child,
+//parent).  Arguments are parenthesised so that an expression argument
+//expands correctly; note that L and R evaluate it twice.
+#define L(node) ((node)+(node)+1)
+#define R(node) ((node)+(node)+2)
+#define P(node) (((node)-1)>>1)
+
+//heap key of a node; expects a `struct block_pool *bp` to be in scope
+#define V(node) (bp->block[node].remaining)
+
+//used by test_block_pool to make sure the pool's block array is a max heap
+//set node=0 to scan the whole heap (starting at the root)
+//returns nonzero on success
+static int check_heap(struct block_pool *bp, size_t node) {
+	size_t left = L(node);
+	size_t right = R(node);
+	
+	//past the end of the array: an empty subtree is trivially valid
+	if (node >= bp->count)
+		return 1;
+	
+	//the root node need not be the max, but its subtrees must be valid
+	if (node != 0) {
+		if (left < bp->count && V(left) > V(node))
+			return 0;
+		if (right < bp->count && V(right) > V(node))
+			return 0;
+	}
+	
+	if (!check_heap(bp, left))
+		return 0;
+	return check_heap(bp, right);
+}
+
+#undef L
+#undef R
+#undef P
+#undef V
+
+/* Performs a self-test of block_pool.
+   Returns 1 on success, 0 on failure.
+   Progress and diagnostics are written to out; nothing is printed if out
+   is NULL.
+   If verify_heap is nonzero, the test will check the heap structure every
+   single allocation, making test_block_pool take n^2 time.
+   The pool and the record array are released on every return path. */
+static int test_block_pool(size_t blocks_to_try, FILE *out, int verify_heap) {
+	struct block_pool *bp = block_pool_new(NULL);
+	struct alloc_record *record = malloc(sizeof(*record) * blocks_to_try);
+	size_t i;
+	size_t bytes_allocated = 0;
+	int passed = 0;
+	#define print(...) do { \
+			if (out) \
+				fprintf(out, __VA_ARGS__); \
+		} while(0)
+	
+	if (!bp || (!record && blocks_to_try)) {
+		print("Out of memory\n");
+		goto cleanup;
+	}
+	
+	print("Allocating %zu blocks...\n", blocks_to_try);
+	
+	for (i=0; i<blocks_to_try; i++) {
+		record[i].size = random_block_size();
+		record[i].ptr = block_pool_alloc(bp, record[i].size);
+		if (!record[i].ptr) {
+			print("Allocation %zu failed\n", i);
+			goto cleanup;
+		}
+		
+		bytes_allocated += record[i].size;
+		
+		//scribble over the whole block so overlaps would corrupt neighbours
+		memset(record[i].ptr, 0x55, record[i].size);
+		
+		if (verify_heap && !check_heap(bp, 0)) {
+			print("Block pool's max-heap is wrong (allocation %zu)\n", i);
+			goto cleanup;
+		}
+	}
+	
+	print("Finished allocating\n"
+	       "    %zu blocks\n"
+	       "    %zu bytes\n"
+	       "    %zu pages\n",
+		blocks_to_try, bytes_allocated, bp->count);
+	
+	if (blocks_to_try)
+		qsort(record, blocks_to_try,
+			sizeof(*record), compar_alloc_record_by_ptr);
+	
+	print("Making sure block ranges are unique...\n");
+	//neighbours in address order must not overlap
+	for (i=1; i<blocks_to_try; i++) {
+		struct alloc_record *a = &record[i-1];
+		struct alloc_record *b = &record[i];
+		
+		if (a->ptr > b->ptr) {
+			struct alloc_record *tmp = a;
+			a = b;
+			b = tmp;
+		}
+		
+		if (a->ptr <= b->ptr && a->ptr+a->size <= b->ptr)
+			continue;
+		
+		print("Allocations %zu and %zu overlap\n", i-1, i);
+		goto cleanup;
+	}
+	
+	print("Checking heap structure...\n");
+	if (!check_heap(bp, 0)) {
+		print("Block pool's max-heap is wrong\n");
+		goto cleanup;
+	}
+	
+	passed = 1;
+	
+cleanup:
+	if (bp)
+		block_pool_free(bp);
+	free(record);
+	
+	return passed;
+	
+	#undef print
+}
+
+
+int main(void)
+{
+	plan_tests(1); //a single test point is reported below
+	
+	//test a few blocks with heap verification
+	ok1(test_block_pool(10000, NULL, 1)); //NULL out: the self-test prints nothing itself
+	
+	return exit_status();
+}
--- a/ccan/ccan_tokenizer/_info
+++ b/ccan/ccan_tokenizer/_info
+#include <string.h>
+#include <stdio.h>
+#include "config.h"
+
+/**
+ * ccan_tokenizer - A full-text lexer for C source files
+ *
+ * ccan_tokenizer generates a list of tokens given the contents of a C source
+ * or header file.
+ *
+ * Example:
+ *
+ * #include <ccan/ccan_tokenizer/ccan_tokenizer.h>
+ * #include <ccan/grab_file/grab_file.h>
+ * #include <err.h>
+ *
+ * void token_list_stats(const struct token_list *tl) {
+ * 	size_t comment=0, white=0, stray=0, code=0, total=0;
+ * 	size_t count = 0;
+ * 	const struct token *i;
+ *
+ * 	for (i=tl->first; i; i=i->next) {
+ * 		size_t size = i->orig_size;
+ * 		total += size;
+ * 		count++;
+ *
+ * 		if (token_type_is_comment(i->type))
+ * 			comment += size;
+ * 		else if (i->type == TOK_WHITE)
+ * 			white += size;
+ * 		else if (i->type == TOK_STRAY)
+ * 			stray += size;
+ * 		else
+ * 			code += size;
+ * 	}
+ *
+ * 	printf("Code:        %.02f%%\n"
+ * 	       "White space: %.02f%%\n"
+ * 	       "Comments:    %.02f%%\n",
+ * 	       (double)code    * 100.0 / (double)total,
+ * 	       (double)white   * 100.0 / (double)total,
+ * 	       (double)comment * 100.0 / (double)total);
+ * 	if (stray)
+ * 		printf("Stray:       %.02f%%\n",
+ * 			(double)stray * 100.0 / (double)total);
+ * 	printf("Total size:  %zu bytes with %zu tokens\n",
+ * 		total, count);
+ * }
+ *
+ * int main(int argc, char *argv[]) {
+ * 	size_t len;
+ * 	char *file;
+ * 	struct token_list *tl;
+ * 	tok_message_queue mq;
+ * 	queue_init(mq, NULL);
+ *
+ * 	//grab the file
+ * 	if (argc != 2) {
+ * 		fprintf(stderr, "Usage: %s source_file\n", argv[0]);
+ * 		return 1;
+ * 	}
+ * 	file = grab_file(NULL, argv[1], &len);
+ * 	if (!file)
+ * 		err(1, "Could not read file %s", argv[1]);
+ *
+ * 	//tokenize the contents
+ * 	tl = tokenize(file, len, &mq);
+ *
+ * 	//print warnings, errors, etc.
+ * 	while (queue_count(mq)) {
+ * 		struct tok_message msg = dequeue(mq);
+ * 		tok_message_print(&msg, tl);
+ * 	}
+ *
+ * 	//do neat stuff with the token list
+ * 	token_list_stats(tl);
+ *
+ * 	//free stuff
+ * 	talloc_free(file); //implicitly frees tl
+ * 	queue_free(mq);
+ *
+ * 	return 0;
+ * }
+ */
+int main(int argc, char *argv[])
+{
+	/* The only supported invocation is a single "depends" argument,
+	 * which prints this module's dependency list. */
+	if (argc == 2 && strcmp(argv[1], "depends") == 0) {
+		printf("ccan/array\n");
+		return 0;
+	}
+
+	/* Wrong argument count or unknown request */
+	return 1;
+}
--- a/ccan/ccan_tokenizer/ccan_tokenizer.c
+++ b/ccan/ccan_tokenizer/ccan_tokenizer.c
--- a/ccan/ccan_tokenizer/ccan_tokenizer.h
+++ b/ccan/ccan_tokenizer/ccan_tokenizer.h
+/*
+        Copyright (c) 2009  Joseph A. Adams
+        All rights reserved.
+        
+        Redistribution and use in source and binary forms, with or without
+        modification, are permitted provided that the following conditions
+        are met:
+        1. Redistributions of source code must retain the above copyright
+           notice, this list of conditions and the following disclaimer.
+        2. Redistributions in binary form must reproduce the above copyright
+           notice, this list of conditions and the following disclaimer in the
+           documentation and/or other materials provided with the distribution.
+        3. The name of the author may not be used to endorse or promote products
+           derived from this software without specific prior written permission.
+        
+        THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+        IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+        OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+        IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+        INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+        NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+        DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+        THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+        (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+        THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef CCAN_TOKENIZER_H
+#define CCAN_TOKENIZER_H
+
+#include <ccan/array/array.h>
+#include "charflag.h"
+#include "dict.h"
+#include "queue.h"
+#include <stdint.h>
+#include <errno.h> //for readui
+
+/* Definition of tokens and the token list */
+
+enum token_type { //kind of a token; stored in the `type` field of struct token
+	TOK_INTEGER,	   //integer (e.g. 5, 1000L, 0x5)
+	TOK_FLOATING,	   //floating point number (e.g. 5.0, 7.0f, etc.)
+	TOK_OPERATOR,	   //operator (e.g. +, -, (, ), ++, etc.)
+	//the token_type_is_* macros below are range tests, so the declaration order matters
+	#define token_type_is_identifier(type) ((type)>=TOK_KEYWORD && (type)<=TOK_IDENTIFIER)
+	TOK_KEYWORD,	   //keyword (e.g. char, _Bool, ifdef)
+	TOK_IDENTIFIER,	   //identifier or unprocessed keyword (e.g. int, token, pp_conditions)
+	
+	TOK_CHAR,	   //character literal (e.g. 'a' or even '1234')
+	TOK_STRING,	   //string literal (e.g. "hello" or "zero\0inside")
+	TOK_LEADING_POUND, //leading # in a preprocessor directive (e.g. # include)
+	TOK_STRING_IQUOTE, // "config.h"
+	TOK_STRING_IANGLE, // <stdio.h>
+	
+	#define token_type_is_ignored(type) ((type)>=TOK_CCOMMENT && (type)<=TOK_WHITE)
+	#define token_type_is_comment(type) ((type)>=TOK_CCOMMENT && (type)<=TOK_CPPCOMMENT)
+	TOK_CCOMMENT, //C comment (e.g. /* comment */)
+	TOK_CPPCOMMENT, //C++ comment (e.g. //comment )
+	TOK_WHITE, //whitespace (span of \t\n\v\f\r and space)
+	
+	TOK_STARTLINE,	//beginning of line (txt/txtsize is always empty)
+	TOK_STRAY, //control characters, weird characters, and extended characters where they shouldn't be
+};
+
+enum tok_suffix { //bit flags describing a numeric literal's suffix; combined with |
+	TOK_NOSUFFIX = 0,
+	
+	TOK_U  = 1,  //unsigned
+	TOK_L  = 2,  //long or double-precision float
+	TOK_LL = 4,  //long long (note that TOK_L and TOK_LL are mutually exclusive)
+	TOK_F  = 8,  //float (single-precision)
+	TOK_I  = 16, //imaginary
+	
+	TOK_UL  = TOK_U | TOK_L,  //unsigned long
+	TOK_ULL = TOK_U | TOK_LL, //unsigned long long
+	
+	//Imaginary combo meals
+	TOK_IMAG_U   = TOK_I | TOK_U,
+	TOK_IMAG_L   = TOK_I | TOK_L,
+	TOK_IMAG_LL  = TOK_I | TOK_LL,
+	TOK_IMAG_F   = TOK_I | TOK_F,
+	
+	TOK_IMAG_UL  = TOK_I | TOK_UL,
+	TOK_IMAG_ULL = TOK_I | TOK_ULL,
+};
+
+struct tok_integer { //the `integer` member of struct token's union; presumably used for TOK_INTEGER tokens
+	uint64_t v; //presumably the literal's value -- TODO confirm against read_cnumber
+	int base; //one of 2, 8, 10, or 16
+	enum tok_suffix suffix;
+};
+
+struct tok_floating { //the `floating` member of struct token's union; presumably used for TOK_FLOATING tokens
+	long double v; //presumably the literal's value -- TODO confirm against read_cnumber
+	enum tok_suffix suffix;
+};
+
+//Operator/keyword naming conventions taken from Jeff Lee's Yacc grammar:
+//http://www.lysator.liu.se/c/ANSI-C-grammar-y.html
+enum tok_opkw { //values for the opkw field of struct token; single-character operators use the character itself (e.g. '+')
+	/* Permute these regularly */
+	PTR_OP=128, INC_OP, DEC_OP, LEFT_OP, RIGHT_OP, LE_OP, GE_OP, EQ_OP, NE_OP,
+	AND_OP, OR_OP,
+	MUL_ASSIGN, DIV_ASSIGN, MOD_ASSIGN,
+	ADD_ASSIGN, SUB_ASSIGN,
+	AND_ASSIGN, XOR_ASSIGN, OR_ASSIGN,
+	LEFT_ASSIGN, RIGHT_ASSIGN,
+	ELLIPSIS,
+	DOUBLE_POUND,
+	
+	//Keywords
+	_BOOL,
+	_COMPLEX,
+	_IMAGINARY,
+	BREAK,
+	CASE,
+	CHAR,
+	CONST,
+	CONTINUE,
+	DEFAULT,
+	DO,
+	DOUBLE,
+	ELSE,
+	ENUM,
+	EXTERN,
+	FLOAT,
+	FOR,
+	GOTO,
+	IF,
+	INLINE,
+	INT,
+	LONG,
+	REGISTER,
+	RESTRICT,
+	RETURN,
+	SHORT,
+	SIGNED,
+	SIZEOF,
+	STATIC,
+	STRUCT,
+	SWITCH,
+	TYPEDEF,
+	UNION,
+	UNSIGNED,
+	VOID,
+	VOLATILE,
+	WHILE,
+	
+	//Preprocessor keywords (except those already defined)
+	VA_ARGS,
+	#define opkw_is_directive_only(opkw) ((opkw)>=DEFINE && (opkw)<=WARNING)
+	#define opkw_is_directive(opkw) (opkw_is_directive_only(opkw) || (opkw)==ELSE || (opkw)==IF)
+	DEFINE,
+	ELIF,
+	//ELSE, (already declared among the keywords; opkw_is_directive tests it explicitly)
+	ENDIF,
+	ERROR,
+	//IF, (already declared among the keywords; opkw_is_directive tests it explicitly)
+	IFDEF,
+	IFNDEF,
+	INCLUDE,
+	LINE,
+	PRAGMA,
+	UNDEF,
+	WARNING, /* gcc extension */
+};
+
+struct token_flags { //one-bit flags stored in the `flags` field of struct token
+	unsigned short
+		pp:1, //is token part of a preprocessor line
+		pp_directive:1; //does token follow a TOK_LEADING_POUND (e.g. # include)
+};
+
+struct token { //one node of the doubly linked list held by struct token_list
+	struct token *prev, *next;
+	
+	struct token_flags flags;
+	short type; //enum token_type
+	union { //anonymous union: members are accessed directly, e.g. tok->opkw
+		struct tok_integer integer;
+		struct tok_floating floating;
+		int opkw; //operator or keyword ID (e.g. '+', INC_OP (++), ADD_ASSIGN (+=))
+		array_char string; //applies to TOK_CHAR and TOK_STRING
+		char *include; //applies to TOK_STRING_IQUOTE and TOK_STRING_IANGLE
+	};
+	
+	//text this token represents (with backslash-broken lines merged)
+	const char *txt;
+	size_t txt_size;
+	
+	//text this token represents (untouched)
+	const char *orig;
+	size_t orig_size;
+	
+	//zero-based line and column number of this token
+	size_t line, col;
+};
+
+//keywords such as int, long, etc. may be defined over, making them identifiers in a sense
+//(so this is true for both TOK_KEYWORD and TOK_IDENTIFIER tokens)
+static inline int token_is_identifier(const struct token *tok) {
+	const int type = tok->type;
+	
+	return token_type_is_identifier(type);
+}
+
+//nonzero if the token is a comment or whitespace
+static inline int token_is_ignored(const struct token *tok) {
+	const int type = tok->type;
+	
+	return token_type_is_ignored(type);
+}
+
+//nonzero if the token is an operator whose ID is opkw
+static inline int token_is_op(const struct token *tok, int opkw) {
+	if (tok->type != TOK_OPERATOR)
+		return 0;
+	return tok->opkw == opkw;
+}
+
+//nonzero if the token is a keyword whose ID is opkw
+static inline int token_is_kw(const struct token *tok, int opkw) {
+	if (tok->type != TOK_KEYWORD)
+		return 0;
+	return tok->opkw == opkw;
+}
+
+//nonzero if the token's txt is exactly the NUL-terminated string str
+static inline int token_txt_is(const struct token *tok, const char *str) {
+	const size_t want = strlen(str);
+	
+	if (tok->txt_size != want)
+		return 0;
+	return memcmp(tok->txt, str, want) == 0;
+}
+
+struct token_list { //returned by tokenize(); the tokens form a linked list from first to last
+	struct token *first, *last;
+	
+	//Points to original input as given
+	const char *orig;
+	size_t orig_size;
+	
+	//position of the start of each real line with respect to orig
+	const char * const *olines;
+	size_t olines_size;
+	
+	//Copy of original input without backslash-broken lines
+	const char *txt;
+	size_t txt_size;
+	
+	//position of the start of each real line with respect to txt
+	const char * const *tlines;
+	size_t tlines_size;
+	
+	//Set me so tok_message_print will know what file name to display
+	const char *filename;
+};
+
+extern struct dict *tokenizer_dict;
+
+typedef queue(struct tok_message) tok_message_queue;
+
+//the token_list is allocated as a child of orig
+struct token_list *tokenize(const char *orig, size_t orig_size, tok_message_queue *mq);
+
+size_t token_list_count(const struct token_list *tl);
+
+//used for debugging
+int token_list_sanity_check(const struct token_list *tl, FILE *err);
+void token_list_dump(const struct token_list *tl, FILE *f);
+
+/* tok_point_lookup is used to locate a pointer that is within a token list's
+   txt or orig fields */
+
+struct tok_point { //output of tok_point_lookup
+	const char *txt, *orig; //presumably the matching positions in the list's txt and orig buffers -- TODO confirm
+	size_t line, col;
+};
+
+//returns nonzero if the pointer could be resolved
+int tok_point_lookup(struct tok_point *out, const char *ptr,
+			const struct token_list *tl);
+
+
+/* Tokenizer message queue; used to gather and report warnings, errors, etc. */
+
+enum tok_message_level {TM_DEBUG, TM_INFO, TM_WARN, TM_ERROR, TM_BUG}; //`level` of a struct tok_message; one tok_msg_* macro exists per value
+
+struct tok_message { //element type of tok_message_queue
+	enum tok_message_level level;
+	const char *path;
+		//Unique slash-delimited name of the message
+		//e.g. tokenize/read_cstring/ambiguous_octal
+	const char *message;
+		//Human-readable description
+		//e.g. `Octal \007 followed by digit`
+	const char *location;
+		//Pointer (typically within the token list's txt or orig) of the error
+};
+//Each tok_msg_* macro needs a `tok_message_queue *mq` and a MESSAGE_PATH string literal in scope at the call site; `name` is stringified and appended to MESSAGE_PATH. ##__VA_ARGS__ is a GNU extension.
+#define tok_msg_debug(name, loc, fmt, ...) tok_message_add(mq, TM_DEBUG, MESSAGE_PATH #name, loc, fmt, ##__VA_ARGS__)
+#define tok_msg_info(name, loc, fmt, ...) tok_message_add(mq, TM_INFO, MESSAGE_PATH #name, loc, fmt, ##__VA_ARGS__)
+#define tok_msg_warn(name, loc, fmt, ...) tok_message_add(mq, TM_WARN, MESSAGE_PATH #name, loc, fmt, ##__VA_ARGS__)
+#define tok_msg_error(name, loc, fmt, ...) tok_message_add(mq, TM_ERROR, MESSAGE_PATH #name, loc, fmt, ##__VA_ARGS__)
+#define tok_msg_bug(name, loc, fmt, ...) tok_message_add(mq, TM_BUG, MESSAGE_PATH #name, loc, fmt, ##__VA_ARGS__)
+
+void tok_message_add(tok_message_queue *mq, enum tok_message_level level,
+	const char *path, const char *loc, const char *fmt, ...);
+
+void tok_message_print(struct tok_message *m, struct token_list *tl);
+
+void tok_message_dump(struct tok_message *m);
+void tok_message_queue_dump(const tok_message_queue *mq);
+
+
+/* Miscellaneous internal components */
+
+char *read_cstring(array_char *out, const char *s, const char *e, char quoteChar, tok_message_queue *mq);
+char *read_cnumber(struct token *tok, const char *s, const char *e, tok_message_queue *mq);
+
+
+typedef unsigned int readui_base; //a numeric base, optionally OR'ed with the READUI_ALLOW* flags below
+
+#define READUI_ALLOWHIGHERDIGITS 256
+#define READUI_ALLOWCAPLETTERS 512
+#define READUI_ALLOWLCASELETTERS 1024
+#define READUI_ALLOWLETTERS (READUI_ALLOWCAPLETTERS | READUI_ALLOWLCASELETTERS)
+
+#define READUI_DEC      ((readui_base)(10))
+#define READUI_HEX      ((readui_base)(16 | READUI_ALLOWLETTERS)) //base 16 with both letter cases allowed
+#define READUI_OCT      ((readui_base)(8))
+#define READUI_BIN      ((readui_base)(2))
+
+uint64_t readui(const char **sp, const char *e, readui_base base);
+
+#endif
--- a/ccan/ccan_tokenizer/charflag.c
+++ b/ccan/ccan_tokenizer/charflag.c
+#include "charflag.h"
+
+#define C CF_CONTROL //one-letter aliases used only to keep the table below compact; all are #undef'd after it
+#define S CF_SPACE
+#define R CF_RETURN
+#define D CF_DIGIT
+#define L CF_LETTER
+#define H CF_HEX
+#define Y CF_SYMBOL
+
+unsigned char charflag[256] = { //CF_* flag bits for each byte value, indexed by the byte as an unsigned char
+   C,C,C,C,C,C,C,C,C,
+   S, // \t
+   R, // \n
+   S, // \v
+   S, // \f
+   R, // \r
+   C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,
+   S, // space
+   Y,   // !
+   Y,   // "
+   Y,   // #
+   Y,   // $
+   Y,   // %
+   Y,   // &
+   Y,   // '
+   Y,   // (
+   Y,   // )
+   Y,   // *
+   Y,   // +
+   Y,   // ,
+   Y,   // -
+   Y,   // .
+   Y,   // /
+   D|H, // 0
+   D|H, // 1
+   D|H, // 2
+   D|H, // 3
+   D|H, // 4
+   D|H, // 5
+   D|H, // 6
+   D|H, // 7
+   D|H, // 8
+   D|H, // 9
+   Y,   // :
+   Y,   // ;
+   Y,   // <
+   Y,   // =
+   Y,   // >
+   Y,   // ?
+   Y,   // @
+   L|H, // A
+   L|H, // B
+   L|H, // C
+   L|H, // D
+   L|H, // E
+   L|H, // F
+   L,   // G
+   L,   // H
+   L,   // I
+   L,   // J
+   L,   // K
+   L,   // L
+   L,   // M
+   L,   // N
+   L,   // O
+   L,   // P
+   L,   // Q
+   L,   // R
+   L,   // S
+   L,   // T
+   L,   // U
+   L,   // V
+   L,   // W
+   L,   // X
+   L,   // Y
+   L,   // Z
+   Y,   // [
+   Y,   // \ (backslash)
+   Y,   // ]
+   Y,   // ^
+   Y,   // _ (classified as a symbol, not a letter)
+   Y,   // `
+   L|H, // a
+   L|H, // b
+   L|H, // c
+   L|H, // d
+   L|H, // e
+   L|H, // f
+   L,   // g
+   L,   // h
+   L,   // i
+   L,   // j
+   L,   // k
+   L,   // l
+   L,   // m
+   L,   // n
+   L,   // o
+   L,   // p
+   L,   // q
+   L,   // r
+   L,   // s
+   L,   // t
+   L,   // u
+   L,   // v
+   L,   // w
+   L,   // x
+   L,   // y
+   L,   // z
+   Y,   // {
+   Y,   // |
+   Y,   // }
+   Y,   // ~
+   C,   // DEL
+   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+};
+
+#undef C
+#undef S
+#undef R
+#undef D
+#undef L
+#undef H
+#undef Y
--- a/ccan/ccan_tokenizer/charflag.h
+++ b/ccan/ccan_tokenizer/charflag.h
+#ifndef CCAN_CHARFLAG_H
+#define CCAN_CHARFLAG_H
+
+//All of these macros evaluate the argument exactly once. Those built with & yield the flag bit itself (nonzero, not necessarily 1) when the test holds.
+
+#define ccontrol(c)  (charflag(c) & CF_CONTROL) //Weird characters that shouldn't be in text
+#define cspace(c)    (charflag(c) & CF_SPACE)   //Space, tab, vertical tab, form feed
+#define creturn(c)   (charflag(c) & CF_RETURN)  //Newline (\n) or carriage return (\r)
+#define cwhite(c)    (charflag(c) & CF_WHITE)   //cspace or creturn
+#define cdigit(c)    (charflag(c) & CF_DIGIT)   //0-9
+#define cletter(c)   (charflag(c) & CF_LETTER)  //A-Za-z
+#define chex(c)      (charflag(c) & CF_HEX)     //0-9A-Fa-f
+#define csymbol(c)   (charflag(c) & CF_SYMBOL)
+	// !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~
+	//If it's ASCII, prints a non-blank character, and is not a digit or letter, it's a symbol
+#define cextended(c) (charflag(c) == 0)         //Characters >= 128
+
+/* To test:
+
+All charflag macros should evaluate exactly once
+
+*/
+
+extern unsigned char charflag[256]; //flag table, defined in charflag.c
+#define charflag(c) (charflag[(unsigned int)(unsigned char)(c)]) //the cast through unsigned char keeps negative char values from indexing out of range
+
+#define CF_CONTROL ((unsigned char)  1) //each CF_* flag is a distinct bit, so flags can be OR'ed together
+#define CF_SPACE   ((unsigned char)  2)
+#define CF_RETURN  ((unsigned char)  4)
+#define CF_DIGIT   ((unsigned char)  8)
+#define CF_LETTER  ((unsigned char) 16)
+#define CF_HEX     ((unsigned char) 32)
+#define CF_SYMBOL  ((unsigned char) 64)
+
+#define CF_WHITE (CF_SPACE|CF_RETURN)
+
+#endif
--- a/ccan/ccan_tokenizer/dict.c
+++ b/ccan/ccan_tokenizer/dict.c
+#include "dict.h"
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+
+//compare dict_entries by first letter ascending, then by length descending
+//(qsort comparator; returns -1, 0 or 1)
+static int compar_dict_entry(const void *ap, const void *bp) {
+	const struct dict_entry *lhs = ap, *rhs = bp;
+	unsigned int lead_l = (unsigned int)lhs->str[0];
+	unsigned int lead_r = (unsigned int)rhs->str[0];
+	size_t len_l, len_r;
+	
+	if (lead_l != lead_r)
+		return lead_l < lead_r ? -1 : 1;
+	
+	//same first letter: the longer string sorts first
+	len_l = strlen(lhs->str);
+	len_r = strlen(rhs->str);
+	return (len_l < len_r) - (len_l > len_r);
+}
+
+//Build a lookup dictionary from `count` entries.
+//The entries are copied into an array owned by the returned dict (a talloc
+//child of ctx) and sorted by first letter, longest string first within each
+//letter.  dict->zero is set to the empty-string entry, if any, and
+//by_first_letter[c-1] to the first entry starting with byte c.
+//Returns the dict, or NULL if memory could not be obtained.  Exits the
+//program if `entries` contains more than one empty string.
+struct dict *dict_build(void *ctx, const struct dict_entry *entries, size_t count) {
+	struct dict *dict = talloc_zero(ctx, struct dict);
+	struct dict_entry *ent;
+	int i;
+	
+	if (!dict || !count)
+		return dict;
+	
+	//one extra slot for a terminating entry (see below)
+	ent = talloc_array(dict, struct dict_entry, count+1);
+	if (!ent) {
+		talloc_free(dict);
+		return NULL;
+	}
+	memcpy(ent, entries, count*sizeof(struct dict_entry));
+	qsort(ent, count, sizeof(*ent), compar_dict_entry);
+	
+	//dict_lookup scans a first-letter group until it meets an entry with a
+	//different first character.  Terminate the array with an empty string
+	//so the scan of the last group stops inside the allocation.
+	ent[count].id = 0;
+	ent[count].str = "";
+	
+	if (ent->str[0]==0) {
+		dict->zero = ent;
+		ent++, count--;
+		
+		if (count && ent->str[0]==0) {
+			fprintf(stderr, "dict_entry array contains multiple empty strings\n");
+			exit(EXIT_FAILURE);
+		}
+	}
+	
+	//walk the sorted entries once, recording where each letter's group starts
+	for (i=1; i<256; i++) {
+		if (!count)
+			break;
+		if (ent->str[0] == (char)i)
+			dict->by_first_letter[i-1] = ent;
+		while (count && ent->str[0] == (char)i)
+			ent++, count--;
+	}
+	
+	return dict;
+}
+
+//Find the first dictionary entry that is a prefix of the text [*sp, e).
+//Entries sharing a first letter are tried in array order.
+//On a match, *sp is advanced past the matched text and the entry is
+//returned; otherwise NULL is returned and *sp is left alone.
+//NOTE(review): the scan of a first-letter group only stops at an entry whose
+//first character differs, so the entry array must be followed by such an
+//entry even after the last group -- confirm that dict_build provides one.
+struct dict_entry *dict_lookup(struct dict *dict, const char **sp, const char *e) {
+	const char *start = *sp;
+	struct dict_entry *cand;
+	unsigned int first;
+	
+	if (start >= e)
+		return NULL;
+	first = (unsigned int)*start & 0xFF;
+	
+	//a NUL byte can only match the empty-string entry; it consumes the byte
+	if (first == 0) {
+		if (dict->zero)
+			*sp = start + 1;
+		return dict->zero;
+	}
+	
+	cand = dict->by_first_letter[first-1];
+	if (cand == NULL)
+		return NULL;
+	
+	while (cand->str[0] == (char)first) {
+		const char *in = start;
+		const char *word = cand->str;
+		
+		//advance while the input keeps agreeing with the entry
+		while (*word != 0 && in < e && *in == *word) {
+			in++;
+			word++;
+		}
+		
+		//the whole entry was consumed: it is a prefix of the input
+		if (*word == 0) {
+			*sp = in;
+			return cand;
+		}
+		cand++;
+	}
+	
+	return NULL;
+}
--- a/ccan/ccan_tokenizer/dict.h
+++ b/ccan/ccan_tokenizer/dict.h
+#ifndef CCAN_TOKENIZER_DICT_H
+#define CCAN_TOKENIZER_DICT_H
+
+#include <stdint.h>
+#include <ccan/talloc/talloc.h>
+	//needed for freeing the struct dict*
+
+//One dictionary word: the string to match and the id the caller gave it.
+struct dict_entry {
+	int id;
+	const char *str;
+};
+
+//Index filled in by dict_build() and consulted by dict_lookup().
+//  zero:  the entry whose str is empty, or NULL if there is none
+//  by_first_letter[c-1]:  first entry whose str starts with byte c (1..255),
+//      or NULL if no entry starts with c
+struct dict {
+	struct dict_entry *zero;
+	struct dict_entry *by_first_letter[256];
+};
+
+//Builds a dict (a talloc child of ctx) from a copy of entries[0..count-1].
+struct dict *dict_build(void *ctx, const struct dict_entry *entries, size_t count);
+//Matches an entry against the input at *sp (ending at e); on success *sp is
+//advanced past the match and the entry is returned, otherwise NULL.
+struct dict_entry *dict_lookup(struct dict *dict, const char **sp, const char *e);
+
+#endif
--- a/ccan/ccan_tokenizer/documentation
+++ b/ccan/ccan_tokenizer/documentation
+readui - Flexible function for reading a 64-bit unsigned integer
+@sp: Pointer to scanning pointer
+@e:  Pointer to end of string
+@base:  Typically one of READUI_DEC, READUI_HEX, READUI_OCT, or READUI_BIN.
+
+readui() converts the string of digits from *sp to e to a number, setting *sp to the first invalid character or e if the entire string is valid or empty.  It does not look at prefixes or suffixes, only digits.  It skips preceding whitespace.
+
+readui() uses errno to indicate success or failure.  It will set errno to one of the following:
+
+0:  Input is valid and non-empty
+EINVAL:  Input is empty, does not start with any valid digits, or base is 0
+ERANGE:  Number given is greater than ULLONG_MAX
+
+Example (UNTESTED):
+
+uint64_t read_number(const char *str) {
+	const char *s = str, *e = strchr(str, 0);
+	readui_base base = READUI_DEC;
+	uint64_t result;
+	
+	//See if the number has a 0x (for hex) or 0 (for octal) prefix
+	if (s+2<=e && *s=='0') {
+		s++;
+		if (*s=='x' || *s=='X') {
+			base = READUI_HEX;
+			s++;
+		} else
+			base = READUI_OCT;
+	}
+	
+	result = readui(&s, e, base);
+	
+	if (errno)
+		perror("read_number");
+	
+	return result;
+}
+
+Rules for a token list:
+
+A token list is never empty: its first token is always a TOK_STARTLINE.
+
+
+Misc.:
+
+If the world were intuitive, the tokenizer would never report warnings or bugs on a source file that compiles successfully.  However, one case where it does is when erroneous tokens appear within an #if 0 block.  Example:
+
+#if 0
+0b101.0p0
+#endif
+
--- a/ccan/ccan_tokenizer/number_constant.guppy
+++ b/ccan/ccan_tokenizer/number_constant.guppy
+/*
+guppy is a pattern-matching language by Joey Adams that's not implemented or formalized yet.
+See http://www.funsitelots.com/pub/guppy.g for a near self-definition
+
+This is a guppy representation of integer and floating point formatting in C.
+It is based on http://c0x.coding-guidelines.com/6.4.4.1.html and http://c0x.coding-guidelines.com/6.4.4.2.html
+*/
+
+number_constant: [
+	integer_constant()
+	floating_constant()
+]
+
+integer_constant: [
+	([1-9] [0-9]*)           //decimal
+	(0 [0-7]*)               //octal
+	(0 [X x] [0-9 A-F a-f]*) //hexadecimal
+]
+
+integer_suffix: [
+	([U u] [L l]*0..2)
+	([L l]*1..2 [U u]*0..1)
+]
+
+floating_constant: [
+	decimal_floating_constant()
+	hexadecimal_floating_constant()
+]
+
+decimal_floating_constant: [
+	([0-9]* '.' [0-9]+ exponent_part()*0..1 floating_suffix())
+	([0-9]+ '.' exponent_part()*0..1 floating_suffix())
+	([0-9]+ exponent_part() floating_suffix())
+]
+
+exponent_part:
+	([E e] ['+' '-']*0..1 [0-9]+)
+
+hexadecimal_floating_constant:
+	(0 [X x] [
+		[0-9 A-F a-f]* '.' [0-9 A-F a-f]+
+		[0-9 A-F a-f]+ '.'
+		[0-9 A-F a-f]+
+	] [P p] ['+' '-']*0..1 [0-9]+ floating_suffix())
+
+floating_suffix: [F L f l]*0..1
+
+scan_number:
+(
+	[
+		(0 [X x] [0-9 A-F a-f '.']*)
+		(0 [B b] [0-1] [0-9 '.']*)
+		([0-9 '.']*)
+	]
+	( [E P e p] ['+' '-']*0..1 [0-9]* )*0..1
+	[0-9 A-Z a-z '.' '_' '$']*
+)
+
+/*
+Notes:
+
+A numeric constant can begin with any of:
+	0-9 '.'
+and can contain any of:
+	0-9 a-f e f l p u x '.' '+' '-'
+along with capital equivalents.
+
+If scanning finds something starting with a '.' but no decimal digit after it, it is the '.' operator and not a number.
+
+*/
--- a/ccan/ccan_tokenizer/queue.c
+++ b/ccan/ccan_tokenizer/queue.c
+/*
+        Copyright (c) 2009  Joseph A. Adams
+        All rights reserved.
+        
+        Redistribution and use in source and binary forms, with or without
+        modification, are permitted provided that the following conditions
+        are met:
+        1. Redistributions of source code must retain the above copyright
+           notice, this list of conditions and the following disclaimer.
+        2. Redistributions in binary form must reproduce the above copyright
+           notice, this list of conditions and the following disclaimer in the
+           documentation and/or other materials provided with the distribution.
+        3. The name of the author may not be used to endorse or promote products
+           derived from this software without specific prior written permission.
+        
+        THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+        IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+        OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+        IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+        INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+        NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+        DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+        THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+        (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+        THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "queue.h"
+#include <ccan/talloc/talloc.h>
+#include <string.h>
+
+/*
+ * Grows a queue that has just become full; the enqueue() macro calls this
+ * when tail has caught up with head.  The capacity (flag+1) is doubled and
+ * the items from head to the end of the old buffer are moved up by the old
+ * capacity, so they stay in order after the items stored before head.
+ *
+ * qp points to a queue(type) struct; itemSize is sizeof(type).
+ */
+void queue_enqueue_helper(void *qp, size_t itemSize) {
+	queue(char) *q = qp;
+	const size_t old_cap = q->flag + 1;
+	const size_t new_cap = 2 * old_cap;
+	const size_t moved = old_cap - q->head;	//items from head to the old end
+
+	q->flag = new_cap - 1;
+	q->item = talloc_realloc_size(NULL, q->item, new_cap * itemSize);
+
+	memcpy(q->item + (q->head + old_cap) * itemSize,
+	       q->item + q->head * itemSize,
+	       moved * itemSize);
+	q->head += old_cap;
+}
+
+//Comparator that reports every pair as equal.  queue.h hands it to qsort()
+//in queue_alias() when HAVE_ATTRIBUTE_MAY_ALIAS is not 1.
+int queue_alias_helper(const void *a, const void *b) {
+	(void)a;
+	(void)b;
+	return 0;
+}
--- a/ccan/ccan_tokenizer/queue.h
+++ b/ccan/ccan_tokenizer/queue.h
+/*
+        Copyright (c) 2009  Joseph A. Adams
+        All rights reserved.
+        
+        Redistribution and use in source and binary forms, with or without
+        modification, are permitted provided that the following conditions
+        are met:
+        1. Redistributions of source code must retain the above copyright
+           notice, this list of conditions and the following disclaimer.
+        2. Redistributions in binary form must reproduce the above copyright
+           notice, this list of conditions and the following disclaimer in the
+           documentation and/or other materials provided with the distribution.
+        3. The name of the author may not be used to endorse or promote products
+           derived from this software without specific prior written permission.
+        
+        THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+        IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+        OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+        IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+        INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+        NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+        DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+        THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+        (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+        THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef CCAN_QUEUE_H
+#define CCAN_QUEUE_H
+
+#include <stdint.h>
+#include <ccan/talloc/talloc.h>
+
+//Unless told otherwise, assume __attribute__((__may_alias__)) is available.
+#ifndef HAVE_ATTRIBUTE_MAY_ALIAS
+#define HAVE_ATTRIBUTE_MAY_ALIAS 1
+#endif
+
+//queue(type) names an anonymous struct holding a ring buffer of `type`:
+//  head:  index of the next item to dequeue
+//  tail:  index of the next free slot
+//  flag:  capacity-1, used as an index mask (queue_init starts with 3)
+//  item:  the talloc'd buffer
+//queue_alias(ptr) is used after queue_enqueue_helper() has changed the
+//queue through a void*.  NOTE(review): the qsort() fallback presumably
+//exists to stop the compiler from caching the old fields -- confirm.
+#if HAVE_ATTRIBUTE_MAY_ALIAS==1
+#define queue_alias(ptr) /* nothing */
+#define queue(type) struct {size_t head, tail, flag; type *item;} __attribute__((__may_alias__))
+#else
+#define queue_alias(ptr) qsort(ptr, 0, 1, queue_alias_helper) //hack
+#define queue(type) struct {size_t head, tail, flag; type *item;}
+#endif
+
+int queue_alias_helper(const void *a, const void *b);
+
+//queue_init: empty queue with room for 4 items, allocated under ctx.
+//queue_free: releases the item buffer.
+#define queue_init(queue, ctx) do {(queue).head = (queue).tail = 0; (queue).flag = 3; (queue).item = talloc_size(ctx, sizeof(*(queue).item)*4);} while(0)
+#define queue_free(queue) do {talloc_free((queue).item);} while(0)
+
+//queue_count: number of items currently stored.
+//enqueue: stores the value at tail; if that made tail meet head the buffer
+//  is full, so it is grown by queue_enqueue_helper().
+//dequeue: removes and yields the item at head (the queue must not be empty).
+//dequeue_check: like dequeue, but yields NULL when the queue is empty, so it
+//  suits queues whose items are pointers.
+#define queue_count(queue) (((queue).tail-(queue).head) & (queue).flag)
+#define enqueue(queue, ...) \
+	do { \
+		(queue).item[(queue).tail++] = (__VA_ARGS__); \
+		(queue).tail &= (queue).flag; \
+		if ((queue).tail == (queue).head) { \
+			queue_enqueue_helper(&(queue), sizeof(*(queue).item)); \
+			queue_alias(&(queue)); \
+		} \
+	} while(0)
+#define dequeue_check(queue) ((queue).head != (queue).tail ? dequeue(queue) : NULL)
+#define dequeue(queue) ((queue).item[queue_dequeue_helper(&(queue).head, (queue).flag)])
+
+//TODO:  Test us
+//queue_next: the item at head, without removing it.
+//queue_item: the item `pos` slots after head, without removing it.
+//queue_skip: drops the item at head.
+#define queue_next(queue) ((queue).item[(queue).head])
+#define queue_item(queue, pos) ((queue).item[((queue).head+(pos)) & (queue).flag])
+#define queue_skip(queue) do {(queue).head++; (queue).head &= (queue).flag;} while(0)
+
+void queue_enqueue_helper(void *qp, size_t itemSize);
+
+//Advances *head by one slot, wrapping with the index mask `flag`, and
+//returns the index head had before the advance.
+static inline size_t queue_dequeue_helper(size_t *head, size_t flag) {
+	const size_t taken = *head;
+	*head = (taken + 1) & flag;
+	return taken;
+}
+
+#endif
--- a/ccan/ccan_tokenizer/read_cnumber.c
+++ b/ccan/ccan_tokenizer/read_cnumber.c
+
+//for strtold
+#define _ISOC99_SOURCE
+#include <stdlib.h>
+#undef _ISOC99_SOURCE
+
+#include "ccan_tokenizer.h"
+
+#ifndef ULLONG_MAX
+#define ULLONG_MAX 18446744073709551615ULL
+#endif
+
+/*
+ * Returns a pointer to the first character in [s, e) that is not acceptable
+ * as a digit for base, or e if every character is acceptable.
+ *
+ * The low 8 bits of base are the radix.  Letters count as digits 10 and up,
+ * but only when the matching READUI_ALLOWCAPLETTERS / READUI_ALLOWLCASELETTERS
+ * flag is set.  A digit whose value is not below the radix ends the number
+ * unless READUI_ALLOWHIGHERDIGITS is set.
+ */
+static const char *skipnum(const char *s, const char *e, readui_base base) {
+	const unsigned int radix = base & 0xFF;
+	const int reject_high = !(base & READUI_ALLOWHIGHERDIGITS);
+
+	while (s < e) {
+		unsigned int c = (unsigned char)*s;
+		unsigned int value;
+
+		if (cdigit(c)) {
+			value = c-'0';
+		} else if (c>='A' && c<='Z') {
+			if (!(base & READUI_ALLOWCAPLETTERS))
+				break;
+			value = c-'A'+10;
+		} else if (c>='a' && c<='z') {
+			if (!(base & READUI_ALLOWLCASELETTERS))
+				break;
+			value = c-'a'+10;
+		} else {
+			break;
+		}
+
+		if (reject_high && value >= radix)
+			break;
+		s++;
+	}
+
+	return s;
+}
+
+/*
+ * Converts the digit characters in [s, e) to a number in radix (base & 0xFF).
+ * No character is validated here; readui() passes the range that skipnum()
+ * accepted.
+ *
+ * errno is set to 0 on success, to EINVAL (returning 0) if the range is
+ * empty or the radix is 0, or to ERANGE (returning ULLONG_MAX) if the value
+ * does not fit in 64 bits.
+ */
+static uint64_t readui_valid(const char *s, const char *e, readui_base base) {
+	uint64_t ret = 0;
+	uint64_t multiplier = 1;
+	uint64_t digit_value;
+	
+	//64-bit multiplication with overflow checking
+	//(works on 32-bit halves; jumps to `overflowed` if the product won't fit)
+	#define multiply(dest, src) do { \
+		uint32_t a0 = (uint64_t)(dest) & 0xFFFFFFFF; \
+		uint32_t a1 = (uint64_t)(dest) >> 32; \
+		uint32_t b0 = (uint64_t)(src) & 0xFFFFFFFF; \
+		uint32_t b1 = (uint64_t)(src) >> 32; \
+		uint64_t a, b; \
+		\
+		if (a1 && b1) \
+			goto overflowed; \
+		a = (uint64_t)a1*b0 + (uint64_t)a0*b1; \
+		if (a > 0xFFFFFFFF) \
+			goto overflowed; \
+		a <<= 32; \
+		b = (uint64_t)a0*b0; \
+		\
+		if (a+b < a) \
+			goto overflowed; \
+		(dest) = a+b; \
+	} while(0)
+	
+	if (s >= e || ((base&0xFF) < 1)) {
+		errno = EINVAL;
+		return 0;
+	}
+	
+	//leading zeros add nothing; skipping them keeps multiplier from
+	//overflowing on their account
+	while (s<e && *s=='0') s++;
+	
+	if (e > s) {
+		//walk from the last (least significant) digit back to the first
+		for (;;) {
+			char c = *--e;
+			
+			//this series of if statements takes advantage of the fact that 'a'>'A'>'0'
+			if (c >= 'a')
+				c -= 'a'-10;
+			else if (c >= 'A')
+				c -= 'A'-10;
+			else
+				c -= '0';
+			digit_value = c;
+			
+			//TODO:  Write/find a testcase where temp *= multiplier does overflow
+			multiply(digit_value, multiplier);
+			
+			if (ret+digit_value < ret)
+				goto overflowed;
+			ret += digit_value;
+			
+			if (e <= s)
+				break;
+			
+			//only scale up when another digit remains to be read
+			multiply(multiplier, base & 0xFF);
+		}
+	}
+	errno = 0;
+	return ret;
+	
+overflowed:
+	errno = ERANGE;
+	return ULLONG_MAX;
+	
+	#undef multiply
+}
+
+/*
+ * readui - read an unsigned integer from [*sp, e)
+ *
+ * Skips leading whitespace, then takes as many characters as skipnum()
+ * accepts for base.  *sp is set just past the last character taken; the
+ * return value and errno are those of readui_valid() on that range.
+ */
+uint64_t readui(const char **sp, const char *e, readui_base base) {
+	const char *start = *sp;
+	const char *stop;
+
+	while (start < e && cwhite(*start))
+		start++;
+
+	stop = skipnum(start, e, base);
+	*sp = stop;
+
+	return readui_valid(start, stop, base);
+}
+
+
+#define MESSAGE_PATH "tokenize/read_cnumber/"
+
+//Component boundaries of one number token, as filled in by scan_number().
+struct scan_number {
+/*
+ * Each of the pointers points to the first character of a given component.
+ * Consider 0x50.1p+1f .  It would be broken down into:
+ *
+ * A component is empty when its pointer equals the next component's pointer.
+ */
+	const char *prefix;   // 0x
+	const char *digits;   // 50.1
+	const char *exponent; // p+1
+	const char *suffix;   // f
+	const char *end;      // one past the last character of the token
+	size_t dots_found;    // 1
+};
+
+/*
+ * Scans past all the characters in a number token, fills the struct, and
+ * returns one of TOK_INTEGER or TOK_FLOATING to indicate the type.
+ *
+ * First character must be [0-9 '.']
+ *
+ * The token is TOK_FLOATING if its digits contain a '.' or it has an
+ * exponent, otherwise TOK_INTEGER.  Nothing is validated here: trailing
+ * letters, digits, '.', '_' and '$' are all swept into the suffix, and the
+ * digits may contain several dots.  read_integer()/read_floating() report
+ * the errors.
+ */
+static enum token_type scan_number(struct scan_number *sn,
+					const char *s, const char *e) {
+	enum token_type type;
+	
+	sn->dots_found = 0;
+	
+	sn->prefix = s;
+	sn->digits = s;
+	//a 0x/0b prefix is only considered when a third character is available
+	if (s+3<=e && s[0]=='0') {
+		if (s[1]=='X' || s[1]=='x') {
+		//hexadecimal
+			s += 2;
+			sn->digits = s;
+			for (;s<e;s++) {
+				if (*s == '.')
+					sn->dots_found++;
+				else if (!chex(*s))
+					break;
+			}
+			goto done_scanning_digits;
+		} else if (s[1]=='B' || s[1]=='b') {
+		//binary
+			s += 2;
+			//not followed by 0 or 1: treat the "0" as a plain number
+			//and let the 'b' fall into the suffix
+			if (*s!='0' && *s!='1')
+				s -= 2;
+			sn->digits = s;
+		}
+	}
+	
+	//binary, decimal, or octal
+	for (;s<e;s++) {
+		if (*s == '.')
+			sn->dots_found++;
+		else if (!cdigit(*s))
+			break;
+	}
+
+done_scanning_digits:
+	
+	//E/e introduces an exponent only without a prefix (decimal);
+	//P/p only with one (hex or binary)
+	sn->exponent = s;
+	if (s<e && (
+		(sn->prefix==sn->digits && (*s=='E' || *s=='e')) ||
+		(sn->prefix < sn->digits && (*s=='P' || *s=='p'))
+	)) {
+		s++;
+		if (s<e && (*s=='+' || *s=='-'))
+			s++;
+		while (s<e && cdigit(*s)) s++;
+	}
+	
+	sn->suffix = s;
+	while (s<e && (cdigit(*s) || cletter(*s) ||
+		*s=='.' || *s=='_' || *s=='$')) s++;
+	
+	sn->end = s;
+	
+	//Now we're done scanning, but now we want to know what type this is
+	type = TOK_INTEGER;
+	if (sn->dots_found)
+		type = TOK_FLOATING;
+	if (sn->exponent < sn->suffix)
+		type = TOK_FLOATING;
+	
+	//if this is an octal, make the leading 0 a prefix
+	if (type==TOK_INTEGER && sn->prefix==sn->digits &&
+			sn->digits < s && sn->digits[0]=='0')
+		sn->digits++;
+	
+	return type;
+}
+
+/*
+ * Decodes the suffix [s, e) of a number token into TOK_* suffix flags.
+ *
+ * Letters are case-insensitive.  The pieces are L, LL, U, F and I; each may
+ * appear at most once, and L and LL exclude each other in either order
+ * (so "LLL" is rejected, not read as LL followed by L).
+ *
+ * type says whether the number is TOK_INTEGER or TOK_FLOATING; F on an
+ * integer, and U or LL on a floating constant, are reported as errors.
+ *
+ * Returns the OR of the flags found (0 for an empty suffix), or
+ * TOK_NOSUFFIX after reporting an error to mq.
+ */
+static enum tok_suffix read_number_suffix(const char *s, const char *e,
+			enum token_type type, tok_message_queue *mq) {
+	const char *orig_s = s;
+	enum tok_suffix sfx = 0;
+	
+	//read the suffix in pieces
+	while (s<e) {
+		enum tok_suffix sfx_prev = sfx;
+		char c = *s++;
+		if (c>='a' && c<='z')
+			c -= 'a'-'A';
+		
+		if (c=='L') {
+			if (s<e && (*s=='L' || *s=='l')) {
+				s++;
+				sfx |= TOK_LL;
+				
+				//TOK_L and TOK_LL are mutually exclusive
+				if (sfx & TOK_L)
+					goto invalid;
+			} else {
+				//same exclusion, for an L that follows an LL
+				if (sfx & TOK_LL)
+					goto invalid;
+				sfx |= TOK_L;
+			}
+		}
+		else if (c=='U')
+			sfx |= TOK_U;
+		else if (c=='F')
+			sfx |= TOK_F;
+		else if (c=='I')
+			sfx |= TOK_I;
+		else
+			goto invalid;
+		
+		if (sfx == sfx_prev)
+			goto invalid; //suffix piece was repeated
+	}
+	
+	//make sure the suffix is appropriate for this number type
+	if (type==TOK_INTEGER && (sfx & TOK_F)) {
+		tok_msg_error(suffix_float_only, orig_s,
+		"Suffix only valid for floating point numbers");
+		sfx = TOK_NOSUFFIX;
+	}
+	if (type==TOK_FLOATING && (sfx & (TOK_U | TOK_LL))) {
+		tok_msg_error(suffix_integer_only, orig_s,
+		"Suffix only valid for integers");
+		sfx = TOK_NOSUFFIX;
+	}
+	
+	return sfx;
+	
+invalid:
+	if (type==TOK_INTEGER)
+		tok_msg_error(integer_suffix_invalid, orig_s,
+				"Integer suffix invalid");
+	else
+		tok_msg_error(floating_suffix_invalid, orig_s,
+				"Floating point suffix invalid");
+	return TOK_NOSUFFIX;
+}
+
+/*
+ * Converts the integer token described by sn, storing the value in out->v,
+ * the radix (2, 8, 10 or 16) in out->base and the suffix flags in
+ * out->suffix.  Invalid digits and out-of-range values are reported to mq.
+ *
+ * A 0b/0B prefix selects base 2, so digits other than 0 and 1 after it are
+ * reported as invalid instead of being read as octal.
+ */
+static void read_integer(struct tok_integer *out, const struct scan_number *sn,
+			tok_message_queue *mq) {
+	/*
+	Assertions about an integer's struct scan_number:
+		prefix is empty or [0 0B 0b 0X 0x]
+		sn->digits is not empty (i.e. sn->digits < sn->exponent)
+			*unless* the prefix is "0"
+		has no exponent
+		suffix is [0-9 A-Z a-z '.']*
+		dots_found == 0
+	*/
+	readui_base base = READUI_DEC;
+	const char *tokstart = sn->prefix;
+	const char *s = sn->digits, *e = sn->exponent;
+	
+	if (sn->prefix+1 < sn->digits) {
+		//two-character prefix: scan_number only produces 0x/0X or 0b/0B
+		if (sn->prefix[1]=='X' || sn->prefix[1]=='x')
+			base = READUI_HEX;
+		else
+			base = READUI_BIN;
+	} else if (sn->prefix < sn->digits) {
+		//one-character prefix: the leading 0 of an octal constant
+		base = READUI_OCT;
+	}
+	
+	if (s>=e && base==READUI_OCT) {
+		//octal contains no digits
+		out->v = 0;
+		out->base = 8;
+		goto suffix;
+	}
+	
+	out->v = readui(&s, sn->exponent, base);
+	out->base = base & 0xFF;
+	
+	//readui stops at the first digit that is invalid for base, so s != e
+	//means some of the scanned digits were rejected
+	if (s != e || errno == EINVAL) {
+		tok_msg_error(integer_invalid_digits, tokstart,
+			"Integer constant contains invalid digits");
+	} else if (errno) {
+		if (errno == ERANGE) {
+			tok_msg_error(integer_out_of_range, tokstart,
+				"Integer constant out of range");
+		} else {
+			tok_msg_bug(readui_unknown, tokstart,
+				"Unknown error returned by readui");
+		}
+	}
+	
+suffix:
+	out->suffix =
+		read_number_suffix(sn->suffix, sn->end, TOK_INTEGER, mq);
+	
+	return;
+}
+
+/*
+ * Converts the floating point token described by sn with strtold(), storing
+ * the value in out->v and the suffix flags in out->suffix.
+ *
+ * Binary floats and hex floats without an exponent are reported to mq and
+ * abandoned early (v = 0.0, no suffix).  Out-of-range values, and a parse
+ * that stops anywhere other than the start of the suffix, are reported too.
+ *
+ * The byte at sn->end is overwritten with 0 for the duration of the strtold
+ * call and then restored, so it must be readable and writable.
+ */
+static void read_floating(struct tok_floating *out, const struct scan_number *sn,
+			tok_message_queue *mq) {
+	/*
+	Assertions about a float's struct scan_number:
+		prefix is empty or [0B 0b 0X 0x] (note: no octal prefix 0)
+		sn->digits not empty, ever
+		exponent may or may not exist
+		If exponent exists, it is valid and formatted as:
+			( [E P e p] ['+' '-']*0..1 [0-9]* )
+		An exponent starts with E if this is decimal, P if it is hex/binary
+		suffix is [0-9 A-Z a-z '.']*
+		dots_found can be anything
+	*/
+	const char *tokstart = sn->prefix;
+	const char *s = sn->prefix, *e = sn->suffix;
+	char borrow = *sn->end;
+	//long double strtold(const char *nptr, char **endptr);
+	
+	out->v = 0.0;
+	out->suffix = TOK_NOSUFFIX;
+	
+	if (sn->prefix < sn->digits) {
+		if (sn->prefix[1]=='B' || sn->prefix[1]=='b') {
+			tok_msg_error(binary_float, tokstart,
+				"Binary floating point constants not allowed");
+			return;
+		}
+		if (sn->exponent >= sn->suffix) {
+			tok_msg_error(hex_float_no_exponent, tokstart,
+				"Hex floating point constant missing exponent");
+			return;
+		}
+	}
+	
+	
+	/* Stick a null terminator at the end of the input so strtold
+	 * won't read beyond the given input.
+	 *
+	 * This is thread-safe because the input is from
+	 * token_list.txt, which was generated in the
+	 * tokenize function which is still running.
+	 */
+	*(char*)sn->end = 0;
+	errno = 0;
+	//strtold leaves s at the first character it could not use
+	out->v = strtold(s, (char**)&s);
+	//don't forget to set it back
+	*(char*)sn->end = borrow;
+	
+	if (errno) {
+		//for some reason, strtold may errno to EDOM to indicate underrun
+		//open test/run.c and search "floating_out_of_range" for more details
+		if (errno == ERANGE || errno == EDOM) {
+			tok_msg_error(floating_out_of_range, tokstart,
+				"Floating point constant out of range");
+		} else {
+			tok_msg_bug(strtold_unknown, tokstart,
+				"Unknown error returned by strtold");
+		}
+	}
+	
+	//strtold should have consumed exactly prefix+digits+exponent
+	if (s != e) {
+		tok_msg_error(floating_invalid_digits, tokstart,
+			"Floating point constant contains invalid digits");
+	}
+	
+	out->suffix =
+		read_number_suffix(sn->suffix, sn->end, TOK_FLOATING, mq);
+}
+
+/*
+ * read_cnumber - read one C number token from [s, e)
+ *
+ * Sets tok->type to TOK_INTEGER or TOK_FLOATING as decided by scan_number()
+ * and fills tok->integer or tok->floating to match.  Problems are reported
+ * to mq.  Returns a pointer just past the token.
+ */
+char *read_cnumber(struct token *tok, const char *s, const char *e, tok_message_queue *mq) {
+	struct scan_number parts;
+
+	tok->type = scan_number(&parts, s, e);
+
+	if (tok->type != TOK_INTEGER) {
+		read_floating(&tok->floating, &parts, mq);
+	} else {
+		read_integer(&tok->integer, &parts, mq);
+	}
+
+	return (char*)parts.end;
+}
+
+#undef MESSAGE_PATH
--- a/ccan/ccan_tokenizer/read_cstring.c
+++ b/ccan/ccan_tokenizer/read_cstring.c
+#include "ccan_tokenizer.h"
+
+//Returns a malloc'd, NUL-terminated copy of the bytes in [s, e).
+//The caller releases it with free().
+static char *strdup_rng(const char *s, const char *e) {
+	size_t len = e-s;
+	char *copy = malloc(len+1);
+
+	memcpy(copy, s, len);
+	copy[len] = 0;
+	return copy;
+}
+
+#define MESSAGE_PATH "tokenize/read_cstring/"
+
+//Reads a C string starting at s until quoteChar is found or e is reached
+//  Returns the pointer to the terminating quote character or e if none was found
+//
+//  The decoded contents (escape sequences replaced by the bytes they stand
+//  for) are appended to *out, followed by a 0 byte that is not counted in
+//  out->size.  Problems are reported to mq; every message about an escape
+//  sequence points at that sequence's backslash.
+//
+//  Octal escapes take up to three digits in the range 0-7; "\8" and "\9" are
+//  reported as unknown escape sequences.
+char *read_cstring(array_char *out, const char *s, const char *e, char quoteChar, tok_message_queue *mq) {
+	const char * const tokstart = s;
+	const char *p;
+	int has_endquote=0, has_newlines=0;
+	
+	//tok_msg_debug(called, s, "Called read_cstring on `%s`", s);
+	
+	#define append(startptr,endptr) array_append_items(*out, startptr, (endptr)-(startptr))
+	#define append_char(theChar) array_append(*out, theChar)
+	#define append_zero() do {array_append(*out, 0); out->size--;} while(0)
+	
+	//s marks the start of the literal run not yet copied to out;
+	//p is the read position
+	p = s;
+	while (p<e) {
+		char c = *p++;
+		if (c == '\\') {
+			//flush the literal run that precedes the backslash
+			append(s, p-1);
+			//s is now just past the backslash, so s-1 is the backslash
+			s = p;
+			if (p >= e) {
+				append_char('\\');
+				tok_msg_error(ended_in_backslash, p-1,
+					"read_cstring input ended in backslash");
+				break;
+			}
+			c = *p++;
+			if (c>='0' && c<='7') {
+				unsigned int octal = c-'0';
+				size_t digit_count = 0;
+				//at most two more octal digits (three in total)
+				while (p<e && *p>='0' && *p<='7') {
+					octal <<= 3;
+					octal += (*p++) - '0';
+					if (++digit_count >= 2)
+						break;
+				}
+				if (p<e && *p>='0' && *p<='9') {
+					tok_msg_info(ambiguous_octal, s-1,
+						"Octal followed by digit");
+				}
+				if (octal > 0xFF) {
+					tok_msg_warn(octal_overflow, s-1,
+						"Octal out of range");
+				}
+				c = octal;
+			} else {
+				switch (c) {
+					case 'x': {
+						size_t digit_count = 0;
+						size_t zero_count = 0;
+						unsigned int hex = 0;
+						while (p<e && *p=='0') p++, zero_count++;
+						for (;p<e;digit_count++) {
+							c = *p++;
+							if (c>='0' && c<='9')
+								c -= '0';
+							else if (c>='A' && c<='F')
+								c -= 'A'-10;
+							else if (c>='a' && c<='f')
+								c -= 'a'-10;
+							else {
+								p--;
+								break;
+							}
+							hex <<= 4;
+							hex += c;
+						}
+						if (zero_count+digit_count > 2) {
+							//text of the escape, from its backslash on
+							char *hex_string = strdup_rng(s-1, p);
+							tok_msg_warn(ambiguous_hex, s-1,
+								"Hex escape '%s' is ambiguous", hex_string);
+							if (digit_count > 2)
+								tok_msg_warn(hex_overflow, s-1,
+									"Hex escape '%s' out of range", hex_string);
+							free(hex_string);
+						}
+						c = hex & 0xFF;
+					}	break;
+					case 'a':
+						c=0x7;
+						break;
+					case 'b':
+						c=0x8;
+						break;
+					case 'e':
+						c=0x1B;
+						break;
+					case 'f':
+						c=0xC;
+						break;
+					case 'n':
+						c=0xA;
+						break;
+					case 'r':
+						c=0xD;
+						break;
+					case 't':
+						c=0x9;
+						break;
+					case 'v':
+						c=0xB;
+						break;
+					case '\\':
+						break;
+					default:
+						if (c == quoteChar)
+							break;
+						if (c=='\'' && quoteChar=='"') {
+							/* tok_msg_info(escaped_single_quote, s-1,
+								"Single quote characters need not be escaped within double quotes"); */
+							break;
+						}
+						if (c=='"' && quoteChar=='\'') {
+							/* tok_msg_info(escaped_double_quote, s-1,
+								"Double quote characters need not be escaped within single quotes"); */
+							break;
+						}
+						if (c=='?') // \? is needed in some situations to avoid building a trigraph
+							break;
+						tok_msg_warn(unknown_escape, s-1,
+							"Unknown escape sequence '\\%c'", c);
+						break;
+				}
+			}
+			//the next literal run starts after the escape sequence
+			s = p;
+			append_char(c);
+		} else if (c == quoteChar) {
+			p--;
+			has_endquote = 1;
+			break;
+		} else if (creturn(c)) {
+			has_newlines = 1;
+		}
+	}
+	//flush whatever literal text is left, then terminate
+	append(s, p);
+	append_zero();
+	if (!has_endquote) {
+		tok_msg_error(missing_endquote, tokstart,
+			"Missing endquote on %s literal",
+			quoteChar=='\'' ? "character" : "string");
+	} else if (has_newlines) {
+		tok_msg_warn(quote_newlines, tokstart,
+			"%s literal contains newline character(s)",
+			quoteChar=='\'' ? "Character" : "String");
+	}
+	return (char*)p;
+	
+	#undef append
+	#undef append_char
+	#undef append_zero
+}
+
+#undef MESSAGE_PATH
--- a/ccan/ccan_tokenizer/scripts/message_dump_to_messages.sh
+++ b/ccan/ccan_tokenizer/scripts/message_dump_to_messages.sh
+#!/bin/sh
+sed 's/^D: /{.level=TM_DEBUG, .path="/' |
+sed 's/^I: /{.level=TM_INFO, .path="/' |
+sed 's/^W: /{.level=TM_WARN, .path="/' |
+sed 's/^BUG: /{.level=TM_BUG, .path="/' |
+sed 's/:.*/\"},/'
--- a/ccan/ccan_tokenizer/test/run-simple-token.c
+++ b/ccan/ccan_tokenizer/test/run-simple-token.c
+#include "ccan_tokenizer/read_cnumber.c"
+#include "ccan_tokenizer/read_cstring.c"
+#include "ccan_tokenizer/dict.c"
+#include "ccan_tokenizer/ccan_tokenizer.c"
+#include "ccan_tokenizer/queue.c"
+#include "ccan_tokenizer/charflag.c"
+#include "tap/tap.h"
+
+#define item(num) (toks->first[num])
+//sed 's/toks->array\.item\[\([^]]*\)\]/item(\1)/g'
+
+tok_message_queue *MQ = NULL;
+
+static const char *onechar_tokens = "!~#%^&*()=-+{}[]|;:,.<>/?";
+static const char *twochar_tokens = "!=##%=^=&=&&*=-=--->+=++==|=||<=<<>=>>/=";
+static const char *threechar_tokens = "<<=>>=...";
+static const char *char_token = "'x'";
+static const char *string_token = "\"string\"";
+static const char *ident_tokens = "doh abc f_o _ba b$f";
+
+/* Returns a talloc'd copy of string in which every character is followed
+ * by a backslash and a newline. */
+static char *backslashify(const char *string)
+{
+	const size_t len = strlen(string);
+	char *ret = talloc_size(NULL, len*3 + 1);
+	char *out = ret;
+	size_t i;
+
+	for (i = 0; i < len; i++) {
+		*out++ = string[i];
+		*out++ = '\\';
+		*out++ = '\n';
+	}
+	*out = '\0';
+	return ret;
+}
+
+/* Returns a talloc'd copy of string with one space after every num
+ * characters.  The length of string is expected to be a multiple of num. */
+static char *spacify(const char *string, unsigned int num)
+{
+	const size_t len = strlen(string);
+	char *ret = talloc_size(NULL, len*2 + 1);
+	unsigned int pos = 0;
+
+	/* start from all spaces, then drop each group into its slot */
+	memset(ret, ' ', len*2);
+
+	while (pos < len) {
+		/* pos/num groups precede this one, each followed by a space */
+		memcpy(ret + pos + pos/num, string + pos, num);
+		pos += num;
+	}
+	ret[pos + pos/num] = '\0';
+	return ret;
+}
+
+/* Tokenizes a talloc'd copy of orig, which must be a run of operators that
+ * are each size characters long with nothing between them.  Checks that the
+ * result is a TOK_STARTLINE followed by one TOK_OPERATOR per operator, and
+ * that each token's txt and orig refer to the matching slice of the input.
+ * Returns the token list; the callers in main() free toks->orig. */
+static struct token_list *test_tokens(const char *orig, unsigned int size)
+{
+	struct token_list *toks;
+	char *string = talloc_strdup(NULL, orig);
+	unsigned int i;
+
+	toks = tokenize(string, strlen(string), MQ);
+	ok1(token_list_sanity_check(toks, stdout));
+	
+	/* +1 for the TOK_STARTLINE */
+	ok1(token_list_count(toks) == strlen(string)/size + 1);
+	ok1(item(0).type == TOK_STARTLINE);
+	for (i = 0; i < strlen(string)/size; i++) {
+		ok1(item(i+1).type == TOK_OPERATOR);
+		ok1(item(i+1).txt_size == size);
+		ok1(strncmp(item(i+1).txt, string + i*size, size) == 0);
+		ok1(item(i+1).orig_size == size);
+		ok1(item(i+1).orig == string + i*size);
+	}
+	return toks;
+}
+
+/* Like test_tokens(), but the input is spacify(orig, size): every operator
+ * is followed by one space.  Checks that the tokens after the TOK_STARTLINE
+ * alternate between a TOK_OPERATOR and a one-character TOK_WHITE. */
+static struct token_list *test_tokens_spaced(const char *orig,
+					     unsigned int size)
+{
+	struct token_list *toks;
+	char *string = spacify(orig, size);
+	unsigned int i;
+
+	toks = tokenize(string, strlen(string), MQ);
+	ok1(token_list_sanity_check(toks, stdout));
+	
+	/* two tokens per operator, +1 for the TOK_STARTLINE */
+	ok1(token_list_count(toks) == strlen(orig)/size*2 + 1);
+	ok1(item(0).type == TOK_STARTLINE);
+	for (i = 0; i < strlen(orig)/size; i++) {
+		/* each operator plus its space takes size+1 input characters */
+		ok1(item(i*2+1).type == TOK_OPERATOR);
+		ok1(item(i*2+1).txt_size == size);
+		ok1(!strncmp(item(i*2+1).txt, string + i*(size+1), size));
+		ok1(item(i*2+1).orig_size == size);
+		ok1(item(i*2+1).orig == string + i*(size+1));
+		ok1(item(i*2+2).type == TOK_WHITE);
+		ok1(item(i*2+2).txt_size == 1);
+		ok1(item(i*2+2).txt[0] == ' ');
+		ok1(item(i*2+2).orig_size == 1);
+		ok1(item(i*2+2).orig == string + i*(size+1) + size);
+	}
+	return toks;
+}
+
+/* Like test_tokens(), but the input is backslashify(orig): every character
+ * is followed by a backslash-newline.  Checks that each operator's txt
+ * matches the plain text of orig, while its orig span covers size*3
+ * characters of the backslashed input. */
+static struct token_list *test_tokens_backslashed(const char *orig,
+						  unsigned int size)
+{
+	struct token_list *toks;
+	const char *string = backslashify(orig);
+	unsigned int i;
+
+	toks = tokenize(string, strlen(string), MQ);
+	ok1(token_list_sanity_check(toks, stdout));
+	
+	ok1(token_list_count(toks) == strlen(orig)/size + 1);
+	ok1(item(0).type == TOK_STARTLINE);
+	for (i = 0; i < strlen(orig)/size; i++) {
+		ok1(item(i+1).type == TOK_OPERATOR);
+		ok1(item(i+1).txt_size == size);
+		ok1(strncmp(item(i+1).txt, orig + i*size, size) == 0);
+		ok1(item(i+1).orig_size == size*3);
+		ok1(item(i+1).orig == string + i*size*3);
+	}
+	return toks;
+}
+
+/* Checks that the i-th operator token has an opkw equal to the i-th
+ * character of onechar_tokens.  mul is the distance between consecutive
+ * operator tokens in the list (2 when a whitespace token follows each). */
+static void onechar_checks(const struct token_list *toks, int mul)
+{
+	unsigned int i;
+	for (i = 0; i < strlen(onechar_tokens); i++)
+		ok1(item(i*mul+1).opkw == onechar_tokens[i]);
+}
+
+/* Checks the opkw of each operator token against the expected constant, in
+ * the order the operators appear in twochar_tokens.  mul is the distance
+ * between consecutive operator tokens in the list. */
+static void twochar_checks(const struct token_list *toks, int mul)
+{
+	ok1(item(1).opkw == NE_OP);
+	ok1(item(1*mul+1).opkw == DOUBLE_POUND);
+	ok1(item(2*mul+1).opkw == MOD_ASSIGN);
+	ok1(item(3*mul+1).opkw == XOR_ASSIGN);
+	ok1(item(4*mul+1).opkw == AND_ASSIGN);
+	ok1(item(5*mul+1).opkw == AND_OP);
+	ok1(item(6*mul+1).opkw == MUL_ASSIGN);
+	ok1(item(7*mul+1).opkw == SUB_ASSIGN);
+	ok1(item(8*mul+1).opkw == DEC_OP);
+	ok1(item(9*mul+1).opkw == PTR_OP);
+	ok1(item(10*mul+1).opkw == ADD_ASSIGN);
+	ok1(item(11*mul+1).opkw == INC_OP);
+	ok1(item(12*mul+1).opkw == EQ_OP);
+	ok1(item(13*mul+1).opkw == OR_ASSIGN);
+	ok1(item(14*mul+1).opkw == OR_OP);
+	ok1(item(15*mul+1).opkw == LE_OP);
+	ok1(item(16*mul+1).opkw == LEFT_OP);
+	ok1(item(17*mul+1).opkw == GE_OP);
+	ok1(item(18*mul+1).opkw == RIGHT_OP);
+	ok1(item(19*mul+1).opkw == DIV_ASSIGN);
+}
+
+/* Checks the opkw of the three operator tokens produced from
+ * threechar_tokens.  mul is the distance between consecutive operator
+ * tokens in the list. */
+static void threechar_checks(const struct token_list *toks, int mul)
+{
+	ok1(item(1).opkw == LEFT_ASSIGN);
+	ok1(item(1*mul+1).opkw == RIGHT_ASSIGN);
+	ok1(item(2*mul+1).opkw == ELLIPSIS);
+}
+
+/* Runs the simple-token tests: operators of one, two and three characters
+ * (packed, separated by spaces, and with a backslash-newline after every
+ * character), one character literal, one string literal, and a list of
+ * identifiers (plain and backslashed). */
+int main(void)
+{
+	unsigned int i;
+	struct token_list *toks;
+	char *str;
+	char *backslashed_idents;
+
+	plan_tests(1243);
+	/* operators with nothing between them */
+	toks = test_tokens(onechar_tokens, 1);
+	onechar_checks(toks, 1);
+	talloc_free((char*)toks->orig);
+
+	toks = test_tokens(twochar_tokens, 2);
+	twochar_checks(toks, 1);
+	talloc_free((char*)toks->orig);
+
+	toks = test_tokens(threechar_tokens, 3);
+	threechar_checks(toks, 1);
+	talloc_free((char*)toks->orig);
+
+	/* char literal */
+	str = talloc_strdup(NULL, char_token);
+	toks = tokenize(str, strlen(str), MQ);
+	ok1(token_list_sanity_check(toks, stdout));
+	ok1(token_list_count(toks) == 2);
+	ok1(item(0).type == TOK_STARTLINE);
+	ok1(item(1).type == TOK_CHAR);
+	ok1(item(1).txt_size == strlen(str));
+	ok1(strncmp(item(1).txt, str, strlen(str)) == 0);
+	ok1(item(1).orig_size == strlen(str));
+	ok1(item(1).orig == str);
+	/* FIXME: test contents of string. */
+	talloc_free(str);
+
+	/* string literal */
+	str = talloc_strdup(NULL, string_token);
+	toks = tokenize(str, strlen(str), MQ);
+	ok1(token_list_sanity_check(toks, stdout));
+	ok1(token_list_count(toks) == 2);
+	ok1(item(0).type == TOK_STARTLINE);
+	ok1(item(1).type == TOK_STRING);
+	ok1(item(1).txt_size == strlen(str));
+	ok1(strncmp(item(1).txt, str, strlen(str)) == 0);
+	ok1(item(1).orig_size == strlen(str));
+	ok1(item(1).orig == str);
+	/* FIXME: test contents of string. */
+	talloc_free(str);
+
+	/* Identifiers */
+	/* ident_tokens holds five 3-character identifiers separated by single
+	 * spaces, so identifier i starts at offset i*4 */
+	str = talloc_strdup(NULL, ident_tokens);
+	toks = tokenize(str, strlen(str), MQ);
+	ok1(token_list_sanity_check(toks, stdout));
+	token_list_dump(toks, stdout);
+	ok1(token_list_count(toks) == 10);
+	ok1(item(0).type == TOK_STARTLINE);
+	for (i = 0; i < 5; i++) {
+		ok1(item(i*2+1).type == TOK_IDENTIFIER);
+		ok1(item(i*2+1).txt_size == 3);
+		ok1(strncmp(item(i*2+1).txt, str + i*4, 3) == 0);
+		ok1(item(i*2+1).orig_size == 3);
+		ok1(item(i*2+1).orig == str + i*4);
+		/* no whitespace follows the last identifier */
+		if (i == 4)
+			continue;
+		ok1(item(i*2+2).type == TOK_WHITE);
+		ok1(item(i*2+2).txt_size == 1);
+		ok1(item(i*2+2).txt[0] == ' ');
+		ok1(item(i*2+2).orig_size == 1);
+		ok1(item(i*2+2).orig == str + i*4 + 3);
+	}
+	talloc_free(str);
+
+	/* operators separated by single spaces */
+	toks = test_tokens_spaced(onechar_tokens, 1);
+	onechar_checks(toks, 2);
+	talloc_free((char*)toks->orig);
+
+	toks = test_tokens_spaced(twochar_tokens, 2);
+	twochar_checks(toks, 2);
+	talloc_free((char*)toks->orig);
+
+	toks = test_tokens_spaced(threechar_tokens, 3);
+	threechar_checks(toks, 2);
+	talloc_free((char*)toks->orig);
+
+	/* operators with a backslash-newline after every character */
+	toks = test_tokens_backslashed(onechar_tokens, 1);
+	onechar_checks(toks, 1);
+	talloc_free((char*)toks->orig);
+
+	toks = test_tokens_backslashed(twochar_tokens, 2);
+	twochar_checks(toks, 1);
+	talloc_free((char*)toks->orig);
+
+	toks = test_tokens_backslashed(threechar_tokens, 3);
+	threechar_checks(toks, 1);
+	talloc_free((char*)toks->orig);
+
+	/* Identifiers */
+	/* backslashed: every input character now takes 3 bytes, so identifier
+	 * i starts at offset i*12 and spans 9 bytes */
+	backslashed_idents = backslashify(ident_tokens);
+	toks = tokenize(backslashed_idents, strlen(backslashed_idents), MQ);
+	ok1(token_list_sanity_check(toks, stdout));
+	ok1(token_list_count(toks) == 10);
+	ok1(item(0).type == TOK_STARTLINE);
+	for (i = 0; i < 5; i++) {
+		ok1(item(i*2+1).type == TOK_IDENTIFIER);
+		ok1(item(i*2+1).txt_size == 3);
+		ok1(strncmp(item(i*2+1).txt, ident_tokens + i*4, 3) == 0);
+		ok1(item(i*2+1).orig_size == 9);
+		ok1(item(i*2+1).orig == backslashed_idents + i*12);
+		if (i == 4)
+			continue;
+		ok1(item(i*2+2).type == TOK_WHITE);
+		ok1(item(i*2+2).txt_size == 1);
+		ok1(item(i*2+2).txt[0] == ' ');
+		ok1(item(i*2+2).orig_size == 3);
+		ok1(item(i*2+2).orig == backslashed_idents + i*12 + 9);
+	}
+	talloc_free(backslashed_idents);
+
+	return exit_status();
+}
--- a/ccan/ccan_tokenizer/test/run.c
+++ b/ccan/ccan_tokenizer/test/run.c
--- a/ccan/ccan_tokenizer/todo
+++ b/ccan/ccan_tokenizer/todo
+Write test for empty_char_constant
+
+defined cannot be used as a macro name
+<strike>Add "defined" and only accept it in appropriate circumstances</strike>
+
+Update that simple tokenizer compulsory test so things will compile
+
+Handle cases like escaped question marks and pound symbols that I don't understand yet.
+
+(done) Fix #include <stdio.h> to read include directive correctly
+
+txt/orig state of affairs:
+
+The problem is that there are two ways to interpret line,col:
+	With respect to txt
+	With respect to orig
+
+This isn't a problem when txt and orig point to the same character, as in:
+
+int in\
+dex
+int \
+index /*Here, the backslash break should be gobbled up by the space identifier*/
+
+line,col has no ambiguity as to where it should point.  However, when they point to different characters (i.e. at the beginning of a line):
+
+\
+int index
+
+line,col could either point to orig or to the first real character.  Thus, we will do the latter.
+
+Moreover, will a newline followed by backslash breaks generate a token that gobbles up said breaks?  I believe it will, but no need to call this mandatory.
+
+Thus, on a lookup with a txt pointer, the line/col/orig should match the real character and not preceding backslash breaks.
+
+
+I've been assuming that every token starts with its first character, neglecting the case where a line starts with backslash breaks.  The question is, given the txt pointer to the first character, where should the derived orig land?
+
+Currently, the orig lands after the beginning backslash breaks, when instead it should probably land before them.
+
+Here's what the tokenizer's text anchoring needs:
+	Broken/unbroken text pointer -> line/col
+	Unbroken contents per token to identify identifier text
+	Original contents per token to rebuild the document
+	Ability to change "original contents" so the document will be saved with modifications
+	Ability to insert new tokens
+
+Solution:
+	New tokens will typically have identical txt and orig, yea even the same pointer.
+	txt/txt_size for unbroken contents, orig/orig_size for original
+	modify orig to change the document
+	txt identifies identifier text
+	Line lookup tables are used to resolve txt/orig pointers; other pointers can't be resolved in the same fashion and may require traversing backward through the list.
+
+What this means:
+	Token txt/txt_size, orig/orig_size, orig_lines, txt_lines, and tok_point_lookup are all still correct.
+	Token line,col will be removed
+	
+Other improvements to do:
+	Sanity check the point lookups like crazy
+	Remove the array() structures in token_list, as these are supposed to be read-only
+
+Make sure tok_point_lookup returns correct values for every single pointer possible, particularly those in orig that are on backslash-breaks
+
+Convert the tok_message_queue into an array of messages bound to tokens.
+
+Ask Rusty about the trailing newline in this case:
+
+/* Blah
+ * 
+ * blah
+ */
+
+Here, rather than the trailing space being blank, it is "blank" from the comment perspective.
+May require deeper analysis.
+
+Todos from ccan_tokenizer.h
+/*
+Assumption:  Every token fits in one and exactly one line
+Counterexamples:
+	Backslash-broken lines
+	Multiline comments
+
+Checks to implement in the tokenizer:
+
+Is the $ character used in an identifier (some configurations of GCC allow this)?
+Are there potentially ambiguous sequences used in a string literal (e.g. "\0000")?
+Are there stray characters?  (e.g. '\0', '@', '\b')
+Are there trailing spaces at the end of lines (unless said spaces consume the entire line)?
+	Are there trailing spaces after a backslash-broken line?
+
+
+Fixes todo:
+
+backslash-newline sequence should register as an empty character, and the tokenizer's line value should be incremented accordingly.
+*/
+
+Lex angle bracket strings in #include
+
+Check the rules in the documentation
+
+Examine the message queue as part of testing the tokenizer:
+	Make sure there are no bug messages
+	Make sure files compile with no warnings
+For the tokenizer sanity check, make sure integers and floats have valid suffixes respectively
+	(e.g. no TOK_F for an integer, no TOK_ULL for a floating)
+
+Update the scan_number sanity checks
+(done) Move scan_number et al. to a separate C file
+
+Test:
+	Overflow and underflow floats
+	0x.p0
+	(done) 0755f //octal 0755 with invalid suffix
+	(done) 0755e1 //floating 7550
+
+Figure out how keywords will be handled.
+	Preprocessor directives are <strike>case-insensitive</strike> actually case-sensitive (except __VA_ARGS__)
+	All C keywords are case sensitive
+	__VA_ARGS__ should be read as an identifier unless it's in the expansion of a macro.  Otherwise, GCC generates a warning.
+		We are in the expansion of a macro after <startline> <space> # <space> 
+	Don't forget about __attribute__
+	Except for __VA_ARGS__, all preprocessor keywords are preceded by <startline> <space> # <space>
+
+Solution:
+	All the words themselves will go into one opkw dictionary, and for both type and opkw, no distinction will be made between preprocessor and normal keywords.
+	Instead, int type will become short type; unsigned short cpp:1;
+
+Merge
+Commit ccan_tokenizer to the ccan repo
+Introduce ccan_tokenizer to ccanlint
+
+Write testcases for scanning all available operators
+Support integer and floating point suffixes (e.g. 500UL, 0.5f)
+Examine the message queue after tokenizing
+Make sure single-character operators have an opkw < 128
+Make sure c_dictionary has no duplicate entries
+Write verifiers for other types than TOK_WHITE
+
+What's been done:
+
+Operator table has been organized
+Merged Rusty's changes
+Fixed if -> while in finalize
+Fixed a couple of mistakes in run-simple-token.c testcases themselves
+	Expected orig/orig_size sizes weren't right
+Made token_list_sanity_check a public function and used it throughout run-simple-token.c
+Tests succeed and pass valgrind
+
+Lines/columns of every token are recorded
+
+(done) Fix "0\nstatic"
+(done) Write tests to make sure backslash-broken lines have correct token locations.
+(done) Correctly handle backslash-broken lines
+	One plan:  Separate the scanning code from the reading code.  Scanning sends valid ranges to reading, and reading fills valid tokens for the tokenizer/scanner to properly add
+	Another plan:  Un-break backslash-broken lines into another copy of the input.  Create an array of the positions of each real line break so that a position in the unbroken text can be mapped back to its real line number and offset.
+Annotate message queue messages with current token
+
+Conversion to make:
+	From:
+		Position in unbroken text
+	To:
+		Real line number
+		Real offset from start of line
+
+Thus, we want an array of real line start locations wrt the unbroken text
+
+Here is a bro\
+ken line.  Here is a
+real line.
+
+<LINE>Here is a bro<LINE>ken line.  Here is a
+<LINE>real line.
+
+If we know the position of the token text wrt the unbroken text, we can look up the real line number and offset using only the array of real line start positions within the unbroken text.
+
+Because all we need is the orig and orig_size with respect to the unbroken text to orient 
\ No newline at end of file
--- a/ccan/ciniparser/ciniparser.c
+++ b/ccan/ciniparser/ciniparser.c
--- a/ccan/ciniparser/test/run-dictionary.c
+++ b/ccan/ciniparser/test/run-dictionary.c
--- a/ccan/stringmap/_info
+++ b/ccan/stringmap/_info
--- a/ccan/stringmap/stringmap.c
+++ b/ccan/stringmap/stringmap.c
--- a/ccan/stringmap/stringmap.h
+++ b/ccan/stringmap/stringmap.h
--- a/ccan/stringmap/test/run.c
+++ b/ccan/stringmap/test/run.c
--- a/tools/ccanlint/Makefile
+++ b/tools/ccanlint/Makefile
@@ -16,6 +16,9 @@ tools/ccanlint/generated-init-tests: $(TEST_CFILES)

 $(TEST_OBJS): tools/ccanlint/generated-init-tests

+# The core objects also need generated-init-tests to exist first;
+# otherwise, ccanlint.c et al. may fail to build
+$(CORE_OBJS): tools/ccanlint/generated-init-tests
+
 tools/ccanlint/ccanlint: $(OBJS)

 ccanlint-clean: