Commit 3c81225f authored by Rusty Russell's avatar Rusty Russell

Merge.

parents a57ff6cb 33480b45
#include <stdio.h>
#include <string.h>
#include "config.h"
/**
* block_pool - An efficient allocator for blocks that don't need to be
* resized or freed.
*
* block_pool allocates blocks by packing them into buffers, making the
* overhead per block virtually zero. Because of this, you cannot resize or
* free individual blocks, but you can free the entire block_pool.
*
* The rationale behind block_pool is that talloc uses a lot of bytes per
* block (48 on 32-bit, 80 on 64-bit). Nevertheless, talloc is an excellent
* tool for C programmers of all ages. Because a block_pool is a talloc
* context, it can be useful in talloc-based applications where many small
* blocks need to be allocated.
*
* Example:
*
* #include <ccan/block_pool/block_pool.h>
*
* int main(void) {
* struct block_pool *bp = block_pool_new(NULL);
*
* void *buffer = block_pool_alloc(bp, 4096);
* char *string = block_pool_strdup(bp, "A string");
*
* int array[] = {0,1,1,2,3,5,8,13,21,34};
* int *array_copy = block_pool_memdup(bp, array, sizeof(array));
*
* block_pool_free(bp);
* return 0;
* }
*
* Author: Joey Adams
* License: BSD
*/
/* _info entry point for block_pool.
   When run with the single argument "depends", prints the module's
   dependency list and returns 0; any other invocation returns 1. */
int main(int argc, char *argv[])
{
	/* Exactly one argument is expected, and "depends" is the only one handled */
	if (argc == 2 && strcmp(argv[1], "depends") == 0) {
		printf("ccan/talloc\n");
		return 0;
	}

	return 1;
}
/*
Copyright (c) 2009 Joseph A. Adams
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The name of the author may not be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "block_pool.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
//must be a power of 2
#define BLOCK_SIZE 4096
//one malloc'd buffer that allocations are carved out of, front to back
struct block {
size_t remaining; //bytes at the end of data that have not been handed out yet
size_t size; //total number of bytes malloc'd for data
char *data; //start of the buffer
};
//the pool itself: a growable array of blocks kept in heap order
struct block_pool {
size_t count; //number of entries of block[] currently in use
size_t alloc; //number of entries block[] has room for; 2^n - 1, where n is an integer > 1
struct block *block; //malloc'd array of blocks
//blocks are arranged in a max-heap by the .remaining field
// (except the root block does not percolate down until it is filled)
};
//destructor registered on the pool with talloc_set_destructor:
//frees the buffer of every block in use, then the block array itself.
//always returns 0
static int destructor(struct block_pool *bp) {
	size_t i;

	for (i = 0; i < bp->count; i++)
		free(bp->block[i].data);

	free(bp->block);
	return 0;
}
//creates an empty block pool as a talloc child of ctx (ctx may be NULL).
//the block array starts with room for 7 blocks; no buffer is allocated until the first allocation.
//returns the new pool, or NULL if memory could not be allocated
struct block_pool *block_pool_new(void *ctx) {
	struct block_pool *bp = talloc(ctx, struct block_pool);

	if (!bp)
		return NULL;

	bp->count = 0;
	bp->alloc = 7;
	bp->block = malloc(bp->alloc * sizeof(struct block));
	if (!bp->block) {
		//no destructor is attached yet, so this only releases bp itself
		talloc_free(bp);
		return NULL;
	}

	talloc_set_destructor(bp, destructor);
	return bp;
}
//gives block b a fresh buffer big enough for `needed` bytes (rounded up to a
//multiple of BLOCK_SIZE) and hands out the first `needed` bytes of it.
//returns the start of the buffer, or NULL if the rounded size would overflow
//or malloc fails; on failure b is left empty (size 0, no data) so that it can
//never satisfy a later allocation
static void *new_block(struct block *b, size_t needed) {
	size_t total;

	b->size = 0;
	b->remaining = 0;
	b->data = NULL;

	//rounding up must not wrap around size_t
	if (needed > (size_t)-1 - (BLOCK_SIZE-1))
		return NULL;
	total = (needed+(BLOCK_SIZE-1)) & ~(size_t)(BLOCK_SIZE-1);

	b->data = malloc(total);
	if (!b->data)
		return NULL;

	b->size = total;
	b->remaining = total - needed;
	return b->data;
}
//for the first block, keep the memory usage low in case it's the only block:
//requests under 256 bytes get a 256-byte buffer, larger ones are rounded up to
//a multiple of BLOCK_SIZE.
//returns the start of the buffer, or NULL if the rounded size would overflow
//or malloc fails; on failure b is left empty (size 0, no data)
static void *new_block_tiny(struct block *b, size_t needed) {
	size_t total;

	b->size = 0;
	b->remaining = 0;
	b->data = NULL;

	if (needed < 256) {
		total = 256;
	} else {
		//rounding up must not wrap around size_t
		if (needed > (size_t)-1 - (BLOCK_SIZE-1))
			return NULL;
		total = (needed+(BLOCK_SIZE-1)) & ~(size_t)(BLOCK_SIZE-1);
	}

	b->data = malloc(total);
	if (!b->data)
		return NULL;

	b->size = total;
	b->remaining = total - needed;
	return b->data;
}
//tries to carve `size` bytes out of the unused tail of block b.
//align is an alignment mask (alignment minus one, alignment being a power of two);
//the offset within the block, not the absolute address, is what gets aligned.
//returns the allocation and shrinks b->remaining on success, NULL if it does not fit
static void *try_block(struct block *b, size_t size, size_t align) {
	size_t used = b->size - b->remaining;
	size_t offset = (used+align) & ~align;

	//the round-up may wrap around or land past the end of the block;
	//without this check b->size-offset would underflow and "fit" anything
	if (offset < used || offset > b->size)
		return NULL;

	if (b->size-offset < size)
		return NULL;

	//good, we can use this block
	b->remaining = b->size-offset-size;
	return b->data + offset;
}
//index arithmetic for the binary heap stored in bp->block:
//L and R give the left and right child of a node, P its parent,
//and V the key the heap is ordered by (the block's remaining space).
//the arguments are not parenthesized, so pass simple expressions only;
//V expects a variable named bp to be in scope
#define L(node) (node+node+1)
#define R(node) (node+node+2)
#define P(node) ((node-1)>>1)
#define V(node) (bp->block[node].remaining)
static void percolate_down(struct block_pool *bp, size_t node) {
size_t child = L(node);
struct block tmp;
//get the maximum child
if (child >= bp->count)
return;
if (child+1 < bp->count && V(child+1) > V(child))
child++;
if (V(child) <= V(node))
return;
tmp = bp->block[node];
bp->block[node] = bp->block[child];
bp->block[child] = tmp;
percolate_down(bp, child);
}
//note: percolates up to either 1 or 2 as a root
static void percolate_up(struct block_pool *bp, size_t node) {
size_t parent = P(node);
struct block tmp;
if (node<3 || V(parent) >= V(node))
return;
tmp = bp->block[node];
bp->block[node] = bp->block[parent];
bp->block[parent] = tmp;
percolate_up(bp, parent);
}
//allocates size bytes from the pool at the given alignment
//(align must be a power of two; 0 is treated like no alignment).
//returns the allocation, or NULL if memory could not be obtained, in which
//case the pool is left unchanged and still usable
void *block_pool_alloc_align(struct block_pool *bp, size_t size, size_t align) {
	void *ret;

	//turn the alignment into a bit mask
	if (align)
		align--;

	//if there aren't any blocks, make a new one
	if (!bp->count) {
		if (!bp->block)
			return NULL;
		ret = new_block_tiny(bp->block, size);
		//only count the block once it really has a buffer
		if (ret)
			bp->count = 1;
		return ret;
	}

	//try the root block
	ret = try_block(bp->block, size, align);
	if (ret)
		return ret;

	//root block is filled, percolate down and try the biggest one
	percolate_down(bp, 0);
	ret = try_block(bp->block, size, align);
	if (ret)
		return ret;

	//the biggest wasn't big enough; we need a new block
	if (bp->count >= bp->alloc) {
		//make room for another block (keeps alloc of the form 2^n - 1)
		size_t grown = bp->alloc + bp->alloc + 1;
		struct block *moved = realloc(bp->block, grown * sizeof(struct block));

		//on failure the old array is still valid, so keep it
		if (!moved)
			return NULL;
		bp->block = moved;
		bp->alloc = grown;
	}

	ret = new_block(bp->block+bp->count, size);
	if (!ret)
		return NULL;
	bp->count++;

	//fix the heap after adding the new block
	percolate_up(bp, bp->count-1);

	return ret;
}
#undef L
#undef R
#undef P
#undef V
//copies the NUL-terminated string str into the pool (no alignment is requested).
//returns the copy, or NULL if the pool could not provide the memory
char *block_pool_strdup(struct block_pool *bp, const char *str) {
	size_t size = strlen(str)+1;
	char *ret = block_pool_alloc_align(bp, size, 1);

	if (!ret)
		return NULL;

	memcpy(ret, str, size);
	return ret;
}
/*
Copyright (c) 2009 Joseph A. Adams
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The name of the author may not be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef CCAN_BLOCK_POOL
#define CCAN_BLOCK_POOL
#include <ccan/talloc/talloc.h>
#include <string.h>
struct block_pool;
/* Construct a new block pool.
ctx is a talloc context (or NULL if you don't know what talloc is ;) ) */
struct block_pool *block_pool_new(void *ctx);
/* Same as block_pool_alloc, but allows you to manually specify alignment.
For instance, strings need not be aligned, so set align=1 for them.
align must be a power of two. */
void *block_pool_alloc_align(struct block_pool *bp, size_t size, size_t align);
/* Allocate a block of a given size. The returned pointer will remain valid
for the life of the block_pool. The block cannot be resized or
freed individually. */
static inline void *block_pool_alloc(struct block_pool *bp, size_t size) {
	//lowest set bit of size, i.e. the greatest power of two by which size is
	//divisible (0 when size is 0); used as the alignment, capped at 16
	size_t lowbit = size & -size;

	return block_pool_alloc_align(bp, size, lowbit > 16 ? 16 : lowbit);
}
/* Free the entire block pool by handing it to talloc_free.
   Pointers previously returned by the pool must not be used afterwards. */
static inline void block_pool_free(struct block_pool *bp) {
talloc_free(bp);
}
char *block_pool_strdup(struct block_pool *bp, const char *str);
/* Copy size bytes starting at src into a new allocation from the pool.
   Returns the copy, or NULL if the pool could not provide the memory. */
static inline void *block_pool_memdup(struct block_pool *bp, const void *src, size_t size) {
	void *ret = block_pool_alloc(bp, size);

	if (!ret)
		return NULL;

	memcpy(ret, src, size);
	return ret;
}
#endif
#include "block_pool/block_pool.h"
#include "block_pool/block_pool.c"
#include "tap/tap.h"
//remembers one allocation made during the self-test
struct alloc_record {
size_t size; //number of bytes requested
char *ptr; //pointer returned by block_pool_alloc
};
//qsort comparator: orders alloc_records by ascending ptr
static int compar_alloc_record_by_ptr(const void *ap, const void *bp) {
	const struct alloc_record *lhs = ap;
	const struct alloc_record *rhs = bp;

	if (lhs->ptr == rhs->ptr)
		return 0;

	return lhs->ptr < rhs->ptr ? -1 : 1;
}
//picks a random allocation size, biased towards small sizes:
//out of 11 equally likely scales, 5 give a size below 25, 3 below 100,
//2 below 1000 and 1 below 10000 (a size of 0 is possible)
static size_t random_block_size(void) {
	int scale = random() % 11;

	switch (scale) {
		case 0:
		case 1:
		case 2:
		case 3:
		case 4: return random() % 25;
		case 5:
		case 6:
		case 7: return random() % 100;
		case 8:
		case 9: return random() % 1000;
		case 10: return random() % 10000;
		default:
			//the message used to claim "% 3", which did not match the code
			fprintf(stderr, "random() %% 11 returned %d somehow!\n", scale);
			exit(EXIT_FAILURE);
	}
}
//same heap index helpers as in block_pool.c, redefined for check_heap:
//left child, right child, parent, and the .remaining key of a node.
//V expects a variable named bp to be in scope
#define L(node) (node+node+1)
#define R(node) (node+node+2)
#define P(node) ((node-1)>>1)
#define V(node) (bp->block[node].remaining)
//used by test_block_pool to make sure the pool's block array is a max heap
//set node=0 to scan the whole heap (starting at the root)
//returns nonzero on success
static int check_heap(struct block_pool *bp, size_t node) {
	size_t left, right;

	//past the end of the array: an empty subtree is always valid
	if (node >= bp->count)
		return 1;

	left = L(node);
	right = R(node);

	//the root node need not be the max, but its subtrees must be valid
	if (node != 0) {
		if (left < bp->count && V(left) > V(node))
			return 0;
		if (right < bp->count && V(right) > V(node))
			return 0;
	}

	if (!check_heap(bp, left))
		return 0;
	return check_heap(bp, right);
}
#undef L
#undef R
#undef P
#undef V
/* Performs a self-test of block_pool.
Returns 1 on success, 0 on failure.
If verify_heap is nonzero, the test will check the heap structure every
single allocation, making test_block_pool take n^2 time. */
static int test_block_pool(size_t blocks_to_try, FILE *out, int verify_heap) {
struct block_pool *bp = block_pool_new(NULL);
struct alloc_record *record = malloc(sizeof(*record) * blocks_to_try);
size_t i;
size_t bytes_allocated = 0;
#define print(...) do { \
if (out) \
printf(__VA_ARGS__); \
} while(0)
print("Allocating %zu blocks...\n", blocks_to_try);
for (i=0; i<blocks_to_try; i++) {
record[i].size = random_block_size();
record[i].ptr = block_pool_alloc(bp, record[i].size);
bytes_allocated += record[i].size;
memset(record[i].ptr, 0x55, record[i].size);
if (verify_heap && !check_heap(bp, 0)) {
print("Block pool's max-heap is wrong (allocation %zu)\n", i);
return 0;
}
}
print("Finished allocating\n"
" %zu blocks\n"
" %zu bytes\n"
" %zu pages\n",
blocks_to_try, bytes_allocated, bp->count);
qsort(record, blocks_to_try,
sizeof(*record), compar_alloc_record_by_ptr);
print("Making sure block ranges are unique...\n");
//print("0: %p ... %p\n", record[0].ptr, record[0].ptr+record[0].size);
for (i=1; i<blocks_to_try; i++) {
struct alloc_record *a = &record[i-1];
struct alloc_record *b = &record[i];
//print("%zu: %p ... %p\n", i, b->ptr, b->ptr+b->size);
if (a->ptr > b->ptr) {
struct alloc_record *tmp = a;
a = b;
b = tmp;
}
if (a->ptr <= b->ptr && a->ptr+a->size <= b->ptr)
continue;
print("Allocations %zu and %zu overlap\n", i-1, i);
return 0;
}
print("Checking heap structure...\n");
if (!check_heap(bp, 0)) {
print("Block pool's max-heap is wrong\n");
return 0;
}
block_pool_free(bp);
free(record);
return 1;
#undef print
}
//runs the block_pool self-test as the single planned test and
//returns the test library's exit status
int main(void)
{
plan_tests(1);
//test a few blocks with heap verification
ok1(test_block_pool(10000, NULL, 1));
return exit_status();
}
#include <string.h>
#include <stdio.h>
#include "config.h"
/**
* ccan_tokenizer - A full-text lexer for C source files
*
* ccan_tokenizer generates a list of tokens given the contents of a C source
* or header file.
*
* Example:
*
* #include <ccan/ccan_tokenizer/ccan_tokenizer.h>
* #include <ccan/grab_file/grab_file.h>
* #include <err.h>
*
* void token_list_stats(const struct token_list *tl) {
* size_t comment=0, white=0, stray=0, code=0, total=0;
* size_t count = 0;
* const struct token *i;
*
* for (i=tl->first; i; i=i->next) {
* size_t size = i->orig_size;
* total += size;
* count++;
*
* if (token_type_is_comment(i->type))
* comment += size;
* else if (i->type == TOK_WHITE)
* white += size;
* else if (i->type == TOK_STRAY)
* stray += size;
* else
* code += size;
* }
*
* printf("Code: %.02f%%\n"
* "White space: %.02f%%\n"
* "Comments: %.02f%%\n",
* (double)code * 100.0 / (double)total,
* (double)white * 100.0 / (double)total,
* (double)comment * 100.0 / (double)total);
* if (stray)
* printf("Stray: %.02f%%\n",
* (double)stray * 100.0 / (double)total);
* printf("Total size: %zu bytes with %zu tokens\n",
* total, count);
* }
*
* int main(int argc, char *argv[]) {
* size_t len;
* char *file;
* struct token_list *tl;
* tok_message_queue mq;
* queue_init(mq, NULL);
*
* //grab the file
* if (argc != 2) {
* fprintf(stderr, "Usage: %s source_file\n", argv[0]);
* return 1;
* }
* file = grab_file(NULL, argv[1], &len);
* if (!file)
* err(1, "Could not read file %s", argv[1]);
*
* //tokenize the contents
* tl = tokenize(file, len, &mq);
*
* //print warnings, errors, etc.
* while (queue_count(mq)) {
* struct tok_message msg = dequeue(mq);
* tok_message_print(&msg, tl);
* }
*
* //do neat stuff with the token list
* token_list_stats(tl);
*
* //free stuff
* talloc_free(file); //implicitly frees tl
* queue_free(mq);
*
* return 0;
* }
*/
/* _info entry point for ccan_tokenizer.
   When run with the single argument "depends", prints the module's
   dependency list and returns 0; any other invocation returns 1. */
int main(int argc, char *argv[])
{
	/* Exactly one argument is expected, and "depends" is the only one handled */
	if (argc == 2 && strcmp(argv[1], "depends") == 0) {
		printf("ccan/array\n");
		return 0;
	}

	return 1;
}
This diff is collapsed.
/*
Copyright (c) 2009 Joseph A. Adams
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The name of the author may not be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef CCAN_TOKENIZER_H
#define CCAN_TOKENIZER_H
#include <ccan/array/array.h>
#include "charflag.h"
#include "dict.h"
#include "queue.h"
#include <stdint.h>
#include <errno.h> //for readui
/* Definition of tokens and the token list */
//Kind of a token. The order of the enumerators matters: the
//token_type_is_* macros below test for contiguous ranges of values.
enum token_type {
TOK_INTEGER, //integer (e.g. 5, 1000L, 0x5)
TOK_FLOATING, //floating point number (e.g. 5.0, 7.0f, etc.)
TOK_OPERATOR, //operator (e.g. +, -, (, ), ++, etc.)
#define token_type_is_identifier(type) ((type)>=TOK_KEYWORD && (type)<=TOK_IDENTIFIER)
TOK_KEYWORD, //keyword (e.g. char, _Bool, ifdef)
TOK_IDENTIFIER, //identifier or unprocessed keyword (e.g. int, token, pp_conditions)
TOK_CHAR, //character literal (e.g. 'a' or even '1234')
TOK_STRING, //string literal (e.g. "hello" or "zero\0inside")
TOK_LEADING_POUND, //leading # in a preprocessor directive (e.g. # include)
TOK_STRING_IQUOTE, // "config.h"
TOK_STRING_IANGLE, // <stdio.h>
#define token_type_is_ignored(type) ((type)>=TOK_CCOMMENT && (type)<=TOK_WHITE)
#define token_type_is_comment(type) ((type)>=TOK_CCOMMENT && (type)<=TOK_CPPCOMMENT)
TOK_CCOMMENT, //C comment (e.g. /* comment */)
TOK_CPPCOMMENT, //C++ comment (e.g. //comment )
TOK_WHITE, //whitespace (span of \t\n\v\f\r and space)
TOK_STARTLINE, //beginning of line (txt/txtsize is always empty)
TOK_STRAY, //control characters, weird characters, and extended characters where they shouldn't be
};
//Suffix of a numeric constant, stored as a set of bit flags;
//the combined values below are simply the OR of the basic flags.
enum tok_suffix {
TOK_NOSUFFIX = 0,
TOK_U = 1, //unsigned
TOK_L = 2, //long or double-precision float
TOK_LL = 4, //long long (note that TOK_L and TOK_LL are mutually exclusive)
TOK_F = 8, //float (single-precision)
TOK_I = 16, //imaginary
TOK_UL = TOK_U | TOK_L, //unsigned long
TOK_ULL = TOK_U | TOK_LL, //unsigned long long
//Imaginary combo meals
TOK_IMAG_U = TOK_I | TOK_U,
TOK_IMAG_L = TOK_I | TOK_L,
TOK_IMAG_LL = TOK_I | TOK_LL,
TOK_IMAG_F = TOK_I | TOK_F,
TOK_IMAG_UL = TOK_I | TOK_UL,
TOK_IMAG_ULL = TOK_I | TOK_ULL,
};
//Payload of a TOK_INTEGER token
struct tok_integer {
uint64_t v; //value of the constant
int base; //one of 2, 8, 10, or 16
enum tok_suffix suffix; //suffix flags found on the constant
};
//Payload of a TOK_FLOATING token
struct tok_floating {
long double v; //value of the constant
enum tok_suffix suffix; //suffix flags found on the constant
};
//Operator/keyword naming conventions taken from Jeff Lee's Yacc grammar:
//http://www.lysator.liu.se/c/ANSI-C-grammar-y.html
//IDs of multi-character operators and of keywords. Values start at 128;
//single-character operators are identified by the character itself (e.g. '+').
//The order of the preprocessor keywords matters: opkw_is_directive_only
//tests for the contiguous range DEFINE..WARNING.
enum tok_opkw {
/* Permute these regularly */
PTR_OP=128, INC_OP, DEC_OP, LEFT_OP, RIGHT_OP, LE_OP, GE_OP, EQ_OP, NE_OP,
AND_OP, OR_OP,
MUL_ASSIGN, DIV_ASSIGN, MOD_ASSIGN,
ADD_ASSIGN, SUB_ASSIGN,
AND_ASSIGN, XOR_ASSIGN, OR_ASSIGN,
LEFT_ASSIGN, RIGHT_ASSIGN,
ELLIPSIS,
DOUBLE_POUND,
//Keywords
_BOOL,
_COMPLEX,
_IMAGINARY,
BREAK,
CASE,
CHAR,
CONST,
CONTINUE,
DEFAULT,
DO,
DOUBLE,
ELSE,
ENUM,
EXTERN,
FLOAT,
FOR,
GOTO,
IF,
INLINE,
INT,
LONG,
REGISTER,
RESTRICT,
RETURN,
SHORT,
SIGNED,
SIZEOF,
STATIC,
STRUCT,
SWITCH,
TYPEDEF,
UNION,
UNSIGNED,
VOID,
VOLATILE,
WHILE,
//Preprocessor keywords (except those already defined)
VA_ARGS,
#define opkw_is_directive_only(opkw) ((opkw)>=DEFINE && (opkw)<=WARNING)
#define opkw_is_directive(opkw) (opkw_is_directive_only(opkw) || (opkw)==ELSE || (opkw)==IF)
DEFINE,
ELIF,
//ELSE,
ENDIF,
ERROR,
//IF,
IFDEF,
IFNDEF,
INCLUDE,
LINE,
PRAGMA,
UNDEF,
WARNING, /* gcc extension */
};
//Per-token boolean flags, packed into one-bit fields
struct token_flags {
unsigned short
pp:1, //is token part of a preprocessor line
pp_directive:1; //does token follow a TOK_LEADING_POUND (e.g. # include)
};
//One token of a token list; tokens are linked in both directions
struct token {
struct token *prev, *next;
struct token_flags flags;
short type; //enum token_type
//type-specific payload; which member is meaningful depends on type
union {
struct tok_integer integer;
struct tok_floating floating;
int opkw; //operator or keyword ID (e.g. '+', INC_OP (++), ADD_ASSIGN (+=))
array_char string; //applies to TOK_CHAR and TOK_STRING
char *include; //applies to TOK_STRING_IQUOTE and TOK_STRING_IANGLE
};
//text this token represents (with backslash-broken lines merged)
const char *txt;
size_t txt_size;
//text this token represents (untouched)
const char *orig;
size_t orig_size;
//zero-based line and column number of this token
size_t line, col;
};
//keywords such as int, long, etc. may be defined over, making them identifiers in a sense
//returns nonzero if the token's type is TOK_KEYWORD or TOK_IDENTIFIER
static inline int token_is_identifier(const struct token *tok) {
return token_type_is_identifier(tok->type);
}
//returns nonzero if the token is a comment (C or C++ style) or whitespace
static inline int token_is_ignored(const struct token *tok) {
return token_type_is_ignored(tok->type);
}
//returns 1 if tok is an operator token whose ID equals opkw, 0 otherwise
static inline int token_is_op(const struct token *tok, int opkw) {
	if (tok->type != TOK_OPERATOR)
		return 0;
	return tok->opkw == opkw;
}
//returns 1 if tok is a keyword token whose ID equals opkw, 0 otherwise
static inline int token_is_kw(const struct token *tok, int opkw) {
	if (tok->type != TOK_KEYWORD)
		return 0;
	return tok->opkw == opkw;
}
//returns 1 if the token's text (txt, txt_size) is exactly the NUL-terminated string str, 0 otherwise
static inline int token_txt_is(const struct token *tok, const char *str) {
	const size_t want = strlen(str);

	if (tok->txt_size != want)
		return 0;
	return memcmp(tok->txt, str, want) == 0;
}
//Result of tokenizing one input: the linked tokens plus the text they point into
struct token_list {
struct token *first, *last;
//Points to original input as given
const char *orig;
size_t orig_size;
//position of the start of each real line with respect to orig
const char * const *olines;
size_t olines_size;
//Copy of original input without backslash-broken lines
const char *txt;
size_t txt_size;
//position of the start of each real line with respect to txt
const char * const *tlines;
size_t tlines_size;
//Set me so tok_message_print will know what file name to display
const char *filename;
};
extern struct dict *tokenizer_dict;
typedef queue(struct tok_message) tok_message_queue;
//the token_list is allocated as a child of orig
struct token_list *tokenize(const char *orig, size_t orig_size, tok_message_queue *mq);
size_t token_list_count(const struct token_list *tl);
//used for debugging
int token_list_sanity_check(const struct token_list *tl, FILE *err);
void token_list_dump(const struct token_list *tl, FILE *f);
/* tok_point_lookup is used to locate a pointer that is within a token list's
txt or orig fields */
//Output of tok_point_lookup
struct tok_point {
const char *txt, *orig; //presumably the resolved position within the list's txt and orig - confirm against tok_point_lookup
size_t line, col; //presumably zero-based like struct token's line/col - confirm
};
//returns nonzero if the pointer could be resolved
int tok_point_lookup(struct tok_point *out, const char *ptr,
const struct token_list *tl);
/* Tokenizer message queue; used to gather and report warnings, errors, etc. */
//Level of a tokenizer message; each tok_msg_* macro passes the matching level
enum tok_message_level {TM_DEBUG, TM_INFO, TM_WARN, TM_ERROR, TM_BUG};
//One entry of the tokenizer message queue.
//Each field's description follows the field it belongs to.
struct tok_message {
enum tok_message_level level;
const char *path;
//path: Unique slash-delimited name of the message
//e.g. tokenize/read_cstring/ambiguous_octal
const char *message;
//message: Human-readable description
//e.g. `Octal \007 followed by digit`
const char *location;
//location: Pointer (typically within the token list's txt or orig) of the error
};
//Shorthands for tok_message_add, one per message level.
//They expect a tok_message_queue pointer named mq and a string-literal macro
//MESSAGE_PATH to be in scope where they are used; name is stringified and
//appended to MESSAGE_PATH to form the message's path.
#define tok_msg_debug(name, loc, fmt, ...) tok_message_add(mq, TM_DEBUG, MESSAGE_PATH #name, loc, fmt, ##__VA_ARGS__)
#define tok_msg_info(name, loc, fmt, ...) tok_message_add(mq, TM_INFO, MESSAGE_PATH #name, loc, fmt, ##__VA_ARGS__)
#define tok_msg_warn(name, loc, fmt, ...) tok_message_add(mq, TM_WARN, MESSAGE_PATH #name, loc, fmt, ##__VA_ARGS__)
#define tok_msg_error(name, loc, fmt, ...) tok_message_add(mq, TM_ERROR, MESSAGE_PATH #name, loc, fmt, ##__VA_ARGS__)
#define tok_msg_bug(name, loc, fmt, ...) tok_message_add(mq, TM_BUG, MESSAGE_PATH #name, loc, fmt, ##__VA_ARGS__)
void tok_message_add(tok_message_queue *mq, enum tok_message_level level,
const char *path, const char *loc, const char *fmt, ...);
void tok_message_print(struct tok_message *m, struct token_list *tl);
void tok_message_dump(struct tok_message *m);
void tok_message_queue_dump(const tok_message_queue *mq);
/* Miscellaneous internal components */
char *read_cstring(array_char *out, const char *s, const char *e, char quoteChar, tok_message_queue *mq);
char *read_cnumber(struct token *tok, const char *s, const char *e, tok_message_queue *mq);
//Base argument of readui(): a numeric base, optionally OR'ed with the
//READUI_ALLOW* flag bits below (as READUI_HEX does)
typedef unsigned int readui_base;
#define READUI_ALLOWHIGHERDIGITS 256
#define READUI_ALLOWCAPLETTERS 512
#define READUI_ALLOWLCASELETTERS 1024
#define READUI_ALLOWLETTERS (READUI_ALLOWCAPLETTERS | READUI_ALLOWLCASELETTERS)
#define READUI_DEC ((readui_base)(10))
#define READUI_HEX ((readui_base)(16 | READUI_ALLOWLETTERS))
#define READUI_OCT ((readui_base)(8))
#define READUI_BIN ((readui_base)(2))
uint64_t readui(const char **sp, const char *e, readui_base base);
#endif
#include "charflag.h"
//One-letter aliases for the CF_* flags, used only to keep the table below compact;
//they are undefined again right after it
#define C CF_CONTROL
#define S CF_SPACE
#define R CF_RETURN
#define D CF_DIGIT
#define L CF_LETTER
#define H CF_HEX
#define Y CF_SYMBOL
//Classification flags for every possible byte value, indexed by the byte:
//entries 0-127 follow ASCII order, entries 128-255 carry no flags
unsigned char charflag[256] = {
C,C,C,C,C,C,C,C,C,
S, // \t
R, // \n
S, // \v
S, // \f
R, // \r
C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,
S, // space
Y, // !
Y, // "
Y, // #
Y, // $
Y, // %
Y, // &
Y, // '
Y, // (
Y, // )
Y, // *
Y, // +
Y, // ,
Y, // -
Y, // .
Y, // /
D|H, // 0
D|H, // 1
D|H, // 2
D|H, // 3
D|H, // 4
D|H, // 5
D|H, // 6
D|H, // 7
D|H, // 8
D|H, // 9
Y, // :
Y, // ;
Y, // <
Y, // =
Y, // >
Y, // ?
Y, // @
L|H, // A
L|H, // B
L|H, // C
L|H, // D
L|H, // E
L|H, // F
L, // G
L, // H
L, // I
L, // J
L, // K
L, // L
L, // M
L, // N
L, // O
L, // P
L, // Q
L, // R
L, // S
L, // T
L, // U
L, // V
L, // W
L, // X
L, // Y
L, // Z
Y, // [
Y, // \ (backslash)
Y, // ]
Y, // ^
Y, // _
Y, // `
L|H, // a
L|H, // b
L|H, // c
L|H, // d
L|H, // e
L|H, // f
L, // g
L, // h
L, // i
L, // j
L, // k
L, // l
L, // m
L, // n
L, // o
L, // p
L, // q
L, // r
L, // s
L, // t
L, // u
L, // v
L, // w
L, // x
L, // y
L, // z
Y, // {
Y, // |
Y, // }
Y, // ~
C, // DEL
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
};
#undef C
#undef S
#undef R
#undef D
#undef L
#undef H
#undef Y
#ifndef CCAN_CHARFLAG_H
#define CCAN_CHARFLAG_H
//All of these macros evaluate the argument exactly once
#define ccontrol(c) (charflag(c) & CF_CONTROL) //Weird characters that shouldn't be in text
#define cspace(c) (charflag(c) & CF_SPACE) //Space, tab, vertical tab, form feed
#define creturn(c) (charflag(c) & CF_RETURN) //Newline
#define cwhite(c) (charflag(c) & CF_WHITE) //cspace or creturn
#define cdigit(c) (charflag(c) & CF_DIGIT) //0-9
#define cletter(c) (charflag(c) & CF_LETTER) //A-Za-z
#define chex(c) (charflag(c) & CF_HEX) //0-9A-Fa-f
#define csymbol(c) (charflag(c) & CF_SYMBOL)
// !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~
//If it's ASCII, prints a non-blank character, and is not a digit or letter, it's a symbol
#define cextended(c) (charflag(c) == 0) //Characters >= 128
/* To test:
All charflag macros should evaluate exactly once
*/
extern unsigned char charflag[256];
#define charflag(c) (charflag[(unsigned int)(unsigned char)(c)])
#define CF_CONTROL ((unsigned char) 1)
#define CF_SPACE ((unsigned char) 2)
#define CF_RETURN ((unsigned char) 4)
#define CF_DIGIT ((unsigned char) 8)
#define CF_LETTER ((unsigned char) 16)
#define CF_HEX ((unsigned char) 32)
#define CF_SYMBOL ((unsigned char) 64)
#define CF_WHITE (CF_SPACE|CF_RETURN)
#endif
#include "dict.h"
#include <string.h>
#include <stdlib.h>
#include <assert.h>
//qsort comparator for dict_entries:
//first letter ascending, then longer strings before shorter ones
static int compar_dict_entry(const void *ap, const void *bp) {
	const struct dict_entry *lhs = ap;
	const struct dict_entry *rhs = bp;
	unsigned int lead_l = (unsigned int)lhs->str[0];
	unsigned int lead_r = (unsigned int)rhs->str[0];
	size_t len_l, len_r;

	if (lead_l != lead_r)
		return lead_l < lead_r ? -1 : 1;

	//same first letter: the longer string sorts first
	len_l = strlen(lhs->str);
	len_r = strlen(rhs->str);
	if (len_l == len_r)
		return 0;
	return len_l > len_r ? -1 : 1;
}
//builds a lookup dictionary from count entries, as a talloc child of ctx.
//the entries are copied and sorted by first letter (longest string first within
//a letter); dict->by_first_letter[c-1] points at the first entry starting with
//byte c, and dict->zero at the empty-string entry, if there is one.
//the copy ends with an extra empty-string entry so that a scan over the last
//first-letter group stops at the end of the array instead of running past it.
//returns the dictionary, or NULL if memory could not be allocated;
//exits the program if entries contains more than one empty string
struct dict *dict_build(void *ctx, const struct dict_entry *entries, size_t count) {
	struct dict *dict = talloc_zero(ctx, struct dict);
	struct dict_entry *ent;
	int i;

	if (!dict || !count)
		return dict;

	//one slot more than needed, for the terminator
	ent = talloc_array(dict, struct dict_entry, count+1);
	if (!ent) {
		talloc_free(dict);
		return NULL;
	}
	memcpy(ent, entries, count*sizeof(struct dict_entry));
	ent[count].id = 0;
	ent[count].str = "";

	//the terminator stays out of the sort so it remains the last element
	qsort(ent, count, sizeof(*ent), compar_dict_entry);

	if (ent->str[0]==0) {
		dict->zero = ent;
		ent++, count--;

		if (count && ent->str[0]==0) {
			fprintf(stderr, "dict_entry array contains multiple empty strings\n");
			exit(EXIT_FAILURE);
		}
	}

	//walk the sorted entries once, recording where each first letter's group starts
	for (i=1; i<256; i++) {
		if (!count)
			break;
		if (ent->str[0] == (char)i)
			dict->by_first_letter[i-1] = ent;
		while (count && ent->str[0] == (char)i)
			ent++, count--;
	}

	return dict;
}
//looks for a dictionary entry whose string is a prefix of the text from *sp up to e.
//entries sharing the first letter are tried in array order and the first match wins.
//on a match, *sp is advanced past the matched text and the entry is returned;
//otherwise NULL is returned and *sp is left alone.
//a zero byte at *sp matches only the empty-string entry (dict->zero), consuming that byte
struct dict_entry *dict_lookup(struct dict *dict, const char **sp, const char *e) {
	const char *start = *sp;
	struct dict_entry *cand;
	unsigned int lead;

	if (start >= e)
		return NULL;

	lead = (unsigned int)*start & 0xFF;
	if (lead == 0) {
		if (dict->zero)
			*sp = start + 1;
		return dict->zero;
	}

	cand = dict->by_first_letter[lead-1];
	if (cand == NULL)
		return NULL;

	//scan the run of entries that start with this letter
	while (cand->str[0] == (char)lead) {
		const char *in = start;
		const char *word = cand->str;

		//advance while the input keeps agreeing with the entry
		while (*word && in < e && *in == *word) {
			in++;
			word++;
		}

		//reached the end of the entry: it is a prefix of the input
		if (!*word) {
			*sp = in;
			return cand;
		}

		cand++;
	}

	return NULL;
}
#ifndef CCAN_TOKENIZER_DICT_H
#define CCAN_TOKENIZER_DICT_H
#include <stdint.h>
#include <ccan/talloc/talloc.h>
//needed for freeing the struct dict*
//one string of the dictionary together with the caller's ID for it
struct dict_entry {
int id; //caller-chosen identifier; not interpreted by dict_build or dict_lookup
const char *str; //NUL-terminated string to match
};
//lookup structure produced by dict_build
struct dict {
struct dict_entry *zero; //entry for the empty string, or NULL if there is none
struct dict_entry *by_first_letter[256]; //slot c-1 points at the first entry starting with byte c, or is NULL
};
struct dict *dict_build(void *ctx, const struct dict_entry *entries, size_t count);
struct dict_entry *dict_lookup(struct dict *dict, const char **sp, const char *e);
#endif
readui - Flexible function for reading a 64-bit unsigned integer
@sp: Pointer to scanning pointer
@e: Pointer to end of string
@base: Typically one of READUI_DEC, READUI_HEX, READUI_OCT, or READUI_BIN.
readui() converts the string of digits from *sp to e to a number, setting *sp to the first invalid character or e if the entire string is valid or empty. It does not look at prefixes or suffixes, only digits. It skips preceding whitespace.
readui() uses errno to indicate success or failure. It will set errno to one of the following:
0: Input is valid and non-empty
EINVAL: Input is empty, does not start with any valid digits, or base is 0
ERANGE: Number given is greater than ULLONG_MAX
Example (UNTESTED):
uint64_t read_number(const char *str) {
const char *s = str, *e = strchr(str, 0);
readui_base base = READUI_DEC;
uint64_t result;
//See if the number has a 0x (for hex) or 0 (for octal) prefix
if (s+2<=e && *s=='0') {
s++;
if (*s=='x' || *s=='X') {
base = READUI_HEX;
s++;
} else
base = READUI_OCT;
}
result = readui(&s, e, base);
if (errno)
perror("read_number");
return result;
}
Rules for a token list:
It always contains at least one token, and its first token is always a TOK_STARTLINE
Misc.:
If the world were intuitive, the tokenizer would never report warnings or bugs on a source file that compiles successfully. However, one case where it does is when erroneous tokens appear within an #if 0 block. Example:
#if 0
0b101.0p0
#endif
/*
guppy is a pattern-matching language by Joey Adams that's not implemented or formalized yet.
See http://www.funsitelots.com/pub/guppy.g for a near self-definition
This is a guppy representation of integer and floating point formatting in C.
It is based on http://c0x.coding-guidelines.com/6.4.4.1.html and http://c0x.coding-guidelines.com/6.4.4.2.html
*/
number_constant: [
integer_constant()
floating_constant()
]
integer_constant: [
([1-9] [0-9]*) //decimal
(0 [0-7]*) //octal
(0 [X x] [0-9 A-F a-f]*) //hexadecimal
]
integer_suffix: [
([U u] [L l]*0..2)
([L l]*1..2 [U u]*0..1)
]
floating_constant: [
decimal_floating_constant()
hexadecimal_floating_constant()
]
decimal_floating_constant: [
([0-9]* '.' [0-9]+ exponent_part()*0..1 floating_suffix())
([0-9]+ '.' exponent_part()*0..1 floating_suffix())
([0-9]+ exponent_part() floating_suffix())
]
exponent_part:
([E e] ['+' '-']*0..1 [0-9]+)
hexadecimal_floating_constant:
(0 [X x] [
[0-9 A-F a-f]* '.' [0-9 A-F a-f]+
[0-9 A-F a-f]+ '.'
[0-9 A-F a-f]+
] [P p] ['+' '-']*0..1 [0-9]+ floating_suffix())
floating_suffix: [F L f l]*0..1
scan_number:
(
[
(0 [X x] [0-9 A-F a-f '.']*)
(0 [B b] [0-1] [0-9 '.']*)
([0-9 '.']*)
]
( [E P e p] ['+' '-']*0..1 [0-9]* )*0..1
[0-9 A-Z a-z '.' '_' '$']*
)
/*
Notes:
A numeric constant can begin with any of:
0-9 '.'
and can contain any of:
0-9 a-f e f l p u x '.' '+' '-'
along with capital equivalents.
If scanning finds something starting with a '.' but no decimal digit after it, it is the '.' operator and not a number.
*/
/*
Copyright (c) 2009 Joseph A. Adams
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The name of the author may not be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "queue.h"
#include <ccan/talloc/talloc.h>
#include <string.h>
/*
 * Grow a queue whose ring buffer has just become full (the enqueue macro
 * calls this when tail has caught up with head).
 *
 * qp:       pointer to any queue(type) object; accessed here as queue(char)
 *           so that offsets can be computed in bytes.
 * itemSize: size in bytes of one queue item.
 *
 * The capacity (flag+1) is doubled.  The items stored from head up to the
 * end of the old buffer are then copied into the newly added upper half, and
 * head is moved to follow them.
 * NOTE(review): the result of talloc_realloc_size is not checked.
 */
void queue_enqueue_helper(void *qp, size_t itemSize) {
	queue(char) *q = qp;
	const size_t old_capacity = q->flag + 1;
	size_t old_head;

	/* flag is used as an index mask, so adding the old capacity doubles it */
	q->flag += old_capacity;
	q->item = talloc_realloc_size(NULL, q->item, (q->flag + 1) * itemSize);

	/* relocate the [head, old_capacity) run into the new upper half */
	old_head = q->head;
	q->head = old_head + old_capacity;
	memcpy(q->item + q->head * itemSize,
	       q->item + old_head * itemSize,
	       (old_capacity - old_head) * itemSize);
}
/*
 * Do-nothing comparison callback.  queue.h hands it to qsort() in the
 * queue_alias() fallback used when HAVE_ATTRIBUTE_MAY_ALIAS is not 1.
 * Both arguments are ignored and the result is always 0.
 */
int queue_alias_helper(const void *a, const void *b) {
	(void)a;
	(void)b;
	return 0;
}
/*
Copyright (c) 2009 Joseph A. Adams
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The name of the author may not be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef CCAN_QUEUE_H
#define CCAN_QUEUE_H
#include <stdint.h>
#include <ccan/talloc/talloc.h>
#ifndef HAVE_ATTRIBUTE_MAY_ALIAS
#define HAVE_ATTRIBUTE_MAY_ALIAS 1
#endif
/*
 * queue(type) declares an anonymous struct holding a ring buffer of `type`:
 *   head - index of the next item to dequeue
 *   tail - index at which the next item will be stored
 *   flag - index mask; queue_init sets it to 3 for a 4-item buffer
 *   item - the talloc'd item storage
 *
 * queue_alias(ptr) is invoked by enqueue() after queue_enqueue_helper() has
 * modified the queue through a void pointer.  With the may_alias attribute
 * it expands to nothing; otherwise it makes a zero-element qsort() call.
 */
#if HAVE_ATTRIBUTE_MAY_ALIAS==1
#define queue_alias(ptr) /* nothing */
#define queue(type) struct {size_t head, tail, flag; type *item;} __attribute__((__may_alias__))
#else
#define queue_alias(ptr) qsort(ptr, 0, 1, queue_alias_helper) //hack
#define queue(type) struct {size_t head, tail, flag; type *item;}
#endif
/* No-op comparison function used by the qsort() form of queue_alias(). */
int queue_alias_helper(const void *a, const void *b);
/* Empties the queue and allocates room for 4 items under talloc context ctx. */
#define queue_init(queue, ctx) do {(queue).head = (queue).tail = 0; (queue).flag = 3; (queue).item = talloc_size(ctx, sizeof(*(queue).item)*4);} while(0)
/* Releases the item storage (the queue struct itself is not freed). */
#define queue_free(queue) do {talloc_free((queue).item);} while(0)
/* Number of items currently stored: (tail - head) masked by flag. */
#define queue_count(queue) (((queue).tail-(queue).head) & (queue).flag)
/*
 * Stores the value at tail and advances tail (masked by flag).  If tail then
 * equals head the buffer is full, so queue_enqueue_helper() is called to
 * enlarge it.  The queue argument is evaluated several times.
 */
#define enqueue(queue, ...) \
do { \
(queue).item[(queue).tail++] = (__VA_ARGS__); \
(queue).tail &= (queue).flag; \
if ((queue).tail == (queue).head) { \
queue_enqueue_helper(&(queue), sizeof(*(queue).item)); \
queue_alias(&(queue)); \
} \
} while(0)
/*
 * Like dequeue(), but yields NULL when head == tail (empty queue).
 * NOTE(review): the NULL result suggests this is meant for pointer item
 * types - confirm before using it with other types.
 */
#define dequeue_check(queue) ((queue).head != (queue).tail ? dequeue(queue) : NULL)
/* Yields the item at head and advances head; does not check for emptiness. */
#define dequeue(queue) ((queue).item[queue_dequeue_helper(&(queue).head, (queue).flag)])
//TODO: Test us
/* The item at head, without removing it. */
#define queue_next(queue) ((queue).item[(queue).head])
/* The item `pos` places after head (index masked by flag), without removing it. */
#define queue_item(queue, pos) ((queue).item[((queue).head+(pos)) & (queue).flag])
/* Advances head by one item (masked by flag), discarding that item. */
#define queue_skip(queue) do {(queue).head++; (queue).head &= (queue).flag;} while(0)
/* Enlarges a full queue; qp points to a queue(type), itemSize is sizeof one item. */
void queue_enqueue_helper(void *qp, size_t itemSize);
/*
 * Advance *head by one position, wrapping with the index mask `flag`, and
 * return the index head had before the advance (used by the dequeue macro).
 */
static inline size_t queue_dequeue_helper(size_t *head, size_t flag) {
	const size_t current = *head;

	*head = (current + 1) & flag;
	return current;
}
#endif
//for strtold
#define _ISOC99_SOURCE
#include <stdlib.h>
#undef _ISOC99_SOURCE
#include "ccan_tokenizer.h"
#ifndef ULLONG_MAX
#define ULLONG_MAX 18446744073709551615ULL
#endif
/*
 * Returns a pointer to the first character in [s, e) that is not an
 * acceptable digit for `base`, or e if every character is acceptable.
 *
 * The low 8 bits of base hold the radix.  Letters are accepted only when the
 * matching READUI_ALLOWCAPLETTERS / READUI_ALLOWLCASELETTERS bit is set, and
 * a digit whose value is >= the radix is accepted only when
 * READUI_ALLOWHIGHERDIGITS is set.
 */
static const char *skipnum(const char *s, const char *e, readui_base base) {
	const char *p = s;

	while (p < e) {
		unsigned int c = (unsigned char)*p;
		unsigned int value;

		if (cdigit(c)) {
			value = c - '0';
		} else if (c>='A' && c<='Z') {
			if (!(base & READUI_ALLOWCAPLETTERS))
				break;
			value = c - 'A' + 10;
		} else if (c>='a' && c<='z') {
			if (!(base & READUI_ALLOWLCASELETTERS))
				break;
			value = c - 'a' + 10;
		} else {
			break;
		}

		/* digit too large for the radix, and not explicitly permitted */
		if (value >= (base & 0xFF) && !(base & READUI_ALLOWHIGHERDIGITS))
			break;

		p++;
	}

	return p;
}
/*
 * Converts the digit string [s, e) to an unsigned 64-bit value, using the
 * low 8 bits of `base` as the radix.  Characters are converted without
 * validation ('a'.. and 'A'.. map to 10.., everything else is taken relative
 * to '0'); readui() runs skipnum() first to choose the range.
 *
 * Returns the value with errno set to 0 on success.
 * Returns 0 with errno = EINVAL if the range is empty or the radix is < 1.
 * Returns ULLONG_MAX with errno = ERANGE if the value does not fit in 64 bits.
 */
static uint64_t readui_valid(const char *s, const char *e, readui_base base) {
uint64_t ret = 0;
uint64_t multiplier = 1;
uint64_t digit_value;
//64-bit multiplication with overflow checking:
//  dest *= src, jumping to the `overflowed` label if the product needs more
//  than 64 bits. Both operands are split into 32-bit halves so that every
//  partial product fits in a uint64_t.
#define multiply(dest, src) do { \
uint32_t a0 = (uint64_t)(dest) & 0xFFFFFFFF; \
uint32_t a1 = (uint64_t)(dest) >> 32; \
uint32_t b0 = (uint64_t)(src) & 0xFFFFFFFF; \
uint32_t b1 = (uint64_t)(src) >> 32; \
uint64_t a, b; \
\
if (a1 && b1) \
goto overflowed; \
a = (uint64_t)a1*b0 + (uint64_t)a0*b1; \
if (a > 0xFFFFFFFF) \
goto overflowed; \
a <<= 32; \
b = (uint64_t)a0*b0; \
\
if (a+b < a) \
goto overflowed; \
(dest) = a+b; \
} while(0)
if (s >= e || ((base&0xFF) < 1)) {
errno = EINVAL;
return 0;
}
//skip leading zeros so they cannot cause a spurious multiplier overflow
while (s<e && *s=='0') s++;
if (e > s) {
//walk the digits from least significant (last) to most significant (first)
for (;;) {
char c = *--e;
//this series of if statements takes advantage of the fact that 'a'>'A'>'0'
if (c >= 'a')
c -= 'a'-10;
else if (c >= 'A')
c -= 'A'-10;
else
c -= '0';
digit_value = c;
//TODO: Write/find a testcase where temp *= multiplier does overflow
multiply(digit_value, multiplier);
//unsigned addition wrapped around, so the sum does not fit
if (ret+digit_value < ret)
goto overflowed;
ret += digit_value;
//stop before scaling the multiplier past the most significant digit
if (e <= s)
break;
multiply(multiplier, base & 0xFF);
}
}
errno = 0;
return ret;
overflowed:
errno = ERANGE;
return ULLONG_MAX;
#undef multiply
}
/*
 * Reads an unsigned integer from the text starting at *sp and bounded by e.
 *
 * Leading whitespace (per cwhite) is skipped, then skipnum() finds the end of
 * the digit run for `base`.  *sp is updated to point just past that run, and
 * the run is converted by readui_valid(), whose return value and errno
 * setting are passed through to the caller.
 */
uint64_t readui(const char **sp, const char *e, readui_base base) {
	const char *start = *sp;
	const char *stop;

	while (start < e && cwhite(*start))
		start++;

	stop = skipnum(start, e, base);
	*sp = stop;

	return readui_valid(start, stop, base);
}
#define MESSAGE_PATH "tokenize/read_cnumber/"
/* Result of scan_number(): where each component of a number token begins. */
struct scan_number {
/*
* Each pointer marks the first character of one component of the token;
* a component is empty when its pointer equals the next one.
* Consider 0x50.1p+1f . It would be broken down into:
*/
const char *prefix; // 0x
const char *digits; // 50.1
const char *exponent; // p+1
const char *suffix; // f
const char *end; // just past the last character of the token
size_t dots_found; // 1 (count of '.' characters in the digits)
};
/*
 * Scans past every character belonging to the number token that starts at s
 * (bounded by e), records where each component begins in *sn, and returns
 * TOK_INTEGER or TOK_FLOATING.
 *
 * The token is floating if its digits contain a '.' or if it has an
 * exponent; otherwise it is an integer.
 *
 * The first character must be [0-9 '.']
 */
static enum token_type scan_number(struct scan_number *sn,
		const char *s, const char *e) {
	enum token_type type;
	int is_hex = 0;

	sn->dots_found = 0;
	sn->prefix = s;
	sn->digits = s;

	/* A prefix is only recognised when at least one character follows it */
	if (s+3 <= e && s[0] == '0') {
		if (s[1]=='X' || s[1]=='x') {
			is_hex = 1;
			s += 2;
			sn->digits = s;
		} else if (s[1]=='B' || s[1]=='b') {
			/* "0b" counts as a prefix only if a binary digit follows */
			if (s[2]=='0' || s[2]=='1')
				s += 2;
			sn->digits = s;
		}
	}

	/* Digits: hex digits after 0x, decimal digits otherwise; dots are counted */
	while (s < e) {
		if (*s == '.')
			sn->dots_found++;
		else if (is_hex ? !chex(*s) : !cdigit(*s))
			break;
		s++;
	}

	/* Exponent: introduced by P/p after a prefix, by E/e without one */
	sn->exponent = s;
	if (s < e) {
		int has_exponent;

		if (sn->prefix < sn->digits)
			has_exponent = (*s=='P' || *s=='p');
		else
			has_exponent = (*s=='E' || *s=='e');

		if (has_exponent) {
			s++;
			if (s<e && (*s=='+' || *s=='-'))
				s++;
			while (s<e && cdigit(*s))
				s++;
		}
	}

	/* Suffix: whatever else could plausibly be glued onto the number */
	sn->suffix = s;
	while (s<e && (cdigit(*s) || cletter(*s) ||
	               *s=='.' || *s=='_' || *s=='$'))
		s++;
	sn->end = s;

	/* Scanning is finished; now classify the token */
	if (sn->dots_found || sn->exponent < sn->suffix)
		type = TOK_FLOATING;
	else
		type = TOK_INTEGER;

	/* For an unprefixed integer starting with 0, treat that 0 as an octal prefix */
	if (type==TOK_INTEGER && sn->prefix==sn->digits &&
	    sn->digits < s && sn->digits[0]=='0')
		sn->digits++;

	return type;
}
/*
 * Parses the suffix text [s, e) of a numeric constant into a set of
 * tok_suffix flags.
 *
 * The suffix is read piece by piece, case-insensitively: "LL", "L", "U", "F"
 * and "I".  Any other character, a repeated piece, or a mix of "L" and "LL"
 * (in either order) makes the suffix invalid.
 *
 * type selects which diagnostics apply (TOK_INTEGER or TOK_FLOATING).
 * Errors are reported through mq; in every error case TOK_NOSUFFIX is
 * returned.
 */
static enum tok_suffix read_number_suffix(const char *s, const char *e,
		enum token_type type, tok_message_queue *mq) {
	const char *orig_s = s;
	enum tok_suffix sfx = 0;

	//read the suffix in pieces
	while (s<e) {
		enum tok_suffix sfx_prev = sfx;
		char c = *s++;

		//fold lowercase letters to uppercase
		if (c>='a' && c<='z')
			c -= 'a'-'A';

		if (c=='L') {
			if (s<e && (*s=='L' || *s=='l')) {
				s++;
				sfx |= TOK_LL;

				//TOK_L and TOK_LL are mutually exclusive
				if (sfx & TOK_L)
					goto invalid;
			} else {
				sfx |= TOK_L;

				//...in either order: reject an L that follows an LL
				//(e.g. "LLL" or "LLUL")
				if (sfx & TOK_LL)
					goto invalid;
			}
		}
		else if (c=='U')
			sfx |= TOK_U;
		else if (c=='F')
			sfx |= TOK_F;
		else if (c=='I')
			sfx |= TOK_I;
		else
			goto invalid;

		if (sfx == sfx_prev)
			goto invalid; //suffix piece was repeated
	}

	//make sure the suffix is appropriate for this number type
	if (type==TOK_INTEGER && (sfx & TOK_F)) {
		tok_msg_error(suffix_float_only, orig_s,
			"Suffix only valid for floating point numbers");
		sfx = TOK_NOSUFFIX;
	}
	if (type==TOK_FLOATING && (sfx & (TOK_U | TOK_LL))) {
		tok_msg_error(suffix_integer_only, orig_s,
			"Suffix only valid for integers");
		sfx = TOK_NOSUFFIX;
	}

	return sfx;

invalid:
	if (type==TOK_INTEGER)
		tok_msg_error(integer_suffix_invalid, orig_s,
			"Integer suffix invalid");
	else
		tok_msg_error(floating_suffix_invalid, orig_s,
			"Floating point suffix invalid");
	return TOK_NOSUFFIX;
}
/*
 * Fills *out (value, base and suffix) from an integer token that
 * scan_number() has already split into components.  Problems are reported
 * through mq.
 *
 * The base is chosen from the prefix: "0x"/"0X" is hexadecimal, "0b"/"0B" is
 * binary, a lone "0" is octal, and no prefix is decimal.
 */
static void read_integer(struct tok_integer *out, const struct scan_number *sn,
		tok_message_queue *mq) {
	/*
	Assertions about an integer's struct scan_number:
		prefix is empty or [0 0B 0b 0X 0x]
		sn->digits is not empty (i.e. sn->digits < sn->exponent)
			*unless* the prefix is "0"
		has no exponent
		suffix is [0-9 A-Z a-z '.']*
		dots_found == 0
	*/
	readui_base base = READUI_DEC;
	const char *tokstart = sn->prefix;
	const char *s = sn->digits, *e = sn->exponent;

	if (sn->prefix+1 < sn->digits) {
		//two-character prefix
		if (sn->prefix[1]=='X' || sn->prefix[1]=='x')
			base = READUI_HEX;
		else if (sn->prefix[1]=='B' || sn->prefix[1]=='b')
			base = 2; //binary; previously this was misread as octal
		else
			base = READUI_OCT;
	} else if (sn->prefix < sn->digits) {
		//one-character prefix: the leading 0 of an octal constant
		base = READUI_OCT;
	}

	if (s>=e && base==READUI_OCT) {
		//octal contains no digits (the token is just "0")
		out->v = 0;
		out->base = 8;
		goto suffix;
	}

	out->v = readui(&s, sn->exponent, base);
	out->base = base & 0xFF;

	//readui stops at the first digit not valid for the base, so s != e
	//means part of the digit string was rejected
	if (s != e || errno == EINVAL) {
		tok_msg_error(integer_invalid_digits, tokstart,
			"Integer constant contains invalid digits");
	} else if (errno) {
		if (errno == ERANGE) {
			tok_msg_error(integer_out_of_range, tokstart,
				"Integer constant out of range");
		} else {
			tok_msg_bug(readui_unknown, tokstart,
				"Unknown error returned by readui");
		}
	}

suffix:
	out->suffix =
		read_number_suffix(sn->suffix, sn->end, TOK_INTEGER, mq);

	return;
}
/*
 * Fills *out (value and suffix) from a floating point token that
 * scan_number() has already split into components.  Problems are reported
 * through mq.
 *
 * Binary floats and hex floats without an exponent are rejected up front,
 * leaving the value 0.0 and the suffix TOK_NOSUFFIX.  Everything else is
 * converted with strtold().
 */
static void read_floating(struct tok_floating *out, const struct scan_number *sn,
		tok_message_queue *mq) {
	/*
	Assertions about a float's struct scan_number:
		prefix is empty or [0B 0b 0X 0x] (note: no octal prefix 0)
		sn->digits not empty, ever
		exponent may or may not exist
		If exponent exists, it is valid and formatted as:
			( [E P e p] ['+' '-']*0..1 [0-9]* )
		An exponent starts with E if this is decimal, P if it is hex/binary
		suffix is [0-9 A-Z a-z '.']*
		dots_found can be anything
	*/
	const char *token_start = sn->prefix;
	const char *expected_stop = sn->suffix;
	char *parse_stop;
	const char saved = *sn->end;
	const int has_prefix = sn->prefix < sn->digits;

	out->v = 0.0;
	out->suffix = TOK_NOSUFFIX;

	if (has_prefix && (sn->prefix[1]=='B' || sn->prefix[1]=='b')) {
		tok_msg_error(binary_float, token_start,
			"Binary floating point constants not allowed");
		return;
	}
	if (has_prefix && sn->exponent >= sn->suffix) {
		tok_msg_error(hex_float_no_exponent, token_start,
			"Hex floating point constant missing exponent");
		return;
	}

	/* Temporarily place a null terminator at the end of the token so
	 * that strtold cannot read past it, then put the original character
	 * back.  This writes through the const input pointer, so the caller's
	 * buffer must be writable (including the byte at sn->end).
	 */
	*(char*)sn->end = 0;
	errno = 0;
	out->v = strtold(token_start, &parse_stop);
	*(char*)sn->end = saved;

	//strtold may set errno to EDOM rather than ERANGE on underflow;
	//open test/run.c and search "floating_out_of_range" for more details
	if (errno == ERANGE || errno == EDOM) {
		tok_msg_error(floating_out_of_range, token_start,
			"Floating point constant out of range");
	} else if (errno) {
		tok_msg_bug(strtold_unknown, token_start,
			"Unknown error returned by strtold");
	}

	//strtold must have consumed exactly prefix + digits + exponent
	if (parse_stop != expected_stop) {
		tok_msg_error(floating_invalid_digits, token_start,
			"Floating point constant contains invalid digits");
	}

	out->suffix =
		read_number_suffix(sn->suffix, sn->end, TOK_FLOATING, mq);
}
/*
 * Reads the numeric constant that starts at s (bounded by e) into *tok.
 *
 * scan_number() decides whether it is an integer or a floating point
 * constant; tok->type is set accordingly and the matching reader fills in
 * tok->integer or tok->floating, reporting problems through mq.
 *
 * Returns a pointer just past the end of the number token.
 */
char *read_cnumber(struct token *tok, const char *s, const char *e, tok_message_queue *mq) {
	struct scan_number sn;

	tok->type = scan_number(&sn, s, e);

	switch (tok->type) {
	case TOK_INTEGER:
		read_integer(&tok->integer, &sn, mq);
		break;
	default:
		read_floating(&tok->floating, &sn, mq);
		break;
	}

	return (char*)sn.end;
}
#undef MESSAGE_PATH
#include "ccan_tokenizer.h"
/*
 * Returns a newly malloc'd, null-terminated copy of the characters in
 * [s, e).  The caller must free() the result.
 * Returns NULL if the allocation fails.
 */
static char *strdup_rng(const char *s, const char *e) {
	const size_t len = (size_t)(e - s);
	char *ret = malloc(len + 1);

	if (ret == NULL)
		return NULL;

	memcpy(ret, s, len);
	ret[len] = 0;
	return ret;
}
#define MESSAGE_PATH "tokenize/read_cstring/"
//Reads a C string starting at s until quoteChar is found or e is reached
// Returns the pointer to the terminating quote character or e if none was found
/*
 * Decodes the body of a C string or character literal.
 *
 * out:       array that receives the decoded characters.  A zero byte is
 *            written after them but is not counted in out->size.
 * s, e:      the text to read; s points just after the opening quote.
 * quoteChar: the quote character that terminates the literal (' or ").
 * mq:        message queue that receives diagnostics.
 *
 * Escape sequences are translated: octal (up to three digits 0-7), \x hex,
 * the single-letter escapes a b e f n r t v, and \\ \' \" \?.  Any other
 * escaped character is copied through with an "unknown escape" warning; this
 * includes \8 and \9, which are not octal escapes.
 *
 * Returns a pointer to the terminating quote character, or e if none was
 * found.
 */
char *read_cstring(array_char *out, const char *s, const char *e, char quoteChar, tok_message_queue *mq) {
	const char * const tokstart = s;
	const char *p;
	int has_endquote=0, has_newlines=0;

	#define append(startptr,endptr) array_append_items(*out, startptr, (endptr)-(startptr))
	#define append_char(theChar) array_append(*out, theChar)
	#define append_zero() do {array_append(*out, 0); out->size--;} while(0)

	/* s marks the start of the pending run of ordinary characters that has
	 * not been appended yet; p is the read position. */
	p = s;
	while (p<e) {
		char c = *p++;
		if (c == '\\') {
			//flush the ordinary characters that precede the backslash
			append(s, p-1);
			s = p;
			if (p >= e) {
				append_char('\\');
				tok_msg_error(ended_in_backslash, p-1,
					"read_cstring input ended in backslash");
				break;
			}
			c = *p++;
			if (c>='0' && c<='7') {
				//octal escape: one to three digits in the range 0-7
				unsigned int octal = c-'0';
				size_t digit_count = 0;
				while (p<e && *p>='0' && *p<='7') {
					octal <<= 3;
					octal += (*p++) - '0';
					if (++digit_count >= 2)
						break;
				}
				//any decimal digit right after the escape is easy to misread
				if (p<e && *p>='0' && *p<='9') {
					tok_msg_info(ambiguous_octal, s-2,
						"Octal followed by digit");
				}
				if (octal > 0xFF) {
					tok_msg_warn(octal_overflow, s-2,
						"Octal out of range");
				}
				c = octal;
			} else {
				switch (c) {
					case 'x': {
						size_t digit_count = 0;
						size_t zero_count = 0;
						unsigned int hex = 0;
						//leading zeros are counted separately from significant digits
						while (p<e && *p=='0') p++, zero_count++;
						for (;p<e;digit_count++) {
							c = *p++;
							if (c>='0' && c<='9')
								c -= '0';
							else if (c>='A' && c<='F')
								c -= 'A'-10;
							else if (c>='a' && c<='f')
								c -= 'a'-10;
							else {
								p--;
								break;
							}
							hex <<= 4;
							hex += c;
						}
						if (zero_count+digit_count > 2) {
							char *hex_string = strdup_rng(s-2, p);
							tok_msg_warn(ambiguous_hex, s-2,
								"Hex escape '%s' is ambiguous", hex_string);
							if (digit_count > 2)
								tok_msg_warn(hex_overflow, s-2,
									"Hex escape '%s' out of range", hex_string);
							free(hex_string);
						}
						c = hex & 0xFF;
					}	break;
					case 'a':
						c=0x7;
						break;
					case 'b':
						c=0x8;
						break;
					case 'e':
						c=0x1B;
						break;
					case 'f':
						c=0xC;
						break;
					case 'n':
						c=0xA;
						break;
					case 'r':
						c=0xD;
						break;
					case 't':
						c=0x9;
						break;
					case 'v':
						c=0xB;
						break;
					case '\\':
						break;
					default:
						if (c == quoteChar)
							break;
						//A single quote need not be escaped inside double
						//quotes (and vice versa), but doing so is harmless.
						if (c=='\'' && quoteChar=='"')
							break;
						if (c=='"' && quoteChar=='\'')
							break;
						if (c=='?') // \? is needed in some situations to avoid building a trigraph
							break;
						tok_msg_warn(unknown_escape, s-2,
							"Unknown escape sequence '\\%c'", c);
						break;
				}
			}
			//the escape is fully consumed; restart the pending run after it
			s = p;
			append_char(c);
		} else if (c == quoteChar) {
			//leave p on the closing quote so it is returned to the caller
			p--;
			has_endquote = 1;
			break;
		} else if (creturn(c)) {
			has_newlines = 1;
		}
	}

	append(s, p);
	append_zero();
	if (!has_endquote) {
		tok_msg_error(missing_endquote, tokstart,
			"Missing endquote on %s literal",
			quoteChar=='\'' ? "character" : "string");
	} else if (has_newlines) {
		tok_msg_warn(quote_newlines, tokstart,
			"%s literal contains newline character(s)",
			quoteChar=='\'' ? "Character" : "String");
	}
	return (char*)p;

	#undef append
	#undef append_char
	#undef append_zero
}
#undef MESSAGE_PATH
#!/bin/sh
# Converts message lines read on stdin, of the form "<LEVEL>: <path>: <text>",
# into C initializer entries of the form {.level=TM_<LEVEL>, .path="<path>"},
# The level tag at the start of the line is rewritten first; the final
# expression then replaces everything from the first remaining colon onward
# with the closing quote and brace.
# NOTE(review): only the D, I, W and BUG tags are handled here; if error
# lines with their own tag exist, they are not converted - confirm.
sed -e 's/^D: /{.level=TM_DEBUG, .path="/' \
    -e 's/^I: /{.level=TM_INFO, .path="/' \
    -e 's/^W: /{.level=TM_WARN, .path="/' \
    -e 's/^BUG: /{.level=TM_BUG, .path="/' \
    -e 's/:.*/\"},/'
#include "ccan_tokenizer/read_cnumber.c"
#include "ccan_tokenizer/read_cstring.c"
#include "ccan_tokenizer/dict.c"
#include "ccan_tokenizer/ccan_tokenizer.c"
#include "ccan_tokenizer/queue.c"
#include "ccan_tokenizer/charflag.c"
#include "tap/tap.h"
/* Shorthand for the num-th token; requires a variable named `toks` in scope. */
#define item(num) (toks->first[num])
//sed 's/toks->array\.item\[\([^]]*\)\]/item(\1)/g'
/* Message queue handed to tokenize(); NULL in these tests. */
tok_message_queue *MQ = NULL;
/* Operator test inputs: fixed-width operators packed back to back. */
static const char *onechar_tokens = "!~#%^&*()=-+{}[]|;:,.<>/?";
static const char *twochar_tokens = "!=##%=^=&=&&*=-=--->+=++==|=||<=<<>=>>/=";
static const char *threechar_tokens = "<<=>>=...";
/* Literal and identifier test inputs. */
static const char *char_token = "'x'";
static const char *string_token = "\"string\"";
/* Five three-character identifiers separated by single spaces. */
static const char *ident_tokens = "doh abc f_o _ba b$f";
/*
 * Returns a talloc'd copy of `string` in which every character is followed
 * by a backslash-newline pair, so the result is three times as long.
 * The caller owns the result (allocated with a NULL talloc context).
 */
static char *backslashify(const char *string)
{
	const size_t len = strlen(string);
	char *ret = talloc_size(NULL, len*3 + 1);
	char *out = ret;
	const char *in;

	for (in = string; in < string + len; in++) {
		*out++ = *in;
		*out++ = '\\';
		*out++ = '\n';
	}
	*out = '\0';

	return ret;
}
/*
 * Returns a talloc'd copy of `string` with one space inserted after every
 * group of `num` characters.  The caller owns the result (allocated with a
 * NULL talloc context).
 *
 * num must be non-zero; the callers in this file pass strings whose length
 * is a multiple of num.
 */
static char *spacify(const char *string, unsigned int num)
{
	const size_t len = strlen(string);
	char *ret = talloc_size(NULL, len*2 + 1);
	unsigned int src;
	unsigned int dst = 0;

	/* start with all spaces, then drop each group into its slot */
	memset(ret, ' ', len*2);
	for (src = 0; src < len; src += num) {
		memcpy(&ret[dst], string + src, num);
		dst += num + 1;
	}
	ret[dst] = '\0';

	return ret;
}
/*
 * Tokenizes a talloc'd copy of `orig`, which must consist of operators that
 * are each `size` characters long with nothing between them, and checks that
 * the result is a start-of-line token followed by one TOK_OPERATOR token per
 * operator, with txt and orig both pointing into the copied string.
 *
 * Returns the token list so the caller can check the operator codes.
 */
static struct token_list *test_tokens(const char *orig, unsigned int size)
{
struct token_list *toks;
char *string = talloc_strdup(NULL, orig);
unsigned int i;
toks = tokenize(string, strlen(string), MQ);
ok1(token_list_sanity_check(toks, stdout));
/* one token per operator, plus the leading TOK_STARTLINE */
ok1(token_list_count(toks) == strlen(string)/size + 1);
ok1(item(0).type == TOK_STARTLINE);
for (i = 0; i < strlen(string)/size; i++) {
ok1(item(i+1).type == TOK_OPERATOR);
ok1(item(i+1).txt_size == size);
ok1(strncmp(item(i+1).txt, string + i*size, size) == 0);
ok1(item(i+1).orig_size == size);
ok1(item(i+1).orig == string + i*size);
}
return toks;
}
/*
 * Like test_tokens(), but tokenizes the spacify()'d form of `orig`, in which
 * every `size`-character operator is followed by one space.  Each operator
 * must therefore be followed by a one-character TOK_WHITE token.
 *
 * Returns the token list so the caller can check the operator codes.
 */
static struct token_list *test_tokens_spaced(const char *orig,
unsigned int size)
{
struct token_list *toks;
char *string = spacify(orig, size);
unsigned int i;
toks = tokenize(string, strlen(string), MQ);
ok1(token_list_sanity_check(toks, stdout));
/* an operator and a space per group, plus the leading TOK_STARTLINE */
ok1(token_list_count(toks) == strlen(orig)/size*2 + 1);
ok1(item(0).type == TOK_STARTLINE);
for (i = 0; i < strlen(orig)/size; i++) {
/* the operator: each group occupies size+1 characters of `string` */
ok1(item(i*2+1).type == TOK_OPERATOR);
ok1(item(i*2+1).txt_size == size);
ok1(!strncmp(item(i*2+1).txt, string + i*(size+1), size));
ok1(item(i*2+1).orig_size == size);
ok1(item(i*2+1).orig == string + i*(size+1));
/* the space that follows it */
ok1(item(i*2+2).type == TOK_WHITE);
ok1(item(i*2+2).txt_size == 1);
ok1(item(i*2+2).txt[0] == ' ');
ok1(item(i*2+2).orig_size == 1);
ok1(item(i*2+2).orig == string + i*(size+1) + size);
}
return toks;
}
/*
 * Like test_tokens(), but tokenizes the backslashify()'d form of `orig`, in
 * which every character is followed by a backslash-newline.  Each operator's
 * txt must match the unbroken text from `orig`, while its orig must point
 * into the broken string and be three times as long.
 *
 * Returns the token list so the caller can check the operator codes.
 */
static struct token_list *test_tokens_backslashed(const char *orig,
unsigned int size)
{
struct token_list *toks;
const char *string = backslashify(orig);
unsigned int i;
toks = tokenize(string, strlen(string), MQ);
ok1(token_list_sanity_check(toks, stdout));
/* one token per operator, plus the leading TOK_STARTLINE */
ok1(token_list_count(toks) == strlen(orig)/size + 1);
ok1(item(0).type == TOK_STARTLINE);
for (i = 0; i < strlen(orig)/size; i++) {
ok1(item(i+1).type == TOK_OPERATOR);
ok1(item(i+1).txt_size == size);
ok1(strncmp(item(i+1).txt, orig + i*size, size) == 0);
/* every original character carries a 2-character backslash break */
ok1(item(i+1).orig_size == size*3);
ok1(item(i+1).orig == string + i*size*3);
}
return toks;
}
/*
 * Checks that each single-character operator token has an opkw equal to its
 * own character.  `mul` is the index stride between operator tokens: 1 when
 * they are adjacent, 2 when a whitespace token separates them.
 */
static void onechar_checks(const struct token_list *toks, int mul)
{
unsigned int i;
for (i = 0; i < strlen(onechar_tokens); i++)
ok1(item(i*mul+1).opkw == onechar_tokens[i]);
}
/*
 * Checks the opkw of each two-character operator token, in the order the
 * operators appear in twochar_tokens.  `mul` is the index stride between
 * operator tokens (1 when adjacent, 2 when separated by whitespace tokens).
 */
static void twochar_checks(const struct token_list *toks, int mul)
{
ok1(item(1).opkw == NE_OP);
ok1(item(1*mul+1).opkw == DOUBLE_POUND);
ok1(item(2*mul+1).opkw == MOD_ASSIGN);
ok1(item(3*mul+1).opkw == XOR_ASSIGN);
ok1(item(4*mul+1).opkw == AND_ASSIGN);
ok1(item(5*mul+1).opkw == AND_OP);
ok1(item(6*mul+1).opkw == MUL_ASSIGN);
ok1(item(7*mul+1).opkw == SUB_ASSIGN);
ok1(item(8*mul+1).opkw == DEC_OP);
ok1(item(9*mul+1).opkw == PTR_OP);
ok1(item(10*mul+1).opkw == ADD_ASSIGN);
ok1(item(11*mul+1).opkw == INC_OP);
ok1(item(12*mul+1).opkw == EQ_OP);
ok1(item(13*mul+1).opkw == OR_ASSIGN);
ok1(item(14*mul+1).opkw == OR_OP);
ok1(item(15*mul+1).opkw == LE_OP);
ok1(item(16*mul+1).opkw == LEFT_OP);
ok1(item(17*mul+1).opkw == GE_OP);
ok1(item(18*mul+1).opkw == RIGHT_OP);
ok1(item(19*mul+1).opkw == DIV_ASSIGN);
}
/*
 * Checks the opkw of each three-character operator token, in the order the
 * operators appear in threechar_tokens.  `mul` is the index stride between
 * operator tokens (1 when adjacent, 2 when separated by whitespace tokens).
 */
static void threechar_checks(const struct token_list *toks, int mul)
{
ok1(item(1).opkw == LEFT_ASSIGN);
ok1(item(1*mul+1).opkw == RIGHT_ASSIGN);
ok1(item(2*mul+1).opkw == ELLIPSIS);
}
/*
 * Test driver.  Tokenizes simple inputs (operators, a character literal, a
 * string literal and identifiers) in three layouts - packed together,
 * separated by spaces, and with a backslash-newline after every character -
 * and checks the type, text and original-text location of every token.
 */
int main(void)
{
unsigned int i;
struct token_list *toks;
char *str;
char *backslashed_idents;
plan_tests(1243);
/* Operators packed back to back */
toks = test_tokens(onechar_tokens, 1);
onechar_checks(toks, 1);
talloc_free((char*)toks->orig);
toks = test_tokens(twochar_tokens, 2);
twochar_checks(toks, 1);
talloc_free((char*)toks->orig);
toks = test_tokens(threechar_tokens, 3);
threechar_checks(toks, 1);
talloc_free((char*)toks->orig);
/* char literal */
str = talloc_strdup(NULL, char_token);
toks = tokenize(str, strlen(str), MQ);
ok1(token_list_sanity_check(toks, stdout));
ok1(token_list_count(toks) == 2);
ok1(item(0).type == TOK_STARTLINE);
ok1(item(1).type == TOK_CHAR);
ok1(item(1).txt_size == strlen(str));
ok1(strncmp(item(1).txt, str, strlen(str)) == 0);
ok1(item(1).orig_size == strlen(str));
ok1(item(1).orig == str);
/* FIXME: test contents of string. */
talloc_free(str);
/* string literal */
str = talloc_strdup(NULL, string_token);
toks = tokenize(str, strlen(str), MQ);
ok1(token_list_sanity_check(toks, stdout));
ok1(token_list_count(toks) == 2);
ok1(item(0).type == TOK_STARTLINE);
ok1(item(1).type == TOK_STRING);
ok1(item(1).txt_size == strlen(str));
ok1(strncmp(item(1).txt, str, strlen(str)) == 0);
ok1(item(1).orig_size == strlen(str));
ok1(item(1).orig == str);
/* FIXME: test contents of string. */
talloc_free(str);
/* Identifiers */
str = talloc_strdup(NULL, ident_tokens);
toks = tokenize(str, strlen(str), MQ);
ok1(token_list_sanity_check(toks, stdout));
token_list_dump(toks, stdout);
/* startline + 5 identifiers + 4 separating spaces */
ok1(token_list_count(toks) == 10);
ok1(item(0).type == TOK_STARTLINE);
for (i = 0; i < 5; i++) {
/* each identifier is 3 characters and starts every 4 characters */
ok1(item(i*2+1).type == TOK_IDENTIFIER);
ok1(item(i*2+1).txt_size == 3);
ok1(strncmp(item(i*2+1).txt, str + i*4, 3) == 0);
ok1(item(i*2+1).orig_size == 3);
ok1(item(i*2+1).orig == str + i*4);
/* no space follows the last identifier */
if (i == 4)
continue;
ok1(item(i*2+2).type == TOK_WHITE);
ok1(item(i*2+2).txt_size == 1);
ok1(item(i*2+2).txt[0] == ' ');
ok1(item(i*2+2).orig_size == 1);
ok1(item(i*2+2).orig == str + i*4 + 3);
}
talloc_free(str);
/* Operators separated by single spaces */
toks = test_tokens_spaced(onechar_tokens, 1);
onechar_checks(toks, 2);
talloc_free((char*)toks->orig);
toks = test_tokens_spaced(twochar_tokens, 2);
twochar_checks(toks, 2);
talloc_free((char*)toks->orig);
toks = test_tokens_spaced(threechar_tokens, 3);
threechar_checks(toks, 2);
talloc_free((char*)toks->orig);
/* Operators with a backslash-newline after every character */
toks = test_tokens_backslashed(onechar_tokens, 1);
onechar_checks(toks, 1);
talloc_free((char*)toks->orig);
toks = test_tokens_backslashed(twochar_tokens, 2);
twochar_checks(toks, 1);
talloc_free((char*)toks->orig);
toks = test_tokens_backslashed(threechar_tokens, 3);
threechar_checks(toks, 1);
talloc_free((char*)toks->orig);
/* Identifiers */
backslashed_idents = backslashify(ident_tokens);
toks = tokenize(backslashed_idents, strlen(backslashed_idents), MQ);
ok1(token_list_sanity_check(toks, stdout));
ok1(token_list_count(toks) == 10);
ok1(item(0).type == TOK_STARTLINE);
for (i = 0; i < 5; i++) {
/* txt is the unbroken text; orig spans 3 bytes per original character */
ok1(item(i*2+1).type == TOK_IDENTIFIER);
ok1(item(i*2+1).txt_size == 3);
ok1(strncmp(item(i*2+1).txt, ident_tokens + i*4, 3) == 0);
ok1(item(i*2+1).orig_size == 9);
ok1(item(i*2+1).orig == backslashed_idents + i*12);
/* no space follows the last identifier */
if (i == 4)
continue;
ok1(item(i*2+2).type == TOK_WHITE);
ok1(item(i*2+2).txt_size == 1);
ok1(item(i*2+2).txt[0] == ' ');
ok1(item(i*2+2).orig_size == 3);
ok1(item(i*2+2).orig == backslashed_idents + i*12 + 9);
}
talloc_free(backslashed_idents);
return exit_status();
}
This diff is collapsed.
Write test for empty_char_constant
defined cannot be used as a macro name
<strike>Add "defined" and only accept it in appropriate circumstances</strike>
Update that simple tokenizer compulsory test so things will compile
Handle cases like escaped question marks and pound symbols that I don't understand yet.
(done) Fix #include <stdio.h> to read include directive correctly
txt/orig state of affairs:
The problem is that there are two ways to interpret line,col:
With respect to txt
With respect to orig
This isn't a problem when txt and orig point to the same character, as in:
int in\
dex
int \
index /*Here, the backslash break should be gobbled up by the space identifier*/
line,col has no ambiguity as to where it should point. However, when they point to different characters (i.e. at the beginning of a line):
\
int index
line,col could either point to orig or to the first real character. Thus, we will do the latter.
Moreover, will a newline followed by backslash breaks generate a token that gobbles up said breaks? I believe it will, but no need to call this mandatory.
Thus, on a lookup with a txt pointer, the line/col/orig should match the real character and not preceding backslash breaks.
I've been assuming that every token starts with its first character, neglecting the case where a line starts with backslash breaks. The question is, given the txt pointer to the first character, where should the derived orig land?
Currently, the orig lands after the beginning backslash breaks, when instead it should probably land before them.
Here's what the tokenizer's text anchoring needs:
Broken/unbroken text pointer -> line/col
Unbroken contents per token to identify identifier text
Original contents per token to rebuild the document
Ability to change "original contents" so the document will be saved with modifications
Ability to insert new tokens
Solution:
New tokens will typically have identical txt and orig, yea even the same pointer.
txt/txt_size for unbroken contents, orig/orig_size for original
modify orig to change the document
txt identifies identifier text
Line lookup tables are used to resolve txt/orig pointers; other pointers can't be resolved in the same fashion and may require traversing backward through the list.
What this means:
Token txt/txt_size, orig/orig_size, orig_lines, txt_lines, and tok_point_lookup are all still correct.
Token line,col will be removed
Other improvements to do:
Sanity check the point lookups like crazy
Remove the array() structures in token_list, as these are supposed to be read-only
Make sure tok_point_lookup returns correct values for every single pointer possible, particularly those in orig that are on backslash-breaks
Convert the tok_message_queue into an array of messages bound to tokens.
Ask Rusty about the trailing newline in this case:
/* Blah
*
* blah
*/
Here, rather than the trailing space being blank, it is "blank" from the comment perspective.
May require deeper analysis.
Todos from ccan_tokenizer.h
/*
Assumption: Every token fits in one and exactly one line
Counterexamples:
Backslash-broken lines
Multiline comments
Checks to implement in the tokenizer:
is the $ character used in an identifier (some configurations of GCC allow this)
are there potentially ambiguous sequences used in a string literal (e.g. "\0000")
Are there stray characters? (e.g. '\0', '@', '\b')
Are there trailing spaces at the end of lines (unless said spaces consume the entire line)?
Are there trailing spaces after a backslash-broken line?
Fixes todo:
backslash-newline sequence should register as an empty character, and the tokenizer's line value should be incremented accordingly.
*/
Lex angle bracket strings in #include
Check the rules in the documentation
Examine the message queue as part of testing the tokenizer:
Make sure there are no bug messages
Make sure files compile with no warnings
For the tokenizer sanity check, make sure integers and floats have valid suffixes respectively
(e.g. no TOK_F for an integer, no TOK_ULL for a floating)
Update the scan_number sanity checks
(done) Move scan_number et al. to a separate C file
Test:
Overflow and underflow floats
0x.p0
(done) 0755f //octal 0755 with invalid suffix
(done) 0755e1 //floating 7550
Figure out how keywords will be handled.
Preprocessor directives are <strike>case-insensitive</strike> actually case-sensitive (except __VA_ARGS__)
All C keywords are case sensitive
__VA_ARGS__ should be read as an identifier unless it's in the expansion of a macro. Otherwise, GCC generates a warning.
We are in the expansion of a macro after <startline> <space> # <space>
Don't forget about __attribute__
Except for __VA_ARGS__, all preprocessor keywords are preceded by <startline> <space> # <space>
Solution:
All the words themselves will go into one opkw dictionary, and for both type and opkw, no distinction will be made between preprocessor and normal keywords.
Instead, int type will become short type; unsigned short cpp:1;
Merge
Commit ccan_tokenizer to the ccan repo
Introduce ccan_tokenizer to ccanlint
Write testcases for scanning all available operators
Support integer and floating point suffixes (e.g. 500UL, 0.5f)
Examine the message queue after tokenizing
Make sure single-character operators have an opkw < 128
Make sure c_dictionary has no duplicate entries
Write verifiers for other types than TOK_WHITE
What's been done:
Operator table has been organized
Merged Rusty's changes
Fixed if -> while in finalize
Fixed a couple mistakes in run-simple-token.c testcases themselves
Expected orig/orig_size sizes weren't right
Made token_list_sanity_check a public function and used it throughout run-simple-token.c
Tests succeed and pass valgrind
Lines/columns of every token are recorded
(done) Fix "0\nstatic"
(done) Write tests to make sure backslash-broken lines have correct token locations.
(done) Correctly handle backslash-broken lines
One plan: Separate the scanning code from the reading code. Scanning sends valid ranges to reading, and reading fills valid tokens for the tokenizer/scanner to properly add
Another plan: Un-break backslash-broken lines into another copy of the input. Create an array of the positions of each real line break so
Annotate message queue messages with current token
Conversion to make:
From:
Position in unbroken text
To:
Real line number
Real offset from start of line
Thus, we want an array of real line start locations wrt the unbroken text
Here is a bro\
ken line. Here is a
real line.
<LINE>Here is a bro<LINE>ken line. Here is a
<LINE>real line.
If we know the position of the token text wrt the unbroken text, we can look up the real line number and offset using only the array of real line start positions within the unbroken text.
Because all we need is the orig and orig_size with respect to the unbroken text to orient
\ No newline at end of file
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -16,6 +16,9 @@ tools/ccanlint/generated-init-tests: $(TEST_CFILES)
$(TEST_OBJS): tools/ccanlint/generated-init-tests
# Otherwise, ccanlint.c et al. may fail to build
$(CORE_OBJS): tools/ccanlint/generated-init-tests
tools/ccanlint/ccanlint: $(OBJS)
ccanlint-clean:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment