Commit 2356c14e authored by Joey Adams

Make tokenizer throw error on empty char literal, along with some slight...

Make tokenizer throw error on empty char literal, along with some slight cleanups in ccan_tokenizer.h
parent 11d5072e
...@@ -473,12 +473,19 @@ struct token_list *tokenize(const char *orig, size_t orig_size, ...@@ -473,12 +473,19 @@ struct token_list *tokenize(const char *orig, size_t orig_size,
add(.type = type, add(.type = type,
{.include = include}); {.include = include});
} else if (c=='\'' || c=='\"') { //character or string literal } else if (c=='\'' || c=='\"') { //character or string literal
array_char string = array_new(tl); array_char string = array_new(tl);
s = read_cstring(&string, s, e, c, mq); s = read_cstring(&string, s, e, c, mq);
if (s<e) s++; //advance past endquote (if available) if (s<e) s++; //advance past endquote (if available)
add(.type = c=='\'' ? TOK_CHAR : TOK_STRING, add(.type = c=='\'' ? TOK_CHAR : TOK_STRING,
{.string = string}); {.string = string});
if (c=='\'' && string.size==0) {
tok_msg_error(empty_char_constant, orig,
"Empty character constant");
}
} else if (c=='/' && s<e && (*s=='*' || *s=='/')) { //comment } else if (c=='/' && s<e && (*s=='*' || *s=='/')) { //comment
if (*s++ == '*') { /* C-style comment */ if (*s++ == '*') { /* C-style comment */
const char *comment_start = s-2; const char *comment_start = s-2;
......
...@@ -41,8 +41,11 @@ enum token_type { ...@@ -41,8 +41,11 @@ enum token_type {
TOK_INTEGER, //integer (e.g. 5, 1000L, 0x5) TOK_INTEGER, //integer (e.g. 5, 1000L, 0x5)
TOK_FLOATING, //floating point number (e.g. 5.0, 7.0f, etc.) TOK_FLOATING, //floating point number (e.g. 5.0, 7.0f, etc.)
TOK_OPERATOR, //operator (e.g. +, -, (, ), ++, etc.) TOK_OPERATOR, //operator (e.g. +, -, (, ), ++, etc.)
#define token_type_is_identifier(type) ((type)>=TOK_KEYWORD && (type)<=TOK_IDENTIFIER)
TOK_KEYWORD, //keyword (e.g. char, _Bool, ifdef) TOK_KEYWORD, //keyword (e.g. char, _Bool, ifdef)
TOK_IDENTIFIER, //identifier or unprocessed keyword (e.g. int, token, pp_conditions) TOK_IDENTIFIER, //identifier or unprocessed keyword (e.g. int, token, pp_conditions)
TOK_CHAR, //character literal (e.g. 'a' or even '1234') TOK_CHAR, //character literal (e.g. 'a' or even '1234')
TOK_STRING, //string literal (e.g. "hello" or "zero\0inside") TOK_STRING, //string literal (e.g. "hello" or "zero\0inside")
TOK_LEADING_POUND, //leading # in a preprocessor directive (e.g. # include) TOK_LEADING_POUND, //leading # in a preprocessor directive (e.g. # include)
...@@ -54,6 +57,7 @@ enum token_type { ...@@ -54,6 +57,7 @@ enum token_type {
TOK_CCOMMENT, //C comment (e.g. /* comment */) TOK_CCOMMENT, //C comment (e.g. /* comment */)
TOK_CPPCOMMENT, //C++ comment (e.g. //comment ) TOK_CPPCOMMENT, //C++ comment (e.g. //comment )
TOK_WHITE, //whitespace (span of \t\n\v\f\r and space) TOK_WHITE, //whitespace (span of \t\n\v\f\r and space)
TOK_STARTLINE, //beginning of line (txt/txtsize is always empty) TOK_STARTLINE, //beginning of line (txt/txtsize is always empty)
TOK_STRAY, //control characters, weird characters, and extended characters where they shouldn't be TOK_STRAY, //control characters, weird characters, and extended characters where they shouldn't be
}; };
...@@ -192,6 +196,11 @@ struct token { ...@@ -192,6 +196,11 @@ struct token {
size_t line, col; size_t line, col;
}; };
// Returns nonzero when tok's type is TOK_KEYWORD or TOK_IDENTIFIER
// (the range tested by token_type_is_identifier), zero otherwise.
// Keywords such as int, long, etc. may be defined over (i.e. redefined),
// making them identifiers in a sense, so both types count as identifiers here.
static inline int token_is_identifier(const struct token *tok) {
return token_type_is_identifier(tok->type);
}
static inline int token_is_ignored(const struct token *tok) { static inline int token_is_ignored(const struct token *tok) {
return token_type_is_ignored(tok->type); return token_type_is_ignored(tok->type);
} }
...@@ -204,6 +213,11 @@ static inline int token_is_kw(const struct token *tok, int opkw) { ...@@ -204,6 +213,11 @@ static inline int token_is_kw(const struct token *tok, int opkw) {
return tok->type==TOK_KEYWORD && tok->opkw==opkw; return tok->type==TOK_KEYWORD && tok->opkw==opkw;
} }
// Returns nonzero when the token's text is exactly str: same length and
// same bytes; returns zero otherwise.
// str must be NUL-terminated, since its length is taken with strlen.
// tok->txt is compared over tok->txt_size bytes with memcmp, so this
// function does not rely on tok->txt being NUL-terminated.
static inline int token_txt_is(const struct token *tok, const char *str) {
size_t len = strlen(str);
// Length is checked first; memcmp only runs when len == tok->txt_size.
return tok->txt_size==len && !memcmp(tok->txt, str, len);
}
struct token_list { struct token_list {
struct token *first, *last; struct token *first, *last;
......
Write test for empty_char_constant
defined cannot be used as a macro name
<strike>Add "defined" and only accept it in appropriate circumstances</strike>
Update that simple tokenizer compulsory test so things will compile Update that simple tokenizer compulsory test so things will compile
Handle cases like escaped question marks and pound symbols that I don't understand yet. Handle cases like escaped question marks and pound symbols that I don't understand yet.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment