Commit df833b07 authored by Guido van Rossum

* Parser/tokenizer.c: back up over illegal newline in string literal (for "completeness" test)
parent 534bf31a
/*********************************************************** /***********************************************************
Copyright 1991, 1992, 1993 by Stichting Mathematisch Centrum, Copyright 1991, 1992, 1993, 1994 by Stichting Mathematisch Centrum,
Amsterdam, The Netherlands. Amsterdam, The Netherlands.
All Rights Reserved All Rights Reserved
...@@ -24,19 +24,18 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ...@@ -24,19 +24,18 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
/* Tokenizer implementation */ /* Tokenizer implementation */
/* XXX This is rather old, should be restructured perhaps */
/* XXX Need a better interface to report errors than writing to stderr */
/* XXX Should use editor resource to fetch true tab size on Macintosh */
#include "pgenheaders.h" #include "pgenheaders.h"
#include <ctype.h> #include <ctype.h>
#include "string.h"
#include "fgetsintr.h"
#include "tokenizer.h" #include "tokenizer.h"
#include "errcode.h" #include "errcode.h"
extern char *my_readline PROTO((char *));
/* Return malloc'ed string including trailing \n;
empty malloc'ed string for EOF;
NULL if interrupted */
/* Don't ever change this -- it would break the portability of Python code */ /* Don't ever change this -- it would break the portability of Python code */
#define TABSIZE 8 #define TABSIZE 8
...@@ -99,7 +98,7 @@ tok_new() ...@@ -99,7 +98,7 @@ tok_new()
struct tok_state *tok = NEW(struct tok_state, 1); struct tok_state *tok = NEW(struct tok_state, 1);
if (tok == NULL) if (tok == NULL)
return NULL; return NULL;
tok->buf = tok->cur = tok->end = tok->inp = NULL; tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL;
tok->done = E_OK; tok->done = E_OK;
tok->fp = NULL; tok->fp = NULL;
tok->tabsize = TABSIZE; tok->tabsize = TABSIZE;
...@@ -158,7 +157,6 @@ void ...@@ -158,7 +157,6 @@ void
tok_free(tok) tok_free(tok)
struct tok_state *tok; struct tok_state *tok;
{ {
/* XXX really need a separate flag to say 'my buffer' */
if (tok->fp != NULL && tok->buf != NULL) if (tok->fp != NULL && tok->buf != NULL)
DEL(tok->buf); DEL(tok->buf);
DEL(tok); DEL(tok);
...@@ -180,45 +178,50 @@ tok_nextc(tok) ...@@ -180,45 +178,50 @@ tok_nextc(tok)
tok->done = E_EOF; tok->done = E_EOF;
return EOF; return EOF;
} }
#ifdef USE_READLINE
if (tok->prompt != NULL) { if (tok->prompt != NULL) {
extern char *readline PROTO((char *prompt)); char *new = my_readline(tok->prompt);
static int been_here;
if (!been_here) {
/* Force rebind of TAB to insert-tab */
extern int rl_insert();
rl_bind_key('\t', rl_insert);
been_here++;
}
if (tok->buf != NULL)
free(tok->buf);
tok->buf = readline(tok->prompt);
(void) intrcheck(); /* Clear pending interrupt */
if (tok->nextprompt != NULL) if (tok->nextprompt != NULL)
tok->prompt = tok->nextprompt; tok->prompt = tok->nextprompt;
if (tok->buf == NULL) { if (new == NULL)
tok->done = E_INTR;
else if (*new == '\0') {
free(new);
tok->done = E_EOF; tok->done = E_EOF;
} }
else if (tok->start != NULL) {
int start = tok->start - tok->buf;
int oldlen = tok->cur - tok->buf;
int newlen = oldlen + strlen(new);
char *buf = realloc(tok->buf, newlen+1);
tok->lineno++;
if (buf == NULL) {
free(tok->buf);
free(new);
tok->done = E_NOMEM;
return EOF;
}
tok->buf = buf;
tok->cur = tok->buf + oldlen;
strcpy(tok->buf + oldlen, new);
free(new);
tok->inp = tok->buf + newlen;
tok->end = tok->inp + 1;
tok->start = tok->buf + start;
}
else { else {
tok->end = strchr(tok->buf, '\0'); tok->lineno++;
if (tok->end > tok->buf) if (tok->buf != NULL)
add_history(tok->buf); free(tok->buf);
/* Replace trailing '\n' by '\0' tok->buf = new;
(we don't need a '\0', but the
tokenizer wants a '\n'...) */
*tok->end++ = '\n';
tok->inp = tok->end;
tok->cur = tok->buf; tok->cur = tok->buf;
tok->inp = strchr(tok->buf, '\0');
tok->end = tok->inp + 1;
} }
} }
else else {
#endif int done = 0;
{ int cur = 0;
if (tok->prompt != NULL) { if (tok->start == NULL) {
fprintf(stderr, "%s", tok->prompt);
if (tok->nextprompt != NULL)
tok->prompt = tok->nextprompt;
}
if (tok->buf == NULL) { if (tok->buf == NULL) {
tok->buf = NEW(char, BUFSIZ); tok->buf = NEW(char, BUFSIZ);
if (tok->buf == NULL) { if (tok->buf == NULL) {
...@@ -227,11 +230,26 @@ tok_nextc(tok) ...@@ -227,11 +230,26 @@ tok_nextc(tok)
} }
tok->end = tok->buf + BUFSIZ; tok->end = tok->buf + BUFSIZ;
} }
tok->done = fgets_intr(tok->buf, if (fgets(tok->buf, (int)(tok->end - tok->buf),
(int)(tok->end - tok->buf), tok->fp); tok->fp) == NULL) {
tok->done = E_EOF;
done = 1;
}
else {
tok->done = E_OK;
tok->inp = strchr(tok->buf, '\0'); tok->inp = strchr(tok->buf, '\0');
done = tok->inp[-1] == '\n';
}
}
else {
cur = tok->cur - tok->buf;
tok->done = E_OK;
}
tok->lineno++;
/* Read until '\n' or EOF */ /* Read until '\n' or EOF */
while (tok->inp+1==tok->end && tok->inp[-1]!='\n') { while (!done) {
int curstart = tok->start == NULL ? -1 :
tok->start - tok->buf;
int curvalid = tok->inp - tok->buf; int curvalid = tok->inp - tok->buf;
int cursize = tok->end - tok->buf; int cursize = tok->end - tok->buf;
int newsize = cursize + BUFSIZ; int newsize = cursize + BUFSIZ;
...@@ -245,13 +263,19 @@ tok_nextc(tok) ...@@ -245,13 +263,19 @@ tok_nextc(tok)
tok->buf = newbuf; tok->buf = newbuf;
tok->inp = tok->buf + curvalid; tok->inp = tok->buf + curvalid;
tok->end = tok->buf + newsize; tok->end = tok->buf + newsize;
if (fgets_intr(tok->inp, tok->start = curstart < 0 ? NULL :
tok->buf + curstart;
if (fgets(tok->inp,
(int)(tok->end - tok->inp), (int)(tok->end - tok->inp),
tok->fp) != E_OK) tok->fp) == NULL) {
break; /* Last line does not end in \n,
fake one */
strcpy(tok->inp, "\n");
}
tok->inp = strchr(tok->inp, '\0'); tok->inp = strchr(tok->inp, '\0');
done = tok->inp[-1] == '\n';
} }
tok->cur = tok->buf; tok->cur = tok->buf + cur;
} }
if (tok->done != E_OK) { if (tok->done != E_OK) {
if (tok->prompt != NULL) if (tok->prompt != NULL)
...@@ -360,14 +384,15 @@ tok_get(tok, p_start, p_end) ...@@ -360,14 +384,15 @@ tok_get(tok, p_start, p_end)
register int c; register int c;
int blankline; int blankline;
*p_start = *p_end = NULL;
nextline: nextline:
tok->start = NULL;
blankline = 0; blankline = 0;
/* Get indentation level */ /* Get indentation level */
if (tok->atbol) { if (tok->atbol) {
register int col = 0; register int col = 0;
tok->atbol = 0; tok->atbol = 0;
tok->lineno++;
for (;;) { for (;;) {
c = tok_nextc(tok); c = tok_nextc(tok);
if (c == ' ') if (c == ' ')
...@@ -423,7 +448,7 @@ tok_get(tok, p_start, p_end) ...@@ -423,7 +448,7 @@ tok_get(tok, p_start, p_end)
} }
} }
*p_start = *p_end = tok->cur; tok->start = tok->cur;
/* Return pending indents/dedents */ /* Return pending indents/dedents */
if (tok->pendin != 0) { if (tok->pendin != 0) {
...@@ -438,13 +463,14 @@ tok_get(tok, p_start, p_end) ...@@ -438,13 +463,14 @@ tok_get(tok, p_start, p_end)
} }
again: again:
tok->start = NULL;
/* Skip spaces */ /* Skip spaces */
do { do {
c = tok_nextc(tok); c = tok_nextc(tok);
} while (c == ' ' || c == '\t'); } while (c == ' ' || c == '\t');
/* Set start of current token */ /* Set start of current token */
*p_start = tok->cur - 1; tok->start = tok->cur - 1;
/* Skip comment */ /* Skip comment */
if (c == '#') { if (c == '#') {
...@@ -467,7 +493,6 @@ tok_get(tok, p_start, p_end) ...@@ -467,7 +493,6 @@ tok_get(tok, p_start, p_end)
/* Check for EOF and errors now */ /* Check for EOF and errors now */
if (c == EOF) { if (c == EOF) {
*p_start = *p_end = tok->cur;
return tok->done == E_EOF ? ENDMARKER : ERRORTOKEN; return tok->done == E_EOF ? ENDMARKER : ERRORTOKEN;
} }
...@@ -477,6 +502,7 @@ tok_get(tok, p_start, p_end) ...@@ -477,6 +502,7 @@ tok_get(tok, p_start, p_end)
c = tok_nextc(tok); c = tok_nextc(tok);
} while (isalnum(c) || c == '_'); } while (isalnum(c) || c == '_');
tok_backup(tok, c); tok_backup(tok, c);
*p_start = tok->start;
*p_end = tok->cur; *p_end = tok->cur;
return NAME; return NAME;
} }
...@@ -486,6 +512,7 @@ tok_get(tok, p_start, p_end) ...@@ -486,6 +512,7 @@ tok_get(tok, p_start, p_end)
tok->atbol = 1; tok->atbol = 1;
if (blankline || tok->level > 0) if (blankline || tok->level > 0)
goto nextline; goto nextline;
*p_start = tok->start;
*p_end = tok->cur - 1; /* Leave '\n' out of the string */ *p_end = tok->cur - 1; /* Leave '\n' out of the string */
return NEWLINE; return NEWLINE;
} }
...@@ -498,6 +525,7 @@ tok_get(tok, p_start, p_end) ...@@ -498,6 +525,7 @@ tok_get(tok, p_start, p_end)
} }
else { else {
tok_backup(tok, c); tok_backup(tok, c);
*p_start = tok->start;
*p_end = tok->cur; *p_end = tok->cur;
return DOT; return DOT;
} }
...@@ -538,9 +566,7 @@ tok_get(tok, p_start, p_end) ...@@ -538,9 +566,7 @@ tok_get(tok, p_start, p_end)
else { else {
/* Accept floating point numbers. /* Accept floating point numbers.
XXX This accepts incomplete things like XXX This accepts incomplete things like
XXX 12e or 1e+; worry run-time. XXX 12e or 1e+; worry run-time */
XXX Doesn't accept numbers
XXX starting with a dot */
if (c == '.') { if (c == '.') {
fraction: fraction:
/* Fraction */ /* Fraction */
...@@ -560,58 +586,58 @@ tok_get(tok, p_start, p_end) ...@@ -560,58 +586,58 @@ tok_get(tok, p_start, p_end)
} }
} }
tok_backup(tok, c); tok_backup(tok, c);
*p_start = tok->start;
*p_end = tok->cur; *p_end = tok->cur;
return NUMBER; return NUMBER;
} }
/* String (single quotes) */ /* String */
if (c == '\'') { if (c == '\'' || c == '"') {
int quote = c;
int triple = 0;
int tripcount = 0;
for (;;) { for (;;) {
c = tok_nextc(tok); c = tok_nextc(tok);
if (c == '\n' || c == EOF) { if (c == '\n') {
if (!triple) {
tok->done = E_TOKEN; tok->done = E_TOKEN;
tok->cur = tok->inp; tok_backup(tok, c);
return ERRORTOKEN; return ERRORTOKEN;
} }
if (c == '\\') { tripcount = 0;
c = tok_nextc(tok); }
*p_end = tok->cur; else if (c == EOF) {
if (c == '\n' || c == EOF) {
tok->done = E_TOKEN; tok->done = E_TOKEN;
tok->cur = tok->inp; tok->cur = tok->inp;
return ERRORTOKEN; return ERRORTOKEN;
} }
else if (c == quote) {
tripcount++;
if (tok->cur == tok->start+2) {
c = tok_nextc(tok);
if (c == quote) {
triple = 1;
tripcount = 0;
continue; continue;
} }
if (c == '\'') tok_backup(tok, c);
break;
}
*p_end = tok->cur;
return STRING;
} }
if (!triple || tripcount == 3)
/* String (double quotes) */ break;
if (c == '\"') {
for (;;) {
c = tok_nextc(tok);
if (c == '\n' || c == EOF) {
tok->done = E_TOKEN;
tok->cur = tok->inp;
return ERRORTOKEN;
} }
if (c == '\\') { else if (c == '\\') {
tripcount = 0;
c = tok_nextc(tok); c = tok_nextc(tok);
*p_end = tok->cur; if (c == EOF) {
if (c == '\n' || c == EOF) {
tok->done = E_TOKEN; tok->done = E_TOKEN;
tok->cur = tok->inp; tok->cur = tok->inp;
return ERRORTOKEN; return ERRORTOKEN;
} }
continue;
} }
if (c == '\"') else
break; tripcount = 0;
} }
*p_start = tok->start;
*p_end = tok->cur; *p_end = tok->cur;
return STRING; return STRING;
} }
...@@ -624,7 +650,6 @@ tok_get(tok, p_start, p_end) ...@@ -624,7 +650,6 @@ tok_get(tok, p_start, p_end)
tok->cur = tok->inp; tok->cur = tok->inp;
return ERRORTOKEN; return ERRORTOKEN;
} }
tok->lineno++;
goto again; /* Read next line */ goto again; /* Read next line */
} }
...@@ -633,13 +658,14 @@ tok_get(tok, p_start, p_end) ...@@ -633,13 +658,14 @@ tok_get(tok, p_start, p_end)
int c2 = tok_nextc(tok); int c2 = tok_nextc(tok);
int token = tok_2char(c, c2); int token = tok_2char(c, c2);
if (token != OP) { if (token != OP) {
*p_start = tok->start;
*p_end = tok->cur; *p_end = tok->cur;
return token; return token;
} }
tok_backup(tok, c2); tok_backup(tok, c2);
} }
/* Keep track of parenteses nesting level */ /* Keep track of parentheses nesting level */
switch (c) { switch (c) {
case '(': case '(':
case '[': case '[':
...@@ -654,6 +680,7 @@ tok_get(tok, p_start, p_end) ...@@ -654,6 +680,7 @@ tok_get(tok, p_start, p_end)
} }
/* Punctuation character */ /* Punctuation character */
*p_start = tok->start;
*p_end = tok->cur; *p_end = tok->cur;
return tok_1char(c); return tok_1char(c);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment