Commit cf171a7f authored by Guido van Rossum

Cleanup of tokenizer.c.

parent 053b4f3a
@@ -1269,30 +1269,24 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end)
     /* Identifier (most frequent token!) */
     nonascii = 0;
     if (is_potential_identifier_start(c)) {
-        /* Process r"", u"" and ur"" */
-        switch (c) {
-        case 'r':
-        case 'R':
-            c = tok_nextc(tok);
-            if (c == '"' || c == '\'')
-                goto letter_quote;
-            break;
-        case 'b':
-        case 'B':
-            c = tok_nextc(tok);
-            if (c == 'r' || c == 'R')
-                c = tok_nextc(tok);
-            if (c == '"' || c == '\'')
-                goto letter_quote;
-            break;
-        }
+        /* Process b"", r"" and br"" */
+        if (c == 'b' || c == 'B') {
+            c = tok_nextc(tok);
+            if (c == '"' || c == '\'')
+                goto letter_quote;
+        }
+        if (c == 'r' || c == 'R') {
+            c = tok_nextc(tok);
+            if (c == '"' || c == '\'')
+                goto letter_quote;
+        }
         while (is_potential_identifier_char(c)) {
             if (c >= 128)
                 nonascii = 1;
             c = tok_nextc(tok);
         }
         tok_backup(tok, c);
         if (nonascii &&
             !verify_identifier(tok->start, tok->cur)) {
             tok->done = E_IDENTIFIER;
             return ERRORTOKEN;
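
As a reading aid for the hunk above: the rewritten prefix check accepts an optional b/B, then an optional r/R, and only treats them as a string prefix when a quote follows immediately. A minimal standalone sketch of that rule, using a plain string cursor instead of tok_nextc() and a hypothetical helper name, is:

```c
#include <stdio.h>

/* Illustrative only: mirrors the rewritten prefix logic above, but reads from
   a plain C string instead of the tokenizer's tok_state / tok_nextc(). */
static int
is_string_prefix(const char *p, const char **quote_start)
{
    /* Optional b/B prefix. */
    if (*p == 'b' || *p == 'B')
        p++;
    /* Optional r/R prefix (also covers br/bR/Br/BR forms). */
    if (*p == 'r' || *p == 'R')
        p++;
    /* A prefix only counts if a quote follows immediately. */
    if (*p == '"' || *p == '\'') {
        *quote_start = p;
        return 1;
    }
    return 0;
}

int
main(void)
{
    const char *samples[] = {"br'x'", "R\"x\"", "b'x'", "bar"};
    for (int i = 0; i < 4; i++) {
        const char *q;
        printf("%-8s -> %s\n", samples[i],
               is_string_prefix(samples[i], &q) ? "string prefix" : "identifier");
    }
    return 0;
}
```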
@@ -1322,7 +1316,7 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end)
             c = tok_nextc(tok);
             if (c == '.') {
                 *p_start = tok->start;
                 *p_end = tok->cur;
                 return ELLIPSIS;
             } else {
                 tok_backup(tok, c);
@@ -1436,55 +1430,47 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end)
   letter_quote:
     /* String */
     if (c == '\'' || c == '"') {
-        Py_ssize_t quote2 = tok->cur - tok->start + 1;
-        int quote = c;
-        int triple = 0;
-        int tripcount = 0;
-        for (;;) {
-            c = tok_nextc(tok);
-            if (c == '\n') {
-                if (!triple) {
-                    tok->done = E_EOLS;
-                    tok_backup(tok, c);
-                    return ERRORTOKEN;
-                }
-                tripcount = 0;
-                tok->cont_line = 1; /* multiline string. */
-            }
-            else if (c == EOF) {
-                if (triple)
-                    tok->done = E_EOFS;
-                else
-                    tok->done = E_EOLS;
-                tok->cur = tok->inp;
-                return ERRORTOKEN;
-            }
-            else if (c == quote) {
-                tripcount++;
-                if (tok->cur - tok->start == quote2) {
-                    c = tok_nextc(tok);
-                    if (c == quote) {
-                        triple = 1;
-                        tripcount = 0;
-                        continue;
-                    }
-                    tok_backup(tok, c);
-                }
-                if (!triple || tripcount == 3)
-                    break;
-            }
-            else if (c == '\\') {
-                tripcount = 0;
-                c = tok_nextc(tok);
-                if (c == EOF) {
-                    tok->done = E_EOLS;
-                    tok->cur = tok->inp;
-                    return ERRORTOKEN;
-                }
-            }
-            else
-                tripcount = 0;
-        }
+        int quote = c;
+        int quote_size = 1;             /* 1 or 3 */
+        int end_quote_size = 0;
+
+        /* Find the quote size and start of string */
+        c = tok_nextc(tok);
+        if (c == quote) {
+            c = tok_nextc(tok);
+            if (c == quote)
+                quote_size = 3;
+            else
+                end_quote_size = 1;     /* empty string found */
+        }
+        if (c != quote)
+            tok_backup(tok, c);
+
+        /* Get rest of string */
+        while (end_quote_size != quote_size) {
+            c = tok_nextc(tok);
+            if (c == EOF) {
+                if (quote_size == 3)
+                    tok->done = E_EOFS;
+                else
+                    tok->done = E_EOLS;
+                tok->cur = tok->inp;
+                return ERRORTOKEN;
+            }
+            if (quote_size == 1 && c == '\n') {
+                tok->done = E_EOLS;
+                tok->cur = tok->inp;
+                return ERRORTOKEN;
+            }
+            if (c == quote)
+                end_quote_size += 1;
+            else {
+                end_quote_size = 0;
+                if (c == '\\')
+                    c = tok_nextc(tok); /* skip escaped char */
+            }
+        }
+
         *p_start = tok->start;
         *p_end = tok->cur;
         return STRING;
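
As a reading aid for the hunk above: the new loop drops the triple/tripcount bookkeeping and instead fixes quote_size (1 or 3) once the opening quotes are read, then counts consecutive closing quotes in end_quote_size until they match. A rough standalone sketch of the same scan over a NUL-terminated buffer, with hypothetical names and with EOF/newline errors reduced to a simple -1 return, is:

```c
#include <stdio.h>

/* Illustrative sketch of the quote_size / end_quote_size scan above, operating
   on a NUL-terminated buffer instead of tok_state. Returns the length of the
   string token starting at s, or -1 if the string is unterminated. */
static int
scan_string_token(const char *s)
{
    const char *p = s;
    int quote = *p++;                 /* opening quote character */
    int quote_size = 1;               /* 1 or 3 */
    int end_quote_size = 0;

    /* Find the quote size and start of string */
    if (*p == quote) {
        p++;
        if (*p == quote) {
            quote_size = 3;
            p++;
        }
        else
            end_quote_size = 1;       /* empty string found */
    }

    /* Get rest of string */
    while (end_quote_size != quote_size) {
        int c = *p++;
        if (c == '\0')                /* end of buffer: unterminated */
            return -1;
        if (quote_size == 1 && c == '\n')
            return -1;                /* single-quoted strings may not span lines */
        if (c == quote)
            end_quote_size += 1;
        else {
            end_quote_size = 0;
            if (c == '\\' && *p != '\0')
                p++;                  /* skip escaped char */
        }
    }
    return (int)(p - s);
}

int
main(void)
{
    const char *src = "'''abc''' tail";
    printf("token length: %d\n", scan_string_token(src));   /* prints 9 */
    return 0;
}
```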
@@ -1619,7 +1605,7 @@ PyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int *offset)
 /* Get -*- encoding -*- from a Python file.
    PyTokenizer_FindEncoding returns NULL when it can't find the encoding in
    the first or second line of the file (in which case the encoding
    should be assumed to be PyUnicode_GetDefaultEncoding()).
    The char * returned is malloc'ed via PyMem_MALLOC() and thus must be freed
...
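
As the truncated comment above notes, the encoding string returned by PyTokenizer_FindEncoding is allocated with PyMem_MALLOC() and must be freed by the caller. A hedged usage sketch follows; it assumes the fd-based signature PyTokenizer_FindEncoding(int) declared in this era's Parser/tokenizer.h, which is not visible in the excerpt.

```c
#include "Python.h"
#include "tokenizer.h"   /* assumed: Parser/tokenizer.h declaring PyTokenizer_FindEncoding(int) */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

/* Sketch only: print the -*- coding -*- cookie of a Python source file,
   falling back to the default encoding when the tokenizer finds none. */
static void
print_source_encoding(const char *path)
{
    int fd = open(path, O_RDONLY);
    if (fd < 0) {
        perror(path);
        return;
    }
    char *enc = PyTokenizer_FindEncoding(fd);   /* assumed signature: int fd */
    if (enc != NULL) {
        printf("%s: %s\n", path, enc);
        PyMem_FREE(enc);    /* per the comment above, the buffer comes from PyMem_MALLOC() */
    }
    else {
        printf("%s: no cookie, assume the default encoding\n", path);
    }
    close(fd);
}
```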