Commit d671daf7 authored by Robert Griesemer's avatar Robert Griesemer

- allow unicode digits in identifiers

- fixed a bug with character escapes (before: allowed arbitrary long sequences)

R=r
DELTA=63  (33 added, 19 deleted, 11 changed)
OCL=26010
CL=26070
parent 0c4f4587
...@@ -60,25 +60,6 @@ type Scanner struct { ...@@ -60,25 +60,6 @@ type Scanner struct {
} }
func isLetter(ch int) bool {
return
'a' <= ch && ch <= 'z' ||
'A' <= ch && ch <= 'Z' ||
ch == '_' ||
ch >= 0x80 && unicode.IsLetter(ch);
}
func digitVal(ch int) int {
switch {
case '0' <= ch && ch <= '9': return ch - '0';
case 'a' <= ch && ch <= 'f': return ch - 'a' + 10;
case 'A' <= ch && ch <= 'F': return ch - 'A' + 10;
}
return 16; // larger than any legal digit val
}
// Read the next Unicode char into S.ch. // Read the next Unicode char into S.ch.
// S.ch < 0 means end-of-file. // S.ch < 0 means end-of-file.
func (S *Scanner) next() { func (S *Scanner) next() {
...@@ -195,9 +176,25 @@ func (S *Scanner) scanComment() []byte { ...@@ -195,9 +176,25 @@ func (S *Scanner) scanComment() []byte {
} }
func isLetter(ch int) bool {
return
'a' <= ch && ch <= 'z' ||
'A' <= ch && ch <= 'Z' ||
ch == '_' ||
ch >= 0x80 && unicode.IsLetter(ch);
}
func isDigit(ch int) bool {
return
'0' <= ch && ch <= '9' ||
ch >= 0x80 && unicode.IsDecimalDigit(ch);
}
func (S *Scanner) scanIdentifier() (tok int, lit []byte) { func (S *Scanner) scanIdentifier() (tok int, lit []byte) {
pos := S.chpos; pos := S.chpos;
for isLetter(S.ch) || digitVal(S.ch) < 10 { for isLetter(S.ch) || isDigit(S.ch) {
S.next(); S.next();
} }
lit = S.src[pos : S.chpos]; lit = S.src[pos : S.chpos];
...@@ -205,6 +202,16 @@ func (S *Scanner) scanIdentifier() (tok int, lit []byte) { ...@@ -205,6 +202,16 @@ func (S *Scanner) scanIdentifier() (tok int, lit []byte) {
} }
func digitVal(ch int) int {
switch {
case '0' <= ch && ch <= '9': return ch - '0';
case 'a' <= ch && ch <= 'f': return ch - 'a' + 10;
case 'A' <= ch && ch <= 'F': return ch - 'A' + 10;
}
return 16; // larger than any legal digit val
}
func (S *Scanner) scanMantissa(base int) { func (S *Scanner) scanMantissa(base int) {
for digitVal(S.ch) < base { for digitVal(S.ch) < base {
S.next(); S.next();
...@@ -270,12 +277,12 @@ exit: ...@@ -270,12 +277,12 @@ exit:
} }
func (S *Scanner) scanDigits(n int, base int) { func (S *Scanner) scanDigits(base, length int) {
for digitVal(S.ch) < base { for length > 0 && digitVal(S.ch) < base {
S.next(); S.next();
n--; length--;
} }
if n > 0 { if length > 0 {
S.error(S.chpos, "illegal char escape"); S.error(S.chpos, "illegal char escape");
} }
} }
...@@ -289,13 +296,13 @@ func (S *Scanner) scanEscape(quote int) { ...@@ -289,13 +296,13 @@ func (S *Scanner) scanEscape(quote int) {
case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', quote: case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', quote:
// nothing to do // nothing to do
case '0', '1', '2', '3', '4', '5', '6', '7': case '0', '1', '2', '3', '4', '5', '6', '7':
S.scanDigits(3 - 1, 8); // 1 char read already S.scanDigits(8, 3 - 1); // 1 char read already
case 'x': case 'x':
S.scanDigits(2, 16); S.scanDigits(16, 2);
case 'u': case 'u':
S.scanDigits(4, 16); S.scanDigits(16, 4);
case 'U': case 'U':
S.scanDigits(8, 16); S.scanDigits(16, 8);
default: default:
S.error(pos, "illegal char escape"); S.error(pos, "illegal char escape");
} }
......
...@@ -45,6 +45,9 @@ var tokens = [...]elt{ ...@@ -45,6 +45,9 @@ var tokens = [...]elt{
// Identifiers and basic type literals // Identifiers and basic type literals
elt{ 0, token.IDENT, "foobar", literal }, elt{ 0, token.IDENT, "foobar", literal },
elt{ 0, token.IDENT, "a۰۱۸", literal },
elt{ 0, token.IDENT, "foo६४", literal },
elt{ 0, token.IDENT, "bar9876", literal },
elt{ 0, token.INT, "0", literal }, elt{ 0, token.INT, "0", literal },
elt{ 0, token.INT, "01234567", literal }, elt{ 0, token.INT, "01234567", literal },
elt{ 0, token.INT, "0xcafebabe", literal }, elt{ 0, token.INT, "0xcafebabe", literal },
...@@ -56,6 +59,10 @@ var tokens = [...]elt{ ...@@ -56,6 +59,10 @@ var tokens = [...]elt{
elt{ 0, token.FLOAT, "1e-100", literal }, elt{ 0, token.FLOAT, "1e-100", literal },
elt{ 0, token.FLOAT, "2.71828e-1000", literal }, elt{ 0, token.FLOAT, "2.71828e-1000", literal },
elt{ 0, token.CHAR, "'a'", literal }, elt{ 0, token.CHAR, "'a'", literal },
elt{ 0, token.CHAR, "'\\000'", literal },
elt{ 0, token.CHAR, "'\\xFF'", literal },
elt{ 0, token.CHAR, "'\\uff16'", literal },
elt{ 0, token.CHAR, "'\\U0000ff16'", literal },
elt{ 0, token.STRING, "`foobar`", literal }, elt{ 0, token.STRING, "`foobar`", literal },
// Operators and delimitors // Operators and delimitors
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment