Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cpython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
cpython
Commits
3f993c3b
Commit
3f993c3b
authored
Sep 21, 2007
by
Neil Schemenauer
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Use an enum for decoding_state. It makes the code a little more
understandable.
parent
16c70751
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
18 additions
and
12 deletions
+18
-12
Parser/tokenizer.c
Parser/tokenizer.c
+11
-11
Parser/tokenizer.h
Parser/tokenizer.h
+7
-1
No files found.
Parser/tokenizer.c
View file @
3f993c3b
...
@@ -139,7 +139,7 @@ tok_new(void)
...
@@ -139,7 +139,7 @@ tok_new(void)
tok
->
alterror
=
1
;
tok
->
alterror
=
1
;
tok
->
alttabsize
=
1
;
tok
->
alttabsize
=
1
;
tok
->
altindstack
[
0
]
=
0
;
tok
->
altindstack
[
0
]
=
0
;
tok
->
decoding_state
=
0
;
tok
->
decoding_state
=
STATE_INIT
;
tok
->
decoding_erred
=
0
;
tok
->
decoding_erred
=
0
;
tok
->
read_coding_spec
=
0
;
tok
->
read_coding_spec
=
0
;
tok
->
encoding
=
NULL
;
tok
->
encoding
=
NULL
;
...
@@ -280,7 +280,7 @@ check_coding_spec(const char* line, Py_ssize_t size, struct tok_state *tok,
...
@@ -280,7 +280,7 @@ check_coding_spec(const char* line, Py_ssize_t size, struct tok_state *tok,
if
(
cs
!=
NULL
)
{
if
(
cs
!=
NULL
)
{
tok
->
read_coding_spec
=
1
;
tok
->
read_coding_spec
=
1
;
if
(
tok
->
encoding
==
NULL
)
{
if
(
tok
->
encoding
==
NULL
)
{
assert
(
tok
->
decoding_state
==
1
);
/* raw */
assert
(
tok
->
decoding_state
==
STATE_RAW
);
if
(
strcmp
(
cs
,
"utf-8"
)
==
0
||
if
(
strcmp
(
cs
,
"utf-8"
)
==
0
||
strcmp
(
cs
,
"iso-8859-1"
)
==
0
)
{
strcmp
(
cs
,
"iso-8859-1"
)
==
0
)
{
tok
->
encoding
=
cs
;
tok
->
encoding
=
cs
;
...
@@ -288,7 +288,7 @@ check_coding_spec(const char* line, Py_ssize_t size, struct tok_state *tok,
...
@@ -288,7 +288,7 @@ check_coding_spec(const char* line, Py_ssize_t size, struct tok_state *tok,
r
=
set_readline
(
tok
,
cs
);
r
=
set_readline
(
tok
,
cs
);
if
(
r
)
{
if
(
r
)
{
tok
->
encoding
=
cs
;
tok
->
encoding
=
cs
;
tok
->
decoding_state
=
-
1
;
tok
->
decoding_state
=
STATE_NORMAL
;
}
}
else
else
PyMem_FREE
(
cs
);
PyMem_FREE
(
cs
);
...
@@ -318,7 +318,7 @@ check_bom(int get_char(struct tok_state *),
...
@@ -318,7 +318,7 @@ check_bom(int get_char(struct tok_state *),
struct
tok_state
*
tok
)
struct
tok_state
*
tok
)
{
{
int
ch
=
get_char
(
tok
);
int
ch
=
get_char
(
tok
);
tok
->
decoding_state
=
1
;
tok
->
decoding_state
=
STATE_RAW
;
if
(
ch
==
EOF
)
{
if
(
ch
==
EOF
)
{
return
1
;
return
1
;
}
else
if
(
ch
==
0xEF
)
{
}
else
if
(
ch
==
0xEF
)
{
...
@@ -330,11 +330,11 @@ check_bom(int get_char(struct tok_state *),
...
@@ -330,11 +330,11 @@ check_bom(int get_char(struct tok_state *),
} else if (ch == 0xFE) {
} else if (ch == 0xFE) {
ch = get_char(tok); if (ch != 0xFF) goto NON_BOM;
ch = get_char(tok); if (ch != 0xFF) goto NON_BOM;
if (!set_readline(tok, "utf-16-be")) return 0;
if (!set_readline(tok, "utf-16-be")) return 0;
tok->decoding_state =
-1
;
tok->decoding_state =
STATE_NORMAL
;
} else if (ch == 0xFF) {
} else if (ch == 0xFF) {
ch = get_char(tok); if (ch != 0xFE) goto NON_BOM;
ch = get_char(tok); if (ch != 0xFE) goto NON_BOM;
if (!set_readline(tok, "utf-16-le")) return 0;
if (!set_readline(tok, "utf-16-le")) return 0;
tok->decoding_state =
-1
;
tok->decoding_state =
STATE_NORMAL
;
#endif
#endif
}
else
{
}
else
{
unget_char
(
ch
,
tok
);
unget_char
(
ch
,
tok
);
...
@@ -494,12 +494,12 @@ decoding_fgets(char *s, int size, struct tok_state *tok)
...
@@ -494,12 +494,12 @@ decoding_fgets(char *s, int size, struct tok_state *tok)
char
*
line
=
NULL
;
char
*
line
=
NULL
;
int
badchar
=
0
;
int
badchar
=
0
;
for
(;;)
{
for
(;;)
{
if
(
tok
->
decoding_state
<
0
)
{
if
(
tok
->
decoding_state
==
STATE_NORMAL
)
{
/* We already have a codec associated with
/* We already have a codec associated with
this input. */
this input. */
line
=
fp_readl
(
s
,
size
,
tok
);
line
=
fp_readl
(
s
,
size
,
tok
);
break
;
break
;
}
else
if
(
tok
->
decoding_state
>
0
)
{
}
else
if
(
tok
->
decoding_state
==
STATE_RAW
)
{
/* We want a 'raw' read. */
/* We want a 'raw' read. */
line
=
Py_UniversalNewlineFgets
(
s
,
size
,
line
=
Py_UniversalNewlineFgets
(
s
,
size
,
tok
->
fp
,
NULL
);
tok
->
fp
,
NULL
);
...
@@ -510,7 +510,7 @@ decoding_fgets(char *s, int size, struct tok_state *tok)
...
@@ -510,7 +510,7 @@ decoding_fgets(char *s, int size, struct tok_state *tok)
reader functions from now on. */
reader functions from now on. */
if
(
!
check_bom
(
fp_getc
,
fp_ungetc
,
fp_setreadl
,
tok
))
if
(
!
check_bom
(
fp_getc
,
fp_ungetc
,
fp_setreadl
,
tok
))
return
error_ret
(
tok
);
return
error_ret
(
tok
);
assert
(
tok
->
decoding_state
!=
0
);
assert
(
tok
->
decoding_state
!=
STATE_INIT
);
}
}
}
}
if
(
line
!=
NULL
&&
tok
->
lineno
<
2
&&
!
tok
->
read_coding_spec
)
{
if
(
line
!=
NULL
&&
tok
->
lineno
<
2
&&
!
tok
->
read_coding_spec
)
{
...
@@ -550,7 +550,7 @@ decoding_fgets(char *s, int size, struct tok_state *tok)
...
@@ -550,7 +550,7 @@ decoding_fgets(char *s, int size, struct tok_state *tok)
static
int
static
int
decoding_feof
(
struct
tok_state
*
tok
)
decoding_feof
(
struct
tok_state
*
tok
)
{
{
if
(
tok
->
decoding_state
>=
0
)
{
if
(
tok
->
decoding_state
!=
STATE_NORMAL
)
{
return
feof
(
tok
->
fp
);
return
feof
(
tok
->
fp
);
}
else
{
}
else
{
PyObject
*
buf
=
tok
->
decoding_buffer
;
PyObject
*
buf
=
tok
->
decoding_buffer
;
...
@@ -700,7 +700,7 @@ PyTokenizer_FromFile(FILE *fp, char* enc, char *ps1, char *ps2)
...
@@ -700,7 +700,7 @@ PyTokenizer_FromFile(FILE *fp, char* enc, char *ps1, char *ps2)
return
NULL
;
return
NULL
;
}
}
strcpy
(
tok
->
encoding
,
enc
);
strcpy
(
tok
->
encoding
,
enc
);
tok
->
decoding_state
=
-
1
;
tok
->
decoding_state
=
STATE_NORMAL
;
}
}
return
tok
;
return
tok
;
}
}
...
...
Parser/tokenizer.h
View file @
3f993c3b
...
@@ -12,6 +12,12 @@ extern "C" {
...
@@ -12,6 +12,12 @@ extern "C" {
#define MAXINDENT 100
/* Max indentation level */
#define MAXINDENT 100
/* Max indentation level */
enum
decoding_state
{
STATE_INIT
,
STATE_RAW
,
STATE_NORMAL
,
/* have a codec associated with input */
};
/* Tokenizer state */
/* Tokenizer state */
struct
tok_state
{
struct
tok_state
{
/* Input state; buf <= cur <= inp <= end */
/* Input state; buf <= cur <= inp <= end */
...
@@ -40,7 +46,7 @@ struct tok_state {
...
@@ -40,7 +46,7 @@ struct tok_state {
int
alttabsize
;
/* Alternate tab spacing */
int
alttabsize
;
/* Alternate tab spacing */
int
altindstack
[
MAXINDENT
];
/* Stack of alternate indents */
int
altindstack
[
MAXINDENT
];
/* Stack of alternate indents */
/* Stuff for PEP 0263 */
/* Stuff for PEP 0263 */
int
decoding_state
;
/* -1:decoding, 0:init, 1:raw */
enum
decoding_state
decoding_state
;
int
decoding_erred
;
/* whether erred in decoding */
int
decoding_erred
;
/* whether erred in decoding */
int
read_coding_spec
;
/* whether 'coding:...' has been read */
int
read_coding_spec
;
/* whether 'coding:...' has been read */
char
*
encoding
;
char
*
encoding
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment