Commit 1f24a719 authored by Pablo Galindo's avatar Pablo Galindo Committed by GitHub

bpo-35808: Retire pgen and use pgen2 to generate the parser (GH-11814)

Pgen is the oldest piece of technology in the CPython repository, building it requires various #if[n]def PGEN hacks in other parts of the code and it also depends more and more on CPython internals. This commit removes the old pgen C code and replaces it with a new version implemented in pure Python. This is a modified and adapted version of lib2to3/pgen2 that can generate grammar files compatible with the current parser.

This commit also eliminates all the #ifdef and code branches related to pgen, simplifying the code and making it more maintainable. The regen-grammar step now uses $(PYTHON_FOR_REGEN), which can be any version of the interpreter, so the new pgen code maintains compatibility with older versions of the interpreter (this also allows regenerating the grammar with the current CI solution that uses Python 3.5). The new pgen Python module also makes use of the Grammar/Tokens file that holds the token specification, so it is always kept in sync and avoids having to maintain duplicate token definitions.
parent 7eebbbd5
...@@ -73,8 +73,6 @@ PCbuild/arm32/ ...@@ -73,8 +73,6 @@ PCbuild/arm32/
PCbuild/obj/ PCbuild/obj/
PCbuild/win32/ PCbuild/win32/
.purify .purify
Parser/pgen
Parser/pgen.exe
__pycache__ __pycache__
autom4te.cache autom4te.cache
build/ build/
......
#ifndef Py_METAGRAMMAR_H
#define Py_METAGRAMMAR_H
#ifdef __cplusplus
extern "C" {
#endif

/* Non-terminal symbol numbers of the meta-grammar: the grammar in which
   grammar files themselves are written.  Values start at 256 so they can
   never collide with token numbers (tokens stay below 256). */
#define MSTART 256
#define RULE 257
#define RHS 258
#define ALT 259
#define ITEM 260
#define ATOM 261

#ifdef __cplusplus
}
#endif
#endif /* !Py_METAGRAMMAR_H */
...@@ -12,10 +12,7 @@ extern "C" { ...@@ -12,10 +12,7 @@ extern "C" {
typedef struct { typedef struct {
int error; int error;
#ifndef PGEN
/* The filename is useless for pgen, see comment in tok_state structure */
PyObject *filename; PyObject *filename;
#endif
int lineno; int lineno;
int offset; int offset;
char *text; /* UTF-8-encoded string */ char *text; /* UTF-8-encoded string */
......
#ifndef Py_PGEN_H
#define Py_PGEN_H
#ifdef __cplusplus
extern "C" {
#endif

/* Parser generator interface */

/* Return the static grammar used to parse grammar files themselves. */
extern grammar *meta_grammar(void);

struct _node;
/* Build a grammar from the parse tree of a grammar file. */
extern grammar *pgen(struct _node *);

#ifdef __cplusplus
}
#endif
#endif /* !Py_PGEN_H */
...@@ -290,40 +290,21 @@ LIBFFI_INCLUDEDIR= @LIBFFI_INCLUDEDIR@ ...@@ -290,40 +290,21 @@ LIBFFI_INCLUDEDIR= @LIBFFI_INCLUDEDIR@
########################################################################## ##########################################################################
# Parser # Parser
PGEN= Parser/pgen$(EXE)
POBJS= \ POBJS= \
Parser/acceler.o \ Parser/acceler.o \
Parser/grammar1.o \ Parser/grammar1.o \
Parser/listnode.o \ Parser/listnode.o \
Parser/node.o \ Parser/node.o \
Parser/parser.o \ Parser/parser.o \
Parser/bitset.o \
Parser/metagrammar.o \
Parser/firstsets.o \
Parser/grammar.o \
Parser/token.o \ Parser/token.o \
Parser/pgen.o
PARSER_OBJS= $(POBJS) Parser/myreadline.o Parser/parsetok.o Parser/tokenizer.o PARSER_OBJS= $(POBJS) Parser/myreadline.o Parser/parsetok.o Parser/tokenizer.o
PGOBJS= \
Objects/obmalloc.o \
Python/dynamic_annotations.o \
Python/mysnprintf.o \
Python/pyctype.o \
Parser/tokenizer_pgen.o \
Parser/printgrammar.o \
Parser/parsetok_pgen.o \
Parser/pgenmain.o
PARSER_HEADERS= \ PARSER_HEADERS= \
$(srcdir)/Parser/parser.h \ $(srcdir)/Parser/parser.h \
$(srcdir)/Include/parsetok.h \ $(srcdir)/Include/parsetok.h \
$(srcdir)/Parser/tokenizer.h $(srcdir)/Parser/tokenizer.h
PGENOBJS= $(POBJS) $(PGOBJS)
########################################################################## ##########################################################################
# Python # Python
...@@ -802,31 +783,18 @@ Python/sysmodule.o: $(srcdir)/Python/sysmodule.c Makefile ...@@ -802,31 +783,18 @@ Python/sysmodule.o: $(srcdir)/Python/sysmodule.c Makefile
$(IO_OBJS): $(IO_H) $(IO_OBJS): $(IO_H)
$(PGEN): $(PGENOBJS)
$(CC) $(OPT) $(PY_CORE_LDFLAGS) $(PGENOBJS) $(LIBS) -o $(PGEN)
.PHONY: regen-grammar .PHONY: regen-grammar
regen-grammar: $(PGEN) regen-grammar: regen-token
# Regenerate Include/graminit.h and Python/graminit.c # Regenerate Include/graminit.h and Python/graminit.c
# from Grammar/Grammar using pgen # from Grammar/Grammar using pgen
@$(MKDIR_P) Include @$(MKDIR_P) Include
$(PGEN) $(srcdir)/Grammar/Grammar \ $(PYTHON_FOR_REGEN) -m Parser.pgen $(srcdir)/Grammar/Grammar \
$(srcdir)/Grammar/Tokens \
$(srcdir)/Include/graminit.h.new \ $(srcdir)/Include/graminit.h.new \
$(srcdir)/Python/graminit.c.new $(srcdir)/Python/graminit.c.new
$(UPDATE_FILE) $(srcdir)/Include/graminit.h $(srcdir)/Include/graminit.h.new $(UPDATE_FILE) $(srcdir)/Include/graminit.h $(srcdir)/Include/graminit.h.new
$(UPDATE_FILE) $(srcdir)/Python/graminit.c $(srcdir)/Python/graminit.c.new $(UPDATE_FILE) $(srcdir)/Python/graminit.c $(srcdir)/Python/graminit.c.new
Parser/grammar.o: $(srcdir)/Parser/grammar.c \
$(srcdir)/Include/token.h \
$(srcdir)/Include/grammar.h
Parser/metagrammar.o: $(srcdir)/Parser/metagrammar.c
Parser/tokenizer_pgen.o: $(srcdir)/Parser/tokenizer.c
Parser/parsetok_pgen.o: $(srcdir)/Parser/parsetok.c
Parser/printgrammar.o: $(srcdir)/Parser/printgrammar.c
Parser/pgenmain.o: $(srcdir)/Include/parsetok.h
.PHONY=regen-ast .PHONY=regen-ast
regen-ast: regen-ast:
# Regenerate Include/Python-ast.h using Parser/asdl_c.py -h # Regenerate Include/Python-ast.h using Parser/asdl_c.py -h
...@@ -1016,7 +984,6 @@ PYTHON_HEADERS= \ ...@@ -1016,7 +984,6 @@ PYTHON_HEADERS= \
$(srcdir)/Include/longobject.h \ $(srcdir)/Include/longobject.h \
$(srcdir)/Include/marshal.h \ $(srcdir)/Include/marshal.h \
$(srcdir)/Include/memoryobject.h \ $(srcdir)/Include/memoryobject.h \
$(srcdir)/Include/metagrammar.h \
$(srcdir)/Include/methodobject.h \ $(srcdir)/Include/methodobject.h \
$(srcdir)/Include/modsupport.h \ $(srcdir)/Include/modsupport.h \
$(srcdir)/Include/moduleobject.h \ $(srcdir)/Include/moduleobject.h \
...@@ -1028,7 +995,6 @@ PYTHON_HEADERS= \ ...@@ -1028,7 +995,6 @@ PYTHON_HEADERS= \
$(srcdir)/Include/osdefs.h \ $(srcdir)/Include/osdefs.h \
$(srcdir)/Include/osmodule.h \ $(srcdir)/Include/osmodule.h \
$(srcdir)/Include/patchlevel.h \ $(srcdir)/Include/patchlevel.h \
$(srcdir)/Include/pgen.h \
$(srcdir)/Include/pgenheaders.h \ $(srcdir)/Include/pgenheaders.h \
$(srcdir)/Include/pyarena.h \ $(srcdir)/Include/pyarena.h \
$(srcdir)/Include/pycapsule.h \ $(srcdir)/Include/pycapsule.h \
...@@ -1771,7 +1737,7 @@ profile-removal: ...@@ -1771,7 +1737,7 @@ profile-removal:
rm -f profile-run-stamp rm -f profile-run-stamp
clobber: clean profile-removal clobber: clean profile-removal
-rm -f $(BUILDPYTHON) $(PGEN) $(LIBRARY) $(LDLIBRARY) $(DLLLIBRARY) \ -rm -f $(BUILDPYTHON) $(LIBRARY) $(LDLIBRARY) $(DLLLIBRARY) \
tags TAGS \ tags TAGS \
config.cache config.log pyconfig.h Modules/config.c config.cache config.log pyconfig.h Modules/config.c
-rm -rf build platform -rm -rf build platform
......
Retire pgen and use a modified version of pgen2 to generate the parser.
Patch by Pablo Galindo.
...@@ -92,14 +92,6 @@ wchar_t *Py_DecodeLocale(const char* arg, size_t *size) ...@@ -92,14 +92,6 @@ wchar_t *Py_DecodeLocale(const char* arg, size_t *size)
return w; return w;
} }
/* Parser/pgenmain.c */
/* Coverity modeling stub: marks the filename argument of getgrammar()
   as a tainted-data sink for static analysis.  The uninitialized
   return value is deliberate -- modeling functions are never executed,
   only interpreted by the analyzer. */
grammar *getgrammar(const char *filename)
{
    grammar *g;
    __coverity_tainted_data_sink__(filename);
    return g;
}
/* Python/marshal.c */ /* Python/marshal.c */
static Py_ssize_t r_string(char *s, Py_ssize_t n, RFILE *p) static Py_ssize_t r_string(char *s, Py_ssize_t n, RFILE *p)
......
...@@ -161,7 +161,6 @@ ...@@ -161,7 +161,6 @@
<ClInclude Include="..\Include\longobject.h" /> <ClInclude Include="..\Include\longobject.h" />
<ClInclude Include="..\Include\marshal.h" /> <ClInclude Include="..\Include\marshal.h" />
<ClInclude Include="..\Include\memoryobject.h" /> <ClInclude Include="..\Include\memoryobject.h" />
<ClInclude Include="..\Include\metagrammar.h" />
<ClInclude Include="..\Include\methodobject.h" /> <ClInclude Include="..\Include\methodobject.h" />
<ClInclude Include="..\Include\modsupport.h" /> <ClInclude Include="..\Include\modsupport.h" />
<ClInclude Include="..\Include\moduleobject.h" /> <ClInclude Include="..\Include\moduleobject.h" />
...@@ -175,7 +174,6 @@ ...@@ -175,7 +174,6 @@
<ClInclude Include="..\Include\osmodule.h" /> <ClInclude Include="..\Include\osmodule.h" />
<ClInclude Include="..\Include\parsetok.h" /> <ClInclude Include="..\Include\parsetok.h" />
<ClInclude Include="..\Include\patchlevel.h" /> <ClInclude Include="..\Include\patchlevel.h" />
<ClInclude Include="..\Include\pgen.h" />
<ClInclude Include="..\Include\pgenheaders.h" /> <ClInclude Include="..\Include\pgenheaders.h" />
<ClInclude Include="..\Include\pyhash.h" /> <ClInclude Include="..\Include\pyhash.h" />
<ClInclude Include="..\Include\py_curses.h" /> <ClInclude Include="..\Include\py_curses.h" />
...@@ -372,12 +370,8 @@ ...@@ -372,12 +370,8 @@
<ClCompile Include="..\Objects\unicodeobject.c" /> <ClCompile Include="..\Objects\unicodeobject.c" />
<ClCompile Include="..\Objects\weakrefobject.c" /> <ClCompile Include="..\Objects\weakrefobject.c" />
<ClCompile Include="..\Parser\acceler.c" /> <ClCompile Include="..\Parser\acceler.c" />
<ClCompile Include="..\Parser\bitset.c" />
<ClCompile Include="..\Parser\firstsets.c" />
<ClCompile Include="..\Parser\grammar.c" />
<ClCompile Include="..\Parser\grammar1.c" /> <ClCompile Include="..\Parser\grammar1.c" />
<ClCompile Include="..\Parser\listnode.c" /> <ClCompile Include="..\Parser\listnode.c" />
<ClCompile Include="..\Parser\metagrammar.c" />
<ClCompile Include="..\Parser\myreadline.c" /> <ClCompile Include="..\Parser\myreadline.c" />
<ClCompile Include="..\Parser\node.c" /> <ClCompile Include="..\Parser\node.c" />
<ClCompile Include="..\Parser\parser.c" /> <ClCompile Include="..\Parser\parser.c" />
......
...@@ -234,9 +234,6 @@ ...@@ -234,9 +234,6 @@
<ClInclude Include="..\Include\memoryobject.h"> <ClInclude Include="..\Include\memoryobject.h">
<Filter>Include</Filter> <Filter>Include</Filter>
</ClInclude> </ClInclude>
<ClInclude Include="..\Include\metagrammar.h">
<Filter>Include</Filter>
</ClInclude>
<ClInclude Include="..\Include\methodobject.h"> <ClInclude Include="..\Include\methodobject.h">
<Filter>Include</Filter> <Filter>Include</Filter>
</ClInclude> </ClInclude>
...@@ -270,9 +267,6 @@ ...@@ -270,9 +267,6 @@
<ClInclude Include="..\Include\patchlevel.h"> <ClInclude Include="..\Include\patchlevel.h">
<Filter>Include</Filter> <Filter>Include</Filter>
</ClInclude> </ClInclude>
<ClInclude Include="..\Include\pgen.h">
<Filter>Include</Filter>
</ClInclude>
<ClInclude Include="..\Include\pgenheaders.h"> <ClInclude Include="..\Include\pgenheaders.h">
<Filter>Include</Filter> <Filter>Include</Filter>
</ClInclude> </ClInclude>
...@@ -836,24 +830,12 @@ ...@@ -836,24 +830,12 @@
<ClCompile Include="..\Parser\acceler.c"> <ClCompile Include="..\Parser\acceler.c">
<Filter>Parser</Filter> <Filter>Parser</Filter>
</ClCompile> </ClCompile>
<ClCompile Include="..\Parser\bitset.c">
<Filter>Parser</Filter>
</ClCompile>
<ClCompile Include="..\Parser\firstsets.c">
<Filter>Parser</Filter>
</ClCompile>
<ClCompile Include="..\Parser\grammar.c">
<Filter>Parser</Filter>
</ClCompile>
<ClCompile Include="..\Parser\grammar1.c"> <ClCompile Include="..\Parser\grammar1.c">
<Filter>Parser</Filter> <Filter>Parser</Filter>
</ClCompile> </ClCompile>
<ClCompile Include="..\Parser\listnode.c"> <ClCompile Include="..\Parser\listnode.c">
<Filter>Parser</Filter> <Filter>Parser</Filter>
</ClCompile> </ClCompile>
<ClCompile Include="..\Parser\metagrammar.c">
<Filter>Parser</Filter>
</ClCompile>
<ClCompile Include="..\Parser\myreadline.c"> <ClCompile Include="..\Parser\myreadline.c">
<Filter>Parser</Filter> <Filter>Parser</Filter>
</ClCompile> </ClCompile>
......
/* Bitset primitives used by the parser generator */
#include "pgenheaders.h"
#include "bitset.h"
bitset
newbitset(int nbits)
{
int nbytes = NBYTES(nbits);
bitset ss = (char *)PyObject_MALLOC(sizeof(BYTE) * nbytes);
if (ss == NULL)
Py_FatalError("no mem for bitset");
ss += nbytes;
while (--nbytes >= 0)
*--ss = 0;
return ss;
}
/* Release a bitset previously obtained from newbitset(). */
void
delbitset(bitset ss)
{
    PyObject_FREE(ss);
}
/* Set bit ibit in ss.  Returns 1 if the bit was newly set and 0 if it
   was already set before the call. */
int
addbit(bitset ss, int ibit)
{
    int ibyte = BIT2BYTE(ibit);
    BYTE mask = BIT2MASK(ibit);
    int was_set = (ss[ibyte] & mask) != 0;

    ss[ibyte] |= mask;          /* harmless if already set */
    return !was_set;
}
#if 0 /* Now a macro */
/* Report whether bit ibit is set in ss.  Superseded by the testbit
   macro (see bitset.h); kept here only as reference. */
int
testbit(bitset ss, int ibit)
{
    return (ss[BIT2BYTE(ibit)] & BIT2MASK(ibit)) != 0;
}
#endif
/* Compare two bitsets of nbits bits; return 1 if every byte matches,
   0 at the first mismatch. */
int
samebitset(bitset ss1, bitset ss2, int nbits)
{
    int nbytes = NBYTES(nbits);

    while (nbytes-- > 0) {
        if (ss1[nbytes] != ss2[nbytes])
            return 0;
    }
    return 1;
}
/* In-place union: OR every bit of ss2 into ss1 (both nbits wide). */
void
mergebitset(bitset ss1, bitset ss2, int nbits)
{
    int nbytes = NBYTES(nbits);
    int i;

    for (i = 0; i < nbytes; i++)
        ss1[i] |= ss2[i];
}
/* Computation of FIRST sets */
#include "pgenheaders.h"
#include "grammar.h"
#include "token.h"
extern int Py_DebugFlag;
/* Forward */
static void calcfirstset(grammar *, dfa *);
/* Compute the FIRST set of every rule in the grammar that does not
   already have one; calcfirstset() fills in d_first on demand and
   recurses into referenced rules. */
void
addfirstsets(grammar *g)
{
    int i;

    if (Py_DebugFlag)
        printf("Adding FIRST sets ...\n");
    for (i = 0; i < g->g_ndfas; i++) {
        dfa *d = &g->g_dfa[i];
        if (d->d_first == NULL)
            calcfirstset(g, d);
    }
}
/* Compute the FIRST set of one rule (dfa) and store it in d->d_first
   as a bitset over the grammar's label numbers.  While the set is
   being computed, d_first temporarily points at a shared "dummy"
   bitset so that (left-)recursive references are detected and
   reported instead of looping forever. */
static void
calcfirstset(grammar *g, dfa *d)
{
    int i, j;
    state *s;
    arc *a;
    int nsyms;
    int *sym;
    int nbits;
    static bitset dummy;    /* sentinel: "computation in progress" */
    bitset result;
    int type;
    dfa *d1;
    label *l0;

    if (Py_DebugFlag)
        printf("Calculate FIRST set for '%s'\n", d->d_name);

    if (dummy == NULL)
        dummy = newbitset(1);
    if (d->d_first == dummy) {
        /* Re-entered while this rule's own set is still being built. */
        fprintf(stderr, "Left-recursion for '%s'\n", d->d_name);
        return;
    }
    if (d->d_first != NULL) {
        fprintf(stderr, "Re-calculating FIRST set for '%s' ???\n",
            d->d_name);
    }
    d->d_first = dummy;

    l0 = g->g_ll.ll_label;
    nbits = g->g_ll.ll_nlabels;
    result = newbitset(nbits);

    /* sym[] records the labels already examined so each arc label is
       processed at most once; it starts out holding the rule's own
       label. */
    sym = (int *)PyObject_MALLOC(sizeof(int));
    if (sym == NULL)
        Py_FatalError("no mem for new sym in calcfirstset");
    nsyms = 1;
    sym[0] = findlabel(&g->g_ll, d->d_type, (char *)NULL);

    /* Walk every arc leaving the initial state. */
    s = &d->d_state[d->d_initial];
    for (i = 0; i < s->s_narcs; i++) {
        a = &s->s_arc[i];
        for (j = 0; j < nsyms; j++) {
            if (sym[j] == a->a_lbl)
                break;
        }
        if (j >= nsyms) { /* New label */
            sym = (int *)PyObject_REALLOC(sym,
                                          sizeof(int) * (nsyms + 1));
            if (sym == NULL)
                Py_FatalError(
                    "no mem to resize sym in calcfirstset");
            sym[nsyms++] = a->a_lbl;
            type = l0[a->a_lbl].lb_type;
            if (ISNONTERMINAL(type)) {
                /* Union in the FIRST set of the referenced rule,
                   computing it on demand. */
                d1 = PyGrammar_FindDFA(g, type);
                if (d1->d_first == dummy) {
                    fprintf(stderr,
                            "Left-recursion below '%s'\n",
                            d->d_name);
                }
                else {
                    if (d1->d_first == NULL)
                        calcfirstset(g, d1);
                    mergebitset(result,
                                d1->d_first, nbits);
                }
            }
            else if (ISTERMINAL(type)) {
                addbit(result, a->a_lbl);
            }
        }
    }
    d->d_first = result;
    if (Py_DebugFlag) {
        printf("FIRST set for '%s': {", d->d_name);
        for (i = 0; i < nbits; i++) {
            if (testbit(result, i))
                printf(" %s", PyGrammar_LabelRepr(&l0[i]));
        }
        printf(" }\n");
    }
    PyObject_FREE(sym);
}
/* Grammar implementation */
#include "Python.h"
#include "pgenheaders.h"
#include <ctype.h>
#include "token.h"
#include "grammar.h"
extern int Py_DebugFlag;
/* Allocate a fresh, empty grammar whose start symbol is `start`.
   Aborts through Py_FatalError() on allocation failure, so the result
   is never NULL. */
grammar *
newgrammar(int start)
{
    grammar *g = (grammar *)PyObject_MALLOC(sizeof(grammar));

    if (g == NULL)
        Py_FatalError("no mem for new grammar");
    g->g_ndfas = 0;
    g->g_dfa = NULL;
    g->g_start = start;
    g->g_ll.ll_nlabels = 0;
    g->g_ll.ll_label = NULL;
    g->g_accel = 0;     /* accelerators not yet added */
    return g;
}
/* Free a grammar and everything it owns: every DFA's name, arc lists
   and state array, the DFA array itself, and the label list.  Note the
   mixed allocators: d_name and lb_str come from strdup() and are
   released with free(), everything else was obtained through
   PyObject_MALLOC/REALLOC and is released with PyObject_FREE. */
void
freegrammar(grammar *g)
{
    int i;
    for (i = 0; i < g->g_ndfas; i++) {
        free(g->g_dfa[i].d_name);
        for (int j = 0; j < g->g_dfa[i].d_nstates; j++)
            PyObject_FREE(g->g_dfa[i].d_state[j].s_arc);
        PyObject_FREE(g->g_dfa[i].d_state);
    }
    PyObject_FREE(g->g_dfa);
    for (i = 0; i < g->g_ll.ll_nlabels; i++)
        free(g->g_ll.ll_label[i].lb_str);
    PyObject_FREE(g->g_ll.ll_label);
    PyObject_FREE(g);
}
/* Append a new, empty DFA for non-terminal `type` named `name` to the
   grammar and return a pointer to it.  The returned pointer aims into
   g->g_dfa, which may be reallocated by the next adddfa() call --
   hence "Only use while fresh!". */
dfa *
adddfa(grammar *g, int type, const char *name)
{
    dfa *d;
    g->g_dfa = (dfa *)PyObject_REALLOC(g->g_dfa,
                                       sizeof(dfa) * (g->g_ndfas + 1));
    if (g->g_dfa == NULL)
        Py_FatalError("no mem to resize dfa in adddfa");
    d = &g->g_dfa[g->g_ndfas++];
    d->d_type = type;
    d->d_name = strdup(name);   /* freed with free() in freegrammar() */
    d->d_nstates = 0;
    d->d_state = NULL;
    d->d_initial = -1;          /* no initial state designated yet */
    d->d_first = NULL;          /* FIRST set computed later */
    return d; /* Only use while fresh! */
}
/* Append an empty state to DFA d and return its index within
   d->d_state. */
int
addstate(dfa *d)
{
    state *s;
    d->d_state = (state *)PyObject_REALLOC(d->d_state,
                                           sizeof(state) * (d->d_nstates + 1));
    if (d->d_state == NULL)
        Py_FatalError("no mem to resize state in addstate");
    s = &d->d_state[d->d_nstates++];
    s->s_narcs = 0;
    s->s_arc = NULL;
    /* Accelerator-related fields start out empty; presumably filled in
       by the accelerator pass (see Parser/acceler.c) -- not shown here. */
    s->s_lower = 0;
    s->s_upper = 0;
    s->s_accel = NULL;
    s->s_accept = 0;
    return Py_SAFE_DOWNCAST(s - d->d_state, intptr_t, int);
}
/* Add an arc labelled lbl going from state `from` to state `to` of
   DFA d.  Both states must already exist; allocation failure aborts. */
void
addarc(dfa *d, int from, int to, int lbl)
{
    state *s;

    assert(0 <= from && from < d->d_nstates);
    assert(0 <= to && to < d->d_nstates);

    s = &d->d_state[from];
    s->s_arc = (arc *)PyObject_REALLOC(s->s_arc,
                                       sizeof(arc) * (s->s_narcs + 1));
    if (s->s_arc == NULL)
        Py_FatalError("no mem to resize arc list in addarc");
    s->s_arc[s->s_narcs].a_lbl = lbl;
    s->s_arc[s->s_narcs].a_arrow = to;
    s->s_narcs++;
}
/* Return the index of label (type, str) in ll, appending a new entry
   first if no identical label exists yet (duplicates are shared). */
int
addlabel(labellist *ll, int type, const char *str)
{
    int i;
    label *lb;

    /* Reuse an existing identical label if present. */
    for (i = 0; i < ll->ll_nlabels; i++) {
        if (ll->ll_label[i].lb_type == type &&
            strcmp(ll->ll_label[i].lb_str, str) == 0)
            return i;
    }
    ll->ll_label = (label *)PyObject_REALLOC(ll->ll_label,
                                             sizeof(label) * (ll->ll_nlabels + 1));
    if (ll->ll_label == NULL)
        Py_FatalError("no mem to resize labellist in addlabel");
    lb = &ll->ll_label[ll->ll_nlabels++];
    lb->lb_type = type;
    lb->lb_str = strdup(str);   /* freed with free() in freegrammar() */
    if (Py_DebugFlag)
        printf("Label @ %8p, %d: %s\n", ll, ll->ll_nlabels,
               PyGrammar_LabelRepr(lb));
    return Py_SAFE_DOWNCAST(lb - ll->ll_label, intptr_t, int);
}
/* Same, but rather dies than adds */
/* Look up label (type, str) in ll and return its index; calls
   Py_FatalError() -- which does not return -- if it is absent.
   NOTE(review): the string comparison below is commented out, so the
   match is on lb_type alone; the first label of the requested type
   wins.  Preserved as-is. */
int
findlabel(labellist *ll, int type, const char *str)
{
    int i;
    for (i = 0; i < ll->ll_nlabels; i++) {
        if (ll->ll_label[i].lb_type == type /*&&
            strcmp(ll->ll_label[i].lb_str, str) == 0*/)
            return i;
    }
    fprintf(stderr, "Label %d/'%s' not found\n", type, str);
    Py_FatalError("grammar.c:findlabel()");

    /* Py_FatalError() is declared with __attribute__((__noreturn__)).
       GCC emits a warning without "return 0;" (compiler bug!), but Clang is
       smarter and emits a warning on the return... */
#ifndef __clang__
    return 0; /* Make gcc -Wall happy */
#endif
}
/* Forward */
static void translabel(grammar *, label *);

/* Convert every label in the grammar from its raw NAME/STRING form
   into the concrete terminal or non-terminal number it denotes.  The
   reserved EMPTY label (index 0) is skipped.
   NOTE(review): the banner uses a compile-time #ifdef Py_DEBUG while
   the rest of the file tests Py_DebugFlag at runtime -- inconsistent,
   but preserved as-is. */
void
translatelabels(grammar *g)
{
    int i;

#ifdef Py_DEBUG
    printf("Translating labels ...\n");
#endif
    /* Don't translate EMPTY */
    for (i = EMPTY+1; i < g->g_ll.ll_nlabels; i++)
        translabel(g, &g->g_ll.ll_label[i]);
}
/* Translate a single label in place.
   - NAME labels become either a non-terminal number (if the name
     matches a rule) or a terminal token number (if it matches a token
     name); lb_str is freed and cleared in both cases.
   - STRING labels hold a quoted literal (lb_str[0] is the quote
     character, the content starts at lb_str[1]).  Keyword-like
     literals become NAME labels keeping the keyword text; one-, two-
     and three-character operator literals are mapped to their token
     numbers via PyToken_OneChar/TwoChars/ThreeChars.
   Untranslatable labels are reported on stdout and left unchanged. */
static void
translabel(grammar *g, label *lb)
{
    int i;

    if (Py_DebugFlag)
        printf("Translating label %s ...\n", PyGrammar_LabelRepr(lb));

    if (lb->lb_type == NAME) {
        /* First try the rule names: a match makes it a non-terminal. */
        for (i = 0; i < g->g_ndfas; i++) {
            if (strcmp(lb->lb_str, g->g_dfa[i].d_name) == 0) {
                if (Py_DebugFlag)
                    printf(
                        "Label %s is non-terminal %d.\n",
                        lb->lb_str,
                        g->g_dfa[i].d_type);
                lb->lb_type = g->g_dfa[i].d_type;
                free(lb->lb_str);
                lb->lb_str = NULL;
                return;
            }
        }
        /* Otherwise try the token names: a match makes it a terminal. */
        for (i = 0; i < (int)N_TOKENS; i++) {
            if (strcmp(lb->lb_str, _PyParser_TokenNames[i]) == 0) {
                if (Py_DebugFlag)
                    printf("Label %s is terminal %d.\n",
                        lb->lb_str, i);
                lb->lb_type = i;
                free(lb->lb_str);
                lb->lb_str = NULL;
                return;
            }
        }
        printf("Can't translate NAME label '%s'\n", lb->lb_str);
        return;
    }

    if (lb->lb_type == STRING) {
        if (isalpha(Py_CHARMASK(lb->lb_str[1])) ||
            lb->lb_str[1] == '_') {
            /* Looks like an identifier: this is a keyword.  Copy the
               text between the quotes into a fresh buffer. */
            char *p;
            char *src;
            char *dest;
            size_t name_len;
            if (Py_DebugFlag)
                printf("Label %s is a keyword\n", lb->lb_str);
            lb->lb_type = NAME;
            src = lb->lb_str + 1;
            p = strchr(src, '\'');
            if (p)
                name_len = p - src;
            else
                name_len = strlen(src);
            dest = (char *)malloc(name_len + 1);
            if (!dest) {
                printf("Can't alloc dest '%s'\n", src);
                return;
            }
            strncpy(dest, src, name_len);
            dest[name_len] = '\0';
            free(lb->lb_str);
            lb->lb_str = dest;
        }
        else if (lb->lb_str[2] == lb->lb_str[0]) {
            /* One-character operator, e.g. '+'. */
            int type = (int) PyToken_OneChar(lb->lb_str[1]);
            if (type != OP) {
                lb->lb_type = type;
                free(lb->lb_str);
                lb->lb_str = NULL;
            }
            else
                printf("Unknown OP label %s\n",
                    lb->lb_str);
        }
        else if (lb->lb_str[2] && lb->lb_str[3] == lb->lb_str[0]) {
            /* Two-character operator, e.g. '=='. */
            int type = (int) PyToken_TwoChars(lb->lb_str[1],
                                              lb->lb_str[2]);
            if (type != OP) {
                lb->lb_type = type;
                free(lb->lb_str);
                lb->lb_str = NULL;
            }
            else
                printf("Unknown OP label %s\n",
                    lb->lb_str);
        }
        else if (lb->lb_str[2] && lb->lb_str[3] && lb->lb_str[4] == lb->lb_str[0]) {
            /* Three-character operator, e.g. '**='. */
            int type = (int) PyToken_ThreeChars(lb->lb_str[1],
                                                lb->lb_str[2],
                                                lb->lb_str[3]);
            if (type != OP) {
                lb->lb_type = type;
                free(lb->lb_str);
                lb->lb_str = NULL;
            }
            else
                printf("Unknown OP label %s\n",
                    lb->lb_str);
        }
        else
            printf("Can't translate STRING label %s\n",
                lb->lb_str);
    }
    else
        printf("Can't translate label '%s'\n",
            PyGrammar_LabelRepr(lb));
}
#include "pgenheaders.h"
#include "metagrammar.h"
#include "grammar.h"
#include "pgen.h"
/* Hand-built parse tables for the meta-grammar: the grammar in which
   grammar files themselves are written.  Symbol numbers match the
   #defines in metagrammar.h (MSTART=256 ... ATOM=261).  Each arc is
   {label, destination-state}; an arc of the form {0, n} that points
   at its own state marks an accepting state. */

/* DFA 0: MSTART (symbol 256) */
static arc arcs_0_0[3] = {
    {2, 0},
    {3, 0},
    {4, 1},
};
static arc arcs_0_1[1] = {
    {0, 1},
};
static state states_0[2] = {
    {3, arcs_0_0},
    {1, arcs_0_1},
};

/* DFA 1: RULE (symbol 257) */
static arc arcs_1_0[1] = {
    {5, 1},
};
static arc arcs_1_1[1] = {
    {6, 2},
};
static arc arcs_1_2[1] = {
    {7, 3},
};
static arc arcs_1_3[1] = {
    {3, 4},
};
static arc arcs_1_4[1] = {
    {0, 4},
};
static state states_1[5] = {
    {1, arcs_1_0},
    {1, arcs_1_1},
    {1, arcs_1_2},
    {1, arcs_1_3},
    {1, arcs_1_4},
};

/* DFA 2: RHS (symbol 258) */
static arc arcs_2_0[1] = {
    {8, 1},
};
static arc arcs_2_1[2] = {
    {9, 0},
    {0, 1},
};
static state states_2[2] = {
    {1, arcs_2_0},
    {2, arcs_2_1},
};

/* DFA 3: ALT (symbol 259) */
static arc arcs_3_0[1] = {
    {10, 1},
};
static arc arcs_3_1[2] = {
    {10, 1},
    {0, 1},
};
static state states_3[2] = {
    {1, arcs_3_0},
    {2, arcs_3_1},
};

/* DFA 4: ITEM (symbol 260) */
static arc arcs_4_0[2] = {
    {11, 1},
    {13, 2},
};
static arc arcs_4_1[1] = {
    {7, 3},
};
static arc arcs_4_2[3] = {
    {14, 4},
    {15, 4},
    {0, 2},
};
static arc arcs_4_3[1] = {
    {12, 4},
};
static arc arcs_4_4[1] = {
    {0, 4},
};
static state states_4[5] = {
    {2, arcs_4_0},
    {1, arcs_4_1},
    {3, arcs_4_2},
    {1, arcs_4_3},
    {1, arcs_4_4},
};

/* DFA 5: ATOM (symbol 261) */
static arc arcs_5_0[3] = {
    {5, 1},
    {16, 1},
    {17, 2},
};
static arc arcs_5_1[1] = {
    {0, 1},
};
static arc arcs_5_2[1] = {
    {7, 3},
};
static arc arcs_5_3[1] = {
    {18, 1},
};
static state states_5[4] = {
    {3, arcs_5_0},
    {1, arcs_5_1},
    {1, arcs_5_2},
    {1, arcs_5_3},
};

/* One row per rule: {symbol, name, type, nstates, states, FIRST-set
   bitmap over the label numbers (octal-escaped bytes)}. */
static dfa dfas[6] = {
    {256, "MSTART", 0, 2, states_0,
     "\070\000\000"},
    {257, "RULE", 0, 5, states_1,
     "\040\000\000"},
    {258, "RHS", 0, 2, states_2,
     "\040\010\003"},
    {259, "ALT", 0, 2, states_3,
     "\040\010\003"},
    {260, "ITEM", 0, 5, states_4,
     "\040\010\003"},
    {261, "ATOM", 0, 4, states_5,
     "\040\000\003"},
};

/* {type, string} label pairs; a 0 string means the label has no
   keyword text attached.  Label 0 is the reserved EMPTY label. */
static label labels[19] = {
    {0, "EMPTY"},
    {256, 0},
    {257, 0},
    {4, 0},
    {0, 0},
    {1, 0},
    {11, 0},
    {258, 0},
    {259, 0},
    {18, 0},
    {260, 0},
    {9, 0},
    {10, 0},
    {261, 0},
    {16, 0},
    {14, 0},
    {3, 0},
    {7, 0},
    {8, 0},
};

/* The meta-grammar itself: 6 rules, 19 labels, start symbol MSTART. */
static grammar _PyParser_Grammar = {
    6,
    dfas,
    {19, labels},
    256
};
/* Return the statically initialized meta-grammar defined above. */
grammar *
meta_grammar(void)
{
    return &_PyParser_Grammar;
}

/* Exported alias that simply forwards to meta_grammar(). */
grammar *
Py_meta_grammar(void)
{
    return meta_grammar();
}
...@@ -99,10 +99,8 @@ PyParser_ParseStringObject(const char *s, PyObject *filename, ...@@ -99,10 +99,8 @@ PyParser_ParseStringObject(const char *s, PyObject *filename,
tok->type_comments = 1; tok->type_comments = 1;
} }
#ifndef PGEN
Py_INCREF(err_ret->filename); Py_INCREF(err_ret->filename);
tok->filename = err_ret->filename; tok->filename = err_ret->filename;
#endif
return parsetok(tok, g, start, err_ret, flags); return parsetok(tok, g, start, err_ret, flags);
} }
...@@ -113,7 +111,6 @@ PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename_str, ...@@ -113,7 +111,6 @@ PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename_str,
{ {
node *n; node *n;
PyObject *filename = NULL; PyObject *filename = NULL;
#ifndef PGEN
if (filename_str != NULL) { if (filename_str != NULL) {
filename = PyUnicode_DecodeFSDefault(filename_str); filename = PyUnicode_DecodeFSDefault(filename_str);
if (filename == NULL) { if (filename == NULL) {
...@@ -121,11 +118,8 @@ PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename_str, ...@@ -121,11 +118,8 @@ PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename_str,
return NULL; return NULL;
} }
} }
#endif
n = PyParser_ParseStringObject(s, filename, g, start, err_ret, flags); n = PyParser_ParseStringObject(s, filename, g, start, err_ret, flags);
#ifndef PGEN
Py_XDECREF(filename); Py_XDECREF(filename);
#endif
return n; return n;
} }
...@@ -169,10 +163,8 @@ PyParser_ParseFileObject(FILE *fp, PyObject *filename, ...@@ -169,10 +163,8 @@ PyParser_ParseFileObject(FILE *fp, PyObject *filename,
if (*flags & PyPARSE_TYPE_COMMENTS) { if (*flags & PyPARSE_TYPE_COMMENTS) {
tok->type_comments = 1; tok->type_comments = 1;
} }
#ifndef PGEN
Py_INCREF(err_ret->filename); Py_INCREF(err_ret->filename);
tok->filename = err_ret->filename; tok->filename = err_ret->filename;
#endif
return parsetok(tok, g, start, err_ret, flags); return parsetok(tok, g, start, err_ret, flags);
} }
...@@ -184,7 +176,6 @@ PyParser_ParseFileFlagsEx(FILE *fp, const char *filename, ...@@ -184,7 +176,6 @@ PyParser_ParseFileFlagsEx(FILE *fp, const char *filename,
{ {
node *n; node *n;
PyObject *fileobj = NULL; PyObject *fileobj = NULL;
#ifndef PGEN
if (filename != NULL) { if (filename != NULL) {
fileobj = PyUnicode_DecodeFSDefault(filename); fileobj = PyUnicode_DecodeFSDefault(filename);
if (fileobj == NULL) { if (fileobj == NULL) {
...@@ -192,12 +183,9 @@ PyParser_ParseFileFlagsEx(FILE *fp, const char *filename, ...@@ -192,12 +183,9 @@ PyParser_ParseFileFlagsEx(FILE *fp, const char *filename,
return NULL; return NULL;
} }
} }
#endif
n = PyParser_ParseFileObject(fp, fileobj, enc, g, n = PyParser_ParseFileObject(fp, fileobj, enc, g,
start, ps1, ps2, err_ret, flags); start, ps1, ps2, err_ret, flags);
#ifndef PGEN
Py_XDECREF(fileobj); Py_XDECREF(fileobj);
#endif
return n; return n;
} }
...@@ -371,7 +359,6 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret, ...@@ -371,7 +359,6 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
} }
} }
#ifndef PGEN
/* Check that the source for a single input statement really /* Check that the source for a single input statement really
is a single statement by looking at what is left in the is a single statement by looking at what is left in the
buffer after parsing. Trailing whitespace and comments buffer after parsing. Trailing whitespace and comments
...@@ -399,7 +386,6 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret, ...@@ -399,7 +386,6 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
c = *++cur; c = *++cur;
} }
} }
#endif
} }
else else
n = NULL; n = NULL;
...@@ -470,7 +456,6 @@ initerr(perrdetail *err_ret, PyObject *filename) ...@@ -470,7 +456,6 @@ initerr(perrdetail *err_ret, PyObject *filename)
err_ret->text = NULL; err_ret->text = NULL;
err_ret->token = -1; err_ret->token = -1;
err_ret->expected = -1; err_ret->expected = -1;
#ifndef PGEN
if (filename) { if (filename) {
Py_INCREF(filename); Py_INCREF(filename);
err_ret->filename = filename; err_ret->filename = filename;
...@@ -482,6 +467,5 @@ initerr(perrdetail *err_ret, PyObject *filename) ...@@ -482,6 +467,5 @@ initerr(perrdetail *err_ret, PyObject *filename)
return -1; return -1;
} }
} }
#endif
return 0; return 0;
} }
/* Build the pgen flavor of parsetok: defining PGEN before textually
   including the real source selects the #ifdef/#ifndef PGEN branches
   in parsetok.c. */
#define PGEN
#include "parsetok.c"
This diff is collapsed.
import argparse
from .pgen import ParserGenerator
def main():
    """Command-line driver: read a grammar file and a token definition
    file, then write graminit.h and graminit.c for the C parser."""
    cli = argparse.ArgumentParser(description="Parser generator main program.")
    cli.add_argument(
        "grammar", type=str, help="The file with the grammar definition in EBNF format"
    )
    cli.add_argument(
        "tokens", type=str, help="The file with the token definitions"
    )
    cli.add_argument(
        "graminit_h",
        type=argparse.FileType('w'),
        help="The path to write the grammar's non-terminals as #defines",
    )
    cli.add_argument(
        "graminit_c",
        type=argparse.FileType('w'),
        help="The path to write the grammar as initialized data",
    )
    cli.add_argument("--verbose", "-v", action="count")

    options = cli.parse_args()
    generator = ParserGenerator(
        options.grammar, options.tokens, verbose=options.verbose
    )
    tables = generator.make_grammar()
    tables.produce_graminit_h(options.graminit_h.write)
    tables.produce_graminit_c(options.graminit_c.write)


if __name__ == "__main__":
    main()
import collections
class Grammar:
"""Pgen parsing tables conversion class.
Once initialized, this class supplies the grammar tables for the
parsing engine implemented by parse.py. The parsing engine
accesses the instance variables directly. The class here does not
provide initialization of the tables; several subclasses exist to
do this (see the conv and pgen modules).
The load() method reads the tables from a pickle file, which is
much faster than the other ways offered by subclasses. The pickle
file is written by calling dump() (after loading the grammar
tables using a subclass). The report() method prints a readable
representation of the tables to stdout, for debugging.
The instance variables are as follows:
symbol2number -- a dict mapping symbol names to numbers. Symbol
numbers are always 256 or higher, to distinguish
them from token numbers, which are between 0 and
255 (inclusive).
number2symbol -- a dict mapping numbers to symbol names;
these two are each other's inverse.
states -- a list of DFAs, where each DFA is a list of
states, each state is a list of arcs, and each
arc is a (i, j) pair where i is a label and j is
a state number. The DFA number is the index into
this list. (This name is slightly confusing.)
Final states are represented by a special arc of
the form (0, j) where j is its own state number.
dfas -- a dict mapping symbol numbers to (DFA, first)
pairs, where DFA is an item from the states list
above, and first is a set of tokens that can
begin this grammar rule (represented by a dict
whose values are always 1).
labels -- a list of (x, y) pairs where x is either a token
number or a symbol number, and y is either None
or a string; the strings are keywords. The label
number is the index in this list; label numbers
are used to mark state transitions (arcs) in the
DFAs.
start -- the number of the grammar's start symbol.
keywords -- a dict mapping keyword strings to arc labels.
tokens -- a dict mapping token numbers to arc labels.
"""
def __init__(self):
self.symbol2number = collections.OrderedDict()
self.number2symbol = collections.OrderedDict()
self.states = []
self.dfas = collections.OrderedDict()
self.labels = [(0, "EMPTY")]
self.keywords = collections.OrderedDict()
self.tokens = collections.OrderedDict()
self.symbol2label = collections.OrderedDict()
self.start = 256
def produce_graminit_h(self, writer):
writer("/* Generated by Parser/pgen */\n\n")
for number, symbol in self.number2symbol.items():
writer("#define {} {}\n".format(symbol, number))
def produce_graminit_c(self, writer):
writer("/* Generated by Parser/pgen */\n\n")
writer('#include "pgenheaders.h"\n')
writer('#include "grammar.h"\n')
writer("grammar _PyParser_Grammar;\n")
self.print_dfas(writer)
self.print_labels(writer)
writer("grammar _PyParser_Grammar = {\n")
writer(" {n_dfas},\n".format(n_dfas=len(self.dfas)))
writer(" dfas,\n")
writer(" {{{n_labels}, labels}},\n".format(n_labels=len(self.labels)))
writer(" {start_number}\n".format(start_number=self.start))
writer("};\n")
def print_labels(self, writer):
writer(
"static label labels[{n_labels}] = {{\n".format(n_labels=len(self.labels))
)
for label, name in self.labels:
if name is None:
writer(" {{{label}, 0}},\n".format(label=label))
else:
writer(
' {{{label}, "{label_name}"}},\n'.format(
label=label, label_name=name
)
)
writer("};\n")
def print_dfas(self, writer):
self.print_states(writer)
writer("static dfa dfas[{}] = {{\n".format(len(self.dfas)))
for dfaindex, dfa_elem in enumerate(self.dfas.items()):
symbol, (dfa, first_sets) = dfa_elem
writer(
' {{{dfa_symbol}, "{symbol_name}", '.format(
dfa_symbol=symbol, symbol_name=self.number2symbol[symbol]
)
+ "0, {n_states}, states_{dfa_index},\n".format(
n_states=len(dfa), dfa_index=dfaindex
)
)
writer(' "')
k = [name for label, name in self.labels if label in first_sets]
bitset = bytearray((len(self.labels) >> 3) + 1)
for token in first_sets:
bitset[token >> 3] |= 1 << (token & 7)
for byte in bitset:
writer("\\%03o" % (byte & 0xFF))
writer('"},\n')
writer("};\n")
def print_states(self, write):
    """Write, for every DFA, its arc arrays followed by its static C
    `states_<n>` table mapping each state to its arc array."""
    for dfa_no, dfa in enumerate(self.states):
        # Arc arrays must come first: the state table references them.
        self.print_arcs(write, dfa_no, dfa)
        write(
            "static state states_{dfa_index}[{n_states}] = {{\n".format(
                dfa_index=dfa_no, n_states=len(dfa)
            )
        )
        for state_no, state in enumerate(dfa):
            write(
                " {{{n_arcs}, arcs_{dfa_index}_{state_index}}},\n".format(
                    n_arcs=len(state), dfa_index=dfa_no, state_index=state_no
                )
            )
        write("};\n")
def print_arcs(self, write, dfaindex, states):
    """Write one static C `arcs_<dfa>_<state>` array per state, listing
    each (label, target-state) transition of that state."""
    for state_no, state in enumerate(states):
        write(
            "static arc arcs_{dfa_index}_{state_index}[{n_arcs}] = {{\n".format(
                dfa_index=dfaindex, state_index=state_no, n_arcs=len(state)
            )
        )
        for label, target in state:
            write(
                " {{{from_label}, {to_state}}},\n".format(
                    from_label=label, to_state=target
                )
            )
        write("};\n")
This diff is collapsed.
import itertools
def generate_tokens(tokens):
    """Yield (token_name, token_number) pairs from the lines of a
    Grammar/Tokens file, numbering tokens consecutively from 0.

    Blank lines and '#' comment lines are skipped and do not consume a
    number.  Only the first word of each line is taken as the name.
    Two sentinel entries are appended: N_TOKENS (the next free number)
    and NT_OFFSET (fixed at 256, the first non-terminal number).

    Fix: the original stripped each line twice (`line.strip()` followed
    by `line.strip().startswith('#')` on the already-stripped value);
    the redundant call is removed and the two skip tests are merged.
    """
    numbers = itertools.count(0)
    for line in tokens:
        line = line.strip()
        if not line or line.startswith('#'):
            continue
        yield (line.split()[0], next(numbers))
    yield ('N_TOKENS', next(numbers))
    yield ('NT_OFFSET', 256)
def generate_opmap(tokens):
    """Yield (operator_string, token_name) pairs for every two-field line
    of a Grammar/Tokens file; blank lines, comments and lines without
    exactly two fields are ignored."""
    for raw in tokens:
        stripped = raw.strip()
        if not stripped:
            continue
        if stripped.startswith('#'):
            continue
        fields = stripped.split()
        if len(fields) == 2:
            name, op = fields
            yield (op.strip("'"), name)
    # Yield independently <>. This is needed so it does not collide
    # with the token generation in "generate_tokens" because if this
    # symbol is included in Grammar/Tokens, it will collide with !=
    # as it has the same name (NOTEQUAL).
    yield ('<>', 'NOTEQUAL')
/* Parser generator main program */
/* This expects a filename containing the grammar as argv[1] (UNIX)
or asks the console for such a file name (THINK C).
It writes its output on two files in the current directory:
- "graminit.c" gets the grammar as a bunch of initialized data
- "graminit.h" gets the grammar's non-terminals as #defines.
Error messages and status info during the generation process are
written to stdout, or sometimes to stderr. */
/* XXX TO DO:
- check for duplicate definitions of names (instead of fatal err)
*/
#define PGEN
#include "Python.h"
#include "pycore_pymem.h"
#include "pycore_pystate.h"
#include "pgenheaders.h"
#include "grammar.h"
#include "node.h"
#include "parsetok.h"
#include "pgen.h"
/* Interpreter flags normally defined by the CPython core; pgen links
   without it, so provide the definitions the linked-in tokenizer/parser
   code references. */
int Py_DebugFlag = 0;
int Py_VerboseFlag = 0;
int Py_IgnoreEnvironmentFlag = 0;
/* Minimal runtime state object -- presumably required by the allocator
   code pulled into the pgen binary (TODO confirm against obmalloc.c). */
_PyRuntimeState _PyRuntime = _PyRuntimeState_INIT;
/* Forward */
grammar *getgrammar(const char *filename);
/* Minimal stand-in for the interpreter's Py_Exit(): terminate the
   process immediately with the given status code. */
void
Py_Exit(int sts)
{
    exit(sts);
}
/* Needed by obmalloc.c */
int
PyGILState_Check(void)
{
    /* pgen is single-threaded: pretend the GIL is always held. */
    return 1;
}

/* Debug-traceback hook referenced by the memory allocator: no-op here. */
void
_PyMem_DumpTraceback(int fd, const void *ptr)
{
}
/* Entry point: pgen <grammar-file> <graminit.h-path> <graminit.c-path>.
   Parses the grammar file and writes the generated parser tables
   (graminit.c) and the non-terminal #defines (graminit.h). */
int
main(int argc, char **argv)
{
    grammar *g;
    FILE *fp;
    char *filename, *graminit_h, *graminit_c;

    if (argc != 4) {
        fprintf(stderr,
            "usage: %s grammar graminit.h graminit.c\n", argv[0]);
        Py_Exit(2);
    }
    filename = argv[1];
    graminit_h = argv[2];
    graminit_c = argv[3];
    /* getgrammar() exits the process on any error, so g is valid here. */
    g = getgrammar(filename);
    fp = fopen(graminit_c, "w");
    if (fp == NULL) {
        perror(graminit_c);
        Py_Exit(1);
    }
    if (Py_DebugFlag)
        printf("Writing %s ...\n", graminit_c);
    printgrammar(g, fp);
    fclose(fp);
    fp = fopen(graminit_h, "w");
    if (fp == NULL) {
        perror(graminit_h);
        Py_Exit(1);
    }
    if (Py_DebugFlag)
        printf("Writing %s ...\n", graminit_h);
    printnonterminals(g, fp);
    fclose(fp);
    freegrammar(g);
    Py_Exit(0);
    return 0; /* Make gcc -Wall happy */
}
/* Parse `filename` with the meta-grammar and run pgen() on the resulting
   parse tree.  Any failure prints a diagnostic and exits the process, so
   the returned grammar pointer is always non-NULL. */
grammar *
getgrammar(const char *filename)
{
    FILE *fp;
    node *n;
    grammar *g0, *g;
    perrdetail err;

    fp = fopen(filename, "r");
    if (fp == NULL) {
        perror(filename);
        Py_Exit(1);
    }
    g0 = meta_grammar();
    n = PyParser_ParseFile(fp, filename, g0, g0->g_start,
                           (char *)NULL, (char *)NULL, &err);
    fclose(fp);
    if (n == NULL) {
        fprintf(stderr, "Parsing error %d, line %d.\n",
                err.error, err.lineno);
        if (err.text != NULL) {
            size_t len;
            int i;
            /* Echo the offending line, ensuring it ends with a newline. */
            fprintf(stderr, "%s", err.text);
            len = strlen(err.text);
            if (len == 0 || err.text[len-1] != '\n')
                fprintf(stderr, "\n");
            /* Print a caret under the error column; copy tabs through so
               the marker lines up with the echoed line above. */
            for (i = 0; i < err.offset; i++) {
                if (err.text[i] == '\t')
                    putc('\t', stderr);
                else
                    putc(' ', stderr);
            }
            fprintf(stderr, "^\n");
            PyObject_FREE(err.text);
        }
        Py_Exit(1);
    }
    g = pgen(n);
    PyNode_Free(n);
    if (g == NULL) {
        printf("Bad grammar.\n");
        Py_Exit(1);
    }
    return g;
}
/* Can't happen in pgen */
/* Stub: pgen never sets a Python exception, so there is never one
   pending.  Returns NULL (idiomatic null pointer, rather than the
   original bare `0`) to signal "no exception". */
PyObject*
PyErr_Occurred()
{
    return NULL;
}
/* Minimal fatal-error handler: report the message on stderr and exit
   with status 1 (no interpreter state to dump in pgen). */
void
Py_FatalError(const char *msg)
{
    fprintf(stderr, "pgen: FATAL ERROR: %s\n", msg);
    Py_Exit(1);
}
/* No-nonsense my_readline() for tokenizer.c */
/* Prompt on stderr, read one line (up to 1000 bytes) from sys_stdin and
   return it in a PyMem-allocated buffer.  On EOF an empty string is
   returned; on allocation failure, NULL.
   Fix: the old code handled a line with no trailing newline by
   overwriting its LAST CHARACTER with '\n', silently dropping one byte
   of input; now the newline is appended instead. */
char *
PyOS_Readline(FILE *sys_stdin, FILE *sys_stdout, const char *prompt)
{
    size_t n = 1000;
    char *p = (char *)PyMem_MALLOC(n);
    char *q;
    if (p == NULL)
        return NULL;
    fprintf(stderr, "%s", prompt);
    q = fgets(p, n, sys_stdin);
    if (q == NULL) {
        /* EOF before any input: return an empty string. */
        *p = '\0';
        return p;
    }
    n = strlen(p);
    if (n > 0 && p[n-1] != '\n') {
        /* Append the missing newline instead of clobbering the last
           input character. */
        q = (char *)PyMem_REALLOC(p, n + 2);
        if (q == NULL)
            return p;   /* out of memory: keep the newline-less line */
        p = q;
        p[n] = '\n';
        p[n + 1] = '\0';
        n++;
    }
    /* Shrink the 1000-byte buffer to the actual line length. */
    return (char *)PyMem_REALLOC(p, n+1);
}
/* No-nonsense fgets */
/* In pgen no newline translation is needed: delegate directly to the
   C library fgets().  The fobj argument is accepted for signature
   compatibility and ignored. */
char *
Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj)
{
    char *line = fgets(buf, n, stream);
    return line;
}
#include <stdarg.h>
/* Minimal PySys_WriteStderr(): forward the printf-style arguments
   straight to stderr (pgen has no Python sys module to honour). */
void
PySys_WriteStderr(const char *format, ...)
{
    va_list va;

    va_start(va, format);
    vfprintf(stderr, format, va);
    va_end(va);
}
/* Print a bunch of C initializers that represent a grammar */
#define PGEN
#include "pgenheaders.h"
#include "grammar.h"
/* Forward */
static void printarcs(int, dfa *, FILE *);
static void printstates(grammar *, FILE *);
static void printdfas(grammar *, FILE *);
static void printlabels(grammar *, FILE *);
/* Write the whole grammar as C initializers to fp: a fixed prologue,
   the DFA and label tables, then the _PyParser_Grammar struct that
   references them.  This is the body of graminit.c. */
void
printgrammar(grammar *g, FILE *fp)
{
    fprintf(fp, "/* Generated by Parser/pgen */\n\n");
    fprintf(fp, "#include \"pgenheaders.h\"\n");
    fprintf(fp, "#include \"grammar.h\"\n");
    /* Tentative declaration first so the tables can be emitted before
       the full initializer below. */
    fprintf(fp, "grammar _PyParser_Grammar;\n");
    printdfas(g, fp);
    printlabels(g, fp);
    fprintf(fp, "grammar _PyParser_Grammar = {\n");
    fprintf(fp, " %d,\n", g->g_ndfas);
    fprintf(fp, " dfas,\n");
    fprintf(fp, " {%d, labels},\n", g->g_ll.ll_nlabels);
    fprintf(fp, " %d\n", g->g_start);
    fprintf(fp, "};\n");
}
/* Write one C #define per non-terminal (one per DFA) to fp;
   this is the body of graminit.h. */
void
printnonterminals(grammar *g, FILE *fp)
{
    int i;

    fprintf(fp, "/* Generated by Parser/pgen */\n\n");
    for (i = 0; i < g->g_ndfas; i++) {
        dfa *d = &g->g_dfa[i];
        fprintf(fp, "#define %s %d\n", d->d_name, d->d_type);
    }
}
/* Emit one static `arcs_<i>_<j>` array per state j of DFA number i,
   listing each arc as a {label, target-state} pair. */
static void
printarcs(int i, dfa *d, FILE *fp)
{
    int j;

    for (j = 0; j < d->d_nstates; j++) {
        state *s = &d->d_state[j];
        int k;
        fprintf(fp, "static arc arcs_%d_%d[%d] = {\n",
                i, j, s->s_narcs);
        for (k = 0; k < s->s_narcs; k++) {
            arc *a = &s->s_arc[k];
            fprintf(fp, " {%d, %d},\n", a->a_lbl, a->a_arrow);
        }
        fprintf(fp, "};\n");
    }
}
/* For every DFA, emit its arc arrays followed by its static
   `states_<i>` table mapping each state to its arc array. */
static void
printstates(grammar *g, FILE *fp)
{
    int i, j;

    for (i = 0; i < g->g_ndfas; i++) {
        dfa *d = &g->g_dfa[i];
        /* The arc arrays must precede the state table that names them. */
        printarcs(i, d, fp);
        fprintf(fp, "static state states_%d[%d] = {\n",
                i, d->d_nstates);
        for (j = 0; j < d->d_nstates; j++) {
            state *s = &d->d_state[j];
            fprintf(fp, " {%d, arcs_%d_%d},\n",
                    s->s_narcs, i, j);
        }
        fprintf(fp, "};\n");
    }
}
/* Emit the state tables (via printstates) and then the static `dfas`
   table: one entry per non-terminal, terminated by its first-set as an
   octal-escaped byte string. */
static void
printdfas(grammar *g, FILE *fp)
{
    int i, j, n;

    printstates(g, fp);
    fprintf(fp, "static dfa dfas[%d] = {\n", g->g_ndfas);
    for (i = 0; i < g->g_ndfas; i++) {
        dfa *d = &g->g_dfa[i];
        fprintf(fp, " {%d, \"%s\", %d, %d, states_%d,\n",
                d->d_type, d->d_name, d->d_initial, d->d_nstates, i);
        fprintf(fp, " \"");
        /* One bit per label, packed into NBYTES(...) bytes. */
        n = NBYTES(g->g_ll.ll_nlabels);
        for (j = 0; j < n; j++)
            fprintf(fp, "\\%03o", d->d_first[j] & 0xff);
        fprintf(fp, "\"},\n");
    }
    fprintf(fp, "};\n");
}
/* Emit the static `labels` array: one {type, string} entry per label,
   with a 0 pointer for unnamed labels. */
static void
printlabels(grammar *g, FILE *fp)
{
    int i;

    fprintf(fp, "static label labels[%d] = {\n", g->g_ll.ll_nlabels);
    for (i = 0; i < g->g_ll.ll_nlabels; i++) {
        label *l = &g->g_ll.ll_label[i];
        if (l->lb_str == NULL)
            fprintf(fp, " {%d, 0},\n", l->lb_type);
        else
            fprintf(fp, " {%d, \"%s\"},\n",
                    l->lb_type, l->lb_str);
    }
    fprintf(fp, "};\n");
}
...@@ -10,13 +10,11 @@ ...@@ -10,13 +10,11 @@
#include "tokenizer.h" #include "tokenizer.h"
#include "errcode.h" #include "errcode.h"
#ifndef PGEN
#include "unicodeobject.h" #include "unicodeobject.h"
#include "bytesobject.h" #include "bytesobject.h"
#include "fileobject.h" #include "fileobject.h"
#include "codecs.h" #include "codecs.h"
#include "abstract.h" #include "abstract.h"
#endif /* PGEN */
/* Alternate tab spacing */ /* Alternate tab spacing */
#define ALTTABSIZE 1 #define ALTTABSIZE 1
...@@ -81,11 +79,9 @@ tok_new(void) ...@@ -81,11 +79,9 @@ tok_new(void)
tok->enc = NULL; tok->enc = NULL;
tok->encoding = NULL; tok->encoding = NULL;
tok->cont_line = 0; tok->cont_line = 0;
#ifndef PGEN
tok->filename = NULL; tok->filename = NULL;
tok->decoding_readline = NULL; tok->decoding_readline = NULL;
tok->decoding_buffer = NULL; tok->decoding_buffer = NULL;
#endif
tok->type_comments = 0; tok->type_comments = 0;
return tok; return tok;
...@@ -104,28 +100,6 @@ new_string(const char *s, Py_ssize_t len, struct tok_state *tok) ...@@ -104,28 +100,6 @@ new_string(const char *s, Py_ssize_t len, struct tok_state *tok)
return result; return result;
} }
#ifdef PGEN
static char *
decoding_fgets(char *s, int size, struct tok_state *tok)
{
return fgets(s, size, tok->fp);
}
static int
decoding_feof(struct tok_state *tok)
{
return feof(tok->fp);
}
static char *
decode_str(const char *str, int exec_input, struct tok_state *tok)
{
return new_string(str, strlen(str), tok);
}
#else /* PGEN */
static char * static char *
error_ret(struct tok_state *tok) /* XXX */ error_ret(struct tok_state *tok) /* XXX */
{ {
...@@ -551,7 +525,6 @@ decoding_fgets(char *s, int size, struct tok_state *tok) ...@@ -551,7 +525,6 @@ decoding_fgets(char *s, int size, struct tok_state *tok)
return error_ret(tok); return error_ret(tok);
} }
} }
#ifndef PGEN
/* The default encoding is UTF-8, so make sure we don't have any /* The default encoding is UTF-8, so make sure we don't have any
non-UTF-8 sequences in it. */ non-UTF-8 sequences in it. */
if (line && !tok->encoding) { if (line && !tok->encoding) {
...@@ -574,7 +547,6 @@ decoding_fgets(char *s, int size, struct tok_state *tok) ...@@ -574,7 +547,6 @@ decoding_fgets(char *s, int size, struct tok_state *tok)
badchar, tok->filename, tok->lineno + 1); badchar, tok->filename, tok->lineno + 1);
return error_ret(tok); return error_ret(tok);
} }
#endif
return line; return line;
} }
...@@ -738,8 +710,6 @@ decode_str(const char *input, int single, struct tok_state *tok) ...@@ -738,8 +710,6 @@ decode_str(const char *input, int single, struct tok_state *tok)
return str; return str;
} }
#endif /* PGEN */
/* Set up tokenizer for string */ /* Set up tokenizer for string */
struct tok_state * struct tok_state *
...@@ -765,9 +735,7 @@ PyTokenizer_FromUTF8(const char *str, int exec_input) ...@@ -765,9 +735,7 @@ PyTokenizer_FromUTF8(const char *str, int exec_input)
struct tok_state *tok = tok_new(); struct tok_state *tok = tok_new();
if (tok == NULL) if (tok == NULL)
return NULL; return NULL;
#ifndef PGEN
tok->input = str = translate_newlines(str, exec_input, tok); tok->input = str = translate_newlines(str, exec_input, tok);
#endif
if (str == NULL) { if (str == NULL) {
PyTokenizer_Free(tok); PyTokenizer_Free(tok);
return NULL; return NULL;
...@@ -828,11 +796,9 @@ PyTokenizer_Free(struct tok_state *tok) ...@@ -828,11 +796,9 @@ PyTokenizer_Free(struct tok_state *tok)
{ {
if (tok->encoding != NULL) if (tok->encoding != NULL)
PyMem_FREE(tok->encoding); PyMem_FREE(tok->encoding);
#ifndef PGEN
Py_XDECREF(tok->decoding_readline); Py_XDECREF(tok->decoding_readline);
Py_XDECREF(tok->decoding_buffer); Py_XDECREF(tok->decoding_buffer);
Py_XDECREF(tok->filename); Py_XDECREF(tok->filename);
#endif
if (tok->fp != NULL && tok->buf != NULL) if (tok->fp != NULL && tok->buf != NULL)
PyMem_FREE(tok->buf); PyMem_FREE(tok->buf);
if (tok->input) if (tok->input)
...@@ -871,7 +837,6 @@ tok_nextc(struct tok_state *tok) ...@@ -871,7 +837,6 @@ tok_nextc(struct tok_state *tok)
} }
if (tok->prompt != NULL) { if (tok->prompt != NULL) {
char *newtok = PyOS_Readline(stdin, stdout, tok->prompt); char *newtok = PyOS_Readline(stdin, stdout, tok->prompt);
#ifndef PGEN
if (newtok != NULL) { if (newtok != NULL) {
char *translated = translate_newlines(newtok, 0, tok); char *translated = translate_newlines(newtok, 0, tok);
PyMem_FREE(newtok); PyMem_FREE(newtok);
...@@ -900,7 +865,6 @@ tok_nextc(struct tok_state *tok) ...@@ -900,7 +865,6 @@ tok_nextc(struct tok_state *tok)
strcpy(newtok, buf); strcpy(newtok, buf);
Py_DECREF(u); Py_DECREF(u);
} }
#endif
if (tok->nextprompt != NULL) if (tok->nextprompt != NULL)
tok->prompt = tok->nextprompt; tok->prompt = tok->nextprompt;
if (newtok == NULL) if (newtok == NULL)
...@@ -1056,7 +1020,6 @@ tok_backup(struct tok_state *tok, int c) ...@@ -1056,7 +1020,6 @@ tok_backup(struct tok_state *tok, int c)
static int static int
syntaxerror(struct tok_state *tok, const char *format, ...) syntaxerror(struct tok_state *tok, const char *format, ...)
{ {
#ifndef PGEN
va_list vargs; va_list vargs;
#ifdef HAVE_STDARG_PROTOTYPES #ifdef HAVE_STDARG_PROTOTYPES
va_start(vargs, format); va_start(vargs, format);
...@@ -1069,9 +1032,6 @@ syntaxerror(struct tok_state *tok, const char *format, ...) ...@@ -1069,9 +1032,6 @@ syntaxerror(struct tok_state *tok, const char *format, ...)
tok->lineno, tok->lineno,
(int)(tok->cur - tok->line_start)); (int)(tok->cur - tok->line_start));
tok->done = E_ERROR; tok->done = E_ERROR;
#else
tok->done = E_TOKEN;
#endif
return ERRORTOKEN; return ERRORTOKEN;
} }
...@@ -1083,9 +1043,6 @@ indenterror(struct tok_state *tok) ...@@ -1083,9 +1043,6 @@ indenterror(struct tok_state *tok)
return ERRORTOKEN; return ERRORTOKEN;
} }
#ifdef PGEN
#define verify_identifier(tok) 1
#else
/* Verify that the identifier follows PEP 3131. /* Verify that the identifier follows PEP 3131.
All identifier strings are guaranteed to be "ready" unicode objects. All identifier strings are guaranteed to be "ready" unicode objects.
*/ */
...@@ -1112,7 +1069,6 @@ verify_identifier(struct tok_state *tok) ...@@ -1112,7 +1069,6 @@ verify_identifier(struct tok_state *tok)
tok->done = E_IDENTIFIER; tok->done = E_IDENTIFIER;
return result; return result;
} }
#endif
static int static int
tok_decimal_tail(struct tok_state *tok) tok_decimal_tail(struct tok_state *tok)
...@@ -1667,25 +1623,20 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) ...@@ -1667,25 +1623,20 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
case '(': case '(':
case '[': case '[':
case '{': case '{':
#ifndef PGEN
if (tok->level >= MAXLEVEL) { if (tok->level >= MAXLEVEL) {
return syntaxerror(tok, "too many nested parentheses"); return syntaxerror(tok, "too many nested parentheses");
} }
tok->parenstack[tok->level] = c; tok->parenstack[tok->level] = c;
tok->parenlinenostack[tok->level] = tok->lineno; tok->parenlinenostack[tok->level] = tok->lineno;
#endif
tok->level++; tok->level++;
break; break;
case ')': case ')':
case ']': case ']':
case '}': case '}':
#ifndef PGEN
if (!tok->level) { if (!tok->level) {
return syntaxerror(tok, "unmatched '%c'", c); return syntaxerror(tok, "unmatched '%c'", c);
} }
#endif
tok->level--; tok->level--;
#ifndef PGEN
int opening = tok->parenstack[tok->level]; int opening = tok->parenstack[tok->level];
if (!((opening == '(' && c == ')') || if (!((opening == '(' && c == ')') ||
(opening == '[' && c == ']') || (opening == '[' && c == ']') ||
...@@ -1704,7 +1655,6 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) ...@@ -1704,7 +1655,6 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end)
c, opening); c, opening);
} }
} }
#endif
break; break;
} }
...@@ -1742,11 +1692,7 @@ PyTokenizer_FindEncodingFilename(int fd, PyObject *filename) ...@@ -1742,11 +1692,7 @@ PyTokenizer_FindEncodingFilename(int fd, PyObject *filename)
FILE *fp; FILE *fp;
char *p_start =NULL , *p_end =NULL , *encoding = NULL; char *p_start =NULL , *p_end =NULL , *encoding = NULL;
#ifndef PGEN
fd = _Py_dup(fd); fd = _Py_dup(fd);
#else
fd = dup(fd);
#endif
if (fd < 0) { if (fd < 0) {
return NULL; return NULL;
} }
...@@ -1760,7 +1706,6 @@ PyTokenizer_FindEncodingFilename(int fd, PyObject *filename) ...@@ -1760,7 +1706,6 @@ PyTokenizer_FindEncodingFilename(int fd, PyObject *filename)
fclose(fp); fclose(fp);
return NULL; return NULL;
} }
#ifndef PGEN
if (filename != NULL) { if (filename != NULL) {
Py_INCREF(filename); Py_INCREF(filename);
tok->filename = filename; tok->filename = filename;
...@@ -1773,7 +1718,6 @@ PyTokenizer_FindEncodingFilename(int fd, PyObject *filename) ...@@ -1773,7 +1718,6 @@ PyTokenizer_FindEncodingFilename(int fd, PyObject *filename)
return encoding; return encoding;
} }
} }
#endif
while (tok->lineno < 2 && tok->done == E_OK) { while (tok->lineno < 2 && tok->done == E_OK) {
PyTokenizer_Get(tok, &p_start, &p_end); PyTokenizer_Get(tok, &p_start, &p_end);
} }
......
...@@ -42,15 +42,9 @@ struct tok_state { ...@@ -42,15 +42,9 @@ struct tok_state {
expression (cf. issue 16806) */ expression (cf. issue 16806) */
int level; /* () [] {} Parentheses nesting level */ int level; /* () [] {} Parentheses nesting level */
/* Used to allow free continuations inside them */ /* Used to allow free continuations inside them */
#ifndef PGEN
char parenstack[MAXLEVEL]; char parenstack[MAXLEVEL];
int parenlinenostack[MAXLEVEL]; int parenlinenostack[MAXLEVEL];
/* pgen doesn't have access to Python codecs, it cannot decode the input
filename. The bytes filename might be kept, but it is only used by
indenterror() and it is not really needed: pgen only compiles one file
(Grammar/Grammar). */
PyObject *filename; PyObject *filename;
#endif
/* Stuff for checking on different tab sizes */ /* Stuff for checking on different tab sizes */
int altindstack[MAXINDENT]; /* Stack of alternate indents */ int altindstack[MAXINDENT]; /* Stack of alternate indents */
/* Stuff for PEP 0263 */ /* Stuff for PEP 0263 */
...@@ -63,10 +57,8 @@ struct tok_state { ...@@ -63,10 +57,8 @@ struct tok_state {
const char* multi_line_start; /* pointer to start of first line of const char* multi_line_start; /* pointer to start of first line of
a single line or multi line string a single line or multi line string
expression (cf. issue 16806) */ expression (cf. issue 16806) */
#ifndef PGEN
PyObject *decoding_readline; /* open(...).readline */ PyObject *decoding_readline; /* open(...).readline */
PyObject *decoding_buffer; PyObject *decoding_buffer;
#endif
const char* enc; /* Encoding for the current str. */ const char* enc; /* Encoding for the current str. */
const char* str; const char* str;
const char* input; /* Tokenizer's newline translated copy of the string. */ const char* input; /* Tokenizer's newline translated copy of the string. */
......
#define PGEN
#include "tokenizer.c"
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment