Clean implementation of Parser/pgen and fix some style issues (GH-12156)

8bc401a5 · Pablo Galindo · GitHub · 97c288df · 8bc401a5 · 8bc401a5
Commit 8bc401a5, authored Mar 04, 2019 by Pablo Galindo; committed by GitHub on Mar 04, 2019.
Showing 4 changed files with 14 additions and 28 deletions

Parser/pgen/__main__.py +1 -0

Parser/pgen/grammar.py +9 -24

Parser/pgen/pgen.py +2 -4

Parser/pgen/token.py +2 -0

No files found.
--- a/Parser/pgen/__main__.py
+++ b/Parser/pgen/__main__.py
@@ -2,6 +2,7 @@ import argparse

 from .pgen import ParserGenerator

+
 def main():
    parser = argparse.ArgumentParser(description="Parser generator main program.")
    parser.add_argument(

--- a/Parser/pgen/grammar.py
+++ b/Parser/pgen/grammar.py
 import collections

-class Grammar:
-    """Pgen parsing tables conversion class.
-
-    Once initialized, this class supplies the grammar tables for the
-    parsing engine implemented by parse.py.  The parsing engine
-    accesses the instance variables directly.  The class here does not
-    provide initialization of the tables; several subclasses exist to
-    do this (see the conv and pgen modules).

-    The load() method reads the tables from a pickle file, which is
-    much faster than the other ways offered by subclasses.  The pickle
-    file is written by calling dump() (after loading the grammar
-    tables using a subclass).  The report() method prints a readable
-    representation of the tables to stdout, for debugging.
+class Grammar:
+    """Pgen parsing tables class.

    The instance variables are as follows:

@@ -36,8 +25,7 @@ class Grammar:
    dfas          -- a dict mapping symbol numbers to (DFA, first)
                     pairs, where DFA is an item from the states list
                     above, and first is a set of tokens that can
-                     begin this grammar rule (represented by a dict
-                     whose values are always 1).
+                     begin this grammar rule.

    labels        -- a list of (x, y) pairs where x is either a token
                     number or a symbol number, and y is either None
@@ -92,14 +80,12 @@ class Grammar:
            "static label labels[{n_labels}] = {{\n".format(n_labels=len(self.labels))
        )
        for label, name in self.labels:
-            if name is None:
-                writer("    {{{label}, 0}},\n".format(label=label))
-            else:
-                writer(
-                    '    {{{label}, "{label_name}"}},\n'.format(
-                        label=label, label_name=name
-                    )
+            label_name = '"{}"'.format(name) if name is not None else 0
+            writer(
+                '    {{{label}, {label_name}}},\n'.format(
+                    label=label, label_name=label_name
                )
+            )
        writer("};\n")

    def print_dfas(self, writer):
@@ -114,10 +100,9 @@ class Grammar:
                + "0, {n_states}, states_{dfa_index},\n".format(
                    n_states=len(dfa), dfa_index=dfaindex
                )
+                + '     "'
            )
-            writer('     "')

-            k = [name for label, name in self.labels if label in first_sets]
            bitset = bytearray((len(self.labels) >> 3) + 1)
            for token in first_sets:
                bitset[token >> 3] |= 1 << (token & 7)

--- a/Parser/pgen/pgen.py
+++ b/Parser/pgen/pgen.py
@@ -3,6 +3,7 @@ import tokenize  # from stdlib

 from . import grammar, token

+
 class ParserGenerator(object):

    def __init__(self, grammar_file, token_file, stream=None, verbose=False):
@@ -183,11 +184,8 @@ class ParserGenerator(object):
            dfa = self.make_dfa(a, z)
            if self.verbose:
                self.dump_dfa(name, dfa)
-            oldlen = len(dfa)
            self.simplify_dfa(dfa)
-            newlen = len(dfa)
            dfas[name] = dfa
-            #print name, oldlen, newlen
            if startsymbol is None:
                startsymbol = name
        return dfas, startsymbol
@@ -355,7 +353,7 @@ class ParserGenerator(object):
        if args:
            try:
                msg = msg % args
-            except:
+            except Exception:
                msg = " ".join([msg] + list(map(str, args)))
        raise SyntaxError(msg, (self.filename, self.end[0],
                                self.end[1], self.line))

--- a/Parser/pgen/token.py
+++ b/Parser/pgen/token.py
 import itertools

+
 def generate_tokens(tokens):
    numbers = itertools.count(0)
    for line in tokens:
@@ -16,6 +17,7 @@ def generate_tokens(tokens):
    yield ('N_TOKENS', next(numbers))
    yield ('NT_OFFSET', 256)

+
 def generate_opmap(tokens):
    for line in tokens:
        line = line.strip()