Scanning.py 14.4 KB
Newer Older
1
# cython: infer_types=True, language_level=3
William Stein's avatar
William Stein committed
2
#
3
#   Cython Scanner
William Stein's avatar
William Stein committed
4 5
#

Lisandro Dalcin's avatar
Lisandro Dalcin committed
6
import sys
William Stein's avatar
William Stein committed
7
import os
8
import platform
William Stein's avatar
William Stein committed
9

Robert Bradshaw's avatar
Robert Bradshaw committed
10
import cython
11
cython.declare(EncodedString=object, string_prefixes=object, raw_prefixes=object, IDENT=unicode,
12
               print_function=object)
Robert Bradshaw's avatar
Robert Bradshaw committed
13

14
from Cython import Plex, Utils
15
from Cython.Plex.Scanners import Scanner
William Stein's avatar
William Stein committed
16
from Cython.Plex.Errors import UnrecognizedInput
William Stein's avatar
William Stein committed
17
from Errors import CompileError, error
Robert Bradshaw's avatar
Robert Bradshaw committed
18
from Lexicon import string_prefixes, raw_prefixes, make_lexicon, IDENT
19
from Future import print_function
William Stein's avatar
William Stein committed
20

21
from StringEncoding import EncodedString
22

William Stein's avatar
William Stein committed
23 24 25 26 27 28 29 30 31 32
# Module-level debugging switches.
debug_scanner = 0
trace_scanner = 0  # copied into PyrexScanner.trace by PyrexScanner.__init__
scanner_debug_flags = 0
scanner_dump_file = None

# Cache for the lexicon built by make_lexicon(); filled in by get_lexicon().
lexicon = None

def get_lexicon():
    """Return the module-wide lexicon, building it with make_lexicon() on first use."""
    global lexicon
    if lexicon:
        return lexicon
    lexicon = make_lexicon()
    return lexicon
35

William Stein's avatar
William Stein committed
36 37
#------------------------------------------------------------------

38
# Words that PyrexScanner reports as keyword tokens instead of IDENT when
# scanning a Python (.py) source.  Each word appears exactly once; the
# scanner converts the list into a set, so the order carries no meaning.
py_reserved_words = [
    "global", "nonlocal", "def", "class", "print", "del", "pass", "break",
    "continue", "return", "raise", "import", "exec", "try",
    "except", "finally", "while", "if", "elif", "else", "for",
    "in", "assert", "and", "or", "not", "is", "lambda",
    "from", "yield", "with",
]

# Keyword list used for every other source type: the Python keywords plus
# the Cython-specific ones.
pyx_reserved_words = py_reserved_words + [
    "include", "ctypedef", "cdef", "cpdef",
    "cimport", "DEF", "IF", "ELIF", "ELSE",
]

51
class Method(object):
    """Callable that forwards (stream, text) to the method called `name` on `stream`."""

    def __init__(self, name):
        # __name__ mirrors name so that Plex tracing can display it.
        self.name = self.__name__ = name

    def __call__(self, stream, text):
        """Look up self.name on `stream` and call it with `text`."""
        bound_action = getattr(stream, self.name)
        return bound_action(text)

#------------------------------------------------------------------

62
class CompileTimeScope(object):
    """Mapping of names to values that can be chained to an optional outer scope."""

    def __init__(self, outer = None):
        self.outer = outer
        self.entries = {}

    def declare(self, name, value):
        """Bind `name` to `value` in this scope, replacing any previous binding."""
        self.entries[name] = value

    def lookup_here(self, name):
        """Return the value bound to `name` in this scope only; KeyError if absent."""
        return self.entries[name]

    def __contains__(self, name):
        """Report whether `name` is bound in this scope (outer scopes are not consulted)."""
        return name in self.entries

    def lookup(self, name):
        """Return the value of `name` from this scope or, failing that, from the outer chain.

        Raises KeyError when no scope in the chain binds `name`.
        """
        try:
            return self.lookup_here(name)
        except KeyError:
            enclosing = self.outer
            if not enclosing:
                # No further scope to ask: let the KeyError propagate.
                raise
            return enclosing.lookup(name)

def initial_compile_time_env():
    """Build the initial compile-time environment.

    An inner scope holds the UNAME_* values taken from platform.uname() and
    those listed builtins that exist in the running interpreter.  The
    returned scope is a fresh, empty CompileTimeScope whose outer scope is
    that inner one.
    """
    try:
        import __builtin__ as builtins
    except ImportError:
        import builtins

    base_env = CompileTimeScope()

    uname_names = ('UNAME_SYSNAME', 'UNAME_NODENAME', 'UNAME_RELEASE',
        'UNAME_VERSION', 'UNAME_MACHINE')
    for uname_name, uname_value in zip(uname_names, platform.uname()):
        base_env.declare(uname_name, uname_value)

    builtin_names = ('False', 'True',
        'abs', 'bool', 'chr', 'cmp', 'complex', 'dict', 'divmod', 'enumerate',
        'float', 'hash', 'hex', 'int', 'len', 'list', 'long', 'map', 'max', 'min',
        'oct', 'ord', 'pow', 'range', 'reduce', 'repr', 'round', 'slice', 'str',
        'sum', 'tuple', 'xrange', 'zip')
    for builtin_name in builtin_names:
        # Names missing from the builtins module are skipped (likely Py3).
        if hasattr(builtins, builtin_name):
            base_env.declare(builtin_name, getattr(builtins, builtin_name))

    return CompileTimeScope(base_env)

#------------------------------------------------------------------

113
class SourceDescriptor(object):
    """
    A SourceDescriptor should be considered immutable.

    Subclasses are expected to provide get_description(), which
    get_escaped_description() calls.
    """
    _file_type = 'pyx'

    _escaped_description = None
    _cmp_name = ''

    def __str__(self):
        assert False # To catch all places where a descriptor is used directly as a filename

    def set_file_type_from_name(self, filename):
        """Set the file type from the extension of `filename`; unknown extensions mean 'pyx'."""
        extension = os.path.splitext(filename)[1]
        if extension in ('.pyx', '.pxd', '.py'):
            self._file_type = extension[1:]
        else:
            self._file_type = 'pyx'

    def is_cython_file(self):
        """True when the file type is 'pyx' or 'pxd'."""
        return self._file_type in ('pyx', 'pxd')

    def is_python_file(self):
        """True when the file type is 'py'."""
        return self._file_type == 'py'

    def get_escaped_description(self):
        """Return the description with non-ASCII characters replaced, caching the result."""
        cached = self._escaped_description
        if cached is None:
            ascii_bytes = self.get_description().encode('ASCII', 'replace')
            cached = self._escaped_description = ascii_bytes.decode("ASCII")
        return cached

    # The comparison methods below only exist to provide some sort of order.
    # Comparing against an object that has no _cmp_name yields False.

    def __gt__(self, other):
        try:
            mine, theirs = self._cmp_name, other._cmp_name
            return mine > theirs
        except AttributeError:
            return False

    def __lt__(self, other):
        try:
            mine, theirs = self._cmp_name, other._cmp_name
            return mine < theirs
        except AttributeError:
            return False

    def __le__(self, other):
        try:
            mine, theirs = self._cmp_name, other._cmp_name
            return mine <= theirs
        except AttributeError:
            return False

161 162 163 164 165 166 167 168
class FileSourceDescriptor(SourceDescriptor):
    """
    Represents a code source. A code source is a more generic abstraction
    for a "filename" (as sometimes the code doesn't come from a file).
    Instances of code sources are passed to Scanner.__init__ as the
    optional name argument and will be passed back when asking for
    the position()-tuple.
    """
    def __init__(self, filename, path_description=None):
        # Everything below works with the decoded form of the name.
        decoded_name = Utils.decode_filename(filename)
        self.filename = decoded_name
        self._cmp_name = decoded_name
        # Without an explicit description the file name doubles as one.
        self.path_description = path_description or decoded_name
        self.set_file_type_from_name(decoded_name)

    def get_lines(self, encoding=None, error_handling=None):
        """Return whatever Utils.open_source_file() gives for this file."""
        # newline normalisation is costly before Py2.6
        return Utils.open_source_file(
            self.filename,
            encoding=encoding,
            error_handling=error_handling,
            require_normalised_newlines=False)

    def get_description(self):
        """Return the path description chosen at construction time."""
        return self.path_description

    def get_filenametable_entry(self):
        """Return the (decoded) file name."""
        return self.filename

    def __eq__(self, other):
        # Equal only to another FileSourceDescriptor with the same file name.
        if not isinstance(other, FileSourceDescriptor):
            return False
        return self.filename == other.filename

    def __hash__(self):
        return hash(self.filename)

    def __repr__(self):
        return "<FileSourceDescriptor:%s>" % self.filename
197 198 199 200 201 202

class StringSourceDescriptor(SourceDescriptor):
    """
    Instances of this class can be used instead of a filename if the
    code originates from a string object.
    """
    filename = None

    def __init__(self, name, code):
        self.name = name
        # The file type is not derived from `name`, so the class-level
        # default stays in effect:
        #self.set_file_type_from_name(name)
        # Every piece produced by splitting on "\n" gets a trailing newline.
        self.codelines = [x + "\n" for x in code.split("\n")]
        self._cmp_name = name

    def get_lines(self, encoding=None, error_handling=None):
        """Return the source lines.

        Without `encoding` the stored lines are returned as they are.  With
        an encoding, every line is encoded and decoded again using
        `error_handling` as the error policy for the encoding step.  A
        missing policy falls back to 'strict', because str.encode() does
        not accept None for its `errors` argument.
        """
        if not encoding:
            return self.codelines
        if error_handling is None:
            error_handling = 'strict'
        return [ line.encode(encoding, error_handling).decode(encoding)
                 for line in self.codelines ]

    def get_description(self):
        """Return the name given at construction time."""
        return self.name

    def get_filenametable_entry(self):
        return "stringsource"

    def __hash__(self):
        return hash(self.name)

    def __eq__(self, other):
        # Equal only to another StringSourceDescriptor with the same name.
        return isinstance(other, StringSourceDescriptor) and self.name == other.name

    def __repr__(self):
        return "<StringSourceDescriptor:%s>" % self.name
232 233 234

#------------------------------------------------------------------

William Stein's avatar
William Stein committed
235
class PyrexScanner(Scanner):
236
    #  context            Context           Compilation context
    #  included_files     [string]          Files included with 'include' statement
    #  compile_time_env   CompileTimeScope  Environment for conditional compilation
    #  compile_time_eval  boolean           In a true conditional compilation context
    #  compile_time_expr  boolean           In a compile-time expression context
William Stein's avatar
William Stein committed
241

242
    def __init__(self, file, filename, parent_scanner = None,
                 scope = None, context = None, source_encoding=None, parse_comments=True, initial_pos=None):
        """Set up the scanner and read the first token.

        file, filename, initial_pos -- handed to Scanner.__init__ together
                           with the shared lexicon; `filename` must offer
                           is_python_file()
        parent_scanner  -- when given, the context, the included-files list
                           and the compile-time state are taken from it and
                           `scope` / `context` are not used
        scope           -- supplies `included_files` when there is no parent
        context         -- compilation context when there is no parent
        source_encoding -- stored as self.source_encoding
        parse_comments  -- whether commentline() produces tokens
        """
        Scanner.__init__(self, get_lexicon(), file, filename, initial_pos)
        if not parent_scanner:
            # Top-level scanner: start with a fresh compile-time environment.
            self.context = context
            self.included_files = scope.included_files
            self.compile_time_env = initial_compile_time_env()
            self.compile_time_eval = 1
            self.compile_time_expr = 0
        else:
            # Nested scanner: share the state of the parent.
            self.context = parent_scanner.context
            self.included_files = parent_scanner.included_files
            self.compile_time_env = parent_scanner.compile_time_env
            self.compile_time_eval = parent_scanner.compile_time_eval
            self.compile_time_expr = parent_scanner.compile_time_expr
        self.source_encoding = source_encoding
        self.parse_comments = parse_comments
        # Python sources get the Python keyword list, all others the Cython one.
        if not filename.is_python_file():
            self.in_python_file = False
            self.keywords = cython.set(pyx_reserved_words)
        else:
            self.in_python_file = True
            self.keywords = cython.set(py_reserved_words)
        self.trace = trace_scanner
        self.indentation_char = None
        self.indentation_stack = [0]
        self.bracket_nesting_level = 0
        self.begin('INDENT')
        # self.sy has to exist before next() runs: error() reads it.
        self.sy = ''
        self.next()
272

273 274
    def commentline(self, text):
        """Produce a 'commentline' token carrying `text`, unless comment parsing is off."""
        if not self.parse_comments:
            return
        self.produce('commentline', text)

William Stein's avatar
William Stein committed
277 278 279 280 281 282 283 284 285 286 287 288 289 290 291
    def current_level(self):
        """Return the innermost indentation level, i.e. the top of the indentation stack."""
        levels = self.indentation_stack
        return levels[-1]

    def open_bracket_action(self, text):
        """Count one more open bracket and hand `text` back unchanged."""
        self.bracket_nesting_level += 1
        return text

    def close_bracket_action(self, text):
        """Count one open bracket less and hand `text` back unchanged."""
        self.bracket_nesting_level -= 1
        return text

    def newline_action(self, text):
        """Outside of brackets, switch to the 'INDENT' state and produce a NEWLINE token."""
        if self.bracket_nesting_level != 0:
            # Inside brackets a line break produces nothing.
            return
        self.begin('INDENT')
        self.produce('NEWLINE', '')
292

William Stein's avatar
William Stein committed
293 294 295 296 297 298
    # Maps an opening quote sequence to the name of the scanner state that
    # begin_string_action() switches to for that kind of string literal.
    string_states = {
        "'":   'SQ_STRING',
        '"':   'DQ_STRING',
        "'''": 'TSQ_STRING',
        '"""': 'TDQ_STRING'
    }
299

William Stein's avatar
William Stein committed
300 301 302
    def begin_string_action(self, text):
        """Enter the string state matching the opening quotes in `text`.

        At most one leading character from string_prefixes and then at most
        one from raw_prefixes is stripped; what remains must be a key of
        string_states.  A 'BEGIN_STRING' token is produced afterwards.
        """
        quotes = text
        for prefix_chars in (string_prefixes, raw_prefixes):
            if quotes[:1] in prefix_chars:
                quotes = quotes[1:]
        state_name = self.string_states[quotes]
        self.begin(state_name)
        self.produce('BEGIN_STRING')
307

William Stein's avatar
William Stein committed
308 309 310
    def end_string_action(self, text):
        """Switch back to the '' state and produce an 'END_STRING' token."""
        # `text` is not used.
        self.begin('')
        self.produce('END_STRING')
311

William Stein's avatar
William Stein committed
312 313 314 315 316 317 318 319
    def unclosed_string_action(self, text):
        """Finish the string as end_string_action() does, then report the missing close."""
        self.end_string_action(text)
        self.error("Unclosed string literal")

    def indentation_action(self, text):
        """Turn the leading whitespace `text` of a line into INDENT/DEDENT tokens.

        The scanner is switched back to the '' state first.  The first
        indentation character ever seen is remembered; a different first
        character on a later line, or a mixture of characters within
        `text`, is reported as an error.  The new level is len(text): a
        deeper level is pushed and produces one INDENT, a shallower level
        pops the stack with one DEDENT per popped entry and must then match
        the level found on top of the stack.
        """
        self.begin('')
        # Check that tabs and spaces are being used consistently.
        if text:
            indent_char = text[0]
            if self.indentation_char is None:
                self.indentation_char = indent_char
            elif self.indentation_char != indent_char:
                self.error("Mixed use of tabs and spaces")
            if text.replace(indent_char, "") != "":
                self.error("Mixed use of tabs and spaces")
        # Figure out how many indents/dedents to do
        previous_level = self.current_level()
        new_level = len(text)
        if new_level > previous_level:
            self.indentation_stack.append(new_level)
            self.produce('INDENT', '')
        elif new_level < previous_level:
            while new_level < self.current_level():
                self.indentation_stack.pop()
                self.produce('DEDENT', '')
            if new_level != self.current_level():
                self.error("Inconsistent indentation")

    def eof_action(self, text):
        """Produce a DEDENT for every indentation level still open, then the EOF token."""
        levels = self.indentation_stack
        # The bottom entry of the stack is never popped.
        while len(levels) > 1:
            self.produce('DEDENT', '')
            levels.pop()
        self.produce('EOF', '')

    def next(self):
        """Read the next token and store it in self.sy / self.systring.

        An IDENT whose text is in self.keywords becomes a keyword token (sy
        is set to the text itself).  Two words are dropped from the keyword
        set and stay identifiers instead: 'print' when print_function is
        among the context's future directives, and 'exec' when the
        context's language level is 3 or higher.  The text of every
        identifier that stays an IDENT is wrapped in EncodedString.
        """
        try:
            sy, systring = self.read()
        except UnrecognizedInput:
            self.error("Unrecognized character")
        if sy == IDENT:
            if systring not in self.keywords:
                systring = EncodedString(systring)
            elif systring == u'print' and print_function in self.context.future_directives:
                self.keywords.discard('print')
                systring = EncodedString(systring)
            elif systring == u'exec' and self.context.language_level >= 3:
                self.keywords.discard('exec')
                systring = EncodedString(systring)
            else:
                sy = systring
        self.sy = sy
        self.systring = systring
        if False: # debug_scanner:
            _, line, col = self.position()
            if not self.systring or self.sy == self.systring:
                t = self.sy
            else:
                t = "%s %s" % (self.sy, self.systring)
            print("--- %3d %2d %s" % (line, col, t))
384

385 386 387 388 389 390 391
    def peek(self):
        """Return the (sy, systring) pair of the following token without consuming it."""
        current_sy, current_systring = self.sy, self.systring
        self.next()
        upcoming = (self.sy, self.systring)
        # Push the token back and restore the current one.
        self.unread(upcoming[0], upcoming[1])
        self.sy = current_sy
        self.systring = current_systring
        return upcoming
392

William Stein's avatar
William Stein committed
393 394 395 396
    def put_back(self, sy, systring):
        """Make (sy, systring) the current token; the former current token is unread."""
        self.unread(self.sy, self.systring)
        self.sy, self.systring = sy, systring
397

William Stein's avatar
William Stein committed
398 399 400
    def unread(self, token, value):
        """Insert (token, value) at the front of self.queue."""
        # This method should be added to Plex
        entry = (token, value)
        self.queue.insert(0, entry)
401

402
    def error(self, message, pos = None, fatal = True):
        """Report `message` through the module-level error() function.

        pos   -- position to report; defaults to self.position()
        fatal -- when true, the object returned by error() is raised

        While the current token is 'INDENT', an additional "Possible
        inconsistent indentation" error is reported first.
        """
        if pos is None:
            pos = self.position()
        if self.sy == 'INDENT':
            # Reported only; the value returned for this hint is not kept.
            error(pos, "Possible inconsistent indentation")
        failure = error(pos, message)
        if fatal:
            raise failure
409

William Stein's avatar
William Stein committed
410 411 412 413
    def expect(self, what, message = None):
        """Consume the current token if its symbol is `what`, else report via expected()."""
        if self.sy != what:
            self.expected(what, message)
        else:
            self.next()
415

416
    def expect_keyword(self, what, message = None):
        """Consume the current token if it is an IDENT with text `what`, else report via expected()."""
        if self.sy != IDENT or self.systring != what:
            self.expected(what, message)
        else:
            self.next()
421

422
    def expected(self, what, message = None):
        """Report that `what` was expected.

        A non-empty `message` is reported as it is.  Otherwise the message
        names `what` and what was found: the identifier text for an IDENT,
        the symbol for anything else.
        """
        if not message:
            if self.sy == IDENT:
                found = self.systring
            else:
                found = self.sy
            message = "Expected '%s', found '%s'" % (what, found)
        self.error(message)
431

William Stein's avatar
William Stein committed
432 433 434 435 436 437 438 439
    def expect_indent(self):
        """Require an 'INDENT' token at the current position."""
        complaint = "Expected an increase in indentation level"
        self.expect('INDENT', complaint)

    def expect_dedent(self):
        """Require a 'DEDENT' token at the current position."""
        complaint = "Expected a decrease in indentation level"
        self.expect('DEDENT', complaint)

440
    def expect_newline(self, message = "Expected a newline"):
        """Require a 'NEWLINE' token, unless the current token is 'EOF'."""
        # Expect either a newline or end of file
        if self.sy == 'EOF':
            return
        self.expect('NEWLINE', message)