import re from pygments.lexer import Lexer, RegexLexer, ExtendedRegexLexer, \ LexerContext, include, combined, do_insertions, bygroups, using from pygments.token import Error, Text, \ Comment, Operator, Keyword, Name, String, Number, Generic, Punctuation from pygments.util import get_bool_opt, get_list_opt, shebang_matches from pygments import unistring as uni from sphinx import highlighting line_re = re.compile('.*?\n') class CythonLexer(RegexLexer): """ For `Cython <http://cython.org>`_ source code. """ name = 'Cython' aliases = ['cython', 'pyx'] filenames = ['*.pyx', '*.pxd', '*.pxi'] mimetypes = ['text/x-cython', 'application/x-cython'] tokens = { 'root': [ (r'\n', Text), (r'^(\s*)("""(?:.|\n)*?""")', bygroups(Text, String.Doc)), (r"^(\s*)('''(?:.|\n)*?''')", bygroups(Text, String.Doc)), (r'[^\S\n]+', Text), (r'#.*$', Comment), (r'[]{}:(),;[]', Punctuation), (r'\\\n', Text), (r'\\', Text), (r'(in|is|and|or|not)\b', Operator.Word), (r'(<)([a-zA-Z0-9.?]+)(>)', bygroups(Punctuation, Keyword.Type, Punctuation)), (r'!=|==|<<|>>|[-~+/*%=<>&^|.?]', Operator), (r'(from)(\d+)(<=)(\s+)(<)(\d+)(:)', bygroups(Keyword, Number.Integer, Operator, Name, Operator, Name, Punctuation)), include('keywords'), (r'(def|property)(\s+)', bygroups(Keyword, Text), 'funcname'), (r'(cp?def)(\s+)', bygroups(Keyword, Text), 'cdef'), (r'(class|struct)(\s+)', bygroups(Keyword, Text), 'classname'), (r'(from)(\s+)', bygroups(Keyword, Text), 'fromimport'), (r'(c?import)(\s+)', bygroups(Keyword, Text), 'import'), include('builtins'), include('backtick'), ('(?:[rR]|[uU][rR]|[rR][uU])"""', String, 'tdqs'), ("(?:[rR]|[uU][rR]|[rR][uU])'''", String, 'tsqs'), ('(?:[rR]|[uU][rR]|[rR][uU])"', String, 'dqs'), ("(?:[rR]|[uU][rR]|[rR][uU])'", String, 'sqs'), ('[uU]?"""', String, combined('stringescape', 'tdqs')), ("[uU]?'''", String, combined('stringescape', 'tsqs')), ('[uU]?"', String, combined('stringescape', 'dqs')), ("[uU]?'", String, combined('stringescape', 'sqs')), include('name'), include('numbers'), ], 'keywords': [ (r'(assert|break|by|continue|ctypedef|del|elif|else|except\??|exec|' r'finally|for|gil|global|if|include|lambda|nogil|pass|print|raise|' r'return|try|while|yield|as|with)\b', Keyword), (r'(DEF|IF|ELIF|ELSE)\b', Comment.Preproc), ], 'builtins': [ (r'(?<!\.)(__import__|abs|all|any|apply|basestring|bin|bool|buffer|' r'bytearray|bytes|callable|chr|classmethod|cmp|coerce|compile|' r'complex|delattr|dict|dir|divmod|enumerate|eval|execfile|exit|' r'file|filter|float|frozenset|getattr|globals|hasattr|hash|hex|id|' r'input|int|intern|isinstance|issubclass|iter|len|list|locals|' r'long|map|max|min|next|object|oct|open|ord|pow|property|range|' r'raw_input|reduce|reload|repr|reversed|round|set|setattr|slice|' r'sorted|staticmethod|str|sum|super|tuple|type|unichr|unicode|' r'vars|xrange|zip)\b', Name.Builtin), (r'(?<!\.)(self|None|Ellipsis|NotImplemented|False|True|NULL' r')\b', Name.Builtin.Pseudo), (r'(?<!\.)(ArithmeticError|AssertionError|AttributeError|' r'BaseException|DeprecationWarning|EOFError|EnvironmentError|' r'Exception|FloatingPointError|FutureWarning|GeneratorExit|IOError|' r'ImportError|ImportWarning|IndentationError|IndexError|KeyError|' r'KeyboardInterrupt|LookupError|MemoryError|NameError|' r'NotImplemented|NotImplementedError|OSError|OverflowError|' r'OverflowWarning|PendingDeprecationWarning|ReferenceError|' r'RuntimeError|RuntimeWarning|StandardError|StopIteration|' r'SyntaxError|SyntaxWarning|SystemError|SystemExit|TabError|' r'TypeError|UnboundLocalError|UnicodeDecodeError|' r'UnicodeEncodeError|UnicodeError|UnicodeTranslateError|' r'UnicodeWarning|UserWarning|ValueError|Warning|ZeroDivisionError' r')\b', Name.Exception), ], 'numbers': [ (r'(\d+\.?\d*|\d*\.\d+)([eE][+-]?[0-9]+)?', Number.Float), (r'0\d+', Number.Oct), (r'0[xX][a-fA-F0-9]+', Number.Hex), (r'\d+L', Number.Integer.Long), (r'\d+', Number.Integer) ], 'backtick': [ ('`.*?`', String.Backtick), ], 'name': [ (r'@[a-zA-Z0-9_]+', Name.Decorator), ('[a-zA-Z_][a-zA-Z0-9_]*', Name), ], 'funcname': [ ('[a-zA-Z_][a-zA-Z0-9_]*', Name.Function, '#pop') ], 'cdef': [ (r'(public|readonly|extern|api|inline)\b', Keyword.Reserved), (r'(struct|enum|union|class)\b', Keyword), (r'([a-zA-Z_][a-zA-Z0-9_]*)(\s*)(?=[(:#=]|$)', bygroups(Name.Function, Text), '#pop'), (r'([a-zA-Z_][a-zA-Z0-9_]*)(\s*)(,)', bygroups(Name.Function, Text, Punctuation)), (r'from\b', Keyword, '#pop'), (r'as\b', Keyword), (r':', Punctuation, '#pop'), (r'(?=["\'])', Text, '#pop'), (r'[a-zA-Z_][a-zA-Z0-9_]*', Keyword.Type), (r'.', Text), ], 'classname': [ ('[a-zA-Z_][a-zA-Z0-9_]*', Name.Class, '#pop') ], 'import': [ (r'(\s+)(as)(\s+)', bygroups(Text, Keyword, Text)), (r'[a-zA-Z_][a-zA-Z0-9_.]*', Name.Namespace), (r'(\s*)(,)(\s*)', bygroups(Text, Operator, Text)), (r'', Text, '#pop') # all else: go back ], 'fromimport': [ (r'(\s+)(c?import)\b', bygroups(Text, Keyword), '#pop'), (r'[a-zA-Z_.][a-zA-Z0-9_.]*', Name.Namespace), # ``cdef foo from "header"``, or ``for foo from 0 < i < 10`` (r'', Text, '#pop'), ], 'stringescape': [ (r'\\([\\abfnrtv"\']|\n|N{.*?}|u[a-fA-F0-9]{4}|' r'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})', String.Escape) ], 'strings': [ (r'%(\([a-zA-Z0-9]+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?' '[hlL]?[diouxXeEfFgGcrs%]', String.Interpol), (r'[^\\\'"%\n]+', String), # quotes, percents and backslashes must be parsed one at a time (r'[\'"\\]', String), # unhandled string formatting sign (r'%', String) # newlines are an error (use "nl" state) ], 'nl': [ (r'\n', String) ], 'dqs': [ (r'"', String, '#pop'), (r'\\\\|\\"|\\\n', String.Escape), # included here again for raw strings include('strings') ], 'sqs': [ (r"'", String, '#pop'), (r"\\\\|\\'|\\\n", String.Escape), # included here again for raw strings include('strings') ], 'tdqs': [ (r'"""', String, '#pop'), include('strings'), include('nl') ], 'tsqs': [ (r"'''", String, '#pop'), include('strings'), include('nl') ], } ##TODO: fix this, as shebang lines don't make sense for cython. def analyse_text(text): return shebang_matches(text, r'pythonw?(2\.\d)?') highlighting.lexers['cython'] = CythonLexer()