Commit a3477848 authored by Kevin Modzelewski

another sre_compile benchmark

parent cae875ec
import sre_compile
# Benchmark corpus: every regular expression created during django's startup.
#
# Notes for readers:
#   * Entries are a mix of plain ('...') and unicode (u'...') string literals.
#   * Some patterns appear more than once (e.g. the empty pattern and the
#     "$"-template pattern).
#     NOTE(review): presumably the duplicates mirror repeated compilations
#     during startup -- confirm before de-duplicating.
#   * The loop after this list compiles each entry with flags=0, so the
#     multi-line, commented patterns below are compiled WITHOUT verbose mode
#     unless they carry an inline "(?x)" flag themselves.
#   * These strings are the benchmark input; do not reformat or "clean up"
#     their contents.
patterns = [
'',
'\n \\$(?:\n (?P<escaped>\\$) | # Escape sequence of two delimiters\n (?P<named>[_a-z][_a-z0-9]*) | # delimiter and a Python identifier\n {(?P<braced>[_a-z][_a-z0-9]*)} | # delimiter and a braced identifier\n (?P<invalid>) # Other ill-formed delimiter exprs\n )\n ',
'^[a-zA-Z](?:[a-zA-Z0-9-]*)$',
'^([a-zA-Z](?:[a-zA-Z0-9-]*))=!([a-zA-Z](?:[a-zA-Z0-9-]*))$',
'^[a-zA-Z]([a-zA-Z0-9_]*)$',
'\\[(?P<header>[^]]+)\\]',
'(?P<option>[^:=\\s][^:=]*)\\s*(?P<vi>[:=])\\s*(?P<value>.*)$',
'(?P<option>[^:=\\s][^:=]*)\\s*(?:(?P<vi>[:=])\\s*(?P<value>.*))?$',
'%\\(([^)]*)\\)s|.',
'%\\(([^)]+)\\)s',
'([a-zA-Z][a-zA-Z0-9_]+)\\s*=\\s*(.*)',
'\\$\\(([A-Za-z][A-Za-z0-9_]*)\\)',
'\\${([A-Za-z][A-Za-z0-9_]*)}',
'^[a-zA-Z_][a-zA-Z_0-9]*(\\.[a-zA-Z_][a-zA-Z_0-9]*)*$',
'cygwin.*',
'os2emx',
'posix',
'[^\\\\\\\'\\"\t\n\x0b\x0c\r ]*',
"'(?:[^'\\\\]|\\\\.)*'",
'"(?:[^"\\\\]|\\\\.)*"',
'',
'sys.exc_clear',
'threading',
' # A numeric string consists of:\n# \\s*\n (?P<sign>[-+])? # an optional sign, followed by either...\n (\n (?=\\d|\\.\\d) # ...a number (with at least one digit)\n (?P<int>\\d*) # having a (possibly empty) integer part\n (\\.(?P<frac>\\d*))? # followed by an optional fractional part\n (E(?P<exp>[-+]?\\d+))? # followed by an optional exponent, or...\n |\n Inf(inity)? # ...an infinity, or...\n |\n (?P<signal>s)? # ...an (optionally signaling)\n NaN # NaN\n (?P<diag>\\d*) # with (possibly empty) diagnostic info.\n )\n# \\s*\n \\Z\n',
'0*$',
'50*$',
'\\A\n(?:\n (?P<fill>.)?\n (?P<align>[<>=^])\n)?\n(?P<sign>[-+ ])?\n(?P<zeropad>0)?\n(?P<minimumwidth>(?!0)\\d+)?\n(?P<thousands_sep>,)?\n(?:\\.(?P<precision>0|(?!0)\\d+))?\n(?P<type>[eEfFgGn%])?\n\\Z\n',
'%(?:\\((?P<key>.*?)\\))?(?P<modifiers>[-#0-9 +*.hlL]*?)[eEfFgGdiouxXcrs%]',
'\n \\$(?:\n (?P<escaped>\\$) | # Escape sequence of two delimiters\n (?P<named>[_a-z][_a-z0-9]*) | # delimiter and a Python identifier\n {(?P<braced>[_a-z][_a-z0-9]*)} | # delimiter and a braced identifier\n (?P<invalid>) # Other ill-formed delimiter exprs\n )\n ',
'[ \\f\\t]*(\\\\\\r?\\n[ \\f\\t]*)*(#[^\\r\\n]*)?(((\\d+[jJ]|((\\d+\\.\\d*|\\.\\d+)([eE][-+]?\\d+)?|\\d+[eE][-+]?\\d+)[jJ])|((\\d+\\.\\d*|\\.\\d+)([eE][-+]?\\d+)?|\\d+[eE][-+]?\\d+)|(0[xX][\\da-fA-F]+[lL]?|0[bB][01]+[lL]?|(0[oO][0-7]+)|(0[0-7]*)[lL]?|[1-9]\\d*[lL]?))|((\\*\\*=?|>>=?|<<=?|<>|!=|//=?|[+\\-*/%&|^=<>]=?|~)|[][(){}]|(\\r?\\n|[:;.,`@]))|([uUbB]?[rR]?\'[^\\n\'\\\\]*(?:\\\\.[^\\n\'\\\\]*)*\'|[uUbB]?[rR]?"[^\\n"\\\\]*(?:\\\\.[^\\n"\\\\]*)*")|[a-zA-Z_]\\w*)',
'[ \\f\\t]*((\\\\\\r?\\n|\\Z|#[^\\r\\n]*|([uUbB]?[rR]?\'\'\'|[uUbB]?[rR]?"""))|((\\d+[jJ]|((\\d+\\.\\d*|\\.\\d+)([eE][-+]?\\d+)?|\\d+[eE][-+]?\\d+)[jJ])|((\\d+\\.\\d*|\\.\\d+)([eE][-+]?\\d+)?|\\d+[eE][-+]?\\d+)|(0[xX][\\da-fA-F]+[lL]?|0[bB][01]+[lL]?|(0[oO][0-7]+)|(0[0-7]*)[lL]?|[1-9]\\d*[lL]?))|((\\*\\*=?|>>=?|<<=?|<>|!=|//=?|[+\\-*/%&|^=<>]=?|~)|[][(){}]|(\\r?\\n|[:;.,`@]))|([uUbB]?[rR]?\'[^\\n\'\\\\]*(?:\\\\.[^\\n\'\\\\]*)*(\'|\\\\\\r?\\n)|[uUbB]?[rR]?"[^\\n"\\\\]*(?:\\\\.[^\\n"\\\\]*)*("|\\\\\\r?\\n))|[a-zA-Z_]\\w*)',
"[^'\\\\]*(?:(?:\\\\.|'(?!''))[^'\\\\]*)*'''",
'[^"\\\\]*(?:(?:\\\\.|"(?!""))[^"\\\\]*)*"""',
"[^'\\\\]*(?:\\\\.[^'\\\\]*)*'",
'[^"\\\\]*(?:\\\\.[^"\\\\]*)*"',
'([\x00-\x7f]+)',
'(\\s+|[^\\s\\w]*\\w+[^0-9\\W]-(?=\\w+[^0-9\\W])|(?<=[\\w\\!\\"\\\'\\&\\.\\,\\?])-{2,}(?=\\w))',
'(\\s+)',
'[abcdefghijklmnopqrstuvwxyz][\\.\\!\\?][\\"\\\']?\\Z',
'^[ \t]+$',
'(^[ \t]*)(?:[^ \t\n])',
'[A-Z][A-Z0-9_]+$',
'\\\\[0-3][0-7][0-7]',
'[\\\\].',
'(?x)(?P<key>[\\w\\d!#%&\'~_`><@,:/\\$\\*\\+\\-\\.\\^\\|\\)\\(\\?\\}\\{\\=]+?)\\s*=\\s*(?P<val>"(?:[^\\\\"]|\\\\.)*"|\\w{3},\\s[\\s\\w\\d-]{9,11}\\s[\\d:]{8}\\sGMT|[\\w\\d!#%&\'~_`><@,:/\\$\\*\\+\\-\\.\\^\\|\\)\\(\\?\\}\\{\\=]*)\\s*;?',
'(-?(?:0|[1-9]\\d*))(\\.\\d+)?([eE][-+]?\\d+)?',
'(.*?)(["\\\\\\x00-\\x1f])',
'[ \\t\\n\\r]*',
'[\\x00-\\x1f\\\\"\\b\\f\\n\\r\\t]',
'([\\\\"]|[^\\ -~])',
'[\\x80-\\xff]',
u'\\s*\n\\s*',
u'<.*?>|((?:\\w[-\\w]*|&.*?;)+)',
u'<.*?>|(.)',
u'<(/)?([^ ]+?)(?:(\\s*/)| .*?)?>',
u'\\r\\n|\\r',
u'(((?<=[a-z])[A-Z])|([A-Z](?![A-Z]|$)))',
u'([\x80-\uffff])',
u'\n ((?:\n [^\\s\'"]*\n (?:\n (?:"(?:[^"\\\\]|\\\\.)*" | \'(?:[^\'\\\\]|\\\\.)*\')\n [^\\s\'"]*\n )+\n ) | \\S+)\n',
u'&(#?[xX]?(?:[0-9a-fA-F]+|\\w{1,8}));',
u'^https?://',
u'^([a-z0-9.-]+|\\[[a-f0-9]*:[a-f0-9:]+\\])(:\\d+)?$',
'[][\\\\()<>@,:;".]',
'[][\\\\()"]',
'\n =\\? # literal =?\n (?P<charset>[^?]*?) # non-greedy up to the next ? is the charset\n \\? # literal ?\n (?P<encoding>[qb]) # either a "q" or a "b", case insensitive\n \\? # literal ?\n (?P<atom>.*?) # non-greedy up to the next ?= is the atom\n \\?= # literal ?=\n ',
'^(?P<name>\\w+)\\*((?P<num>[0-9]+)\\*?)?$',
'[^-a-zA-Z0-9!*+/ ]',
'[^ !-<>-~\\t]',
'\n =\\? # literal =?\n (?P<charset>[^?]*?) # non-greedy up to the next ? is the charset\n \\? # literal ?\n (?P<encoding>[qb]) # either a "q" or a "b", case insensitive\n \\? # literal ?\n (?P<encoded>.*?) # non-greedy up to the next ?= is the encoded string\n \\?= # literal ?=\n (?=[ \\t]|$) # whitespace or the end of the string\n ',
'[\\041-\\176]+:$',
'\\n[^ \\t]+:',
u'^(?:[a-z0-9\\.\\-]*)://(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\\.)+(?:[A-Z]{2,6}\\.?|[A-Z0-9-]{2,}(?<!-)\\.?)|localhost|\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}|\\[?[A-F0-9]*:[A-F0-9:]+\\]?)(?::\\d+)?(?:/?|[/?]\\S+)$',
u'(^[-!#$%&\'*+/=?^_`{}|~0-9A-Z]+(\\.[-!#$%&\'*+/=?^_`{}|~0-9A-Z]+)*$|^"([\\001-\\010\\013\\014\\016-\\037!#-\\[\\]-\\177]|\\\\[\\001-\\011\\013\\014\\016-\\177])*"$)',
u'(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\\.)+(?:[A-Z]{2,6}|[A-Z0-9-]{2,}(?<!-))$',
u'\\[([A-f0-9:\\.]+)\\]$',
u'^[-a-zA-Z0-9_]+$',
u'^(25[0-5]|2[0-4]\\d|[0-1]?\\d?\\d)(\\.(25[0-5]|2[0-4]\\d|[0-1]?\\d?\\d)){3}$',
u'^[\\d,]+$',
'[a-zA-Z][-_.a-zA-Z0-9]*\\s*',
'(\\\'[^\\\']*\\\'|"[^"]*")\\s*',
'--\\s*>',
']\\s*]\\s*>',
']\\s*>',
'[&<]',
'&[a-zA-Z#]',
'&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]',
'&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]',
'<[a-zA-Z]',
'>',
'([a-zA-Z][^\t\n\r\x0c />\x00]*)(?:\\s|/(?!>))*',
'[a-zA-Z][^\t\n\r\x0c />\x00]*',
'((?<=[\\\'"\\s/])[^\\s/>][^\\s/=>]*)(\\s*=+\\s*(\\\'[^\\\']*\\\'|"[^"]*"|(?![\\\'"])[^>\\s]*))?(?:\\s|/(?!>))*',
'\n <[a-zA-Z][^\\t\\n\\r\\f />\\x00]* # tag name\n (?:[\\s/]* # optional whitespace before attribute name\n (?:(?<=[\'"\\s/])[^\\s/>][^\\s/=>]* # attribute name\n (?:\\s*=+\\s* # value indicator\n (?:\'[^\']*\' # LITA-enclosed value\n |"[^"]*" # LIT-enclosed value\n |(?![\'"])[^>\\s]* # bare value\n )\n )?(?:\\s|/(?!>))*\n )*\n )?\n \\s* # trailing whitespace\n',
'</\\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\\s*>',
u'&(?!(\\w+|#\\d+);)',
u'(\\s+)',
u'^https?://\\[?\\w',
u'^www\\.|^(?!http)\\w[^@]+\\.(com|edu|gov|int|mil|net|org)$',
u'^\\S+@\\S+\\.\\S+$',
u'(<a [^>]*?)target=[^\\s>]+',
u'(?:<br clear="all">|<i><\\/i>|<b><\\/b>|<em><\\/em>|<strong><\\/strong>|<\\/?smallcaps>|<\\/?uppercase>)',
u'((?:<p>(?:\\&middot\\;|\\*|\\\u2022|\\&\\#149\\;|\\&bull\\;|\\&\\#8226\\;).*?[a-zA-Z].*?</p>\\s*)+)',
u'(?:<p>(?:&nbsp;|\\s|<br \\/>)*?</p>\\s*)+\\Z',
u'(?<!\\\\)([aAbBcdDeEfFgGhHiIjlLmMnNoOPrsStTUuwWyYzZ])',
u'\\\\(.)',
'((^|[^%])(%%)*%[sy])',
'(?P<year>\\d{4})-(?P<month>\\d{1,2})-(?P<day>\\d{1,2})$',
'(?P<hour>\\d{1,2}):(?P<minute>\\d{1,2})(?::(?P<second>\\d{1,2})(?:\\.(?P<microsecond>\\d{1,6})\\d{0,6})?)?',
'(?P<year>\\d{4})-(?P<month>\\d{1,2})-(?P<day>\\d{1,2})[T ](?P<hour>\\d{1,2}):(?P<minute>\\d{1,2})(?::(?P<second>\\d{1,2})(?:\\.(?P<microsecond>\\d{1,6})\\d{0,6})?)?(?P<tzinfo>Z|[+-]\\d{2}(?::?\\d{2})?)?$',
'\\?|[-+]?[.\\w]+$',
u'(?:W/)?"((?:\\\\.|[^"])*)"',
u'^\\w{3}, (?P<day>\\d{2}) (?P<mon>\\w{3}) (?P<year>\\d{4}) (?P<hour>\\d{2}):(?P<min>\\d{2}):(?P<sec>\\d{2}) GMT$',
u'^\\w{6,9}, (?P<day>\\d{2})-(?P<mon>\\w{3})-(?P<year>\\d{2}) (?P<hour>\\d{2}):(?P<min>\\d{2}):(?P<sec>\\d{2}) GMT$',
u'^\\w{3} (?P<mon>\\w{3}) (?P<day>[ \\d]\\d) (?P<hour>\\d{2}):(?P<min>\\d{2}):(?P<sec>\\d{2}) (?P<year>\\d{4})$',
u'\\s*,\\s*',
'^From ',
'[ \\(\\)<>@,;:\\\\"/\\[\\]\\?=]',
u'(\\{\\%.*?\\%\\}|\\{\\{.*?\\}\\}|\\{\\#.*?\\#\\})',
u'\n^(?P<constant>(?:\\_\\("[^"\\\\]*(?:\\\\.[^"\\\\]*)*"\\)|\\_\\(\'[^\'\\\\]*(?:\\\\.[^\'\\\\]*)*\'\\)|"[^"\\\\]*(?:\\\\.[^"\\\\]*)*"|\'[^\'\\\\]*(?:\\\\.[^\'\\\\]*)*\'))|\n^(?P<var>[\\w\\.]+|[-+\\.]?\\d[\\d\\.e]*)|\n (?:\\s*\\|\\s*\n (?P<filter_name>\\w+)\n (?:\\:\n (?:\n (?P<constant_arg>(?:\\_\\("[^"\\\\]*(?:\\\\.[^"\\\\]*)*"\\)|\\_\\(\'[^\'\\\\]*(?:\\\\.[^\'\\\\]*)*\'\\)|"[^"\\\\]*(?:\\\\.[^"\\\\]*)*"|\'[^\'\\\\]*(?:\\\\.[^\'\\\\]*)*\'))|\n (?P<var_arg>[\\w\\.]+|[-+\\.]?\\d[\\d\\.e]*)\n )\n )?\n )',
u'(?:(\\w+)=)?(.+)',
u'API|TOKEN|KEY|SECRET|PASS|PROFANITIES_LIST|SIGNATURE',
'\\s*#?\\s*$',
'[_a-z]\\w*\\.py$',
u'.*; charset=([\\w\\d-]+);?',
'[ \\(\\)<>@,;:\\\\"/\\[\\]\\?=]',
u'\\s+',
u'^[\\w.@+-]+$',
]
# Benchmark body: compile every pattern in the corpus once, in list order.
# sre_compile.compile is called directly with flags=0; the compiled objects
# are discarded because only the compilation work itself is of interest.
for pattern in patterns:
    sre_compile.compile(pattern, 0)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment