Commit b549149c authored by pombredanne's avatar pombredanne

Now supporting semicolon and hash comments.

parent 318ef243
...@@ -74,42 +74,34 @@ class MissingSectionHeaderError(ParsingError): ...@@ -74,42 +74,34 @@ class MissingSectionHeaderError(ParsingError):
self.lineno = lineno self.lineno = lineno
self.line = line self.line = line
# This regex captures either plain sections headers with optional trailing # This regex captures section headers with an optional trailing comment
# comment separated by a semicolon or a pound sign OR ... # separated by a semicolon or a hash. Section headers can have an optional
# new style section headers with an expression and optional trailing comment # expression. Expressions and comments can contain brackets but no verbatim '#'
# that then can be only separated by a pound sign. # and ';' : these need to be escaped.
# This second case could require complex parsing as expressions and comments
# can contain brackets and # signs that would need at least to balance brackets
# A title line with an expression has the general form: # A title line with an expression has the general form:
# [section_name: some Python expression] # some comment # [section_name: some Python expression] #; some comment
# This regex leverages the fact that the following is a valid Python expression: # This regex leverages the fact that the following is a valid Python expression:
# [some Python expression] # some comment # [some Python expression] # some comment
# and that section headers are always delimited by [brackets] which are also # and that section headers are also delimited by [brackets] that are also [list]
# the delimiters for Python [lists] # delimiters.
# So instead of doing complex parsing to balance brackets, we capture just # So instead of doing complex parsing to balance brackets in an expression, we
# enough from a header line to collect then remove the section_name and colon # capture just enough from a header line to collect then remove the section_name
# expression separator keeping only a list-enclosed expression and optional # and colon expression separator keeping only a list-enclosed expression and
# comments. Therefore the parsing and validation of this resulting Python # optional comments. The parsing and validation of this Python expression can be
# expression can be entirely delegated to the built-in Python eval compiler. # entirely delegated to Python's eval. The result of the evaluated expression is
# The result of the evaluated expression is then always returned wrapped in a # then always returned wrapped in a list with a single item that contains the
# list with a single item that contains the original expression # original expression
section_header = re.compile( section_header = re.compile(
r'(?P<head>\[)' # opening bracket [ starts a section title line r'(?P<head>\[)'
r'\s*'
r'(?P<name>[^\s#[\]:;{}]+)'
r'\s*' r'\s*'
r'(?P<name>[^\s[\]:{}]+)' # section name r'(:(?P<expression>[^#;]*))?'
r'\s*' r'\s*'
r'(' r'(?P<tail>]'
r']' # closing bracket ] r'\s*'
r'\s*' r'([#;].*)?$)'
r'([#;].*)?$' # optional trailing comment marked by '#' or ';'
r'|' # OR
r':' # optional ':' separator for expression
r'\s*'
r'(?P<tail>.*' # optional arbitrary Python expression
r']' # closing bracket ]
r'\s*'
r'\#?.*)$' # optional trailing comment marked by '#'
r')'
).match ).match
option_start = re.compile( option_start = re.compile(
...@@ -129,13 +121,13 @@ def parse(fp, fpname, exp_globals=None): ...@@ -129,13 +121,13 @@ def parse(fp, fpname, exp_globals=None):
leading whitespace. Blank lines, lines beginning with a '#', leading whitespace. Blank lines, lines beginning with a '#',
and just about everything else are ignored. and just about everything else are ignored.
The title line is in the form [name] followed an optional a trailing The title line is in the form [name] followed by an optional trailing
comment separated by a semicolon ';' or a pound `#' sign. comment separated by a semicolon `;' or a hash `#' character.
Optionally the title line can have the form [name:expression] where Optionally the title line can have the form `[name:expression]' where
expression is an arbitrary Python expression. Sections with an expression expression is an arbitrary Python expression. Sections with an expression
that evaluates to False are ignored. In this form, the optional trailing that evaluates to False are ignored. Semicolon `;' and hash `#' characters
comment can only be marked by a pound # sign (semi-colon ; is not valid) must be string-escaped in expression literals.
exp_globals is a callable returning a mapping of defaults used as globals exp_globals is a callable returning a mapping of defaults used as globals
during the evaluation of a section conditional expression. during the evaluation of a section conditional expression.
...@@ -179,21 +171,21 @@ def parse(fp, fpname, exp_globals=None): ...@@ -179,21 +171,21 @@ def parse(fp, fpname, exp_globals=None):
sectname = header.group('name') sectname = header.group('name')
head = header.group('head') # the starting [ head = header.group('head') # the starting [
tail = header.group('tail') # closing ], expression and comment expression = header.group('expression')
if tail: tail = header.group('tail') # closing ]and comment
if expression:
# normalize tail comments to Python style
tail = tail.replace(';', '#') if tail else ''
# un-escape literal # and ; . Do not use a string-escape decode
expr = expression.replace(r'\x23','#').replace(r'\x3b', ';')
# rebuild a valid Python expression wrapped in a list
expr = head + expr + tail
# lazily populate context only expression # lazily populate context only expression
if not context: if not context:
context = exp_globals() if exp_globals else {} context = exp_globals() if exp_globals else {}
# evaluated expression is in list: get first element
# rebuild a valid Python expression wrapped in a list section_condition = eval(expr, context)[0]
expression = head + tail # finally, ignore section when an expression evaluates to false
# by design and construction, the evaluated expression
# is always the first element of a wrapping list
# so we get the first element
section_condition = eval(expression, context)[0]
# ignore section when an expression evaluates to false
if not section_condition: if not section_condition:
logger.debug('Ignoring section %(sectname)r with [expression]: %(expression)r' % locals()) logger.debug('Ignoring section %(sectname)r with [expression]: %(expression)r' % locals())
continue continue
......
...@@ -99,13 +99,13 @@ conditional exclusion of sections:: ...@@ -99,13 +99,13 @@ conditional exclusion of sections::
[s1: 2 + 2 == 4] # this expression is true [therefore "this section" _will_ be NOT skipped [s1: 2 + 2 == 4] # this expression is true [therefore "this section" _will_ be NOT skipped
a = 1 a = 1
[ s2 : 2 + 2 == 5 ] # comment: this expression is false, so this section will be ignored [ s2 : 2 + 2 == 5 ] # comment: this expression is false, so this section will be ignored]
long = a long = a
[ s2 : 41 + 1 == 42 ] # a comment: this expression is true, so this section will be kept [ s2 : 41 + 1 == 42 ] # a comment: this expression is [true], so this section will be kept
long = b long = b
[s3:2 in map(lambda i:i*2, [i for i in range(10)])] # Complex expressions are [possible!];, though they should not be (abused:) [s3:2 in map(lambda i:i*2, [i for i in range(10)])] ;# Complex expressions are [possible!];, though they should not be (abused:)
# this section will not be skipped # this section will not be skipped
long = c long = c
...@@ -119,10 +119,13 @@ conditional exclusion of sections:: ...@@ -119,10 +119,13 @@ conditional exclusion of sections::
{'s1': {'a': '1'}, 's2': {'long': 'b'}, 's3': {'long': 'c'}} {'s1': {'a': '1'}, 's2': {'long': 'b'}, 's3': {'long': 'c'}}
The title line can contain an optional trailing comment separated by a pound Title line optional trailing comments are separated by a hash '#' or semicolon
sign. The expression and the comment can contain arbitrary characters, including ';' character. The expression is an arbitrary expression with one restriction:
brackets that are also used to mark the end of a section header and that may be it cannot contain a literal hash '#' or semicolon ';' character: these need to be
ambiguous to recognize in some cases. For example, valid sections lines include:: string-escaped.
The comment can contain arbitrary characters, including brackets that are also
used to mark the end of a section header and may be ambiguous to recognize in
some cases. For example, valid sections lines include::
[ a ] [ a ]
a=1 a=1
...@@ -142,9 +145,18 @@ ambiguous to recognize in some cases. For example, valid sections lines include: ...@@ -142,9 +145,18 @@ ambiguous to recognize in some cases. For example, valid sections lines include:
[ f ] # ] [ f ] # ]
f = 1 f = 1
[g:2 in map(lambda i:i*2, ['''#;)'''] + [i for i in range(10)] + list('#[]][;#'))] # Complex #expressions; ][are [possible!] [g:2 in map(lambda i:i*2, ['''\x23\x3b)'''] + [i for i in range(10)] + list('\x23[]][\x3b\x23'))] # Complex #expressions; ][are [possible!] and can us escaped # and ; in literals
g = 1 g = 1
[ h : True ] ; ]
h =1
[ i : True] ; []
i=1
[j:2 in map(lambda i:i*2, ['''\x23\x3b)'''] + [i for i in range(10)] + list('\x23[]][\x3b\x23'))] ; Complex #expressions; ][are [possible!] and can us escaped # and ; in literals
j = 1
.. -> text .. -> text
>>> try: import StringIO >>> try: import StringIO
...@@ -158,28 +170,31 @@ ambiguous to recognize in some cases. For example, valid sections lines include: ...@@ -158,28 +170,31 @@ ambiguous to recognize in some cases. For example, valid sections lines include:
'd': {'d': '1'}, 'd': {'d': '1'},
'e': {'e': '1'}, 'e': {'e': '1'},
'f': {'f': '1'}, 'f': {'f': '1'},
'g': {'g': '1'}} 'g': {'g': '1'},
'h': {'h': '1'},
'i': {'i': '1'},
'j': {'j': '1'}}
A title line optional trailing comment may also be separated by a comma A title line optional trailing comment can be separated by a hash or semicolon
-- for backward compatibility -- if and only if the title line does not contain character. The following are valid semicolon-separated comments::
an expression. The following are valid::
[ a ] ;comma comment are supported for lines without expressions ] [ a ] ;semicolon comment are supported for lines without expressions ]
a = 1 a = 1
# this comma separated comment is valid because this section does not contain an expression
[ b ] ; [] [ b ] ; []
b = 1 b = 1
# this comma separated comment is valid because this section does not contain an expression
[ c ] ; ] [ c ] ; ]
c = 1 c = 1
# this comma separated comment is valid because this section does not contain an expression
[ d ] ; [ [ d ] ; [
d = 1 d = 1
[ e: True ] ;semicolon comments are supported for lines with expressions ]
e = 1
.. -> text .. -> text
>>> try: import StringIO >>> try: import StringIO
...@@ -187,30 +202,88 @@ an expression. The following are valid:: ...@@ -187,30 +202,88 @@ an expression. The following are valid::
>>> import pprint, zc.buildout.configparser >>> import pprint, zc.buildout.configparser
>>> pprint.pprint(zc.buildout.configparser.parse(StringIO.StringIO( >>> pprint.pprint(zc.buildout.configparser.parse(StringIO.StringIO(
... text), 'test')) ... text), 'test'))
{'a': {'a': '1'}, 'b': {'b': '1'}, 'c': {'c': '1'}, 'd': {'d': '1'}} {'a': {'a': '1'},
'b': {'b': '1'},
'c': {'c': '1'},
'd': {'d': '1'},
'e': {'e': '1'}}
And the following is invalid and will trigger an error:: The following sections with hash comment separators are valid too::
[ d: True ] ;comma comment are not supported for lines with expressions ] [ a ] #hash comment ] are supported for lines without expressions ]
a = 1
[ b ] # []
b = 1
[ c ] # ]
c = 1
[ d ] # [
d = 1 d = 1
[ e: True ] #hash comments] are supported for lines with expressions ]
e = 1
.. -> text
>>> try: import StringIO
... except ImportError: import io as StringIO
>>> import pprint, zc.buildout.configparser
>>> pprint.pprint(zc.buildout.configparser.parse(StringIO.StringIO(
... text), 'test'))
{'a': {'a': '1'},
'b': {'b': '1'},
'c': {'c': '1'},
'd': {'d': '1'},
'e': {'e': '1'}}
However, explicit semicolon and hash characters are invalid in expressions and
must be escaped or this triggers an error. In the rare case where a hash '#' or
semicolon ';' would be needed in an expression literal, you can use the
string-escaped representation of these characters: use '\x23' for hash '#' and
'\x3b' for semicolon ';' to avoid evaluation errors.
These expressions are valid and use escaped hash and semicolons in literals::
[a:2 in map(lambda i:i*2, ['''\x23\x3b)'''] + [i for i in range(10)] + list('\x23[]][\x3b\x23'))] # Complex #expressions; ][are [possible!] and can us escaped # and ; in literals
a = 1
[b:2 in map(lambda i:i*2, ['''\x23\x3b)'''] + [i for i in range(10)] + list('\x23[]][\x3b\x23'))] ; Complex #expressions; ][are [possible!] and can us escaped # and ; in literals
b = 1
.. -> text
>>> try: import StringIO
... except ImportError: import io as StringIO
>>> import pprint, zc.buildout.configparser
>>> pprint.pprint(zc.buildout.configparser.parse(StringIO.StringIO(
... text), 'test'))
{'a': {'a': '1'}, 'b': {'b': '1'}}
And using unescaped semicolon and hash characters in expressions triggers an error::
[a:'#' in '#;'] # this is not a supported expression
a = 1
.. -> text .. -> text
>>> try: import StringIO >>> try: import StringIO
... except ImportError: import io as StringIO ... except ImportError: import io as StringIO
>>> import zc.buildout.configparser >>> import zc.buildout.configparser
>>> try: zc.buildout.configparser.parse(StringIO.StringIO(text), 'test') >>> try: zc.buildout.configparser.parse(StringIO.StringIO(text), 'test')
... except SyntaxError: pass # success ... except zc.buildout.configparser.MissingSectionHeaderError: pass # success
One of the typical usage is to have buildout parts that are operating system or One typical use of expressions is to have buildout parts that are
platform specific. The configparser.parse function has an optional operating system or platform-specific. The configparser.parse function has an
exp_globals argument. This is a callable returning a mapping of objects made optional exp_globals argument. This is a callable returning a mapping of
available to the evaluation context of the expression. Here we add the objects made available to the evaluation context of the expression. Here we add
platform and sys modules to the evaluation context, so we can access platform the platform and sys modules to the evaluation context, so we can access
and sys functions and objects in our expressions :: platform and sys modules functions and objects in our expressions ::
[s1: platform.python_version_tuple()[0] in ('2', '3',)] # this expression is true, the major versions of python are either 2 or 3 [s1: platform.python_version_tuple()[0] in ('2', '3',)] # this expression is true, the major versions of python are either 2 or 3
a = 1 a = 1
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment