Commit ff2fe9ad authored by Chris McDonough

DocumentClass was changed to support alternate locales a while back. One of the
ways in which this was done was to use string.punctuation with locale support
turned on, stuffing the results into regular expressions.

Unfortunately, the insertion of the punctuation into regular expressions was
completely literal.  This meant that, depending on locale, a regex could take on
completely different semantics due to the fact that it could include "]", "?",
etc.  At times, this could cause a segmentation fault if a nonsensical generated
regex was especially juicy.

I took out the code which localized punctuation, although the code which
localizes letters is still there.
parent eb9f1477
...@@ -207,11 +207,10 @@ class StructuredTextRow(ST.StructuredTextDocument): ...@@ -207,11 +207,10 @@ class StructuredTextRow(ST.StructuredTextDocument):
""" """
row is a list of tuples, where each tuple is row is a list of tuples, where each tuple is
the raw text for a cell/column and the span the raw text for a cell/column and the span
of that cell/column". of that cell/column.
EX EX
[('this is column one',1), ('this is column two',1)] [('this is column one',1), ('this is column two',1)]
""" """
apply(ST.StructuredTextDocument.__init__,(self,[]),kw) apply(ST.StructuredTextDocument.__init__,(self,[]),kw)
self._columns = [] self._columns = []
for column in row: for column in row:
...@@ -584,7 +583,7 @@ class DocumentClass: ...@@ -584,7 +583,7 @@ class DocumentClass:
def doc_emphasize( def doc_emphasize(
self, s, self, s,
expr = re.compile('\s*\*([ \n%s0-9]+)\*(?!\*|-)' % lettpunc).search expr = re.compile('\s*\*([ \n%s0-9.:/;,\'\"\?\=\-\>\<\(\)]+)\*(?!\*|-)' % letters).search
): ):
r=expr(s) r=expr(s)
...@@ -632,7 +631,7 @@ class DocumentClass: ...@@ -632,7 +631,7 @@ class DocumentClass:
def doc_underline(self, def doc_underline(self,
s, s,
expr=re.compile("\s+\_([0-9%s ]+)\_" % lettpunc).search): expr=re.compile("\_([%s0-9\s\.,\?\/]+)\_" % letters).search):
result = expr(s) result = expr(s)
if result: if result:
...@@ -644,7 +643,7 @@ class DocumentClass: ...@@ -644,7 +643,7 @@ class DocumentClass:
def doc_strong(self, def doc_strong(self,
s, s,
expr = re.compile('\s*\*\*([ \n%s0-9]+)\*\*' % lettpunc).search expr = re.compile('\s*\*\*([ \n%s0-9.:/;\-,!\?\'\"]+)\*\*' % letters).search
): ):
r=expr(s) r=expr(s)
...@@ -655,10 +654,11 @@ class DocumentClass: ...@@ -655,10 +654,11 @@ class DocumentClass:
return None return None
def doc_href( def doc_href(
self, s, self, s,
expr1 = re.compile("(\"[ %s0-9\n\-\.\,\;\(\)\/\:\/\*\']+\")(:)([a-zA-Z0-9\@\.\,\?\!\/\:\;\-\#\~]+)([,]*\s*)" % letters).search, expr1 = re.compile("(\"[ %s0-9\n\-\.\,\;\(\)\/\:\/\*\']+\")(:)([a-zA-Z0-9\@\.\,\?\!\/\:\;\-\#\~]+)([,]*\s*)" % letters).search,
expr2 = re.compile('(\"[ %s0-9\n\-\.\:\;\(\)\/\*\']+\")([,]+\s+)([a-zA-Z0-9\@\.\,\?\!\/\:\;\-\#\~]+)(\s*)' % letters).search): expr2 = re.compile('(\"[ %s0-9\n\-\.\:\;\(\)\/\*\']+\")([,]+\s+)([a-zA-Z0-9\@\.\,\?\!\/\:\;\-\#\~]+)(\s*)' % letters).search,
punctuation = re.compile("[\,\.\?\!\;]+").match
):
r=expr1(s) or expr2(s) r=expr1(s) or expr2(s)
...@@ -669,16 +669,36 @@ class DocumentClass: ...@@ -669,16 +669,36 @@ class DocumentClass:
start,e = r.span(1) start,e = r.span(1)
name = s[start:e] name = s[start:e]
name = replace(name,'"','',2) name = replace(name,'"','',2)
#start = start + 1
st,end = r.span(3) st,end = r.span(3)
if punctuation(s[end-1:end]):
if s[end-1:end] in punctuations: end-=1 end = end -1
link = s[st:end] link = s[st:end]
#end = end - 1
# name is the href title, link is the target # name is the href title, link is the target
# of the href # of the href
return (StructuredTextLink(name, href=link), return (StructuredTextLink(name, href=link),
start, end) start, end)
#return (StructuredTextLink(s[start:end], href=s[start:end]),
# start, end)
else: else:
return None return None
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment