DocumentClass was changed to support alternate locales a while back. One of the

ways in which this was done was to use string.punctuation with locale support turned on, stuffing the results into regular expressions. Unfortunately, the insertion of the punctuation into regular expressions was completely literal. This meant that, depending on locale, a regex could take on completely different semantics due to the fact that it could include "]", "?", etc. At times, this could cause a segmentation fault if a nonsensical generated regex was especially juicy. I took out the code which localized punctuation, although the code which localizes letters is still there.

DocumentClass was changed to support alternate locales a while back. One of the
ways in which this was done was to use string.punctuation with locale support turned on, stuffing the results into regular expressions. Unfortunately, the insertion of the punctuation into regular expressions was completely literal. This meant that, depending on locale, a regex could take on completely different semantics due to the fact that it could include "]", "?", etc. At times, this could cause a segmentation fault if a nonsensical generated regex was especially juicy. I took out the code which localized punctuation, although the code which localizes letters is still there.
ff2fe9ad · Chris McDonough · eb9f1477 · ff2fe9ad
Commit ff2fe9ad authored Jun 23, 2001 by Chris McDonough
Hide whitespace changes
Inline Side-by-side

Showing with 31 additions and 11 deletions

lib/python/StructuredText/ClassicDocumentClass.py lib/python/StructuredText/ClassicDocumentClass.py +31 -11

No files found.
--- a/lib/python/StructuredText/ClassicDocumentClass.py
+++ b/lib/python/StructuredText/ClassicDocumentClass.py
@@ -207,11 +207,10 @@ class StructuredTextRow(ST.StructuredTextDocument):
        """
        row is a list of tuples, where each tuple is
        the raw text for a cell/column and the span
-        of that cell/column". 
+        of that cell/column. 
        EX 
        [('this is column one',1), ('this is column two',1)]
        """
        apply(ST.StructuredTextDocument.__init__,(self,[]),kw)
        self._columns = []
        for column in row:            
@@ -584,7 +583,7 @@ class DocumentClass:
    def doc_emphasize(
        self, s,
-        expr = re.compile('\s*\*([ \n%s0-9]+)\*(?!\*|-)' % lettpunc).search
+        expr = re.compile('\s*\*([ \n%s0-9.:/;,\'\"\?\=\-\>\<\(\)]+)\*(?!\*|-)' % letters).search
        ):
        r=expr(s)
@@ -632,7 +631,7 @@ class DocumentClass:
    def doc_underline(self,
                      s,
-                      expr=re.compile("\s+\_([0-9%s ]+)\_" % lettpunc).search):
+                      expr=re.compile("\_([%s0-9\s\.,\?\/]+)\_" % letters).search):
        result = expr(s)
        if result:
@@ -644,7 +643,7 @@ class DocumentClass:
    def doc_strong(self, 
                   s,
-        expr = re.compile('\s*\*\*([ \n%s0-9]+)\*\*' % lettpunc).search
+        expr = re.compile('\s*\*\*([ \n%s0-9.:/;\-,!\?\'\"]+)\*\*' % letters).search
        ):
        r=expr(s)
@@ -655,10 +654,11 @@ class DocumentClass:
           return None
    def doc_href(
        self, s,
        expr1 = re.compile("(\"[ %s0-9\n\-\.\,\;\(\)\/\:\/\*\']+\")(:)([a-zA-Z0-9\@\.\,\?\!\/\:\;\-\#\~]+)([,]*\s*)" % letters).search,
-        expr2 = re.compile('(\"[ %s0-9\n\-\.\:\;\(\)\/\*\']+\")([,]+\s+)([a-zA-Z0-9\@\.\,\?\!\/\:\;\-\#\~]+)(\s*)' % letters).search):
+        expr2 = re.compile('(\"[ %s0-9\n\-\.\:\;\(\)\/\*\']+\")([,]+\s+)([a-zA-Z0-9\@\.\,\?\!\/\:\;\-\#\~]+)(\s*)' % letters).search,
+        punctuation = re.compile("[\,\.\?\!\;]+").match
+        ):
        r=expr1(s) or expr2(s)
@@ -669,16 +669,36 @@ class DocumentClass:
            start,e = r.span(1)
            name    = s[start:e]
            name    = replace(name,'"','',2)
+            #start   = start + 1
            st,end   = r.span(3)
+            if punctuation(s[end-1:end]):
-            if s[end-1:end] in punctuations: end-=1
+                end = end -1
            link    = s[st:end]
+            #end     = end - 1                        
            # name is the href title, link is the target
            # of the href
            return (StructuredTextLink(name, href=link),
                    start, end)
+            #return (StructuredTextLink(s[start:end], href=s[start:end]),
+            #        start, end)
        else:
            return None