Commit b29008c9 authored by Chris McDonough

A number of small changes and enhancements to the detection of symbols and gestures:

 - All regexes now attempt to detect \r as well as \n as a newline character.

 - Fixed improper regex for emphasis which caused emphasized text to be rendered as strong.

 - Fixed the href method so that it no longer returns false positives for comma-link-type hrefs (the '"text", link:type' form).  In many cases, inappropriate text was being detected as a comma-link-type href.  Now only links that begin with one of (http|https|ftp|mailto|file|about) are treated as comma-link-type hrefs.

 - Changed ordered list detection so that a small word followed by punctuation at the beginning of a paragraph (e.g. "Yes.") is no longer treated as an ordered list gesture.  Instead, only a single character (or digits of any length) followed by punctuation is treated as an ordered list gesture.

- Removed unused regular expression from doc_header signature.

- Changed doc_literal, doc_emphasize, and doc_strong methods to
  work across newlines.  This means that people can start to type,
  for example, a literal 'on one line and it should be ok to
  ... cross to the next'.  The danger here is that it will match
  too much (especially in the case of literals), but this is
  ameliorated by the fact that (at least for literals) the
  ending "'" is required to be followed by some kind of whitespace.
  Emphasis and strong don't have this limitation, but their symbology
  (*) is much less frequently used in normal writing.
parent f44a849a
...@@ -85,7 +85,8 @@ ...@@ -85,7 +85,8 @@
import re, ST, STDOM import re, ST, STDOM
from string import split, join, replace, expandtabs, strip, find, rstrip from string import split, join, replace, expandtabs, strip, find, rstrip
from STletters import letters from STletters import letters, digits, literal_punc, under_punc,\
strongem_punc, phrase_delimiters
StringType=type('') StringType=type('')
ListType=type([]) ListType=type([])
...@@ -364,7 +365,7 @@ class DocumentClass: ...@@ -364,7 +365,7 @@ class DocumentClass:
#'doc_inner_link', #'doc_inner_link',
#'doc_named_link', #'doc_named_link',
#'doc_underline', #'doc_underline'
text_types = [ text_types = [
'doc_sgml', 'doc_sgml',
'doc_href', 'doc_href',
...@@ -790,7 +791,7 @@ class DocumentClass: ...@@ -790,7 +791,7 @@ class DocumentClass:
def doc_numbered( def doc_numbered(
self, paragraph, self, paragraph,
expr = re.compile(r'(\s*[%s]+\.)|(\s*[0-9]+\.)|(\s*[0-9]+\s+)' % letters).match): expr = re.compile(r'(\s*[%s]\.)|(\s*[0-9]+\.)|(\s*[0-9]+\s+)' % letters).match):
# This is the old expression. It had a nasty habit # This is the old expression. It had a nasty habit
# of grabbing paragraphs that began with a single # of grabbing paragraphs that began with a single
...@@ -838,8 +839,7 @@ class DocumentClass: ...@@ -838,8 +839,7 @@ class DocumentClass:
indent=paragraph.indent, indent=paragraph.indent,
delim=d) delim=d)
def doc_header(self, paragraph, def doc_header(self, paragraph):
expr=re.compile(r'[ %s0-9.:/,-_*<>\?\'\"]+' % letters).match):
subs=paragraph.getSubparagraphs() subs=paragraph.getSubparagraphs()
if not subs: return None if not subs: return None
top=paragraph.getColorizableTexts()[0] top=paragraph.getColorizableTexts()[0]
...@@ -858,11 +858,14 @@ class DocumentClass: ...@@ -858,11 +858,14 @@ class DocumentClass:
def doc_literal( def doc_literal(
self, s, self, s,
expr=re.compile( expr = re.compile(r"'([%s%s%s\s]+)'[%s]+" % (letters, digits, literal_punc, phrase_delimiters)).search):
r"(?:\s|^)'" # open
r"([^ \t\n\r\f\v']|[^ \t\n\r\f\v'][^\n']*[^ \t\n\r\f\v'])" # contents # old expr... failed to cross newlines.
r"'(?:\s|[,.;:!?]|$)" # close # expr=re.compile(
).search): # r"(?:\s|^)'" # open
# r"([^ \t\n\r\f\v']|[^ \t\n\r\f\v'][^\n\r']*[^ \t\n\r\f\v'])" # contents
# r"'(?:\s|[,.;:!?]|$)" # close
# ).search):
r=expr(s) r=expr(s)
if r: if r:
...@@ -873,7 +876,8 @@ class DocumentClass: ...@@ -873,7 +876,8 @@ class DocumentClass:
def doc_emphasize( def doc_emphasize(
self, s, self, s,
expr = re.compile(r'\s*\*([ \n%s0-9.:/;,\'\"\?\-\_\/\=\-\>\<\(\)]+)\*(?!\*|-)' % letters).search expr = re.compile(r'\*([%s%s%s\s]+?)\*' % (letters, digits, strongem_punc)).search
#expr = re.compile(r'\s*\*([ \n\r%s0-9.:/;,\'\"\?\-\_\/\=\-\>\<\(\)]+)\*(?!\*|-)' % letters).search # old expr, inconsistent punctuation
): ):
r=expr(s) r=expr(s)
...@@ -886,7 +890,7 @@ class DocumentClass: ...@@ -886,7 +890,7 @@ class DocumentClass:
def doc_inner_link(self, def doc_inner_link(self,
s, s,
expr1 = re.compile(r"\.\.\s*").search, expr1 = re.compile(r"\.\.\s*").search,
expr2 = re.compile(r"\[[%s0-9]+\]" % letters ).search): expr2 = re.compile(r"\[[%s%s]+\]" % (letters, digits) ).search):
# make sure we dont grab a named link # make sure we dont grab a named link
if expr2(s) and expr1(s): if expr2(s) and expr1(s):
...@@ -920,9 +924,12 @@ class DocumentClass: ...@@ -920,9 +924,12 @@ class DocumentClass:
def doc_underline(self, def doc_underline(self,
s, s,
expr=re.compile(r"\_([%s0-9\s\.,\?]+)\_" % letters).search): #expr=re.compile(r"\_([a-zA-Z0-9\s\.,\?]+)\_").search, # old expr, inconsistent punc, failed to cross newlines
expr=re.compile(r'_([%s%s%s\s]+)_' % (letters, digits, under_punc)).search):
result = expr(s) result = expr(s)
if result: if result:
if result.group(1)[:1] == '_':
return None # no double unders
start,end = result.span(1) start,end = result.span(1)
st,e = result.span() st,e = result.span()
return (StructuredTextUnderline(s[start:end]),st,e) return (StructuredTextUnderline(s[start:end]),st,e)
...@@ -931,7 +938,8 @@ class DocumentClass: ...@@ -931,7 +938,8 @@ class DocumentClass:
def doc_strong(self, def doc_strong(self,
s, s,
expr = re.compile(r'\s*\*([ \n%s0-9.:/;,\'\"\?\-\_\/\=\-\>\<\(\)]+)\*(?!\*|-)' % letters).search expr = re.compile(r'\*\*([%s%s%s\s]+?)\*\*' % (letters, digits, strongem_punc)).search
#expr = re.compile(r'\s*\*\*([ \n\r%s0-9.:/;,\'\"\?\-\_\/\=\-\>\<\(\)]+)\*\*(?!\*|-)' % letters).search, # old expr, inconsistent punc, failed to cross newlines.
): ):
r=expr(s) r=expr(s)
...@@ -942,8 +950,8 @@ class DocumentClass: ...@@ -942,8 +950,8 @@ class DocumentClass:
return None return None
## Some constants to make the doc_href() regex easier to read. ## Some constants to make the doc_href() regex easier to read.
_DQUOTEDTEXT = r'("[ %s0-9\n\-\.\,\;\(\)\/\:\/\*\']+")' % letters ## double quoted text _DQUOTEDTEXT = r'("[ %s0-9\n\r\-\.\,\;\(\)\/\:\/\*\']+")' % letters ## double quoted text
_URL_AND_PUNC = r'([%s0-9_\@\.\,\?\!\/\:\;\-\#\~]+)' % letters _URL_AND_PUNC = r'((http|https|ftp|mailto|file|about)[:/]+?[%s0-9_\@\.\,\?\!\/\:\;\-\#\~]+)' % letters
_SPACES = r'(\s*)' _SPACES = r'(\s*)'
def doc_href(self, s, def doc_href(self, s,
...@@ -989,7 +997,7 @@ class DocumentClass: ...@@ -989,7 +997,7 @@ class DocumentClass:
def doc_xref(self, s, def doc_xref(self, s,
expr = re.compile('\[([%s0-9\-.:/;,\n\~]+)\]' % letters).search expr = re.compile('\[([%s0-9\-.:/;,\n\r\~]+)\]' % letters).search
): ):
r = expr(s) r = expr(s)
if r: if r:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment