Commit 7644ff1f authored by Serhiy Storchaka

The re tests now work again on Unicode-disabled builds.

Simplified the existing detection of Unicode-disabled builds.
parent ed5ea15b
......@@ -663,7 +663,7 @@ xyzabc
try:
u = eval("u'\N{LATIN CAPITAL LETTER A WITH DIAERESIS}'")
except SyntaxError:
except (SyntaxError, ValueError):
pass
else:
tests.extend([
......
from test.test_support import verbose, run_unittest, import_module
from test.test_support import precisionbigmemtest, _2G, cpython_only
from test.test_support import captured_stdout
from test.test_support import captured_stdout, have_unicode, requires_unicode, u
import re
from re import Scanner
import sre_constants
......@@ -86,6 +86,7 @@ class ReTests(unittest.TestCase):
self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
'abc\ndef\n')
@requires_unicode
def test_bug_1140(self):
# re.sub(x, y, u'') should return u'', not '', and
# re.sub(x, y, '') should return '', not u''.
......@@ -376,10 +377,11 @@ class ReTests(unittest.TestCase):
"abcd abc bcd bx", re.LOCALE).group(1), "bx")
self.assertEqual(re.search(r"\B(b.)\B",
"abc bcd bc abxd", re.LOCALE).group(1), "bx")
self.assertEqual(re.search(r"\b(b.)\b",
"abcd abc bcd bx", re.UNICODE).group(1), "bx")
self.assertEqual(re.search(r"\B(b.)\B",
"abc bcd bc abxd", re.UNICODE).group(1), "bx")
if have_unicode:
self.assertEqual(re.search(r"\b(b.)\b",
"abcd abc bcd bx", re.UNICODE).group(1), "bx")
self.assertEqual(re.search(r"\B(b.)\B",
"abc bcd bc abxd", re.UNICODE).group(1), "bx")
self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
self.assertIsNone(re.search(r"^\Aabc\Z$", "\nabc\n", re.M))
......@@ -394,8 +396,9 @@ class ReTests(unittest.TestCase):
"1aa! a").group(0), "1aa! a")
self.assertEqual(re.search(r"\d\D\w\W\s\S",
"1aa! a", re.LOCALE).group(0), "1aa! a")
self.assertEqual(re.search(r"\d\D\w\W\s\S",
"1aa! a", re.UNICODE).group(0), "1aa! a")
if have_unicode:
self.assertEqual(re.search(r"\d\D\w\W\s\S",
"1aa! a", re.UNICODE).group(0), "1aa! a")
def test_string_boundaries(self):
# See http://bugs.python.org/issue10713
......@@ -423,13 +426,14 @@ class ReTests(unittest.TestCase):
# Can match around the whitespace.
self.assertEqual(len(re.findall(r"\B", " ")), 2)
@requires_unicode
def test_bigcharset(self):
self.assertEqual(re.match(u"([\u2222\u2223])",
u"\u2222").group(1), u"\u2222")
self.assertEqual(re.match(u"([\u2222\u2223])",
u"\u2222", re.UNICODE).group(1), u"\u2222")
self.assertEqual(re.match(u(r"([\u2222\u2223])"),
unichr(0x2222)).group(1), unichr(0x2222))
self.assertEqual(re.match(u(r"([\u2222\u2223])"),
unichr(0x2222), re.UNICODE).group(1), unichr(0x2222))
r = u'[%s]' % u''.join(map(unichr, range(256, 2**16, 255)))
self.assertEqual(re.match(r, u"\uff01", re.UNICODE).group(), u"\uff01")
self.assertEqual(re.match(r, unichr(0xff01), re.UNICODE).group(), unichr(0xff01))
def test_big_codesize(self):
# Issue #1160
......@@ -476,7 +480,8 @@ class ReTests(unittest.TestCase):
import _sre
self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))
if have_unicode:
self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))
self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
......@@ -503,8 +508,9 @@ class ReTests(unittest.TestCase):
self.assertEqual(m.group(), match)
self.assertEqual(m.span(), span)
@requires_unicode
def test_re_escape(self):
alnum_chars = string.ascii_letters + string.digits
alnum_chars = unicode(string.ascii_letters + string.digits)
p = u''.join(unichr(i) for i in range(256))
for c in p:
if c in alnum_chars:
......@@ -517,7 +523,7 @@ class ReTests(unittest.TestCase):
self.assertMatch(re.escape(p), p)
def test_re_escape_byte(self):
alnum_chars = (string.ascii_letters + string.digits).encode('ascii')
alnum_chars = string.ascii_letters + string.digits
p = ''.join(chr(i) for i in range(256))
for b in p:
if b in alnum_chars:
......@@ -529,20 +535,21 @@ class ReTests(unittest.TestCase):
self.assertMatch(re.escape(b), b)
self.assertMatch(re.escape(p), p)
@requires_unicode
def test_re_escape_non_ascii(self):
s = u'xxx\u2620\u2620\u2620xxx'
s = u(r'xxx\u2620\u2620\u2620xxx')
s_escaped = re.escape(s)
self.assertEqual(s_escaped, u'xxx\\\u2620\\\u2620\\\u2620xxx')
self.assertEqual(s_escaped, u(r'xxx\\\u2620\\\u2620\\\u2620xxx'))
self.assertMatch(s_escaped, s)
self.assertMatch(u'.%s+.' % re.escape(u'\u2620'), s,
u'x\u2620\u2620\u2620x', (2, 7), re.search)
self.assertMatch(u'.%s+.' % re.escape(unichr(0x2620)), s,
u(r'x\u2620\u2620\u2620x'), (2, 7), re.search)
def test_re_escape_non_ascii_bytes(self):
b = u'y\u2620y\u2620y'.encode('utf-8')
b = b'y\xe2\x98\xa0y\xe2\x98\xa0y'
b_escaped = re.escape(b)
self.assertEqual(b_escaped, b'y\\\xe2\\\x98\\\xa0y\\\xe2\\\x98\\\xa0y')
self.assertMatch(b_escaped, b)
res = re.findall(re.escape(u'\u2620'.encode('utf-8')), b)
res = re.findall(re.escape(b'\xe2\x98\xa0'), b)
self.assertEqual(len(res), 2)
def test_pickling(self):
......@@ -621,8 +628,9 @@ class ReTests(unittest.TestCase):
# non-recursive scheme was implemented.
self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)
@requires_unicode
def test_bug_612074(self):
pat=u"["+re.escape(u"\u2039")+u"]"
pat=u"["+re.escape(unichr(0x2039))+u"]"
self.assertEqual(re.compile(pat) and 1, 1)
def test_stack_overflow(self):
......@@ -696,12 +704,9 @@ class ReTests(unittest.TestCase):
self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
('a', None, None))
@requires_unicode
def test_bug_764548(self):
# bug 764548, re.compile() barfs on str/unicode subclasses
try:
unicode
except NameError:
self.skipTest('no problem if we have no unicode')
class my_unicode(unicode): pass
pat = re.compile(my_unicode("abc"))
self.assertIsNone(pat.match("xyz"))
......@@ -711,20 +716,14 @@ class ReTests(unittest.TestCase):
self.assertEqual([item.group(0) for item in iter],
[":", "::", ":::"])
@requires_unicode
def test_bug_926075(self):
try:
unicode
except NameError:
self.skipTest('no problem if we have no unicode')
self.assertIsNot(re.compile('bug_926075'),
re.compile(eval("u'bug_926075'")))
re.compile(u'bug_926075'))
@requires_unicode
def test_bug_931848(self):
try:
unicode
except NameError:
self.skipTest('no problem if we have no unicode')
pattern = eval('u"[\u002E\u3002\uFF0E\uFF61]"')
pattern = u(r"[\u002E\u3002\uFF0E\uFF61]")
self.assertEqual(re.compile(pattern).split("a.b.c"),
['a','b','c'])
......@@ -743,23 +742,24 @@ class ReTests(unittest.TestCase):
self.assertEqual(iter.next().span(), (4, 4))
self.assertRaises(StopIteration, iter.next)
@requires_unicode
def test_bug_6561(self):
# '\d' should match characters in Unicode category 'Nd'
# (Number, Decimal Digit), but not those in 'Nl' (Number,
# Letter) or 'No' (Number, Other).
decimal_digits = [
u'\u0037', # '\N{DIGIT SEVEN}', category 'Nd'
u'\u0e58', # '\N{THAI DIGIT SIX}', category 'Nd'
u'\uff10', # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd'
unichr(0x0037), # '\N{DIGIT SEVEN}', category 'Nd'
unichr(0x0e58), # '\N{THAI DIGIT SIX}', category 'Nd'
unichr(0xff10), # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd'
]
for x in decimal_digits:
self.assertEqual(re.match('^\d$', x, re.UNICODE).group(0), x)
not_decimal_digits = [
u'\u2165', # '\N{ROMAN NUMERAL SIX}', category 'Nl'
u'\u3039', # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl'
u'\u2082', # '\N{SUBSCRIPT TWO}', category 'No'
u'\u32b4', # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No'
unichr(0x2165), # '\N{ROMAN NUMERAL SIX}', category 'Nl'
unichr(0x3039), # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl'
unichr(0x2082), # '\N{SUBSCRIPT TWO}', category 'No'
unichr(0x32b4), # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No'
]
for x in not_decimal_digits:
self.assertIsNone(re.match('^\d$', x, re.UNICODE))
......@@ -767,11 +767,15 @@ class ReTests(unittest.TestCase):
def test_empty_array(self):
# SF buf 1647541
import array
for typecode in 'cbBuhHiIlLfd':
typecodes = 'cbBhHiIlLfd'
if have_unicode:
typecodes += 'u'
for typecode in typecodes:
a = array.array(typecode)
self.assertIsNone(re.compile("bla").match(a))
self.assertEqual(re.compile("").match(a).groups(), ())
@requires_unicode
def test_inline_flags(self):
# Bug #1700
upper_char = unichr(0x1ea0) # Latin Capital Letter A with Dot Below
......@@ -906,9 +910,10 @@ class ReTests(unittest.TestCase):
pattern = '.' + reps + mod + 'yz'
self.assertEqual(re.compile(pattern, re.S).findall('xyz'),
['xyz'], msg=pattern)
pattern = pattern.encode()
self.assertEqual(re.compile(pattern, re.S).findall(b'xyz'),
[b'xyz'], msg=pattern)
if have_unicode:
pattern = unicode(pattern)
self.assertEqual(re.compile(pattern, re.S).findall(u'xyz'),
[u'xyz'], msg=pattern)
def test_bug_2537(self):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment