The re tests now work again on Unicode-disabled builds.

Simplified the existing detection of Unicode-disabled builds.

The re tests now work again on Unicode-disabled builds.
Simplified the existing detection of Unicode-disabled builds.
7644ff1f · Serhiy Storchaka · ed5ea15b · 7644ff1f · 7644ff1f
Commit 7644ff1f authored Sep 14, 2014 by Serhiy Storchaka
Hide whitespace changes
Inline Side-by-side

Showing with 53 additions and 48 deletions

Lib/test/re_tests.py Lib/test/re_tests.py +1 -1

Lib/test/test_re.py Lib/test/test_re.py +52 -47

No files found.
--- a/Lib/test/re_tests.py
+++ b/Lib/test/re_tests.py
@@ -663,7 +663,7 @@ xyzabc

 try:
    u = eval("u'\N{LATIN CAPITAL LETTER A WITH DIAERESIS}'")
-except SyntaxError:
+except (SyntaxError, ValueError):
    pass
 else:
    tests.extend([

--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
 from test.test_support import verbose, run_unittest, import_module
 from test.test_support import precisionbigmemtest, _2G, cpython_only
-from test.test_support import captured_stdout
+from test.test_support import captured_stdout, have_unicode, requires_unicode, u
 import re
 from re import Scanner
 import sre_constants
@@ -86,6 +86,7 @@ class ReTests(unittest.TestCase):
        self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
                         'abc\ndef\n')

+    @requires_unicode
    def test_bug_1140(self):
        # re.sub(x, y, u'') should return u'', not '', and
        # re.sub(x, y, '') should return '', not u''.
@@ -376,10 +377,11 @@ class ReTests(unittest.TestCase):
                                   "abcd abc bcd bx", re.LOCALE).group(1), "bx")
        self.assertEqual(re.search(r"\B(b.)\B",
                                   "abc bcd bc abxd", re.LOCALE).group(1), "bx")
-        self.assertEqual(re.search(r"\b(b.)\b",
-                                   "abcd abc bcd bx", re.UNICODE).group(1), "bx")
-        self.assertEqual(re.search(r"\B(b.)\B",
-                                   "abc bcd bc abxd", re.UNICODE).group(1), "bx")
+        if have_unicode:
+            self.assertEqual(re.search(r"\b(b.)\b",
+                                       "abcd abc bcd bx", re.UNICODE).group(1), "bx")
+            self.assertEqual(re.search(r"\B(b.)\B",
+                                       "abc bcd bc abxd", re.UNICODE).group(1), "bx")
        self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
        self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
        self.assertIsNone(re.search(r"^\Aabc\Z$", "\nabc\n", re.M))
@@ -394,8 +396,9 @@ class ReTests(unittest.TestCase):
                                   "1aa! a").group(0), "1aa! a")
        self.assertEqual(re.search(r"\d\D\w\W\s\S",
                                   "1aa! a", re.LOCALE).group(0), "1aa! a")
-        self.assertEqual(re.search(r"\d\D\w\W\s\S",
-                                   "1aa! a", re.UNICODE).group(0), "1aa! a")
+        if have_unicode:
+            self.assertEqual(re.search(r"\d\D\w\W\s\S",
+                                       "1aa! a", re.UNICODE).group(0), "1aa! a")

    def test_string_boundaries(self):
        # See http://bugs.python.org/issue10713
@@ -423,13 +426,14 @@ class ReTests(unittest.TestCase):
        # Can match around the whitespace.
        self.assertEqual(len(re.findall(r"\B", " ")), 2)

+    @requires_unicode
    def test_bigcharset(self):
-        self.assertEqual(re.match(u"([\u2222\u2223])",
-                                  u"\u2222").group(1), u"\u2222")
-        self.assertEqual(re.match(u"([\u2222\u2223])",
-                                  u"\u2222", re.UNICODE).group(1), u"\u2222")
+        self.assertEqual(re.match(u(r"([\u2222\u2223])"),
+                                  unichr(0x2222)).group(1), unichr(0x2222))
+        self.assertEqual(re.match(u(r"([\u2222\u2223])"),
+                                  unichr(0x2222), re.UNICODE).group(1), unichr(0x2222))
        r = u'[%s]' % u''.join(map(unichr, range(256, 2**16, 255)))
-        self.assertEqual(re.match(r, u"\uff01", re.UNICODE).group(), u"\uff01")
+        self.assertEqual(re.match(r, unichr(0xff01), re.UNICODE).group(), unichr(0xff01))

    def test_big_codesize(self):
        # Issue #1160
@@ -476,7 +480,8 @@ class ReTests(unittest.TestCase):
        import _sre
        self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
        self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
-        self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))
+        if have_unicode:
+            self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))

        self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
        self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
@@ -503,8 +508,9 @@ class ReTests(unittest.TestCase):
        self.assertEqual(m.group(), match)
        self.assertEqual(m.span(), span)

+    @requires_unicode
    def test_re_escape(self):
-        alnum_chars = string.ascii_letters + string.digits
+        alnum_chars = unicode(string.ascii_letters + string.digits)
        p = u''.join(unichr(i) for i in range(256))
        for c in p:
            if c in alnum_chars:
@@ -517,7 +523,7 @@ class ReTests(unittest.TestCase):
        self.assertMatch(re.escape(p), p)

    def test_re_escape_byte(self):
-        alnum_chars = (string.ascii_letters + string.digits).encode('ascii')
+        alnum_chars = string.ascii_letters + string.digits
        p = ''.join(chr(i) for i in range(256))
        for b in p:
            if b in alnum_chars:
@@ -529,20 +535,21 @@ class ReTests(unittest.TestCase):
            self.assertMatch(re.escape(b), b)
        self.assertMatch(re.escape(p), p)

+    @requires_unicode
    def test_re_escape_non_ascii(self):
-        s = u'xxx\u2620\u2620\u2620xxx'
+        s = u(r'xxx\u2620\u2620\u2620xxx')
        s_escaped = re.escape(s)
-        self.assertEqual(s_escaped, u'xxx\\\u2620\\\u2620\\\u2620xxx')
+        self.assertEqual(s_escaped, u(r'xxx\\\u2620\\\u2620\\\u2620xxx'))
        self.assertMatch(s_escaped, s)
-        self.assertMatch(u'.%s+.' % re.escape(u'\u2620'), s,
-                         u'x\u2620\u2620\u2620x', (2, 7), re.search)
+        self.assertMatch(u'.%s+.' % re.escape(unichr(0x2620)), s,
+                         u(r'x\u2620\u2620\u2620x'), (2, 7), re.search)

    def test_re_escape_non_ascii_bytes(self):
-        b = u'y\u2620y\u2620y'.encode('utf-8')
+        b = b'y\xe2\x98\xa0y\xe2\x98\xa0y'
        b_escaped = re.escape(b)
        self.assertEqual(b_escaped, b'y\\\xe2\\\x98\\\xa0y\\\xe2\\\x98\\\xa0y')
        self.assertMatch(b_escaped, b)
-        res = re.findall(re.escape(u'\u2620'.encode('utf-8')), b)
+        res = re.findall(re.escape(b'\xe2\x98\xa0'), b)
        self.assertEqual(len(res), 2)

    def test_pickling(self):
@@ -621,8 +628,9 @@ class ReTests(unittest.TestCase):
        # non-recursive scheme was implemented.
        self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)

+    @requires_unicode
    def test_bug_612074(self):
-        pat=u"["+re.escape(u"\u2039")+u"]"
+        pat=u"["+re.escape(unichr(0x2039))+u"]"
        self.assertEqual(re.compile(pat) and 1, 1)

    def test_stack_overflow(self):
@@ -696,12 +704,9 @@ class ReTests(unittest.TestCase):
        self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
                         ('a', None, None))

+    @requires_unicode
    def test_bug_764548(self):
        # bug 764548, re.compile() barfs on str/unicode subclasses
-        try:
-            unicode
-        except NameError:
-            self.skipTest('no problem if we have no unicode')
        class my_unicode(unicode): pass
        pat = re.compile(my_unicode("abc"))
        self.assertIsNone(pat.match("xyz"))
@@ -711,20 +716,14 @@ class ReTests(unittest.TestCase):
        self.assertEqual([item.group(0) for item in iter],
                         [":", "::", ":::"])

+    @requires_unicode
    def test_bug_926075(self):
-        try:
-            unicode
-        except NameError:
-            self.skipTest('no problem if we have no unicode')
        self.assertIsNot(re.compile('bug_926075'),
-                         re.compile(eval("u'bug_926075'")))
+                         re.compile(u'bug_926075'))

+    @requires_unicode
    def test_bug_931848(self):
-        try:
-            unicode
-        except NameError:
-            self.skipTest('no problem if we have no unicode')
-        pattern = eval('u"[\u002E\u3002\uFF0E\uFF61]"')
+        pattern = u(r"[\u002E\u3002\uFF0E\uFF61]")
        self.assertEqual(re.compile(pattern).split("a.b.c"),
                         ['a','b','c'])

@@ -743,23 +742,24 @@ class ReTests(unittest.TestCase):
        self.assertEqual(iter.next().span(), (4, 4))
        self.assertRaises(StopIteration, iter.next)

+    @requires_unicode
    def test_bug_6561(self):
        # '\d' should match characters in Unicode category 'Nd'
        # (Number, Decimal Digit), but not those in 'Nl' (Number,
        # Letter) or 'No' (Number, Other).
        decimal_digits = [
-            u'\u0037', # '\N{DIGIT SEVEN}', category 'Nd'
-            u'\u0e58', # '\N{THAI DIGIT SIX}', category 'Nd'
-            u'\uff10', # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd'
+            unichr(0x0037), # '\N{DIGIT SEVEN}', category 'Nd'
+            unichr(0x0e58), # '\N{THAI DIGIT SIX}', category 'Nd'
+            unichr(0xff10), # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd'
            ]
        for x in decimal_digits:
            self.assertEqual(re.match('^\d$', x, re.UNICODE).group(0), x)

        not_decimal_digits = [
-            u'\u2165', # '\N{ROMAN NUMERAL SIX}', category 'Nl'
-            u'\u3039', # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl'
-            u'\u2082', # '\N{SUBSCRIPT TWO}', category 'No'
-            u'\u32b4', # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No'
+            unichr(0x2165), # '\N{ROMAN NUMERAL SIX}', category 'Nl'
+            unichr(0x3039), # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl'
+            unichr(0x2082), # '\N{SUBSCRIPT TWO}', category 'No'
+            unichr(0x32b4), # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No'
            ]
        for x in not_decimal_digits:
            self.assertIsNone(re.match('^\d$', x, re.UNICODE))
@@ -767,11 +767,15 @@ class ReTests(unittest.TestCase):
    def test_empty_array(self):
        # SF buf 1647541
        import array
-        for typecode in 'cbBuhHiIlLfd':
+        typecodes = 'cbBhHiIlLfd'
+        if have_unicode:
+            typecodes += 'u'
+        for typecode in typecodes:
            a = array.array(typecode)
            self.assertIsNone(re.compile("bla").match(a))
            self.assertEqual(re.compile("").match(a).groups(), ())

+    @requires_unicode
    def test_inline_flags(self):
        # Bug #1700
        upper_char = unichr(0x1ea0) # Latin Capital Letter A with Dot Bellow
@@ -906,9 +910,10 @@ class ReTests(unittest.TestCase):
                pattern = '.' + reps + mod + 'yz'
                self.assertEqual(re.compile(pattern, re.S).findall('xyz'),
                                 ['xyz'], msg=pattern)
-                pattern = pattern.encode()
-                self.assertEqual(re.compile(pattern, re.S).findall(b'xyz'),
-                                 [b'xyz'], msg=pattern)
+                if have_unicode:
+                    pattern = unicode(pattern)
+                    self.assertEqual(re.compile(pattern, re.S).findall(u'xyz'),
+                                     [u'xyz'], msg=pattern)


    def test_bug_2537(self):