Commit c304552d authored by Marc-André Lemburg's avatar Marc-André Lemburg

Removing UTF-16 aware Unicode comparison code. This kind of compare

function (together with other locale aware ones) should go into a new collation
support module. See python-dev for a discussion of this removal.

Note: This patch should also be applied to the 1.6 branch.
parent 3165d488
test_unicode test_unicode
Testing Unicode comparisons... done. Testing Unicode comparisons... done.
Testing UTF-16 code point order comparisons... done.
Testing Unicode contains method... done. Testing Unicode contains method... done.
Testing Unicode formatting strings... done. Testing Unicode formatting strings... done.
Testing builtin codecs... done. Testing builtin codecs... done.
......
...@@ -168,17 +168,20 @@ assert 'abc' < u'abcd' ...@@ -168,17 +168,20 @@ assert 'abc' < u'abcd'
assert u'abc' < u'abcd' assert u'abc' < u'abcd'
print 'done.' print 'done.'
print 'Testing UTF-16 code point order comparisons...', if 0:
#No surrogates, no fixup required. # Move these tests to a Unicode collation module test...
assert u'\u0061' < u'\u20ac'
# Non surrogate below surrogate value, no fixup required print 'Testing UTF-16 code point order comparisons...',
assert u'\u0061' < u'\ud800\udc02' #No surrogates, no fixup required.
assert u'\u0061' < u'\u20ac'
# Non surrogate above surrogate value, fixup required # Non surrogate below surrogate value, no fixup required
def test_lecmp(s, s2): assert u'\u0061' < u'\ud800\udc02'
# Non surrogate above surrogate value, fixup required
def test_lecmp(s, s2):
assert s < s2 , "comparison failed on %s < %s" % (s, s2) assert s < s2 , "comparison failed on %s < %s" % (s, s2)
def test_fixup(s): def test_fixup(s):
s2 = u'\ud800\udc01' s2 = u'\ud800\udc01'
test_lecmp(s, s2) test_lecmp(s, s2)
s2 = u'\ud900\udc01' s2 = u'\ud900\udc01'
...@@ -212,12 +215,12 @@ def test_fixup(s): ...@@ -212,12 +215,12 @@ def test_fixup(s):
s2 = u'\udb00\udfff' s2 = u'\udb00\udfff'
test_lecmp(s, s2) test_lecmp(s, s2)
test_fixup(u'\ue000') test_fixup(u'\ue000')
test_fixup(u'\uff61') test_fixup(u'\uff61')
# Surrogates on both sides, no fixup required # Surrogates on both sides, no fixup required
assert u'\ud800\udc02' < u'\ud84d\udc56' assert u'\ud800\udc02' < u'\ud84d\udc56'
print 'done.' print 'done.'
test('ljust', u'abc', u'abc ', 10) test('ljust', u'abc', u'abc ', 10)
test('rjust', u'abc', u' abc', 10) test('rjust', u'abc', u' abc', 10)
......
...@@ -3169,6 +3169,12 @@ unicode_center(PyUnicodeObject *self, PyObject *args) ...@@ -3169,6 +3169,12 @@ unicode_center(PyUnicodeObject *self, PyObject *args)
return (PyObject*) pad(self, left, marg - left, ' '); return (PyObject*) pad(self, left, marg - left, ' ');
} }
#if 0
/* This code should go into some future Unicode collation support
module. The basic comparison should compare ordinals on a naive
basis (this is what Java does and thus JPython too).
/* speedy UTF-16 code point order comparison */ /* speedy UTF-16 code point order comparison */
/* gleaned from: */ /* gleaned from: */
/* http://www-4.ibm.com/software/developer/library/utf16.html?dwzone=unicode */ /* http://www-4.ibm.com/software/developer/library/utf16.html?dwzone=unicode */
...@@ -3213,6 +3219,33 @@ unicode_compare(PyUnicodeObject *str1, PyUnicodeObject *str2) ...@@ -3213,6 +3219,33 @@ unicode_compare(PyUnicodeObject *str1, PyUnicodeObject *str2)
return (len1 < len2) ? -1 : (len1 != len2); return (len1 < len2) ? -1 : (len1 != len2);
} }
#else
/* Naive element-by-element comparison of two Unicode objects.

   Walks both buffers in parallel and compares the numeric values of the
   Py_UNICODE elements at each position.  The first differing pair decides
   the result.  If one buffer is exhausted before a difference is found,
   the string with fewer elements orders first.

   Returns -1 if str1 orders before str2, 1 if it orders after, and 0 if
   both have the same length and identical contents. */
static int
unicode_compare(PyUnicodeObject *str1, PyUnicodeObject *str2)
{
    Py_UNICODE *left = str1->str;
    Py_UNICODE *right = str2->str;
    int left_remaining = str1->length;
    int right_remaining = str2->length;

    for (; left_remaining > 0 && right_remaining > 0;
         left_remaining--, right_remaining--) {
        /* Widen to long before subtracting so the sign of the result
           reflects the ordering of the two element values. */
        long delta = (long)*left++ - (long)*right++;

        if (delta != 0) {
            return (delta < 0) ? -1 : 1;
        }
    }

    /* Both counters were decremented in lockstep, so comparing what is
       left is equivalent to comparing the original lengths. */
    if (left_remaining < right_remaining) {
        return -1;
    }
    return (left_remaining != right_remaining) ? 1 : 0;
}
#endif
int PyUnicode_Compare(PyObject *left, int PyUnicode_Compare(PyObject *left,
PyObject *right) PyObject *right)
{ {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment