Commit 817918cc authored by Barry Warsaw

Committing patch #591250 which provides "str1 in str2" when str1 is a
string longer than 1 character.
parent b57089cd
......@@ -432,15 +432,15 @@ This table lists the sequence operations sorted in ascending priority
and \var{j} are integers:
\begin{tableiii}{c|l|c}{code}{Operation}{Result}{Notes}
\lineiii{\var{x} in \var{s}}{\code{1} if an item of \var{s} is equal to \var{x}, else \code{0}}{}
\lineiii{\var{x} in \var{s}}{\code{1} if an item of \var{s} is equal to \var{x}, else \code{0}}{(1)}
\lineiii{\var{x} not in \var{s}}{\code{0} if an item of \var{s} is
equal to \var{x}, else \code{1}}{}
equal to \var{x}, else \code{1}}{(1)}
\hline
\lineiii{\var{s} + \var{t}}{the concatenation of \var{s} and \var{t}}{}
\lineiii{\var{s} * \var{n}\textrm{,} \var{n} * \var{s}}{\var{n} shallow copies of \var{s} concatenated}{(1)}
\lineiii{\var{s} * \var{n}\textrm{,} \var{n} * \var{s}}{\var{n} shallow copies of \var{s} concatenated}{(2)}
\hline
\lineiii{\var{s}[\var{i}]}{\var{i}'th item of \var{s}, origin 0}{(2)}
\lineiii{\var{s}[\var{i}:\var{j}]}{slice of \var{s} from \var{i} to \var{j}}{(2), (3)}
\lineiii{\var{s}[\var{i}]}{\var{i}'th item of \var{s}, origin 0}{(3)}
\lineiii{\var{s}[\var{i}:\var{j}]}{slice of \var{s} from \var{i} to \var{j}}{(3), (4)}
\hline
\lineiii{len(\var{s})}{length of \var{s}}{}
\lineiii{min(\var{s})}{smallest item of \var{s}}{}
......@@ -461,7 +461,12 @@ equal to \var{x}, else \code{1}}{}
Notes:
\begin{description}
\item[(1)] Values of \var{n} less than \code{0} are treated as
\item[(1)] When \var{s} is a string or Unicode string object, the
\code{in} and \code{not in} operations act like a substring test. In
Python versions before 2.3, \var{x} had to be a string of length 1.
In Python 2.3 and beyond, \var{x} may be a string of any length.
\item[(2)] Values of \var{n} less than \code{0} are treated as
\code{0} (which yields an empty sequence of the same type as
\var{s}). Note also that the copies are shallow; nested structures
are not copied. This often haunts new Python programmers; consider:
......@@ -489,12 +494,12 @@ Notes:
[[3], [5], [7]]
\end{verbatim}
\item[(2)] If \var{i} or \var{j} is negative, the index is relative to
\item[(3)] If \var{i} or \var{j} is negative, the index is relative to
the end of the string: \code{len(\var{s}) + \var{i}} or
\code{len(\var{s}) + \var{j}} is substituted. But note that \code{-0} is
still \code{0}.
\item[(3)] The slice of \var{s} from \var{i} to \var{j} is defined as
\item[(4)] The slice of \var{s} from \var{i} to \var{j} is defined as
the sequence of items with index \var{k} such that \code{\var{i} <=
\var{k} < \var{j}}. If \var{i} or \var{j} is greater than
\code{len(\var{s})}, use \code{len(\var{s})}. If \var{i} is omitted,
......
"""Common tests shared by test_string and test_userstring"""
import string
from test.test_support import verify, verbose, TestFailed, have_unicode
from test.test_support import verify, vereq, verbose, TestFailed, have_unicode
transtable = '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`xyzdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377'
......@@ -295,3 +295,23 @@ def run_method_tests(test):
data = 'x\x9c\xcbH\xcd\xc9\xc9W(\xcf/\xcaI\x01\x00\x1a\x0b\x04]'
verify('hello world'.encode('zlib') == data)
verify(data.decode('zlib') == 'hello world')
def test_exception(lhs, rhs, msg):
try:
lhs in rhs
except TypeError:
pass
else:
raise TestFailed, msg
def run_contains_tests(test):
    """Check ``in`` on plain strings, including multi-character operands.

    Covers the empty string, NUL characters at either end of the
    container, and operands as long as or longer than the container.
    Each expectation is checked with vereq().

    test -- accepted so the call has the same shape as the other
            run_*_tests(test) entry points; it is not used here.
    """
    # The empty string is contained in every string, itself included.
    vereq('' in '', True)
    vereq('' in 'abc', True)
    # NUL is an ordinary character for the purpose of the search.
    vereq('\0' in 'abc', False)
    vereq('\0' in '\0abc', True)
    vereq('\0' in 'abc\0', True)
    vereq('a' in '\0abc', True)
    # Operand equal to, longer than, and compared against an empty container.
    vereq('asdf' in 'asdf', True)
    vereq('asdf' in 'asd', False)
    vereq('asdf' in '', False)
......@@ -45,17 +45,8 @@ except TypeError:
check('c' in 'abc', "'c' not in 'abc'")
check('d' not in 'abc', "'d' in 'abc'")
try:
'' in 'abc'
check(0, "'' in 'abc' did not raise error")
except TypeError:
pass
try:
'ab' in 'abc'
check(0, "'ab' in 'abc' did not raise error")
except TypeError:
pass
check('' in '', "'' not in ''")
check('' in 'abc', "'' not in 'abc'")
try:
None in 'abc'
......@@ -71,17 +62,12 @@ if have_unicode:
check('c' in unicode('abc'), "'c' not in u'abc'")
check('d' not in unicode('abc'), "'d' in u'abc'")
try:
'' in unicode('abc')
check(0, "'' in u'abc' did not raise error")
except TypeError:
pass
try:
'ab' in unicode('abc')
check(0, "'ab' in u'abc' did not raise error")
except TypeError:
pass
check('' in unicode(''), "'' not in u''")
check(unicode('') in '', "u'' not in ''")
check(unicode('') in unicode(''), "u'' not in u''")
check('' in unicode('abc'), "'' not in u'abc'")
check(unicode('') in 'abc', "u'' not in 'abc'")
check(unicode('') in unicode('abc'), "u'' not in u'abc'")
try:
None in unicode('abc')
......@@ -94,35 +80,11 @@ if have_unicode:
check(unicode('c') in unicode('abc'), "u'c' not in u'abc'")
check(unicode('d') not in unicode('abc'), "u'd' in u'abc'")
try:
unicode('') in unicode('abc')
check(0, "u'' in u'abc' did not raise error")
except TypeError:
pass
try:
unicode('ab') in unicode('abc')
check(0, "u'ab' in u'abc' did not raise error")
except TypeError:
pass
# Test Unicode char in string
check(unicode('c') in 'abc', "u'c' not in 'abc'")
check(unicode('d') not in 'abc', "u'd' in 'abc'")
try:
unicode('') in 'abc'
check(0, "u'' in 'abc' did not raise error")
except TypeError:
pass
try:
unicode('ab') in 'abc'
check(0, "u'ab' in 'abc' did not raise error")
except TypeError:
pass
# A collection of tests on builtin sequence types
a = range(10)
for i in a:
......
......@@ -51,6 +51,7 @@ def test(name, input, output, *args):
string_tests.run_module_tests(test)
string_tests.run_method_tests(test)
string_tests.run_contains_tests(test)
string.whitespace
string.lowercase
......
......@@ -6,7 +6,7 @@ Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
from test.test_support import verify, verbose, TestFailed
from test.test_support import verify, vereq, verbose, TestFailed
import sys, string
if not sys.platform.startswith('java'):
......@@ -396,23 +396,23 @@ test('translate', u"abababc", u'iiix', {ord('a'):None, ord('b'):ord('i'), ord('c
# Contains:
# Membership checks for 'in' with str and unicode operands: single
# characters looked up in str/unicode containers, and str/unicode items
# looked up in tuples that also hold non-string members.
print 'Testing Unicode contains method...',
# The checks written with verify(), comparing the result of 'in' to 1 or 0.
verify(('a' in u'abdb') == 1)
verify(('a' in u'bdab') == 1)
verify(('a' in u'bdaba') == 1)
verify(('a' in u'bdba') == 1)
verify(('a' in u'bdba') == 1)
verify((u'a' in u'bdba') == 1)
verify((u'a' in u'bdb') == 0)
verify((u'a' in 'bdb') == 0)
verify((u'a' in 'bdba') == 1)
verify((u'a' in ('a',1,None)) == 1)
verify((u'a' in (1,None,'a')) == 1)
verify((u'a' in (1,None,u'a')) == 1)
verify(('a' in ('a',1,None)) == 1)
verify(('a' in (1,None,'a')) == 1)
verify(('a' in (1,None,u'a')) == 1)
verify(('a' in ('x',1,u'y')) == 0)
verify(('a' in ('x',1,None)) == 0)
# The same operand pairs written with vereq(), comparing to True or False.
vereq(('a' in u'abdb'), True)
vereq(('a' in u'bdab'), True)
vereq(('a' in u'bdaba'), True)
vereq(('a' in u'bdba'), True)
vereq(('a' in u'bdba'), True)
vereq((u'a' in u'bdba'), True)
vereq((u'a' in u'bdb'), False)
vereq((u'a' in 'bdb'), False)
vereq((u'a' in 'bdba'), True)
vereq((u'a' in ('a',1,None)), True)
vereq((u'a' in (1,None,'a')), True)
vereq((u'a' in (1,None,u'a')), True)
vereq(('a' in ('a',1,None)), True)
vereq(('a' in (1,None,'a')), True)
vereq(('a' in (1,None,u'a')), True)
vereq(('a' in ('x',1,u'y')), False)
vereq(('a' in ('x',1,None)), False)
print 'done.'
# Formatting:
......@@ -758,3 +758,42 @@ print u'abc\n',
print u'def\n'
print u'def\n'
print 'done.'
def test_exception(lhs, rhs, msg):
try:
lhs in rhs
except TypeError:
pass
else:
raise TestFailed, msg
def run_contains_tests():
    """Check ``in`` for every str/unicode pairing of operand and container.

    Each scenario is run three times: unicode in str, str in unicode, and
    unicode in unicode.  Each expectation is checked with vereq().
    """
    # Empty operand in an empty container.
    vereq(u'' in '', True)
    vereq('' in u'', True)
    vereq(u'' in u'', True)
    # Empty operand in a non-empty container.
    vereq(u'' in 'abc', True)
    vereq('' in u'abc', True)
    vereq(u'' in u'abc', True)
    # NUL character absent from the container.
    vereq(u'\0' in 'abc', False)
    vereq('\0' in u'abc', False)
    vereq(u'\0' in u'abc', False)
    # NUL character at the start of the container.
    vereq(u'\0' in '\0abc', True)
    vereq('\0' in u'\0abc', True)
    vereq(u'\0' in u'\0abc', True)
    # NUL character at the end of the container.
    vereq(u'\0' in 'abc\0', True)
    vereq('\0' in u'abc\0', True)
    vereq(u'\0' in u'abc\0', True)
    # Ordinary character located after a leading NUL.
    vereq(u'a' in '\0abc', True)
    vereq('a' in u'\0abc', True)
    vereq(u'a' in u'\0abc', True)
    # Operand equal to the whole container.
    vereq(u'asdf' in 'asdf', True)
    vereq('asdf' in u'asdf', True)
    vereq(u'asdf' in u'asdf', True)
    # Operand longer than the container.
    vereq(u'asdf' in 'asd', False)
    vereq('asdf' in u'asd', False)
    vereq(u'asdf' in u'asd', False)
    # Non-empty operand in an empty container.
    vereq(u'asdf' in '', False)
    vereq('asdf' in u'', False)
    vereq(u'asdf' in u'', False)
run_contains_tests()
......@@ -41,3 +41,4 @@ def test(methodname, input, output, *args):
print (methodname, input, output, args, res[0], res[1], res[2])
string_tests.run_method_tests(test)
string_tests.run_contains_tests(test)
......@@ -803,24 +803,31 @@ string_slice(register PyStringObject *a, register int i, register int j)
/*
 * Substring test behind "el in a" for string objects.
 *
 * a  -- the container; it is read with PyString_AS_STRING without a type
 *       check, so it is assumed to be a string object (TODO confirm
 *       against the caller).
 * el -- the left operand of "in".
 *
 * Returns 1 when el occurs in a, 0 when it does not, and -1 with
 * TypeError set when el is not a string.  A Unicode el is handed to
 * PyUnicode_Contains() and that function's result is returned as is.
 */
static int
string_contains(PyObject *a, PyObject *el)
{
    const char *haystack, *needle;
    int hay_len, needle_len, i;

#ifdef Py_USING_UNICODE
    if (PyUnicode_Check(el))
        return PyUnicode_Contains(a, el);
#endif
    if (!PyString_Check(el)) {
        PyErr_SetString(PyExc_TypeError,
            "'in <string>' requires string as left operand");
        return -1;
    }
    needle_len = PyString_Size(el);
    needle = PyString_AS_STRING(el);
    haystack = PyString_AS_STRING(a);
    hay_len = PyString_Size(a);

    /* optimize for a single character */
    if (needle_len == 1)
        return memchr(haystack, *needle, hay_len) != NULL;

    /* A needle longer than the container can never match.  Returning here
       also avoids forming a pointer before the start of the buffer, which
       "haystack + (hay_len - needle_len)" would do for a negative
       difference. */
    if (needle_len > hay_len)
        return 0;

    /* Try every start offset that leaves room for the whole needle.  An
       empty needle matches at offset 0 because memcmp() of zero bytes
       compares equal. */
    for (i = 0; i <= hay_len - needle_len; i++) {
        if (memcmp(haystack + i, needle, needle_len) == 0)
            return 1;
    }
    return 0;
}
......
......@@ -3732,15 +3732,14 @@ int PyUnicode_Contains(PyObject *container,
PyObject *element)
{
PyUnicodeObject *u = NULL, *v = NULL;
int result;
register const Py_UNICODE *p, *e;
register Py_UNICODE ch;
int result, size;
register const Py_UNICODE *lhs, *end, *rhs;
/* Coerce the two arguments */
v = (PyUnicodeObject *)PyUnicode_FromObject(element);
if (v == NULL) {
PyErr_SetString(PyExc_TypeError,
"'in <string>' requires character as left operand");
"'in <string>' requires string as left operand");
goto onError;
}
u = (PyUnicodeObject *)PyUnicode_FromObject(container);
......@@ -3749,20 +3748,27 @@ int PyUnicode_Contains(PyObject *container,
goto onError;
}
/* Check v in u */
if (PyUnicode_GET_SIZE(v) != 1) {
PyErr_SetString(PyExc_TypeError,
"'in <string>' requires character as left operand");
goto onError;
}
ch = *PyUnicode_AS_UNICODE(v);
p = PyUnicode_AS_UNICODE(u);
e = p + PyUnicode_GET_SIZE(u);
size = PyUnicode_GET_SIZE(v);
rhs = PyUnicode_AS_UNICODE(v);
lhs = PyUnicode_AS_UNICODE(u);
result = 0;
while (p < e) {
if (*p++ == ch) {
result = 1;
break;
if (size == 1) {
end = lhs + PyUnicode_GET_SIZE(u);
while (lhs < end) {
if (*lhs++ == *rhs) {
result = 1;
break;
}
}
}
else {
end = lhs + (PyUnicode_GET_SIZE(u) - size);
while (lhs <= end) {
if (memcmp(lhs++, rhs, size) == 0) {
result = 1;
break;
}
}
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment