Commit 66a796e5 authored by Georg Brandl

Patch #1601678: move intern() to sys.intern().

parent 376446dd
...@@ -1262,17 +1262,3 @@ bypass these functions without concerns about missing something important. ...@@ -1262,17 +1262,3 @@ bypass these functions without concerns about missing something important.
argument). argument).
\end{funcdesc} \end{funcdesc}
\begin{funcdesc}{intern}{string}
Enter \var{string} in the table of ``interned'' strings and return
the interned string -- which is \var{string} itself or a copy.
Interning strings is useful to gain a little performance on
dictionary lookup -- if the keys in a dictionary are interned, and
the lookup key is interned, the key comparisons (after hashing) can
be done by a pointer compare instead of a string compare. Normally,
the names used in Python programs are automatically interned, and
the dictionaries used to hold module, class or instance attributes
have interned keys. \versionchanged[Interned strings are not
immortal (like they used to be in Python 2.2 and before);
you must keep a reference to the return value of \function{intern()}
around to benefit from it]{2.3}
\end{funcdesc}
...@@ -340,6 +340,21 @@ else: ...@@ -340,6 +340,21 @@ else:
\versionadded{1.5.2} \versionadded{1.5.2}
\end{datadesc} \end{datadesc}
\begin{funcdesc}{intern}{string}
Enter \var{string} in the table of ``interned'' strings and return
the interned string -- which is \var{string} itself or a copy.
Interning strings is useful to gain a little performance on
dictionary lookup -- if the keys in a dictionary are interned, and
the lookup key is interned, the key comparisons (after hashing) can
be done by a pointer compare instead of a string compare. Normally,
the names used in Python programs are automatically interned, and
the dictionaries used to hold module, class or instance attributes
have interned keys. \versionchanged[Interned strings are not
immortal (like they used to be in Python 2.2 and before);
you must keep a reference to the return value of \function{intern()}
around to benefit from it]{2.3}
\end{funcdesc}
\begin{datadesc}{last_type} \begin{datadesc}{last_type}
\dataline{last_value} \dataline{last_value}
\dataline{last_traceback} \dataline{last_traceback}
......
...@@ -2700,7 +2700,7 @@ standard module \module{__builtin__}\refbimodindex{__builtin__}: ...@@ -2700,7 +2700,7 @@ standard module \module{__builtin__}\refbimodindex{__builtin__}:
'complex', 'copyright', 'credits', 'delattr', 'dict', 'dir', 'divmod', 'complex', 'copyright', 'credits', 'delattr', 'dict', 'dir', 'divmod',
'enumerate', 'eval', 'exec', 'execfile', 'exit', 'file', 'filter', 'float', 'enumerate', 'eval', 'exec', 'execfile', 'exit', 'file', 'filter', 'float',
'frozenset', 'getattr', 'globals', 'hasattr', 'hash', 'help', 'hex', 'frozenset', 'getattr', 'globals', 'hasattr', 'hash', 'help', 'hex',
'id', 'int', 'intern', 'isinstance', 'issubclass', 'iter', 'id', 'int', 'isinstance', 'issubclass', 'iter',
'len', 'license', 'list', 'locals', 'long', 'map', 'max', 'min', 'len', 'license', 'list', 'locals', 'long', 'map', 'max', 'min',
'object', 'oct', 'open', 'ord', 'pow', 'property', 'quit', 'range', 'object', 'oct', 'open', 'ord', 'pow', 'property', 'quit', 'range',
'reload', 'repr', 'reversed', 'round', 'set', 'reload', 'repr', 'reversed', 'round', 'set',
......
...@@ -28,7 +28,7 @@ functions should be applied to nil objects. ...@@ -28,7 +28,7 @@ functions should be applied to nil objects.
Interning strings (ob_sstate) tries to ensure that only one string Interning strings (ob_sstate) tries to ensure that only one string
object with a given value exists, so equality tests can be one pointer object with a given value exists, so equality tests can be one pointer
comparison. This is generally restricted to strings that "look like" comparison. This is generally restricted to strings that "look like"
Python identifiers, although the intern() builtin can be used to force Python identifiers, although the sys.intern() function can be used to force
interning of any string. interning of any string.
Together, these sped the interpreter by up to 20%. */ Together, these sped the interpreter by up to 20%. */
......
...@@ -842,30 +842,6 @@ class BuiltinTest(unittest.TestCase): ...@@ -842,30 +842,6 @@ class BuiltinTest(unittest.TestCase):
self.assertEqual(int(Foo4()), 42L) self.assertEqual(int(Foo4()), 42L)
self.assertRaises(TypeError, int, Foo5()) self.assertRaises(TypeError, int, Foo5())
# Exercises the intern() builtin: argument checking, identity of the
# returned object, and rejection of str subclasses.
def test_intern(self):
# Calling intern() with no argument must raise TypeError.
self.assertRaises(TypeError, intern)
s = "never interned before"
# Interning s is expected to hand back s itself.
self.assert_(intern(s) is s)
# s2 is an equal string built at runtime (presumably a distinct object);
# interning it must return the already-interned s.
s2 = s.swapcase().swapcase()
self.assert_(intern(s2) is s)
# Subclasses of string can't be interned, because they
# provide too much opportunity for insane things to happen.
# We don't want them in the interned dict and if they aren't
# actually interned, we don't want to create the appearance
# that they are by allowing intern() to succeed.
class S(str):
def __hash__(self):
return 123
self.assertRaises(TypeError, intern, S("abc"))
# It's still safe to pass these strings to routines that
# call intern internally, e.g. PyObject_SetAttr().
s = S("abc")
setattr(s, s, s)
self.assertEqual(getattr(s, s), s)
def test_iter(self): def test_iter(self):
self.assertRaises(TypeError, iter) self.assertRaises(TypeError, iter)
self.assertRaises(TypeError, iter, 42, 42) self.assertRaises(TypeError, iter, 42, 42)
......
...@@ -350,6 +350,31 @@ class SysModuleTest(unittest.TestCase): ...@@ -350,6 +350,31 @@ class SysModuleTest(unittest.TestCase):
# the test runs under regrtest. # the test runs under regrtest.
self.assert_(sys.__stdout__.encoding == sys.__stderr__.encoding) self.assert_(sys.__stdout__.encoding == sys.__stderr__.encoding)
# Exercises sys.intern(): argument checking, identity of the returned
# object, and rejection of str subclasses.
def test_intern(self):
# Calling sys.intern() with no argument must raise TypeError.
self.assertRaises(TypeError, sys.intern)
s = "never interned before"
# Interning s is expected to hand back s itself.
self.assert_(sys.intern(s) is s)
# s2 is an equal string built at runtime (presumably a distinct object);
# interning it must return the already-interned s.
s2 = s.swapcase().swapcase()
self.assert_(sys.intern(s2) is s)
# Subclasses of string can't be interned, because they
# provide too much opportunity for insane things to happen.
# We don't want them in the interned dict and if they aren't
# actually interned, we don't want to create the appearance
# that they are by allowing intern() to succeed.
class S(str):
def __hash__(self):
return 123
self.assertRaises(TypeError, sys.intern, S("abc"))
# It's still safe to pass these strings to routines that
# call intern internally, e.g. PyObject_SetAttr().
s = S("abc")
setattr(s, s, s)
self.assertEqual(getattr(s, s), s)
def test_main(): def test_main():
test.test_support.run_unittest(SysModuleTest) test.test_support.run_unittest(SysModuleTest)
......
...@@ -12,7 +12,7 @@ What's New in Python 3000? ...@@ -12,7 +12,7 @@ What's New in Python 3000?
TO DO TO DO
----- -----
- See PEP 3000. - See PEP 3000, 3100.
- Test merging certain changes from the 2.5 HEAD code. - Test merging certain changes from the 2.5 HEAD code.
...@@ -36,7 +36,11 @@ TO DO ...@@ -36,7 +36,11 @@ TO DO
Core and Builtins Core and Builtins
----------------- -----------------
- Renamed nb_nonzero to nb_bool and __nonzero__ to __bool__ - Moved intern() to sys.intern().
- exec is now a function.
- Renamed nb_nonzero to nb_bool and __nonzero__ to __bool__.
- Classic classes are a thing of the past. All classes are new style. - Classic classes are a thing of the past. All classes are new style.
...@@ -90,7 +94,7 @@ Core and Builtins ...@@ -90,7 +94,7 @@ Core and Builtins
- zip returns an iterator - zip returns an iterator
- Additions: - Additions:
set literals set literals, ellipsis literal
Extension Modules Extension Modules
......
...@@ -382,7 +382,7 @@ support for features needed by `python-mode'.") ...@@ -382,7 +382,7 @@ support for features needed by `python-mode'.")
"delattr" "dict" "dir" "divmod" "delattr" "dict" "dir" "divmod"
"enumerate" "eval" "execfile" "exit" "file" "enumerate" "eval" "execfile" "exit" "file"
"filter" "float" "getattr" "globals" "hasattr" "filter" "float" "getattr" "globals" "hasattr"
"hash" "hex" "id" "int" "intern" "hash" "hex" "id" "int"
"isinstance" "issubclass" "iter" "len" "license" "isinstance" "issubclass" "iter" "len" "license"
"list" "locals" "long" "map" "max" "min" "object" "list" "locals" "long" "map" "max" "min" "object"
"oct" "open" "ord" "pow" "property" "range" "oct" "open" "ord" "pow" "property" "range"
......
...@@ -1118,31 +1118,6 @@ PyDoc_STRVAR(hex_doc, ...@@ -1118,31 +1118,6 @@ PyDoc_STRVAR(hex_doc,
Return the hexadecimal representation of an integer or long integer."); Return the hexadecimal representation of an integer or long integer.");
/* Implementation of the intern() builtin.
 *
 * Takes exactly one string argument, enters it in the table of interned
 * strings and returns the interned object.  Raises TypeError (returns NULL)
 * when the argument is an instance of a string subclass.
 */
static PyObject *
builtin_intern(PyObject *self, PyObject *args)
{
PyObject *s;
/* Parse the single argument into s; on failure the error is already set. */
if (!PyArg_ParseTuple(args, "S:intern", &s))
return NULL;
/* Only exact strings are accepted; subclasses are refused outright. */
if (!PyString_CheckExact(s)) {
PyErr_SetString(PyExc_TypeError,
"can't intern subclass of string");
return NULL;
}
/* Take a reference before interning: s is passed by address, so the call
 * may leave it pointing at a previously interned string instead. */
Py_INCREF(s);
PyString_InternInPlace(&s);
return s;
}
/* Docstring shown for the function. */
PyDoc_STRVAR(intern_doc,
"intern(string) -> string\n\
\n\
``Intern'' the given string. This enters the string in the (global)\n\
table of interned strings whose purpose is to speed up dictionary lookups.\n\
Return the string itself or the previously interned string object with the\n\
same value.");
static PyObject * static PyObject *
builtin_iter(PyObject *self, PyObject *args) builtin_iter(PyObject *self, PyObject *args)
{ {
...@@ -2069,7 +2044,6 @@ static PyMethodDef builtin_methods[] = { ...@@ -2069,7 +2044,6 @@ static PyMethodDef builtin_methods[] = {
{"hash", builtin_hash, METH_O, hash_doc}, {"hash", builtin_hash, METH_O, hash_doc},
{"hex", builtin_hex, METH_O, hex_doc}, {"hex", builtin_hex, METH_O, hex_doc},
{"id", builtin_id, METH_O, id_doc}, {"id", builtin_id, METH_O, id_doc},
{"intern", builtin_intern, METH_VARARGS, intern_doc},
{"isinstance", builtin_isinstance, METH_VARARGS, isinstance_doc}, {"isinstance", builtin_isinstance, METH_VARARGS, isinstance_doc},
{"issubclass", builtin_issubclass, METH_VARARGS, issubclass_doc}, {"issubclass", builtin_issubclass, METH_VARARGS, issubclass_doc},
{"iter", builtin_iter, METH_VARARGS, iter_doc}, {"iter", builtin_iter, METH_VARARGS, iter_doc},
......
...@@ -264,6 +264,32 @@ operating system filenames." ...@@ -264,6 +264,32 @@ operating system filenames."
#endif #endif
/* Implementation of sys.intern().
 *
 * Takes exactly one string argument, enters it in the table of interned
 * strings and returns the interned object.  Raises TypeError (returns NULL)
 * when the argument is an instance of a string subclass.
 */
static PyObject *
sys_intern(PyObject *self, PyObject *args)
{
PyObject *s;
/* Parse the single argument into s; on failure the error is already set. */
if (!PyArg_ParseTuple(args, "S:intern", &s))
return NULL;
/* Only exact strings are accepted; subclasses are refused outright. */
if (!PyString_CheckExact(s)) {
PyErr_SetString(PyExc_TypeError,
"can't intern subclass of string");
return NULL;
}
/* Take a reference before interning: s is passed by address, so the call
 * may leave it pointing at a previously interned string instead. */
Py_INCREF(s);
PyString_InternInPlace(&s);
return s;
}
/* Docstring shown for the function. */
PyDoc_STRVAR(intern_doc,
"intern(string) -> string\n\
\n\
``Intern'' the given string. This enters the string in the (global)\n\
table of interned strings whose purpose is to speed up dictionary lookups.\n\
Return the string itself or the previously interned string object with the\n\
same value.");
/* /*
* Cached interned string objects used for calling the profile and * Cached interned string objects used for calling the profile and
* trace functions. Initialized by trace_init(). * trace functions. Initialized by trace_init().
...@@ -772,6 +798,7 @@ static PyMethodDef sys_methods[] = { ...@@ -772,6 +798,7 @@ static PyMethodDef sys_methods[] = {
{"getwindowsversion", (PyCFunction)sys_getwindowsversion, METH_NOARGS, {"getwindowsversion", (PyCFunction)sys_getwindowsversion, METH_NOARGS,
getwindowsversion_doc}, getwindowsversion_doc},
#endif /* MS_WINDOWS */ #endif /* MS_WINDOWS */
{"intern", sys_intern, METH_VARARGS, intern_doc},
#ifdef USE_MALLOPT #ifdef USE_MALLOPT
{"mdebug", sys_mdebug, METH_VARARGS}, {"mdebug", sys_mdebug, METH_VARARGS},
#endif #endif
......
from pybench import Test from pybench import Test
from string import join from string import join
import sys
class ConcatStrings(Test): class ConcatStrings(Test):
...@@ -174,7 +175,7 @@ class CompareInternedStrings(Test): ...@@ -174,7 +175,7 @@ class CompareInternedStrings(Test):
def test(self): def test(self):
# Make sure the strings *are* interned # Make sure the strings *are* interned
s = intern(join(map(str,range(10)))) s = sys.intern(join(map(str,range(10))))
t = s t = s
for i in xrange(self.rounds): for i in xrange(self.rounds):
...@@ -240,7 +241,7 @@ class CompareInternedStrings(Test): ...@@ -240,7 +241,7 @@ class CompareInternedStrings(Test):
def calibrate(self): def calibrate(self):
s = intern(join(map(str,range(10)))) s = sys.intern(join(map(str,range(10))))
t = s t = s
for i in xrange(self.rounds): for i in xrange(self.rounds):
......
...@@ -198,7 +198,7 @@ def readwarnings(warningsfile): ...@@ -198,7 +198,7 @@ def readwarnings(warningsfile):
list = warnings.get(filename) list = warnings.get(filename)
if list is None: if list is None:
warnings[filename] = list = [] warnings[filename] = list = []
list.append((int(lineno), intern(what))) list.append((int(lineno), sys.intern(what)))
f.close() f.close()
return warnings return warnings
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment