Commit 58425d31 authored by Brett Cannon

Make dbm.dumb encode strings as UTF-8. Also fix it so it accepts bytes and
strings.

Closes issue #3799.
parent 6e0d68e9
......@@ -84,6 +84,7 @@ class _Database(collections.MutableMapping):
for line in f:
line = line.rstrip()
key, pos_and_siz_pair = eval(line)
key = key.encode('Latin-1')
self._index[key] = pos_and_siz_pair
f.close()
......@@ -110,13 +111,16 @@ class _Database(collections.MutableMapping):
f = self._io.open(self._dirfile, 'w')
self._chmod(self._dirfile)
for key, pos_and_siz_pair in self._index.items():
f.write("%r, %r\n" % (key, pos_and_siz_pair))
# Use Latin-1 since it has no qualms with any value in any
# position; UTF-8, though, does care sometimes.
f.write("%r, %r\n" % (key.decode('Latin-1'), pos_and_siz_pair))
f.close()
sync = _commit
def __getitem__(self, key):
key = key.decode("latin-1")
if isinstance(key, str):
key = key.encode('utf-8')
pos, siz = self._index[key] # may raise KeyError
f = _io.open(self._datfile, 'rb')
f.seek(pos)
......@@ -161,11 +165,12 @@ class _Database(collections.MutableMapping):
f.close()
def __setitem__(self, key, val):
if not isinstance(key, bytes):
raise TypeError("keys must be bytes")
key = key.decode("latin-1") # hashable bytes
if isinstance(key, str):
key = key.encode('utf-8')
elif not isinstance(key, (bytes, bytearray)):
raise TypeError("keys must be bytes or strings")
if not isinstance(val, (bytes, bytearray)):
raise TypeError("values must be byte strings")
raise TypeError("values must be bytes")
if key not in self._index:
self._addkey(key, self._addval(val))
else:
......@@ -191,7 +196,8 @@ class _Database(collections.MutableMapping):
# (so that _commit() never gets called).
def __delitem__(self, key):
key = key.decode("latin-1")
if isinstance(key, str):
key = key.encode('utf-8')
# The blocks used by the associated value are lost.
del self._index[key]
# XXX It's unclear why we do a _commit() here (the code always
......@@ -201,14 +207,14 @@ class _Database(collections.MutableMapping):
self._commit()
def keys(self):
return [key.encode("latin-1") for key in self._index.keys()]
return list(self._index.keys())
def items(self):
return [(key.encode("latin-1"), self[key.encode("latin-1")])
for key in self._index.keys()]
return [(key, self[key]) for key in self._index.keys()]
def __contains__(self, key):
key = key.decode("latin-1")
if isinstance(key, str):
key = key.encode('utf-8')
return key in self._index
def iterkeys(self):
......
......@@ -19,13 +19,14 @@ def _delete_files():
pass
class DumbDBMTestCase(unittest.TestCase):
_dict = {'0': b'',
'a': b'Python:',
'b': b'Programming',
'c': b'the',
'd': b'way',
'f': b'Guido',
'g': b'intended',
_dict = {b'0': b'',
b'a': b'Python:',
b'b': b'Programming',
b'c': b'the',
b'd': b'way',
b'f': b'Guido',
b'g': b'intended',
'\u00fc'.encode('utf-8') : b'!',
}
def __init__(self, *args):
......@@ -35,7 +36,7 @@ class DumbDBMTestCase(unittest.TestCase):
f = dumbdbm.open(_fname, 'c')
self.assertEqual(list(f.keys()), [])
for key in self._dict:
f[key.encode("ascii")] = self._dict[key]
f[key] = self._dict[key]
self.read_helper(f)
f.close()
......@@ -73,7 +74,7 @@ class DumbDBMTestCase(unittest.TestCase):
def test_dumbdbm_modification(self):
self.init_db()
f = dumbdbm.open(_fname, 'w')
self._dict['g'] = f[b'g'] = b"indented"
self._dict[b'g'] = f[b'g'] = b"indented"
self.read_helper(f)
f.close()
......@@ -105,6 +106,21 @@ class DumbDBMTestCase(unittest.TestCase):
self.assertEqual(f[b'1'], b'hello2')
f.close()
def test_str_read(self):
self.init_db()
f = dumbdbm.open(_fname, 'r')
self.assertEqual(f['\u00fc'], self._dict['\u00fc'.encode('utf-8')])
def test_str_write_contains(self):
self.init_db()
f = dumbdbm.open(_fname)
f['\u00fc'] = b'!'
f.close()
f = dumbdbm.open(_fname, 'r')
self.assert_('\u00fc' in f)
self.assertEqual(f['\u00fc'.encode('utf-8')],
self._dict['\u00fc'.encode('utf-8')])
def test_line_endings(self):
# test for bug #1172763: dumbdbm would die if the line endings
# weren't what was expected.
......@@ -129,16 +145,16 @@ class DumbDBMTestCase(unittest.TestCase):
def read_helper(self, f):
keys = self.keys_helper(f)
for key in self._dict:
self.assertEqual(self._dict[key], f[key.encode("ascii")])
self.assertEqual(self._dict[key], f[key])
def init_db(self):
f = dumbdbm.open(_fname, 'w')
for k in self._dict:
f[k.encode("ascii")] = self._dict[k]
f[k] = self._dict[k]
f.close()
def keys_helper(self, f):
keys = sorted(k.decode("ascii") for k in f.keys())
keys = sorted(f.keys())
dkeys = sorted(self._dict.keys())
self.assertEqual(keys, dkeys)
return keys
......@@ -155,12 +171,12 @@ class DumbDBMTestCase(unittest.TestCase):
if random.random() < 0.2:
if k in d:
del d[k]
del f[k.encode("ascii")]
del f[k]
else:
v = random.choice((b'a', b'b', b'c')) * random.randrange(10000)
d[k] = v
f[k.encode("ascii")] = v
self.assertEqual(f[k.encode("ascii")], v)
f[k] = v
self.assertEqual(f[k], v)
f.close()
f = dumbdbm.open(_fname)
......
......@@ -19,7 +19,7 @@ Core and Builtins
- Issue #3327: Don't overallocate in the modules_by_index list.
- Issue #1721812: Binary set operations and copy() returned the input type
instead of the appropriate base type. This was incorrect because set
instead of the appropriate base type. This was incorrect because set
subclasses would be created without their __init__() method being called.
The corrected behavior brings sets into line with lists and dicts.
......@@ -33,6 +33,9 @@ Core and Builtins
Library
-------
- Issue #3799: Fix dbm.dumb to accept strings as well as bytes for keys. String
keys are now written out in UTF-8.
- Issue #4338: Fix distutils upload command.
- Issue #4354: Fix distutils register command.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment