Commit 2fba0b3d authored by Antoine Pitrou

Merged revisions 84635-84636 via svnmerge from

svn+ssh://pythondev@svn.python.org/python/branches/py3k

........
  r84635 | antoine.pitrou | 2010-09-08 22:57:48 +0200 (mer., 08 sept. 2010) | 5 lines

  Issue #9188: The gdb extension now correctly handles narrow (UCS2) as well
  as wide (UCS4) unicode builds, for both the host interpreter (embedded
  inside gdb) and the interpreter under test.
........
  r84636 | antoine.pitrou | 2010-09-08 23:07:40 +0200 (mer., 08 sept. 2010) | 4 lines

  Add a safety limit to the number of unicode characters we fetch
  (followup to r84635, suggested by Dave Malcolm).
........
parent 0a7b65b7
...@@ -288,6 +288,13 @@ Extension Modules ...@@ -288,6 +288,13 @@ Extension Modules
- Issue #7567: Don't call `setupterm' twice. - Issue #7567: Don't call `setupterm' twice.
Tools/Demos
-----------
- Issue #9188: The gdb extension now correctly handles narrow (UCS2) as well
as wide (UCS4) unicode builds, for both the host interpreter (embedded
inside gdb) and the interpreter under test.
Build Build
----- -----
......
...@@ -1011,6 +1011,18 @@ class PyTypeObjectPtr(PyObjectPtr): ...@@ -1011,6 +1011,18 @@ class PyTypeObjectPtr(PyObjectPtr):
_typename = 'PyTypeObject' _typename = 'PyTypeObject'
# Choose the code-point-to-unicode converter once, depending on the unicode
# width of the interpreter this module runs in (the one embedded in gdb).
if sys.maxunicode < 0x10000:
    def _unichr(x):
        """Return the unicode string for the code point *x*.

        Needed for proper surrogate support when sizeof(Py_UNICODE) is 2
        in gdb: a code point of 0x10000 or above is returned as two
        characters (a high surrogate followed by a low surrogate); any
        smaller value goes straight through unichr().
        """
        if x >= 0x10000:
            offset = x - 0x10000
            # Top bits of the offset select the high surrogate, the
            # bottom 10 bits select the low surrogate.
            high = 0xD800 | (offset >> 10)
            low = 0xDC00 | (offset & 0x3FF)
            return unichr(high) + unichr(low)
        return unichr(x)
else:
    # sys.maxunicode is at least 0x10000 here: use unichr() directly.
    _unichr = unichr
class PyUnicodeObjectPtr(PyObjectPtr): class PyUnicodeObjectPtr(PyObjectPtr):
_typename = 'PyUnicodeObject' _typename = 'PyUnicodeObject'
...@@ -1027,36 +1039,35 @@ class PyUnicodeObjectPtr(PyObjectPtr): ...@@ -1027,36 +1039,35 @@ class PyUnicodeObjectPtr(PyObjectPtr):
# Gather a list of ints from the Py_UNICODE array; these are either # Gather a list of ints from the Py_UNICODE array; these are either
# UCS-2 or UCS-4 code points: # UCS-2 or UCS-4 code points:
if self.char_width() > 2:
Py_UNICODEs = [int(field_str[i]) for i in safe_range(field_length)] Py_UNICODEs = [int(field_str[i]) for i in safe_range(field_length)]
else:
# Convert the int code points to unicode characters, and generate a # A more elaborate routine if sizeof(Py_UNICODE) is 2 in the
# local unicode instance: # inferior process: we must join surrogate pairs.
result = u''.join([unichr(ucs) for ucs in Py_UNICODEs]) Py_UNICODEs = []
return result
def write_repr(self, out, visited):
proxy = self.proxyval(visited)
if self.char_width() == 2:
# sizeof(Py_UNICODE)==2: join surrogates
proxy2 = []
i = 0 i = 0
while i < len(proxy): limit = safety_limit(field_length)
ch = proxy[i] while i < limit:
ucs = int(field_str[i])
i += 1 i += 1
if (i < len(proxy) if ucs < 0xD800 or ucs >= 0xDC00 or i == field_length:
and 0xD800 <= ord(ch) < 0xDC00 \ Py_UNICODEs.append(ucs)
and 0xDC00 <= ord(proxy[i]) <= 0xDFFF): continue
# Get code point from surrogate pair # This could be a surrogate pair.
ch2 = proxy[i] ucs2 = int(field_str[i])
code = (ord(ch) & 0x03FF) << 10 if ucs2 < 0xDC00 or ucs2 > 0xDFFF:
code |= ord(ch2) & 0x03FF continue
code = (ucs & 0x03FF) << 10
code |= ucs2 & 0x03FF
code += 0x00010000 code += 0x00010000
Py_UNICODEs.append(code)
i += 1 i += 1
proxy2.append(unichr(code))
else: # Convert the int code points to unicode characters, and generate a
proxy2.append(ch) # local unicode instance.
proxy = u''.join(proxy2) # This splits surrogate pairs if sizeof(Py_UNICODE) is 2 here (in gdb).
out.write(repr(proxy)) result = u''.join([_unichr(ucs) for ucs in Py_UNICODEs])
return result
def int_from_int(gdbval): def int_from_int(gdbval):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment