Commit 7c6598c1 authored by Mark Florisson's avatar Mark Florisson

Merged with libpython.py from Python3.2 + prevented continuation with negative...

Merged with libpython.py from Python3.2 + prevented continuation with negative pointer values gdb was giving to avoid segfaulting the inferior when using 'cy exec'
parent 391a3837
......@@ -42,7 +42,9 @@ def make_command_file(path_to_debug_info, prefix_code='', no_import=False):
f.write('python from Cython.Debugger import libcython\n')
if no_import:
f.write("file %s\n" % sys.executable)
# don't do this, this overrides file command in .gdbinit
# f.write("file %s\n" % sys.executable)
pass
else:
path = os.path.join(path_to_debug_info, "cython_debug", "interpreter")
interpreter = open(path).read()
......
......@@ -1152,7 +1152,7 @@ class CyExec(CythonCommand, libpython.PyExec):
pystringp = executor.alloc_pystring(name)
code = '''
PyDict_SetItem(
(PyObject *) PyDict_SetItem(
(PyObject *) %d,
(PyObject *) %d,
(PyObject *) %s)
......
......@@ -48,6 +48,7 @@ from __future__ import with_statement
import os
import re
import sys
import locale
import atexit
import warnings
import tempfile
......@@ -70,6 +71,7 @@ Py_TPFLAGS_LONG_SUBCLASS = (1L << 24)
Py_TPFLAGS_LIST_SUBCLASS = (1L << 25)
Py_TPFLAGS_TUPLE_SUBCLASS = (1L << 26)
Py_TPFLAGS_STRING_SUBCLASS = (1L << 27)
Py_TPFLAGS_BYTES_SUBCLASS = (1L << 27)
Py_TPFLAGS_UNICODE_SUBCLASS = (1L << 28)
Py_TPFLAGS_DICT_SUBCLASS = (1L << 29)
Py_TPFLAGS_BASE_EXC_SUBCLASS = (1L << 30)
......@@ -78,6 +80,10 @@ Py_TPFLAGS_TYPE_SUBCLASS = (1L << 31)
MAX_OUTPUT_LEN=1024
hexdigits = "0123456789abcdef"
ENCODING = locale.getpreferredencoding()
class NullPyObjectPtr(RuntimeError):
pass
......@@ -94,6 +100,30 @@ def safe_range(val):
# threshold in case the data was corrupted
return xrange(safety_limit(val))
def write_unicode(file, text):
    """Write `text`, a byte or unicode string, to `file`.

    A unicode string is first encoded to ENCODING using the
    'backslashreplace' error handler, so that characters the encoding
    cannot represent do not raise UnicodeEncodeError. Any other value is
    handed to file.write() untouched.
    """
    payload = text
    if isinstance(payload, unicode):
        payload = payload.encode(ENCODING, 'backslashreplace')
    file.write(payload)
def os_fsencode(filename):
    """Encode a unicode `filename` with the filesystem encoding.

    Values that are not unicode instances are returned unchanged. For
    unicode input the result is built character by character so that the
    'surrogateescape' error handler can be emulated: code points in the
    range U+DC80..U+DCFF are turned back into the single byte they stand
    for, everything else is encoded with sys.getfilesystemencoding().
    """
    if not isinstance(filename, unicode):
        return filename

    fs_encoding = sys.getfilesystemencoding()
    if fs_encoding == 'mbcs':
        # mbcs doesn't support surrogateescape
        return filename.encode(fs_encoding)

    pieces = []
    for ch in filename:
        codepoint = ord(ch)
        if 0xDC80 <= codepoint <= 0xDCFF:
            # surrogateescape error handler: recover the original byte
            pieces.append(chr(codepoint - 0xDC00))
        else:
            pieces.append(ch.encode(fs_encoding))
    return ''.join(pieces)
class StringTruncated(RuntimeError):
pass
......@@ -160,15 +190,8 @@ class PyObjectPtr(object):
return pyo_ptr.dereference()[name]
if name == 'ob_size':
try:
# Python 2:
return self._gdbval.dereference()[name]
except RuntimeError:
# Python 3:
try:
return self._gdbval.dereference()['ob_base'][name]
except RuntimeError:
return 0
pyo_ptr = self._gdbval.cast(PyVarObjectPtr.get_gdb_type())
return pyo_ptr.dereference()[name]
# General case: look it up inside the object:
return self._gdbval.dereference()[name]
......@@ -370,6 +393,8 @@ class PyObjectPtr(object):
def as_address(self):
return long(self._gdbval)
class PyVarObjectPtr(PyObjectPtr):
_typename = 'PyVarObject'
class ProxyAlreadyVisited(object):
'''
......@@ -785,6 +810,22 @@ class PyLongObjectPtr(PyObjectPtr):
result = -result
return result
def write_repr(self, out, visited):
    """Write this value to `out` as a Python 3 int literal.

    The value obtained from self.proxyval(visited) is formatted with "%s",
    i.e. without the "L" suffix that repr() would add to a Python 2 long.
    """
    out.write("%s" % self.proxyval(visited))
class PyBoolObjectPtr(PyLongObjectPtr):
    """
    Wrapper for a gdb.Value that is a PyBoolObject*, i.e. one of the two
    <bool> instances (Py_True/Py_False) within the process being debugged.
    """

    def proxyval(self, visited):
        """Return True or False according to the underlying integer value."""
        return bool(PyLongObjectPtr.proxyval(self, visited))
class PyNoneStructPtr(PyObjectPtr):
"""
......@@ -898,7 +939,8 @@ class PyFrameObjectPtr(PyObjectPtr):
newline character'''
if self.is_optimized_out():
return '(frame information optimized out)'
with open(self.filename(), 'r') as f:
filename = self.filename()
with open(os_fsencode(filename), 'r') as f:
all_lines = f.readlines()
# Convert from 1-based current_line_num to 0-based list offset:
return all_lines[self.current_line_num()-1]
......@@ -909,9 +951,9 @@ class PyFrameObjectPtr(PyObjectPtr):
return
out.write('Frame 0x%x, for file %s, line %i, in %s ('
% (self.as_address(),
self.co_filename,
self.co_filename.proxyval(visited),
self.current_line_num(),
self.co_name))
self.co_name.proxyval(visited)))
first = True
for pyop_name, pyop_value in self.iter_locals():
if not first:
......@@ -948,7 +990,8 @@ class PySetObjectPtr(PyObjectPtr):
return set(members)
def write_repr(self, out, visited):
out.write(self.safe_tp_name())
# Emulate Python 3's set_repr
tp_name = self.safe_tp_name()
# Guard against infinite loops:
if self.as_address() in visited:
......@@ -956,7 +999,18 @@ class PySetObjectPtr(PyObjectPtr):
return
visited.add(self.as_address())
out.write('([')
# Python 3's set_repr special-cases the empty set:
if not self.field('used'):
out.write(tp_name)
out.write('()')
return
# Python 3 uses {} for set literals:
if tp_name != 'set':
out.write(tp_name)
out.write('(')
out.write('{')
first = True
table = self.field('table')
for i in safe_range(self.field('mask')+1):
......@@ -970,23 +1024,55 @@ class PySetObjectPtr(PyObjectPtr):
out.write(', ')
first = False
pyop_key.write_repr(out, visited)
out.write('])')
out.write('}')
if tp_name != 'set':
out.write(')')
class PyStringObjectPtr(PyObjectPtr):
_typename = 'PyStringObject'
class PyBytesObjectPtr(PyObjectPtr):
_typename = 'PyBytesObject'
def __str__(self):
field_ob_size = self.field('ob_size')
field_ob_sval = self.field('ob_sval')
char_ptr = field_ob_sval.address.cast(
gdb.lookup_type('char').pointer())
char_ptr = field_ob_sval.address.cast(_type_unsigned_char_ptr)
return ''.join([chr(char_ptr[i]) for i in safe_range(field_ob_size)])
def proxyval(self, visited):
return str(self)
def write_repr(self, out, visited):
    """Write this object to `out` as a Python 3 bytes literal ("b" prefix).

    This is a transliteration of Python 3's
    Objects/bytesobject.c:PyBytes_Repr to Python 2 code.
    """
    # Get a PyStringObject* within the Python 2 gdb process:
    contents = self.proxyval(visited)

    # Single quotes by default; double quotes only when the data contains
    # a single quote and no double quote.
    use_double = "'" in contents and '"' not in contents
    quote = '"' if use_double else "'"

    whitespace_escapes = {'\t': '\\t', '\n': '\\n', '\r': '\\r'}

    out.write('b')
    out.write(quote)
    for byte in contents:
        if byte in (quote, '\\'):
            # The delimiter and the backslash get a backslash prefix.
            out.write('\\')
            out.write(byte)
        elif byte in whitespace_escapes:
            out.write(whitespace_escapes[byte])
        elif byte < ' ' or ord(byte) >= 0x7f:
            # Everything outside printable ASCII becomes \xhh.
            value = ord(byte)
            out.write('\\x')
            out.write(hexdigits[(value & 0xf0) >> 4])
            out.write(hexdigits[value & 0xf])
        else:
            out.write(byte)
    out.write(quote)
class PyTupleObjectPtr(PyObjectPtr):
_typename = 'PyTupleObject'
......@@ -1027,6 +1113,13 @@ class PyTypeObjectPtr(PyObjectPtr):
_typename = 'PyTypeObject'
def _unichr_is_printable(char):
# Logic adapted from Python 3's Tools/unicode/makeunicodedata.py
if char == u" ":
return True
import unicodedata
return unicodedata.category(char) not in ("C", "Z")
if sys.maxunicode >= 0x10000:
_unichr = unichr
else:
......@@ -1085,6 +1178,116 @@ class PyUnicodeObjectPtr(PyObjectPtr):
result = u''.join([_unichr(ucs) for ucs in Py_UNICODEs])
return result
def write_repr(self, out, visited):
    """Write this string to `out` as a Python 3 str literal (no "u" prefix).

    The text comes from self.proxyval(visited). This is a transliteration
    of Python 3's Objects/unicodeobject.c:unicode_repr to Python 2:
    quotes and backslashes are escaped, tab/newline/carriage return use
    their short escapes, other non-printable ASCII (including DEL, 0x7F)
    becomes \\xhh, and non-ASCII characters are copied as-is only when
    _unichr_is_printable() accepts them and they can be encoded to
    ENCODING; otherwise they are written as \\xhh, \\uxxxx or \\Uxxxxxxxx.
    """
    # Get a PyUnicodeObject* within the Python 2 gdb process:
    proxy = self.proxyval(visited)

    # Use double quotes only when the text has a single quote and no
    # double quote; otherwise use single quotes.
    if "'" in proxy and '"' not in proxy:
        quote = '"'
    else:
        quote = "'"
    out.write(quote)

    i = 0
    while i < len(proxy):
        ch = proxy[i]
        i += 1

        # Escape quotes and backslashes
        if ch == quote or ch == '\\':
            out.write('\\')
            out.write(ch)

        # Map special whitespace to '\t', '\n', '\r'
        elif ch == '\t':
            out.write('\\t')
        elif ch == '\n':
            out.write('\\n')
        elif ch == '\r':
            out.write('\\r')

        # Map non-printable US ASCII to '\xhh'. The DEL test must compare
        # the code point: a character never equals the int 0x7F.
        elif ch < ' ' or ord(ch) == 0x7F:
            out.write('\\x%02x' % ord(ch))

        # Copy ASCII characters as-is
        elif ord(ch) < 0x7F:
            out.write(ch)

        # Non-ASCII characters
        else:
            ucs = ch
            ch2 = None
            if sys.maxunicode < 0x10000:
                # If sizeof(Py_UNICODE) is 2 here (in gdb), join
                # surrogate pairs before calling _unichr_is_printable.
                if (i < len(proxy)
                        and 0xD800 <= ord(ch) < 0xDC00
                        and 0xDC00 <= ord(proxy[i]) <= 0xDFFF):
                    ch2 = proxy[i]
                    ucs = ch + ch2
                    i += 1

            # Unfortunately, Python 2's unicode type doesn't seem
            # to expose the "isprintable" method
            printable = _unichr_is_printable(ucs)
            if printable:
                # A character that cannot be encoded for output is
                # treated as non-printable and escaped instead.
                try:
                    ucs.encode(ENCODING)
                except UnicodeEncodeError:
                    printable = False

            # Map Unicode whitespace and control characters
            # (categories Z* and C* except ASCII space)
            if not printable:
                if ch2 is not None:
                    # Match Python 3's representation of non-printable
                    # wide characters.
                    code = (ord(ch) & 0x03FF) << 10
                    code |= ord(ch2) & 0x03FF
                    code += 0x00010000
                else:
                    code = ord(ucs)

                if code <= 0xff:
                    # Map 8-bit characters to '\xhh'
                    out.write('\\x%02x' % code)
                elif code >= 0x10000:
                    # Map 21-bit characters to '\U00xxxxxx'
                    out.write('\\U%08x' % code)
                else:
                    # Map 16-bit characters to '\uxxxx'
                    out.write('\\u%04x' % code)
            else:
                # Copy characters as-is
                out.write(ch)
                if ch2 is not None:
                    out.write(ch2)

    out.write(quote)
def int_from_int(gdbval):
return int(str(gdbval))
......@@ -1254,7 +1457,8 @@ class Frame(object):
if self.is_evalframeex():
pyop = self.get_pyop()
if pyop:
sys.stdout.write('#%i %s\n' % (self.get_index(), pyop.get_truncated_repr(MAX_OUTPUT_LEN)))
line = pyop.get_truncated_repr(MAX_OUTPUT_LEN)
write_unicode(sys.stdout, '#%i %s\n' % (self.get_index(), line))
sys.stdout.write(pyop.current_line())
else:
sys.stdout.write('#%i (unable to read python frame information)\n' % self.get_index())
......@@ -1315,7 +1519,7 @@ class PyList(gdb.Command):
if start<1:
start = 1
with open(filename, 'r') as f:
with open(os_fsencode(filename), 'r') as f:
all_lines = f.readlines()
# start and end are 1-based, all_lines is 0-based;
# so [start-1:end] as a python slice gives us [start, end] as a
......@@ -1487,8 +1691,7 @@ class PyNameEquals(gdb.Function):
pyframe = frame.get_pyop()
if pyframe is None:
return None
return str(getattr(pyframe, attr))
val = str(getattr(pyframe, attr)); print val, val.proxyval(set()); return val
return None
......@@ -1922,7 +2125,7 @@ Py_single_input = 256
Py_file_input = 257
Py_eval_input = 258
def pointervalue(gdbval):
def _pointervalue(gdbval):
"""
Return the value of the pointer as a Python int.
......@@ -1936,11 +2139,23 @@ def pointervalue(gdbval):
# still convert the pointer to an int
return long(gdbval)
def pointervalue(gdbval):
    """Return the pointer value of `gdbval`, rejecting negative results.

    The value is obtained from _pointervalue(). A negative value is
    presumed to be a gdb bug and is reported by raising gdb.GdbError. A
    RuntimeError raised while performing that check is ignored and the
    value is returned as-is.
    """
    address = _pointervalue(gdbval)
    try:
        if address < 0:
            raise gdb.GdbError("Negative pointer value, presumably a bug "
                               "in gdb, aborting.")
    except RuntimeError:
        # work around yet another bug in gdb where you get random behaviour
        # and tracebacks
        pass
    return address
class PythonCodeExecutor(object):
def malloc(self, size):
chunk = (gdb.parse_and_eval("(void *) malloc(%d)" % size))
chunk = (gdb.parse_and_eval("(void *) malloc((size_t) %d)" % size))
pointer = pointervalue(chunk)
if pointer == 0:
......@@ -1958,8 +2173,9 @@ class PythonCodeExecutor(object):
stringp = self.alloc_string(string)
try:
result = gdb.parse_and_eval(
'PyString_FromStringAndSize((char *) %d, (size_t) %d)' %
(stringp, len(string)))
'(PyObject *) PyString_FromStringAndSize('
'(char *) %d,'
'(size_t) %d)' % (stringp, len(string)))
finally:
self.free(stringp)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment