Commit 3ce7258a authored by Robert Bradshaw's avatar Robert Bradshaw

merge

parents c9430057 1611a7e8
......@@ -2,6 +2,7 @@
# Cython - Command Line Parsing
#
import os
import sys
import Options
......@@ -27,6 +28,7 @@ Options:
Level indicates aggressiveness, default 0 releases nothing.
-w, --working <directory> Sets the working directory for Cython (the directory modules
are searched from)
--gdb Output debug information for cygdb
-D, --no-docstrings Remove docstrings.
-a, --annotate Produce a colorized HTML version of the source.
......@@ -114,6 +116,9 @@ def parse_command_line(args):
Options.convert_range = True
elif option == "--line-directives":
options.emit_linenums = True
elif option == "--gdb":
options.gdb_debug = True
options.output_dir = os.curdir
elif option == '-2':
options.language_level = 2
elif option == '-3':
......
......@@ -916,6 +916,14 @@ class CCodeWriter(object):
return self.buffer.getvalue()
def write(self, s):
# also put invalid markers (lineno 0), to indicate that those lines
# have no Cython source code correspondence
if self.marker is None:
cython_lineno = self.last_marker_line
else:
cython_lineno = self.marker[0]
self.buffer.markers.extend([cython_lineno] * s.count('\n'))
self.buffer.write(s)
def insertion_point(self):
......@@ -1000,6 +1008,7 @@ class CCodeWriter(object):
self.emit_marker()
if self.emit_linenums and self.last_marker_line != 0:
self.write('\n#line %s "%s"\n' % (self.last_marker_line, self.source_desc))
if code:
if safe:
self.put_safe(code)
......
......@@ -97,6 +97,10 @@ class CompilerCrash(CompileError):
message += u'%s: %s' % (cause.__class__.__name__, cause)
CompileError.__init__(self, pos, message)
class NoElementTreeInstalledException(PyrexError):
"""raised when the user enabled options.gdb_debug but no ElementTree
implementation was found
"""
listing_file = None
num_errors = 0
......
......@@ -13,7 +13,9 @@ except NameError:
# Python 2.3
from sets import Set as set
import itertools
from time import time
import Code
import Errors
import Parsing
......@@ -85,6 +87,8 @@ class Context(object):
self.set_language_level(language_level)
self.gdb_debug_outputwriter = None
def set_language_level(self, level):
self.language_level = level
if level >= 3:
......@@ -178,13 +182,22 @@ class Context(object):
from Cython.TestUtils import TreeAssertVisitor
test_support.append(TreeAssertVisitor())
return ([
create_parse(self),
] + self.create_pipeline(pxd=False, py=py) + test_support + [
inject_pxd_code,
abort_on_errors,
generate_pyx_code,
])
if options.gdb_debug:
from Cython.Debugger import DebugWriter
from ParseTreeTransforms import DebugTransform
self.gdb_debug_outputwriter = DebugWriter.CythonDebugWriter(
options.output_dir)
debug_transform = [DebugTransform(self, options, result)]
else:
debug_transform = []
return list(itertools.chain(
[create_parse(self)],
self.create_pipeline(pxd=False, py=py),
test_support,
[inject_pxd_code, abort_on_errors],
debug_transform,
[generate_pyx_code]))
def create_pxd_pipeline(self, scope, module_name):
def parse_pxd(source_desc):
......@@ -808,4 +821,5 @@ default_options = dict(
evaluate_tree_assertions = False,
emit_linenums = False,
language_level = 2,
gdb_debug = False,
)
......@@ -297,12 +297,34 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
f = open_new_file(result.c_file)
rootwriter.copyto(f)
if options.gdb_debug:
self._serialize_lineno_map(env, rootwriter)
f.close()
result.c_file_generated = 1
if Options.annotate or options.annotate:
self.annotate(rootwriter)
rootwriter.save_annotation(result.main_source_file, result.c_file)
def _serialize_lineno_map(self, env, ccodewriter):
tb = env.context.gdb_debug_outputwriter
markers = ccodewriter.buffer.allmarkers()
d = {}
for c_lineno, cython_lineno in enumerate(markers):
if cython_lineno > 0:
d.setdefault(cython_lineno, []).append(c_lineno + 1)
tb.start('LineNumberMapping')
for cython_lineno, c_linenos in sorted(d.iteritems()):
attrs = {
'c_linenos': ' '.join(map(str, c_linenos)),
'cython_lineno': str(cython_lineno),
}
tb.start('LineNumber', attrs)
tb.end('LineNumber')
tb.end('LineNumberMapping')
tb.serialize()
def find_referenced_modules(self, env, module_list, modules_seen):
if env not in modules_seen:
modules_seen[env] = 1
......
......@@ -20,6 +20,12 @@ from Cython.Compiler.TreeFragment import TreeFragment, TemplateTransform
from Cython.Compiler.StringEncoding import EncodedString
from Cython.Compiler.Errors import error, warning, CompileError, InternalError
try:
set
except NameError:
from sets import Set as set
import copy
......@@ -1574,3 +1580,136 @@ class TransformBuiltinMethods(EnvTransform):
self.visitchildren(node)
return node
class DebugTransform(CythonTransform):
"""
Create debug information and all functions' visibility to extern in order
to enable debugging.
"""
def __init__(self, context, options, result):
super(DebugTransform, self).__init__(context)
self.visited = set()
# our treebuilder and debug output writer
# (see Cython.Debugger.debug_output.CythonDebugWriter)
self.tb = self.context.gdb_debug_outputwriter
#self.c_output_file = options.output_file
self.c_output_file = result.c_file
# tells visit_NameNode whether it should register step-into functions
self.register_stepinto = False
def visit_ModuleNode(self, node):
self.tb.module_name = node.full_module_name
attrs = dict(
module_name=node.full_module_name,
filename=node.pos[0].filename,
c_filename=self.c_output_file)
self.tb.start('Module', attrs)
# serialize functions
self.tb.start('Functions')
self.visitchildren(node)
self.tb.end('Functions')
# 2.3 compatibility. Serialize global variables
self.tb.start('Globals')
entries = {}
for k, v in node.scope.entries.iteritems():
if (v.qualified_name not in self.visited and not
v.name.startswith('__pyx_') and not
v.type.is_cfunction and not
v.type.is_extension_type):
entries[k]= v
self.serialize_local_variables(entries)
self.tb.end('Globals')
# self.tb.end('Module') # end Module after the line number mapping in
# Cython.Compiler.ModuleNode.ModuleNode._serialize_lineno_map
return node
def visit_FuncDefNode(self, node):
self.visited.add(node.local_scope.qualified_name)
# node.entry.visibility = 'extern'
if node.py_func is None:
pf_cname = ''
else:
pf_cname = node.py_func.entry.func_cname
attrs = dict(
name=node.entry.name,
cname=node.entry.func_cname,
pf_cname=pf_cname,
qualified_name=node.local_scope.qualified_name,
lineno=str(node.pos[1]))
self.tb.start('Function', attrs=attrs)
self.tb.start('Locals')
self.serialize_local_variables(node.local_scope.entries)
self.tb.end('Locals')
self.tb.start('Arguments')
for arg in node.local_scope.arg_entries:
self.tb.start(arg.name)
self.tb.end(arg.name)
self.tb.end('Arguments')
self.tb.start('StepIntoFunctions')
self.register_stepinto = True
self.visitchildren(node)
self.register_stepinto = False
self.tb.end('StepIntoFunctions')
self.tb.end('Function')
return node
def visit_NameNode(self, node):
if (self.register_stepinto and
node.type.is_cfunction and
getattr(node, 'is_called', False) and
node.entry.func_cname is not None):
# don't check node.entry.in_cinclude, as 'cdef extern: ...'
# declared functions are not 'in_cinclude'.
# This means we will list called 'cdef' functions as
# "step into functions", but this is not an issue as they will be
# recognized as Cython functions anyway.
attrs = dict(name=node.entry.func_cname)
self.tb.start('StepIntoFunction', attrs=attrs)
self.tb.end('StepIntoFunction')
self.visitchildren(node)
return node
def serialize_local_variables(self, entries):
for entry in entries.values():
if entry.type.is_pyobject:
vartype = 'PythonObject'
else:
vartype = 'CObject'
cname = entry.cname
# if entry.type.is_extension_type:
# cname = entry.type.typeptr_cname
if not entry.pos:
# this happens for variables that are not in the user's code,
# e.g. for the global __builtins__, __doc__, etc. We can just
# set the lineno to 0 for those.
lineno = '0'
else:
lineno = str(entry.pos[1])
attrs = dict(
name=entry.name,
cname=cname,
qualified_name=entry.qualified_name,
type=vartype,
lineno=lineno)
self.tb.start('LocalVar', attrs)
self.tb.end('LocalVar')
import os
from Cython.Compiler import CmdLine
from Cython.TestUtils import TransformTest
from Cython.Compiler.ParseTreeTransforms import *
from Cython.Compiler.Nodes import *
class TestNormalizeTree(TransformTest):
def test_parserbehaviour_is_what_we_coded_for(self):
t = self.fragment(u"if x: y").root
......@@ -140,6 +144,80 @@ class TestWithTransform(object): # (TransformTest): # Disabled!
""", t)
if sys.version_info[:2] > (2, 4):
from Cython.Debugger import DebugWriter
from Cython.Debugger.Tests.TestLibCython import DebuggerTestCase
else:
# skip test, don't let it inherit unittest.TestCase
DebuggerTestCase = object
class TestDebugTransform(DebuggerTestCase):
def elem_hasattrs(self, elem, attrs):
# we shall supporteth python 2.3 !
return all([attr in elem.attrib for attr in attrs])
def test_debug_info(self):
try:
assert os.path.exists(self.debug_dest)
t = DebugWriter.etree.parse(self.debug_dest)
# the xpath of the standard ElementTree is primitive, don't use
# anything fancy
L = list(t.find('/Module/Globals'))
# assertTrue is retarded, use the normal assert statement
assert L
xml_globals = dict(
[(e.attrib['name'], e.attrib['type']) for e in L])
self.assertEqual(len(L), len(xml_globals))
L = list(t.find('/Module/Functions'))
assert L
xml_funcs = dict([(e.attrib['qualified_name'], e) for e in L])
self.assertEqual(len(L), len(xml_funcs))
# test globals
self.assertEqual('CObject', xml_globals.get('c_var'))
self.assertEqual('PythonObject', xml_globals.get('python_var'))
# test functions
funcnames = 'codefile.spam', 'codefile.ham', 'codefile.eggs'
required_xml_attrs = 'name', 'cname', 'qualified_name'
assert all([f in xml_funcs for f in funcnames])
spam, ham, eggs = [xml_funcs[funcname] for funcname in funcnames]
self.assertEqual(spam.attrib['name'], 'spam')
self.assertNotEqual('spam', spam.attrib['cname'])
assert self.elem_hasattrs(spam, required_xml_attrs)
# test locals of functions
spam_locals = list(spam.find('Locals'))
assert spam_locals
spam_locals.sort(key=lambda e: e.attrib['name'])
names = [e.attrib['name'] for e in spam_locals]
self.assertEqual(list('abcd'), names)
assert self.elem_hasattrs(spam_locals[0], required_xml_attrs)
# test arguments of functions
spam_arguments = list(spam.find('Arguments'))
assert spam_arguments
self.assertEqual(1, len(list(spam_arguments)))
# test step-into functions
step_into = spam.find('StepIntoFunctions')
spam_stepinto = [x.attrib['name'] for x in step_into]
assert spam_stepinto
self.assertEqual(2, len(spam_stepinto))
assert 'puts' in spam_stepinto
assert 'some_c_function' in spam_stepinto
except:
print open(self.debug_dest).read()
raise
if __name__ == "__main__":
import unittest
unittest.main()
#!/usr/bin/env python
"""
The Cython debugger
The current directory should contain a directory named 'cython_debug', or a
path to the cython project directory should be given (the parent directory of
cython_debug).
Additional gdb args can be provided only if a path to the project directory is
given.
"""
import os
import sys
import glob
import tempfile
import textwrap
import subprocess
usage = "Usage: cygdb [PATH [GDB_ARGUMENTS]]"
def make_command_file(path_to_debug_info, prefix_code='', no_import=False):
if not no_import:
pattern = os.path.join(path_to_debug_info,
'cython_debug',
'cython_debug_info_*')
debug_files = glob.glob(pattern)
if not debug_files:
sys.exit('%s.\nNo debug files were found in %s. Aborting.' % (
usage, os.path.abspath(path_to_debug_info)))
fd, tempfilename = tempfile.mkstemp()
f = os.fdopen(fd, 'w')
f.write(prefix_code)
f.write('set breakpoint pending on\n')
f.write("set print pretty on\n")
f.write('python from Cython.Debugger import libcython, libpython\n')
if no_import:
# don't do this, this overrides file command in .gdbinit
# f.write("file %s\n" % sys.executable)
pass
else:
path = os.path.join(path_to_debug_info, "cython_debug", "interpreter")
interpreter = open(path).read()
f.write("file %s\n" % interpreter)
f.write('\n'.join('cy import %s\n' % fn for fn in debug_files))
f.write(textwrap.dedent('''\
python
import sys
try:
gdb.lookup_type('PyModuleObject')
except RuntimeError:
sys.stderr.write(
'Python was not compiled with debug symbols (or it was '
'stripped). Some functionality may not work (properly).\\n')
end
'''))
f.close()
return tempfilename
def main(path_to_debug_info=None, gdb_argv=None, no_import=False):
"""
Start the Cython debugger. This tells gdb to import the Cython and Python
extensions (libcython.py and libpython.py) and it enables gdb's pending
breakpoints.
path_to_debug_info is the path to the Cython build directory
gdb_argv is the list of options to gdb
no_import tells cygdb whether it should import debug information
"""
if path_to_debug_info is None:
if len(sys.argv) > 1:
path_to_debug_info = sys.argv[1]
else:
path_to_debug_info = os.curdir
if gdb_argv is None:
gdb_argv = sys.argv[2:]
if path_to_debug_info == '--':
no_import = True
tempfilename = make_command_file(path_to_debug_info, no_import=no_import)
p = subprocess.Popen(['gdb', '-command', tempfilename] + gdb_argv)
while True:
try:
p.wait()
except KeyboardInterrupt:
pass
else:
break
os.remove(tempfilename)
from __future__ import with_statement
import os
import sys
import errno
try:
from lxml import etree
have_lxml = True
except ImportError:
have_lxml = False
try:
# Python 2.5
from xml.etree import cElementTree as etree
except ImportError:
try:
# Python 2.5
from xml.etree import ElementTree as etree
except ImportError:
try:
# normal cElementTree install
import cElementTree as etree
except ImportError:
try:
# normal ElementTree install
import elementtree.ElementTree as etree
except ImportError:
etree = None
from Cython.Compiler import Errors
class CythonDebugWriter(object):
"""
Class to output debugging information for cygdb
It writes debug information to cython_debug/cython_debug_info_<modulename>
in the build directory.
"""
def __init__(self, output_dir):
if etree is None:
raise Errors.NoElementTreeInstalledException()
self.output_dir = os.path.join(output_dir, 'cython_debug')
self.tb = etree.TreeBuilder()
# set by Cython.Compiler.ParseTreeTransforms.DebugTransform
self.module_name = None
self.start('cython_debug', attrs=dict(version='1.0'))
def start(self, name, attrs=None):
self.tb.start(name, attrs or {})
def end(self, name):
self.tb.end(name)
def serialize(self):
self.tb.end('Module')
self.tb.end('cython_debug')
xml_root_element = self.tb.close()
try:
os.makedirs(self.output_dir)
except OSError, e:
if e.errno != errno.EEXIST:
raise
et = etree.ElementTree(xml_root_element)
kw = {}
if have_lxml:
kw['pretty_print'] = True
fn = "cython_debug_info_" + self.module_name
et.write(os.path.join(self.output_dir, fn), encoding="UTF-8", **kw)
interpreter_path = os.path.join(self.output_dir, 'interpreter')
with open(interpreter_path, 'w') as f:
f.write(sys.executable)
from __future__ import with_statement
import os
import re
import sys
import uuid
import shutil
import warnings
import textwrap
import unittest
import tempfile
import subprocess
import distutils.core
from distutils import sysconfig
from distutils import ccompiler
import runtests
import Cython.Distutils.extension
from Cython.Debugger import Cygdb as cygdb
root = os.path.dirname(os.path.abspath(__file__))
codefile = os.path.join(root, 'codefile')
cfuncs_file = os.path.join(root, 'cfuncs.c')
with open(codefile) as f:
source_to_lineno = dict((line.strip(), i + 1) for i, line in enumerate(f))
class DebuggerTestCase(unittest.TestCase):
def setUp(self):
"""
Run gdb and have cygdb import the debug information from the code
defined in TestParseTreeTransforms's setUp method
"""
self.tempdir = tempfile.mkdtemp()
self.destfile = os.path.join(self.tempdir, 'codefile.pyx')
self.debug_dest = os.path.join(self.tempdir,
'cython_debug',
'cython_debug_info_codefile')
self.cfuncs_destfile = os.path.join(self.tempdir, 'cfuncs')
self.cwd = os.getcwd()
os.chdir(self.tempdir)
shutil.copy(codefile, self.destfile)
shutil.copy(cfuncs_file, self.cfuncs_destfile + '.c')
compiler = ccompiler.new_compiler()
compiler.compile(['cfuncs.c'], debug=True)
opts = dict(
test_directory=self.tempdir,
module='codefile',
)
cython_compile_testcase = runtests.CythonCompileTestCase(
workdir=self.tempdir,
# we clean up everything (not only compiled files)
cleanup_workdir=False,
**opts
)
cython_compile_testcase.run_cython(
targetdir=self.tempdir,
incdir=None,
annotate=False,
extra_compile_options={
'gdb_debug':True,
'output_dir':self.tempdir,
},
**opts
)
cython_compile_testcase.run_distutils(
incdir=None,
workdir=self.tempdir,
extra_extension_args={'extra_objects':['cfuncs.o']},
**opts
)
# ext = Cython.Distutils.extension.Extension(
# 'codefile',
# ['codefile.pyx'],
# pyrex_gdb=True,
# extra_objects=['cfuncs.o'])
#
# distutils.core.setup(
# script_args=['build_ext', '--inplace'],
# ext_modules=[ext],
# cmdclass=dict(build_ext=Cython.Distutils.build_ext)
# )
def tearDown(self):
os.chdir(self.cwd)
shutil.rmtree(self.tempdir)
class GdbDebuggerTestCase(DebuggerTestCase):
def setUp(self):
super(GdbDebuggerTestCase, self).setUp()
prefix_code = textwrap.dedent('''\
python
import os
import sys
import traceback
def excepthook(type, value, tb):
traceback.print_exception(type, value, tb)
os._exit(1)
sys.excepthook = excepthook
# Have tracebacks end up on sys.stderr (gdb replaces sys.stderr
# with an object that calls gdb.write())
sys.stderr = sys.__stderr__
end
''')
code = textwrap.dedent('''\
python
from Cython.Debugger.Tests import test_libcython_in_gdb
test_libcython_in_gdb.main()
end
''')
self.gdb_command_file = cygdb.make_command_file(self.tempdir,
prefix_code)
open(self.gdb_command_file, 'a').write(code)
args = ['gdb', '-batch', '-x', self.gdb_command_file, '-n', '--args',
sys.executable, '-c', 'import codefile']
paths = []
path = os.environ.get('PYTHONPATH')
if path:
paths.append(path)
paths.append(os.path.dirname(os.path.dirname(
os.path.abspath(Cython.__file__))))
env = dict(os.environ, PYTHONPATH=os.pathsep.join(paths))
try:
p = subprocess.Popen(['gdb', '-v'], stdout=subprocess.PIPE)
have_gdb = True
except OSError:
# gdb was not installed
have_gdb = False
else:
gdb_version = p.stdout.read()
p.wait()
p.stdout.close()
if have_gdb:
# Based on Lib/test/test_gdb.py
regex = "^GNU gdb [^\d]*(\d+)\.(\d+)"
gdb_version_number = re.search(regex, gdb_version).groups()
if not have_gdb or map(int, gdb_version_number) < [7, 2]:
self.p = None
warnings.warn('Skipping gdb tests, need gdb >= 7.2')
else:
self.p = subprocess.Popen(
args,
stdout=open(os.devnull, 'w'),
stderr=subprocess.PIPE,
env=env)
def tearDown(self):
super(GdbDebuggerTestCase, self).tearDown()
if self.p:
self.p.stderr.close()
self.p.wait()
os.remove(self.gdb_command_file)
class TestAll(GdbDebuggerTestCase):
def test_all(self):
if self.p is None:
return
out, err = self.p.communicate()
border = '*' * 30
start = '%s v INSIDE GDB v %s' % (border, border)
end = '%s ^ INSIDE GDB ^ %s' % (border, border)
errmsg = '\n%s\n%s%s' % (start, err, end)
self.assertEquals(0, self.p.wait(), errmsg)
sys.stderr.write(err)
if __name__ == '__main__':
unittest.main()
\ No newline at end of file
void
some_c_function(void)
{
int a, b, c;
a = 1;
b = 2;
}
\ No newline at end of file
cdef extern from "stdio.h":
int puts(char *s)
cdef extern:
void some_c_function()
import os
cdef int c_var = 12
python_var = 13
def spam(a=0):
cdef:
int b, c
b = c = d = 0
b = 1
c = 2
int(10)
puts("spam")
os.path.join("foo", "bar")
some_c_function()
cdef ham():
pass
cpdef eggs():
pass
cdef class SomeClass(object):
def spam(self):
pass
spam()
print "bye!"
"""
Tests that run inside GDB.
Note: debug information is already imported by the file generated by
Cython.Debugger.Cygdb.make_command_file()
"""
import os
import re
import sys
import trace
import inspect
import warnings
import unittest
import textwrap
import tempfile
import traceback
import itertools
from test import test_support
import gdb
from Cython.Debugger import libcython
from Cython.Debugger import libpython
from Cython.Debugger.Tests import TestLibCython as test_libcython
# for some reason sys.argv is missing in gdb
sys.argv = ['gdb']
class DebugTestCase(unittest.TestCase):
"""
Base class for test cases. On teardown it kills the inferior and unsets
all breakpoints.
"""
def __init__(self, name):
super(DebugTestCase, self).__init__(name)
self.cy = libcython.cy
self.module = libcython.cy.cython_namespace['codefile']
self.spam_func, self.spam_meth = libcython.cy.functions_by_name['spam']
self.ham_func = libcython.cy.functions_by_qualified_name[
'codefile.ham']
self.eggs_func = libcython.cy.functions_by_qualified_name[
'codefile.eggs']
def read_var(self, varname, cast_to=None):
result = gdb.parse_and_eval('$cy_cvalue("%s")' % varname)
if cast_to:
result = cast_to(result)
return result
def local_info(self):
return gdb.execute('info locals', to_string=True)
def lineno_equals(self, source_line=None, lineno=None):
if source_line is not None:
lineno = test_libcython.source_to_lineno[source_line]
frame = gdb.selected_frame()
self.assertEqual(libcython.cy.step.lineno(frame), lineno)
def break_and_run(self, source_line):
break_lineno = test_libcython.source_to_lineno[source_line]
gdb.execute('cy break codefile:%d' % break_lineno, to_string=True)
gdb.execute('run', to_string=True)
def tearDown(self):
gdb.execute('delete breakpoints', to_string=True)
try:
gdb.execute('kill inferior 1', to_string=True)
except RuntimeError:
pass
gdb.execute('set args -c "import codefile"')
libcython.cy.step.static_breakpoints.clear()
libcython.cy.step.runtime_breakpoints.clear()
libcython.cy.step.init_breakpoints()
class TestDebugInformationClasses(DebugTestCase):
def test_CythonModule(self):
"test that debug information was parsed properly into data structures"
self.assertEqual(self.module.name, 'codefile')
global_vars = ('c_var', 'python_var', '__name__',
'__builtins__', '__doc__', '__file__')
assert set(global_vars).issubset(self.module.globals)
def test_CythonVariable(self):
module_globals = self.module.globals
c_var = module_globals['c_var']
python_var = module_globals['python_var']
self.assertEqual(c_var.type, libcython.CObject)
self.assertEqual(python_var.type, libcython.PythonObject)
self.assertEqual(c_var.qualified_name, 'codefile.c_var')
def test_CythonFunction(self):
self.assertEqual(self.spam_func.qualified_name, 'codefile.spam')
self.assertEqual(self.spam_meth.qualified_name,
'codefile.SomeClass.spam')
self.assertEqual(self.spam_func.module, self.module)
assert self.eggs_func.pf_cname
assert not self.ham_func.pf_cname
assert not self.spam_func.pf_cname
assert not self.spam_meth.pf_cname
self.assertEqual(self.spam_func.type, libcython.CObject)
self.assertEqual(self.ham_func.type, libcython.CObject)
self.assertEqual(self.spam_func.arguments, ['a'])
self.assertEqual(self.spam_func.step_into_functions,
set(['puts', 'some_c_function']))
expected_lineno = test_libcython.source_to_lineno['def spam(a=0):']
self.assertEqual(self.spam_func.lineno, expected_lineno)
self.assertEqual(sorted(self.spam_func.locals), list('abcd'))
class TestParameters(unittest.TestCase):
def test_parameters(self):
gdb.execute('set cy_colorize_code on')
assert libcython.parameters.colorize_code
gdb.execute('set cy_colorize_code off')
assert not libcython.parameters.colorize_code
class TestBreak(DebugTestCase):
def test_break(self):
breakpoint_amount = len(gdb.breakpoints())
gdb.execute('cy break codefile.spam')
self.assertEqual(len(gdb.breakpoints()), breakpoint_amount + 1)
bp = gdb.breakpoints()[-1]
self.assertEqual(bp.type, gdb.BP_BREAKPOINT)
assert self.spam_func.cname in bp.location
assert bp.enabled
def test_python_break(self):
gdb.execute('cy break -p join')
assert 'def join(' in gdb.execute('cy run', to_string=True)
class TestKilled(DebugTestCase):
def test_abort(self):
gdb.execute("set args -c 'import os; os.abort()'")
output = gdb.execute('cy run', to_string=True)
assert 'abort' in output.lower()
class DebugStepperTestCase(DebugTestCase):
def step(self, varnames_and_values, source_line=None, lineno=None):
gdb.execute(self.command)
for varname, value in varnames_and_values:
self.assertEqual(self.read_var(varname), value, self.local_info())
self.lineno_equals(source_line, lineno)
class TestStep(DebugStepperTestCase):
"""
Test stepping. Stepping happens in the code found in
Cython/Debugger/Tests/codefile.
"""
def test_cython_step(self):
gdb.execute('cy break codefile.spam')
gdb.execute('run', to_string=True)
self.lineno_equals('def spam(a=0):')
gdb.execute('cy step', to_string=True)
self.lineno_equals('b = c = d = 0')
self.command = 'cy step'
self.step([('b', 0)], source_line='b = 1')
self.step([('b', 1), ('c', 0)], source_line='c = 2')
self.step([('c', 2)], source_line='int(10)')
self.step([], source_line='puts("spam")')
gdb.execute('cont', to_string=True)
self.assertEqual(len(gdb.inferiors()), 1)
self.assertEqual(gdb.inferiors()[0].pid, 0)
def test_c_step(self):
self.break_and_run('some_c_function()')
gdb.execute('cy step', to_string=True)
self.assertEqual(gdb.selected_frame().name(), 'some_c_function')
def test_python_step(self):
self.break_and_run('os.path.join("foo", "bar")')
result = gdb.execute('cy step', to_string=True)
curframe = gdb.selected_frame()
self.assertEqual(curframe.name(), 'PyEval_EvalFrameEx')
pyframe = libpython.Frame(curframe).get_pyop()
self.assertEqual(str(pyframe.co_name), 'join')
assert re.match(r'\d+ def join\(', result), result
class TestNext(DebugStepperTestCase):
def test_cython_next(self):
self.break_and_run('c = 2')
lines = (
'int(10)',
'puts("spam")',
'os.path.join("foo", "bar")',
'some_c_function()',
)
for line in lines:
gdb.execute('cy next')
self.lineno_equals(line)
class TestLocalsGlobals(DebugTestCase):
def test_locals(self):
self.break_and_run('int(10)')
result = gdb.execute('cy locals', to_string=True)
assert 'a = 0', repr(result)
assert 'b = (int) 1', result
assert 'c = (int) 2' in result, repr(result)
def test_globals(self):
self.break_and_run('int(10)')
result = gdb.execute('cy globals', to_string=True)
assert '__name__ ' in result, repr(result)
assert '__doc__ ' in result, repr(result)
assert 'os ' in result, repr(result)
assert 'c_var ' in result, repr(result)
assert 'python_var ' in result, repr(result)
class TestBacktrace(DebugTestCase):
def test_backtrace(self):
libcython.parameters.colorize_code.value = False
self.break_and_run('os.path.join("foo", "bar")')
result = gdb.execute('cy bt', to_string=True)
_debug(libpython.execute, libpython._execute, gdb.execute)
_debug(gdb.execute('cy list', to_string=True))
_debug(repr(result))
assert re.search(r'\#\d+ *0x.* in spam\(\) at .*codefile\.pyx:22',
result), result
assert 'os.path.join("foo", "bar")' in result, result
gdb.execute("cy step")
gdb.execute('cy bt')
result = gdb.execute('cy bt -a', to_string=True)
assert re.search(r'\#0 *0x.* in main\(\) at', result), result
class TestFunctions(DebugTestCase):
def test_functions(self):
self.break_and_run('c = 2')
result = gdb.execute('print $cy_cname("b")', to_string=True)
assert re.search('__pyx_.*b', result), result
result = gdb.execute('print $cy_lineno()', to_string=True)
supposed_lineno = test_libcython.source_to_lineno['c = 2']
assert str(supposed_lineno) in result, (supposed_lineno, result)
result = gdb.execute('print $cy_cvalue("b")', to_string=True)
assert '= 1' in result
class TestPrint(DebugTestCase):
def test_print(self):
self.break_and_run('c = 2')
result = gdb.execute('cy print b', to_string=True)
self.assertEqual('b = (int) 1\n', result)
class TestUpDown(DebugTestCase):
def test_updown(self):
self.break_and_run('os.path.join("foo", "bar")')
gdb.execute('cy step')
self.assertRaises(RuntimeError, gdb.execute, 'cy down')
result = gdb.execute('cy up', to_string=True)
assert 'spam()' in result
assert 'os.path.join("foo", "bar")' in result
class TestExec(DebugTestCase):
def setUp(self):
super(TestExec, self).setUp()
self.fd, self.tmpfilename = tempfile.mkstemp()
self.tmpfile = os.fdopen(self.fd, 'r+')
def tearDown(self):
super(TestExec, self).tearDown()
try:
self.tmpfile.close()
finally:
os.remove(self.tmpfilename)
def eval_command(self, command):
gdb.execute('cy exec open(%r, "w").write(str(%s))' %
(self.tmpfilename, command))
return self.tmpfile.read().strip()
def test_cython_exec(self):
self.break_and_run('os.path.join("foo", "bar")')
# test normal behaviour
self.assertEqual("[0]", self.eval_command('[a]'))
# test multiline code
result = gdb.execute(textwrap.dedent('''\
cy exec
pass
"nothing"
end
'''))
result = self.tmpfile.read().rstrip()
self.assertEqual('', result)
def test_python_exec(self):
self.break_and_run('os.path.join("foo", "bar")')
gdb.execute('cy step')
gdb.execute('cy exec some_random_var = 14')
self.assertEqual('14', self.eval_command('some_random_var'))
_do_debug = os.environ.get('CYTHON_GDB_DEBUG')
if _do_debug:
_debug_file = open('/dev/tty', 'w')
def _debug(*messages):
if _do_debug:
messages = itertools.chain([sys._getframe(1).f_code.co_name],
messages)
_debug_file.write(' '.join(str(msg) for msg in messages) + '\n')
def _main():
try:
gdb.lookup_type('PyModuleObject')
except RuntimeError:
msg = ("Unable to run tests, Python was not compiled with "
"debugging information. Either compile python with "
"-g or get a debug build (configure with --with-pydebug).")
warnings.warn(msg)
os._exit(1)
else:
m = __import__(__name__, fromlist=[''])
tests = inspect.getmembers(m, inspect.isclass)
# test_support.run_unittest(tests)
test_loader = unittest.TestLoader()
suite = unittest.TestSuite(
[test_loader.loadTestsFromTestCase(cls) for name, cls in tests])
result = unittest.TextTestRunner(verbosity=1).run(suite)
if not result.wasSuccessful():
os._exit(1)
def main(trace_code=False):
if trace_code:
tracer = trace.Trace(count=False, trace=True, outfile=sys.stderr,
ignoredirs=[sys.prefix, sys.exec_prefix])
tracer.runfunc(_main)
else:
_main()
main()
\ No newline at end of file
"""
GDB extension that adds Cython support.
"""
from __future__ import with_statement
import os
import sys
import textwrap
import operator
import traceback
import functools
import itertools
import collections
import gdb
try:
from lxml import etree
have_lxml = True
except ImportError:
have_lxml = False
try:
# Python 2.5
from xml.etree import cElementTree as etree
except ImportError:
try:
# Python 2.5
from xml.etree import ElementTree as etree
except ImportError:
try:
# normal cElementTree install
import cElementTree as etree
except ImportError:
# normal ElementTree install
import elementtree.ElementTree as etree
try:
import pygments.lexers
import pygments.formatters
except ImportError:
pygments = None
sys.stderr.write("Install pygments for colorized source code.\n")
if hasattr(gdb, 'string_to_argv'):
from gdb import string_to_argv
else:
from shlex import split as string_to_argv
from Cython.Debugger import libpython
# C or Python type
CObject = 'CObject'
PythonObject = 'PythonObject'
_data_types = dict(CObject=CObject, PythonObject=PythonObject)
_filesystemencoding = sys.getfilesystemencoding() or 'UTF-8'
# decorators
def dont_suppress_errors(function):
"*sigh*, readline"
@functools.wraps(function)
def wrapper(*args, **kwargs):
try:
return function(*args, **kwargs)
except Exception:
traceback.print_exc()
raise
return wrapper
def default_selected_gdb_frame(err=True):
def decorator(function):
@functools.wraps(function)
def wrapper(self, frame=None, *args, **kwargs):
try:
frame = frame or gdb.selected_frame()
except RuntimeError:
raise gdb.GdbError("No frame is currently selected.")
if err and frame.name() is None:
raise NoFunctionNameInFrameError()
return function(self, frame, *args, **kwargs)
return wrapper
return decorator
def require_cython_frame(function):
@functools.wraps(function)
@require_running_program
def wrapper(self, *args, **kwargs):
frame = kwargs.get('frame') or gdb.selected_frame()
if not self.is_cython_function(frame):
raise gdb.GdbError('Selected frame does not correspond with a '
'Cython function we know about.')
return function(self, *args, **kwargs)
return wrapper
def dispatch_on_frame(c_command, python_command=None):
def decorator(function):
@functools.wraps(function)
def wrapper(self, *args, **kwargs):
is_cy = self.is_cython_function()
is_py = self.is_python_function()
if is_cy or (is_py and not python_command):
function(self, *args, **kwargs)
elif is_py:
gdb.execute(python_command)
elif self.is_relevant_function():
gdb.execute(c_command)
else:
raise gdb.GdbError("Not a function cygdb knows about. "
"Use the normal GDB commands instead.")
return wrapper
return decorator
def require_running_program(function):
@functools.wraps(function)
def wrapper(*args, **kwargs):
try:
gdb.selected_frame()
except RuntimeError:
raise gdb.GdbError("No frame is currently selected.")
return function(*args, **kwargs)
return wrapper
def gdb_function_value_to_unicode(function):
@functools.wraps(function)
def wrapper(self, string, *args, **kwargs):
if isinstance(string, gdb.Value):
string = string.string()
return function(self, string, *args, **kwargs)
return wrapper
# Classes that represent the debug information
# Don't rename the parameters of these classes, they come directly from the XML
class CythonModule(object):
def __init__(self, module_name, filename, c_filename):
self.name = module_name
self.filename = filename
self.c_filename = c_filename
self.globals = {}
# {cython_lineno: min(c_linenos)}
self.lineno_cy2c = {}
# {c_lineno: cython_lineno}
self.lineno_c2cy = {}
self.functions = {}
def qualified_name(self, varname):
return '.'.join(self.name, varname)
class CythonVariable(object):
def __init__(self, name, cname, qualified_name, type, lineno):
self.name = name
self.cname = cname
self.qualified_name = qualified_name
self.type = type
self.lineno = int(lineno)
class CythonFunction(CythonVariable):
def __init__(self,
module,
name,
cname,
pf_cname,
qualified_name,
lineno,
type=CObject):
super(CythonFunction, self).__init__(name,
cname,
qualified_name,
type,
lineno)
self.module = module
self.pf_cname = pf_cname
self.locals = {}
self.arguments = []
self.step_into_functions = set()
# General purpose classes
class CythonBase(object):
@default_selected_gdb_frame(err=False)
def is_cython_function(self, frame):
return frame.name() in self.cy.functions_by_cname
@default_selected_gdb_frame(err=False)
def is_python_function(self, frame):
"""
Tells if a frame is associated with a Python function.
If we can't read the Python frame information, don't regard it as such.
"""
if frame.name() == 'PyEval_EvalFrameEx':
pyframe = libpython.Frame(frame).get_pyop()
return pyframe and not pyframe.is_optimized_out()
return False
@default_selected_gdb_frame()
def get_c_function_name(self, frame):
return frame.name()
@default_selected_gdb_frame()
def get_c_lineno(self, frame):
return frame.find_sal().line
@default_selected_gdb_frame()
def get_cython_function(self, frame):
result = self.cy.functions_by_cname.get(frame.name())
if result is None:
raise NoCythonFunctionInFrameError()
return result
@default_selected_gdb_frame()
def get_cython_lineno(self, frame):
"""
Get the current Cython line number. Returns 0 if there is no
correspondence between the C and Cython code.
"""
cyfunc = self.get_cython_function(frame)
return cyfunc.module.lineno_c2cy.get(self.get_c_lineno(frame), 0)
@default_selected_gdb_frame()
def get_source_desc(self, frame):
filename = lineno = lexer = None
if self.is_cython_function(frame):
filename = self.get_cython_function(frame).module.filename
lineno = self.get_cython_lineno(frame)
if pygments:
lexer = pygments.lexers.CythonLexer(stripall=False)
elif self.is_python_function(frame):
pyframeobject = libpython.Frame(frame).get_pyop()
if not pyframeobject:
raise gdb.GdbError('Unable to read information on python frame')
filename = pyframeobject.filename()
lineno = pyframeobject.current_line_num()
if pygments:
lexer = pygments.lexers.PythonLexer(stripall=False)
else:
symbol_and_line_obj = frame.find_sal()
if not symbol_and_line_obj or not symbol_and_line_obj.symtab:
filename = None
lineno = 0
else:
filename = symbol_and_line_obj.symtab.filename
lineno = symbol_and_line_obj.line
if pygments:
lexer = pygments.lexers.CLexer(stripall=False)
return SourceFileDescriptor(filename, lexer), lineno
@default_selected_gdb_frame()
def get_source_line(self, frame):
source_desc, lineno = self.get_source_desc()
return source_desc.get_source(lineno)
@default_selected_gdb_frame()
def is_relevant_function(self, frame):
"""
returns whether we care about a frame on the user-level when debugging
Cython code
"""
name = frame.name()
older_frame = frame.older()
if self.is_cython_function(frame) or self.is_python_function(frame):
return True
elif older_frame and self.is_cython_function(older_frame):
# direct C function call from a Cython function
cython_func = self.get_cython_function(older_frame)
return name in cython_func.step_into_functions
return False
@default_selected_gdb_frame(err=False)
def print_stackframe(self, frame, index, is_c=False):
"""
Print a C, Cython or Python stack frame and the line of source code
if available.
"""
# do this to prevent the require_cython_frame decorator from
# raising GdbError when calling self.cy.cy_cvalue.invoke()
selected_frame = gdb.selected_frame()
frame.select()
try:
source_desc, lineno = self.get_source_desc(frame)
except NoFunctionNameInFrameError:
print '#%-2d Unknown Frame (compile with -g)' % index
return
if not is_c and self.is_python_function(frame):
pyframe = libpython.Frame(frame).get_pyop()
if pyframe is None or pyframe.is_optimized_out():
# print this python function as a C function
return self.print_stackframe(frame, index, is_c=True)
func_name = pyframe.co_name
func_cname = 'PyEval_EvalFrameEx'
func_args = []
elif self.is_cython_function(frame):
cyfunc = self.get_cython_function(frame)
f = lambda arg: self.cy.cy_cvalue.invoke(arg, frame=frame)
func_name = cyfunc.name
func_cname = cyfunc.cname
func_args = [] # [(arg, f(arg)) for arg in cyfunc.arguments]
else:
source_desc, lineno = self.get_source_desc(frame)
func_name = frame.name()
func_cname = func_name
func_args = []
try:
gdb_value = gdb.parse_and_eval(func_cname)
except RuntimeError:
func_address = 0
else:
# Seriously? Why is the address not an int?
func_address = int(str(gdb_value.address).split()[0], 0)
a = ', '.join('%s=%s' % (name, val) for name, val in func_args)
print '#%-2d 0x%016x in %s(%s)' % (index, func_address, func_name, a),
if source_desc.filename is not None:
print 'at %s:%s' % (source_desc.filename, lineno),
print
try:
print ' ' + source_desc.get_source(lineno)
except gdb.GdbError:
pass
selected_frame.select()
def get_remote_cython_globals_dict(self):
m = gdb.parse_and_eval('__pyx_m')
try:
PyModuleObject = gdb.lookup_type('PyModuleObject')
except RuntimeError:
raise gdb.GdbError(textwrap.dedent("""\
Unable to lookup type PyModuleObject, did you compile python
with debugging support (-g)?"""))
m = m.cast(PyModuleObject.pointer())
return m['md_dict']
def get_cython_globals_dict(self):
"""
Get the Cython globals dict where the remote names are turned into
local strings.
"""
remote_dict = self.get_remote_cython_globals_dict()
pyobject_dict = libpython.PyObjectPtr.from_pyobject_ptr(remote_dict)
result = {}
seen = set()
for k, v in pyobject_dict.iteritems():
result[k.proxyval(seen)] = v
return result
def print_gdb_value(self, name, value, max_name_length=None, prefix=''):
if libpython.pretty_printer_lookup(value):
typename = ''
else:
typename = '(%s) ' % (value.type,)
if max_name_length is None:
print '%s%s = %s%s' % (prefix, name, typename, value)
else:
print '%s%-*s = %s%s' % (prefix, max_name_length, name, typename,
value)
def is_initialized(self, cython_func, local_name):
cur_lineno = self.get_cython_lineno()
return (local_name in cython_func.arguments or
(local_name in cython_func.locals and
cur_lineno > cython_func.locals[local_name].lineno))
class SourceFileDescriptor(object):
def __init__(self, filename, lexer, formatter=None):
self.filename = filename
self.lexer = lexer
self.formatter = formatter
def valid(self):
return self.filename is not None
def lex(self, code):
if pygments and self.lexer and parameters.colorize_code:
bg = parameters.terminal_background.value
if self.formatter is None:
formatter = pygments.formatters.TerminalFormatter(bg=bg)
else:
formatter = self.formatter
return pygments.highlight(code, self.lexer, formatter)
return code
def _get_source(self, start, stop, lex_source, mark_line, lex_entire):
with open(self.filename) as f:
# to provide "correct" colouring, the entire code needs to be
# lexed. However, this makes a lot of things terribly slow, so
# we decide not to. Besides, it's unlikely to matter.
if lex_source and lex_entire:
f = self.lex(f.read()).splitlines()
slice = itertools.islice(f, start - 1, stop - 1)
for idx, line in enumerate(slice):
if start + idx == mark_line:
prefix = '>'
else:
prefix = ' '
if lex_source and not lex_entire:
line = self.lex(line)
yield '%s %4d %s' % (prefix, start + idx, line.rstrip())
def get_source(self, start, stop=None, lex_source=True, mark_line=0,
lex_entire=False):
exc = gdb.GdbError('Unable to retrieve source code')
if not self.filename:
raise exc
start = max(start, 1)
if stop is None:
stop = start + 1
try:
return '\n'.join(
self._get_source(start, stop, lex_source, mark_line, lex_entire))
except IOError:
raise exc
# Errors
class CyGDBError(gdb.GdbError):
"""
Base class for Cython-command related erorrs
"""
def __init__(self, *args):
args = args or (self.msg,)
super(CyGDBError, self).__init__(*args)
class NoCythonFunctionInFrameError(CyGDBError):
"""
raised when the user requests the current cython function, which is
unavailable
"""
msg = "Current function is a function cygdb doesn't know about"
class NoFunctionNameInFrameError(NoCythonFunctionInFrameError):
"""
raised when the name of the C function could not be determined
in the current C stack frame
"""
msg = ('C function name could not be determined in the current C stack '
'frame')
# Parameters
class CythonParameter(gdb.Parameter):
"""
Base class for cython parameters
"""
def __init__(self, name, command_class, parameter_class, default=None):
self.show_doc = self.set_doc = self.__class__.__doc__
super(CythonParameter, self).__init__(name, command_class,
parameter_class)
if default is not None:
self.value = default
def __nonzero__(self):
return bool(self.value)
__bool__ = __nonzero__ # python 3
class CompleteUnqualifiedFunctionNames(CythonParameter):
"""
Have 'cy break' complete unqualified function or method names.
"""
class ColorizeSourceCode(CythonParameter):
"""
Tell cygdb whether to colorize source code.
"""
class TerminalBackground(CythonParameter):
"""
Tell cygdb about the user's terminal background (light or dark).
"""
class CythonParameters(object):
"""
Simple container class that might get more functionality in the distant
future (mostly to remind us that we're dealing with parameters).
"""
def __init__(self):
self.complete_unqualified = CompleteUnqualifiedFunctionNames(
'cy_complete_unqualified',
gdb.COMMAND_BREAKPOINTS,
gdb.PARAM_BOOLEAN,
True)
self.colorize_code = ColorizeSourceCode(
'cy_colorize_code',
gdb.COMMAND_FILES,
gdb.PARAM_BOOLEAN,
True)
self.terminal_background = TerminalBackground(
'cy_terminal_background_color',
gdb.COMMAND_FILES,
gdb.PARAM_STRING,
"dark")
parameters = CythonParameters()
# Commands
class CythonCommand(gdb.Command, CythonBase):
"""
Base class for Cython commands
"""
command_class = gdb.COMMAND_NONE
@classmethod
def _register(cls, clsname, args, kwargs):
if not hasattr(cls, 'completer_class'):
return cls(clsname, cls.command_class, *args, **kwargs)
else:
return cls(clsname, cls.command_class, cls.completer_class,
*args, **kwargs)
@classmethod
def register(cls, *args, **kwargs):
alias = getattr(cls, 'alias', None)
if alias:
cls._register(cls.alias, args, kwargs)
return cls._register(cls.name, args, kwargs)
class CyCy(CythonCommand):
"""
Invoke a Cython command. Available commands are:
cy import
cy break
cy step
cy next
cy run
cy cont
cy finish
cy up
cy down
cy select
cy bt / cy backtrace
cy list
cy print
cy locals
cy globals
cy exec
"""
name = 'cy'
command_class = gdb.COMMAND_NONE
completer_class = gdb.COMPLETE_COMMAND
def __init__(self, name, command_class, completer_class):
# keep the signature 2.5 compatible (i.e. do not use f(*a, k=v)
super(CythonCommand, self).__init__(name, command_class,
completer_class, prefix=True)
commands = dict(
import_ = CyImport.register(),
break_ = CyBreak.register(),
step = CyStep.register(),
next = CyNext.register(),
run = CyRun.register(),
cont = CyCont.register(),
finish = CyFinish.register(),
up = CyUp.register(),
down = CyDown.register(),
select = CySelect.register(),
bt = CyBacktrace.register(),
list = CyList.register(),
print_ = CyPrint.register(),
locals = CyLocals.register(),
globals = CyGlobals.register(),
exec_ = libpython.FixGdbCommand('cy exec', '-cy-exec'),
_exec = CyExec.register(),
cy_cname = CyCName('cy_cname'),
cy_cvalue = CyCValue('cy_cvalue'),
cy_lineno = CyLine('cy_lineno'),
)
for command_name, command in commands.iteritems():
command.cy = self
setattr(self, command_name, command)
self.cy = self
# Cython module namespace
self.cython_namespace = {}
# maps (unique) qualified function names (e.g.
# cythonmodule.ClassName.method_name) to the CythonFunction object
self.functions_by_qualified_name = {}
# unique cnames of Cython functions
self.functions_by_cname = {}
# map function names like method_name to a list of all such
# CythonFunction objects
self.functions_by_name = collections.defaultdict(list)
class CyImport(CythonCommand):
"""
Import debug information outputted by the Cython compiler
Example: cy import FILE...
"""
name = 'cy import'
command_class = gdb.COMMAND_STATUS
completer_class = gdb.COMPLETE_FILENAME
def invoke(self, args, from_tty):
args = args.encode(_filesystemencoding)
for arg in string_to_argv(args):
try:
f = open(arg)
except OSError, e:
raise gdb.GdbError('Unable to open file %r: %s' %
(args, e.args[1]))
t = etree.parse(f)
for module in t.getroot():
cython_module = CythonModule(**module.attrib)
self.cy.cython_namespace[cython_module.name] = cython_module
for variable in module.find('Globals'):
d = variable.attrib
cython_module.globals[d['name']] = CythonVariable(**d)
for function in module.find('Functions'):
cython_function = CythonFunction(module=cython_module,
**function.attrib)
# update the global function mappings
name = cython_function.name
qname = cython_function.qualified_name
self.cy.functions_by_name[name].append(cython_function)
self.cy.functions_by_qualified_name[
cython_function.qualified_name] = cython_function
self.cy.functions_by_cname[
cython_function.cname] = cython_function
d = cython_module.functions[qname] = cython_function
for local in function.find('Locals'):
d = local.attrib
cython_function.locals[d['name']] = CythonVariable(**d)
for step_into_func in function.find('StepIntoFunctions'):
d = step_into_func.attrib
cython_function.step_into_functions.add(d['name'])
cython_function.arguments.extend(
funcarg.tag for funcarg in function.find('Arguments'))
for marker in module.find('LineNumberMapping'):
cython_lineno = int(marker.attrib['cython_lineno'])
c_linenos = map(int, marker.attrib['c_linenos'].split())
cython_module.lineno_cy2c[cython_lineno] = min(c_linenos)
for c_lineno in c_linenos:
cython_module.lineno_c2cy[c_lineno] = cython_lineno
self.cy.step.init_breakpoints()
class CyBreak(CythonCommand):
"""
Set a breakpoint for Cython code using Cython qualified name notation, e.g.:
cy break cython_modulename.ClassName.method_name...
or normal notation:
cy break function_or_method_name...
or for a line number:
cy break cython_module:lineno...
Set a Python breakpoint:
Break on any function or method named 'func' in module 'modname'
cy break -p modname.func...
Break on any function or method named 'func'
cy break -p func...
"""
name = 'cy break'
command_class = gdb.COMMAND_BREAKPOINTS
def _break_pyx(self, name):
modulename, _, lineno = name.partition(':')
lineno = int(lineno)
if modulename:
cython_module = self.cy.cython_namespace[modulename]
else:
cython_module = self.get_cython_function().module
if lineno in cython_module.lineno_cy2c:
c_lineno = cython_module.lineno_cy2c[lineno]
breakpoint = '%s:%s' % (cython_module.c_filename, c_lineno)
gdb.execute('break ' + breakpoint)
else:
raise GdbError("Not a valid line number. "
"Does it contain actual code?")
def _break_funcname(self, funcname):
func = self.cy.functions_by_qualified_name.get(funcname)
break_funcs = [func]
if not func:
funcs = self.cy.functions_by_name.get(funcname)
if not funcs:
gdb.execute('break ' + funcname)
return
if len(funcs) > 1:
# multiple functions, let the user pick one
print 'There are multiple such functions:'
for idx, func in enumerate(funcs):
print '%3d) %s' % (idx, func.qualified_name)
while True:
try:
result = raw_input(
"Select a function, press 'a' for all "
"functions or press 'q' or '^D' to quit: ")
except EOFError:
return
else:
if result.lower() == 'q':
return
elif result.lower() == 'a':
break_funcs = funcs
break
elif (result.isdigit() and
0 <= int(result) < len(funcs)):
break_funcs = [funcs[int(result)]]
break
else:
print 'Not understood...'
else:
break_funcs = [funcs[0]]
for func in break_funcs:
gdb.execute('break %s' % func.cname)
if func.pf_cname:
gdb.execute('break %s' % func.pf_cname)
def invoke(self, function_names, from_tty):
argv = string_to_argv(function_names.encode('UTF-8'))
if function_names.startswith('-p'):
argv = argv[1:]
python_breakpoints = True
else:
python_breakpoints = False
for funcname in argv:
if python_breakpoints:
gdb.execute('py-break %s' % funcname)
elif ':' in funcname:
self._break_pyx(funcname)
else:
self._break_funcname(funcname)
@dont_suppress_errors
def complete(self, text, word):
names = self.cy.functions_by_qualified_name
if parameters.complete_unqualified:
names = itertools.chain(names, self.cy.functions_by_name)
words = text.strip().split()
if words and '.' in words[-1]:
lastword = words[-1]
compl = [n for n in self.cy.functions_by_qualified_name
if n.startswith(lastword)]
else:
seen = set(text[:-len(word)].split())
return [n for n in names if n.startswith(word) and n not in seen]
if len(lastword) > len(word):
# readline sees something (e.g. a '.') as a word boundary, so don't
# "recomplete" this prefix
strip_prefix_length = len(lastword) - len(word)
compl = [n[strip_prefix_length:] for n in compl]
return compl
class CythonCodeStepper(CythonCommand, libpython.GenericCodeStepper):
"""
Base class for CyStep and CyNext. It implements the interface dictated by
libpython.GenericCodeStepper.
"""
def lineno(self, frame):
# Take care of the Python and Cython levels. We need to care for both
# as we can't simply dispath to 'py-step', since that would work for
# stepping through Python code, but it would not step back into Cython-
# related code. The C level should be dispatched to the 'step' command.
if self.is_cython_function(frame):
return self.get_cython_lineno(frame)
else:
return libpython.py_step.lineno(frame)
def get_source_line(self, frame):
try:
line = super(CythonCodeStepper, self).get_source_line(frame)
except gdb.GdbError:
return None
else:
return line.strip() or None
@classmethod
def register(cls):
return cls(cls.name, stepinto=getattr(cls, 'stepinto', False))
def runtime_break_functions(self):
if self.is_cython_function():
return self.get_cython_function().step_into_functions
def static_break_functions(self):
result = ['PyEval_EvalFrameEx']
result.extend(self.cy.functions_by_cname)
return result
def invoke(self, args, from_tty):
if not self.is_cython_function() and not self.is_python_function():
if self.stepinto:
command = 'step'
else:
command = 'next'
self.finish_executing(gdb.execute(command, to_string=True))
else:
self.step()
class CyStep(CythonCodeStepper):
"Step through Cython, Python or C code."
name = 'cy step'
stepinto = True
class CyNext(CythonCodeStepper):
"Step-over Python code."
name = 'cy next'
stepinto = False
class CyRun(CythonCodeStepper):
"""
Run a Cython program. This is like the 'run' command, except that it
displays Cython or Python source lines as well
"""
name = 'cy run'
invoke = CythonCodeStepper.run
class CyCont(CyRun):
"""
Continue a Cython program. This is like the 'run' command, except that it
displays Cython or Python source lines as well.
"""
name = 'cy cont'
invoke = CythonCodeStepper.cont
class CyFinish(CyRun):
"""
Execute until the function returns.
"""
name = 'cy finish'
invoke = CythonCodeStepper.finish
class CyUp(CythonCommand):
"""
Go up a Cython, Python or relevant C frame.
"""
name = 'cy up'
_command = 'up'
def invoke(self, *args):
try:
gdb.execute(self._command, to_string=True)
while not self.is_relevant_function(gdb.selected_frame()):
gdb.execute(self._command, to_string=True)
except RuntimeError, e:
raise gdb.GdbError(*e.args)
frame = gdb.selected_frame()
index = 0
while frame:
frame = frame.older()
index += 1
self.print_stackframe(index=index - 1)
class CyDown(CyUp):
"""
Go down a Cython, Python or relevant C frame.
"""
name = 'cy down'
_command = 'down'
class CySelect(CythonCodeStepper):
"""
Select a frame. Use frame numbers as listed in `cy backtrace`.
This command is useful because `cy backtrace` prints a reversed backtrace.
"""
name = 'cy select'
def invoke(self, stackno, from_tty):
try:
stackno = int(stackno)
except ValueError:
raise gdb.GdbError("Not a valid number: %r" % (stackno,))
frame = gdb.selected_frame()
while frame.newer():
frame = frame.newer()
stackdepth = self._stackdepth(frame)
try:
gdb.execute('select %d' % (stackdepth - stackno - 1,))
except RuntimeError, e:
raise gdb.GdbError(*e.args)
class CyBacktrace(CythonCommand):
'Print the Cython stack'
name = 'cy bt'
alias = 'cy backtrace'
command_class = gdb.COMMAND_STACK
completer_class = gdb.COMPLETE_NONE
@require_running_program
def invoke(self, args, from_tty):
# get the first frame
selected_frame = frame = gdb.selected_frame()
while frame.older():
frame = frame.older()
print_all = args == '-a'
index = 0
while frame:
is_c = False
is_relevant = False
try:
is_relevant = self.is_relevant_function(frame)
except CyGDBError:
pass
if print_all or is_relevant:
self.print_stackframe(frame, index)
index += 1
frame = frame.newer()
selected_frame.select()
class CyList(CythonCommand):
"""
List Cython source code. To disable to customize colouring see the cy_*
parameters.
"""
name = 'cy list'
command_class = gdb.COMMAND_FILES
completer_class = gdb.COMPLETE_NONE
@dispatch_on_frame(c_command='list')
def invoke(self, _, from_tty):
sd, lineno = self.get_source_desc()
source = sd.get_source(lineno - 5, lineno + 5, mark_line=lineno,
lex_entire=True)
print source
class CyPrint(CythonCommand):
"""
Print a Cython variable using 'cy-print x' or 'cy-print module.function.x'
"""
name = 'cy print'
command_class = gdb.COMMAND_DATA
def invoke(self, name, from_tty, max_name_length=None):
if self.is_python_function():
return gdb.execute('py-print ' + name)
elif self.is_cython_function():
value = self.cy.cy_cvalue.invoke(name.lstrip('*'))
for c in name:
if c == '*':
value = value.dereference()
else:
break
self.print_gdb_value(name, value, max_name_length)
else:
gdb.execute('print ' + name)
def complete(self):
if self.is_cython_function():
f = self.get_cython_function()
return list(itertools.chain(f.locals, f.globals))
else:
return []
sortkey = lambda (name, value): name.lower()
class CyLocals(CythonCommand):
"""
List the locals from the current Cython frame.
"""
name = 'cy locals'
command_class = gdb.COMMAND_STACK
completer_class = gdb.COMPLETE_NONE
@dispatch_on_frame(c_command='info locals', python_command='py-locals')
def invoke(self, args, from_tty):
local_cython_vars = self.get_cython_function().locals
max_name_length = len(max(local_cython_vars, key=len))
for name, cyvar in sorted(local_cython_vars.iteritems(), key=sortkey):
if self.is_initialized(self.get_cython_function(), cyvar.name):
value = gdb.parse_and_eval(cyvar.cname)
if not value.is_optimized_out:
self.print_gdb_value(cyvar.name, value,
max_name_length, '')
class CyGlobals(CyLocals):
"""
List the globals from the current Cython module.
"""
name = 'cy globals'
command_class = gdb.COMMAND_STACK
completer_class = gdb.COMPLETE_NONE
@dispatch_on_frame(c_command='info variables', python_command='py-globals')
def invoke(self, args, from_tty):
global_python_dict = self.get_cython_globals_dict()
module_globals = self.get_cython_function().module.globals
max_globals_len = 0
max_globals_dict_len = 0
if module_globals:
max_globals_len = len(max(module_globals, key=len))
if global_python_dict:
max_globals_dict_len = len(max(global_python_dict))
max_name_length = max(max_globals_len, max_globals_dict_len)
seen = set()
print 'Python globals:'
for k, v in sorted(global_python_dict.iteritems(), key=sortkey):
v = v.get_truncated_repr(libpython.MAX_OUTPUT_LEN)
seen.add(k)
print ' %-*s = %s' % (max_name_length, k, v)
print 'C globals:'
for name, cyvar in sorted(module_globals.iteritems(), key=sortkey):
if name not in seen:
try:
value = gdb.parse_and_eval(cyvar.cname)
except RuntimeError:
pass
else:
if not value.is_optimized_out:
self.print_gdb_value(cyvar.name, value,
max_name_length, ' ')
class CyExec(CythonCommand, libpython.PyExec):
"""
Execute Python code in the nearest Python or Cython frame.
"""
name = '-cy-exec'
command_class = gdb.COMMAND_STACK
completer_class = gdb.COMPLETE_NONE
def _fill_locals_dict(self, executor, local_dict_pointer):
"Fill a remotely allocated dict with values from the Cython C stack"
cython_func = self.get_cython_function()
current_lineno = self.get_cython_lineno()
for name, cyvar in cython_func.locals.iteritems():
if (cyvar.type == PythonObject and
self.is_initialized(cython_func, name)):
try:
val = gdb.parse_and_eval(cyvar.cname)
except RuntimeError:
continue
else:
if val.is_optimized_out:
continue
pystringp = executor.alloc_pystring(name)
code = '''
(PyObject *) PyDict_SetItem(
(PyObject *) %d,
(PyObject *) %d,
(PyObject *) %s)
''' % (local_dict_pointer, pystringp, cyvar.cname)
try:
if gdb.parse_and_eval(code) < 0:
gdb.parse_and_eval('PyErr_Print()')
raise gdb.GdbError("Unable to execute Python code.")
finally:
# PyDict_SetItem doesn't steal our reference
executor.decref(pystringp)
def _find_first_cython_or_python_frame(self):
frame = gdb.selected_frame()
while frame:
if (self.is_cython_function(frame) or
self.is_python_function(frame)):
return frame
frame = frame.older()
raise gdb.GdbError("There is no Cython or Python frame on the stack.")
def invoke(self, expr, from_tty):
frame = self._find_first_cython_or_python_frame()
if self.is_python_function(frame):
libpython.py_exec.invoke(expr, from_tty)
return
expr, input_type = self.readcode(expr)
executor = libpython.PythonCodeExecutor()
with libpython.FetchAndRestoreError():
# get the dict of Cython globals and construct a dict in the
# inferior with Cython locals
global_dict = gdb.parse_and_eval(
'(PyObject *) PyModule_GetDict(__pyx_m)')
local_dict = gdb.parse_and_eval('(PyObject *) PyDict_New()')
cython_function = self.get_cython_function()
try:
self._fill_locals_dict(executor,
libpython.pointervalue(local_dict))
executor.evalcode(expr, input_type, global_dict, local_dict)
finally:
executor.decref(libpython.pointervalue(local_dict))
# Functions
class CyCName(gdb.Function, CythonBase):
"""
Get the C name of a Cython variable in the current context.
Examples:
print $cy_cname("function")
print $cy_cname("Class.method")
print $cy_cname("module.function")
"""
@require_cython_frame
@gdb_function_value_to_unicode
def invoke(self, cyname, frame=None):
frame = frame or gdb.selected_frame()
cname = None
if self.is_cython_function(frame):
cython_function = self.get_cython_function(frame)
if cyname in cython_function.locals:
cname = cython_function.locals[cyname].cname
elif cyname in cython_function.module.globals:
cname = cython_function.module.globals[cyname].cname
else:
qname = '%s.%s' % (cython_function.module.name, cyname)
if qname in cython_function.module.functions:
cname = cython_function.module.functions[qname].cname
if not cname:
cname = self.cy.functions_by_qualified_name.get(cyname)
if not cname:
raise gdb.GdbError('No such Cython variable: %s' % cyname)
return cname
class CyCValue(CyCName):
"""
Get the value of a Cython variable.
"""
@require_cython_frame
@gdb_function_value_to_unicode
def invoke(self, cyname, frame=None):
try:
cname = super(CyCValue, self).invoke(cyname, frame=frame)
return gdb.parse_and_eval(cname)
except (gdb.GdbError, RuntimeError), e:
# variable exists but may not have been initialized yet, or may be
# in the globals dict of the Cython module
d = self.get_cython_globals_dict()
if cyname in d:
return d[cyname]._gdbval
raise gdb.GdbError(str(e))
class CyLine(gdb.Function, CythonBase):
"""
Get the current Cython line.
"""
@require_cython_frame
def invoke(self):
return self.get_cython_lineno()
cy = CyCy.register()
\ No newline at end of file
#!/usr/bin/python
# NOTE: this file is taken from the Python source distribution
# It can be found under Tools/gdb/libpython.py. It is shipped with Cython
# because it's not installed as a python module, and because changes are only
# merged into new python versions (v3.2+).
'''
From gdb 7 onwards, gdb's build can be configured --with-python, allowing gdb
to be extended with Python code e.g. for library-specific data visualizations,
such as for the C++ STL types. Documentation on this API can be seen at:
http://sourceware.org/gdb/current/onlinedocs/gdb/Python-API.html
This python module deals with the case when the process being debugged (the
"inferior process" in gdb parlance) is itself python, or more specifically,
linked against libpython. In this situation, almost every item of data is a
(PyObject*), and having the debugger merely print their addresses is not very
enlightening.
This module embeds knowledge about the implementation details of libpython so
that we can emit useful visualizations e.g. a string, a list, a dict, a frame
giving file/line information and the state of local variables
In particular, given a gdb.Value corresponding to a PyObject* in the inferior
process, we can generate a "proxy value" within the gdb process. For example,
given a PyObject* in the inferior process that is in fact a PyListObject*
holding three PyObject* that turn out to be PyStringObject* instances, we can
generate a proxy value within the gdb process that is a list of strings:
["foo", "bar", "baz"]
Doing so can be expensive for complicated graphs of objects, and could take
some time, so we also have a "write_repr" method that writes a representation
of the data to a file-like object. This allows us to stop the traversal by
having the file-like object raise an exception if it gets too much data.
With both "proxyval" and "write_repr" we keep track of the set of all addresses
visited so far in the traversal, to avoid infinite recursion due to cycles in
the graph of object references.
We try to defer gdb.lookup_type() invocations for python types until as late as
possible: for a dynamically linked python binary, when the process starts in
the debugger, the libpython.so hasn't been dynamically loaded yet, so none of
the type names are known to the debugger
The module also extends gdb with some python-specific commands.
'''
from __future__ import with_statement
import os
import re
import sys
import struct
import locale
import atexit
import warnings
import tempfile
import itertools
import gdb
if sys.version_info[0] < 3:
# I think this is the only way to fix this bug :'(
# http://sourceware.org/bugzilla/show_bug.cgi?id=12285
out, err = sys.stdout, sys.stderr
reload(sys).setdefaultencoding('UTF-8')
sys.stdout = out
sys.stderr = err
# Look up the gdb.Type for some standard types:
_type_char_ptr = gdb.lookup_type('char').pointer() # char*
_type_unsigned_char_ptr = gdb.lookup_type('unsigned char').pointer() # unsigned char*
_type_void_ptr = gdb.lookup_type('void').pointer() # void*
SIZEOF_VOID_P = _type_void_ptr.sizeof
Py_TPFLAGS_HEAPTYPE = (1L << 9)
Py_TPFLAGS_INT_SUBCLASS = (1L << 23)
Py_TPFLAGS_LONG_SUBCLASS = (1L << 24)
Py_TPFLAGS_LIST_SUBCLASS = (1L << 25)
Py_TPFLAGS_TUPLE_SUBCLASS = (1L << 26)
Py_TPFLAGS_STRING_SUBCLASS = (1L << 27)
Py_TPFLAGS_BYTES_SUBCLASS = (1L << 27)
Py_TPFLAGS_UNICODE_SUBCLASS = (1L << 28)
Py_TPFLAGS_DICT_SUBCLASS = (1L << 29)
Py_TPFLAGS_BASE_EXC_SUBCLASS = (1L << 30)
Py_TPFLAGS_TYPE_SUBCLASS = (1L << 31)
MAX_OUTPUT_LEN=1024
hexdigits = "0123456789abcdef"
ENCODING = locale.getpreferredencoding()
class NullPyObjectPtr(RuntimeError):
pass
def safety_limit(val):
# Given a integer value from the process being debugged, limit it to some
# safety threshold so that arbitrary breakage within said process doesn't
# break the gdb process too much (e.g. sizes of iterations, sizes of lists)
return min(val, 1000)
def safe_range(val):
# As per range, but don't trust the value too much: cap it to a safety
# threshold in case the data was corrupted
return xrange(safety_limit(val))
def write_unicode(file, text):
# Write a byte or unicode string to file. Unicode strings are encoded to
# ENCODING encoding with 'backslashreplace' error handler to avoid
# UnicodeEncodeError.
if isinstance(text, unicode):
text = text.encode(ENCODING, 'backslashreplace')
file.write(text)
def os_fsencode(filename):
if not isinstance(filename, unicode):
return filename
encoding = sys.getfilesystemencoding()
if encoding == 'mbcs':
# mbcs doesn't support surrogateescape
return filename.encode(encoding)
encoded = []
for char in filename:
# surrogateescape error handler
if 0xDC80 <= ord(char) <= 0xDCFF:
byte = chr(ord(char) - 0xDC00)
else:
byte = char.encode(encoding)
encoded.append(byte)
return ''.join(encoded)
class StringTruncated(RuntimeError):
pass
class TruncatedStringIO(object):
'''Similar to cStringIO, but can truncate the output by raising a
StringTruncated exception'''
def __init__(self, maxlen=None):
self._val = ''
self.maxlen = maxlen
def write(self, data):
if self.maxlen:
if len(data) + len(self._val) > self.maxlen:
# Truncation:
self._val += data[0:self.maxlen - len(self._val)]
raise StringTruncated()
self._val += data
def getvalue(self):
return self._val
class PyObjectPtr(object):
"""
Class wrapping a gdb.Value that's a either a (PyObject*) within the
inferior process, or some subclass pointer e.g. (PyStringObject*)
There will be a subclass for every refined PyObject type that we care
about.
Note that at every stage the underlying pointer could be NULL, point
to corrupt data, etc; this is the debugger, after all.
"""
_typename = 'PyObject'
def __init__(self, gdbval, cast_to=None):
if cast_to:
self._gdbval = gdbval.cast(cast_to)
else:
self._gdbval = gdbval
def field(self, name):
'''
Get the gdb.Value for the given field within the PyObject, coping with
some python 2 versus python 3 differences.
Various libpython types are defined using the "PyObject_HEAD" and
"PyObject_VAR_HEAD" macros.
In Python 2, this these are defined so that "ob_type" and (for a var
object) "ob_size" are fields of the type in question.
In Python 3, this is defined as an embedded PyVarObject type thus:
PyVarObject ob_base;
so that the "ob_size" field is located insize the "ob_base" field, and
the "ob_type" is most easily accessed by casting back to a (PyObject*).
'''
if self.is_null():
raise NullPyObjectPtr(self)
if name == 'ob_type':
pyo_ptr = self._gdbval.cast(PyObjectPtr.get_gdb_type())
return pyo_ptr.dereference()[name]
if name == 'ob_size':
pyo_ptr = self._gdbval.cast(PyVarObjectPtr.get_gdb_type())
return pyo_ptr.dereference()[name]
# General case: look it up inside the object:
return self._gdbval.dereference()[name]
def pyop_field(self, name):
'''
Get a PyObjectPtr for the given PyObject* field within this PyObject,
coping with some python 2 versus python 3 differences.
'''
return PyObjectPtr.from_pyobject_ptr(self.field(name))
def write_field_repr(self, name, out, visited):
'''
Extract the PyObject* field named "name", and write its representation
to file-like object "out"
'''
field_obj = self.pyop_field(name)
field_obj.write_repr(out, visited)
def get_truncated_repr(self, maxlen):
'''
Get a repr-like string for the data, but truncate it at "maxlen" bytes
(ending the object graph traversal as soon as you do)
'''
out = TruncatedStringIO(maxlen)
try:
self.write_repr(out, set())
except StringTruncated:
# Truncation occurred:
return out.getvalue() + '...(truncated)'
# No truncation occurred:
return out.getvalue()
def type(self):
return PyTypeObjectPtr(self.field('ob_type'))
def is_null(self):
return 0 == long(self._gdbval)
def is_optimized_out(self):
'''
Is the value of the underlying PyObject* visible to the debugger?
This can vary with the precise version of the compiler used to build
Python, and the precise version of gdb.
See e.g. https://bugzilla.redhat.com/show_bug.cgi?id=556975 with
PyEval_EvalFrameEx's "f"
'''
return self._gdbval.is_optimized_out
def safe_tp_name(self):
try:
return self.type().field('tp_name').string()
except NullPyObjectPtr:
# NULL tp_name?
return 'unknown'
except RuntimeError:
# Can't even read the object at all?
return 'unknown'
def proxyval(self, visited):
'''
Scrape a value from the inferior process, and try to represent it
within the gdb process, whilst (hopefully) avoiding crashes when
the remote data is corrupt.
Derived classes will override this.
For example, a PyIntObject* with ob_ival 42 in the inferior process
should result in an int(42) in this process.
visited: a set of all gdb.Value pyobject pointers already visited
whilst generating this value (to guard against infinite recursion when
visiting object graphs with loops). Analogous to Py_ReprEnter and
Py_ReprLeave
'''
class FakeRepr(object):
"""
Class representing a non-descript PyObject* value in the inferior
process for when we don't have a custom scraper, intended to have
a sane repr().
"""
def __init__(self, tp_name, address):
self.tp_name = tp_name
self.address = address
def __repr__(self):
# For the NULL pointer, we have no way of knowing a type, so
# special-case it as per
# http://bugs.python.org/issue8032#msg100882
if self.address == 0:
return '0x0'
return '<%s at remote 0x%x>' % (self.tp_name, self.address)
return FakeRepr(self.safe_tp_name(),
long(self._gdbval))
def write_repr(self, out, visited):
'''
Write a string representation of the value scraped from the inferior
process to "out", a file-like object.
'''
# Default implementation: generate a proxy value and write its repr
# However, this could involve a lot of work for complicated objects,
# so for derived classes we specialize this
return out.write(repr(self.proxyval(visited)))
@classmethod
def subclass_from_type(cls, t):
'''
Given a PyTypeObjectPtr instance wrapping a gdb.Value that's a
(PyTypeObject*), determine the corresponding subclass of PyObjectPtr
to use
Ideally, we would look up the symbols for the global types, but that
isn't working yet:
(gdb) python print gdb.lookup_symbol('PyList_Type')[0].value
Traceback (most recent call last):
File "<string>", line 1, in <module>
NotImplementedError: Symbol type not yet supported in Python scripts.
Error while executing Python code.
For now, we use tp_flags, after doing some string comparisons on the
tp_name for some special-cases that don't seem to be visible through
flags
'''
try:
tp_name = t.field('tp_name').string()
tp_flags = int(t.field('tp_flags'))
except RuntimeError:
# Handle any kind of error e.g. NULL ptrs by simply using the base
# class
return cls
#print 'tp_flags = 0x%08x' % tp_flags
#print 'tp_name = %r' % tp_name
name_map = {'bool': PyBoolObjectPtr,
'classobj': PyClassObjectPtr,
'instance': PyInstanceObjectPtr,
'NoneType': PyNoneStructPtr,
'frame': PyFrameObjectPtr,
'set' : PySetObjectPtr,
'frozenset' : PySetObjectPtr,
'builtin_function_or_method' : PyCFunctionObjectPtr,
}
if tp_name in name_map:
return name_map[tp_name]
if tp_flags & Py_TPFLAGS_HEAPTYPE:
return HeapTypeObjectPtr
if tp_flags & Py_TPFLAGS_INT_SUBCLASS:
return PyIntObjectPtr
if tp_flags & Py_TPFLAGS_LONG_SUBCLASS:
return PyLongObjectPtr
if tp_flags & Py_TPFLAGS_LIST_SUBCLASS:
return PyListObjectPtr
if tp_flags & Py_TPFLAGS_TUPLE_SUBCLASS:
return PyTupleObjectPtr
if tp_flags & Py_TPFLAGS_STRING_SUBCLASS:
try:
gdb.lookup_type('PyBytesObject')
return PyBytesObjectPtr
except RuntimeError:
return PyStringObjectPtr
if tp_flags & Py_TPFLAGS_UNICODE_SUBCLASS:
return PyUnicodeObjectPtr
if tp_flags & Py_TPFLAGS_DICT_SUBCLASS:
return PyDictObjectPtr
if tp_flags & Py_TPFLAGS_BASE_EXC_SUBCLASS:
return PyBaseExceptionObjectPtr
#if tp_flags & Py_TPFLAGS_TYPE_SUBCLASS:
# return PyTypeObjectPtr
# Use the base class:
return cls
@classmethod
def from_pyobject_ptr(cls, gdbval):
'''
Try to locate the appropriate derived class dynamically, and cast
the pointer accordingly.
'''
try:
p = PyObjectPtr(gdbval)
cls = cls.subclass_from_type(p.type())
return cls(gdbval, cast_to=cls.get_gdb_type())
except RuntimeError, exc:
# Handle any kind of error e.g. NULL ptrs by simply using the base
# class
pass
return cls(gdbval)
@classmethod
def get_gdb_type(cls):
return gdb.lookup_type(cls._typename).pointer()
def as_address(self):
return long(self._gdbval)
class PyVarObjectPtr(PyObjectPtr):
_typename = 'PyVarObject'
class ProxyAlreadyVisited(object):
'''
Placeholder proxy to use when protecting against infinite recursion due to
loops in the object graph.
Analogous to the values emitted by the users of Py_ReprEnter and Py_ReprLeave
'''
def __init__(self, rep):
self._rep = rep
def __repr__(self):
return self._rep
def _write_instance_repr(out, visited, name, pyop_attrdict, address):
'''Shared code for use by old-style and new-style classes:
write a representation to file-like object "out"'''
out.write('<')
out.write(name)
# Write dictionary of instance attributes:
if isinstance(pyop_attrdict, PyDictObjectPtr):
out.write('(')
first = True
for pyop_arg, pyop_val in pyop_attrdict.iteritems():
if not first:
out.write(', ')
first = False
out.write(pyop_arg.proxyval(visited))
out.write('=')
pyop_val.write_repr(out, visited)
out.write(')')
out.write(' at remote 0x%x>' % address)
class InstanceProxy(object):
def __init__(self, cl_name, attrdict, address):
self.cl_name = cl_name
self.attrdict = attrdict
self.address = address
def __repr__(self):
if isinstance(self.attrdict, dict):
kwargs = ', '.join(["%s=%r" % (arg, val)
for arg, val in self.attrdict.iteritems()])
return '<%s(%s) at remote 0x%x>' % (self.cl_name,
kwargs, self.address)
else:
return '<%s at remote 0x%x>' % (self.cl_name,
self.address)
def _PyObject_VAR_SIZE(typeobj, nitems):
return ( ( typeobj.field('tp_basicsize') +
nitems * typeobj.field('tp_itemsize') +
(SIZEOF_VOID_P - 1)
) & ~(SIZEOF_VOID_P - 1)
).cast(gdb.lookup_type('size_t'))
class HeapTypeObjectPtr(PyObjectPtr):
_typename = 'PyObject'
def get_attr_dict(self):
'''
Get the PyDictObject ptr representing the attribute dictionary
(or None if there's a problem)
'''
try:
typeobj = self.type()
dictoffset = int_from_int(typeobj.field('tp_dictoffset'))
if dictoffset != 0:
if dictoffset < 0:
type_PyVarObject_ptr = gdb.lookup_type('PyVarObject').pointer()
tsize = int_from_int(self._gdbval.cast(type_PyVarObject_ptr)['ob_size'])
if tsize < 0:
tsize = -tsize
size = _PyObject_VAR_SIZE(typeobj, tsize)
dictoffset += size
assert dictoffset > 0
assert dictoffset % SIZEOF_VOID_P == 0
dictptr = self._gdbval.cast(_type_char_ptr) + dictoffset
PyObjectPtrPtr = PyObjectPtr.get_gdb_type().pointer()
dictptr = dictptr.cast(PyObjectPtrPtr)
return PyObjectPtr.from_pyobject_ptr(dictptr.dereference())
except RuntimeError:
# Corrupt data somewhere; fail safe
pass
# Not found, or some kind of error:
return None
def proxyval(self, visited):
'''
Support for new-style classes.
Currently we just locate the dictionary using a transliteration to
python of _PyObject_GetDictPtr, ignoring descriptors
'''
# Guard against infinite loops:
if self.as_address() in visited:
return ProxyAlreadyVisited('<...>')
visited.add(self.as_address())
pyop_attr_dict = self.get_attr_dict()
if pyop_attr_dict:
attr_dict = pyop_attr_dict.proxyval(visited)
else:
attr_dict = {}
tp_name = self.safe_tp_name()
# New-style class:
return InstanceProxy(tp_name, attr_dict, long(self._gdbval))
def write_repr(self, out, visited):
# Guard against infinite loops:
if self.as_address() in visited:
out.write('<...>')
return
visited.add(self.as_address())
pyop_attrdict = self.get_attr_dict()
_write_instance_repr(out, visited,
self.safe_tp_name(), pyop_attrdict, self.as_address())
class ProxyException(Exception):
def __init__(self, tp_name, args):
self.tp_name = tp_name
self.args = args
def __repr__(self):
return '%s%r' % (self.tp_name, self.args)
class PyBaseExceptionObjectPtr(PyObjectPtr):
"""
Class wrapping a gdb.Value that's a PyBaseExceptionObject* i.e. an exception
within the process being debugged.
"""
_typename = 'PyBaseExceptionObject'
def proxyval(self, visited):
# Guard against infinite loops:
if self.as_address() in visited:
return ProxyAlreadyVisited('(...)')
visited.add(self.as_address())
arg_proxy = self.pyop_field('args').proxyval(visited)
return ProxyException(self.safe_tp_name(),
arg_proxy)
def write_repr(self, out, visited):
# Guard against infinite loops:
if self.as_address() in visited:
out.write('(...)')
return
visited.add(self.as_address())
out.write(self.safe_tp_name())
self.write_field_repr('args', out, visited)
class PyClassObjectPtr(PyObjectPtr):
"""
Class wrapping a gdb.Value that's a PyClassObject* i.e. a <classobj>
instance within the process being debugged.
"""
_typename = 'PyClassObject'
class BuiltInFunctionProxy(object):
def __init__(self, ml_name):
self.ml_name = ml_name
def __repr__(self):
return "<built-in function %s>" % self.ml_name
class BuiltInMethodProxy(object):
def __init__(self, ml_name, pyop_m_self):
self.ml_name = ml_name
self.pyop_m_self = pyop_m_self
def __repr__(self):
return ('<built-in method %s of %s object at remote 0x%x>'
% (self.ml_name,
self.pyop_m_self.safe_tp_name(),
self.pyop_m_self.as_address())
)
class PyCFunctionObjectPtr(PyObjectPtr):
"""
Class wrapping a gdb.Value that's a PyCFunctionObject*
(see Include/methodobject.h and Objects/methodobject.c)
"""
_typename = 'PyCFunctionObject'
def proxyval(self, visited):
m_ml = self.field('m_ml') # m_ml is a (PyMethodDef*)
ml_name = m_ml['ml_name'].string()
pyop_m_self = self.pyop_field('m_self')
if pyop_m_self.is_null():
return BuiltInFunctionProxy(ml_name)
else:
return BuiltInMethodProxy(ml_name, pyop_m_self)
class PyCodeObjectPtr(PyObjectPtr):
"""
Class wrapping a gdb.Value that's a PyCodeObject* i.e. a <code> instance
within the process being debugged.
"""
_typename = 'PyCodeObject'
def addr2line(self, addrq):
'''
Get the line number for a given bytecode offset
Analogous to PyCode_Addr2Line; translated from pseudocode in
Objects/lnotab_notes.txt
'''
co_lnotab = self.pyop_field('co_lnotab').proxyval(set())
# Initialize lineno to co_firstlineno as per PyCode_Addr2Line
# not 0, as lnotab_notes.txt has it:
lineno = int_from_int(self.field('co_firstlineno'))
addr = 0
for addr_incr, line_incr in zip(co_lnotab[::2], co_lnotab[1::2]):
addr += ord(addr_incr)
if addr > addrq:
return lineno
lineno += ord(line_incr)
return lineno
class PyDictObjectPtr(PyObjectPtr):
"""
Class wrapping a gdb.Value that's a PyDictObject* i.e. a dict instance
within the process being debugged.
"""
_typename = 'PyDictObject'
def iteritems(self):
'''
Yields a sequence of (PyObjectPtr key, PyObjectPtr value) pairs,
analagous to dict.iteritems()
'''
for i in safe_range(self.field('ma_mask') + 1):
ep = self.field('ma_table') + i
pyop_value = PyObjectPtr.from_pyobject_ptr(ep['me_value'])
if not pyop_value.is_null():
pyop_key = PyObjectPtr.from_pyobject_ptr(ep['me_key'])
yield (pyop_key, pyop_value)
def proxyval(self, visited):
# Guard against infinite loops:
if self.as_address() in visited:
return ProxyAlreadyVisited('{...}')
visited.add(self.as_address())
result = {}
for pyop_key, pyop_value in self.iteritems():
proxy_key = pyop_key.proxyval(visited)
proxy_value = pyop_value.proxyval(visited)
result[proxy_key] = proxy_value
return result
def write_repr(self, out, visited):
# Guard against infinite loops:
if self.as_address() in visited:
out.write('{...}')
return
visited.add(self.as_address())
out.write('{')
first = True
for pyop_key, pyop_value in self.iteritems():
if not first:
out.write(', ')
first = False
pyop_key.write_repr(out, visited)
out.write(': ')
pyop_value.write_repr(out, visited)
out.write('}')
class PyInstanceObjectPtr(PyObjectPtr):
_typename = 'PyInstanceObject'
def proxyval(self, visited):
# Guard against infinite loops:
if self.as_address() in visited:
return ProxyAlreadyVisited('<...>')
visited.add(self.as_address())
# Get name of class:
in_class = self.pyop_field('in_class')
cl_name = in_class.pyop_field('cl_name').proxyval(visited)
# Get dictionary of instance attributes:
in_dict = self.pyop_field('in_dict').proxyval(visited)
# Old-style class:
return InstanceProxy(cl_name, in_dict, long(self._gdbval))
def write_repr(self, out, visited):
# Guard against infinite loops:
if self.as_address() in visited:
out.write('<...>')
return
visited.add(self.as_address())
# Old-style class:
# Get name of class:
in_class = self.pyop_field('in_class')
cl_name = in_class.pyop_field('cl_name').proxyval(visited)
# Get dictionary of instance attributes:
pyop_in_dict = self.pyop_field('in_dict')
_write_instance_repr(out, visited,
cl_name, pyop_in_dict, self.as_address())
class PyIntObjectPtr(PyObjectPtr):
_typename = 'PyIntObject'
def proxyval(self, visited):
result = int_from_int(self.field('ob_ival'))
return result
class PyListObjectPtr(PyObjectPtr):
_typename = 'PyListObject'
def __getitem__(self, i):
# Get the gdb.Value for the (PyObject*) with the given index:
field_ob_item = self.field('ob_item')
return field_ob_item[i]
def proxyval(self, visited):
# Guard against infinite loops:
if self.as_address() in visited:
return ProxyAlreadyVisited('[...]')
visited.add(self.as_address())
result = [PyObjectPtr.from_pyobject_ptr(self[i]).proxyval(visited)
for i in safe_range(int_from_int(self.field('ob_size')))]
return result
def write_repr(self, out, visited):
# Guard against infinite loops:
if self.as_address() in visited:
out.write('[...]')
return
visited.add(self.as_address())
out.write('[')
for i in safe_range(int_from_int(self.field('ob_size'))):
if i > 0:
out.write(', ')
element = PyObjectPtr.from_pyobject_ptr(self[i])
element.write_repr(out, visited)
out.write(']')
class PyLongObjectPtr(PyObjectPtr):
_typename = 'PyLongObject'
def proxyval(self, visited):
'''
Python's Include/longobjrep.h has this declaration:
struct _longobject {
PyObject_VAR_HEAD
digit ob_digit[1];
};
with this description:
The absolute value of a number is equal to
SUM(for i=0 through abs(ob_size)-1) ob_digit[i] * 2**(SHIFT*i)
Negative numbers are represented with ob_size < 0;
zero is represented by ob_size == 0.
where SHIFT can be either:
#define PyLong_SHIFT 30
#define PyLong_SHIFT 15
'''
ob_size = long(self.field('ob_size'))
if ob_size == 0:
return 0L
ob_digit = self.field('ob_digit')
if gdb.lookup_type('digit').sizeof == 2:
SHIFT = 15L
else:
SHIFT = 30L
digits = [long(ob_digit[i]) * 2**(SHIFT*i)
for i in safe_range(abs(ob_size))]
result = sum(digits)
if ob_size < 0:
result = -result
return result
def write_repr(self, out, visited):
# Write this out as a Python 3 int literal, i.e. without the "L" suffix
proxy = self.proxyval(visited)
out.write("%s" % proxy)
class PyBoolObjectPtr(PyLongObjectPtr):
"""
Class wrapping a gdb.Value that's a PyBoolObject* i.e. one of the two
<bool> instances (Py_True/Py_False) within the process being debugged.
"""
def proxyval(self, visited):
return bool(PyLongObjectPtr.proxyval(self, visited))
class PyNoneStructPtr(PyObjectPtr):
"""
Class wrapping a gdb.Value that's a PyObject* pointing to the
singleton (we hope) _Py_NoneStruct with ob_type PyNone_Type
"""
_typename = 'PyObject'
def proxyval(self, visited):
return None
class PyFrameObjectPtr(PyObjectPtr):
_typename = 'PyFrameObject'
def __init__(self, gdbval, cast_to=None):
PyObjectPtr.__init__(self, gdbval, cast_to)
if not self.is_optimized_out():
self.co = PyCodeObjectPtr.from_pyobject_ptr(self.field('f_code'))
self.co_name = self.co.pyop_field('co_name')
self.co_filename = self.co.pyop_field('co_filename')
self.f_lineno = int_from_int(self.field('f_lineno'))
self.f_lasti = int_from_int(self.field('f_lasti'))
self.co_nlocals = int_from_int(self.co.field('co_nlocals'))
self.co_varnames = PyTupleObjectPtr.from_pyobject_ptr(self.co.field('co_varnames'))
def iter_locals(self):
'''
Yield a sequence of (name,value) pairs of PyObjectPtr instances, for
the local variables of this frame
'''
if self.is_optimized_out():
return
f_localsplus = self.field('f_localsplus')
for i in safe_range(self.co_nlocals):
pyop_value = PyObjectPtr.from_pyobject_ptr(f_localsplus[i])
if not pyop_value.is_null():
pyop_name = PyObjectPtr.from_pyobject_ptr(self.co_varnames[i])
yield (pyop_name, pyop_value)
def iter_globals(self):
'''
Yield a sequence of (name,value) pairs of PyObjectPtr instances, for
the global variables of this frame
'''
if self.is_optimized_out():
return
pyop_globals = self.pyop_field('f_globals')
return pyop_globals.iteritems()
def iter_builtins(self):
'''
Yield a sequence of (name,value) pairs of PyObjectPtr instances, for
the builtin variables
'''
if self.is_optimized_out():
return
pyop_builtins = self.pyop_field('f_builtins')
return pyop_builtins.iteritems()
def get_var_by_name(self, name):
'''
Look for the named local variable, returning a (PyObjectPtr, scope) pair
where scope is a string 'local', 'global', 'builtin'
If not found, return (None, None)
'''
for pyop_name, pyop_value in self.iter_locals():
if name == pyop_name.proxyval(set()):
return pyop_value, 'local'
for pyop_name, pyop_value in self.iter_globals():
if name == pyop_name.proxyval(set()):
return pyop_value, 'global'
for pyop_name, pyop_value in self.iter_builtins():
if name == pyop_name.proxyval(set()):
return pyop_value, 'builtin'
return None, None
def filename(self):
'''Get the path of the current Python source file, as a string'''
if self.is_optimized_out():
return '(frame information optimized out)'
return self.co_filename.proxyval(set())
def current_line_num(self):
'''Get current line number as an integer (1-based)
Translated from PyFrame_GetLineNumber and PyCode_Addr2Line
See Objects/lnotab_notes.txt
'''
if self.is_optimized_out():
return None
f_trace = self.field('f_trace')
if long(f_trace) != 0:
# we have a non-NULL f_trace:
return self.f_lineno
else:
#try:
return self.co.addr2line(self.f_lasti)
#except ValueError:
# return self.f_lineno
def current_line(self):
'''Get the text of the current source line as a string, with a trailing
newline character'''
if self.is_optimized_out():
return '(frame information optimized out)'
filename = self.filename()
with open(os_fsencode(filename), 'r') as f:
all_lines = f.readlines()
# Convert from 1-based current_line_num to 0-based list offset:
return all_lines[self.current_line_num()-1]
def write_repr(self, out, visited):
if self.is_optimized_out():
out.write('(frame information optimized out)')
return
out.write('Frame 0x%x, for file %s, line %i, in %s ('
% (self.as_address(),
self.co_filename.proxyval(visited),
self.current_line_num(),
self.co_name.proxyval(visited)))
first = True
for pyop_name, pyop_value in self.iter_locals():
if not first:
out.write(', ')
first = False
out.write(pyop_name.proxyval(visited))
out.write('=')
pyop_value.write_repr(out, visited)
out.write(')')
class PySetObjectPtr(PyObjectPtr):
_typename = 'PySetObject'
def proxyval(self, visited):
# Guard against infinite loops:
if self.as_address() in visited:
return ProxyAlreadyVisited('%s(...)' % self.safe_tp_name())
visited.add(self.as_address())
members = []
table = self.field('table')
for i in safe_range(self.field('mask')+1):
setentry = table[i]
key = setentry['key']
if key != 0:
key_proxy = PyObjectPtr.from_pyobject_ptr(key).proxyval(visited)
if key_proxy != '<dummy key>':
members.append(key_proxy)
if self.safe_tp_name() == 'frozenset':
return frozenset(members)
else:
return set(members)
def write_repr(self, out, visited):
# Emulate Python 3's set_repr
tp_name = self.safe_tp_name()
# Guard against infinite loops:
if self.as_address() in visited:
out.write('(...)')
return
visited.add(self.as_address())
# Python 3's set_repr special-cases the empty set:
if not self.field('used'):
out.write(tp_name)
out.write('()')
return
# Python 3 uses {} for set literals:
if tp_name != 'set':
out.write(tp_name)
out.write('(')
out.write('{')
first = True
table = self.field('table')
for i in safe_range(self.field('mask')+1):
setentry = table[i]
key = setentry['key']
if key != 0:
pyop_key = PyObjectPtr.from_pyobject_ptr(key)
key_proxy = pyop_key.proxyval(visited) # FIXME!
if key_proxy != '<dummy key>':
if not first:
out.write(', ')
first = False
pyop_key.write_repr(out, visited)
out.write('}')
if tp_name != 'set':
out.write(')')
class PyBytesObjectPtr(PyObjectPtr):
_typename = 'PyBytesObject'
def __str__(self):
field_ob_size = self.field('ob_size')
field_ob_sval = self.field('ob_sval')
return ''.join(struct.pack('b', field_ob_sval[i])
for i in safe_range(field_ob_size))
def proxyval(self, visited):
return str(self)
def write_repr(self, out, visited):
# Write this out as a Python 3 bytes literal, i.e. with a "b" prefix
# Get a PyStringObject* within the Python 2 gdb process:
proxy = self.proxyval(visited)
# Transliteration of Python 3's Objects/bytesobject.c:PyBytes_Repr
# to Python 2 code:
quote = "'"
if "'" in proxy and not '"' in proxy:
quote = '"'
out.write('b')
out.write(quote)
for byte in proxy:
if byte == quote or byte == '\\':
out.write('\\')
out.write(byte)
elif byte == '\t':
out.write('\\t')
elif byte == '\n':
out.write('\\n')
elif byte == '\r':
out.write('\\r')
elif byte < ' ' or ord(byte) >= 0x7f:
out.write('\\x')
out.write(hexdigits[(ord(byte) & 0xf0) >> 4])
out.write(hexdigits[ord(byte) & 0xf])
else:
out.write(byte)
out.write(quote)
class PyStringObjectPtr(PyBytesObjectPtr):
_typename = 'PyStringObject'
class PyTupleObjectPtr(PyObjectPtr):
_typename = 'PyTupleObject'
def __getitem__(self, i):
# Get the gdb.Value for the (PyObject*) with the given index:
field_ob_item = self.field('ob_item')
return field_ob_item[i]
def proxyval(self, visited):
# Guard against infinite loops:
if self.as_address() in visited:
return ProxyAlreadyVisited('(...)')
visited.add(self.as_address())
result = tuple([PyObjectPtr.from_pyobject_ptr(self[i]).proxyval(visited)
for i in safe_range(int_from_int(self.field('ob_size')))])
return result
def write_repr(self, out, visited):
# Guard against infinite loops:
if self.as_address() in visited:
out.write('(...)')
return
visited.add(self.as_address())
out.write('(')
for i in safe_range(int_from_int(self.field('ob_size'))):
if i > 0:
out.write(', ')
element = PyObjectPtr.from_pyobject_ptr(self[i])
element.write_repr(out, visited)
if self.field('ob_size') == 1:
out.write(',)')
else:
out.write(')')
class PyTypeObjectPtr(PyObjectPtr):
_typename = 'PyTypeObject'
def _unichr_is_printable(char):
# Logic adapted from Python 3's Tools/unicode/makeunicodedata.py
if char == u" ":
return True
import unicodedata
return unicodedata.category(char) not in ("C", "Z")
if sys.maxunicode >= 0x10000:
_unichr = unichr
else:
# Needed for proper surrogate support if sizeof(Py_UNICODE) is 2 in gdb
def _unichr(x):
if x < 0x10000:
return unichr(x)
x -= 0x10000
ch1 = 0xD800 | (x >> 10)
ch2 = 0xDC00 | (x & 0x3FF)
return unichr(ch1) + unichr(ch2)
class PyUnicodeObjectPtr(PyObjectPtr):
_typename = 'PyUnicodeObject'
def char_width(self):
_type_Py_UNICODE = gdb.lookup_type('Py_UNICODE')
return _type_Py_UNICODE.sizeof
def proxyval(self, visited):
# From unicodeobject.h:
# Py_ssize_t length; /* Length of raw Unicode data in buffer */
# Py_UNICODE *str; /* Raw Unicode buffer */
field_length = long(self.field('length'))
field_str = self.field('str')
# Gather a list of ints from the Py_UNICODE array; these are either
# UCS-2 or UCS-4 code points:
if self.char_width() > 2:
Py_UNICODEs = [int(field_str[i]) for i in safe_range(field_length)]
else:
# A more elaborate routine if sizeof(Py_UNICODE) is 2 in the
# inferior process: we must join surrogate pairs.
Py_UNICODEs = []
i = 0
limit = safety_limit(field_length)
while i < limit:
ucs = int(field_str[i])
i += 1
if ucs < 0xD800 or ucs >= 0xDC00 or i == field_length:
Py_UNICODEs.append(ucs)
continue
# This could be a surrogate pair.
ucs2 = int(field_str[i])
if ucs2 < 0xDC00 or ucs2 > 0xDFFF:
continue
code = (ucs & 0x03FF) << 10
code |= ucs2 & 0x03FF
code += 0x00010000
Py_UNICODEs.append(code)
i += 1
# Convert the int code points to unicode characters, and generate a
# local unicode instance.
# This splits surrogate pairs if sizeof(Py_UNICODE) is 2 here (in gdb).
result = u''.join([_unichr(ucs) for ucs in Py_UNICODEs])
return result
def write_repr(self, out, visited):
# Write this out as a Python 3 str literal, i.e. without a "u" prefix
# Get a PyUnicodeObject* within the Python 2 gdb process:
proxy = self.proxyval(visited)
# Transliteration of Python 3's Object/unicodeobject.c:unicode_repr
# to Python 2:
if "'" in proxy and '"' not in proxy:
quote = '"'
else:
quote = "'"
out.write(quote)
i = 0
while i < len(proxy):
ch = proxy[i]
i += 1
# Escape quotes and backslashes
if ch == quote or ch == '\\':
out.write('\\')
out.write(ch)
# Map special whitespace to '\t', \n', '\r'
elif ch == '\t':
out.write('\\t')
elif ch == '\n':
out.write('\\n')
elif ch == '\r':
out.write('\\r')
# Map non-printable US ASCII to '\xhh' */
elif ch < ' ' or ch == 0x7F:
out.write('\\x')
out.write(hexdigits[(ord(ch) >> 4) & 0x000F])
out.write(hexdigits[ord(ch) & 0x000F])
# Copy ASCII characters as-is
elif ord(ch) < 0x7F:
out.write(ch)
# Non-ASCII characters
else:
ucs = ch
ch2 = None
if sys.maxunicode < 0x10000:
# If sizeof(Py_UNICODE) is 2 here (in gdb), join
# surrogate pairs before calling _unichr_is_printable.
if (i < len(proxy)
and 0xD800 <= ord(ch) < 0xDC00 \
and 0xDC00 <= ord(proxy[i]) <= 0xDFFF):
ch2 = proxy[i]
ucs = ch + ch2
i += 1
# Unfortuately, Python 2's unicode type doesn't seem
# to expose the "isprintable" method
printable = _unichr_is_printable(ucs)
if printable:
try:
ucs.encode(ENCODING)
except UnicodeEncodeError:
printable = False
# Map Unicode whitespace and control characters
# (categories Z* and C* except ASCII space)
if not printable:
if ch2 is not None:
# Match Python 3's representation of non-printable
# wide characters.
code = (ord(ch) & 0x03FF) << 10
code |= ord(ch2) & 0x03FF
code += 0x00010000
else:
code = ord(ucs)
# Map 8-bit characters to '\\xhh'
if code <= 0xff:
out.write('\\x')
out.write(hexdigits[(code >> 4) & 0x000F])
out.write(hexdigits[code & 0x000F])
# Map 21-bit characters to '\U00xxxxxx'
elif code >= 0x10000:
out.write('\\U')
out.write(hexdigits[(code >> 28) & 0x0000000F])
out.write(hexdigits[(code >> 24) & 0x0000000F])
out.write(hexdigits[(code >> 20) & 0x0000000F])
out.write(hexdigits[(code >> 16) & 0x0000000F])
out.write(hexdigits[(code >> 12) & 0x0000000F])
out.write(hexdigits[(code >> 8) & 0x0000000F])
out.write(hexdigits[(code >> 4) & 0x0000000F])
out.write(hexdigits[code & 0x0000000F])
# Map 16-bit characters to '\uxxxx'
else:
out.write('\\u')
out.write(hexdigits[(code >> 12) & 0x000F])
out.write(hexdigits[(code >> 8) & 0x000F])
out.write(hexdigits[(code >> 4) & 0x000F])
out.write(hexdigits[code & 0x000F])
else:
# Copy characters as-is
out.write(ch)
if ch2 is not None:
out.write(ch2)
out.write(quote)
def int_from_int(gdbval):
return int(str(gdbval))
def stringify(val):
# TODO: repr() puts everything on one line; pformat can be nicer, but
# can lead to v.long results; this function isolates the choice
if True:
return repr(val)
else:
from pprint import pformat
return pformat(val)
class PyObjectPtrPrinter:
"Prints a (PyObject*)"
def __init__ (self, gdbval):
self.gdbval = gdbval
def to_string (self):
pyop = PyObjectPtr.from_pyobject_ptr(self.gdbval)
if True:
return pyop.get_truncated_repr(MAX_OUTPUT_LEN)
else:
# Generate full proxy value then stringify it.
# Doing so could be expensive
proxyval = pyop.proxyval(set())
return stringify(proxyval)
def pretty_printer_lookup(gdbval):
type = gdbval.type.unqualified()
if type.code == gdb.TYPE_CODE_PTR:
type = type.target().unqualified()
# do this every time to allow new subclasses to "register"
# alternatively, we could use a metaclass to register all the typenames
classes = [PyObjectPtr]
classes.extend(PyObjectPtr.__subclasses__())
if str(type) in [cls._typename for cls in classes]:
return PyObjectPtrPrinter(gdbval)
"""
During development, I've been manually invoking the code in this way:
(gdb) python
import sys
sys.path.append('/home/david/coding/python-gdb')
import libpython
end
then reloading it after each edit like this:
(gdb) python reload(libpython)
The following code should ensure that the prettyprinter is registered
if the code is autoloaded by gdb when visiting libpython.so, provided
that this python file is installed to the same path as the library (or its
.debug file) plus a "-gdb.py" suffix, e.g:
/usr/lib/libpython2.6.so.1.0-gdb.py
/usr/lib/debug/usr/lib/libpython2.6.so.1.0.debug-gdb.py
"""
def register (obj):
if obj == None:
obj = gdb
# Wire up the pretty-printer
obj.pretty_printers.append(pretty_printer_lookup)
register (gdb.current_objfile ())
# Unfortunately, the exact API exposed by the gdb module varies somewhat
# from build to build
# See http://bugs.python.org/issue8279?#msg102276
class Frame(object):
'''
Wrapper for gdb.Frame, adding various methods
'''
def __init__(self, gdbframe):
self._gdbframe = gdbframe
def older(self):
older = self._gdbframe.older()
if older:
return Frame(older)
else:
return None
def newer(self):
newer = self._gdbframe.newer()
if newer:
return Frame(newer)
else:
return None
def select(self):
'''If supported, select this frame and return True; return False if unsupported
Not all builds have a gdb.Frame.select method; seems to be present on Fedora 12
onwards, but absent on Ubuntu buildbot'''
if not hasattr(self._gdbframe, 'select'):
print ('Unable to select frame: '
'this build of gdb does not expose a gdb.Frame.select method')
return False
self._gdbframe.select()
return True
def get_index(self):
'''Calculate index of frame, starting at 0 for the newest frame within
this thread'''
index = 0
# Go down until you reach the newest frame:
iter_frame = self
while iter_frame.newer():
index += 1
iter_frame = iter_frame.newer()
return index
def is_evalframeex(self):
'''Is this a PyEval_EvalFrameEx frame?'''
if self._gdbframe.name() == 'PyEval_EvalFrameEx':
'''
I believe we also need to filter on the inline
struct frame_id.inline_depth, only regarding frames with
an inline depth of 0 as actually being this function
So we reject those with type gdb.INLINE_FRAME
'''
if self._gdbframe.type() == gdb.NORMAL_FRAME:
# We have a PyEval_EvalFrameEx frame:
return True
return False
def get_pyop(self):
try:
f = self._gdbframe.read_var('f')
return PyFrameObjectPtr.from_pyobject_ptr(f)
except ValueError:
return None
@classmethod
def get_selected_frame(cls):
_gdbframe = gdb.selected_frame()
if _gdbframe:
return Frame(_gdbframe)
return None
@classmethod
def get_selected_python_frame(cls):
'''Try to obtain the Frame for the python code in the selected frame,
or None'''
frame = cls.get_selected_frame()
while frame:
if frame.is_evalframeex():
return frame
frame = frame.older()
# Not found:
return None
def print_summary(self):
if self.is_evalframeex():
pyop = self.get_pyop()
if pyop:
line = pyop.get_truncated_repr(MAX_OUTPUT_LEN)
write_unicode(sys.stdout, '#%i %s\n' % (self.get_index(), line))
sys.stdout.write(pyop.current_line())
else:
sys.stdout.write('#%i (unable to read python frame information)\n' % self.get_index())
else:
sys.stdout.write('#%i\n' % self.get_index())
class PyList(gdb.Command):
'''List the current Python source code, if any
Use
py-list START
to list at a different line number within the python source.
Use
py-list START, END
to list a specific range of lines within the python source.
'''
def __init__(self):
gdb.Command.__init__ (self,
"py-list",
gdb.COMMAND_FILES,
gdb.COMPLETE_NONE)
def invoke(self, args, from_tty):
import re
start = None
end = None
m = re.match(r'\s*(\d+)\s*', args)
if m:
start = int(m.group(0))
end = start + 10
m = re.match(r'\s*(\d+)\s*,\s*(\d+)\s*', args)
if m:
start, end = map(int, m.groups())
frame = Frame.get_selected_python_frame()
if not frame:
print 'Unable to locate python frame'
return
pyop = frame.get_pyop()
if not pyop:
print 'Unable to read information on python frame'
return
filename = pyop.filename()
lineno = pyop.current_line_num()
if start is None:
start = lineno - 5
end = lineno + 5
if start<1:
start = 1
with open(os_fsencode(filename), 'r') as f:
all_lines = f.readlines()
# start and end are 1-based, all_lines is 0-based;
# so [start-1:end] as a python slice gives us [start, end] as a
# closed interval
for i, line in enumerate(all_lines[start-1:end]):
linestr = str(i+start)
# Highlight current line:
if i + start == lineno:
linestr = '>' + linestr
sys.stdout.write('%4s %s' % (linestr, line))
# ...and register the command:
PyList()
def move_in_stack(move_up):
'''Move up or down the stack (for the py-up/py-down command)'''
frame = Frame.get_selected_python_frame()
while frame:
if move_up:
iter_frame = frame.older()
else:
iter_frame = frame.newer()
if not iter_frame:
break
if iter_frame.is_evalframeex():
# Result:
if iter_frame.select():
iter_frame.print_summary()
return
frame = iter_frame
if move_up:
print 'Unable to find an older python frame'
else:
print 'Unable to find a newer python frame'
class PyUp(gdb.Command):
'Select and print the python stack frame that called this one (if any)'
def __init__(self):
gdb.Command.__init__ (self,
"py-up",
gdb.COMMAND_STACK,
gdb.COMPLETE_NONE)
def invoke(self, args, from_tty):
move_in_stack(move_up=True)
class PyDown(gdb.Command):
'Select and print the python stack frame called by this one (if any)'
def __init__(self):
gdb.Command.__init__ (self,
"py-down",
gdb.COMMAND_STACK,
gdb.COMPLETE_NONE)
def invoke(self, args, from_tty):
move_in_stack(move_up=False)
# Not all builds of gdb have gdb.Frame.select
if hasattr(gdb.Frame, 'select'):
PyUp()
PyDown()
class PyBacktrace(gdb.Command):
'Display the current python frame and all the frames within its call stack (if any)'
def __init__(self):
gdb.Command.__init__ (self,
"py-bt",
gdb.COMMAND_STACK,
gdb.COMPLETE_NONE)
def invoke(self, args, from_tty):
frame = Frame.get_selected_python_frame()
while frame:
if frame.is_evalframeex():
frame.print_summary()
frame = frame.older()
PyBacktrace()
class PyPrint(gdb.Command):
'Look up the given python variable name, and print it'
def __init__(self):
gdb.Command.__init__ (self,
"py-print",
gdb.COMMAND_DATA,
gdb.COMPLETE_NONE)
def invoke(self, args, from_tty):
name = str(args)
frame = Frame.get_selected_python_frame()
if not frame:
print 'Unable to locate python frame'
return
pyop_frame = frame.get_pyop()
if not pyop_frame:
print 'Unable to read information on python frame'
return
pyop_var, scope = pyop_frame.get_var_by_name(name)
if pyop_var:
print ('%s %r = %s'
% (scope,
name,
pyop_var.get_truncated_repr(MAX_OUTPUT_LEN)))
else:
print '%r not found' % name
PyPrint()
class PyLocals(gdb.Command):
'Look up the given python variable name, and print it'
def invoke(self, args, from_tty):
name = str(args)
frame = Frame.get_selected_python_frame()
if not frame:
print 'Unable to locate python frame'
return
pyop_frame = frame.get_pyop()
if not pyop_frame:
print 'Unable to read information on python frame'
return
namespace = self.get_namespace(pyop_frame)
namespace = [(name.proxyval(set()), val) for name, val in namespace]
if namespace:
name, val = max(namespace, key=lambda (name, val): len(name))
max_name_length = len(name)
for name, pyop_value in namespace:
value = pyop_value.get_truncated_repr(MAX_OUTPUT_LEN)
print ('%-*s = %s' % (max_name_length, name, value))
def get_namespace(self, pyop_frame):
return pyop_frame.iter_locals()
class PyGlobals(PyLocals):
'List all the globals in the currently select Python frame'
def get_namespace(self, pyop_frame):
return pyop_frame.iter_globals()
PyLocals("py-locals", gdb.COMMAND_DATA, gdb.COMPLETE_NONE)
PyGlobals("py-globals", gdb.COMMAND_DATA, gdb.COMPLETE_NONE)
class PyNameEquals(gdb.Function):
def _get_pycurframe_attr(self, attr):
frame = Frame(gdb.selected_frame())
if frame.is_evalframeex():
pyframe = frame.get_pyop()
if pyframe is None:
return None
return getattr(pyframe, attr).proxyval(set())
return None
def invoke(self, funcname):
attr = self._get_pycurframe_attr('co_name')
return attr is not None and attr == funcname.string()
PyNameEquals("pyname_equals")
class PyModEquals(PyNameEquals):
def invoke(self, modname):
attr = self._get_pycurframe_attr('co_filename')
if attr is not None:
filename, ext = os.path.splitext(os.path.basename(attr))
return filename == modname.string()
return False
PyModEquals("pymod_equals")
class PyBreak(gdb.Command):
"""
Set a Python breakpoint. Examples:
Break on any function or method named 'func' in module 'modname'
py-break modname.func
Break on any function or method named 'func'
py-break func
"""
def invoke(self, funcname, from_tty):
if '.' in funcname:
modname, dot, funcname = funcname.rpartition('.')
cond = '$pyname_equals("%s") && $pymod_equals("%s")' % (funcname,
modname)
else:
cond = '$pyname_equals("%s")' % funcname
gdb.execute('break PyEval_EvalFrameEx if ' + cond)
PyBreak("py-break", gdb.COMMAND_RUNNING, gdb.COMPLETE_NONE)
class _LoggingState(object):
"""
State that helps to provide a reentrant gdb.execute() function.
"""
def __init__(self):
self.fd, self.filename = tempfile.mkstemp()
self.file = os.fdopen(self.fd, 'r+')
_execute("set logging file %s" % self.filename)
self.file_position_stack = []
atexit.register(os.close, self.fd)
atexit.register(os.remove, self.filename)
def __enter__(self):
if not self.file_position_stack:
_execute("set logging redirect on")
_execute("set logging on")
_execute("set pagination off")
self.file_position_stack.append(os.fstat(self.fd).st_size)
return self
def getoutput(self):
gdb.flush()
self.file.seek(self.file_position_stack[-1])
result = self.file.read()
return result
def __exit__(self, exc_type, exc_val, tb):
startpos = self.file_position_stack.pop()
self.file.seek(startpos)
self.file.truncate()
if not self.file_position_stack:
_execute("set logging off")
_execute("set logging redirect off")
_execute("set pagination on")
def execute(command, from_tty=False, to_string=False):
"""
Replace gdb.execute() with this function and have it accept a 'to_string'
argument (new in 7.2). Have it properly capture stderr also. Ensure
reentrancy.
"""
if to_string:
with _logging_state as state:
_execute(command, from_tty)
return state.getoutput()
else:
_execute(command, from_tty)
_execute = gdb.execute
gdb.execute = execute
_logging_state = _LoggingState()
def get_selected_inferior():
"""
Return the selected inferior in gdb.
"""
# Woooh, another bug in gdb! Is there an end in sight?
# http://sourceware.org/bugzilla/show_bug.cgi?id=12212
return gdb.inferiors()[0]
selected_thread = gdb.selected_thread()
for inferior in gdb.inferiors():
for thread in inferior.threads():
if thread == selected_thread:
return inferior
class GenericCodeStepper(gdb.Command):
"""
Superclass for code stepping. Subclasses must implement the following
methods:
lineno(frame)
tells the current line number (only called for a relevant frame)
is_relevant_function(frame)
tells whether we care about frame 'frame'
get_source_line(frame)
get the line of source code for the current line (only called for a
relevant frame). If the source code cannot be retrieved this
function should return None
static_break_functions()
returns an iterable of function names that are considered relevant
and should halt step-into execution. This is needed to provide a
performing step-into
runtime_break_functions
list of functions that we should break into depending on the
context
This class provides an 'invoke' method that invokes a 'step' or 'step-over'
depending on the 'stepinto' argument.
"""
stepper = False
static_breakpoints = {}
runtime_breakpoints = {}
def __init__(self, name, stepinto=False):
super(GenericCodeStepper, self).__init__(name,
gdb.COMMAND_RUNNING,
gdb.COMPLETE_NONE)
self.stepinto = stepinto
def _break_func(self, funcname):
result = gdb.execute('break %s' % funcname, to_string=True)
return re.search(r'Breakpoint (\d+)', result).group(1)
def init_breakpoints(self):
"""
Keep all breakpoints around and simply disable/enable them each time
we are stepping. We need this because if you set and delete a
breakpoint, gdb will not repeat your command (this is due to 'delete').
Why? I'm buggered if I know. To further annoy us, we can't use the
breakpoint API because there's no option to make breakpoint setting
silent.
So now! We may have an insane amount of breakpoints to list when the
user does 'info breakpoints' :(
This method must be called whenever the list of functions we should
step into changes. It can be called on any GenericCodeStepper instance.
"""
break_funcs = set(self.static_break_functions())
for funcname in break_funcs:
if funcname not in self.static_breakpoints:
try:
gdb.Breakpoint('', gdb.BP_BREAKPOINT, internal=True)
except (AttributeError, TypeError):
# gdb.Breakpoint does not take an 'internal' argument, or
# gdb.Breakpoint does not exist.
breakpoint = self._break_func(funcname)
except RuntimeError:
# gdb.Breakpoint does take an 'internal' argument, use it
# and hide output
result = gdb.execute(
"python bp = gdb.Breakpoint(%r, gdb.BP_BREAKPOINT, internal=True); "
"print bp.number",
to_string=True)
breakpoint = int(result)
self.static_breakpoints[funcname] = breakpoint
for bp in set(self.static_breakpoints) - break_funcs:
gdb.execute("delete " + self.static_breakpoints[bp])
self.disable_breakpoints()
def enable_breakpoints(self):
for bp in self.static_breakpoints.itervalues():
gdb.execute('enable ' + bp)
runtime_break_functions = self.runtime_break_functions()
if runtime_break_functions is None:
return
for funcname in runtime_break_functions:
if (funcname not in self.static_breakpoints and
funcname not in self.runtime_breakpoints):
self.runtime_breakpoints[funcname] = self._break_func(funcname)
elif funcname in self.runtime_breakpoints:
gdb.execute('enable ' + self.runtime_breakpoints[funcname])
def disable_breakpoints(self):
chain = itertools.chain(self.static_breakpoints.itervalues(),
self.runtime_breakpoints.itervalues())
for bp in chain:
gdb.execute('disable ' + bp)
def runtime_break_functions(self):
"""
Implement this if the list of step-into functions depends on the
context.
"""
def stopped(self, result):
match = re.search('^Program received signal .*', result, re.MULTILINE)
if match:
return match.group(0)
elif get_selected_inferior().pid == 0:
return result
else:
return None
def _stackdepth(self, frame):
depth = 0
while frame:
frame = frame.older()
depth += 1
return depth
def finish_executing(self, result):
"""
After doing some kind of code running in the inferior, print the line
of source code or the result of the last executed gdb command (passed
in as the `result` argument).
"""
result = self.stopped(result)
if result:
print result.strip()
# check whether the program was killed by a signal, it should still
# have a stack.
try:
frame = gdb.selected_frame()
except RuntimeError:
pass
else:
print self.get_source_line(frame)
else:
frame = gdb.selected_frame()
output = None
if self.is_relevant_function(frame):
output = self.get_source_line(frame)
if output is None:
pframe = getattr(self, 'print_stackframe', None)
if pframe:
pframe(frame, index=0)
else:
print result.strip()
else:
print output
def _finish(self):
"""
Execute until the function returns (or until something else makes it
stop)
"""
if gdb.selected_frame().older() is not None:
return gdb.execute('finish', to_string=True)
else:
# outermost frame, continue
return gdb.execute('cont', to_string=True)
def finish(self, *args):
"""
Execute until the function returns to a relevant caller.
"""
while True:
result = self._finish()
try:
frame = gdb.selected_frame()
except RuntimeError:
break
hitbp = re.search(r'Breakpoint (\d+)', result)
is_relavant = self.is_relevant_function(frame)
if hitbp or is_relavant or self.stopped(result):
break
self.finish_executing(result)
def _step(self):
"""
Do a single step or step-over. Returns the result of the last gdb
command that made execution stop.
"""
if self.stepinto:
self.enable_breakpoints()
beginframe = gdb.selected_frame()
beginline = self.lineno(beginframe)
if not self.stepinto:
depth = self._stackdepth(beginframe)
newframe = beginframe
result = ''
while True:
if self.is_relevant_function(newframe):
result = gdb.execute('next', to_string=True)
else:
result = self._finish()
if self.stopped(result):
break
newframe = gdb.selected_frame()
is_relevant_function = self.is_relevant_function(newframe)
try:
framename = newframe.name()
except RuntimeError:
framename = None
m = re.search(r'Breakpoint (\d+)', result)
if m:
bp = self.runtime_breakpoints.get(framename)
if bp is None or (m.group(1) == bp and is_relevant_function):
# although we hit a breakpoint, we still need to check
# that the function, in case hit by a runtime breakpoint,
# is in the right context
break
if newframe != beginframe:
# new function
if not self.stepinto:
# see if we returned to the caller
newdepth = self._stackdepth(newframe)
is_relevant_function = (newdepth < depth and
is_relevant_function)
if is_relevant_function:
break
else:
if self.lineno(newframe) > beginline:
break
if self.stepinto:
self.disable_breakpoints()
return result
def step(self, *args):
return self.finish_executing(self._step())
def run(self, *args):
self.finish_executing(gdb.execute('run', to_string=True))
def cont(self, *args):
self.finish_executing(gdb.execute('cont', to_string=True))
class PythonCodeStepper(GenericCodeStepper):
def pyframe(self, frame):
pyframe = Frame(frame).get_pyop()
if pyframe:
return pyframe
else:
raise gdb.GdbError(
"Unable to find the Python frame, run your code with a debug "
"build (configure with --with-pydebug or compile with -g).")
def lineno(self, frame):
return self.pyframe(frame).current_line_num()
def is_relevant_function(self, frame):
return Frame(frame).is_evalframeex()
def get_source_line(self, frame):
try:
pyframe = self.pyframe(frame)
return '%4d %s' % (pyframe.current_line_num(),
pyframe.current_line().rstrip())
except IOError, e:
return None
def static_break_functions(self):
yield 'PyEval_EvalFrameEx'
class PyStep(PythonCodeStepper):
"Step through Python code."
invoke = PythonCodeStepper.step
class PyNext(PythonCodeStepper):
"Step-over Python code."
invoke = PythonCodeStepper.step
class PyFinish(PythonCodeStepper):
"Execute until function returns to a caller."
invoke = PythonCodeStepper.finish
class PyRun(PythonCodeStepper):
"Run the program."
invoke = PythonCodeStepper.run
class PyCont(PythonCodeStepper):
invoke = PythonCodeStepper.cont
py_step = PyStep('py-step', stepinto=True)
py_next = PyNext('py-next', stepinto=False)
py_finish = PyFinish('py-finish')
py_run = PyRun('py-run')
py_cont = PyCont('py-cont')
gdb.execute('set breakpoint pending on')
py_step.init_breakpoints()
Py_single_input = 256
Py_file_input = 257
Py_eval_input = 258
def _pointervalue(gdbval):
"""
Return the value of the pionter as a Python int.
gdbval.type must be a pointer type
"""
# don't convert with int() as it will raise a RuntimeError
if gdbval.address is not None:
return long(gdbval.address)
else:
# the address attribute is None sometimes, in which case we can
# still convert the pointer to an int
return long(gdbval)
def pointervalue(gdbval):
pointer = _pointervalue(gdbval)
try:
if pointer < 0:
raise gdb.GdbError("Negative pointer value, presumably a bug "
"in gdb, aborting.")
except RuntimeError:
# work around yet another bug in gdb where you get random behaviour
# and tracebacks
pass
return pointer
class PythonCodeExecutor(object):
def malloc(self, size):
chunk = (gdb.parse_and_eval("(void *) malloc((size_t) %d)" % size))
pointer = pointervalue(chunk)
if pointer == 0:
raise gdb.GdbError("No memory could be allocated in the inferior.")
return pointer
def alloc_string(self, string):
pointer = self.malloc(len(string))
get_selected_inferior().write_memory(pointer, string)
return pointer
def alloc_pystring(self, string):
stringp = self.alloc_string(string)
PyString_FromStringAndSize = 'PyString_FromStringAndSize'
try:
gdb.parse_and_eval(PyString_FromStringAndSize)
except RuntimeError:
try:
gdb.parse_and_eval('PyUnicode_FromStringAndSize')
except RuntimeError:
PyString_FromStringAndSize = 'PyUnicodeUCS2_FromStringAndSize'
else:
PyString_FromStringAndSize = 'PyUnicode_FromStringAndSize'
try:
result = gdb.parse_and_eval(
'(PyObject *) %s((char *) %d, (size_t) %d)' % (
PyString_FromStringAndSize, stringp, len(string)))
finally:
self.free(stringp)
pointer = pointervalue(result)
if pointer == 0:
raise gdb.GdbError("Unable to allocate Python string in "
"the inferior.")
return pointer
def free(self, pointer):
gdb.parse_and_eval("free((void *) %d)" % pointer)
def incref(self, pointer):
"Increment the reference count of a Python object in the inferior."
gdb.parse_and_eval('Py_IncRef((PyObject *) %d)' % pointer)
def decref(self, pointer):
"Decrement the reference count of a Python object in the inferior."
# Py_DecRef is like Py_XDECREF, but a function. So we don't have
# to check for NULL. This should also decref all our allocated
# Python strings.
gdb.parse_and_eval('Py_DecRef((PyObject *) %d)' % pointer)
def evalcode(self, code, input_type, global_dict=None, local_dict=None):
"""
Evaluate python code `code` given as a string in the inferior and
return the result as a gdb.Value. Returns a new reference in the
inferior.
Of course, executing any code in the inferior may be dangerous and may
leave the debuggee in an unsafe state or terminate it alltogether.
"""
if '\0' in code:
raise gdb.GdbError("String contains NUL byte.")
code += '\0'
pointer = self.alloc_string(code)
globalsp = pointervalue(global_dict)
localsp = pointervalue(local_dict)
if globalsp == 0 or localsp == 0:
raise gdb.GdbError("Unable to obtain or create locals or globals.")
code = """
PyRun_String(
(PyObject *) %(code)d,
(int) %(start)d,
(PyObject *) %(globals)s,
(PyObject *) %(locals)d)
""" % dict(code=pointer, start=input_type,
globals=globalsp, locals=localsp)
with FetchAndRestoreError():
try:
self.decref(gdb.parse_and_eval(code))
finally:
self.free(pointer)
class FetchAndRestoreError(PythonCodeExecutor):
"""
Context manager that fetches the error indicator in the inferior and
restores it on exit.
"""
def __init__(self):
self.sizeof_PyObjectPtr = gdb.lookup_type('PyObject').pointer().sizeof
self.pointer = self.malloc(self.sizeof_PyObjectPtr * 3)
type = self.pointer
value = self.pointer + self.sizeof_PyObjectPtr
traceback = self.pointer + self.sizeof_PyObjectPtr * 2
self.errstate = type, value, traceback
def __enter__(self):
gdb.parse_and_eval("PyErr_Fetch(%d, %d, %d)" % self.errstate)
def __exit__(self, *args):
if gdb.parse_and_eval("(int) PyErr_Occurred()"):
gdb.parse_and_eval("PyErr_Print()")
pyerr_restore = ("PyErr_Restore("
"(PyObject *) *%d,"
"(PyObject *) *%d,"
"(PyObject *) *%d)")
try:
gdb.parse_and_eval(pyerr_restore % self.errstate)
finally:
self.free(self.pointer)
class FixGdbCommand(gdb.Command):
def __init__(self, command, actual_command):
super(FixGdbCommand, self).__init__(command, gdb.COMMAND_DATA,
gdb.COMPLETE_NONE)
self.actual_command = actual_command
def fix_gdb(self):
"""
So, you must be wondering what the story is this time! Yeeees, indeed,
I have quite the story for you! It seems that invoking either 'cy exec'
and 'py-exec' work perfectly fine, but after this gdb's python API is
entirely broken. Some unset exception value is still set?
sys.exc_clear() didn't help. A demonstration:
(gdb) cy exec 'hello'
'hello'
(gdb) python gdb.execute('cont')
RuntimeError: Cannot convert value to int.
Error while executing Python code.
(gdb) python gdb.execute('cont')
[15148 refs]
Program exited normally.
"""
warnings.filterwarnings('ignore', r'.*', RuntimeWarning,
re.escape(__name__))
try:
long(gdb.parse_and_eval("(void *) 0")) == 0
except RuntimeError:
pass
# warnings.resetwarnings()
def invoke(self, args, from_tty):
self.fix_gdb()
try:
gdb.execute('%s %s' % (self.actual_command, args))
except RuntimeError, e:
raise gdb.GdbError(str(e))
self.fix_gdb()
class PyExec(gdb.Command):
def readcode(self, expr):
if expr:
return expr, Py_single_input
else:
lines = []
while True:
try:
line = raw_input('>')
except EOFError:
break
else:
if line.rstrip() == 'end':
break
lines.append(line)
return '\n'.join(lines), Py_file_input
def invoke(self, expr, from_tty):
expr, input_type = self.readcode(expr)
executor = PythonCodeExecutor()
global_dict = gdb.parse_and_eval('PyEval_GetGlobals()')
local_dict = gdb.parse_and_eval('PyEval_GetLocals()')
if pointervalue(global_dict) == 0 or pointervalue(local_dict) == 0:
raise gdb.GdbError("Unable to find the locals or globals of the "
"most recent Python function (relative to the "
"selected frame).")
executor.evalcode(expr, input_type, global_dict, local_dict)
py_exec = FixGdbCommand('py-exec', '-py-exec')
_py_exec = PyExec("-py-exec", gdb.COMMAND_DATA, gdb.COMPLETE_NONE)
\ No newline at end of file
......@@ -17,11 +17,55 @@ from distutils.dep_util import newer, newer_group
from distutils import log
from distutils.dir_util import mkpath
from distutils.command import build_ext as _build_ext
from distutils import sysconfig
extension_name_re = _build_ext.extension_name_re
show_compilers = _build_ext.show_compilers
class Optimization(object):
def __init__(self):
self.flags = (
'OPT',
'CFLAGS',
'CPPFLAGS',
'EXTRA_CFLAGS',
'BASECFLAGS',
'PY_CFLAGS',
)
self.state = sysconfig.get_config_vars(*self.flags)
self.config_vars = sysconfig.get_config_vars()
def disable_optimization(self):
"disable optimization for the C or C++ compiler"
badoptions = ('-O1', '-O2', '-O3')
for flag, option in zip(self.flags, self.state):
if option is not None:
L = [opt for opt in option.split() if opt not in badoptions]
self.config_vars[flag] = ' '.join(L)
def restore_state(self):
"restore the original state"
for flag, option in zip(self.flags, self.state):
if option is not None:
self.config_vars[flag] = option
optimization = Optimization()
try:
any
except NameError:
def any(it):
for x in it:
if x:
return True
return False
class build_ext(_build_ext.build_ext):
description = "build C/C++ and Cython extensions (compile/link to build directory)"
......@@ -47,10 +91,13 @@ class build_ext(_build_ext.build_ext):
"generate .pxi file for public declarations"),
('pyrex-directives=', None,
"compiler directive overrides"),
('pyrex-gdb', None,
"generate debug information for cygdb"),
])
boolean_options.extend([
'pyrex-cplus', 'pyrex-create-listing', 'pyrex-line-directives', 'pyrex-c-in-temp'
'pyrex-cplus', 'pyrex-create-listing', 'pyrex-line-directives',
'pyrex-c-in-temp', 'pyrex-gdb',
])
def initialize_options(self):
......@@ -62,6 +109,7 @@ class build_ext(_build_ext.build_ext):
self.pyrex_directives = None
self.pyrex_c_in_temp = 0
self.pyrex_gen_pxi = 0
self.pyrex_gdb = False
def finalize_options (self):
_build_ext.build_ext.finalize_options(self)
......@@ -74,9 +122,21 @@ class build_ext(_build_ext.build_ext):
self.pyrex_directives = {}
# finalize_options ()
def run(self):
# We have one shot at this before build_ext initializes the compiler.
# If --pyrex-gdb is in effect as a command line option or as option
# of any Extension module, disable optimization for the C or C++
# compiler.
if (self.pyrex_gdb or any([getattr(ext, 'pyrex_gdb', False)
for ext in self.extensions])):
optimization.disable_optimization()
_build_ext.build_ext.run(self)
def build_extensions(self):
# First, sanity-check the 'extensions' list
self.check_extensions_list(self.extensions)
for ext in self.extensions:
ext.sources = self.cython_sources(ext.sources, ext)
self.build_extension(ext)
......@@ -128,7 +188,7 @@ class build_ext(_build_ext.build_ext):
cplus = self.pyrex_cplus or getattr(extension, 'pyrex_cplus', 0) or \
(extension.language and extension.language.lower() == 'c++')
pyrex_gen_pxi = self.pyrex_gen_pxi or getattr(extension, 'pyrex_gen_pxi', 0)
pyrex_gdb = self.pyrex_gdb or getattr(extension, 'pyrex_gdb', False)
# Set up the include_path for the Cython compiler:
# 1. Start with the command line option.
# 2. Add in any (unique) paths from the extension
......@@ -201,6 +261,10 @@ class build_ext(_build_ext.build_ext):
if rebuild:
log.info("cythoning %s to %s", source, target)
self.mkpath(os.path.dirname(target))
if self.inplace:
output_dir = os.curdir
else:
output_dir = self.build_lib
options = CompilationOptions(pyrex_default_options,
use_listing_file = create_listing,
include_path = includes,
......@@ -208,7 +272,9 @@ class build_ext(_build_ext.build_ext):
output_file = target,
cplus = cplus,
emit_linenums = line_directives,
generate_pxi = pyrex_gen_pxi)
generate_pxi = pyrex_gen_pxi,
output_dir = output_dir,
gdb_debug = pyrex_gdb)
result = cython_compile(source, options=options,
full_module_name=module_name)
else:
......
......@@ -31,6 +31,8 @@ class Extension(_Extension.Extension):
put generated C files in temp directory.
pyrex_gen_pxi : boolean
generate .pxi file for public declarations
pyrex_gdb : boolean
generate Cython debug information for this extension for cygdb
"""
# When adding arguments to this constructor, be sure to update
......@@ -56,6 +58,7 @@ class Extension(_Extension.Extension):
pyrex_cplus = 0,
pyrex_c_in_temp = 0,
pyrex_gen_pxi = 0,
pyrex_gdb = False,
**kw):
_Extension.Extension.__init__(self, name, sources,
......@@ -81,6 +84,7 @@ class Extension(_Extension.Extension):
self.pyrex_cplus = pyrex_cplus
self.pyrex_c_in_temp = pyrex_c_in_temp
self.pyrex_gen_pxi = pyrex_gen_pxi
self.pyrex_gdb = pyrex_gdb
# class Extension
......
......@@ -11,6 +11,7 @@ class StringIOTree(object):
stream = StringIO()
self.stream = stream
self.write = stream.write
self.markers = []
def getvalue(self):
content = [x.getvalue() for x in self.prepended_children]
......@@ -31,6 +32,8 @@ class StringIOTree(object):
# itself is empty -- this makes it ready for insertion
if self.stream.tell():
self.prepended_children.append(StringIOTree(self.stream))
self.prepended_children[-1].markers = self.markers
self.markers = []
self.stream = StringIO()
self.write = self.stream.write
......@@ -59,6 +62,11 @@ class StringIOTree(object):
self.prepended_children.append(other)
return other
def allmarkers(self):
children = self.prepended_children
return [m for c in children for m in c.allmarkers()] + self.markers
__doc__ = r"""
Implements a buffer with insertion points. When you know you need to
"get back" to a place and write more later, simply call insertion_point()
......
import unittest
from Cython import StringIOTree as stringtree
code = """
cdef int spam # line 1
cdef ham():
a = 1
b = 2
c = 3
d = 4
def eggs():
pass
cpdef bacon():
print spam
print 'scotch'
print 'tea?'
print 'or coffee?' # line 16
"""
linemap = dict(enumerate(code.splitlines()))
class TestStringIOTree(unittest.TestCase):
def setUp(self):
self.tree = stringtree.StringIOTree()
def test_markers(self):
assert not self.tree.allmarkers()
def test_insertion(self):
self.write_lines((1, 2, 3))
line_4_to_6_insertion_point = self.tree.insertion_point()
self.write_lines((7, 8))
line_9_to_13_insertion_point = self.tree.insertion_point()
self.write_lines((14, 15, 16))
line_4_insertion_point = line_4_to_6_insertion_point.insertion_point()
self.write_lines((5, 6), tree=line_4_to_6_insertion_point)
line_9_to_12_insertion_point = (
line_9_to_13_insertion_point.insertion_point())
self.write_line(13, tree=line_9_to_13_insertion_point)
self.write_line(4, tree=line_4_insertion_point)
self.write_line(9, tree=line_9_to_12_insertion_point)
line_10_insertion_point = line_9_to_12_insertion_point.insertion_point()
self.write_line(11, tree=line_9_to_12_insertion_point)
self.write_line(10, tree=line_10_insertion_point)
self.write_line(12, tree=line_9_to_12_insertion_point)
self.assertEqual(self.tree.allmarkers(), range(1, 17))
self.assertEqual(code.strip(), self.tree.getvalue().strip())
def write_lines(self, linenos, tree=None):
for lineno in linenos:
self.write_line(lineno, tree=tree)
def write_line(self, lineno, tree=None):
if tree is None:
tree = self.tree
tree.markers.append(lineno)
tree.write(linemap[lineno] + '\n')
\ No newline at end of file
__version__ = "0.14.alpha0"
__version__ = "0.14.beta0"
# Void cython.* directives (for case insensitive operating systems).
from Cython.Shadow import *
#!/usr/bin/env python
import sys
from Cython.Debugger import Cygdb as cygdb
if __name__ == '__main__':
cygdb.main()
#!/usr/bin/env python
import sys
from Cython.Debugger import Cygdb as cygdb
if __name__ == '__main__':
cygdb.main()
......@@ -343,15 +343,27 @@ class CythonCompileTestCase(unittest.TestCase):
else:
return geterrors()
def run_cython(self, test_directory, module, targetdir, incdir, annotate):
def run_cython(self, test_directory, module, targetdir, incdir, annotate,
extra_compile_options=None):
include_dirs = INCLUDE_DIRS[:]
if incdir:
include_dirs.append(incdir)
source = self.find_module_source_file(
os.path.join(test_directory, module + '.pyx'))
target = os.path.join(targetdir, self.build_target_filename(module))
if extra_compile_options is None:
extra_compile_options = {}
try:
CompilationOptions
except NameError:
from Cython.Compiler.Main import CompilationOptions
from Cython.Compiler.Main import compile as cython_compile
from Cython.Compiler.Main import default_options
options = CompilationOptions(
pyrex_default_options,
default_options,
include_path = include_dirs,
output_file = target,
annotate = annotate,
......@@ -360,11 +372,13 @@ class CythonCompileTestCase(unittest.TestCase):
language_level = self.language_level,
generate_pxi = False,
evaluate_tree_assertions = True,
**extra_compile_options
)
cython_compile(source, options=options,
full_module_name=module)
def run_distutils(self, test_directory, module, workdir, incdir):
def run_distutils(self, test_directory, module, workdir, incdir,
extra_extension_args=None):
cwd = os.getcwd()
os.chdir(workdir)
try:
......@@ -378,11 +392,16 @@ class CythonCompileTestCase(unittest.TestCase):
if match(module):
ext_include_dirs += get_additional_include_dirs()
self.copy_related_files(test_directory, workdir, module)
if extra_extension_args is None:
extra_extension_args = {}
extension = Extension(
module,
sources = self.find_source_files(workdir, module),
include_dirs = ext_include_dirs,
extra_compile_args = CFLAGS,
**extra_extension_args
)
if self.language == 'cpp':
extension.language = 'c++'
......@@ -625,6 +644,8 @@ class CythonUnitTestCase(CythonCompileTestCase):
except Exception:
pass
include_debugger = sys.version_info[:2] > (2, 4)
def collect_unittests(path, module_prefix, suite, selectors):
def file_matches(filename):
return filename.startswith("Test") and filename.endswith(".py")
......@@ -634,7 +655,11 @@ def collect_unittests(path, module_prefix, suite, selectors):
loader = unittest.TestLoader()
if include_debugger:
skipped_dirs = []
else:
cython_dir = os.path.dirname(os.path.abspath(__file__))
skipped_dirs = [os.path.join(cython_dir, 'Cython', 'Debugger')]
for dirpath, dirnames, filenames in os.walk(path):
if dirpath != path and "__init__.py" not in filenames:
......@@ -659,12 +684,22 @@ def collect_unittests(path, module_prefix, suite, selectors):
module = getattr(module, x)
suite.addTests([loader.loadTestsFromModule(module)])
def collect_doctests(path, module_prefix, suite, selectors):
def package_matches(dirname):
if dirname == 'Debugger' and not include_debugger:
return False
return dirname not in ("Mac", "Distutils", "Plex")
def file_matches(filename):
return (filename.endswith(".py") and not ('~' in filename
or '#' in filename or filename.startswith('.')))
filename, ext = os.path.splitext(filename)
blacklist = ['libcython', 'libpython', 'test_libcython_in_gdb',
'TestLibCython']
return (ext == '.py' and not
'~' in filename and not
'#' in filename and not
filename.startswith('.') and not
filename in blacklist)
import doctest, types
for dirpath, dirnames, filenames in os.walk(path):
parentname = os.path.split(dirpath)[-1]
......
......@@ -70,6 +70,9 @@ else:
# specific to setup
setuptools_extra_args = {}
# tells whether to include cygdb (the script and the Cython.Debugger package
include_debugger = sys.version_info[:2] > (2, 4)
if 'setuptools' in sys.modules:
setuptools_extra_args['zip_safe'] = False
setuptools_extra_args['entry_points'] = {
......@@ -81,8 +84,12 @@ if 'setuptools' in sys.modules:
else:
if os.name == "posix":
scripts = ["bin/cython"]
if include_debugger:
scripts.append('bin/cygdb')
else:
scripts = ["cython.py"]
if include_debugger:
scripts.append('cygdb.py')
def compile_cython_modules(profile=False, compile_more=False, cython_with_refnanny=False):
source_root = os.path.abspath(os.path.dirname(__file__))
......@@ -249,6 +256,20 @@ setup_args.update(setuptools_extra_args)
from Cython import __version__ as version
packages = [
'Cython',
'Cython.Build',
'Cython.Compiler',
'Cython.Runtime',
'Cython.Distutils',
'Cython.Plex',
'Cython.Tests',
'Cython.Compiler.Tests',
]
if include_debugger:
packages.append('Cython.Debugger')
setup(
name = 'Cython',
version = version,
......@@ -289,17 +310,7 @@ setup(
],
scripts = scripts,
packages=[
'Cython',
'Cython.Build',
'Cython.Compiler',
'Cython.Runtime',
'Cython.Distutils',
'Cython.Plex',
'Cython.Tests',
'Cython.Compiler.Tests',
],
packages=packages,
# pyximport
py_modules = ["pyximport/__init__",
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment