Commit e4c431ec, authored by Eric Snow, committed by GitHub

bpo-36876: Re-organize the c-analyzer tool code. (gh-16841)

This is partly a cleanup of the code. It is also preparation for getting the variables from the source (cross-platform) rather than from the symbols.

The change only touches the tool (and its tests).
parent ea55c51b
......@@ -3,7 +3,7 @@ import test.test_tools
test.test_tools.skip_if_missing('c-analyzer')
with test.test_tools.imports_under_tool('c-analyzer'):
from c_globals.__main__ import main
from cpython.__main__ import main
class ActualChecks(unittest.TestCase):
......
import re
import textwrap
import unittest
from .. import tool_imports_for_tests
with tool_imports_for_tests():
from c_parser.info import Variable
from c_analyzer_common.info import ID
from c_analyzer_common.known import from_file
class FromFileTests(unittest.TestCase):
    """Check from_file() when it is fed rows by a fake TSV reader."""

    maxDiff = None

    # Rows handed back by the fake _read_tsv(); each test overrides this.
    _return_read_tsv = ()

    @property
    def calls(self):
        """The (name, args) records appended by the fakes, in call order."""
        # Created on first use so that no setUp() is needed.
        if not hasattr(self, '_calls'):
            self._calls = []
        return self._calls

    def _read_tsv(self, *args):
        """Fake TSV reader: record the call and return the canned rows."""
        self.calls.append(('_read_tsv', args))
        return self._return_read_tsv

    def test_typical(self):
        table = textwrap.dedent('''
filename funcname name kind declaration
file1.c - var1 variable static int
file1.c func1 local1 variable static int
file1.c - var2 variable int
file1.c func2 local2 variable char *
file2.c - var1 variable char *
''')
        # Skip the header row; only the first four whitespace runs are
        # column separators (the declaration itself may contain spaces).
        rows = []
        for line in table.strip().splitlines()[1:]:
            fields = re.sub(r'\s+', '\t', line, 4).split('\t')
            rows.append(tuple(field.strip() for field in fields))
        self._return_read_tsv = rows

        known = from_file('spam.c', _read_tsv=self._read_tsv)

        expected = [
            Variable.from_parts('file1.c', '', 'var1', 'static int'),
            Variable.from_parts('file1.c', 'func1', 'local1', 'static int'),
            Variable.from_parts('file1.c', '', 'var2', 'int'),
            Variable.from_parts('file1.c', 'func2', 'local2', 'char *'),
            Variable.from_parts('file2.c', '', 'var1', 'char *'),
            ]
        self.assertEqual(known, {
            'variables': {var.id: var for var in expected},
            })
        self.assertEqual(self.calls, [
            ('_read_tsv', ('spam.c', 'filename\tfuncname\tname\tkind\tdeclaration')),
            ])

    def test_empty(self):
        self._return_read_tsv = []

        known = from_file('spam.c', _read_tsv=self._read_tsv)

        self.assertEqual(known, {
            'variables': {},
            })
        self.assertEqual(self.calls, [
            ('_read_tsv', ('spam.c', 'filename\tfuncname\tname\tkind\tdeclaration')),
            ])
......@@ -3,7 +3,7 @@ import unittest
from .. import tool_imports_for_tests
with tool_imports_for_tests():
from c_analyzer_common.files import (
from c_analyzer.common.files import (
iter_files, _walk_tree, glob_tree,
)
......
......@@ -4,7 +4,10 @@ import unittest
from ..util import PseudoStr, StrProxy, Object
from .. import tool_imports_for_tests
with tool_imports_for_tests():
from c_analyzer_common.info import ID
from c_analyzer.common.info import (
UNKNOWN,
ID,
)
class IDTests(unittest.TestCase):
......
......@@ -2,8 +2,10 @@ import unittest
from .. import tool_imports_for_tests
with tool_imports_for_tests():
from c_parser import info
from c_globals.show import basic
from c_analyzer.variables import info
from c_analyzer.common.show import (
basic,
)
TYPICAL = [
......
......@@ -3,12 +3,13 @@ import unittest
from .. import tool_imports_for_tests
with tool_imports_for_tests():
from c_analyzer_common import SOURCE_DIRS
from c_analyzer_common.known import DATA_FILE as KNOWN_FILE
from c_parser import info
import c_globals as cg
from c_globals.supported import IGNORED_FILE
from c_globals.__main__ import cmd_check, cmd_show, parse_args, main
from c_analyzer.variables import info
from cpython import SOURCE_DIRS
from cpython.supported import IGNORED_FILE
from cpython.known import DATA_FILE as KNOWN_FILE
from cpython.__main__ import (
cmd_check, cmd_show, parse_args, main,
)
TYPICAL = [
......@@ -46,6 +47,8 @@ class CMDBase(unittest.TestCase):
maxDiff = None
# _return_known_from_file = None
# _return_ignored_from_file = None
_return_find = ()
@property
......@@ -56,8 +59,16 @@ class CMDBase(unittest.TestCase):
self._calls = []
return self._calls
def _find(self, *args):
self.calls.append(('_find', args))
# def _known_from_file(self, *args):
# self.calls.append(('_known_from_file', args))
# return self._return_known_from_file or {}
#
# def _ignored_from_file(self, *args):
# self.calls.append(('_ignored_from_file', args))
# return self._return_ignored_from_file or {}
def _find(self, known, ignored, skip_objects=False):
self.calls.append(('_find', (known, ignored, skip_objects)))
return self._return_find
def _show(self, *args):
......@@ -78,41 +89,35 @@ class CheckTests(CMDBase):
_print=self._print,
)
self.assertEqual(self.calls[0], (
'_find', (
SOURCE_DIRS,
KNOWN_FILE,
IGNORED_FILE,
),
))
self.assertEqual(
self.calls[0],
('_find', (KNOWN_FILE, IGNORED_FILE, False)),
)
def test_all_supported(self):
self._return_find = [(v, s) for v, s in TYPICAL if s]
dirs = ['src1', 'src2', 'Include']
cmd_check('check',
dirs,
ignored='ignored.tsv',
known='known.tsv',
_find=self._find,
_show=self._show,
_print=self._print,
)
known='known.tsv',
ignored='ignored.tsv',
_find=self._find,
_show=self._show,
_print=self._print,
)
self.assertEqual(self.calls, [
('_find', (dirs, 'known.tsv', 'ignored.tsv')),
('_find', ('known.tsv', 'ignored.tsv', False)),
#('_print', ('okay',)),
])
def test_some_unsupported(self):
self._return_find = TYPICAL
dirs = ['src1', 'src2', 'Include']
with self.assertRaises(SystemExit) as cm:
cmd_check('check',
dirs,
ignored='ignored.tsv',
known='known.tsv',
ignored='ignored.tsv',
_find=self._find,
_show=self._show,
_print=self._print,
......@@ -120,7 +125,7 @@ class CheckTests(CMDBase):
unsupported = [v for v, s in TYPICAL if not s]
self.assertEqual(self.calls, [
('_find', (dirs, 'known.tsv', 'ignored.tsv')),
('_find', ('known.tsv', 'ignored.tsv', False)),
('_print', ('ERROR: found unsupported global variables',)),
('_print', ()),
('_show', (sorted(unsupported),)),
......@@ -140,20 +145,15 @@ class ShowTests(CMDBase):
_print=self._print,
)
self.assertEqual(self.calls[0], (
'_find', (
SOURCE_DIRS,
KNOWN_FILE,
IGNORED_FILE,
),
))
self.assertEqual(
self.calls[0],
('_find', (KNOWN_FILE, IGNORED_FILE, False)),
)
def test_typical(self):
self._return_find = TYPICAL
dirs = ['src1', 'src2', 'Include']
cmd_show('show',
dirs,
known='known.tsv',
ignored='ignored.tsv',
_find=self._find,
......@@ -164,7 +164,7 @@ class ShowTests(CMDBase):
supported = [v for v, s in TYPICAL if s]
unsupported = [v for v, s in TYPICAL if not s]
self.assertEqual(self.calls, [
('_find', (dirs, 'known.tsv', 'ignored.tsv')),
('_find', ('known.tsv', 'ignored.tsv', False)),
('_print', ('supported:',)),
('_print', ('----------',)),
('_show', (sorted(supported),)),
......@@ -201,7 +201,7 @@ class ParseArgsTests(unittest.TestCase):
self.assertEqual(cmdkwargs, {
'ignored': IGNORED_FILE,
'known': KNOWN_FILE,
'dirs': SOURCE_DIRS,
#'dirs': SOURCE_DIRS,
})
def test_check_full_args(self):
......@@ -209,16 +209,16 @@ class ParseArgsTests(unittest.TestCase):
'check',
'--ignored', 'spam.tsv',
'--known', 'eggs.tsv',
'dir1',
'dir2',
'dir3',
#'dir1',
#'dir2',
#'dir3',
])
self.assertEqual(cmd, 'check')
self.assertEqual(cmdkwargs, {
'ignored': 'spam.tsv',
'known': 'eggs.tsv',
'dirs': ['dir1', 'dir2', 'dir3']
#'dirs': ['dir1', 'dir2', 'dir3']
})
def test_show_no_args(self):
......@@ -230,7 +230,7 @@ class ParseArgsTests(unittest.TestCase):
self.assertEqual(cmdkwargs, {
'ignored': IGNORED_FILE,
'known': KNOWN_FILE,
'dirs': SOURCE_DIRS,
#'dirs': SOURCE_DIRS,
'skip_objects': False,
})
......@@ -239,16 +239,16 @@ class ParseArgsTests(unittest.TestCase):
'show',
'--ignored', 'spam.tsv',
'--known', 'eggs.tsv',
'dir1',
'dir2',
'dir3',
#'dir1',
#'dir2',
#'dir3',
])
self.assertEqual(cmd, 'show')
self.assertEqual(cmdkwargs, {
'ignored': 'spam.tsv',
'known': 'eggs.tsv',
'dirs': ['dir1', 'dir2', 'dir3'],
#'dirs': ['dir1', 'dir2', 'dir3'],
'skip_objects': False,
})
......
......@@ -4,9 +4,11 @@ import unittest
from .. import tool_imports_for_tests
with tool_imports_for_tests():
from c_analyzer_common.info import ID
from c_parser import info
from c_globals.supported import is_supported, ignored_from_file
from c_analyzer.common.info import ID
from c_analyzer.variables.info import Variable
from cpython.supported import (
is_supported, ignored_from_file,
)
class IsSupportedTests(unittest.TestCase):
......@@ -14,8 +16,8 @@ class IsSupportedTests(unittest.TestCase):
@unittest.expectedFailure
def test_supported(self):
statics = [
info.StaticVar('src1/spam.c', None, 'var1', 'const char *'),
info.StaticVar('src1/spam.c', None, 'var1', 'int'),
Variable('src1/spam.c', None, 'var1', 'const char *'),
Variable('src1/spam.c', None, 'var1', 'int'),
]
for static in statics:
with self.subTest(static):
......@@ -26,8 +28,8 @@ class IsSupportedTests(unittest.TestCase):
@unittest.expectedFailure
def test_not_supported(self):
statics = [
info.StaticVar('src1/spam.c', None, 'var1', 'PyObject *'),
info.StaticVar('src1/spam.c', None, 'var1', 'PyObject[10]'),
Variable('src1/spam.c', None, 'var1', 'PyObject *'),
Variable('src1/spam.c', None, 'var1', 'PyObject[10]'),
]
for static in statics:
with self.subTest(static):
......
......@@ -3,9 +3,9 @@ import unittest
from .. import tool_imports_for_tests
with tool_imports_for_tests():
from c_parser.declarations import (
from c_analyzer.parser.declarations import (
iter_global_declarations, iter_local_statements,
parse_func, parse_var, parse_compound,
parse_func, _parse_var, parse_compound,
iter_variables,
)
......@@ -515,7 +515,7 @@ class ParseVarTests(TestCaseBase):
])
for stmt, expected in tests:
with self.subTest(stmt):
name, vartype = parse_var(stmt)
name, vartype = _parse_var(stmt)
self.assertEqual((name, vartype), expected)
......
......@@ -6,7 +6,7 @@ import sys
from ..util import wrapped_arg_combos, StrProxy
from .. import tool_imports_for_tests
with tool_imports_for_tests():
from c_parser.preprocessor import (
from c_analyzer.parser.preprocessor import (
iter_lines,
# directives
parse_directive, PreprocessorDirective,
......
......@@ -4,8 +4,8 @@ import unittest
from ..util import PseudoStr, StrProxy, Object
from .. import tool_imports_for_tests
with tool_imports_for_tests():
from c_analyzer_common.info import ID
from c_symbols.info import Symbol
from c_analyzer.common.info import ID
from c_analyzer.symbols.info import Symbol
class SymbolTests(unittest.TestCase):
......
import os.path
from test.support import load_package_tests
def load_tests(*args):
    """Load the tests of this package.

    All arguments are forwarded unchanged to load_package_tests(),
    together with the directory that contains this module.
    """
    package_dir = os.path.dirname(__file__)
    return load_package_tests(package_dir, *args)
import unittest
from .. import tool_imports_for_tests
with tool_imports_for_tests():
from c_analyzer.variables import info
from c_analyzer.variables.find import (
vars_from_binary,
)
class _Base(unittest.TestCase):
maxDiff = None
@property
def calls(self):
try:
return self._calls
except AttributeError:
self._calls = []
return self._calls
class VarsFromBinaryTests(_Base):
    """Check vars_from_binary() with its two collaborators replaced by fakes."""

    # Canned return values for the fakes below; tests override them.
    _return_iter_vars = ()
    _return_get_symbol_resolver = None

    def setUp(self):
        super().setUp()

        # Keyword arguments that inject the fakes into vars_from_binary().
        self.kwargs = {
            '_iter_vars': self._iter_vars,
            '_get_symbol_resolver': self._get_symbol_resolver,
            }

    def _iter_vars(self, binfile, resolve, handle_id):
        """Fake: record the call and pair each canned variable with its ID."""
        self.calls.append(('_iter_vars', (binfile, resolve, handle_id)))
        return [(var, var.id) for var in self._return_iter_vars]

    def _get_symbol_resolver(self, known=None, dirnames=(), *,
                             handle_var,
                             filenames=None,
                             check_filename=None,
                             perfilecache=None,
                             ):
        """Fake: record every argument and return the canned resolver."""
        recorded = (known, dirnames, handle_var, filenames,
                    check_filename, perfilecache)
        self.calls.append(('_get_symbol_resolver', recorded))
        return self._return_get_symbol_resolver

    @staticmethod
    def _typical_variables():
        """Build a fresh list of the variables used by test_typical()."""
        return [
            info.Variable.from_parts('dir1/spam.c', None, 'var1', 'int'),
            info.Variable.from_parts('dir1/spam.c', None, 'var2', 'static int'),
            info.Variable.from_parts('dir1/spam.c', None, 'var3', 'char *'),
            info.Variable.from_parts('dir1/spam.c', 'func2', 'var4', 'const char *'),
            info.Variable.from_parts('dir1/eggs.c', None, 'var1', 'static int'),
            info.Variable.from_parts('dir1/eggs.c', 'func1', 'var2', 'static char *'),
            ]

    def test_typical(self):
        resolver = object()
        self._return_get_symbol_resolver = resolver
        self._return_iter_vars = self._typical_variables()
        known = object()
        filenames = object()

        found = list(vars_from_binary('python',
                                      known=known,
                                      filenames=filenames,
                                      **self.kwargs))

        # Compared against separately built objects, not the canned ones.
        self.assertEqual(found, self._typical_variables())
        # NOTE(review): the first two recorded slots are the fake's
        # (known, dirnames) positions, yet the expected values are
        # (filenames, known) -- the fake's parameter names look out of
        # step with how it gets called; confirm.
        self.assertEqual(self.calls, [
            ('_get_symbol_resolver', (filenames, known, info.Variable.from_id, None, None, {})),
            ('_iter_vars', ('python', resolver, None)),
            ])
# self._return_iter_symbols = [
# s_info.Symbol(('dir1/spam.c', None, 'var1'), 'variable', False),
# s_info.Symbol(('dir1/spam.c', None, 'var2'), 'variable', False),
# s_info.Symbol(('dir1/spam.c', None, 'func1'), 'function', False),
# s_info.Symbol(('dir1/spam.c', None, 'func2'), 'function', True),
# s_info.Symbol(('dir1/spam.c', None, 'var3'), 'variable', False),
# s_info.Symbol(('dir1/spam.c', 'func2', 'var4'), 'variable', False),
# s_info.Symbol(('dir1/ham.c', None, 'var1'), 'variable', True),
# s_info.Symbol(('dir1/eggs.c', None, 'var1'), 'variable', False),
# s_info.Symbol(('dir1/eggs.c', None, 'xyz'), 'other', False),
# s_info.Symbol(('dir1/eggs.c', '???', 'var2'), 'variable', False),
# s_info.Symbol(('???', None, 'var_x'), 'variable', False),
# s_info.Symbol(('???', '???', 'var_y'), 'variable', False),
# s_info.Symbol((None, None, '???'), 'other', False),
# ]
# known = object()
#
# vars_from_binary('python', knownvars=known, **this.kwargs)
# found = list(globals_from_symbols(['dir1'], self.iter_symbols))
#
# self.assertEqual(found, [
# info.Variable.from_parts('dir1/spam.c', None, 'var1', '???'),
# info.Variable.from_parts('dir1/spam.c', None, 'var2', '???'),
# info.Variable.from_parts('dir1/spam.c', None, 'var3', '???'),
# info.Variable.from_parts('dir1/spam.c', 'func2', 'var4', '???'),
# info.Variable.from_parts('dir1/eggs.c', None, 'var1', '???'),
# ])
# self.assertEqual(self.calls, [
# ('iter_symbols', (['dir1'],)),
# ])
#
# def test_no_symbols(self):
# self._return_iter_symbols = []
#
# found = list(globals_from_symbols(['dir1'], self.iter_symbols))
#
# self.assertEqual(found, [])
# self.assertEqual(self.calls, [
# ('iter_symbols', (['dir1'],)),
# ])
# XXX need functional test
......@@ -4,10 +4,10 @@ import unittest
from ..util import PseudoStr, StrProxy, Object
from .. import tool_imports_for_tests
with tool_imports_for_tests():
from c_analyzer_common.info import ID, UNKNOWN
from c_parser.info import (
normalize_vartype, Variable,
)
from c_analyzer.common.info import UNKNOWN, ID
from c_analyzer.variables.info import (
normalize_vartype, Variable
)
class NormalizeVartypeTests(unittest.TestCase):
......
import re
import textwrap
import unittest
from .. import tool_imports_for_tests
with tool_imports_for_tests():
from c_analyzer.common.info import ID
from c_analyzer.variables.info import Variable
from c_analyzer.variables.known import (
read_file,
from_file,
)
class _BaseTests(unittest.TestCase):
maxDiff = None
@property
def calls(self):
try:
return self._calls
except AttributeError:
self._calls = []
return self._calls
class ReadFileTests(_BaseTests):
    """Check read_file() when it is fed rows by a fake TSV reader."""

    # Rows handed back by the fake _read_tsv(); each test overrides this.
    _return_read_tsv = ()

    def _read_tsv(self, *args):
        """Fake TSV reader: record the call and return the canned rows."""
        self.calls.append(('_read_tsv', args))
        return self._return_read_tsv

    def test_typical(self):
        table = textwrap.dedent('''
filename funcname name kind declaration
file1.c - var1 variable static int
file1.c func1 local1 variable static int
file1.c - var2 variable int
file1.c func2 local2 variable char *
file2.c - var1 variable char *
''')
        # Skip the header row; only the first four whitespace runs are
        # column separators (the declaration itself may contain spaces).
        rows = []
        for line in table.strip().splitlines()[1:]:
            fields = re.sub(r'\s+', '\t', line, 4).split('\t')
            rows.append(tuple(field.strip() for field in fields))
        self._return_read_tsv = rows

        known = list(read_file('known.tsv', _read_tsv=self._read_tsv))

        self.assertEqual(known, [
            ('variable', ID('file1.c', '', 'var1'), 'static int'),
            ('variable', ID('file1.c', 'func1', 'local1'), 'static int'),
            ('variable', ID('file1.c', '', 'var2'), 'int'),
            ('variable', ID('file1.c', 'func2', 'local2'), 'char *'),
            ('variable', ID('file2.c', '', 'var1'), 'char *'),
            ])
        self.assertEqual(self.calls, [
            ('_read_tsv',
             ('known.tsv', 'filename\tfuncname\tname\tkind\tdeclaration')),
            ])

    def test_empty(self):
        self._return_read_tsv = []

        known = list(read_file('known.tsv', _read_tsv=self._read_tsv))

        self.assertEqual(known, [])
        self.assertEqual(self.calls, [
            ('_read_tsv', ('known.tsv', 'filename\tfuncname\tname\tkind\tdeclaration')),
            ])
class FromFileTests(_BaseTests):
    """Check from_file() with the file reader and var handler faked out."""

    # Canned data for the fakes below; each test overrides these.
    _return_read_file = ()
    _return_handle_var = ()

    def _read_file(self, infile):
        """Fake reader: record the call and iterate over the canned rows."""
        self.calls.append(('_read_file', (infile,)))
        return iter(self._return_read_file)

    def _handle_var(self, varid, decl):
        """Fake handler: record the call and return the next canned var."""
        self.calls.append(('_handle_var', (varid, decl)))
        # Consumed front-to-back, one entry per call.
        return self._return_handle_var.pop(0)

    def test_typical(self):
        expected = [
            Variable.from_parts('file1.c', '', 'var1', 'static int'),
            Variable.from_parts('file1.c', 'func1', 'local1', 'static int'),
            Variable.from_parts('file1.c', '', 'var2', 'int'),
            Variable.from_parts('file1.c', 'func2', 'local2', 'char *'),
            Variable.from_parts('file2.c', '', 'var1', 'char *'),
            ]
        rows = []
        for var in expected:
            rows.append(('variable', var.id, var.vartype))
        self._return_read_file = rows
        # A copy, since _handle_var() pops entries off the list.
        self._return_handle_var = list(expected)

        known = from_file('known.tsv',
                          handle_var=self._handle_var,
                          _read_file=self._read_file,
                          )

        self.assertEqual(known, {
            'variables': {var.id: var for var in expected},
            })
        handled = [('_handle_var', (var.id, var.vartype))
                   for var in expected]
        self.assertEqual(self.calls, [
            ('_read_file', ('known.tsv',)),
            *handled,
            ])

    def test_empty(self):
        self._return_read_file = []

        known = from_file('known.tsv',
                          handle_var=self._handle_var,
                          _read_file=self._read_file,
                          )

        self.assertEqual(known, {
            'variables': {},
            })
        self.assertEqual(self.calls, [
            ('_read_file', ('known.tsv',)),
            ])
# This is a script equivalent of running "python -m test.test_c_globals.cg".
from c_globals.__main__ import parse_args, main
from cpython.__main__ import parse_args, main
# This is effectively copied from cg/__main__.py:
......
......@@ -2,7 +2,10 @@ import glob
import os
import os.path
from . import SOURCE_DIRS, REPO_ROOT
# XXX need tests:
# * walk_tree()
# * glob_tree()
# * iter_files_by_suffix()
C_SOURCE_SUFFIXES = ('.c', '.h')
......@@ -115,24 +118,3 @@ def iter_files_by_suffix(root, suffixes, relparent=None, *,
# XXX Ignore repeated suffixes?
for suffix in suffixes:
yield from _iter_files(root, suffix, relparent)
def iter_cpython_files(*,
                       walk=walk_tree,
                       _files=iter_files_by_suffix,
                       ):
    """Yield each filename produced for SOURCE_DIRS, minus excluded trees.

    The candidates come from _files(SOURCE_DIRS, C_SOURCE_SUFFIXES,
    REPO_ROOT, walk=walk).  Any filename that starts with the
    "Include/cpython/" prefix (built with os.path.join) is skipped.
    """
    # The trailing '' makes join() end each prefix with a path separator.
    excluded_prefixes = (
        os.path.join('Include', 'cpython', ''),
        )
    candidates = _files(SOURCE_DIRS, C_SOURCE_SUFFIXES, REPO_ROOT,
                        walk=walk,
                        )
    for filename in candidates:
        # str.startswith() accepts a tuple of prefixes.
        if not filename.startswith(excluded_prefixes):
            yield filename
......@@ -3,6 +3,9 @@ import re
from .util import classonly, _NTBase
# XXX need tests:
# * ID.match()
UNKNOWN = '???'
......@@ -67,3 +70,69 @@ class ID(_NTBase, namedtuple('ID', 'filename funcname name')):
@property
def islocal(self):
return self.funcname is not None
def match(self, other, *,
          match_files=(lambda f1, f2: f1 == f2),
          ):
    """Return True if this ID and "other" refer to the same thing.

    The names must be equal and neither filename may be None.  One of
    the two IDs is expected to be fully known; the other one may use
    UNKNOWN for its filename and/or funcname (but never for its name).
    Two IDs that are both UNKNOWN in the same spot do not match.

    "match_files" is called with both filenames (when both are known)
    to decide whether they refer to the same file.

    It is up to the caller to know which of the two is the valid one.

    NotImplementedError is raised when a filename is UNKNOWN while the
    funcname on that same side is known.
    """
    # The name is mandatory and must agree.
    if self.name is None or other.name != self.name:
        return False

    # A missing (None) filename on either side can never match.
    if self.filename is None or other.filename is None:
        return False

    if self.filename == UNKNOWN:
        # "other" has to be the valid one.
        # NOTE(review): other.filename is not compared with UNKNOWN
        # here, so two IDs with unknown filenames can match -- confirm
        # that this is intended.
        if other.funcname == UNKNOWN:
            return False
        if self.funcname != UNKNOWN:
            # XXX Try matching funcname even though we don't
            # know the filename?
            raise NotImplementedError
        return True

    if other.filename == UNKNOWN:
        # "self" has to be the valid one.
        if self.funcname == UNKNOWN:
            return False
        if other.funcname != UNKNOWN:
            # XXX Try matching funcname even though we don't
            # know the filename?
            raise NotImplementedError
        return True

    # Both filenames are known, so they have to agree.
    if not match_files(self.filename, other.filename):
        return False

    # Lastly, compare the funcnames.
    if self.funcname == UNKNOWN:
        # "other" has to be the valid one, and it has to be a local.
        if other.funcname == UNKNOWN:
            return False
        return other.funcname is not None
    if other.funcname == UNKNOWN:
        # "self" is the valid one here, and it has to be a local.
        return self.funcname is not None
    if self.funcname == other.funcname:
        # Both are valid.
        return True
    return False
def basic(variables, *,
          _print=print):
    """Print one plain line per variable.

    Each line is the variable's location ("file:name", or
    "file:func():name" when it has a funcname) left-justified to 64
    columns, then a space and the variable's type.
    """
    for var in variables:
        location = (f'{var.filename}:{var.funcname}():{var.name}'
                    if var.funcname
                    else f'{var.filename}:{var.name}')
        _print(f'{location:<64} {var.vartype}')
......@@ -2,6 +2,8 @@ import re
import shlex
import subprocess
from ..common.info import UNKNOWN
from . import source
......@@ -194,7 +196,28 @@ def parse_func(stmt, body):
return name, signature
def parse_var(stmt):
#TYPE_SPEC = rf'''(?:
# )'''
#VAR_DECLARATOR = rf'''(?:
# )'''
#VAR_DECL = rf'''(?:
# {TYPE_SPEC}+
# {VAR_DECLARATOR}
# \s*
# )'''
#VAR_DECLARATION = rf'''(?:
# {VAR_DECL}
# (?: = [^=] [^;]* )?
# ;
# )'''
#
#
#def parse_variable(decl, *, inFunc=False):
# """Return [(name, storage, vartype)] for the given variable declaration."""
# ...
def _parse_var(stmt):
"""Return (name, vartype) for the given variable declaration."""
stmt = stmt.rstrip(';')
m = LOCAL_STMT_START_RE.match(stmt)
......@@ -220,6 +243,27 @@ def parse_var(stmt):
return name, vartype
def extract_storage(decl, *, infunc=None):
    """Return the storage class implied by the given declaration.

    Only the storage is returned (not a "(storage, vartype)" pair):

    * UNKNOWN is passed through unchanged
    * a leading "static " or "extern " gives "static" or "extern"
    * otherwise the default is "implicit" (or "local" if infunc is true)

    NotImplementedError is raised if "static" or "extern" shows up as a
    separate word anywhere else in the declaration.
    """
    if decl == UNKNOWN:
        return decl
    if decl.startswith('static '):
        # XXX Eventually also return the remaining vartype, e.g.
        # decl.partition(' ')[2].strip().
        return 'static'
    elif decl.startswith('extern '):
        return 'extern'
    # This must be a raw string: in a regular string literal "\b" is a
    # backspace character rather than a word boundary, in which case
    # the pattern would never match a real declaration.
    elif re.match(r'.*\b(static|extern)\b', decl):
        raise NotImplementedError
    elif infunc:
        return 'local'
    else:
        return 'implicit'
def parse_compound(stmt, blocks):
"""Return (headers, bodies) for the given compound statement."""
# XXX Identify declarations inside compound statements
......@@ -228,14 +272,17 @@ def parse_compound(stmt, blocks):
def iter_variables(filename, *,
preprocessed=False,
_iter_source_lines=source.iter_lines,
_iter_global=iter_global_declarations,
_iter_local=iter_local_statements,
_parse_func=parse_func,
_parse_var=parse_var,
_parse_var=_parse_var,
_parse_compound=parse_compound,
):
"""Yield (funcname, name, vartype) for every variable in the given file."""
if preprocessed:
raise NotImplementedError
lines = _iter_source_lines(filename)
for stmt, body in _iter_global(lines):
# At the file top-level we only have to worry about vars & funcs.
......@@ -256,7 +303,7 @@ def iter_variables(filename, *,
def _iter_locals(lines, *,
_iter_statements=iter_local_statements,
_parse_var=parse_var,
_parse_var=_parse_var,
_parse_compound=parse_compound,
):
compound = [lines]
......@@ -278,18 +325,15 @@ def _iter_locals(lines, *,
compound.extend(bodies)
def iter_all(dirnames):
def iter_all(filename, *,
preprocessed=False,
):
"""Yield a Declaration for each one found.
If there are duplicates, due to preprocessor conditionals, then
they are checked to make sure they are the same.
"""
raise NotImplementedError
def iter_preprocessed(dirnames):
"""Yield a Declaration for each one found.
All source files are run through the preprocessor first.
"""
raise NotImplementedError
# XXX For the moment we cheat.
for funcname, name, decl in iter_variables(filename,
preprocessed=preprocessed):
yield 'variable', funcname, name, decl
from ..common.info import UNKNOWN, ID
from . import declarations
# XXX need tests:
# * variables
# * variable
# * variable_from_id
def _iter_vars(filenames, preprocessed, *,
               handle_id=None,
               _iter_decls=declarations.iter_all,
               ):
    """Yield (varid, decl) for each variable declared in the given files.

    "filenames" may be None or empty, in which case nothing is yielded.
    Declarations of any kind other than "variable" are ignored.  Each
    ID is built by calling handle_id(filename, funcname, name), where
    handle_id defaults to ID.
    """
    make_id = ID if handle_id is None else handle_id
    for filename in filenames or ():
        found = _iter_decls(filename,
                            preprocessed=preprocessed,
                            )
        for kind, funcname, name, decl in found:
            if kind == 'variable':
                yield make_id(filename, funcname, name), decl
# XXX Add a "handle_var" arg like we did for get_resolver()?
def variables(*filenames,
              perfilecache=None,
              preprocessed=False,
              known=None,  # for types
              handle_id=None,
              _iter_vars=_iter_vars,
              ):
    """Yield (varid, decl) for each variable found in the given files.

    The filenames may be given either as separate positional arguments
    or as one non-string iterable of filenames.

    "preprocessed" is forwarded to _iter_vars().  If it is provided
    (and not False/None) then it is used to decide which tool to use
    to parse the source code after it runs through the C preprocessor.
    Otherwise the raw source is presumably parsed as-is (the original
    docstring was cut off at this point -- TODO confirm).

    "handle_id", if given, is forwarded to _iter_vars() as well.
    "known" is currently unused.

    NotImplementedError is raised (once iteration starts) if
    "perfilecache" is provided, since caching is not supported yet.
    """
    # Support variables([f1, f2]) in addition to variables(f1, f2).
    # This must be an isinstance() check: a bare "(filenames[0], str)"
    # tuple is always true, so the unpacking would never happen.
    if len(filenames) == 1 and not isinstance(filenames[0], str):
        filenames, = filenames

    if perfilecache is None:
        if handle_id is None:
            yield from _iter_vars(filenames, preprocessed)
        else:
            # Only passed along when explicitly requested so that an
            # injected _iter_vars without that parameter keeps working.
            yield from _iter_vars(filenames, preprocessed,
                                  handle_id=handle_id)
    else:
        # XXX Cache per-file variables (e.g. `{filename: [(varid, decl)]}`).
        raise NotImplementedError
def variable(name, filenames, *,
             local=False,
             perfilecache=None,
             preprocessed=False,
             handle_id=None,
             _iter_vars=variables,
             ):
    """Return (varid, decl) for the first variable with the given name.

    With "local" false only variables without a funcname are
    considered; with "local" true only those with one.  (None, None) is
    returned if nothing matches.

    Since the first matching local in the files always wins, pass
    perfilecache and pop each variable from the cache after using it
    if that is a problem.

    NotImplementedError is raised if a matching local has UNKNOWN as
    its funcname.
    """
    # NOTE(review): handle_id is accepted but not passed on -- confirm.
    candidates = _iter_vars(filenames,
                            perfilecache=perfilecache,
                            preprocessed=preprocessed,
                            )
    for varid, decl in candidates:
        if varid.name != name:
            continue
        funcname = varid.funcname
        if local:
            if not funcname:
                # A global with the same name; keep looking.
                continue
            if funcname == UNKNOWN:
                raise NotImplementedError
            return varid, decl
        if not funcname:
            return varid, decl
    # No matching variable was found.
    return None, None
def variable_from_id(id, filenames, *,
                     perfilecache=None,
                     preprocessed=False,
                     handle_id=None,
                     _get_var=variable,
                     ):
    """Return (varid, decl) for the first found variable that matches.

    "id" is either a plain name (str) or an object with "filename",
    "funcname" and "name" attributes.  In the second case:

    * a funcname of UNKNOWN means a local variable is looked for
    * any other non-empty funcname raises NotImplementedError
    * a known filename replaces "filenames" as the only file searched

    The actual lookup is delegated to _get_var().
    """
    if isinstance(id, str):
        name = id
        local = False
    else:
        funcname = id.funcname
        if funcname == UNKNOWN:
            local = True
        elif funcname:
            raise NotImplementedError
        else:
            local = False

        name = id.name
        # A known filename narrows the search down to that one file.
        if id.filename and id.filename != UNKNOWN:
            filenames = [id.filename]

    return _get_var(name, filenames,
                    local=local,
                    perfilecache=perfilecache,
                    preprocessed=preprocessed,
                    handle_id=handle_id,
                    )
import re
from c_analyzer_common.info import UNKNOWN
from ..common.info import UNKNOWN, ID
from .info import Variable
from .preprocessor import _iter_clean_lines
......@@ -55,7 +54,7 @@ def parse_variable_declaration(srcline):
def parse_variable(srcline, funcname=None):
"""Return a Variable for the variable declared on the line (or None)."""
"""Return (varid, decl) for the variable declared on the line (or None)."""
line = srcline.strip()
# XXX Handle more than just static variables.
......@@ -74,7 +73,7 @@ def iter_variables(filename, *,
_get_srclines=get_srclines,
_default_parse_variable=parse_variable,
):
"""Yield a Variable for each in the given source file."""
"""Yield (varid, decl) for each variable in the given source file."""
if parse_variable is None:
parse_variable = _default_parse_variable
......@@ -99,13 +98,13 @@ def iter_variables(filename, *,
info = parse_variable(line, funcname)
if isinstance(info, list):
for name, _funcname, decl in info:
yield Variable.from_parts(filename, _funcname, name, decl)
yield ID(filename, _funcname, name), decl
continue
name, decl = info
if name is None:
continue
yield Variable.from_parts(filename, funcname, name, decl)
yield ID(filename, funcname, name), decl
def _match_varid(variable, name, funcname, ignored=None):
......@@ -134,12 +133,12 @@ def find_variable(filename, funcname, name, *,
Return None if the variable is not found.
"""
for variable in _iter_variables(filename,
for varid, decl in _iter_variables(filename,
srccache=srccache,
parse_variable=parse_variable,
):
if _match_varid(variable, name, funcname, ignored):
return variable
if _match_varid(varid, name, funcname, ignored):
return varid, decl
else:
return None
......@@ -149,10 +148,10 @@ def find_variables(varids, filenames=None, *,
parse_variable=None,
_find_symbol=find_variable,
):
"""Yield a Variable for each ID.
"""Yield (varid, decl) for each ID.
If the variable is not found then its decl will be UNKNOWN. That
way there will be one resulting Variable per given ID.
way there will be one resulting variable per given ID.
"""
if srccache is _NOT_SET:
srccache = {}
......@@ -163,18 +162,18 @@ def find_variables(varids, filenames=None, *,
srcfiles = [varid.filename]
else:
if not filenames:
yield Variable(varid, UNKNOWN, UNKNOWN)
yield varid, UNKNOWN
continue
srcfiles = filenames
for filename in srcfiles:
found = _find_varid(filename, varid.funcname, varid.name,
ignored=used,
srccache=srccache,
parse_variable=parse_variable,
)
if found:
yield found
used.add(found)
varid, decl = _find_varid(filename, varid.funcname, varid.name,
ignored=used,
srccache=srccache,
parse_variable=parse_variable,
)
if varid:
yield varid, decl
used.add(varid)
break
else:
yield Variable(varid, UNKNOWN, UNKNOWN)
yield varid, UNKNOWN
......@@ -3,8 +3,7 @@ import shlex
import os
import re
from c_analyzer_common import util
from . import info
from ..common import util, info
CONTINUATION = '\\' + os.linesep
......
import os
import os.path
import shutil
import sys
from c_analyzer_common import util, info
from . import source
from .info import Symbol
#PYTHON = os.path.join(REPO_ROOT, 'python')
PYTHON = sys.executable
from c_analyzer.common import util, info
def iter_symbols(binary=PYTHON, dirnames=None, *,
# Alternately, use look_up_known_symbol()
# from c_globals.supported.
find_local_symbol=source.find_symbol,
_file_exists=os.path.exists,
_iter_symbols_nm=(lambda b, *a: _iter_symbols_nm(b, *a)),
):
"""Yield a Symbol for each symbol found in the binary."""
if not _file_exists(binary):
raise Exception('executable missing (need to build it first?)')
if find_local_symbol:
cache = {}
def find_local_symbol(name, *, _find=find_local_symbol):
return _find(name, dirnames, _perfilecache=cache)
else:
find_local_symbol = None
from .info import Symbol
if os.name == 'nt':
# XXX Support this.
raise NotImplementedError
else:
yield from _iter_symbols_nm(binary, find_local_symbol)
# XXX need tests:
# * iter_symbols
#############################
# binary format (e.g. ELF)
NM_KINDS = {
'b': Symbol.KIND.VARIABLE, # uninitialized
'd': Symbol.KIND.VARIABLE, # initialized
#'g': Symbol.KIND.VARIABLE, # uninitialized
#'s': Symbol.KIND.VARIABLE, # initialized
't': Symbol.KIND.FUNCTION,
}
SPECIAL_SYMBOLS = {
# binary format (e.g. ELF)
'__bss_start',
'__data_start',
'__dso_handle',
......@@ -63,29 +41,23 @@ def _is_special_symbol(name):
return False
#############################
# "nm"
NM_KINDS = {
'b': Symbol.KIND.VARIABLE, # uninitialized
'd': Symbol.KIND.VARIABLE, # initialized
#'g': Symbol.KIND.VARIABLE, # uninitialized
#'s': Symbol.KIND.VARIABLE, # initialized
't': Symbol.KIND.FUNCTION,
}
def iter_symbols(binfile, *,
nm=None,
handle_id=None,
_which=shutil.which,
_run=util.run_cmd,
):
"""Yield a Symbol for each relevant entry reported by the "nm" command."""
if nm is None:
nm = _which('nm')
if not nm:
raise NotImplementedError
if handle_id is None:
handle_id = info.ID
def _iter_symbols_nm(binary, find_local_symbol=None,
*,
_which=shutil.which,
_run=util.run_cmd,
):
nm = _which('nm')
if not nm:
raise NotImplementedError
argv = [nm,
'--line-numbers',
binary,
binfile,
]
try:
output = _run(argv)
......@@ -95,23 +67,20 @@ def _iter_symbols_nm(binary, find_local_symbol=None,
raise NotImplementedError
raise
for line in output.splitlines():
(name, kind, external, filename, funcname, vartype,
) = _parse_nm_line(line,
_find_local_symbol=find_local_symbol,
)
(name, kind, external, filename, funcname,
) = _parse_nm_line(line)
if kind != Symbol.KIND.VARIABLE:
continue
elif _is_special_symbol(name):
continue
assert vartype is None
yield Symbol(
id=(filename, funcname, name),
id=handle_id(filename, funcname, name),
kind=kind,
external=external,
)
def _parse_nm_line(line, *, _find_local_symbol=None):
def _parse_nm_line(line):
_origline = line
_, _, line = line.partition(' ') # strip off the address
line = line.strip()
......@@ -128,18 +97,9 @@ def _parse_nm_line(line, *, _find_local_symbol=None):
else:
filename = info.UNKNOWN
vartype = None
name, islocal = _parse_nm_name(name, kind)
if islocal:
funcname = info.UNKNOWN
if _find_local_symbol is not None:
filename, funcname, vartype = _find_local_symbol(name)
filename = filename or info.UNKNOWN
funcname = funcname or info.UNKNOWN
else:
funcname = None
# XXX find filename and vartype?
return name, kind, external, filename, funcname, vartype
funcname = info.UNKNOWN if islocal else None
return name, kind, external, filename, funcname
def _parse_nm_name(name, kind):
......
import os
import os.path
import shutil
from ..common import files
from ..common.info import UNKNOWN, ID
from ..parser import find as p_find
from . import _nm
from .info import Symbol
# XXX need tests:
# * get_resolver()
# * get_resolver_from_dirs()
# * symbol()
# * symbols()
# * variables()
def _resolve_known(symbol, knownvars):
for varid in knownvars:
if symbol.match(varid):
break
else:
return None
return knownvars.pop(varid)
def get_resolver(filenames=None, known=None, *,
                 handle_var,
                 check_filename=None,
                 perfilecache=None,
                 preprocessed=False,
                 _from_source=p_find.variable_from_id,
                 ):
    """Return a "resolver" func for the given known vars/types and filenames.

    "handle_var" is a callable that takes (ID, decl) and returns a
    Variable.  Variable.from_id is a suitable callable.

    "check_filename" is only consulted when both known variables and
    filenames are given.  It is called with each symbol's filename and
    a false result makes the symbol resolve to None.  If it is not
    provided then a filename is accepted only if it is in "filenames".

    "perfilecache" and "preprocessed" are passed through to the source
    lookup unchanged.

    The returned func takes a single Symbol and returns a corresponding
    Variable.  If the symbol was located then the variable will be
    valid, populated with the corresponding information.  Otherwise None
    is returned.
    """
    # Only the "variables" entry of "known" is used here.
    knownvars = (known or {}).get('variables')
    if knownvars:
        # _resolve_known() pops each match, so work on a private copy.
        knownvars = dict(knownvars)  # a copy
        if filenames:
            if check_filename is None:
                # Materialize so the membership test below can be repeated.
                filenames = list(filenames)
                def check_filename(filename):
                    return filename in filenames
            def resolve(symbol):
                # XXX Check "found" instead?
                if not check_filename(symbol.filename):
                    return None
                found = _resolve_known(symbol, knownvars)
                if found is None:
                    # Not a known variable: fall back to the source lookup.
                    #return None
                    varid, decl = _from_source(symbol, filenames,
                                               perfilecache=perfilecache,
                                               preprocessed=preprocessed,
                                               )
                    found = handle_var(varid, decl)
                return found
        else:
            # Known variables only; no source files to fall back on.
            def resolve(symbol):
                return _resolve_known(symbol, knownvars)
    elif filenames:
        # No known variables: every symbol is looked up in the source.
        def resolve(symbol):
            varid, decl = _from_source(symbol, filenames,
                                       perfilecache=perfilecache,
                                       preprocessed=preprocessed,
                                       )
            return handle_var(varid, decl)
    else:
        # Nothing to resolve against.
        def resolve(symbol):
            return None
    return resolve
def get_resolver_from_dirs(dirnames, known=None, *,
                           handle_var,
                           suffixes=('.c',),
                           perfilecache=None,
                           preprocessed=False,
                           _iter_files=files.iter_files_by_suffix,
                           _get_resolver=get_resolver,
                           ):
    """Return a "resolver" func for the known vars/types and directories.

    Each directory name is normalized to end with the path separator.
    The files with the given suffixes under those directories are the
    ones handed to get_resolver(), and a symbol's filename is accepted
    only if it starts with one of the normalized directory names.

    "dirnames" should be absolute paths.  If not then they will be
    resolved relative to CWD.

    See get_resolver().
    """
    sep = os.path.sep
    dirnames = [name if name.endswith(sep) else name + sep
                for name in dirnames]
    filenames = _iter_files(dirnames, suffixes)

    def check_filename(filename):
        return any(filename.startswith(prefix) for prefix in dirnames)

    return _get_resolver(
        filenames,
        known,
        handle_var=handle_var,
        check_filename=check_filename,
        perfilecache=perfilecache,
        preprocessed=preprocessed,
    )
def symbol(symbol, filenames, known=None, *,
           perfilecache=None,
           preprocessed=False,
           handle_id=None,
           _get_resolver=get_resolver,
           ):
    """Return a Variable for the one matching the given symbol.

    "symbol" can be one of several objects:

    * Symbol - use the contained info
    * name (str) - look for a global variable with that name
    * (filename, name) - look for named global in file
    * (filename, funcname, name) - look for named local in file

    A name is always required.  If the filename is None, "", or
    "UNKNOWN" then all files will be searched.  If the funcname is
    "" or "UNKNOWN" then only local variables will be searched for.

    The lookup itself is done by the resolver that "_get_resolver"
    returns.
    """
    # NOTE(review): this passes (known, filenames) positionally plus a
    # "handle_id" keyword, while get_resolver() in this module is declared
    # as (filenames, known, *, handle_var, ...) with "handle_var" required
    # and no "handle_id" parameter.  With the default "_get_resolver" the
    # call looks like it cannot succeed -- confirm the intended signature.
    resolve = _get_resolver(known, filenames,
                            handle_id=handle_id,
                            perfilecache=perfilecache,
                            preprocessed=preprocessed,
                            )
    return resolve(symbol)
def _get_platform_tool():
if os.name == 'nt':
# XXX Support this.
raise NotImplementedError
elif nm := shutil.which('nm'):
return lambda b, hi: _nm.iter_symbols(b, nm=nm, handle_id=hi)
else:
raise NotImplementedError
def symbols(binfile, *,
            handle_id=None,
            _file_exists=os.path.exists,
            _get_platform_tool=_get_platform_tool,
            ):
    """Yield a Symbol for each one found in the binary.

    The binary must already exist; otherwise an Exception is raised
    once iteration starts.  The actual work is delegated to the
    platform-specific tool.
    """
    if not _file_exists(binfile):
        raise Exception('executable missing (need to build it first?)')

    find_symbols = _get_platform_tool()
    yield from find_symbols(binfile, handle_id)
def variables(binfile, *,
              resolve,
              handle_id=None,
              _iter_symbols=symbols,
              ):
    """Yield (Variable, Symbol) for each found variable symbol.

    Symbols of any other kind are skipped.  The Variable is whatever
    "resolve" returns for the symbol, with any false result normalized
    to None.
    """
    found = _iter_symbols(binfile, handle_id=handle_id)
    for sym in found:
        if sym.kind != Symbol.KIND.VARIABLE:
            continue
        resolved = resolve(sym)
        if not resolved:
            resolved = None
        yield resolved, sym
from collections import namedtuple
from c_analyzer_common.info import ID
from c_analyzer_common.util import classonly, _NTBase
from c_analyzer.common.info import ID
from c_analyzer.common.util import classonly, _NTBase
class Symbol(_NTBase, namedtuple('Symbol', 'id kind external')):
......
from ..common import files
from ..common.info import UNKNOWN
from ..parser import (
find as p_find,
)
from ..symbols import (
info as s_info,
find as s_find,
)
from .info import Variable
# XXX need tests:
# * vars_from_source
def _remove_cached(cache, var):
if not cache:
return
try:
cached = cache[var.filename]
cached.remove(var)
except (KeyError, IndexError):
pass
def vars_from_binary(binfile, *,
                     known=None,
                     filenames=None,
                     handle_id=None,
                     check_filename=None,
                     handle_var=Variable.from_id,
                     _iter_vars=s_find.variables,
                     _get_symbol_resolver=s_find.get_resolver,
                     ):
    """Yield a Variable for each Symbol found in the binary.

    Details are filled in from the given "known" variables and types.
    A symbol that could not be resolved is still yielded, as a Variable
    whose storage and vartype are both UNKNOWN.
    """
    perfile = {}
    resolver = _get_symbol_resolver(
        filenames,
        known,
        handle_var=handle_var,
        check_filename=check_filename,
        perfilecache=perfile,
    )
    pairs = _iter_vars(binfile, resolve=resolver, handle_id=handle_id)
    for var, symbol in pairs:
        if var is None:
            var = Variable(symbol.id, UNKNOWN, UNKNOWN)
        yield var
        # Once handed out, the variable is dropped from the per-file cache.
        _remove_cached(perfile, var)
def vars_from_source(filenames, *,
                     preprocessed=None,
                     known=None,
                     handle_id=None,
                     handle_var=Variable.from_id,
                     iter_vars=p_find.variables,
                     ):
    """Yield a Variable for each declaration in the raw source code.

    Details are filled in from the given "known" variables and types.
    Each (ID, decl) pair produced by "iter_vars" is turned into a
    Variable by "handle_var".
    """
    perfile = {}
    declarations = iter_vars(
        filenames or (),
        perfilecache=perfile,
        preprocessed=preprocessed,
        known=known,
        handle_id=handle_id,
    )
    for varid, decl in declarations:
        found = handle_var(varid, decl)
        yield found
        # Once handed out, the variable is dropped from the per-file cache.
        _remove_cached(perfile, found)
from collections import namedtuple
import re
from c_analyzer_common import info, util
from c_analyzer_common.util import classonly, _NTBase
from ..common.info import ID, UNKNOWN
from ..common.util import classonly, _NTBase
def normalize_vartype(vartype):
......@@ -16,26 +15,7 @@ def normalize_vartype(vartype):
return str(vartype)
def extract_storage(decl, *, isfunc=False):
    """Return (storage, vartype) based on the given declaration.

    The declaration is returned unchanged as the vartype.  The storage
    is "static" or "extern" if the declaration starts with that
    keyword.  Otherwise the default storage is "local" if "isfunc" is
    true and "implicit" if not.  An UNKNOWN declaration yields
    (UNKNOWN, UNKNOWN).

    NotImplementedError is raised if "static" or "extern" shows up as a
    whole word anywhere other than at the start of the declaration.
    """
    if decl == info.UNKNOWN:
        return decl, decl
    if decl.startswith('static '):
        return 'static', decl
    elif decl.startswith('extern '):
        return 'extern', decl
    # The pattern must be a raw string: in a plain string "\b" is a
    # backspace character rather than a word boundary, so this check
    # could never match.
    elif re.match(r'.*\b(static|extern)\b', decl):
        raise NotImplementedError
    elif isfunc:
        return 'local', decl
    else:
        return 'implicit', decl
# XXX Variable.vartype -> decl (Declaration).
class Variable(_NTBase,
namedtuple('Variable', 'id storage vartype')):
......@@ -52,16 +32,23 @@ class Variable(_NTBase,
@classonly
def from_parts(cls, filename, funcname, name, decl, storage=None):
varid = ID(filename, funcname, name)
if storage is None:
storage, decl = extract_storage(decl, isfunc=funcname)
id = info.ID(filename, funcname, name)
self = cls(id, storage, decl)
self = cls.from_id(varid, decl)
else:
self = cls(varid, storage, decl)
return self
@classonly
def from_id(cls, varid, decl):
from ..parser.declarations import extract_storage
storage = extract_storage(decl, infunc=varid.funcname)
return cls(varid, storage, decl)
def __new__(cls, id, storage, vartype):
self = super().__new__(
cls,
id=info.ID.from_raw(id),
id=ID.from_raw(id),
storage=str(storage) if storage else None,
vartype=normalize_vartype(vartype) if vartype else None,
)
......@@ -77,10 +64,10 @@ class Variable(_NTBase,
if not self.id:
raise TypeError('missing id')
if not self.filename or self.filename == info.UNKNOWN:
if not self.filename or self.filename == UNKNOWN:
raise TypeError(f'id missing filename ({self.id})')
if self.funcname and self.funcname == info.UNKNOWN:
if self.funcname and self.funcname == UNKNOWN:
raise TypeError(f'id missing funcname ({self.id})')
self.id.validate()
......@@ -89,12 +76,12 @@ class Variable(_NTBase,
"""Fail if the object is invalid (i.e. init with bad data)."""
self._validate_id()
if self.storage is None or self.storage == info.UNKNOWN:
if self.storage is None or self.storage == UNKNOWN:
raise TypeError('missing storage')
elif self.storage not in self.STORAGE:
raise ValueError(f'unsupported storage {self.storage:r}')
if self.vartype is None or self.vartype == info.UNKNOWN:
if self.vartype is None or self.vartype == UNKNOWN:
raise TypeError('missing vartype')
@property
......
import csv
from ..common.info import ID, UNKNOWN
from ..common.util import read_tsv
from .info import Variable
# XXX need tests:
# * read_file()
# * look_up_variable()
COLUMNS = ('filename', 'funcname', 'name', 'kind', 'declaration')
HEADER = '\t'.join(COLUMNS)
def read_file(infile, *,
              _read_tsv=read_tsv,
              ):
    """Yield (kind, id, decl) for each row in the data file.

    An empty funcname, or one given as "-", is stored as None in the
    ID.  The caller is responsible for validating each row.
    """
    for filename, funcname, name, kind, declaration in _read_tsv(infile, HEADER):
        if funcname == '-' or not funcname:
            funcname = None
        yield kind, ID(filename, funcname, name), declaration
def from_file(infile, *,
              handle_var=Variable.from_id,
              _read_file=read_file,
              ):
    """Return the info for known declarations in the given file.

    The result is a dict with a "variables" entry that maps each ID to
    the object "handle_var" builds from (ID, decl).  Each such object
    is validated before it is stored.

    ValueError is raised for any row whose kind is not "variable".
    """
    known = {
        'variables': {},
        #'types': {},
        #'constants': {},
        #'macros': {},
    }
    for kind, varid, decl in _read_file(infile):
        if kind == 'variable':
            values = known['variables']
            value = handle_var(varid, decl)
        else:
            # Report the row as read; there is no raw "row" in scope here.
            row = (kind, varid, decl)
            raise ValueError(f'unsupported kind in row {row}')
        value.validate()
        values[varid] = value
    return known
def look_up_variable(varid, knownvars, *,
                     match_files=(lambda f1, f2: f1 == f2),
                     ):
    """Return the known Variable matching the given ID.

    "knownvars" is a mapping of ID to Variable.

    "match_files" is used to verify if two filenames point to
    the same file.

    If the ID's funcname is UNKNOWN then only known locals (entries
    with a funcname) are considered and the first one with the same
    name is returned.  If the ID also has a usable filename then the
    entry's filename must match it too.  Otherwise the ID is looked up
    directly.

    If no match is found then None is returned.  NotImplementedError
    is raised if the funcname is usable but the filename is not.
    """
    if not knownvars:
        return None

    if varid.funcname == UNKNOWN:
        anyfile = not varid.filename or varid.filename == UNKNOWN
        # The loop variable must not reuse the name "varid": that would
        # turn every check below into a comparison of an entry to itself.
        for knownid in knownvars:
            if not knownid.funcname:
                continue
            if not anyfile and not match_files(knownid.filename,
                                               varid.filename):
                continue
            if knownid.name == varid.name:
                return knownvars[knownid]
        return None
    elif not varid.filename or varid.filename == UNKNOWN:
        raise NotImplementedError
    else:
        # The mapping is keyed by ID.  Objects that wrap one in an "id"
        # attribute are still accepted.
        return knownvars.get(getattr(varid, 'id', varid))
from c_analyzer_common import SOURCE_DIRS
from c_analyzer_common.info import UNKNOWN
from c_symbols import (
info as s_info,
binary as b_symbols,
source as s_symbols,
resolve,
)
from c_parser import info, declarations
# XXX needs tests:
# * iter_variables
def globals_from_binary(binfile=b_symbols.PYTHON, *,
                        knownvars=None,
                        dirnames=None,
                        _iter_symbols=b_symbols.iter_symbols,
                        _resolve=resolve.symbols_to_variables,
                        _get_symbol_resolver=resolve.get_resolver,
                        ):
    """Yield a Variable for each global Symbol found in the binary.

    Details are filled in from the given "known" variables and types.
    Variables whose vartype is UNKNOWN are yielded as well, whether or
    not they are global.
    """
    found = _iter_symbols(binfile, find_local_symbol=None)
    resolver = _get_symbol_resolver(knownvars, dirnames)
    for variable in _resolve(found, resolve=resolver):
        # Keep globals, plus anything we couldn't find.
        # XXX Drop the "UNKNOWN" condition?
        if variable.isglobal or variable.vartype == UNKNOWN:
            yield variable
def globals_from_declarations(dirnames=SOURCE_DIRS, *,
                              known=None,
                              ):
    """Yield a Variable for each found declaration.

    Details are filled in from the given "known" variables and types.

    This is a placeholder: it always raises NotImplementedError.
    """
    raise NotImplementedError
def iter_variables(kind='platform', *,
                   known=None,
                   dirnames=None,
                   _resolve_symbols=resolve.symbols_to_variables,
                   _get_symbol_resolver=resolve.get_resolver,
                   _symbols_from_binary=b_symbols.iter_symbols,
                   _symbols_from_source=s_symbols.iter_symbols,
                   _iter_raw=declarations.iter_all,
                   _iter_preprocessed=declarations.iter_preprocessed,
                   ):
    """Yield a Variable for each one found (e.g. in files).

    "kind" selects where the variables come from:

    * "symbols" - symbols found in the source, resolved to variables
    * "platform" - symbols found in the binary, resolved to variables
      (the default, also used for any false value)
    * "declarations" - variables among the raw declarations
    * "preprocessed" - variables among the preprocessed declarations

    ValueError is raised for any other kind.
    """
    kind = kind or 'platform'

    if kind in ('symbols', 'platform'):
        knownvars = (known or {}).get('variables')
        if kind == 'symbols':
            found = _symbols_from_source(dirnames, known)
        else:
            found = _symbols_from_binary(find_local_symbol=None)
        resolver = _get_symbol_resolver(knownvars, dirnames)
        yield from _resolve_symbols(found, resolve=resolver)
    elif kind in ('declarations', 'preprocessed'):
        iter_decls = _iter_raw if kind == 'declarations' else _iter_preprocessed
        for decl in iter_decls(dirnames):
            if isinstance(decl, info.Variable):
                yield decl
    else:
        raise ValueError(f'unsupported kind {kind!r}')
def globals(dirnames, known, *,
            kind=None,  # Use the default.
            _iter_variables=iter_variables,
            ):
    """Yield each found variable that is global."""
    candidates = _iter_variables(kind, known=known, dirnames=dirnames)
    for candidate in candidates:
        if candidate.isglobal:
            yield candidate
def basic(globals, *,
          _print=print):
    """Print one line per variable: its location, then its vartype.

    The location is "<filename>:<name>", with "<funcname>():" inserted
    before the name for a variable that has a funcname.  It is padded
    to 64 columns so the vartypes line up.
    """
    for variable in globals:
        scope = f'{variable.funcname}():' if variable.funcname else ''
        location = f'{variable.filename}:{scope}{variable.name}'
        _print(f'{location:<64} {variable.vartype}')
import os.path
from c_analyzer_common import files
from c_analyzer_common.info import UNKNOWN
from c_parser import declarations, info
from .info import Symbol
from .source import _find_symbol
# XXX need tests:
# * look_up_known_symbol()
# * symbol_from_source()
# * get_resolver()
# * symbols_to_variables()
def look_up_known_symbol(symbol, knownvars, *,
                         match_files=(lambda f1, f2: f1 == f2),
                         ):
    """Return the known variable matching the given symbol.

    "knownvars" is a mapping of common.ID to parser.Variable.

    "match_files" is used to verify if two filenames point to
    the same file.

    If the symbol's funcname is UNKNOWN then only known locals are
    considered and the first one with the same name (and, if the
    symbol has a usable filename, a matching file) is returned.
    Otherwise the symbol's ID is looked up directly.  None is returned
    if there is no match.
    """
    if not knownvars:
        return None

    nofile = not symbol.filename or symbol.filename == UNKNOWN

    if symbol.funcname != UNKNOWN:
        if nofile:
            raise NotImplementedError
        return knownvars.get(symbol.id)

    for varid in knownvars:
        if not varid.funcname:
            continue
        if not nofile and not match_files(varid.filename, symbol.filename):
            continue
        if varid.name == symbol.name:
            return knownvars[varid]
    return None
def find_in_source(symbol, dirnames, *,
                   _perfilecache={},
                   _find_symbol=_find_symbol,
                   _iter_files=files.iter_files_by_suffix,
                   ):
    """Return the Variable matching the given Symbol.

    Only the symbol's own file is searched if it has a usable filename.
    Otherwise every .c and .h file under "dirnames" is searched.
    Symbols with a usable funcname are not supported.

    If there is no match then return None.
    """
    hasfile = symbol.filename and symbol.filename != UNKNOWN
    filenames = ([symbol.filename] if hasfile
                 else _iter_files(dirnames, ('.c', '.h')))

    if symbol.funcname and symbol.funcname != UNKNOWN:
        raise NotImplementedError

    found = _find_symbol(symbol.name, filenames, _perfilecache)
    filename, funcname, decl = found
    if filename == UNKNOWN:
        return None
    return info.Variable.from_parts(filename, funcname, symbol.name, decl)
def get_resolver(knownvars=None, dirnames=None, *,
                 _look_up_known=look_up_known_symbol,
                 _from_source=find_in_source,
                 ):
    """Return a "resolver" func for the given known vars and dirnames.

    The func takes a single Symbol and returns a corresponding Variable.
    If the symbol was located then the variable will be valid, populated
    with the corresponding information.  Otherwise None is returned.

    With known variables, symbols are only ever resolved against them;
    "dirnames" then merely restricts the result to variables whose
    filename is under one of those directories.  Without known
    variables, symbols are looked up in the source under "dirnames".
    """
    if knownvars:
        knownvars = dict(knownvars)  # a copy
        def resolve_known(symbol):
            found = _look_up_known(symbol, knownvars)
            if found is None:
                return None
            # A match found for a symbol with incomplete info is used up,
            # so a later symbol cannot be matched to the same variable.
            elif symbol.funcname == UNKNOWN:
                knownvars.pop(found.id)
            elif not symbol.filename or symbol.filename == UNKNOWN:
                knownvars.pop(found.id)
            return found
        if dirnames:
            def resolve(symbol):
                found = resolve_known(symbol)
                if found is None:
                    return None
                    #return _from_source(symbol, dirnames)
                else:
                    # Reject a match from outside the given directories.
                    for dirname in dirnames:
                        if not dirname.endswith(os.path.sep):
                            dirname += os.path.sep
                        if found.filename.startswith(dirname):
                            break
                    else:
                        return None
                    return found
        else:
            resolve = resolve_known
    elif dirnames:
        def resolve(symbol):
            return _from_source(symbol, dirnames)
    else:
        # Nothing to resolve against.
        def resolve(symbol):
            return None
    return resolve
def symbols_to_variables(symbols, *,
                         resolve=(lambda s: look_up_known_symbol(s, None)),
                         ):
    """Yield the variable that matches each given symbol.

    Items that are already Variables are passed through untouched and
    symbols of any kind other than "variable" are skipped.  A symbol
    that "resolve" cannot find is yielded as a Variable whose storage
    and vartype are both UNKNOWN.

    Use get_resolver() for a "resolve" func to use.
    """
    for symbol in symbols:
        if isinstance(symbol, info.Variable):
            # XXX validate?
            yield symbol
        elif symbol.kind == Symbol.KIND.VARIABLE:
            variable = resolve(symbol)
            if variable is None:
                variable = info.Variable(
                    id=symbol.id,
                    storage=UNKNOWN,
                    vartype=UNKNOWN,
                )
            yield variable
from c_analyzer_common import files
from c_analyzer_common.info import UNKNOWN
from c_parser import declarations
# XXX need tests:
# * find_symbol()
def find_symbol(name, dirnames, *,
                _perfilecache,
                _iter_files=files.iter_files_by_suffix,
                **kwargs
                ):
    """Return (filename, funcname, vartype) for the matching Symbol.

    Every .c and .h file under "dirnames" is searched.
    """
    candidates = _iter_files(dirnames, ('.c', '.h'))
    return _find_symbol(name, candidates, _perfilecache, **kwargs)
def _get_symbols(filename, *,
                 _iter_variables=declarations.iter_variables,
                 ):
    """Return the variables declared inside functions in the given file.

    The result maps each variable name to the list of
    (funcname, vartype) pairs found for it, in the order found.
    Variables without a funcname are left out.
    """
    byname = {}
    for funcname, name, vartype in _iter_variables(filename):
        if funcname:
            byname.setdefault(name, []).append((funcname, vartype))
    return byname
def _find_symbol(name, filenames, _perfilecache, *,
                 _get_local_symbols=_get_symbols,
                 ):
    """Return (filename, funcname, vartype) for the first match of "name".

    The files are searched in order and each file's symbols are loaded
    into "_perfilecache" the first time the file is seen.  A match is
    consumed: it is removed from the cache so that it will not be
    returned again.  (UNKNOWN, UNKNOWN, UNKNOWN) is returned if no file
    has the name.
    """
    for filename in filenames:
        if filename not in _perfilecache:
            _perfilecache[filename] = _get_local_symbols(filename)
        symbols = _perfilecache[filename]

        if name not in symbols:
            continue

        instances = symbols[name]
        funcname, vartype = instances.pop(0)
        if not instances:
            del symbols[name]
        return filename, funcname, vartype

    return UNKNOWN, UNKNOWN, UNKNOWN
def iter_symbols():
    """Placeholder: always raises NotImplementedError."""
    raise NotImplementedError
import os.path
import sys
PKG_ROOT = os.path.dirname(__file__)
DATA_DIR = os.path.dirname(PKG_ROOT)
REPO_ROOT = os.path.dirname(
os.path.dirname(DATA_DIR))
TOOL_ROOT = os.path.abspath(
os.path.dirname( # c-analyzer/
os.path.dirname(__file__))) # cpython/
DATA_DIR = TOOL_ROOT
REPO_ROOT = (
os.path.dirname( # ..
os.path.dirname(TOOL_ROOT))) # Tools/
SOURCE_DIRS = [os.path.join(REPO_ROOT, name) for name in [
INCLUDE_DIRS = [os.path.join(REPO_ROOT, name) for name in [
'Include',
]]
SOURCE_DIRS = [os.path.join(REPO_ROOT, name) for name in [
'Python',
'Parser',
'Objects',
'Modules',
]]
#PYTHON = os.path.join(REPO_ROOT, 'python')
PYTHON = sys.executable
# Clean up the namespace.
del sys
del os
import argparse
import os.path
import re
import sys
from c_analyzer_common import SOURCE_DIRS, REPO_ROOT
from c_analyzer_common.info import UNKNOWN
from c_analyzer_common.known import (
from c_analyzer.common import show
from c_analyzer.common.info import UNKNOWN
from . import SOURCE_DIRS
from .find import supported_vars
from .known import (
from_file as known_from_file,
DATA_FILE as KNOWN_FILE,
)
from . import find, show
from .supported import is_supported, ignored_from_file, IGNORED_FILE, _is_object
from .supported import IGNORED_FILE
def _match_unused_global(variable, knownvars, used):
found = []
for varid in knownvars:
if varid in used:
continue
if varid.funcname is not None:
continue
if varid.name != variable.name:
continue
if variable.filename and variable.filename != UNKNOWN:
if variable.filename == varid.filename:
def _check_results(unknown, knownvars, used):
def _match_unused_global(variable):
found = []
for varid in knownvars:
if varid in used:
continue
if varid.funcname is not None:
continue
if varid.name != variable.name:
continue
if variable.filename and variable.filename != UNKNOWN:
if variable.filename == varid.filename:
found.append(varid)
else:
found.append(varid)
else:
found.append(varid)
return found
return found
def _check_results(unknown, knownvars, used):
badknown = set()
for variable in sorted(unknown):
msg = None
if variable.funcname != UNKNOWN:
msg = f'could not find global symbol {variable.id}'
elif m := _match_unused_global(variable, knownvars, used):
elif m := _match_unused_global(variable):
assert isinstance(m, list)
badknown.update(m)
elif variable.name in ('completed', 'id'): # XXX Figure out where these variables are.
......@@ -65,32 +65,29 @@ def _check_results(unknown, knownvars, used):
raise Exception('could not find all symbols')
def _find_globals(dirnames, known, ignored):
if dirnames == SOURCE_DIRS:
dirnames = [os.path.relpath(d, REPO_ROOT) for d in dirnames]
ignored = ignored_from_file(ignored)
known = known_from_file(known)
# XXX Move this check to its own command.
def cmd_check_cache(cmd, *,
known=KNOWN_FILE,
ignored=IGNORED_FILE,
_known_from_file=known_from_file,
_find=supported_vars,
):
known = _known_from_file(known)
used = set()
unknown = set()
knownvars = (known or {}).get('variables')
for variable in find.globals_from_binary(knownvars=knownvars,
dirnames=dirnames):
#for variable in find.globals(dirnames, known, kind='platform'):
if variable.vartype == UNKNOWN:
unknown.add(variable)
for var, supported in _find(known=known, ignored=ignored):
if supported is None:
unknown.add(var)
continue
yield variable, is_supported(variable, ignored, known)
used.add(variable.id)
#_check_results(unknown, knownvars, used)
used.add(var.id)
_check_results(unknown, known['variables'], used)
def cmd_check(cmd, dirs=SOURCE_DIRS, *,
ignored=IGNORED_FILE,
def cmd_check(cmd, *,
known=KNOWN_FILE,
_find=_find_globals,
ignored=IGNORED_FILE,
_find=supported_vars,
_show=show.basic,
_print=print,
):
......@@ -100,7 +97,11 @@ def cmd_check(cmd, dirs=SOURCE_DIRS, *,
In the failure case, the list of unsupported variables
will be printed out.
"""
unsupported = [v for v, s in _find(dirs, known, ignored) if not s]
unsupported = []
for var, supported in _find(known=known, ignored=ignored):
if not supported:
unsupported.append(var)
if not unsupported:
#_print('okay')
return
......@@ -112,11 +113,11 @@ def cmd_check(cmd, dirs=SOURCE_DIRS, *,
sys.exit(1)
def cmd_show(cmd, dirs=SOURCE_DIRS, *,
ignored=IGNORED_FILE,
def cmd_show(cmd, *,
known=KNOWN_FILE,
ignored=IGNORED_FILE,
skip_objects=False,
_find=_find_globals,
_find=supported_vars,
_show=show.basic,
_print=print,
):
......@@ -127,10 +128,12 @@ def cmd_show(cmd, dirs=SOURCE_DIRS, *,
"""
allsupported = []
allunsupported = []
for found, supported in _find(dirs, known, ignored):
if skip_objects: # XXX Support proper filters instead.
if _is_object(found.vartype):
continue
for found, supported in _find(known=known,
ignored=ignored,
skip_objects=skip_objects,
):
if supported is None:
continue
(allsupported if supported else allunsupported
).append(found)
......@@ -165,9 +168,9 @@ def parse_args(prog=PROG, argv=sys.argv[1:], *, _fail=None):
common.add_argument('--known', metavar='FILE',
default=KNOWN_FILE,
help='path to file that lists known types')
common.add_argument('dirs', metavar='DIR', nargs='*',
default=SOURCE_DIRS,
help='a directory to check')
#common.add_argument('dirs', metavar='DIR', nargs='*',
# default=SOURCE_DIRS,
# help='a directory to check')
parser = argparse.ArgumentParser(
prog=prog,
......
# The code here consists of hacks for pre-populating the known.tsv file.
from c_parser.preprocessor import _iter_clean_lines
from c_parser.naive import (
from c_analyzer.parser.preprocessor import _iter_clean_lines
from c_analyzer.parser.naive import (
iter_variables, parse_variable_declaration, find_variables,
)
from c_parser.info import Variable
from c_analyzer.common.known import HEADER as KNOWN_HEADER
from c_analyzer.common.info import UNKNOWN, ID
from c_analyzer.variables import Variable
from c_analyzer.util import write_tsv
from . import SOURCE_DIRS, REPO_ROOT
from .known import DATA_FILE as KNOWN_FILE, HEADER as KNOWN_HEADER
from .info import UNKNOWN, ID
from .util import write_tsv
from .known import DATA_FILE as KNOWN_FILE
from .files import iter_cpython_files
......
from c_analyzer.common.files import (
C_SOURCE_SUFFIXES, walk_tree, iter_files_by_suffix,
)
from . import SOURCE_DIRS, REPO_ROOT
# XXX need tests:
# * iter_files()
def iter_files(*,
               walk=walk_tree,
               _files=iter_files_by_suffix,
               ):
    """Yield each C source file found under the CPython source dirs.

    The files are those under SOURCE_DIRS with one of the
    C_SOURCE_SUFFIXES, as reported by "_files".  Any file whose name
    starts with an excluded tree ("Include/cpython/") is skipped.
    """
    # Imported here because this module's import block does not import "os".
    import os.path

    excludedtrees = (
        os.path.join('Include', 'cpython', ''),
    )

    for filename in _files(SOURCE_DIRS, C_SOURCE_SUFFIXES, REPO_ROOT,
                           walk=walk,
                           ):
        if filename.startswith(excludedtrees):
            continue
        yield filename
import os.path
from c_analyzer.common import files
from c_analyzer.common.info import UNKNOWN, ID
from c_analyzer.variables import find as _common
from . import SOURCE_DIRS, PYTHON, REPO_ROOT
from .known import (
from_file as known_from_file,
DATA_FILE as KNOWN_FILE,
)
from .supported import (
ignored_from_file, IGNORED_FILE, is_supported, _is_object,
)
# XXX need tests:
# * vars_from_binary()
# * vars_from_source()
# * supported_vars()
def _handle_id(filename, funcname, name, *,
               _relpath=os.path.relpath,
               ):
    """Return the ID for the parts, with the filename relative to REPO_ROOT."""
    return ID(_relpath(filename, REPO_ROOT), funcname, name)
def vars_from_binary(*,
                     known=KNOWN_FILE,
                     _known_from_file=known_from_file,
                     _iter_files=files.iter_files_by_suffix,
                     _iter_vars=_common.vars_from_binary,
                     ):
    """Yield a Variable for each Symbol found in the PYTHON binary.

    Details are filled in from the given "known" variables and types.
    "known" may be given as a filename, in which case it is loaded
    first.
    """
    if isinstance(known, str):
        known = _known_from_file(known)

    # The source files are still listed, but the result is not used yet.
    _iter_files(SOURCE_DIRS, ('.c',))
    # XXX For now we only use known variables (no source lookup).
    filenames = None

    found = _iter_vars(
        PYTHON,
        known=known,
        filenames=filenames,
        handle_id=_handle_id,
        check_filename=(lambda n: True),
    )
    yield from found
def vars_from_source(*,
                     preprocessed=None,
                     known=KNOWN_FILE,
                     _known_from_file=known_from_file,
                     _iter_files=files.iter_files_by_suffix,
                     _iter_vars=_common.vars_from_source,
                     ):
    """Yield a Variable for each declaration in the raw source code.

    Details are filled in from the given "known" variables and types.
    "known" may be given as a filename, in which case it is loaded
    first.  The .c files under SOURCE_DIRS are the ones searched.
    """
    if isinstance(known, str):
        known = _known_from_file(known)

    sources = _iter_files(SOURCE_DIRS, ('.c',))
    found = _iter_vars(
        sources,
        preprocessed=preprocessed,
        known=known,
        handle_id=_handle_id,
    )
    yield from found
def supported_vars(*,
                   known=KNOWN_FILE,
                   ignored=IGNORED_FILE,
                   skip_objects=False,
                   _known_from_file=known_from_file,
                   _ignored_from_file=ignored_from_file,
                   _iter_vars=vars_from_binary,
                   _is_supported=is_supported,
                   ):
    """Yield (var, is supported) for each found global variable.

    "known" and "ignored" may each be given as a filename, in which
    case they are loaded first.  Non-global variables are skipped.  A
    variable whose vartype is UNKNOWN is yielded with None in place of
    the "is supported" result.  If "skip_objects" is true then
    variables whose vartype is an object type are skipped too.
    """
    if isinstance(known, str):
        known = _known_from_file(known)
    if isinstance(ignored, str):
        ignored = _ignored_from_file(ignored)

    for var in _iter_vars(known=known):
        if not var.isglobal:
            continue
        elif var.vartype == UNKNOWN:
            yield var, None
        # XXX Support proper filters instead.
        elif skip_objects and _is_object(var.vartype):
            continue
        else:
            yield var, _is_supported(var, ignored, known)
import csv
import os.path
from c_parser.info import Variable
from c_analyzer.parser.declarations import extract_storage
from c_analyzer.variables import known as _common
from c_analyzer.variables.info import Variable
from . import DATA_DIR
from .info import ID, UNKNOWN
from .util import read_tsv
DATA_FILE = os.path.join(DATA_DIR, 'known.tsv')
# XXX need tests:
# * from_file()
# * look_up_variable()
COLUMNS = ('filename', 'funcname', 'name', 'kind', 'declaration')
HEADER = '\t'.join(COLUMNS)
DATA_FILE = os.path.join(DATA_DIR, 'known.tsv')
# XXX need tests:
# * from_file()
def from_file(infile, *,
              _read_tsv=read_tsv,
              ):
    """Return the info for known declarations in the given file.

    The result is a dict with a "variables" entry that maps each ID to
    its (validated) Variable.  An empty funcname, or one given as "-",
    is stored as None.  Where the declaration does not determine the
    storage, it defaults to "local" for a variable with a funcname and
    to "implicit" otherwise.

    ValueError is raised for any row whose kind is not "variable".
    """
    variables = {}
    known = {
        'variables': variables,
        #'types': {},
        #'constants': {},
        #'macros': {},
    }
    for row in _read_tsv(infile, HEADER):
        filename, funcname, name, kind, declaration = row
        if funcname == '-' or not funcname:
            funcname = None
        varid = ID(filename, funcname, name)

        if kind != 'variable':
            raise ValueError(f'unsupported kind in row {row}')

        fallback = 'local' if funcname else 'implicit'
        storage = _get_storage(declaration) or fallback
        value = Variable(varid, storage, declaration)
        value.validate()
        variables[varid] = value
    return known
def _get_storage(decl):
def _get_storage(decl, infunc):
# statics
if decl.startswith('static '):
return 'static'
if decl.startswith(('Py_LOCAL(', 'Py_LOCAL_INLINE(')):
return 'static'
if decl.startswith(('_Py_IDENTIFIER(', '_Py_static_string(')):
......@@ -66,9 +29,38 @@ def _get_storage(decl):
if decl.startswith('WRAP_METHOD('):
return 'static'
# public extern
if decl.startswith('extern '):
return 'extern'
if decl.startswith('PyAPI_DATA('):
return 'extern'
# implicit or local
return None
# Fall back to the normal handler.
return extract_storage(decl, infunc=infunc)
def _handle_var(varid, decl):
    """Return the Variable for the ID and declaration.

    The storage is worked out from the declaration and from whether
    or not the ID has a funcname.
    """
    return Variable(varid, _get_storage(decl, varid.funcname), decl)
def from_file(infile=DATA_FILE, *,
              _from_file=_common.from_file,
              _handle_var=_handle_var,
              ):
    """Return the info for known declarations in the given file.

    The file defaults to DATA_FILE.  Reading is delegated to
    "_from_file", with "_handle_var" used to build each variable.
    """
    return _from_file(infile, handle_var=_handle_var)
def look_up_variable(varid, knownvars, *,
                     _lookup=_common.look_up_variable,
                     ):
    """Return the known variable matching the given ID.

    "knownvars" is a mapping of ID to Variable.

    The lookup is delegated to "_lookup", which is called with just
    the ID and the mapping.

    If no match is found then None is returned.
    """
    return _lookup(varid, knownvars)
import os.path
import re
from c_analyzer_common import DATA_DIR
from c_analyzer_common.info import ID
from c_analyzer_common.util import read_tsv, write_tsv
from c_analyzer.common.info import ID
from c_analyzer.common.util import read_tsv, write_tsv
from . import DATA_DIR
# XXX need tests:
# * generate / script
IGNORED_FILE = os.path.join(DATA_DIR, 'ignored.tsv')
......@@ -379,11 +383,12 @@ def _generate_ignored_file(variables, filename=None, *,
if __name__ == '__main__':
from c_analyzer_common import SOURCE_DIRS
from c_analyzer_common.known import (
from cpython import SOURCE_DIRS
from cpython.known import (
from_file as known_from_file,
DATA_FILE as KNOWN_FILE,
)
# XXX This is wrong!
from . import find
known = known_from_file(KNOWN_FILE)
knownvars = (known or {}).get('variables')
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment