RestrictionMutator.py 20.9 KB
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535
##############################################################################
#
# Copyright (c) 2002 Zope Foundation and Contributors.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
"""Modify AST to include security checks.

RestrictionTransformer modifies an abstract syntax tree built from
``ast`` module nodes, restricting and enhancing the code in various
ways before it is compiled.

$Revision: 1.13 $
"""

import ast
from ast import parse

# These utility functions allow us to generate AST subtrees without
# line number attributes.  These trees can then be inserted into other
# trees without affecting line numbers shown in tracebacks, etc.
def rmLineno(node):
    """Strip lineno attributes from a code tree.

    Walks ``node`` and every descendant and deletes the ``lineno``
    attribute wherever one is set.  Nodes that never had a line number
    (hand-built nodes, or a tree that was already stripped) are skipped
    instead of raising AttributeError.  The tree is modified in place;
    nothing is returned.
    """
    for child in ast.walk(node):
        # _attributes only says that this node type *may* carry a line
        # number, not that this particular instance has one set.
        if 'lineno' not in child._attributes:
            continue
        try:
            del child.lineno
        except AttributeError:
            pass

def stmtNode(txt):
    """Parse ``txt`` and hand back its first statement node.

    The node is returned exactly as the parser produced it, with its
    line number attributes still attached.
    """
    # TODO: Stripping the line numbers from these nodes causes an error;
    # the reason still needs to be figured out.
    statements = parse(txt).body
    return statements[0]

# The security checks are performed by a set of guard functions that
# must be provided by the restricted environment.  The nodes below are
# load-context references, by name, to those guards and to the
# per-scope print target variable ``_print``.

_apply_name = ast.Name("_apply_", ast.Load())
_getattr_name = ast.Name("_getattr_", ast.Load())
_getitem_name = ast.Name("_getitem_", ast.Load())
_getiter_name = ast.Name("_getiter_", ast.Load())
_print_target_name = ast.Name("_print", ast.Load())
_write_name = ast.Name("_write_", ast.Load())
_inplacevar_name = ast.Name("_inplacevar_", ast.Load())

# Constants.
_None_const = ast.Name('None', ast.Load())
# _write_const = ast.Name("write", ast.Load())

# Call node for the expression ``_print()``; visit_Name substitutes it
# for uses of the name 'printed'.
# _printed_expr = stmtNode("_print()").expr
_printed_expr = stmtNode("_print()").value
# Statement node for ``_print = _print_()``; prepBody inserts it at the
# top of a suite that prints or reads 'printed'.
_print_target_node = stmtNode("_print = _print_()")

class FuncInfo:
    """Per-scope record of whether 'print' and 'printed' were seen."""
    # Set to True by visit_Print when a print statement is visited.
    print_used = False
    # Set to True by visit_Name when the name 'printed' is replaced.
    printed_used = False

class RestrictionTransformer(ast.NodeTransformer):
    """Rewrite an AST so that sensitive operations go through guards.

    Attribute and item reads, iteration, attribute and item writes,
    calls using *-args / **-args, in-place operators on names and print
    statements are rewritten into calls of the ``_getattr_``,
    ``_getitem_``, ``_getiter_``, ``_write_``, ``_apply_``,
    ``_inplacevar_`` and ``_print_`` names.  Problems found on the way
    are collected as strings in ``errors`` and ``warnings`` instead of
    being raised; ``used_names`` maps every name seen by visit_Name to
    True.

    NOTE(review): nodes are built with the five-argument ``ast.Call``
    signature, ``ast.Str`` and ``ast.Print``, and ``visit_Call`` reads
    ``starargs`` / ``kwargs``; this presumably targets the Python 2.7
    ``ast`` module -- confirm before running on another version.
    """

    # Operator string passed to _inplacevar_ for each augmented
    # assignment operator node type.
    _inplace_ops = {
        ast.Add: '+=',
        ast.Sub: '-=',
        ast.Mult: '*=',
        ast.Div: '/=',
        ast.FloorDiv: '//=',
        ast.Mod: '%=',
        ast.Pow: '**=',
        ast.RShift: '>>=',
        ast.LShift: '<<=',
        ast.BitAnd: '&=',
        ast.BitXor: '^=',
        ast.BitOr: '|=',
    }

    def __init__(self):
        self.warnings = []
        self.errors = []
        self.used_names = {}
        self.funcinfo = FuncInfo()

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _guardCall(guard_id, args, template=None):
        """Build the call ``guard_id(*args)``.

        A fresh Name node is created for every call so that no node is
        shared between different places of the tree.  When ``template``
        is given, its source location is copied onto the call so that
        tracebacks point at the code that was replaced.
        """
        call = ast.Call(ast.Name(guard_id, ast.Load()), list(args),
                        [], None, None)
        if template is not None:
            ast.copy_location(call, template)
        return call

    @classmethod
    def _paramNames(cls, params):
        """Return the names bound by a sequence of parameter entries.

        Entries may be None (skipped), plain strings, nested tuple/list
        parameters, or nodes exposing the name as ``arg`` or ``id``.
        """
        names = []
        for param in params:
            if param is None:
                continue
            if isinstance(param, str):
                names.append(param)
            elif isinstance(param, (ast.Tuple, ast.List)):
                names.extend(cls._paramNames(param.elts))
            else:
                name = getattr(param, 'arg', None)
                if name is None:
                    # Fails loudly on an unknown parameter shape rather
                    # than letting an unchecked name through.
                    name = param.id
                names.append(name)
        return names

    def _checkParams(self, node):
        """Run checkName() on every parameter name of ``node``."""
        spec = node.args
        entries = list(spec.args) + [spec.vararg, spec.kwarg]
        for name in self._paramNames(entries):
            self.checkName(node, name)

    def _visitStatements(self, statements):
        """Visit a statement list in place.

        Follows the NodeTransformer conventions: a visitor returning
        None removes the statement, and one returning a list has its
        items spliced in.
        """
        rewritten = []
        for stmt in statements:
            result = self.visit(stmt)
            if result is None:
                continue
            if isinstance(result, ast.AST):
                rewritten.append(result)
            else:
                rewritten.extend(result)
        statements[:] = rewritten

    def _visitTarget(self, owner, target):
        """Check and guard one assignment target; return the new target.

        Names are verified with checkName() (errors are reported against
        ``owner``), tuple and list targets are processed element by
        element, and anything else is visited so that visit_Attribute /
        visit_Subscript can wrap the written object in ``_write_``.
        """
        if target is None:
            return None
        if isinstance(target, ast.Name):
            self.checkName(owner, target.id)
            return target
        if isinstance(target, (ast.Tuple, ast.List)):
            target.elts[:] = [self._visitTarget(owner, elt)
                              for elt in target.elts]
            return target
        return self.visit(target)

    # ------------------------------------------------------------------
    # Error reporting and name checks
    # ------------------------------------------------------------------

    def error(self, node, info):
        """Records a security error discovered during compilation.

        The message is prefixed with 'Line N: ' when ``node`` carries a
        positive ``lineno``.
        """
        lineno = getattr(node, 'lineno', None)
        if lineno is not None and lineno > 0:
            self.errors.append('Line %d: %s' % (lineno, info))
        else:
            self.errors.append(info)

    def checkName(self, node, name):
        """Verifies that a name being assigned is safe.

        This is to prevent people from doing things like:

          __metatype__ = mytype (opens up metaclasses, a big unknown
                                 in terms of security)
          __path__ = foo        (could this confuse the import machinery?)
          _getattr = somefunc   (not very useful, but could open a hole)

        Note that assigning a variable is not the only way to assign
        a name.  def _badname, class _badname, import foo as _badname,
        and perhaps other statements assign names.  Special case:
        '_' is allowed.
        """
        if name.startswith("_") and name != "_":
            # Note: "_" *is* allowed.
            self.error(node, '"%s" is an invalid variable name because'
                       ' it starts with "_"' % name)
        if name.endswith('__roles__'):
            self.error(node, '"%s" is an invalid variable name because '
                       'it ends with "__roles__".' % name)
        if name == "printed":
            self.error(node, '"printed" is a reserved name.')

    def checkAttrName(self, node):
        """Verifies that an attribute name does not start with _.

        As long as guards (security proxies) have underscored names,
        this underscore protection is important regardless of the
        security policy.  Special case: '_' is allowed.
        """
        name = node.attr
        if name.startswith("_") and name != "_":
            # Note: "_" *is* allowed.
            self.error(node, '"%s" is an invalid attribute name '
                       'because it starts with "_".' % name)
        if name.endswith('__roles__'):
            self.error(node, '"%s" is an invalid attribute name '
                       'because it ends with "__roles__".' % name)

    def prepBody(self, body):
        """Insert code for print at the beginning of the code suite.

        Nothing is inserted unless the current scope printed or read
        'printed'.  A warning is recorded when only one of the two
        happened.
        """
        info = self.funcinfo
        if not (info.print_used or info.printed_used):
            return
        # Add code at top for creating _print_target
        body.insert(0, _print_target_node)
        if not info.printed_used:
            self.warnings.append(
                "Prints, but never reads 'printed' variable.")
        elif not info.print_used:
            self.warnings.append(
                "Doesn't print, but reads 'printed' variable.")

    # ------------------------------------------------------------------
    # Definitions
    # ------------------------------------------------------------------

    def visit_FunctionDef(self, node):
        """Checks and mutates a function definition.

        Checks the name of the function and every parameter name
        (including nested tuple parameters and the *-args / **-args
        names) using checkName().  Defaults and decorators are visited
        in the enclosing scope; the body is visited with a fresh
        FuncInfo and then handed to prepBody().
        """
        self.checkName(node, node.name)
        self._checkParams(node)

        defaults = node.args.defaults
        defaults[:] = [self.visit(value) for value in defaults]
        decorators = getattr(node, 'decorator_list', None)
        if decorators:
            decorators[:] = [self.visit(deco) for deco in decorators]

        former_funcinfo = self.funcinfo
        self.funcinfo = FuncInfo()
        self._visitStatements(node.body)
        self.prepBody(node.body)
        self.funcinfo = former_funcinfo
        return ast.fix_missing_locations(node)

    def visit_Lambda(self, node):
        """Checks and mutates an anonymous function definition.

        Checks the parameter names using checkName(), then visits the
        children of the lambda.
        """
        self._checkParams(node)
        return self.generic_visit(node)

    def visit_ClassDef(self, node):
        """Checks the name of a class using checkName().

        The bases, decorators and body are visited as well, so code
        inside a class gets the same checks and guards as any other
        code.

        Should classes be allowed at all?  They don't cause security
        issues, but they aren't very useful either since untrusted
        code can't assign instance attributes.
        """
        self.checkName(node, node.name)
        return self.generic_visit(node)

    def visit_Module(self, node):
        """Adds prep code at module scope.

        Zope doesn't make use of this.  The body of Python scripts is
        always at function scope.
        """
        self.generic_visit(node)
        self.prepBody(node.body)
        node.lineno = 0
        node.col_offset = 0
        return ast.fix_missing_locations(node)

    # ------------------------------------------------------------------
    # Print support
    # ------------------------------------------------------------------

    def visit_Print(self, node):
        """Checks and mutates a print statement.

        Adds a target to all print statements.  'print foo' becomes
        'print >> _print, foo', where _print is the default print
        target defined for this scope.

        Alternatively, if the untrusted code provides its own target,
        we have to check the 'write' method of the target.
        'print >> ob, foo' becomes
        'print >> (_getattr(ob, 'write') and ob), foo'.
        Otherwise, it would be possible to call the write method of
        templates and scripts; 'write' happens to be the name of the
        method that changes them.
        """
        self.generic_visit(node)
        self.funcinfo.print_used = True
        if node.dest is None:
            node.dest = ast.Name(_print_target_name.id, ast.Load())
        else:
            # Pre-validate access to the "write" attribute.
            # "print >> ob, x" becomes
            # "print >> (_getattr(ob, 'write') and ob), x"
            check = self._guardCall(
                _getattr_name.id, [node.dest, ast.Str('write')], node)
            node.dest = ast.BoolOp(ast.And(), [check, node.dest])
        return ast.fix_missing_locations(node)

    # XXX: Does ast.AST still have Printnl???
    visitPrintnl = visit_Print

    def visit_Name(self, node):
        """Prevents access to protected names as defined by checkName().

        Also converts reads of the name 'printed' to an expression.
        A 'printed' that is being bound or deleted is not replaced; it
        falls through to checkName(), which reports it as reserved.
        """
        ctx = getattr(node, 'ctx', None)
        if node.id == 'printed' and (ctx is None
                                     or isinstance(ctx, ast.Load)):
            # Replace name lookup with an expression.
            self.funcinfo.printed_used = True
            return ast.fix_missing_locations(_printed_expr)
        self.checkName(node, node.id)
        self.used_names[node.id] = True
        return node

    # ------------------------------------------------------------------
    # Calls, iteration, attribute and item access
    # ------------------------------------------------------------------

    def visit_Call(self, node):
        """Checks calls with *-args and **-args.

        That's a way of spelling apply(), and needs to use our safe
        _apply_ instead.
        """
        self.generic_visit(node)
        if node.starargs is None and node.kwargs is None:
            return node
        # Otherwise transform foo(a, b, c, d=e, f=g, *args, **kws) into a call
        # of _apply_(foo, a, b, c, d=e, f=g, *args, **kws).  The interesting
        # thing here is that _apply_() is defined with just *args and **kws,
        # so it gets Python to collapse all the myriad ways to call functions
        # into one manageable form.
        #
        # From there, _apply_() digs out the first argument of *args (it's the
        # function to call), wraps args and kws in guarded accessors, then
        # calls the function, returning the value.
        node.args.insert(0, node.func)
        node.func = ast.copy_location(
            ast.Name(_apply_name.id, ast.Load()), node)
        return ast.fix_missing_locations(node)

    def visit_For(self, node):
        """Guards the iterable of a for loop.

        'for x in expr:' becomes 'for x in _getiter_(expr):'.  The
        target, iterable, body and else suite are visited first.
        """
        self.generic_visit(node)
        node.iter = self._guardCall(_getiter_name.id, [node.iter], node.iter)
        return ast.fix_missing_locations(node)

    def visit_ListComp(self, node):
        """Visits the children of a list comprehension.

        The iterables are guarded by visit_comprehension().
        """
        self.generic_visit(node)
        return node

    def visit_comprehension(self, node):
        """Checks the target and guards the iterable of a comprehension.

        '[... for x in expr ...]' becomes
        '[... for x in _getiter_(expr) ...]'.  A tuple iterable is
        passed through unpackSequence(), which also wraps nested
        tuples and lists.  Attribute and item targets are written
        through '_write_'.
        """
        node.target = self._visitTarget(node, node.target)

        node.iter = self.visit(node.iter)
        if isinstance(node.iter, ast.Tuple):
            node.iter = self.unpackSequence(node.iter)
        else:
            node.iter = self._guardCall(
                _getiter_name.id, [node.iter], node.iter)

        node.ifs[:] = [self.visit(cond) for cond in node.ifs]
        return ast.fix_missing_locations(node)

    def visit_Attribute(self, node):
        """Converts attribute access to a function call.

        'foo.bar' becomes '_getattr_(foo, "bar")' when it is read.
        When the attribute is being assigned or deleted, the object is
        wrapped instead: 'foo.bar' becomes '_write_(foo).bar'.

        The attribute name is verified with checkAttrName() and the
        object expression is visited, so 'a.b.c' is guarded at every
        level.
        """
        self.checkAttrName(node)
        node.value = self.visit(node.value)
        if isinstance(getattr(node, 'ctx', None), (ast.Store, ast.Del)):
            node.value = self._guardCall(_write_name.id, [node.value], node)
            return ast.fix_missing_locations(node)
        call = self._guardCall(
            _getattr_name.id, [node.value, ast.Str(node.attr)], node)
        return ast.fix_missing_locations(call)

    def visit_Subscript(self, node):
        """Checks all kinds of subscripts.

        Reads are changed from 'foo[bar]' to '_getitem_(foo, bar)':
        'a = foo[bar, baz]' becomes 'a = _getitem_(foo, (bar, baz))'.
        'a = foo[bar]' becomes 'a = _getitem_(foo, bar)'.
        'a = foo[bar:baz]' becomes 'a = _getitem_(foo, slice(bar, baz, None))'.
        'a = foo[:baz]' becomes 'a = _getitem_(foo, slice(None, baz, None))'.
        'a = foo[bar:]' becomes 'a = _getitem_(foo, slice(bar, None, None))'.

        Subscripts that are assigned or deleted keep their shape and
        get the container wrapped:
        'del foo[bar]' becomes 'del _write_(foo)[bar]'.
        'foo[bar] = a' becomes '_write_(foo)[bar] = a'.

        The container and the index expressions are visited first.
        'foo[bar] += baz' is rejected in visit_AugAssign().
        """
        self.generic_visit(node)
        if isinstance(getattr(node, 'ctx', None), (ast.Store, ast.Del)):
            node.value = self._guardCall(_write_name.id, [node.value], node)
            return ast.fix_missing_locations(node)

        index = node.slice
        if isinstance(index, ast.Index):
            key = index.value
        elif isinstance(index, ast.Slice):
            bounds = [
                part if part else ast.Name(_None_const.id, ast.Load())
                for part in (index.lower, index.upper, index.step)
            ]
            key = self._guardCall('slice', bounds, node)
        else:
            # NOTE(review): other slice kinds (extended slices,
            # Ellipsis) are returned without a _getitem_ guard, as
            # before -- confirm whether they should be rewritten too.
            return node
        call = self._guardCall(_getitem_name.id, [node.value, key], node)
        return ast.fix_missing_locations(call)

    # ------------------------------------------------------------------
    # Disallowed statements
    # ------------------------------------------------------------------

    def visit_Exec(self, node):
        """Records an error for an exec statement.

        Returns None, so NodeTransformer drops the node from the tree.
        """
        self.error(node, 'Exec statements are not allowed.')

    def visit_Yield(self, node):
        """Records an error for a yield expression.

        Returns None, so NodeTransformer drops the node from the tree.
        """
        self.error(node, 'Yield statements are not allowed.')

    # ------------------------------------------------------------------
    # Statements that bind or unbind
    # ------------------------------------------------------------------

    def visit_Delete(self, node):
        """Checks and guards the targets of a del statement.

        'del foo[bar]' becomes 'del _write_(foo)[bar]' and
        'del foo.bar' becomes 'del _write_(foo).bar'.  Deleted names
        are verified with checkName().
        """
        node.targets[:] = [self._visitTarget(node, target)
                           for target in node.targets]
        return ast.fix_missing_locations(node)

    def visit_With(self, node):
        """Checks and mutates the attribute access in with statement.

        'with x as x.y' becomes 'with x as _write_(x).y'

        The _write_ function returns a security proxy.  The context
        expression and every statement of the body are visited, and
        statements replaced by their visitors are put back into the
        body.
        """
        node.optional_vars = self._visitTarget(node, node.optional_vars)
        node.context_expr = self.visit(node.context_expr)
        self._visitStatements(node.body)
        return ast.fix_missing_locations(node)

    def unpackSequence(self, node):
        """Wraps a tuple or list, and nested ones, in '_getiter_' calls.

        Any other node is returned unchanged.
        """
        if isinstance(node, (ast.Tuple, ast.List)):
            for i, item in enumerate(node.elts):
                node.elts[i] = self.unpackSequence(item)
            node = self._guardCall(_getiter_name.id, [node], node)
        return node

    def visit_Assign(self, node):
        """Checks and mutates some assignment.

        'a.b = c' becomes '_write_(a).b = c'.
        'foo[bar] = a' becomes '_write_(foo)[bar] = a'

        The _write_ function returns a security proxy.  Assigned names
        are verified with checkName(), including the names inside
        tuple and list targets.
        """
        node.targets[:] = [self._visitTarget(node, target)
                           for target in node.targets]
        node.value = self.visit(node.value)

        # Only when the first target is a tuple is the right-hand side
        # run through unpackSequence(); other assignments are left
        # alone (the no_unpack case in before_and_after.py).
        if isinstance(node.targets[0], ast.Tuple):
            node.value = self.unpackSequence(node.value)
        return ast.fix_missing_locations(node)

    def visit_AugAssign(self, node):
        """Rewrites augmented assignment of names, rejects the rest.

        'n += bar' becomes 'n = _inplacevar_("+=", n, bar)'.
        Augmented assignment of attributes and subscripts is recorded
        as an error.

        This could be a problem if untrusted code got access to a
        mutable database object that supports augmented assignment.
        """
        node.value = self.visit(node.value)
        target = node.target
        if isinstance(target, ast.Subscript):
            self.error(node, 'Augment assignment of '
                    'object items and slices is not allowed.')
        elif isinstance(target, ast.Attribute):
            self.error(node, 'Augmented assignment of '
                    'attributes is not allowed.')
        elif isinstance(target, ast.Name):
            self.checkName(node, target.id)
            # The rewritten statement reads the name as well.
            self.used_names[target.id] = True
            op_text = self._inplace_ops.get(type(node.op))
            if op_text is None:
                # Without an operator string the generated call would
                # be wrong, so report it instead.
                self.error(node, 'Augmented assignment operator '
                           'is not supported.')
                return ast.fix_missing_locations(node)
            call = self._guardCall(
                _inplacevar_name.id,
                [ast.Str(op_text), ast.Name(target.id, ast.Load()),
                 node.value],
                node)
            new_node = ast.copy_location(ast.Assign([target], call), node)
            return ast.fix_missing_locations(new_node)
        return ast.fix_missing_locations(node)

    def visit_Import(self, node):
        """Checks names imported using checkName()."""
        for alias in node.names:
            self.checkName(node, alias.name)
            if alias.asname:
                self.checkName(node, alias.asname)
        return node

    visit_ImportFrom = visit_Import