python - 在 Python 中解析用户提供的数学公式的安全方法

Question

是否有 Python 的数学表达式解析器 + 求值器？

我不是第一个问这个问题的人，但答案通常都指向 eval()。例如，可以这样做：

>>> safe_list = ['math','acos', 'asin', 'atan', 'atan2', 'ceil', 'cos', 'cosh', 'degrees', 'e', 'exp', 'fabs', 'floor', 'fmod', 'frexp', 'hypot', 'ldexp', 'log', 'log10', 'modf', 'pi', 'pow', 'radians', 'sin', 'sinh', 'sqrt', 'tan', 'tanh', 'abs']
>>> safe_dict = dict([ (k, locals().get(k, None)) for k in safe_list ])
>>> s = "2+3"
>>> eval(s, {"__builtins__":None}, safe_dict)
5

但这并不安全：

>>> s_badbaduser = """
... (lambda fc=(
...     lambda n: [
...         c for c in 
...             ().__class__.__bases__[0].__subclasses__() 
...             if c.__name__ == n
...         ][0]
...     ):
...     fc("function")(
...         fc("code")(
...             0,0,0,0,"KABOOM",(),(),(),"","",0,""
...         ),{}
...     )()
... )()
... """
>>> eval(s_badbaduser, {"__builtins__":None}, safe_dict)
Segmentation fault

此外，用 eval 来解析和求值数学表达式，在我看来本身就是不合适的做法。

我找到了 PyMathParser，但它在底层同样使用了 eval，并没有更安全：

>>> import MathParser
>>> m=MathParser.PyMathParser()
>>> m.expression = s_badbaduser
>>> m.evaluate();
Segmentation fault

是否有一个库可以在不使用 Python 解析器的情况下解析和评估数学表达式？

score 20 · Accepted Answer

可以看看 Paul McGuire 的 pyparsing。他既编写了这个通用解析器，也为算术表达式编写了一套语法：

from __future__ import division
import pyparsing as pyp
import math
import operator

class NumericStringParser(object):
    '''
    Parse and evaluate arithmetic expressions without Python's built-in eval.

    Most of this code comes from the fourFn.py pyparsing example
    http://pyparsing.wikispaces.com/file/view/fourFn.py
    http://pyparsing.wikispaces.com/message/view/home/15549426
    __author__='Paul McGuire'

    All I've done is rewrap Paul McGuire's fourFn.py as a class, so I can use it
    more easily in other places.

    While the grammar matches, the parse actions append tokens to
    ``self.exprStack`` in postfix order (operands first, then the operator or
    function name). ``evaluateStack`` then consumes that stack from the end.
    '''

    def pushFirst(self, strg, loc, toks):
        '''Parse action: append the first matched token to the stack.'''
        self.exprStack.append(toks[0])

    def pushUMinus(self, strg, loc, toks):
        '''Parse action: append a 'unary -' marker when the atom starts with '-'.'''
        if toks and toks[0] == '-':
            self.exprStack.append('unary -')

    def __init__(self):
        """
        Build the grammar and the operator/function lookup tables.

        expop   :: '^'
        multop  :: '*' | '/'
        addop   :: '+' | '-'
        integer :: ['+' | '-'] '0'..'9'+
        atom    :: PI | E | real | fn '(' expr ')' | '(' expr ')'
        factor  :: atom [ expop factor ]*
        term    :: factor [ multop factor ]*
        expr    :: term [ addop term ]*
        """
        point = pyp.Literal(".")
        e = pyp.CaselessLiteral("E")
        fnumber = pyp.Combine(
            pyp.Word("+-" + pyp.nums, pyp.nums)
            + pyp.Optional(point + pyp.Optional(pyp.Word(pyp.nums)))
            + pyp.Optional(e + pyp.Word("+-" + pyp.nums, pyp.nums))
        )
        ident = pyp.Word(pyp.alphas, pyp.alphas + pyp.nums + "_$")
        plus = pyp.Literal("+")
        minus = pyp.Literal("-")
        mult = pyp.Literal("*")
        div = pyp.Literal("/")
        lpar = pyp.Literal("(").suppress()
        rpar = pyp.Literal(")").suppress()
        addop = plus | minus
        multop = mult | div
        expop = pyp.Literal("^")
        pi = pyp.CaselessLiteral("PI")
        expr = pyp.Forward()
        atom = (
            (pyp.Optional(pyp.oneOf("- +"))
             + (pi | e | fnumber | ident + lpar + expr + rpar).setParseAction(
                 self.pushFirst))
            | pyp.Optional(pyp.oneOf("- +")) + pyp.Group(lpar + expr + rpar)
        ).setParseAction(self.pushUMinus)
        # by defining exponentiation as "atom [ ^ factor ]..." instead of
        # "atom [ ^ atom ]...", we get right-to-left exponents, instead of left-to-right
        # that is, 2^3^2 = 2^(3^2), not (2^3)^2.
        factor = pyp.Forward()
        factor << atom + pyp.ZeroOrMore(
            (expop + factor).setParseAction(self.pushFirst))
        term = factor + pyp.ZeroOrMore(
            (multop + factor).setParseAction(self.pushFirst))
        expr << term + pyp.ZeroOrMore(
            (addop + term).setParseAction(self.pushFirst))
        self.bnf = expr
        # map operator symbols to corresponding arithmetic operations
        epsilon = 1e-12
        self.opn = {
            "+": operator.add,
            "-": operator.sub,
            "*": operator.mul,
            "/": operator.truediv,
            "^": operator.pow,
        }
        self.fn = {
            "sin": math.sin,
            "cos": math.cos,
            "tan": math.tan,
            "abs": abs,
            "trunc": int,
            "round": round,
            # For Python3 compatibility, cmp replaced by ((a > 0) - (a < 0)). See
            # https://docs.python.org/3.0/whatsnew/3.0.html#ordering-comparisons
            # Values within epsilon of zero are reported as sign 0.
            "sgn": lambda a: ((a > 0) - (a < 0)) if abs(a) > epsilon else 0,
        }
        self.exprStack = []

    def evaluateStack(self, s):
        '''
        Recursively evaluate the postfix token list ``s`` and return the value.

        ``s`` is consumed (popped from the end) during evaluation, so pass a
        copy if the tokens are still needed afterwards.

        Raises ValueError for a function name that is not in ``self.fn`` or a
        token that is not a valid number, and IndexError if the stack runs out
        of operands.
        '''
        op = s.pop()
        if op == 'unary -':
            return -self.evaluateStack(s)
        # Look the token up in the operator table itself; a substring test
        # against "+-*/^" would also accept tokens such as "" or "+-".
        if op in self.opn:
            # Operands were pushed left-to-right, so the right one pops first.
            op2 = self.evaluateStack(s)
            op1 = self.evaluateStack(s)
            return self.opn[op](op1, op2)
        if op == "PI":
            return math.pi  # 3.1415926535
        if op == "E":
            return math.e  # 2.718281828
        if op in self.fn:
            return self.fn[op](self.evaluateStack(s))
        if op[0].isalpha():
            # The grammar only accepts identifiers as function calls, so an
            # unmatched name is an unknown function. Returning 0 here would
            # give a silently wrong result and leave the argument on the stack.
            raise ValueError("unknown function: %r" % (op,))
        return float(op)

    def eval(self, num_string, parseAll=True):
        '''
        Parse ``num_string`` and return its numeric value.

        ``parseAll`` is forwarded to pyparsing's ``parseString``. The stack is
        reset on every call, so one parser instance can be reused.
        '''
        self.exprStack = []
        # Parsing is done for its side effect: the parse actions fill exprStack.
        self.bnf.parseString(num_string, parseAll)
        # Evaluate a copy so self.exprStack still holds the parsed tokens.
        return self.evaluateStack(self.exprStack[:])

# Build one parser and reuse it; eval() resets the expression stack on each call.
nsp = NumericStringParser()
# Leaf tokens are converted with float(), so results are floats.
print(nsp.eval('1+2'))
# 3.0

print(nsp.eval('2*3-5'))
# 1.0

score 9 · Accepted Answer

我建议先用 ast.parse 解析出语法树，然后用白名单检查树中的每个节点类型。

# Parse the user-supplied string s into a syntax tree without executing it.
tree = ast.parse(s, mode='eval')
# The tree is accepted only if every node reached by ast.walk is an instance
# of one of the whitelisted node classes.
valid = all(isinstance(node, whitelist) for node in ast.walk(tree))
if valid:
    # Compile and evaluate the vetted tree with builtins disabled and
    # safe_dict as the only namespace for names.
    # NOTE: result is left unassigned when the tree is rejected.
    result = eval(compile(tree, filename='', mode='eval'),
                  {"__builtins__": None}, safe_dict)

这里whitelist可能是这样的：

# Node classes an expression may contain; used with isinstance(), so
# subclasses of these classes are accepted as well.
# ast.Attribute is deliberately absent, so expressions such as
# ().__class__ fail the check. ast.Compare is absent too, so comparison
# expressions are rejected even though ast.cmpop is listed.
# NOTE(review): ast.Num appears to be deprecated in newer Python releases in
# favor of ast.Constant -- confirm against the target Python version.
whitelist = (ast.Expression, ast.Call, ast.Name, ast.Load,
             ast.BinOp, ast.UnaryOp, ast.operator, ast.unaryop, ast.cmpop,
             ast.Num,
            )

score 1 · Accepted Answer

我在这里几个帖子的基础上创建了一个求值器类，其中还用到了一个 eval 示例，我基本上把它改写成了类的形式。

import sys
import ast
import operator as op
import abc

import math

# Deriving from a throwaway ABCMeta-built base applies the metaclass on both
# Python 2 and Python 3. A ``__metaclass__`` class attribute is ignored on
# Python 3, which would leave the abstract method unenforced there.
class IEvaluator(abc.ABCMeta('_IEvaluatorBase', (object,), {})):
    '''Interface for classes that evaluate a string expression.'''

    @abc.abstractmethod
    def eval_expr(cls, expr, subs):  # @NoSelf
        '''IMPORTANT: this is class method, overload it with @classmethod!
        Evaluate an expression given in the expr string.

        :param expr: str. String expression.
        :param subs: dict. Dictionary with values to substitute.
        :returns: Evaluated expression result.
        '''


class Evaluator(IEvaluator):
    '''Generic evaluator for a string expression. Uses ast and operator
    modules. The expr string is parsed with ast resulting in a node tree.
    Then the node tree is recursively traversed and evaluated with operations
    from the operator module. Function calls are resolved against the math
    module.

    NOTE: only the node types are restricted; the size of the computation is
    not (e.g. ``9**9**9`` is still evaluated), so untrusted input can remain
    expensive to evaluate.

    :implements: IEvaluator
    '''

    # Supported operator node types and the functions that implement them.
    # Built once at class creation instead of on every lookup.
    _OPERATORS = {
        ast.Add: op.add,
        ast.Sub: op.sub,
        ast.Mult: op.mul,
        ast.Div: op.truediv,
        ast.Pow: op.pow,
        ast.BitXor: op.xor,
        ast.USub: op.neg,
        ast.UAdd: op.pos,
    }

    # Node class the parser emits for numeric literals. From Python 3.8 on
    # literals are ast.Constant; older versions emit ast.Num. The conditional
    # keeps ast.Num from being touched on versions where it is deprecated.
    _NUMBER_NODE_TYPES = (
        (ast.Constant,) if sys.version_info >= (3, 8) else (ast.Num,)
    )

    @classmethod
    def _get_op(cls, node):
        '''Get the operator corresponding to the node.

        :param node: Operator node type with node.op property.
        :raises KeyError: If the operator type is not supported.
        '''
        return cls._OPERATORS[type(node.op)]

    @classmethod
    def _get_op_fun(cls, node):
        '''Return the math-module function named by a call node.

        :param node: Call node whose node.func is a Name node.
        :raises TypeError: If the name is private/dunder or the attribute
            found on the math module is not callable.
        :raises AttributeError: If the math module has no such attribute.
        '''
        name = node.func.id
        # Never expose underscore-prefixed module attributes (``__loader__``,
        # ``__spec__``, ...) to the expression being evaluated.
        if name.startswith('_'):
            raise TypeError(node)
        fun_call = getattr(math, name)
        if not callable(fun_call):
            raise TypeError(node)
        return fun_call

    @classmethod
    def _num_op(cls, node, subs):
        '''Return the numeric value of a literal node.

        :param node: Literal node (ast.Constant, or ast.Num on old Pythons).
        :raises TypeError: If the literal is not an int, float or complex
            (for example a string, bytes, bool, None or Ellipsis).
        '''
        if sys.version_info < (3, 8):
            # ast.Num only ever carries a numeric literal.
            return node.n
        value = node.value
        # bool is a subclass of int, so it has to be excluded explicitly.
        if isinstance(value, bool) or not isinstance(
                value, (int, float, complex)):
            raise TypeError(node)
        return value

    @classmethod
    def _bin_op(cls, node, subs):
        '''Eval the left and right nodes, and call the binary operator.

        :param node: Binary operator with node.op, node.left, and node.right
            properties.
        '''
        operation = cls._get_op(node)
        left_value = cls.eval(node.left, subs)
        right_value = cls.eval(node.right, subs)
        return operation(left_value, right_value)

    @classmethod
    def _unary_op(cls, node, subs):
        '''Eval the node operand and call the unary operator.

        :param node: Unary operator with node.op and node.operand properties.
        '''
        operation = cls._get_op(node)
        return operation(cls.eval(node.operand, subs))

    @classmethod
    def _subs_op(cls, node, subs):
        '''Return the value of the variable represented by the node.

        :param node: Name node with node.id property to identify the variable.
        :raises TypeError: If the variable is not present in subs.
        '''
        try:
            return subs[node.id]
        except KeyError:
            raise TypeError(node)

    @classmethod
    def _call_op(cls, node, subs):
        '''Eval the call arguments and apply the named math function.

        :param node: Call node with node.func, node.args and node.keywords.
        :raises TypeError: If the call uses ``**mapping`` unpacking or the
            function is rejected by _get_op_fun.
        '''
        arg_list = [cls.eval(node_arg, subs) for node_arg in node.args]
        # Keyword arguments must be forwarded too; dropping them silently
        # changes the result of calls such as isclose(a, b, abs_tol=...).
        kwarg_map = {}
        for keyword in node.keywords:
            if keyword.arg is None:  # ``**mapping`` has no keyword name
                raise TypeError(node)
            kwarg_map[keyword.arg] = cls.eval(keyword.value, subs)
        fun_call = cls._get_op_fun(node)
        return fun_call(*arg_list, **kwarg_map)

    @classmethod
    def eval(cls, node, subs):
        '''Recursively evaluate an ast node tree.

        Supported node types and the properties read from them:
            numeric literal - its value is returned as is.
            ast.Name - node.id is looked up in the subs dictionary.
            ast.BinOp - node.op applied to node.left and node.right.
            ast.UnaryOp - node.op applied to node.operand.
            ast.Call - node.func.id names a math function applied to
                node.args and node.keywords.
        type(node.op) is one of ast.Add, ast.Sub, ast.Mult, ast.Div,
        ast.Pow, ast.BitXor, ast.USub or ast.UAdd.

        The binary, unary and call operations call eval recursively.

        :param node: Root of the node tree to evaluate.
        :param subs: dict. Values for the variables in the expression.
        :raises KeyError: If the node type is not supported.
        '''
        if isinstance(node, cls._NUMBER_NODE_TYPES):  # <number>. Leaf.
            return cls._num_op(node, subs)

        handlers = {
            # Bin operation with two operands.
            ast.BinOp: cls._bin_op,  # <left> <operator> <right>

            # Unary operation such as neg.
            ast.UnaryOp: cls._unary_op,  # <operator> <operand> e.g., -1

            # Sub the value for the variable. Leaf.
            ast.Name: cls._subs_op,  # <variable>

            # Call of a function from the math module.
            ast.Call: cls._call_op,  # <function>(<args>)
        }
        return handlers[type(node)](node, subs)

    @classmethod
    def eval_expr(cls, expr, subs=None):
        '''Evaluates a string expression. The expr string is parsed with ast
        resulting in a node tree. Then the eval method is used to recursively
        traverse and evaluate the nodes. Symbolic params are taken from subs.

        :Example:
            >>> eval_expr('2^6')
            4
            >>> eval_expr('2**6')
            64
            >>> eval_expr('1 + 2*3**(4^5) / (6 + -7)')
            -5.0
            >>> eval_expr('x + y', {'x': 1, 'y': 2})
            3

        :param expr: str. String expression.
        :param subs: dict. (default: globals of current and calling stack.)
        :returns: Result of running the evaluator.

        :implements: IEvaluator.eval_expr

        '''
        # ref: https://stackoverflow.com/a/9558001/3457624
        if subs is None:
            # Collect the globals of this module, then let the caller's
            # globals override them.
            frame = sys._getframe()
            subs = {}
            subs.update(frame.f_globals)

            if frame.f_back:
                subs.update(frame.f_back.f_globals)

        expr_tree = ast.parse(expr, mode='eval').body
        return cls.eval(expr_tree, subs)

这里有些例子：

import sympy

from eval_sympy import Evaluator

# Example: render a sympy expression as a string, then evaluate that string
# with Evaluator. Each print call takes a single pre-formatted argument so
# the script prints the same text on Python 2 and Python 3.
x = sympy.Symbol('x')
y = sympy.Symbol('y')

expr = x * 2 - y ** 2
# z = expr.subs({x:1, y:2})

str_expr = str(expr)
print(str_expr)

# Rebind x and y to plain numbers: without an explicit subs argument,
# eval_expr looks the variables up in the caller's module globals.
x = 1
y = 2
out0 = Evaluator.eval_expr(str_expr)
print('(x, y): ({}, {})'.format(x, y))
print('{}  =  {}'.format(str_expr, out0))

# Same expression, with the variable values passed explicitly.
subs1 = {'x': 1, 'y': 2}
out1 = Evaluator.eval_expr(str_expr, subs1)
print('subs:  {}'.format(subs1))
print('{}  =  {}'.format(str_expr, out1))

# Function calls in the expression are resolved against the math module.
sin_subs = {'x': 1, 'y': 2}
sin_out = Evaluator.eval_expr('sin(log10(x*y))', sin_subs)
print('sin_subs:  {}'.format(sin_subs))
print('sin(log10(x*y)) =  {}'.format(sin_out))

结果

2*x - y**2

(x, y): (1, 2)
2*x - y**2  =  -2

subs:  {'y': 2, 'x': 1}
2*x - y**2  =  -2

sin_subs:  {'y': 2, 'x': 1}
sin(log10(x*y)) =  0.296504042171

python - 在 Python 中解析用户提供的数学公式的安全方法

3 回答 3

Related

Reference