1

我在对将类型引用作为成员的类进行 YAML 序列化时遇到问题。我正在使用 ruamel.yaml 的安全加载程序。

我从 REPL 提示符运行了以下所有内容(以获取多个错误)。

初始化:

import sys
from ruamel.yaml import YAML, yaml_object

Y = YAML(typ="safe",pure=True)

# ==============

@yaml_object(Y)
class A(object):
    """Holder for a class reference that should be written to YAML."""

    yaml_tag = "!Aclass"

    def __init__(self, type):
        # Keep the class itself (not an instance of it) on the instance.
        self.type = type

    def f(self):
        """Call the stored class with no arguments and return the result."""
        factory = self.type
        return factory()

class T1(object):
    """Plain class that is only ever referenced, never decorated."""

@yaml_object(Y)
class T2(object):
    """Second referenced class; this one is decorated with ``yaml_object``."""

class T3(object):
    """Yet another try"""
    pass
# NOTE(review): T3 declares no metaclass, so T3.__class__ is the builtin
# `type`; this call therefore hands `type` itself to register_class, not T3.
# Presumably this is where the `!type {}` nodes in the output quoted further
# down come from -- `Y.register_class(T3)` looks like what was intended.
Y.register_class(T3.__class__)

导致失败的代码:

# Each of the four dumps below ends in a RepresenterError; the last line of
# every traceback is quoted right after this snippet.
Y.dump(A(T1), sys.stdout)
Y.dump(A(T2), sys.stdout)
Y.dump(A(T3), sys.stdout)
Y.dump(A(int), sys.stdout)

这输出(只有最后几行回溯):

ruamel.yaml.representer.RepresenterError: cannot represent an object: <attribute '__dict__' of 'T1' objects>
ruamel.yaml.representer.RepresenterError: cannot represent an object: <attribute '__dict__' of 'T2' objects>
ruamel.yaml.representer.RepresenterError: cannot represent an object: <attribute '__dict__' of 'T3' objects>
ruamel.yaml.representer.RepresenterError: cannot represent an object: <slot wrapper '__abs__' of 'int' objects>

任何让我(安全地)唯一保存类型的解决方案(我需要生成该类型的对象并检查传入的对象是否属于某种类型)都将不胜感激。生成我需要的类型的函数或类也会有同样的问题,即不可序列化。


附注:我可能还发现了一个 bug:出于某种原因,对同一个参数重复(尝试)序列化时,转储的行为会前后不一致。

# The identical dump is issued four times in a row on the same YAML instance;
# the session output that follows shows the calls alternating between a
# RepresenterError and a successful dump.
Y.dump(A(str), sys.stdout)
Y.dump(A(str), sys.stdout)
Y.dump(A(str), sys.stdout)
Y.dump(A(str), sys.stdout)

输出:

>>> Y.dump(A(str), sys.stdout)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\main.py", line 352, in dump
    return self.dump_all([data], stream, _kw, transform=transform)
  File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\main.py", line 383, in dump_all
    self.representer.represent(data)
  File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\representer.py", line 73, in represent
    node = self.represent_data(data)
  File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\representer.py", line 101, in represent_data
    node = self.yaml_representers[data_types[0]](self, data)
  File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\main.py", line 552, in t_y
    tag, data, cls, flow_style=representer.default_flow_style)
  File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\representer.py", line 371, in represent_yaml_object
    return self.represent_mapping(tag, state, flow_style=flow_style)
  File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\representer.py", line 206, in represent_mapping
    node_value = self.represent_data(item_value)
  File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\representer.py", line 101, in represent_data
    node = self.yaml_representers[data_types[0]](self, data)
  File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\main.py", line 492, in t_y
    tag, data, cls, flow_style=representer.default_flow_style)
  File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\representer.py", line 371, in represent_yaml_object
    return self.represent_mapping(tag, state, flow_style=flow_style)
  File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\representer.py", line 206, in represent_mapping
    node_value = self.represent_data(item_value)
  File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\representer.py", line 111, in represent_data
    node = self.yaml_representers[None](self, data)
  File "C:\Program Files\Anaconda3\lib\site-packages\ruamel\yaml\representer.py", line 375, in represent_undefined
    raise RepresenterError("cannot represent an object: %s" % data)
ruamel.yaml.representer.RepresenterError: cannot represent an object: <slot wrapper '__add__' of 'str' objects>
>>> Y.dump(A(str), sys.stdout)
!Aclass
type: !type {}
>>> Y.dump(A(str), sys.stdout)
Traceback (most recent call last):
# same traceback here
ruamel.yaml.representer.RepresenterError: cannot represent an object: <slot wrapper '__add__' of 'str' objects>
>>> Y.dump(A(str), sys.stdout)
!Aclass
type: !type {}
>>> 
4

2 回答 2

1

YAML 期望转储的是对象,并最终通过写出标量字符串来实现。T1 不是一个对象(T2 和 T3 也不是),这就是问题所在。您可以尝试把每个类引用包装成一个对象,并在这些对象上使用标签,但在我看来这只会让事情变得更复杂。

归根结底,这一切都是为了把一个标量表示(即类的字符串表示)写入文件,因此您不妨让 A() 直接转储该字符串表示,并在加载时把它读回来:

import sys
from ruamel.yaml import YAML, yaml_object
from ruamel.yaml.compat import StringIO
from ruamel.yaml.scalarstring import DoubleQuotedScalarString


Y = YAML(typ="safe", pure=True)

# ==============

@yaml_object(Y)
class A(object):
    """Wrapper around a class reference, stored in YAML as a ``!Aclass`` scalar.

    Dumping writes only the wrapped class's ``__name__``; loading resolves
    that text back to a class and wraps it again.
    """

    yaml_tag = "!Aclass"

    def __init__(self, type):
        # The class itself is stored, not an instance of it.
        self.type = type

    @classmethod
    def to_yaml(cls, representer, node):
        """Emit the wrapped class's ``__name__`` as a scalar tagged ``!Aclass``.

        ``node`` is read via ``node.type``, i.e. it is used as the A
        instance being dumped.
        """
        class_name = u'{}'.format(node.type.__name__)
        return representer.represent_scalar(cls.yaml_tag, class_name)

    @classmethod
    def from_yaml(cls, constructor, node):
        """Resolve the scalar text in ``node.value`` to a class and wrap it.

        A bare name is looked up in this module's globals; a dotted name is
        split at the last dot and looked up in the already-imported module
        found in ``sys.modules``.
        """
        name = node.value
        if '.' not in name:
            return cls(globals()[name])
        module_name, attr_name = name.rsplit('.', 1)
        return cls(getattr(sys.modules[module_name], attr_name))


class T1(object):
    """Plain class that is only ever referenced, never decorated."""


@yaml_object(Y)
class T2(object):
    """Second referenced class; this one is decorated with ``yaml_object``."""


class T3(object):
    """Third referenced class, registered through ``Y.register_class``."""


# Register the class itself with the YAML instance.
Y.register_class(T3)


# Round-trip each class: wrap it in A, dump to an in-memory buffer, show the
# YAML text, then load that text back and show the restored type.
for klass in (T1, T2, T3, DoubleQuotedScalarString):
    print('----------------------')
    original = A(klass)
    print('s', original.type)

    buffer = StringIO()
    Y.dump(original, buffer)
    yaml_text = buffer.getvalue()
    print(yaml_text)

    restored = Y.load(yaml_text)
    print('d', restored.type)

输出如下:

----------------------
s <class '__main__.T1'>
!Aclass T1
...

d <class '__main__.T1'>
----------------------
s <class '__main__.T2'>
!Aclass T2
...

d <class '__main__.T2'>
----------------------
s <class '__main__.T3'>
!Aclass T3
...

d <class '__main__.T3'>
----------------------
s <class 'ruamel.yaml.scalarstring.DoubleQuotedScalarString'>
!Aclass DoubleQuotedScalarString
...

d <class 'ruamel.yaml.scalarstring.DoubleQuotedScalarString'>

如果 A() 还有其他属性需要转储/加载,您应该创建一个字典(其中 .type 已转换为字符串),然后转储/加载这个字典。

我不认为您发现了一个真正的 bug,这只是在出错之后继续操作所产生的副作用:此时 Y 对象(及其组件)处于未定义状态。捕获错误之后不应再重用同一个 YAML() 实例,文档中应该把这一点说得更清楚。所以,如果您想在 for 循环中使用 try/except,就应该把 Y = YAML(typ='safe', pure=True) 移到 try 部分的内部。

于 2017-08-20T09:52:48.717 回答
0

作为对 Anthon 的回答的补充,我开始修改 A.from_yaml 以使其更安全,不过 _check_registered() 还没有覆盖所有情况。其思路是:凡是 Y 允许加载其实例的类型都允许加载,其余类型一律阻止。请把它当作一个尚未完成的版本(WIP):

import sys
from ruamel.yaml import YAML, yaml_object
from ruamel.yaml.compat import StringIO
from ruamel.yaml.scalarstring import DoubleQuotedScalarString


Y = YAML(typ="safe", pure=True)

# ==============

@yaml_object(Y)
class A(object):
    """Wrapper around a class reference, stored in YAML as a ``!Aclass`` scalar.

    Loading resolves the scalar back to a class and then runs a basic
    registration check on it before building the wrapper.
    """

    yaml_tag = "!Aclass"

    def __init__(self, type):
        # The class itself is stored, not an instance of it.
        self.type = type

    @classmethod
    def to_yaml(cls, representer, node):
        """Emit the wrapped class's ``__name__`` as a scalar tagged ``!Aclass``.

        ``node`` is read via ``node.type``, i.e. it is used as the A
        instance being dumped.
        """
        class_name = u'{}'.format(node.type.__name__)
        return representer.represent_scalar(cls.yaml_tag, class_name)

    @classmethod
    def from_yaml(cls, constructor, node):
        """Resolve the scalar to a class, vet it, and wrap it in an A.

        Dotted values are looked up in an already-imported module from
        ``sys.modules``; bare values are looked up in this module's globals.
        The resolved class must pass ``_check_registered`` before it is used.
        """
        name = node.value
        if '.' in name:
            module_name, attr_name = name.rsplit('.', 1)
            resolved = getattr(sys.modules[module_name], attr_name)
        else:
            resolved = globals()[name]
        cls._check_registered(resolved, constructor, node)
        return cls(resolved)

    @classmethod
    def _check_registered(cls, t, constructor, node):
        """Raise unless ``t`` carries a ``yaml_tag`` known to ``constructor``.

        Only the tag string is compared against
        ``constructor.yaml_constructors``; this is a very basic check and
        does not establish that ``t`` is the class registered under that tag.
        """
        if not hasattr(t, "yaml_tag"):
            raise Exception("Error: No attribute 'yaml_tag'!")
        if t.yaml_tag not in constructor.yaml_constructors:
            raise Exception("Error: Tag not registered!")

class T1(object):
    """Referenced class that declares a tag but is not decorated."""

    yaml_tag = u"!T1"


@yaml_object(Y)
class T2(object):
    """Referenced class decorated with ``yaml_object`` under the tag ``!T2``."""

    yaml_tag = u"!T2"

    def __init__(self):
        # Visible marker so the demo output shows when an instance is built.
        print("Initializing...")

class T2_bad(object):
    """Stand-in for a malicious class that reuses T2's tag.

    It declares the tag ``!T2`` without being decorated or registered here.
    """

    yaml_tag = u"!T2"

    def __init__(self):
        # Visible marker standing in for harmful constructor side effects.
        print("Evil code here!")


class T3(object):
    """Third referenced class, registered through ``Y.register_class``."""

    yaml_tag = u"!T3"


# Register the class itself with the YAML instance.
Y.register_class(T3)



# Round-trip every candidate class through YAML and then instantiate the
# restored type; any exception is printed and the loop moves on.
for klass in (T1, T2, T2_bad, T3, DoubleQuotedScalarString):
    try:
        print('----------------------')
        buffer = StringIO()
        original = A(klass)
        print('s', original.type)
        Y.dump(original, buffer)
        yaml_text = buffer.getvalue()
        print(yaml_text)
        restored = Y.load(yaml_text)
        print('d', restored.type)
        # Instantiating shows whether the restored class's constructor runs.
        restored.type()
    except Exception as err:
        print(err)

这将返回:

----------------------
s <class '__main__.T1'>
!Aclass T1
...

Error: Tag not registered!
----------------------
s <class '__main__.T2'>
!Aclass T2
...

d <class '__main__.T2'>
Initializing...
<__main__.T2 object at 0x0000015B8EC82F60>
----------------------
s <class '__main__.T2_bad'>
!Aclass T2_bad
...

d <class '__main__.T2_bad'>
Evil code here!
<__main__.T2_bad object at 0x0000015B8EC82EF0>
----------------------
s <class '__main__.T3'>
!Aclass T3
...

d <class '__main__.T3'>
<__main__.T3 object at 0x0000015B8EC82E10>
----------------------
s <class 'ruamel.yaml.scalarstring.DoubleQuotedScalarString'>
!Aclass DoubleQuotedScalarString
...

Error: No attribute 'yaml_tag'!

可以看出,它仍然不安全(运行“邪恶代码”),也不允许未定义 yaml_tag 的类型。随意修改以解决此问题。

于 2017-08-20T14:28:05.477 回答