我目前的方法是启用“import x”和“from x import y”依赖捆绑。对于当前的实现,一个缺点是它会在每个使用的模块中创建方法的副本,而在代码源中,每个用法只是对内存中相同方法的引用(尽管我在这里有冲突的结果 -请参阅代码后的部分)。
/// analysis_script.py /// (为简洁起见,排除了依赖项)
import test_module
from third_level_module import z
def f():
for i in range(1,5):
test_module.g('blah string used by g')
z()
/// 驱动程序.py ///
import modutil
import analysis_script
modutil.serialize_module_with_dependencies(analysis_script)
/// modutil.py ///
import sys
import modulefinder
import os
import inspect
import marshal
def dump_module(funcfile, name, module):
functions_list = [o for o in inspect.getmembers(module) if inspect.isfunction(o[1])]
print 'module name:' + name
marshal.dump(name, funcfile)
for func in functions_list:
print func
marshal.dump(func[1].func_code, funcfile)
def serialize_module_with_dependencies(module):
python_path = os.environ['PYTHONPATH'].split(os.pathsep)
module_path = os.path.dirname(module.__file__)
#planning to search for modules only on this python path and under the current scripts working directory
#standard libraries should be expected to be installed on the target platform
search_dir = [python_path, module_path]
mf = modulefinder.ModuleFinder(search_dir)
#__file__ returns the pyc after first run
#in this case we use replace to get the py file since we need that for our call to mf.run_script
src_file = module.__file__
if '.pyc' in src_file:
src_file = src_file.replace('.pyc', '.py')
mf.run_script(src_file)
funcfile = open("functions.pickle", "wb")
dump_module(funcfile, 'sandbox', module)
for name, mod in mf.modules.iteritems():
#the sys module is included by default but has no file and we don't want it anyway, i.e. should
#be on the remote systems path. __main__ we also don't want since it should be virtual empty and
#just used to invoke this function.
if not name == 'sys' and not name == '__main__':
dump_module(funcfile, name, sys.modules[name])
funcfile.close()
/// sandbox_reader.py ///
import marshal
import types
import imp
sandbox_module = imp.new_module('sandbox')
dynamic_modules = {}
current_module = ''
with open("functions.pickle", "rb") as funcfile:
while True:
try:
code = marshal.load(funcfile)
except EOFError:
break
if isinstance(code,types.StringType):
print "module name:" + code
if code == 'sandbox':
current_module = "sandbox"
else:
current_module = imp.new_module(code)
dynamic_modules[code] = current_module
exec 'import '+code in sandbox_module.__dict__
elif isinstance(code,types.CodeType):
print "func"
if current_module == "sandbox":
func = types.FunctionType(code, sandbox_module.__dict__, code.co_name)
setattr(sandbox_module, code.co_name, func)
else:
func = types.FunctionType(code, current_module.__dict__, code.co_name)
setattr(current_module, code.co_name, func)
else:
raise Exception( "unknown type received")
#yaa! actually invoke the method
sandbox_module.f()
del sandbox_module
例如,函数图在序列化之前如下所示:
module name:sandbox
('f', <function f at 0x15e07d0>)
('z', <function z at 0x7f47d719ade8>)
module name:test_module
('g', <function g at 0x15e0758>)
('z', <function z at 0x7f47d719ade8>)
module name:third_level_module
('z', <function z at 0x7f47d719ade8>)
具体来说,查看函数 z 我们可以看到所有引用都指向同一个地址,即 0x7f47d719ade8。
在沙盒重建后的远程进程上,我们有:
print sandbox_module.z
<function z at 0x1a071b8>
print sandbox_module.third_level_module.z
<function z at 0x1a072a8>
print sandbox_module.test_module.z
<function z at 0x1a072a8>
这让我大吃一惊!我原以为这里的所有地址在重建后都是唯一的,但由于某种原因 sandbox_module.test_module.z 和 sandbox_module.third_level_module.z 有相同的地址?