.pyc 文件包含一些元数据和一个经过 marshal 序列化的 code 对象;要加载该 code 对象并对其进行反汇编,请使用:
import dis, marshal, sys
# Size in bytes of the .pyc header (the metadata that precedes the marshalled
# code object), paired with the first Python version that uses that layout.
# Entries are ordered from oldest to newest.
header_sizes = [
    # (size, first version this applies to)
    # pyc files were introduced in 0.9.2 way, way back in June 1991.
    (8, (0, 9, 2)),  # 2 bytes magic number, \r\n, 4 bytes UNIX timestamp
    (12, (3, 3)),  # added 4 bytes file size (source size, new in Python 3.3)
    # bytes 4-8 are flags, meaning of 9-16 depends on what flags are set
    # bit 0 not set: 9-12 timestamp, 13-16 file size
    # bit 0 set: 9-16 file hash (SipHash-2-4, k0 = 4 bytes of the file, k1 = 0)
    (16, (3, 7)),  # inserted 4 bytes bit flag field at 4-8
    # future version may add more bytes still, at which point we can extend
    # this table. It is correct for Python versions up to 3.9
]

# Scan newest-first and take the first layout whose minimum version the
# running interpreter satisfies; the 0.9.2 entry guarantees a match.
header_size = next(s for s, v in reversed(header_sizes) if sys.version_info >= v)

# NOTE: `pycfile` is not defined in this snippet; set it to the path of the
# .pyc file to disassemble before running. The file must have been written by
# the same Python version that is running this code.
with open(pycfile, "rb") as f:
    metadata = f.read(header_size)  # first header_size bytes are metadata
    code = marshal.load(f)  # rest is a marshalled code object

dis.dis(code)
以 bisect 模块为例的演示:
>>> import bisect
>>> import dis, marshal
>>> import sys
>>> header_sizes = [(8, (0, 9, 2)), (12, (3, 3)), (16, (3, 7))]
>>> header_size = next(s for s, v in reversed(header_sizes) if sys.version_info >= v)
>>> pycfile = getattr(bisect, '__cached__', bisect.__file__)
>>> with open(pycfile, "rb") as f:
...     metadata = f.read(header_size)  # first header_size bytes are metadata
...     code = marshal.load(f)          # rest is bytecode
...
>>> dis.dis(code)
1 0 LOAD_CONST 0 ('Bisection algorithms.')
2 STORE_NAME 0 (__doc__)
3 4 LOAD_CONST 12 ((0, None))
6 LOAD_CONST 3 (<code object insort_right at 0x10694f3a0, file "/.../lib/python3.8/bisect.py", line 3>)
8 LOAD_CONST 4 ('insort_right')
10 MAKE_FUNCTION 1 (defaults)
12 STORE_NAME 1 (insort_right)
15 14 LOAD_CONST 13 ((0, None))
16 LOAD_CONST 5 (<code object bisect_right at 0x10694f2f0, file "/.../lib/python3.8/bisect.py", line 15>)
18 LOAD_CONST 6 ('bisect_right')
20 MAKE_FUNCTION 1 (defaults)
22 STORE_NAME 2 (bisect_right)
36 24 LOAD_CONST 14 ((0, None))
26 LOAD_CONST 7 (<code object insort_left at 0x10694f240, file "/.../lib/python3.8/bisect.py", line 36>)
28 LOAD_CONST 8 ('insort_left')
30 MAKE_FUNCTION 1 (defaults)
32 STORE_NAME 3 (insort_left)
49 34 LOAD_CONST 15 ((0, None))
36 LOAD_CONST 9 (<code object bisect_left at 0x10694f190, file "/.../lib/python3.8/bisect.py", line 49>)
38 LOAD_CONST 10 ('bisect_left')
40 MAKE_FUNCTION 1 (defaults)
42 STORE_NAME 4 (bisect_left)
71 44 SETUP_FINALLY 12 (to 58)
72 46 LOAD_CONST 1 (0)
48 LOAD_CONST 11 (('*',))
50 IMPORT_NAME 5 (_bisect)
52 IMPORT_STAR
54 POP_BLOCK
56 JUMP_FORWARD 20 (to 78)
73 >> 58 DUP_TOP
60 LOAD_NAME 6 (ImportError)
62 COMPARE_OP 10 (exception match)
64 POP_JUMP_IF_FALSE 76
66 POP_TOP
68 POP_TOP
70 POP_TOP
74 72 POP_EXCEPT
74 JUMP_FORWARD 2 (to 78)
>> 76 END_FINALLY
77 >> 78 LOAD_NAME 2 (bisect_right)
80 STORE_NAME 7 (bisect)
78 82 LOAD_NAME 1 (insort_right)
84 STORE_NAME 8 (insort)
86 LOAD_CONST 2 (None)
88 RETURN_VALUE
Disassembly of <code object insort_right at 0x10694f3a0, file "/.../lib/python3.8/bisect.py", line 3>:
12 0 LOAD_GLOBAL 0 (bisect_right)
2 LOAD_FAST 0 (a)
4 LOAD_FAST 1 (x)
6 LOAD_FAST 2 (lo)
8 LOAD_FAST 3 (hi)
10 CALL_FUNCTION 4
12 STORE_FAST 2 (lo)
13 14 LOAD_FAST 0 (a)
16 LOAD_METHOD 1 (insert)
18 LOAD_FAST 2 (lo)
20 LOAD_FAST 1 (x)
22 CALL_METHOD 2
24 POP_TOP
26 LOAD_CONST 1 (None)
28 RETURN_VALUE
Disassembly of <code object bisect_right at 0x10694f2f0, file "/.../lib/python3.8/bisect.py", line 15>:
26 0 LOAD_FAST 2 (lo)
2 LOAD_CONST 1 (0)
4 COMPARE_OP 0 (<)
6 POP_JUMP_IF_FALSE 16
27 8 LOAD_GLOBAL 0 (ValueError)
10 LOAD_CONST 2 ('lo must be non-negative')
12 CALL_FUNCTION 1
14 RAISE_VARARGS 1
28 >> 16 LOAD_FAST 3 (hi)
18 LOAD_CONST 3 (None)
20 COMPARE_OP 8 (is)
22 POP_JUMP_IF_FALSE 32
29 24 LOAD_GLOBAL 1 (len)
26 LOAD_FAST 0 (a)
28 CALL_FUNCTION 1
30 STORE_FAST 3 (hi)
30 >> 32 LOAD_FAST 2 (lo)
34 LOAD_FAST 3 (hi)
36 COMPARE_OP 0 (<)
38 POP_JUMP_IF_FALSE 80
31 40 LOAD_FAST 2 (lo)
42 LOAD_FAST 3 (hi)
44 BINARY_ADD
46 LOAD_CONST 4 (2)
48 BINARY_FLOOR_DIVIDE
50 STORE_FAST 4 (mid)
32 52 LOAD_FAST 1 (x)
54 LOAD_FAST 0 (a)
56 LOAD_FAST 4 (mid)
58 BINARY_SUBSCR
60 COMPARE_OP 0 (<)
62 POP_JUMP_IF_FALSE 70
64 LOAD_FAST 4 (mid)
66 STORE_FAST 3 (hi)
68 JUMP_ABSOLUTE 32
33 >> 70 LOAD_FAST 4 (mid)
72 LOAD_CONST 5 (1)
74 BINARY_ADD
76 STORE_FAST 2 (lo)
78 JUMP_ABSOLUTE 32
34 >> 80 LOAD_FAST 2 (lo)
82 RETURN_VALUE
Disassembly of <code object insort_left at 0x10694f240, file "/.../lib/python3.8/bisect.py", line 36>:
45 0 LOAD_GLOBAL 0 (bisect_left)
2 LOAD_FAST 0 (a)
4 LOAD_FAST 1 (x)
6 LOAD_FAST 2 (lo)
8 LOAD_FAST 3 (hi)
10 CALL_FUNCTION 4
12 STORE_FAST 2 (lo)
46 14 LOAD_FAST 0 (a)
16 LOAD_METHOD 1 (insert)
18 LOAD_FAST 2 (lo)
20 LOAD_FAST 1 (x)
22 CALL_METHOD 2
24 POP_TOP
26 LOAD_CONST 1 (None)
28 RETURN_VALUE
Disassembly of <code object bisect_left at 0x10694f190, file "/.../lib/python3.8/bisect.py", line 49>:
60 0 LOAD_FAST 2 (lo)
2 LOAD_CONST 1 (0)
4 COMPARE_OP 0 (<)
6 POP_JUMP_IF_FALSE 16
61 8 LOAD_GLOBAL 0 (ValueError)
10 LOAD_CONST 2 ('lo must be non-negative')
12 CALL_FUNCTION 1
14 RAISE_VARARGS 1
62 >> 16 LOAD_FAST 3 (hi)
18 LOAD_CONST 3 (None)
20 COMPARE_OP 8 (is)
22 POP_JUMP_IF_FALSE 32
63 24 LOAD_GLOBAL 1 (len)
26 LOAD_FAST 0 (a)
28 CALL_FUNCTION 1
30 STORE_FAST 3 (hi)
64 >> 32 LOAD_FAST 2 (lo)
34 LOAD_FAST 3 (hi)
36 COMPARE_OP 0 (<)
38 POP_JUMP_IF_FALSE 80
65 40 LOAD_FAST 2 (lo)
42 LOAD_FAST 3 (hi)
44 BINARY_ADD
46 LOAD_CONST 4 (2)
48 BINARY_FLOOR_DIVIDE
50 STORE_FAST 4 (mid)
66 52 LOAD_FAST 0 (a)
54 LOAD_FAST 4 (mid)
56 BINARY_SUBSCR
58 LOAD_FAST 1 (x)
60 COMPARE_OP 0 (<)
62 POP_JUMP_IF_FALSE 74
64 LOAD_FAST 4 (mid)
66 LOAD_CONST 5 (1)
68 BINARY_ADD
70 STORE_FAST 2 (lo)
72 JUMP_ABSOLUTE 32
67 >> 74 LOAD_FAST 4 (mid)
76 STORE_FAST 3 (hi)
78 JUMP_ABSOLUTE 32
68 >> 80 LOAD_FAST 2 (lo)
82 RETURN_VALUE
请注意,顶级代码对象(定义模块本身)与函数和类的代码对象是分开显示的。在 Python 3.6 及更早版本中,dis.dis() 函数不会递归。在这些版本中,如果您想分析模块内包含的函数,则需要从顶级 code.co_consts 数组中加载嵌套的代码对象。例如,insort_right 函数的代码对象是通过 LOAD_CONST 3 加载的,因此您可以在该索引处查找代码对象:
>>> code.co_consts[3]
<code object insort_right at 0x10694f3a0, file "/.../lib/python3.8/bisect.py", line 3>
>>> dis.dis(code.co_consts[3])
12 0 LOAD_GLOBAL 0 (bisect_right)
2 LOAD_FAST 0 (a)
4 LOAD_FAST 1 (x)
6 LOAD_FAST 2 (lo)
8 LOAD_FAST 3 (hi)
10 CALL_FUNCTION 4
12 STORE_FAST 2 (lo)
13 14 LOAD_FAST 0 (a)
16 LOAD_METHOD 1 (insert)
18 LOAD_FAST 2 (lo)
20 LOAD_FAST 1 (x)
22 CALL_METHOD 2
24 POP_TOP
26 LOAD_CONST 1 (None)
28 RETURN_VALUE
我个人会避免使用匹配的 Python 版本及其 marshal 模块以外的任何工具来解析 .pyc 文件。marshal 格式基本上是一种内部序列化格式,会随着 Python 本身的需要而变化。列表推导、with 语句以及 async/await 等新功能都需要对该格式进行新的扩充,而这种格式除了 C 源代码之外并没有公开的文档。
如果您确实要走这条路,并设法通过 marshal 模块以外的其他方式读取了 code 对象,则必须根据代码对象的各种属性自行解析出反汇编结果;有关具体做法,请参阅 dis 模块的源代码(例如,您必须使用 co_firstlineno 和 co_lnotab 属性来建立字节码偏移量到行号的映射)。