另一个答案只把没有缩进的行视为全局变量声明。虽然这有效地排除了函数体和类定义体,但它会漏掉在 `if` 语句中定义的全局变量。
这样的定义并不少见,例如因所用操作系统而异的常量等。
正如在问题下的评论中所争论的那样,任何静态分析都必然是不完美的,因为 Python 的动态特性使得无法完全准确地决定哪些变量是全局定义的,除非程序实际执行。
因此,下面的方法也只是一种近似。不过,它确实考虑了上文提到的 `if` 语句内部的全局变量定义。
由于这最好通过实际分析源文件的解析树来完成,bash 脚本不再是合适的选择。
不过,方便的是,Python 本身允许通过这里使用的 `ast` 包轻松访问解析树。
from argparse import ArgumentParser, SUPPRESS
import ast
from collections import Counter
from re import match as re_startswith
import os
import subprocess
import sys
# extract variable information from assign statements
def process_assign(target, results):
    """Record every plain name bound by an assignment target.

    Args:
        target: An ``ast`` node taken from the target side of an
            assignment. Names are recorded; tuple and list targets are
            unpacked recursively; a starred target (``*rest``) is
            unwrapped. Any other node (attribute, subscript, ...) does
            not bind a variable name and is ignored.
        results: List that receives one ``(lineno, col_offset, name)``
            tuple per bound name. It is extended in place.
    """
    if isinstance(target, ast.Name):
        results.append((target.lineno, target.col_offset, target.id))
    elif isinstance(target, ast.Starred):
        # ``a, *rest = ...`` binds ``rest`` as well
        process_assign(target.value, results)
    elif isinstance(target, (ast.Tuple, ast.List)):
        # ``a, b = ...`` and ``[a, b] = ...`` are both unpacking targets
        for element in target.elts:
            process_assign(element, results)
# extract variable information from delete statements
def process_delete(target, results):
    """Forget every recorded definition of the names removed by ``del``.

    Args:
        target: An ``ast`` node taken from the targets of a ``del``
            statement. Names are removed; tuple and list targets
            (``del (a, b)`` / ``del [a, b]``) are handled recursively;
            other nodes are ignored.
        results: List of ``(lineno, col_offset, name)`` tuples. It is
            filtered in place so that callers holding a reference to the
            same list see the change.
    """
    if isinstance(target, ast.Name):
        results[:] = [entry for entry in results if entry[2] != target.id]
    elif isinstance(target, (ast.Tuple, ast.List)):
        for element in target.elts:
            process_delete(element, results)
# recursively walk the parse tree of the source file
def process_node(node, results):
    """Collect global variable definitions found below *node*.

    Plain and annotated assignments add entries to *results*, ``del``
    statements remove them again. Function bodies (including
    ``async def``) and class bodies are not descended into, because
    names bound there are not module-level variables. Every other
    compound statement (``if``, ``try``, ``with``, loops, ...) is walked
    recursively.

    Args:
        node: The ``ast`` node to inspect, usually the module node
            returned by ``ast.parse``.
        results: List of ``(lineno, col_offset, name)`` tuples, updated
            in place.
    """
    if isinstance(node, ast.Assign):
        for target in node.targets:
            process_assign(target, results)
    elif isinstance(node, ast.AnnAssign):
        # a bare annotation (``x: int``) does not bind the name, only an
        # annotated assignment with a value does
        if node.value is not None:
            process_assign(node.target, results)
    elif isinstance(node, ast.Delete):
        for target in node.targets:
            process_delete(target, results)
    elif not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef,
                               ast.ClassDef)):
        for child in ast.iter_child_nodes(node):
            process_node(child, results)
def get_arg_parser(help_text=None):
    """Build an argument parser that accepts the options etags accepts.

    The option list is derived from etags' help screen: every line that
    starts with a dash at the most common indentation is taken as an
    option line, e.g. ``-o FILE, --output=FILE``. Each comma separated
    entry on such a line is registered as its own option; an entry that
    is followed by a value placeholder (``-o FILE`` or ``--output=FILE``)
    takes a value, all others are flags. ``-h``/``--help`` are left out
    because the caller handles them itself.

    Args:
        help_text: The help screen to analyse. When omitted, it is
            obtained by running ``etags -h``.

    Returns:
        An ``ArgumentParser`` configured with the discovered options and
        a ``files`` positional that collects the remaining arguments.
    """
    # create the parser to configure
    parser = ArgumentParser(usage=SUPPRESS, add_help=False)
    # run etags to find out about the supported command line parameters
    if help_text is None:
        help_text = subprocess.check_output(['etags', '-h'],
                                            encoding='utf-8')
    dashlines = [line for line in help_text.split('\n')
                 if re_startswith(r'\s*-', line)]
    # ignore lines that start with a dash but don't have the right
    # indentation (ties are resolved in favour of the deeper indent)
    arglines = []
    if dashlines:
        indent_counts = Counter(line.index('-') for line in dashlines)
        most_common_indent = max(indent_counts.items(),
                                 key=lambda item: (item[1], item[0]))[0]
        arglines = [line for line in dashlines
                    if line.index('-') == most_common_indent]
    registered = set()
    for argline in arglines:
        # the various 'argline' entries contain the command line
        # arguments for etags, sometimes more than one separated by
        # commas.
        for fragment in argline.split(','):
            # split into words first so that the blank after a comma is
            # not mistaken for the blank in front of a value placeholder
            words = fragment.split()
            # 'A or B' lists an alternative spelling; keep only 'A'.
            # Matching whole words avoids cutting options that merely
            # contain the letters 'or'.
            if 'or' in words:
                words = words[:words.index('or')]
            if not words:
                continue
            arg, has_equals, _ = words[0].partition('=')
            takes_value = bool(has_equals) or len(words) > 1
            action = 'store' if takes_value else 'store_true'
            # anything not shaped like an option would be registered as
            # a required positional and break parsing
            if not arg.startswith('-') or not arg.lstrip('-'):
                continue
            if arg in ('-h', '--help') or arg in registered:
                continue
            registered.add(arg)
            parser.add_argument(arg, action=action)
    # we know we need files to run on
    parser.add_argument('files', nargs='*', metavar='file')
    # the parser is configured now to accept all of etags' arguments
    return parser
def _tags_section(filename):
    """Return the TAGS entries for the global variables in *filename*.

    The file is read as UTF-8 and parsed as Python source. An empty
    string is returned when the file cannot be read or is not valid
    Python (etags handles many languages, so non-Python inputs are
    expected); such files are simply left to etags itself.
    """
    try:
        with open(filename, 'r', encoding='utf-8') as f:
            lines = f.readlines()
        root_node = ast.parse(''.join(lines), filename)
    except (OSError, SyntaxError, ValueError):
        # ValueError also covers UnicodeDecodeError
        return ""
    # offsets[i] is the byte position at which line i+1 starts
    offsets = [0]
    for line in lines:
        offsets.append(offsets[-1] + len(line.encode('utf-8')))
    # extract global variable definitions
    vardefs = []
    process_node(root_node, vardefs)
    # one entry per definition: the source text up to the '=' sign, the
    # variable name, the line number and a byte position
    entries = []
    for lineno, column, varname in vardefs:
        line = lines[lineno - 1]
        offset = offsets[lineno - 1]
        # without '=' on the line, -1 drops the trailing newline
        end = line.index('=') if '=' in line else -1
        entries.append(
            f"{line[:end]}\x7f{varname}\x01{lineno},{offset + column - 1}\n")
    return ''.join(entries)


def _main(etags_args):
    """Run the wrapper with the given etags arguments; return exit status.

    A secondary tags file holding the global variables of the given
    source files is written first (named after etags' output file with a
    '2' appended, 'TAGS2' by default). etags is then run with the
    original arguments plus ``-i <secondary file>`` so that the primary
    tags file includes it. If etags fails, the secondary file is removed.
    """
    # construct a parser for the command line arguments, unless
    # -h/-help/--help is given in which case we just print the help
    # screen
    known_ns = None
    if any(flag in etags_args for flag in ('-h', '-help', '--help')):
        unknown_args = True
    else:
        argparser = get_arg_parser()
        known_ns, unknown_args = argparser.parse_known_args(etags_args)
    # if something's wrong with the command line arguments, print
    # etags' help screen and exit
    if unknown_args:
        subprocess.run(['etags', '-h'], encoding='utf-8')
        return 1
    # we base the output filename on the TAGS file name. Other than
    # that, we only care about the actual filenames to parse, and all
    # other command line arguments are simply passed to etags later on
    output = (getattr(known_ns, 'o', None)
              or getattr(known_ns, 'output', None))
    tags_file = output + '2' if output else 'TAGS2'
    filenames = known_ns.files
    if filenames:
        # TAGS file sections, one per source file (kept aligned with
        # 'filenames'; files without entries yield an empty section)
        sections = [_tags_section(filename) for filename in filenames]
        # write TAGS file; the size in each header counts UTF-8 bytes,
        # so the file has to be written as UTF-8 as well
        with open(tags_file, 'w', encoding='utf-8') as f:
            for filename, section in zip(filenames, sections):
                if section:
                    f.write("\x0c\n")
                    f.write(filename)
                    f.write(",")
                    f.write(str(len(section.encode('utf-8'))))
                    f.write("\n")
                    f.write(section)
                    f.write("\n")
        # make sure etags includes the newly created file
        etags_args = etags_args + ['-i', tags_file]
    # now run the actual etags to take care of all other definitions
    status = 1
    try:
        cp = subprocess.run(['etags'] + etags_args, encoding='utf-8')
        status = cp.returncode
    except OSError as exc:
        print(f"cannot run etags: {exc}", file=sys.stderr)
    finally:
        # if etags did not finish successfully (including being
        # interrupted), remove the tags_file
        if status != 0:
            try:
                os.remove(tags_file)
            except FileNotFoundError:
                # nothing to be removed
                pass
    return status


if __name__ == '__main__':
    sys.exit(_main(sys.argv[1:]))
与另一个答案一样,此脚本旨在直接替代标准的 `etags`,因为它在内部调用后者。
因此它也接受 `etags` 的所有命令行参数(但目前不支持 `-a`)。
建议在 shell 的初始化文件中添加一个别名,例如将以下行添加到 `~/.bashrc`:
alias etags+="python3 -u /path/to/script.py"
其中 `/path/to/script.py` 是保存上述代码的文件的路径。有了这样的别名,您就可以简单地调用
etags+ /path/to/file
等等。