我有一份扩展名为 .Doc 的简历。如何从该简历中提取文本数据？
import subprocess
import sys
def open_doc_file(file_name, executable='antiword'):
    """Extract the text of a Word ``.doc`` file by running an external converter.

    Runs ``<executable> <file_name>`` as a child process and captures what it
    writes to stdout and stderr as text.

    Args:
        file_name: Path of the document; passed to the converter as its only
            argument.
        executable: Name or full path of the converter program. Defaults to
            ``'antiword'``. Pass a full path (for example to ``antiword.exe``)
            when the program is not on ``PATH``.

    Returns:
        A 2-tuple. If the process was started: ``(stdout, stderr)`` with
        leading/trailing whitespace stripped from both. If it could not be
        started: ``(None, message)`` where ``message`` describes the failure.
    """
    try:
        process = subprocess.Popen(
            [executable, file_name],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            universal_newlines=True,
        )
    except FileNotFoundError as err:
        # At launch time the document is only an argument, so this error means
        # the converter program itself could not be found (Windows reports it
        # as "[WinError 2] ..."). Keep the OS message and say which program
        # was missing so the failure is not mistaken for a missing document.
        hint = ' (could not launch {!r}; is it installed and on PATH?)'.format(
            executable
        )
        return (None, str(err) + hint)
    except (ValueError, subprocess.SubprocessError) as err:
        # SubprocessError already covers subprocess.TimeoutExpired.
        return (None, str(err))
    else:
        # The context manager closes the pipes and reaps the child even if
        # communicate() raises.
        with process:
            stdout, stderr = process.communicate()
        return (stdout.strip(), stderr.strip())
# Extract text from every Word document named in file_names (defined outside
# this snippet). The extension check is case-insensitive so that names such as
# "resume.Doc" or "CV.DOC" are not silently skipped.
for file_name in file_names:
    if file_name.lower().endswith('.doc'):
        # open_doc_file returns a 2-tuple: (stdout, stderr) on a successful
        # launch, or (None, error message) otherwise; it is printed as-is.
        document = open_doc_file(file_name)
        print(document)
运行上面的代码后，得到的输出是：(None, '[WinError 2] The system cannot find the file specified')