经过几次迭代,我想出了以下代码:
# Export a notebook to a standalone HTML file (with table of contents),
# dropping every cell tagged 'remove_cell'.
main_name = 'your_notebook_name'  # or use ipynbname to get it automatically
nb_fname = main_name + '.ipynb'  # Input file
html_fname = main_name + '.html'  # Output file

# Disable the ExtractOutput preprocessor. This prevents images embedded in the
# notebook (e.g. plots) from being externally linked.
config = {'ExtractOutputPreprocessor': {'enabled': False}}

# Find the HTML (with TOC) exporter.
# NOTE(review): 'html_toc' is not a built-in nbconvert exporter; presumably it
# is registered by jupyter_contrib_nbextensions -- confirm it is installed.
# get_exporter is taken from nbconvert.exporters directly because the
# nbconvert.exporters.exporter_locator shim is deprecated.
import nbconvert.exporters
HTMLTOCExporter = nbconvert.exporters.get_exporter('html_toc')
exporter = HTMLTOCExporter(config)

# Add a preprocessor to the exporter to remove the notebook cells with the
# tag 'remove_cell'.
from nbconvert.preprocessors import TagRemovePreprocessor
cell_remover = TagRemovePreprocessor(remove_cell_tags={'remove_cell'})
exporter.register_preprocessor(cell_remover, enabled=True)

# Generate HTML and write it to a file. The encoding is explicit so that
# non-ASCII notebook content does not depend on the platform default.
html, resources = exporter.from_filename(nb_fname)
with open(html_fname, 'w', encoding='utf-8') as f:
    f.write(html)
加分项:把 Markdown 单元格中以 `![text](path_to_file)` 形式引用的外部图像直接嵌入到导出的 HTML 中
您需要定义一个自定义预处理器,并在调用 `exporter.from_filename(...)` 之前注册它。
自定义预处理器
import base64
import mimetypes
import os
import re
# Aliased: the export script rebinds the bare name `html` to the generated page.
from html import escape as escape_html

from nbconvert.preprocessors import Preprocessor
class EmbedExternalImagesPreprocessor(Preprocessor):
    """Inline local images referenced from Markdown cells as base64 data URIs.

    Every ``![alt_text](file)`` occurrence in a Markdown cell whose target is
    a local file is replaced by an ``<img>`` tag that carries the file's
    contents, so the exported HTML no longer depends on the image files.
    """

    # Markdown image syntax: ![alt_text](target)
    _MARKDOWN_IMAGE = re.compile(r'!\[(.*?)\]\((.*?)\)')

    def preprocess_cell(self, cell, resources, cell_index):
        """Embed the images of one cell.

        Args:
            cell: Notebook cell; only cells whose ``cell_type`` is
                ``'markdown'`` are modified (their ``source`` is rewritten
                in place).
            resources: Passed through unchanged.
            cell_index: Unused.

        Returns:
            The ``(cell, resources)`` pair.

        Raises:
            OSError: If a referenced local image file cannot be read.
        """
        if cell.get('cell_type', '') == 'markdown':
            cell['source'] = self._MARKDOWN_IMAGE.sub(
                self._embed_image, cell['source']
            )
        return cell, resources

    @staticmethod
    def _embed_image(match):
        """Return the ``<img>`` tag replacing one Markdown image match."""
        alt_text, target = match.groups()

        # Remote images and already-inlined data URIs are not files on disk;
        # leave the Markdown untouched instead of failing in open().
        if '://' in target or target.startswith('data:'):
            return match.group(0)

        # Read the image file and encode it in base64.
        with open(target, 'rb') as image_file:
            b64_image = base64.b64encode(image_file.read()).decode('ascii')

        # A data URI needs a real MIME type ("image/png", "image/jpeg",
        # "image/svg+xml"), not the raw file extension with its leading dot.
        mime_type, _ = mimetypes.guess_type(target)
        if mime_type is None:
            extension = os.path.splitext(target)[1].lstrip('.').lower()
            mime_type = f'image/{extension}'

        # The alt text ends up inside an HTML attribute, so it must be escaped.
        return (
            f'<img src="data:{mime_type};base64,{b64_image}" '
            f'alt="{escape_html(alt_text, quote=True)}">'
        )
注册新的预处理器
# Export a notebook to a standalone HTML file (with table of contents),
# dropping cells tagged 'remove_cell' and inlining Markdown-referenced images.
main_name = 'your_notebook_name'  # or use ipynbname to get it automatically
nb_fname = main_name + '.ipynb'  # Input file
html_fname = main_name + '.html'  # Output file

# Disable the ExtractOutput preprocessor. This prevents images embedded in the
# notebook (e.g. plots) from being externally linked.
config = {'ExtractOutputPreprocessor': {'enabled': False}}

# Find the HTML (with TOC) exporter.
# NOTE(review): 'html_toc' is not a built-in nbconvert exporter; presumably it
# is registered by jupyter_contrib_nbextensions -- confirm it is installed.
# get_exporter is taken from nbconvert.exporters directly because the
# nbconvert.exporters.exporter_locator shim is deprecated.
import nbconvert.exporters
HTMLTOCExporter = nbconvert.exporters.get_exporter('html_toc')
exporter = HTMLTOCExporter(config)

# Add a preprocessor to the exporter to remove the notebook cells with the
# tag 'remove_cell', then the custom one that embeds external images.
from nbconvert.preprocessors import TagRemovePreprocessor, ExtractOutputPreprocessor
cell_remover = TagRemovePreprocessor(remove_cell_tags={'remove_cell'})
exporter.register_preprocessor(cell_remover, enabled=True)
exporter.register_preprocessor(EmbedExternalImagesPreprocessor(), enabled=True)

# Generate HTML and write it to a file. The encoding is explicit so that
# non-ASCII notebook content does not depend on the platform default.
html, resources = exporter.from_filename(nb_fname)
with open(html_fname, 'w', encoding='utf-8') as f:
    f.write(html)