# If you want to print a pandas DataFrame, this may be helpful.
from typing import Callable
import pandas as pd
import weasyprint as wp
def html_basic(df: pd.DataFrame) -> str:
# Using df.style.render outputs an id in every cell,
# whilst using df.to_html doesn't.
return df.style.render()
def _page_css(page_name: str, width_mm: float, height_mm: float) -> "wp.CSS":
    """Build a stylesheet that lays the body out on a named page of the given size (mm)."""
    return wp.CSS(string=(
        f"@page {page_name} {{\n"
        f"    size: {width_mm}mm {height_mm}mm;\n"
        "}\n"
        "body {\n"
        f"    page: {page_name};\n"
        "}\n"
    ))


def write_pdf_autofit(df: pd.DataFrame,
                      preamble: str,
                      fn_df_to_html: Callable[[pd.DataFrame], str] = html_basic
                      ) -> bytes:
    """Render ``df`` to PDF bytes, zooming out so the table fits an A4 page.

    The table is first test-rendered on a very tall page to measure how far
    it extends. If it exceeds the printable area, the final page is enlarged
    by ``1 / zoom`` and the PDF is written with ``zoom`` so the output comes
    out at A4 size (210mm x 297mm).

    Args:
        df: The DataFrame to print.
        preamble: HTML inserted verbatim in ``<body>`` ahead of the table.
        fn_df_to_html: Converts a DataFrame to HTML. The measurement relies
            on the anchors WeasyPrint reports for the rendered page, so the
            markup should give the cells ``id`` attributes; if no anchors
            are found the table is written unscaled.

    Returns:
        The PDF document as bytes.
    """
    def build_html(frame: pd.DataFrame) -> str:
        # Plain interpolation rather than str.format(): the preamble may
        # legitimately contain braces (e.g. an inline <style> block), which
        # str.format() would try to interpret as replacement fields.
        return f"<html><body>{preamble}{fn_df_to_html(frame)}</body></html>"

    # Render on a very long page so that there's no pagination.
    # Width doesn't matter, because overflow is allowed on width.
    probe_css = _page_css("longpage", 210, 10000)

    # Measure on a copy with a dummy final column: the left edge of the
    # dummy column is the right edge of the last real column.
    df_probe = df.copy()
    dummy_col = "x"
    while dummy_col in df_probe.columns:
        # Never overwrite a real column; the dummy must be an extra one.
        dummy_col += "_"
    # float("nan") avoids depending on numpy being imported in this module.
    df_probe[dummy_col] = float("nan")

    probe_doc = wp.HTML(string=build_html(df_probe)).render(
        stylesheets=[probe_css])
    first_page = probe_doc.pages[0]

    # NOTE: _page_box is a private WeasyPrint attribute and may change
    # between releases.
    # Only one margin is subtracted per axis; presumably anchor positions
    # are measured from the page's top-left corner, so the leading margin
    # is already included in them -- TODO confirm.
    printable_width = first_page.width - first_page._page_box.margin_left
    # A4 height of 297mm is about 11.7in, at 96 CSS pixels per inch.
    printable_height = 11.7 * 96 - first_page._page_box.margin_top

    # Every cell with an id is an anchor, so the table's extent is the
    # largest x and the largest y over all anchor positions.
    zoom_pct = 1
    anchor_points = list(first_page.anchors.values())
    if anchor_points:
        max_x, max_y = (max(axis) for axis in zip(*anchor_points))
        if max_x > printable_width or max_y > printable_height:
            zoom_pct = min(printable_width / max_x,
                           printable_height / max_y)

    # Increase the page size to fit the table, then zoom out in write_pdf
    # so the result fits the standard page size (A4 = 210mm x 297mm).
    final_css = _page_css("scaled", 210 / zoom_pct, 297 / zoom_pct)
    return wp.HTML(string=build_html(df)).write_pdf(zoom=zoom_pct,
                                                    stylesheets=[final_css])
if __name__ == "__main__":
    import numpy as np

    # Demo: print a 100x4 table of random integers to a PDF file.
    DF = pd.DataFrame(np.random.randint(0, 100, size=(100, 4)), columns=list('ABCD'))
    # Render before opening the output file, so that a rendering failure
    # does not leave a truncated, empty PDF behind.
    PDF_BYTES = write_pdf_autofit(DF, "")
    with open(r'c:\temp\x.pdf', 'wb') as f:
        f.write(PDF_BYTES)