如果你使用 selenium 和 chromium,你不需要自己管理 cookie,你可以从 chromium 的 print 生成 pdf 页面为 pdf。你可以参考这个项目来实现它。
https://github.com/maxvst/python-selenium-chrome-html-to-pdf-converter
修改基础 > https://github.com/maxvst/python-selenium-chrome-html-to-pdf-converter/blob/master/sample/html_to_pdf_converter.py
import sys
import json, base64
def send_devtools(driver, cmd, params={}):
resource = "/session/%s/chromium/send_command_and_get_result" % driver.session_id
url = driver.command_executor._url + resource
body = json.dumps({'cmd': cmd, 'params': params})
response = driver.command_executor._request('POST', url, body)
return response.get('value')
def get_pdf_from_html(driver, url, print_options={}, output_file_path="example.pdf"):
driver.get(url)
calculated_print_options = {
'landscape': False,
'displayHeaderFooter': False,
'printBackground': True,
'preferCSSPageSize': True,
}
calculated_print_options.update(print_options)
result = send_devtools(driver, "Page.printToPDF", calculated_print_options)
data = base64.b64decode(result['data'])
with open(output_file_path, "wb") as f:
f.write(data)
# example
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
url = "https://stackoverflow.com/questions/23359083/how-to-convert-webpage-into-pdf-by-using-python#"
webdriver_options = Options()
webdriver_options.add_argument("--no-sandbox")
webdriver_options.add_argument('--headless')
webdriver_options.add_argument('--disable-gpu')
driver = webdriver.Chrome(chromedriver, options=webdriver_options)
get_pdf_from_html(driver, url)
driver.quit()