1、找到所需html内容
import requests
# Address of the blog post whose HTML we want to download.
url = 'https://blog.csdn.net/weixin_43495390/article/details/86533482'

# Extra request headers; the tutorial leaves this dict empty.
headers = {}

# Fetch the page; the HTML source is then available as response.text.
response = requests.get(url, headers=headers)
在返回的 HTML 源码中找到文章正文开始的位置(原文此处配有截图),
把这部分内容保存到 a.html 文件里。
2、安装 pdfkit:pip install pdfkit
3、安装wkhtmltopdf软件
下载地址:https://wkhtmltopdf.org/downloads.html
根据自己的操作系统选择对应版本,下载后解压或安装,并记住安装路径(后面配置 pdfkit 时要用到)
4、转换
import pdfkit
# Installation path of the wkhtmltopdf executable.
config = pdfkit.configuration(
    wkhtmltopdf='D:\\wkhtmltox\\bin\\wkhtmltopdf.exe',
)

# Convert the saved page a.html into b.pdf using that executable.
pdfkit.from_file('a.html', 'b.pdf', configuration=config)
5、全部代码
import requests
import parsel
import pdfkit
# Minimal HTML page that wraps the extracted <article> markup.
_HTML_TEMPLATE = """
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Title</title>
</head>
<body>
{article}
</body>
</html>
"""

# Characters that cannot appear in a file name on Windows (and '/' elsewhere),
# plus line breaks/tabs that may surround the heading text; each maps to '_'.
_FILENAME_TRANSLATION = str.maketrans(
    {ch: '_' for ch in '\\/:*?"<>|\r\n\t'}
)


def _safe_filename(title):
    """Return *title* made safe for use as a file name ('untitled' if empty)."""
    cleaned = (title or '').translate(_FILENAME_TRANSLATION).strip(' .')
    return cleaned or 'untitled'


def get(url, *, headers=None,
        wkhtmltopdf='D:\\wkhtmltox\\bin\\wkhtmltopdf.exe'):
    """Download a blog post and save it as ``<title>.html`` and ``<title>.pdf``.

    The page is fetched with ``requests``, the article element is extracted
    with a CSS selector, wrapped in a minimal HTML page, written to disk and
    finally converted to PDF with ``pdfkit``.

    Args:
        url: Address of the blog post.
        headers: Optional mapping of extra HTTP request headers. Defaults to
            no extra headers, as in the original code.
            NOTE(review): some sites may require a browser-like User-Agent;
            pass one here if the request is rejected.
        wkhtmltopdf: Path to the wkhtmltopdf executable handed to pdfkit.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If no article element can be found in the page.
    """
    # A timeout keeps the call from hanging forever on an unresponsive server.
    response = requests.get(url, headers=dict(headers or {}), timeout=30)
    # Fail loudly instead of converting an error page into a PDF.
    response.raise_for_status()

    sel = parsel.Selector(response.text)
    article = sel.css(
        '#mainBox > main > div.blog-content-box > article'
    ).get()
    if article is None:
        # The plain 'article' selector also works, so use it as a fallback.
        article = sel.css('article').get()
    if article is None:
        # Without this check the literal text "None" would end up in the PDF.
        raise ValueError(f'no <article> element found at {url}')

    # The heading becomes the file name, so it must not contain characters
    # such as '/', ':' or '?' and must not be missing.
    title = _safe_filename(sel.css('h1::text').get())
    html_path = f'{title}.html'
    pdf_path = f'{title}.pdf'

    page = _HTML_TEMPLATE.format(article=article)
    with open(html_path, mode='w', encoding='utf-8') as f:
        f.write(page)

    config = pdfkit.configuration(wkhtmltopdf=wkhtmltopdf)
    pdfkit.from_file(html_path, pdf_path, configuration=config)
if __name__ == '__main__':
    # When run as a script, convert the sample blog post.
    sample_url = 'https://blog.csdn.net/weixin_43495390/article/details/86533482'
    get(url=sample_url)
补充:css选择器