爬取地址:https://www.gupiaoxuexi.com/post/1319
源码:
from requests_html import HTMLSession
import os
# Fetch the target post with a desktop-Chrome User-Agent header and choose
# the local path the page will be written to.
session = HTMLSession()
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36'
}
url = 'https://www.gupiaoxuexi.com/post/1319'
# Bound the request: without a timeout an unresponsive server would block
# this script indefinitely.
response = session.get(url, headers=headers, timeout=30)
# HTML markup of the fetched page, as text.
html = response.html.html
# html = html.replace('https://www.gupiaoxuexi.com/post/', '')
# Save the front page to a local .html file.
files = './pac/1/1.html'
# open() does not create missing parent directories, so make sure the
# destination folder exists before the file is opened for writing.
os.makedirs(os.path.dirname(files), exist_ok=True)
with open(files, "a", encoding="UTF-8"