# 写入文件
filename = ‘第’+str(i) + ‘页.html’
with open(filename, ‘w’, encoding=‘utf-8’) as f:
f.write(html)
print(f’正在爬取第{i}页’)
==========================================================================
import urllib.request
import urllib.parse
# 分析功能:读取页面、写入文件、主函数
def readPage(url):
headers = {
‘User-Agent’: ‘Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36’
}
# 发请求
req = urllib.re