Python爬虫实践—纵横中文网免费小说爬取(仅供记录学习)
爬取初始页面链接: http://book.zongheng.com/store/c1/c0/b0/u0/p1/v9/s1/t0/u0/i1/ALL.html
.
python代码
.
# A highlighted block
import requests
from lxml import etree
import re
import os,time
def getHtml(url, timeout=10):
    """Download *url* and return the response body as text.

    The request is sent with a desktop Chrome User-Agent header instead of
    the default one used by ``requests``.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server to connect/respond before
            giving up. Pass ``None`` to wait indefinitely (the behaviour
            before this parameter existed).

    Returns:
        The decoded response body (``Response.text``).

    Raises:
        requests.exceptions.RequestException: If the request fails, including
            ``requests.exceptions.Timeout`` when *timeout* elapses.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.36 Safari/537.36'}
    # requests applies no timeout by default, so a stalled server would hang
    # the whole crawl; always bound the wait.
    html = requests.get(url, headers=headers, timeout=timeout)
    return html.text
def getBookId(url):
bookIdList = []
text = getHtml(url)
html = etree.HTML(text)
bookUrlList = html.