import requests
import lxml.etree as etree
def _first_match(node, path):
    """Return the first result of evaluating XPath ``path`` on ``node``, or '' if there is none."""
    matches = node.xpath(path)
    return matches[0] if matches else ''


def getBook(url, timeout=10):
    """Fetch one book-listing page, print every book found on it, and return the records.

    Each book is expected to live in its own ``<div class="book-mid-info">``
    element. Fields are extracted relative to that element, so a book that is
    missing an author, intro or link cannot shift the fields of the books that
    follow it (which is what happens when four document-wide result lists are
    indexed in lockstep).

    Args:
        url: Address of the listing page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the crawl forever.

    Returns:
        A list of dicts with the keys ``ID`` (1-based position on this page),
        ``Title``, ``BookLink``, ``Author`` and ``Intro``. The list is empty
        when the page contains no book entries.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            4xx/5xx response (raised by ``raise_for_status``).
    """
    response = requests.get(url, timeout=timeout)
    # Surface HTTP errors instead of parsing an error page that yields no books.
    response.raise_for_status()
    response.encoding = 'utf-8'

    # Build the selector object from the decoded page text.
    selector = etree.HTML(response.text)
    if selector is None:
        # NOTE(review): etree.HTML can return None when the body has no
        # parsable markup; treat that as "no books on this page".
        return []

    books = []
    for node in selector.xpath('//div[@class="book-mid-info"]'):
        # The paths end in text() / @href so lxml returns strings, not elements.
        title = _first_match(node, './h4/a/text()')
        if not title:
            # An entry without a title was never listed before; keep skipping it.
            continue
        href = _first_match(node, './h4/a/@href')
        data = {
            'ID': len(books) + 1,
            'Title': title,
            # The page's hrefs are prefixed with the scheme to form a full link.
            'BookLink': ("https:" + href) if href else '',
            'Author': _first_match(node, './p[1]/a[1]/text()'),
            'Intro': _first_match(node, './p[2]/text()').strip(),
        }
        print(data)
        books.append(data)
    return books
# Example of a single page address: https://www.qidian.com/rank/collect?chn=21&page=1
# Build the addresses for pages 1 through 99 (range's upper bound is exclusive),
# then scrape them one after another in page order.
urlList = [
    'https://www.qidian.com/rank/collect?chn=21&page={}'.format(page)
    for page in range(1, 100)
]
for url in urlList:
    getBook(url)
# Scraping Qidian novels with XPath and writing them to a MySQL front end
# Latest recommended article published on 2024-03-24 18:00:20