# Scrape text jokes from Qiushibaike (qiushibaike.com) using XPath.
import requests
from lxml import etree
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36'
}  # Request headers: send a browser-like User-Agent.
url = 'https://www.qiushibaike.com/text/'


def first_text(nodes, default=''):
    """Return the first item of an XPath result list, or *default* if it is empty.

    XPath queries return an empty list when nothing matches; indexing that
    with ``[0]`` would raise ``IndexError`` and abort the whole run.
    """
    return nodes[0] if nodes else default


def main():
    """Download the listing page at ``url`` and print each matched post.

    For every post the user name, age, content, like count and comment count
    are printed. A field that is missing from a post is printed as an empty
    string instead of raising.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-success HTTP status.
    """
    # Without a timeout requests may wait forever on an unresponsive server.
    res = requests.get(url, headers=headers, timeout=10)
    # Fail loudly on an HTTP error instead of parsing an error page.
    res.raise_for_status()
    selector = etree.HTML(res.text)
    url_infos = selector.xpath('//div[@class="article block untagged mb15 typs_hot"]')
    for url_info in url_infos:
        # User name (renamed from `id` to avoid shadowing the builtin).
        user_name = first_text(url_info.xpath('div[1]/a[2]/h2/text()')).strip("\n")
        # Age
        age = first_text(url_info.xpath('div[1]/div/text()'))
        # Post content
        content = first_text(url_info.xpath('a[1]/div/span/text()')).strip("\n")
        # Like count
        like = first_text(url_info.xpath('div[2]/span[1]/i/text()'))
        # Comment count
        comment = first_text(url_info.xpath('div[2]/span[2]/a/i/text()'))
        print("网名:" + user_name)
        print("年龄:" + age)
        print(content)
        print("点赞:" + like + '\t' + "评论数:" + comment + '\n')


if __name__ == '__main__':
    main()