from lxml import etree
import requests
# Request headers sent with every page fetch; the user-agent string
# identifies the client as a desktop Chrome browser.
headers = {
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/81.0.4044.138 Safari/537.36'
    ),
}
def requests_page(url, timeout=10):
    """Download a listing page and print the author and text of each post.

    Each direct child ``div`` of the ``col1 old-style-col1`` container is
    treated as one post. For every post a dict with the keys ``author``
    and ``article`` is printed.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` may block indefinitely.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    response = requests.get(url, headers=headers, timeout=timeout).text
    html = etree.HTML(response)
    if html is None:
        # Nothing parseable came back (e.g. an empty body); nothing to print.
        return

    posts = html.xpath("//div[@class = 'col1 old-style-col1']/div")
    for post in posts:
        authors = post.xpath(".//h2/text()")
        articles = post.xpath(".//span/text()")
        if not authors or not articles:
            # Some child divs carry no <h2>/<span> text; indexing [0] on
            # them would raise IndexError, so skip them.
            continue
        # Build a fresh dict per post rather than mutating a shared one.
        items = {
            'author': authors[0].strip(),
            'article': articles[0].strip(),
        }
        print(items)
def main():
    """Scrape the first page of the text section and print its posts."""
    first_page = 'https://www.qiushibaike.com/text/page/1/'
    requests_page(first_page)
# Run the scraper only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
# 【爬虫练手】糗事百科
# 最新推荐文章于 2021-05-10 08:00:00 发布