1. 直接上代码。本爬虫实例以糗事百科网站作为目标 URL。
import requests
from bs4 import BeautifulSoup

url = "https://www.qiushibaike.com/"


def getHtml():
    """Fetch the page at the module-level ``url`` and return its HTML text.

    The response body is decoded as UTF-8.

    Returns:
        The HTML document as a string.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # A timeout keeps the script from hanging forever on an unresponsive
    # server; requests waits indefinitely by default.
    response = requests.get(url, timeout=10)
    # Fail loudly on 4xx/5xx instead of silently parsing an error page.
    response.raise_for_status()
    response.encoding = "utf-8"
    return response.text


def parseHtml(html):
    """Print the text of every ``<div class="content">`` found in *html*.

    Args:
        html: HTML document source as a string.

    Returns:
        A list with the text of each matching element, in document order
        (empty when nothing matches).
    """
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    soup = BeautifulSoup(html, "html.parser")
    texts = [div.get_text() for div in soup.find_all("div", class_="content")]
    for joke_text in texts:
        print(joke_text)
    return texts


if __name__ == "__main__":
    text = getHtml()
    joke = parseHtml(text)
2.运行结果如下所示