一 BeautifulSoup的官方文档
https://www.crummy.com/software/BeautifulSoup/bs4/doc.zh/#
二 爬取丁香园论坛的特定帖子的所有回复内容,以及回复人的信息
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from bs4 import BeautifulSoup
import requests
# Fetch one DXY forum thread and print every reply together with its author.
# An explicit timeout keeps the script from hanging forever on a stalled
# connection.
r = requests.get("http://www.dxy.cn/bbs/thread/626626#626626", timeout=10)
blog = r.content
soup = BeautifulSoup(blog, "html.parser")

# Elements with CSS class "auth" are treated as the poster info and elements
# with class "postbody" as the reply text (assumes the forum markup still
# uses these class names -- TODO confirm against the live page).
auths = soup.find_all(class_="auth")
cons = soup.find_all(class_="postbody")

# Collect author info
auth = [tag.get_text() for tag in auths]

# Collect reply content, trimming surrounding whitespace
con = [tag.get_text().strip() for tag in cons]

# zip() pairs each author with the reply at the same position and stops at
# the shorter list, so a mismatch in the number of matched tags cannot raise
# IndexError the way indexing auth[i] by len(con) could.
for auth_name, con_text in zip(auth, con):
    print("作者:{0}".format(auth_name))
    print("内容:{0}".format(con_text))