from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
# News list page to scrape; article links found on it are resolved against it.
LIST_URL = 'http://www.zjipc.com/434/list.htm'
# File that receives the concatenated article records.
OUTPUT_PATH = "data.txt"
# Seconds to wait on each HTTP request, so a stalled server cannot hang the run.
REQUEST_TIMEOUT = 10


def fetch_soup(page_url):
    """Download *page_url* and return it parsed with the lxml parser.

    The response body is decoded as UTF-8, as the original script did.

    Raises:
        requests.RequestException: on connection errors, timeouts, or a
            non-2xx HTTP status.
    """
    response = requests.get(page_url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    response.encoding = 'utf-8'
    return BeautifulSoup(response.text, 'lxml')


def first_text(soup, selector):
    """Return the text of the first element matching *selector*, or ""."""
    matches = soup.select(selector)
    # A page that lacks the element yields an empty field instead of an
    # IndexError that would abort the run before anything is saved.
    return matches[0].getText() if matches else ""


def format_record(title, author, published, article_url, body):
    """Build one output record: five newline-separated fields plus a blank line."""
    return "\n".join([title, author, published, article_url, body]) + "\n\n"


def main():
    """Scrape the news list page, print each article, and save all to OUTPUT_PATH.

    Prints the number of ".list_news a" anchors and their texts, then the
    resolved link of every ".list_bt a" anchor, then fetches each of those
    links and prints its title, author, time and body. All article records
    are written to OUTPUT_PATH in one go at the end.
    """
    listing = fetch_soup(LIST_URL)
    news_anchors = listing.select(".list_news a")
    article_anchors = listing.select(".list_bt a")

    print(len(news_anchors))
    for anchor in news_anchors:
        print("标题:" + anchor.getText())

    # Resolve every link once. urljoin gives the same result as prefixing the
    # site root for root-relative hrefs ("/a/b.htm"), and also handles
    # absolute and page-relative hrefs correctly.
    articles = []
    for anchor in article_anchors:
        href = anchor.get('href')
        if href is None:
            # An anchor without an href has nothing to fetch.
            continue
        articles.append((anchor.getText(), urljoin(LIST_URL, href)))

    for _, article_url in articles:
        print("链接:" + article_url)

    records = []
    for link_text, article_url in articles:
        try:
            page = fetch_soup(article_url)
        except requests.RequestException as exc:
            # One unreachable article should not discard the others.
            print("skipped " + article_url + ": " + str(exc))
            continue

        title = first_text(page, ".bt")
        published = first_text(page, "span[frag='窗口113']")
        author = first_text(page, "span[frag='窗口112']")
        body = first_text(page, ".Article_Content")

        print("##################################" + link_text + "#######################################")
        print("标题:")
        print(title)
        print("作者:")
        print(author)
        print("时间:")
        print(published)
        print("正文:")
        print(body)

        records.append(format_record(title, author, published, article_url, body))

    with open(OUTPUT_PATH, "w", encoding="utf-8") as out_file:
        out_file.write("".join(records))


if __name__ == "__main__":
    main()
# 使用【BeautifulSoup】爬取高校网站学院新闻
# 最新推荐文章于 2020-12-21 19:58:27 发布