# Script 1: scrape the news list at zjipc.com and save every article
# (title / author / date / link / body) to data.txt.
import requests
from bs4 import BeautifulSoup

url = 'http://www.zjipc.com/434/list.htm'
data = requests.get(url)
data.encoding = 'utf-8'
bs = BeautifulSoup(data.text, 'lxml')

title = bs.select(".list_news a")   # anchors holding the article titles
ls = bs.select(".list_bt a")        # anchors holding the article links
print(len(title))

count = ""  # accumulates every article's text for the single file write at the end
for i in title:
    print("标题:" + i.getText())
for i in ls:
    print("链接:" + "http://www.zjipc.com" + i.get('href'))

for i in ls:
    # Fetch each article page and pull out title / author / date / body.
    urls = "http://www.zjipc.com" + i.get('href')
    data2 = requests.get(urls)
    data2.encoding = "utf-8"
    bs2 = BeautifulSoup(data2.text, 'lxml')
    title2 = bs2.select(".bt")
    time = bs2.select("span[frag='窗口113']")    # publish date span
    zuozhe = bs2.select("span[frag='窗口112']")  # author span
    print("##################################" + i.getText() + "#######################################")
    print("标题:")
    print(title2[0].getText())
    print("作者:")
    print(zuozhe[0].getText())
    print("时间:")
    print(time[0].getText())
    zw = bs2.select(".Article_Content")          # article body
    print("正文:")
    print(zw[0].getText())
    tl = title2[0].getText()
    zz = zuozhe[0].getText()
    sj = time[0].getText()
    zw = zw[0].getText()
    ss = tl + "\n" + zz + "\n" + sj + "\n" + urls + "\n" + zw + "\n\n"
    count = count + ss

# Write once, after the loop. The pasted original had lost its indentation;
# re-opening/rewriting the file on every iteration would be wasteful, and the
# accumulator pattern above shows a single final write was intended.
with open("data.txt", "w", encoding="utf-8") as f:
    f.write(count)
# ======== (separator in the original post: end of script 1, start of script 2) ========
# Script 2: scrape the article list at wzvtc.cn and save every article
# (title / date / body) to big.txt.
import requests
from bs4 import BeautifulSoup

# NOTE: in the pasted original this string literal was split across a line
# break (a syntax error); the URL is rejoined here.
urls = "http://www.wzvtc.cn/list/21.html"
html = requests.get(urls)
html.encoding = "utf-8"
bs = BeautifulSoup(html.text, 'lxml')

ls = bs.select(".page_list_title")
print(ls[0].getText())
hs = bs.select(".page_list_title a")

count = ""  # accumulates every article's text for the single file write at the end
# Index 0 is skipped on purpose in the original — presumably a header or
# non-article link; TODO confirm against the live page.
for i in range(1, len(hs)):
    htmls = "http://www.wzvtc.cn" + hs[i].get('href')
    print(htmls)
    resp = requests.get(htmls)
    resp.encoding = "utf-8"
    br = BeautifulSoup(resp.text, 'lxml')
    bt = br.select("#ShowArticle_title")
    print(len(bt))
    sj = br.select("#ShowArticle_type")      # date line
    zw = br.select("#ShowArticle_Content")   # article body
    print("=======标题=======")
    print(bt[0].string)
    print("=======时间=======")
    print(sj[0].getText())
    print("=======正文=======")
    print(zw[0].getText())
    ss = bt[0].string + "\n" + sj[0].getText() + "\n" + zw[0].getText() + "\n" + "===========================" + "\n"
    count = count + ss

# Single write after the loop (indentation of the paste was lost; the
# accumulator pattern shows one final write was intended).
with open("big.txt", "w", encoding="utf-8") as f:
    f.write(count)
# ======== (separator in the original post: end of script 2, start of script 3) ========
# Script 3: scrape the notice list at zjiet.edu.cn and save every article
# (title / date / body) to big.txt.
import requests
from bs4 import BeautifulSoup  # original line had a stray backtick: `from bs4` import` (syntax error)

urls = "http://www.zjiet.edu.cn/108/list.htm"
html = requests.get(urls)
html.encoding = "utf-8"
bs = BeautifulSoup(html.text, 'lxml')

ls = bs.select(".tongzhinrx")
print(ls[0].getText())
hs = bs.select(".tongzhinrx a")

count = ""  # accumulates every article's text for the single file write at the end
# Step of 2: each entry apparently contributes two <a> tags, so only every
# other link is followed — TODO confirm against the live page.
for i in range(1, len(hs), 2):
    htmls = "http://www.zjiet.edu.cn" + hs[i].get('href')
    print(htmls)
    resp = requests.get(htmls)
    resp.encoding = "utf-8"
    br = BeautifulSoup(resp.text, 'lxml')
    bt = br.select(".biaoti h1")
    print(len(bt))
    sj = br.select(".jiathis_txt")         # date line
    zw = br.select(".wp_articlecontent")   # article body
    print("=======标题=======")
    print(bt[0].string)
    print("=======时间=======")
    print(sj[0].getText())
    print("=======正文=======")
    print(zw[0].getText())
    ss = bt[0].string + "\n" + sj[0].getText() + "\n" + zw[0].getText() + "\n" + "===========================" + "\n"
    count = count + ss

# Single write after the loop (indentation of the paste was lost; the
# accumulator pattern shows one final write was intended).
with open("big.txt", "w", encoding="utf-8") as f:
    f.write(count)
# ======== (separator in the original post: end of script 3, leftover fragments below) ========
# Leftover URL-joining fragments from the original post; they reference
# `hr` and `i`, which are defined elsewhere (not in this paste). Each string
# literal was split across a line break in the original (a syntax error) —
# rejoined here.
#
# NOTE(review): str.lstrip takes a *character set*, not a prefix. Both calls
# below strip ALL leading '.' (and '/' in the second) characters; that happens
# to turn '../123.html' into '/123.html', but it would also eat legitimate
# leading characters of other paths — prefer removeprefix() (3.9+) or explicit
# slicing.
urls = "http://www.zjitc.net" + hr[i].get('href').lstrip('..')
urls = "http://www.zjtie.edu.cn/" + hr.lstrip("../..")
# Source note: from the blog post "python 12.26 — Zhejiang vocational college
# news scraping practice" (latest recommended article published 2024-08-02 10:29:40).