from bs4 import BeautifulSoup
# Scrape a locally saved HTML page and print the title and score of every
# entry whose score is greater than 3.
info = []

# Open in binary mode so BeautifulSoup detects the document's encoding itself
# instead of depending on the platform's default text codec.
with open('D:/BaiduYunDownload/Python实战::四周实现爬虫系统/课程资料/课程源码及作业参考答案/week1/1_2/1_2code_of_video/web/new_index.html', 'rb') as some_data:
    Soup = BeautifulSoup(some_data, 'lxml')

# Each selector yields one list of tags; the lists are combined by position
# below.
image = Soup.select('body > div.main-content > ul > li > img')
somepagraph = Soup.select('body > div.main-content > ul > li > div.article-info > h3 > a')
cate = Soup.select('body > div.main-content > ul > li > div.article-info > p.meta-info')
score = Soup.select('body > div.main-content > ul > li > div.rate > span')

# zip() pairs the four lists element by element and stops at the shortest one.
for imageone, somepagraphone, cateone, scoreone in zip(image, somepagraph, cate, score):
    data = {
        # Image: taken from the tag's src attribute.
        'image': imageone.get('src'),
        'somepagraph': somepagraphone.get_text(),
        # One-to-many: collect every text fragment inside the meta-info tag.
        'cate': list(cateone.stripped_strings),
        'score': scoreone.get_text(),
    }
    info.append(data)

# Scores are scraped as text, so convert to float before comparing.
for i in info:
    if float(i['score']) > 3:
        print(i['somepagraph'], ':', i['score'])
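# Code for scraping a local web page and listing the entries with a score greater than 3.
# Source page note: latest recommended article published on 2022-05-27 22:22:00.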