import requests
from bs4 import BeautifulSoup
import pandas as pd


def _clean_text(text):
    """Collapse every run of whitespace in *text* to one space and trim both ends."""
    # str.split() with no argument splits on any whitespace (spaces, newlines,
    # tabs, non-breaking spaces), so joining the pieces normalises all of them.
    return ' '.join(text.split())


def getInfo(url):
    """Download the listing page at *url* and print the fields of each entry.

    The page is fetched with a browser-like User-Agent header, parsed with
    BeautifulSoup's ``html.parser``, and every ``.grid_view > li`` element is
    treated as one entry.  For each entry the image URL, link URL, title,
    rating, rating-count text, description and quote are printed on one line;
    afterwards the number of entries found is printed.

    Args:
        url: Address of the listing page to fetch.

    Returns:
        None.  Results are written to standard output.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        IndexError: If an entry lacks one of the mandatory elements
            (image, link, rating spans or description paragraph).
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36'
    }
    # A timeout keeps the script from hanging forever on a stalled connection.
    res = requests.get(url, headers=headers, timeout=10)
    # Fail loudly on 4xx/5xx instead of parsing an error page and reporting 0 entries.
    res.raise_for_status()
    # Let requests guess the charset from the body rather than trusting the header.
    res.encoding = res.apparent_encoding

    soup = BeautifulSoup(res.text, 'html.parser')
    lis = soup.select('.grid_view > li')
    for li in lis:
        picUrl = li.select('.item>.pic>a>img')[0].get('src')

        # The same anchor carries both the link target and the title text.
        link = li.select('.item>.info>.hd>a')[0]
        xqUrl = link.get('href')
        title = _clean_text(link.text)

        # Index 1 and index 3 of the star spans are used as rating and
        # rating-count respectively (positions taken from the page layout).
        spans = li.select('.item>.info>.bd>.star>span')
        rate = spans[1].text
        pjnum = spans[3].text

        content = _clean_text(li.select('.item>.info>.bd>p')[0].text)

        # An entry without a `.quote` element must not abort the whole page.
        quotes = li.select('.item>.info>.bd>.quote')
        quote = _clean_text(quotes[0].text) if quotes else ''

        print(picUrl, xqUrl, title, rate, pjnum, content, quote)
    print(len(lis))


getInfo('https://movie.douban.com/top250?start=')
日常代码分享
最新推荐文章于 2024-05-10 14:45:41 发布