import requests
from bs4 import BeautifulSoup
import pandas as pd

# Browser-like User-Agent header sent with the request made by getInfo.
_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36'
}

# Upper bound (seconds) on how long the HTTP request may block.
_REQUEST_TIMEOUT = 10


def _clean_text(text):
    """Collapse every run of whitespace in *text* to one space and trim the ends."""
    return ' '.join(text.split())


def getInfo(url):
    """Fetch a listing page and print one line per ``.grid_view > li`` entry.

    For each entry the following fields are extracted and printed, in order:
    poster image ``src``, the ``href`` of the title link, the title link text,
    the text of the 2nd and 4th ``<span>`` under ``.star`` (presumably the
    rating and the number of ratings -- confirm against the page markup),
    the text of the first ``<p>`` under ``.bd``, and the ``.quote`` text.
    After all entries, the number of entries found is printed.

    Text fields are whitespace-normalised: newlines, indentation and
    non-breaking spaces are collapsed to single spaces.

    Args:
        url: Address of the page to download.

    Returns:
        A list with one tuple per entry:
        ``(pic_url, detail_url, title, rate, pjnum, content, quote)``.
        ``quote`` is ``''`` when the entry has no ``.quote`` element.

    Raises:
        requests.RequestException: If the request fails or times out.
        IndexError: If an entry lacks one of the mandatory elements
            (image, title link, ``.star`` spans, description paragraph).
    """
    res = requests.get(url, headers=_HEADERS, timeout=_REQUEST_TIMEOUT)
    # Let requests guess the charset from the body rather than trusting
    # the response headers.
    res.encoding = res.apparent_encoding
    soup = BeautifulSoup(res.text, 'html.parser')

    rows = []
    lis = soup.select('.grid_view > li')
    for li in lis:
        pic_url = li.select('.item>.pic>a>img')[0].get('src')

        title_link = li.select('.item>.info>.hd>a')[0]
        detail_url = title_link.get('href')
        title = _clean_text(title_link.text)

        star_spans = li.select('.item>.info>.bd>.star>span')
        rate = star_spans[1].text
        pjnum = star_spans[3].text

        content = _clean_text(li.select('.item>.info>.bd>p')[0].text)

        # select() returns an empty list when the entry has no .quote
        # element; fall back to '' instead of raising IndexError and
        # aborting the whole page.
        quote_nodes = li.select('.item>.info>.bd>.quote')
        quote = _clean_text(quote_nodes[0].text) if quote_nodes else ''

        row = (pic_url, detail_url, title, rate, pjnum, content, quote)
        print(*row)
        rows.append(row)

    print(len(lis))
    return rows


getInfo('https://movie.douban.com/top250?start=')
06-29
06-29
06-29
06-29
“相关推荐”对你有帮助么?
-
非常没帮助
-
没帮助
-
一般
-
有帮助
-
非常有帮助
提交