import requests
from bs4 import BeautifulSoup
import bs4
import re
def getHTMLText(url, k, timeout=10):
    """Fetch one page of a paginated listing and return its HTML text.

    Args:
        url: Address of the listing page.
        k: Offset of the first item on the page. ``0`` sends no query
            parameters; any other value is sent as the ``start`` query
            parameter together with an empty ``filter`` parameter.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script forever.

    Returns:
        The decoded response body, or ``""`` when the request fails
        (connection problem, timeout, or a 4xx/5xx status).
    """
    params = {} if k == 0 else {'start': k, 'filter': ''}
    try:
        r = requests.get(
            url,
            params=params,
            headers={'User-Agent': 'Mozilla/5.0'},
            timeout=timeout,
        )
        r.raise_for_status()
    except requests.RequestException:
        # Best effort: callers treat an empty string as "no page".
        return ""
    # Decode using the encoding detected from the body itself.
    r.encoding = r.apparent_encoding
    return r.text
def fillUnivList(ulist, html):
    """Parse one listing page and append one row per movie to ``ulist``.

    Each appended row is ``[name, score, rating_count, quote]`` (all
    strings); ``quote`` is ``'无'`` when the entry has no ``span.inq``.

    Args:
        ulist: List that receives the rows; it is mutated in place.
        html: HTML text of the page. If it contains no
            ``<ol class="grid_view">`` (for example an empty string),
            nothing is appended.
    """
    soup = BeautifulSoup(html, "html.parser")
    # The movie entries are the <li> items of the first <ol class="grid_view">.
    movie_list = soup.find('ol', attrs={'class': 'grid_view'})
    if movie_list is None:
        # Nothing to parse; avoid calling find_all on None.
        return
    for movie_li in movie_list.find_all('li'):
        # Movie name: first span.title inside the first div.hd.
        movie_hd = movie_li.find('div', attrs={'class': 'hd'})
        movie_name = movie_hd.find('span', attrs={'class': 'title'}).getText()
        # Movie score.
        movie_score = movie_li.find('span', attrs={'class': 'rating_num'}).getText()
        # Number of ratings: the last run of digits in the div.star markup.
        movie_eval = movie_li.find('div', attrs={'class': 'star'})
        movie_eval_num = re.findall(r'\d+', str(movie_eval))[-1]
        # Short quote; not every entry has one.
        movie_quote = movie_li.find('span', attrs={'class': 'inq'})
        quote_text = movie_quote.getText() if movie_quote else '无'
        ulist.append([movie_name, movie_score, movie_eval_num, quote_text])
def printUnivList(ulist, num):
    """Print a header line followed by at most ``num`` rows of ``ulist``.

    Args:
        ulist: Sequence of rows; each row is indexable with at least four
            entries (name, score, rating count, quote).
        num: Maximum number of rows to print. If ``ulist`` holds fewer
            rows, only the available ones are printed; zero or a
            negative value prints the header only.
    """
    # Windows variant: tplt = "{0:{4}^20}\t{1:^10}\t{2:^10}\t{3:{4}<10}"
    tplt = "{0:{4}<20}\t{1:>10}\t{2:>10}\t{3:{4}>10}"
    # chr(12288) is U+3000 (ideographic space); it is the pad character for
    # the name and quote columns, the other two columns pad with ASCII spaces.
    fill = chr(12288)
    print(tplt.format("电影名称", "评分", "评论人数", "短评", fill))
    # Slice instead of indexing range(num) so a short list cannot raise
    # IndexError; max() keeps a negative num from slicing off the tail.
    for row in ulist[:max(num, 0)]:
        print(tplt.format(row[0], row[1], row[2], row[3], fill))
def main():
    """Collect every page of the Top 250 listing and print the first five rows."""
    movies = []
    base_url = "https://movie.douban.com/top250"
    # Page offsets run 0, 25, ..., 225 (ten pages of 25 entries).
    for offset in range(0, 226, 25):
        page_html = getHTMLText(base_url, offset)
        fillUnivList(movies, page_html)
    printUnivList(movies, 5)
# Run the scraper only when this file is executed as a script, so that
# importing the module does not trigger network requests.
if __name__ == "__main__":
    main()
在 Windows 系统下显示正常,Jupyter 下也正常,但在 macOS 下就是串行的,如下图。请问有大佬了解这个问题吗?可能是某个设置的问题,但不知道具体应该在哪里设置。