import requests
from bs4 import BeautifulSoup
import random
# Scrape the first pages of the Douban Top 250 movie list and print each
# movie's title and rating as a dict.

# Browser-like User-Agent header sent with every request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36"
}
origin = "https://movie.douban.com/top250"
PAGE_SIZE = 25   # the "start" offset advances by 25 per request
PAGE_COUNT = 3   # number of pages to fetch
REQUEST_TIMEOUT = 10  # seconds; without a timeout a stalled connection hangs forever


def _parse_movies(html):
    """Return a list of ``{"name": ..., "rating": ...}`` dicts parsed from *html*.

    Every ``<div class="info">`` element is treated as one movie entry. The
    name is the text of the first ``<span>`` inside the entry's first ``<a>``;
    the rating is the text of ``<span class="rating_num">`` inside
    ``<div class="star">``. Entries missing either element are skipped
    instead of raising ``AttributeError``.
    """
    # Original author's note: the lxml parser is required here.
    soup = BeautifulSoup(html, 'lxml')
    parsed = []
    for node in soup.find_all('div', {'class': 'info'}):
        # find() returns only the first match (or None when nothing matches).
        link = node.find('a')
        title = link.find('span') if link is not None else None
        star = node.find('div', {'class': 'star'})
        rating = star.find('span', {'class': 'rating_num'}) if star is not None else None
        if title is None or rating is None:
            continue
        parsed.append({"name": title.text, "rating": rating.text})
    return parsed


movies = []  # every movie collected so far, across all fetched pages
for start in range(0, PAGE_SIZE * PAGE_COUNT, PAGE_SIZE):
    # The original passed data=datas, but `datas` is never defined (NameError);
    # the page offset travels in the query string, so no request body is needed.
    res = requests.get(
        origin,
        params={"start": start},
        headers=headers,
        timeout=REQUEST_TIMEOUT,
    )
    # Fail loudly on an HTTP error instead of silently parsing an error page.
    res.raise_for_status()
    res.encoding = 'utf-8'  # decode the page as UTF-8
    page_movies = _parse_movies(res.text)
    for movie in page_movies:
        print(movie)
    movies.extend(page_movies)
# 想说的都在注释里了 (Everything I wanted to say is in the comments.)