# 代码和配套资源已上传到百度网盘
# 链接: https://pan.baidu.com/s/1y3abwg-bU1Ey4LDeTjE2pw
# 提取码: bt2g
import requests
from bs4 import BeautifulSoup
# Scrape the Douban "top250" movie listing: collect every movie's title and
# link, print them, and save them as numbered lines in movies.txt.

# Number of requests made to the site; each one asks for the listing
# starting at offset page * PAGE_SIZE.
PAGE_COUNT = 10
PAGE_SIZE = 25

# Browser-like User-Agent sent with every request (built once, reused).
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36(KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.18362'
}

# Titles and links accumulated across all pages, kept in parallel order.
names = []
websites = []

for page in range(PAGE_COUNT):
    start = page * PAGE_SIZE
    url_change = 'https://movie.douban.com/top250?start=' + str(start) + '&filter='

    # A timeout keeps a stalled connection from hanging the script forever.
    response = requests.get(url_change, headers=headers, timeout=10)
    print(response)
    # Abort on an HTTP error status instead of trying to parse an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')
    movie_list = soup.find('ol', class_='grid_view')
    if movie_list is None:
        # Without this check the failure would surface as an opaque
        # AttributeError on None a line later.
        raise RuntimeError(
            'no <ol class="grid_view"> element found in ' + url_change
        )

    for movie in movie_list.find_all('li'):
        # The first <span class="title"> inside the entry is used as its name.
        name = movie.find('span', class_='title').get_text()
        website = movie.find('a', class_='')
        names.append(name)
        websites.append(website['href'])

# Report everything once, after all pages have been collected.
for name in names:
    print(name)
for website in websites:
    print(website)

# Explicit UTF-8 so non-ASCII titles are written correctly regardless of the
# platform's default encoding. Names come first, then links, each section
# numbered from 1.
with open('movies.txt', 'w', encoding='utf-8') as f:
    for x, name in enumerate(names, start=1):
        f.write('{}. {}\n'.format(x, name))
    for y, website in enumerate(websites, start=1):
        f.write('{}. {}\n'.format(y, website))