from urllib.request import urlopen,Request
from bs4 import BeautifulSoup
import re
# Scrape one page of the Douban "top250" movie chart and write one
# "<title> <score>" line per movie to a text file.

# The query parameters must be separated by "&". The previous "%" produced
# the invalid percent-escape "%fi" and therefore a malformed query string.
# NOTE(review): `start` is presumably the zero-based offset into the chart.
url = "https://movie.douban.com/top250?start=50&filter="

# Browser-like User-Agent sent with the request (presumably because the site
# rejects urllib's default User-Agent -- confirm).
hd = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'}

# Not used anywhere in this script; kept only so the module-level name
# continues to exist.
headers = {'User-Agent': '', 'Referer': ''}

ret = Request(url, headers=hd)

# Parse inside the `with` so the HTTP response is closed even if parsing fails.
with urlopen(ret) as html:
    bs = BeautifulSoup(html, 'html.parser')

# Look the list container up once and fail with a clear message instead of an
# opaque AttributeError on None when the page has no <ol>.
movie_list = bs.find("ol")
if movie_list is None:
    raise RuntimeError(
        "no <ol> element found in the response; the page layout may have "
        "changed or the request may have been blocked"
    )

scores = movie_list.find_all("span", {"class": "rating_num"})
# The tag name is "img" (the previous "ing" matched nothing, so the output
# file was always empty). The title is read from each image's `alt` attribute.
names = movie_list.find_all("img")

# NOTE(review): "d:movie.txt" is a drive-relative Windows path (current
# directory of drive D:), not "d:/movie.txt"; left unchanged -- confirm intent.
# UTF-8 is specified so non-ASCII titles are written regardless of the
# platform's default encoding; `with` guarantees the file is closed.
with open("d:movie.txt", "w", encoding="utf-8") as f:
    # zip() pairs the n-th image with the n-th rating and stops at the
    # shorter of the two lists.
    for name, score in zip(names, scores):
        f.write(f"{name['alt']} {score.get_text()}\n")
# Python web-crawler code
# Latest recommended article published on 2024-07-27 12:20:46