豆瓣电影TOP250电影名都有哪些,教你1秒搞定
很多人总以为爬虫没用,但真正的高手告诉你,爬虫学得好,在网络上是真的可以为所欲为!
import requests
from bs4 import BeautifulSoup
def get_top250_movies(timeout=10):
    """Scrape the movie titles from the Douban Top 250 list.

    Requests the ten list pages (``start`` = 0, 25, ..., 225) and, for every
    ``<div class="hd">`` element on a page, collects the text of the first
    ``<span>`` inside its first ``<a>``.

    Args:
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        A list of title strings, in the order they appear across the pages.

    Raises:
        requests.HTTPError: If a page responds with a 4xx/5xx status.
        requests.RequestException: On connection errors or timeouts.
    """
    # Loop-invariant, so it is built once. The User-Agent value can be
    # replaced with another browser string if needed.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.87 Safari/537.36 SLBrowser/6.0.1.8131"
    }
    movies = []
    for page in range(10):  # 10 pages in total
        url = f"https://movie.douban.com/top250?start={page*25}"
        # Without a timeout a stalled connection would block forever.
        response = requests.get(url, headers=headers, timeout=timeout)
        # Fail loudly on an error response instead of silently parsing an
        # error page and collecting zero titles from it.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        movies.extend(
            entry.a.span.get_text()
            for entry in soup.find_all("div", class_="hd")
        )
    return movies
def save_to_txt(movies, filename="top250_movies.txt"):
    """Write the titles to a UTF-8 text file as a numbered list.

    Each title is written on its own line as ``"<rank>. <title>"``, with
    ranks starting at 1. An existing file at ``filename`` is overwritten.

    Args:
        movies: Iterable of title strings, in rank order.
        filename: Path of the output file.
    """
    with open(filename, "w", encoding="utf-8") as f:
        # One batched write instead of one write() call per title.
        f.writelines(f"{i}. {movie}\n" for i, movie in enumerate(movies, 1))
if __name__ == "__main__":
    # Fetch every title first, then write them out as a numbered list.
    top250_movies = get_top250_movies()
    save_to_txt(top250_movies)
    print("抓取信息并生成txt文件完成!")
# Make sure the libraries imported by this script (`requests` and `bs4`) are
# installed before running it.
哇!结果马上出来!