"""Scrape the Douban movie chart and export title, rating and vote count to CSV."""
import csv
import re

import requests
from bs4 import BeautifulSoup

# URL of the Douban movie chart page.
url = 'https://movie.douban.com/chart'

# Request headers that make the request look like it comes from a browser.
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36 Edg/92.0.902.55',
    'Cookie': ''
}

# Seconds to wait for the server before giving up; without a timeout
# requests.get() can block forever on a stalled connection.
REQUEST_TIMEOUT = 10

# Destination file for the exported rows.
OUTPUT_PATH = "movies.csv"


def parse_count(text):
    """Return the first integer found in *text*, or None if there is none.

    The original code sliced ``text[1:-4]``, which presumes a fixed
    "(<digits>人评价)"-style wrapper around the number (TODO confirm against
    the live page). Searching for the digit run gives the same result for
    that shape and degrades to None for anything else.
    """
    match = re.search(r'\d+', text)
    return int(match.group()) if match else None


def parse_movies(html):
    """Extract one dict per ``.item`` element found in *html*.

    Each dict has the keys ``title``, ``rating`` and ``count``. ``rating``
    falls back to "暂无评分" when the rating element is missing; ``count`` is
    None when no vote count can be parsed, so a value from a previous movie
    is never reused. Items without a title link are skipped.
    """
    soup = BeautifulSoup(html, 'html.parser')
    movies = []
    for item in soup.select('.item'):
        link = item.select_one('.pl2 a')
        if link is None:
            # No title link: nothing meaningful to record for this item.
            continue
        # Keep only the text before the first '/' of the link text.
        title = link.text.split('/')[0].strip()

        rating_tag = item.select_one('.rating_nums')
        rating = rating_tag.text if rating_tag is not None else "暂无评分"

        count_tag = item.select_one('.star .pl')
        count = parse_count(count_tag.text) if count_tag is not None else None

        print(title)
        print(rating)
        print(count)
        movies.append({'title': title, 'rating': rating, 'count': count})
    return movies


def write_csv(movies, path):
    """Write *movies* to *path* as UTF-8 CSV with a header row."""
    with open(path, "w", newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["片名", "评分", "评价人数"])
        for member in movies:
            writer.writerow([member['title'], member['rating'], member['count']])


def main():
    """Fetch the chart page, parse it, and export the result."""
    response = requests.get(url, headers=header, timeout=REQUEST_TIMEOUT)
    print(response.status_code)

    if response.status_code != 200:
        print(f"请求失败,状态码:{response.status_code}")
        # Stop here: there is nothing to export, and an existing CSV
        # should not be overwritten with an empty one.
        return

    movices = parse_movies(response.content)
    print(movices)
    write_csv(movices, OUTPUT_PATH)


if __name__ == "__main__":
    main()
爬取豆瓣电影排行榜
最新推荐文章于 2024-10-11 12:10:02 发布