from urllib import request,parse
import sys
from bs4 import BeautifulSoup
# Scrape a Douban movie page and its reviews.
#
# Flow: ask for a search term, query Douban's site search, open the first
# result whose title matches the term exactly, then download and print every
# review linked from that movie page.


def _fetch_soup(url, headers):
    """Download *url* and return the page parsed with html.parser.

    The response is fully read and closed before parsing, so no connection
    stays open while follow-up pages are being fetched.
    """
    req = request.Request(url, headers=headers)
    with request.urlopen(req) as resp:
        html = resp.read()
    return BeautifulSoup(html, "html.parser")


def _tag_text(parent, name, **attrs):
    """Return the text of the first matching descendant, or '' if none exists."""
    tag = parent.find(name, **attrs)
    return tag.text if tag is not None else ''


def _print_wrapped(text, width=120):
    """Print *text* in slices of *width* characters.

    Each slice is preceded by ``print('\\n')`` (two line breaks), which
    reproduces the layout of printing character by character and breaking
    every *width* characters.
    """
    for start in range(0, len(text), width):
        print('\n')
        print(text[start:start + width], end='')


search_name = input("输入搜索的内容:")
content = parse.urlencode({"q": search_name})
url = 'https://www.douban.com/search?%s' % content + '#more'
# A mobile browser User-Agent is sent with every request.
header = {'User-Agent': 'Mozilla/5.0 (Linux; Android 5.0; SM-G900P Build/LRX21T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.114 Mobile Safari/537.36'}

soup = _fetch_soup(url, header)
for res in soup.find_all('div', class_='result'):
    link = res.find('a')
    # Skip results without an anchor/title instead of crashing on them.
    if link is None or link.get('title') != search_name:
        continue
    url_1 = link.get('href')
    if not url_1:
        continue

    soup_1 = _fetch_soup(url_1, header)
    review_box = soup_1.find('div', class_='bd movie-reviews')
    # A page without a review section simply yields no reviews.
    all_reviews = review_box.find_all('li') if review_box is not None else []

    for row in all_reviews:
        review_link = row.find('a')
        if review_link is None or not review_link.get('href'):
            continue
        # The first six characters of the href are dropped before it is
        # appended to the movie.douban.com base.
        # NOTE(review): presumably this strips a "/movie" prefix - confirm
        # against the live markup.
        url_2 = 'https://movie.douban.com/' + review_link['href'][6:]
        soup_2 = _fetch_soup(url_2, header)
        review_text = _tag_text(soup_2, 'div', id='content')

        reviewer = _tag_text(row, 'span', class_='user-name')
        title = _tag_text(row, 'h3')
        info = _tag_text(row, 'div', class_='info')

        print("评论者:", reviewer)
        print("标题:", title)
        print(info)
        print("内容:")
        _print_wrapped(review_text)
        print('\n\n', '-----------------------------------------------------------------')

    # Only the first exact-title match is processed.
    break
# Sample output (效果图):