from bs4 import BeautifulSoup
import requests
import lxml
def request_douban(url):
    """Download a page and return its HTML text.

    Args:
        url: Address of the page to fetch.

    Returns:
        The response body as text when the server answers with HTTP 200.
        None when the status code is anything else or when requests raises
        a RequestException (connection failure, timeout, ...).
    """
    # Send an explicit User-Agent; per the original author's note this is
    # meant to avoid being rejected with 403 Forbidden.
    headers = {'user-agent': 'my-app/0.0.1'}
    try:
        # requests has no default timeout, so without one a stalled server
        # would block this call indefinitely. A timeout raises
        # requests.Timeout, which is a RequestException and is handled below.
        response = requests.get(url, headers=headers, timeout=10)
    except requests.RequestException:
        return None
    if response.status_code == 200:
        return response.text
    return None
def main(url):
    """Scrape the page at *url* and print one line per movie table row.

    For every ``<tr>`` found, prints the movie title, score, rating count
    and cover image URL, followed by a separator line. Rows that do not
    have the expected structure print ``Error`` instead.

    Args:
        url: Address of the chart page to scrape.
    """
    html = request_douban(url)
    if html is None:
        # request_douban returns None on a failed or non-200 request;
        # BeautifulSoup(None, ...) would raise, so stop here instead.
        print('Error: failed to fetch {}'.format(url))
        return

    soup = BeautifulSoup(html, 'lxml')  # parsed page source
    # NOTE(review): the lookup with class_="" is kept from the original;
    # presumably it targets the wrapper element holding the movie tables —
    # confirm against the live page markup.
    container = soup.find(class_="")
    if container is None:
        print('Error: movie list container not found')
        return

    for item in container.find_all('tr'):
        try:
            item_name = item.find(class_='nbg').get('title')
            item_img = item.find('a').find('img').get('src')
            item_score = item.find(class_="rating_nums").string
            item_comment = item.find(class_="star clearfix").find(class_="pl").string
            print('电影名:{:<10}评分:{:<5}评价人数:{:<10}封面:{}'.format(item_name, item_score, item_comment, item_img))
            print('-'*50)
        except (AttributeError, TypeError):
            # AttributeError: a find() returned None (element missing).
            # TypeError: a None value reached an aligned format field.
            # Other exceptions (e.g. KeyboardInterrupt) are left to propagate.
            print('Error')
if __name__ == "__main__":
    # Run the scraper against the Douban movie chart page when executed
    # as a script.
    main('https://movie.douban.com/chart')
# Crawler 1: Douban new-movie chart (2019)
# (Source page footer: latest recommended article published 2024-04-02 00:01:19)