脚本运行效果:
本代码基于Python 2编写,运行前需要先安装requests和BeautifulSoup这两个库(BeautifulSoup对应的pip包名为beautifulsoup4,代码中以bs4导入)。
注意: 请在命令行下使用python xxx.py方式运行脚本,直接打开的话,窗口会自动关闭。
代码如下:
# -*- coding: utf-8 -*-
import urllib
import requests
import re
from bs4 import BeautifulSoup
# Read a movie title from the user, search Douban for it, and print the
# title, rating and summary line of every result.
#
# NOTE: this script targets Python 2 on a GBK console (e.g. Chinese Windows
# cmd): raw_input() needs a byte-string prompt and returns console-encoded
# bytes, so text is decoded/encoded explicitly at the I/O boundary.
CONSOLE_ENCODING = 'gbk'
SEARCH_URL = 'http://movie.douban.com/subject_search'
REQUEST_TIMEOUT = 10  # seconds; without a timeout requests can block forever
SEPARATOR = '-------------------------------'


def _emit(text):
    """Print unicode *text* as console-encoded bytes.

    Characters the console encoding cannot represent are dropped instead of
    raising UnicodeEncodeError, so one exotic character in a title or summary
    does not abort the whole listing.
    """
    print(text.encode(CONSOLE_ENCODING, 'ignore'))


def _search(movie_title):
    """Fetch the search page for unicode *movie_title* and return it parsed.

    Raises whatever ``requests.get`` raises on connection failure or timeout.
    """
    # The query is sent as UTF-8 bytes; requests takes care of URL-encoding.
    payload = {'search_text': movie_title.encode('utf-8')}
    response = requests.get(SEARCH_URL, params=payload,
                            timeout=REQUEST_TIMEOUT)
    return BeautifulSoup(response.text, 'html.parser')


def _print_results(soup):
    """Print name, rating and summary for each result found in *soup*."""
    print(SEPARATOR)
    for link in soup.select('.pl2 > a'):
        # Look up rating and summary inside this result's own container
        # rather than pairing three page-wide lists by index: a result with
        # no rating would otherwise shift every later rating onto the wrong
        # movie.
        container = link.parent  # the ".pl2" element, per the selector above
        # NOTE(review): assumes the "rating_nums" element is nested inside
        # the same ".pl2" container as the title link - confirm against the
        # live page markup.
        score_tag = container.find(class_='rating_nums')
        detail_tag = container.find('p', recursive=False)  # ".pl2 > p"

        score = score_tag.get_text() if score_tag is not None else u''
        detail = detail_tag.get_text() if detail_tag is not None else u''

        # Spaces inside the title are stripped, as before.
        _emit(link.get_text().replace(u' ', u''))
        if score:
            _emit(u'评分:【' + score + u'】\n')
        if detail:
            _emit(u'简介:' + detail)
        print(SEPARATOR)


def main():
    """Prompt for a movie title, run the search and print the results."""
    prompt = u'请输入电影名:'.encode(CONSOLE_ENCODING)
    movie_title = raw_input(prompt).decode(CONSOLE_ENCODING)
    _print_results(_search(movie_title))


if __name__ == '__main__':
    main()