from lxml import etree
import requests, json
from threading import Thread
from time import time
class Douban:
    """Small scraper for Douban movie listing and detail pages.

    ``response_url_list`` reads a JSON listing endpoint and returns the
    detail-page URLs it contains; ``resp_get`` downloads one detail page,
    extracts a few fields with XPath and prints them.
    """

    # Seconds allowed for each connect/read step of a request. Without a
    # timeout a stalled connection would block the calling thread forever.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        # Browser-like User-Agent sent with every request.
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.157 Safari/537.36'
        }

    def response_url_list(self, url_list):
        """Return the detail-page URLs found in a JSON listing response.

        Args:
            url_list: URL of the JSON listing endpoint to query.

        Returns:
            A list with the ``'url'`` value of every entry under the
            top-level ``'subjects'`` key of the response.

        Raises:
            requests.RequestException: on network failure or timeout.
            ValueError: if the response body is not valid JSON.
            KeyError: if ``'subjects'`` or an entry's ``'url'`` is missing.
        """
        # The headers MUST be passed by keyword: the second positional
        # parameter of requests.get() is ``params``, so passing the dict
        # positionally would put the User-Agent into the query string.
        response = requests.get(
            url_list, headers=self.headers, timeout=self.REQUEST_TIMEOUT
        )
        subjects = json.loads(response.text)['subjects']
        return [subject['url'] for subject in subjects]

    @staticmethod
    def _first_text(nodes, default=''):
        """Return the first XPath match, or *default* when nothing matched."""
        return nodes[0] if nodes else default

    def resp_get(self, j):
        """Download one movie detail page, extract its fields and print them.

        Args:
            j: URL of the movie detail page.

        The printed dict uses the keys '电影名' (title), '演员' (list of link
        texts found in the ``info`` block), '评分' (rating) and '简介'
        (synopsis, whitespace-stripped). A single-valued field that is not
        present on the page is reported as an empty string instead of
        raising ``IndexError``.

        Raises:
            requests.RequestException: on network failure or timeout.
        """
        # Keyword argument for the same reason as in response_url_list().
        response = requests.get(
            j, headers=self.headers, timeout=self.REQUEST_TIMEOUT
        )
        res_html = etree.HTML(response.text)
        first = self._first_text
        restult = {
            '电影名': first(res_html.xpath("//div[@id='wrapper']//h1/span/text()")),
            '演员': res_html.xpath('//div[@id="info"]//span/span/a/text()'),
            '评分': first(res_html.xpath("//div[@class='rating_wrap clearbox']/div/strong/text()")),
            '简介': first(res_html.xpath("//div[@class='related-info']/div/span/text()")).strip(),
        }
        print(restult)
if __name__ == '__main__':
    # Interactive entry point: ask for a category tag and a page number,
    # fetch that listing page, then scrape every movie on it in its own thread.
    douban = Douban()
    print('提示:','热门','最新','经典' ,'可播放','豆瓣高分' ,'冷门佳片','华语','欧美','韩国','日本','动作','喜剧','爱情','科幻','悬疑','恐怖','成长')
    # Surrounding whitespace typed by the user would otherwise end up in the URL.
    a = input('请输入需要查询类别:').strip()
    b = int(input('请输入需要查询页数:'))
    # Start the clock only after the prompts, so the reported duration
    # measures the scraping work and not how long the user took to type.
    st_time = time()
    # The URL requests 20 results per page; page b starts at offset (b - 1) * 20.
    url_list = 'https://movie.douban.com/j/search_subjects?type=movie&tag={}&sort=recommend&page_limit=20&page_start={}'.format(a, (b - 1) * 20)
    print('正在解析列表页{}'.format(b))
    list_url_resp = douban.response_url_list(url_list)
    t_list = []
    for j in list_url_resp:
        t = Thread(target=douban.resp_get, args=(j,))
        t.start()
        t_list.append(t)
    # Wait for every worker to finish before reporting the elapsed time.
    for worker in t_list:
        worker.join()
    print('多任务时,共耗时:{}'.format(time() - st_time))