仅供学习交流
Upcoming.py
import re
import requests
from doubanapi import findmovie
from bs4 import BeautifulSoup
# Scrape the Douban "later" (upcoming releases) page at `url` and enrich every
# listed movie with the director / actor / synopsis data returned by
# findmovie().  Each movie is printed as it is built, and the full list is
# printed once at the end.
url = "https://movie.douban.com/cinema/later/taian/"
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36"}
# A timeout keeps the script from blocking forever on a stalled connection.
res = requests.get(url=url, headers=headers, timeout=10)
# Fail loudly on an HTTP error instead of parsing an error page into an
# empty result list.
res.raise_for_status()
res.encoding = 'utf-8'
soup = BeautifulSoup(res.text, 'lxml')
movies = []
for sp in soup.find_all('div', class_=re.compile('item mod.*')):
    # The anchor inside the item's <h3> carries both the detail link and the title.
    anchor = sp.div.h3.a
    weber = anchor.get('href')
    title = anchor.string
    # The first four <li> entries are read positionally; the dict keys below
    # label them as time, type, region and popularity.
    sp2 = sp.find_all('li')
    mtime = sp2[0].string
    mtype = sp2[1].string
    mlocat = sp2[2].string
    mpeople = sp2[3].string
    # Drop every non-digit character of the detail URL; the remaining digits
    # are handed to findmovie() as the movie id.  Raw string: "\D" in a plain
    # literal is an invalid escape sequence.
    mid = re.sub(r"\D", "", weber)
    # findmovie() returns a mapping indexed 0/1/2: directors, actors, synopsis.
    lits = findmovie(mid)
    dire = lits[0]
    actr = lits[1]
    content = lits[2]
    movie = {
        "标题": title,
        "时间": mtime,
        "类型": mtype,
        "地区": mlocat,
        "热度": mpeople,
        "导演": dire,
        "演员": actr,
        "简介": content,
        "详细信息": weber,
    }
    print(movie)
    movies.append(movie)
print(movies)
doubanapi.py(调用 API 对电影信息进行补充)
import json
import requests
def findmovie(urlid):
    """Fetch supplementary details for one movie from the querydata API.

    Args:
        urlid: Movie id appended to the API URL as the ``id`` query value.
            It is converted with ``str()``, so an int is accepted as well
            as a string.

    Returns:
        dict: A mapping with integer keys, ``{0: directors, 1: actors,
        2: description}``.  Entries 0 and 1 are strings in which every name
        is followed by one space; entry 2 is the ``description`` field of
        the first element of the response's ``data`` list.

    Raises:
        requests.RequestException: On a network failure, a timeout, or an
            HTTP error status.
        ValueError: If the response body is not valid JSON.
        KeyError, IndexError: If the payload lacks a field read below.
    """
    baseurl = 'https://movie.querydata.org/api?id='
    url = baseurl + str(urlid)
    # A timeout prevents one stalled lookup from hanging the caller's loop.
    res = requests.get(url, timeout=10)
    # Surface HTTP errors directly rather than as a confusing JSON/Key error.
    res.raise_for_status()
    res.encoding = 'utf-8'
    payload = json.loads(res.text)

    content = payload['data'][0]['description']
    # Each director / actor entry keeps its name under data[0]['name'].
    # Every name is followed by a space, so a non-empty result ends with one.
    sumdire = "".join(
        person["data"][0]['name'] + " " for person in payload['director']
    )
    sumact = "".join(
        person["data"][0]['name'] + " " for person in payload['actor']
    )
    # Callers index the result with 0, 1 and 2, so keep the int-keyed dict.
    return {0: sumdire, 1: sumact, 2: content}