利用 Python 爬取豆瓣即将上映的电影

仅供学习交流

Upcoming.py

import re
import requests
from doubanapi import findmovie
from bs4 import BeautifulSoup
# Scrape the Douban "later" (upcoming) cinema listing at `url`, enrich every
# entry with the director / cast / synopsis returned by doubanapi.findmovie,
# print each record as it is built and finally print the whole list.
url = "https://movie.douban.com/cinema/later/taian/"
# A browser-like User-Agent is sent with the listing request.
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36"}
# Bound the wait so a stalled connection cannot hang the script forever, and
# fail loudly on an HTTP error instead of parsing an error page.
res = requests.get(url=url, headers=headers, timeout=10)
res.raise_for_status()
res.encoding = 'utf-8'
soup = BeautifulSoup(res.text, 'lxml')
# Compiled once, outside the loop; selects the <div> elements whose class
# matches "item mod...".
item_class = re.compile('item mod.*')
movies = []
for sp in soup.find_all('div', class_=item_class):
    link = sp.div.h3.a
    weber = link.get('href')
    title = link.string
    sp2 = sp.find_all('li')
    if len(sp2) < 4:
        # The four fields below are read positionally; an entry with fewer
        # <li> elements cannot be mapped, so skip it rather than crash.
        continue
    mtime = sp2[0].string
    mtype = sp2[1].string
    mlocat = sp2[2].string
    mpeople = sp2[3].string
    # Keep only the digits of the detail URL; they are passed to findmovie
    # as the movie id (raw string avoids the invalid "\D" escape warning).
    mid = re.sub(r"\D", "", weber)
    lits = findmovie(mid)
    dire = lits[0]
    actr = lits[1]
    content = lits[2]
    movie = {
        "标题": title,
        "时间": mtime,
        "类型": mtype,
        "地区": mlocat,
        "热度": mpeople,
        "导演": dire,
        "演员": actr,
        "简介": content,
        "详细信息": weber,
    }
    print(movie)
    movies.append(movie)
print(movies)

doubanapi.py(调用 API 对电影信息进行补充)

import json
import requests

def findmovie(urlid, timeout=10):
    """Fetch supplementary details for one movie from the querydata API.

    Args:
        urlid: Movie id appended to the API query string. Converted with
            ``str()``, so both ``str`` and ``int`` ids are accepted.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A dict with integer keys:
        ``0`` -> director names, ``1`` -> actor names (every name is
        followed by a single space), ``2`` -> the ``description`` of the
        first entry in the response's ``data`` list.

    Raises:
        requests.RequestException: on a network failure, a timeout or a
            non-success HTTP status.
        ValueError: if the response body is not valid JSON.
        KeyError, IndexError: if the JSON lacks the fields read below.
    """
    baseurl = 'https://movie.querydata.org/api?id='
    res = requests.get(baseurl + str(urlid), timeout=timeout)
    # Surface HTTP errors here rather than as an obscure JSON/Key error later.
    res.raise_for_status()
    res.encoding = 'utf-8'
    lis = json.loads(res.text)

    content = lis['data'][0]['description']
    # Each director/actor entry carries its own "data" list; the name is
    # taken from that list's first element.
    sumdire = "".join(dire["data"][0]['name'] + " " for dire in lis['director'])
    sumact = "".join(act["data"][0]['name'] + " " for act in lis['actor'])

    # Callers index the result with 0 / 1 / 2, so keep the int-keyed dict.
    return {0: sumdire, 1: sumact, 2: content}

  • 1
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值