爬猫眼电影数据

猫眼的一些数据api

影片:
即将上映:http://api.maoyan.com/mmdb/movie/v2/list/rt/order/coming.json?ci=1&limit=12&token=
正在热映:http://api.maoyan.com/mmdb/movie/v5/list/hot.json?ci=1&limit=12&token=
影片基本信息:http://api.maoyan.com/mmdb/movie/v5/1235560.json
影片视频:http://api.maoyan.com/mmdb/v1/movie/257594/videos.json?offset=0&limit=100
影片图片:https://api.maoyan.com/mmdb/movie/photos/257594/list.json
影片演员:http://api.maoyan.com/mmdb/movie/1262593/role/celebrities.json
影片公司:https://api.maoyan.com/mmdb/movie/1250661/feature/relatedCompanies.json
彩蛋信息:https://api.maoyan.com/mmdb/movie/tips/1218727/list.json
影片相关介绍:https://api.maoyan.com/mmdb/movie/246061/feature/v2/list.json 
幕后花絮:https://api.maoyan.com/mmdb/movie/246061/feature/highlights.json 
家长引导:https://api.maoyan.com/mmdb/movie/246061/feature/parentguidances.json
电影原声:https://api.maoyan.com/mmdb/movie/246061/feature/filmMusics.json  
电影技术参数:https://api.maoyan.com/mmdb/movie/246061/feature/technicals.json 
幕后花絮(H5接口):http://m.maoyan.com/movie/246061/extras/highlights?_v_=yes
家长引导(H5接口):http://m.maoyan.com/movie/246061/extras/parentguidances?_v_=yes
技术参数,出品公司(专业版接口):https://piaofang.maoyan.com/movie/246061/moresections
电影相关影片:https://api.maoyan.com/mmdb/movie/887623/feature/relatedFilm.json
影人:
影人基本信息:https://api.maoyan.com/mmdb/v6/celebrity/3147.json
影人图片:https://api.maoyan.com/mmdb/v7/celebrity/28625/photos.json
影人搜索列表:https://api.maoyan.com/mmdb/search/integrated/keyword/list.json
影人演艺:https://api.maoyan.com/mmdb/v2/celebrity/28625/feature/yearbook.json
影人生活:https://api.maoyan.com/mmdb/v2/celebrity/28625/feature/life.json
影人社会影响:https://api.maoyan.com/mmdb/v2/celebrity/28625/feature/effect.json
影人趣闻花絮:https://api.maoyan.com/mmdb/v2/celebrity/28625/feature/trivia.json
影人语录:https://api.maoyan.com/mmdb/v2/celebrity/28625/feature/quotes.json
影人人物评价:https://api.maoyan.com/mmdb/v2/celebrity/28625/feature/evaluation.json
影人图片(H5接口):https://m.maoyan.com/celebrity/29264/photos?_v_=yes
演艺经历(专业版接口):https://piaofang.maoyan.com/celebrity/showCareer?id=28490
影人基本信息和图片(网页):https://maoyan.com/films/celebrity/18079
公司:
电影公司详情:https://api.maoyan.com/mmdb/movie/feature/company/283.json

以上接口返回的都是 JSON 数据,直接用 requests.get 请求即可。
下面直接上代码:

import requests
import csv
import json
import requests
import pandas as pd


def get_json(date):
    """Query the piaofang.maoyan.com ``second-box`` endpoint for one day.

    Args:
        date: Day to request, substituted into the ``beginDate`` query
            parameter (callers pass ``YYYYMMDD`` strings such as
            ``'20131212'``).

    Returns:
        The response body decoded from JSON.
    """
    endpoint = 'http://piaofang.maoyan.com/second-box?beginDate={}'
    # Browser-like headers sent with every request.
    request_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36',
        'Accept': 'application/json, text/javascript, */*; q=0.01',
    }

    reply = requests.get(endpoint.format(date), headers=request_headers, timeout=5)
    return reply.json()


if __name__ == '__main__':
    # Script entry point: download one payload per day and save each one
    # under ./data/<YYYYMMDD>.txt.
    import os
    import random
    import time
    from datetime import datetime

    import pandas as pd

    # Fetch a single day first and print it before the long crawl starts.
    print(get_json(date='20110101'))

    # One YYYYMMDD string for every calendar day in the crawl window.
    days = pd.date_range('2011/1/1', '2019/12/13', freq='D')
    date_str = [datetime.strftime(d, '%Y%m%d') for d in days]

    path = './data/'
    # Create the output directory up front; otherwise the first write fails
    # with FileNotFoundError after a request and a sleep were already spent.
    os.makedirs(path, exist_ok=True)

    # NOTE(review): the slice skips the last 1000 days of the range --
    # presumably deliberate (e.g. crawled in a separate run); confirm.
    for date in date_str[:-1000]:
        # Random pause between requests.
        delta = random.randint(1, 10)
        time.sleep(delta)
        print('$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$')
        print('sleeping {} secondes'.format(delta))
        print(date)
        data = get_json(date)
        # The payload is stored as its Python repr, not as JSON text.
        with open(path + date + '.txt', 'w') as f:
            f.write(str(data))

import random
import time

import requests
import pandas
import json
import os
from coming_soon import Spider

class MovieInfoSpider:
    """Downloads per-movie JSON records and collects movie IDs from saved files."""

    def get_json(self, movieID):
        """Request the record of a single movie.

        Args:
            movieID: Identifier substituted into the
                ``/mmdb/movie/v5/{}.json`` URL.

        Returns:
            The response body decoded from JSON.
        """
        url = 'http://api.maoyan.com/mmdb/movie/v5/{}.json'.format(movieID)
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36',
            'Accept': 'application/json, text/javascript, */*; q=0.01',
        }

        response = requests.get(url, headers=headers, timeout=5).json()
        return response

    def get_movieID(self, path):
        """Collect every ``movieId`` found in the files under ``path``.

        Each file is expected to contain the Python ``repr`` of a dict
        shaped like ``{'data': {'list': [{'movieId': ...}, ...]}}``.
        Entries that cannot be opened or parsed, or that lack those keys,
        are reported on stdout and skipped.

        Args:
            path: Directory to scan. A trailing path separator is optional.

        Returns:
            A list of movie IDs in the order they were encountered;
            duplicates are kept.
        """
        # Local import so the block does not depend on a file-level import.
        import ast

        movieID_list = []

        for file in os.listdir(path):
            try:
                # os.path.join works with or without a trailing separator,
                # unlike plain string concatenation.
                with open(os.path.join(path, file), 'r') as f:
                    # SECURITY: the file content originates from a remote
                    # service. ast.literal_eval only parses Python literals,
                    # whereas eval() would execute arbitrary expressions.
                    content = ast.literal_eval(f.read())
                    movie_list = content['data']['list']

                for movie in movie_list:
                    movieID_list.append(movie['movieId'])
            except Exception as e:
                # Best effort: report the problem and continue with the
                # next directory entry.
                print(e)

        return movieID_list


if __name__ == '__main__':
    # Script entry point: read movie IDs from previously saved files, then
    # download each movie's record into ../movie_info/<id>.txt.
    spider = MovieInfoSpider()
    movieids = spider.get_movieID(path='/home/andrew/Desktop/movie_all/')

    out_dir = '../movie_info/'
    # Make sure the target directory exists before the first write.
    os.makedirs(out_dir, exist_ok=True)

    # get_movieID keeps duplicates, so the same ID can occur many times.
    # dict.fromkeys drops repeats while preserving first-seen order, which
    # avoids re-downloading and rewriting the same file.
    # (Assumes the IDs are hashable, e.g. ints -- TODO confirm.)
    for movie_id in dict.fromkeys(movieids):
        content = spider.get_json(movieID=movie_id)
        Spider.json_to_txt(out_dir + str(movie_id) + '.txt', content)
        print(movie_id)
        # Random pause between requests.
        delta = random.randint(1, 20)
        time.sleep(delta)
        print('sleeping {} seconds'.format(delta))

import requests
import pandas as pd

class Spider:
    """Small helper that downloads JSON from a URL and dumps data to a text file."""

    def get_json(self, url):
        """GET ``url`` with browser-like headers and return the decoded JSON body.

        Args:
            url: Address to request.

        Returns:
            The response body decoded from JSON.
        """
        browser_headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36',
            'Accept': 'application/json, text/javascript, */*; q=0.01',
        }
        reply = requests.get(url, headers=browser_headers, timeout=5)
        return reply.json()

    @staticmethod
    def json_to_txt(file_name, json_name):
        """Write ``str(json_name)`` to ``file_name``, replacing any existing file.

        Args:
            file_name: Path of the file to create or overwrite.
            json_name: Object whose string form is written.
        """
        with open(file_name, 'w') as sink:
            sink.write(str(json_name))


if __name__ == '__main__':
    # Script entry point: fetch the "coming" movie list once and store it in
    # a file named after the current local timestamp.
    from datetime import datetime

    coming_url = 'http://api.maoyan.com/mmdb/movie/v2/list/rt/order/coming.json?ci=1&limit=12&token='

    spider = Spider()
    content = spider.get_json(url=coming_url)

    stamp = datetime.today()
    target = '../coming_movie/' + str(stamp) + '.txt'
    spider.json_to_txt(target, content)

  • 2
    点赞
  • 6
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值