爬虫小白求助,程序爬一半就假死,不停止执行也不报错,怎么回事呢?

代码如下,麻烦各位大神帮忙看看,小弟在此谢过!


import requests
from lxml import etree
from urllib import parse
from urllib.parse import quote
import re
import time,json

def get_response(base_url):
    '''
    #获取分类url
    @param base_url:
    @return:
    '''
    url_list=[]
    headers = {
        'Referer': 'https://movie.douban.com/',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36'
    }
    response=requests.get(base_url,headers=headers)
    html=etree.HTML(response.text)
    url_html=html.xpath('//div[@id="content"]/div/div[@class="aside"]/div/div[@class="types"]/span')
    # print(url_html)
    try:
        for itml in url_html:
            url_class='https://movie.douban.com/'+itml.xpath('./a/@href')[0]
            # print(url_class)
            url_list.append(url_class)

    except Exception:
        pass

    return url_list

def parse_ajax(movie_list,json_data):

    for i in json_data:
        item={}
        item['types']=i['types']
        item['regions']=i['regions']
        item['title']=i['title']
        item['url']=i['url']
        item['actor_count']=i['actor_count']
        item['vote_count'] = i['vote_count']
        item['score'] = i['score']
        print(item)
        movie_list.append(item)
    with open('doubanmovie_rank.json','w',encoding='utf-8') as fp:
        json.dump(movie_list,fp)

def get_movie_xapth(type_,type_name,id):
    """

    @param url_list:
    @param type_name:
    @param intervalid:
    @param limit:
    """
    movie_url = 'https://movie.douban.com/j/chart/top_list?type={}&interval_id={}%3A{}&action=&start={}&limit={}'
    x=0
    movie_list = []
    while True:
        params={
            'type_name': type_name,
            'type': type_,
            'interval_id': [str(id)+':'+str(id-10)],
            'action':'' ,
            'start': x*100,
            'limit': (x+1)*100
        }
        referer='https://movie.douban.com/typerank?'+parse.urlencode(params)
        headers = {
            'Referer': referer,
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36',
            'X - Requested - With': 'XMLHttpRequest'
        }
        movie_url_f=movie_url.format(type_,id,id-10,x*100,(x+1)*100)
        response_moive = requests.get(movie_url_f, headers=headers, params=params)
        # print(response_moive.text)
        if response_moive=='[]':
            break
        json_data = json.loads(response_moive.text)
        parse_ajax(movie_list,json_data)
        x+=50
        time.sleep(1)

def main():
    base_url = 'https://movie.douban.com/chart'
    url_list=get_response(base_url)
    print(url_list)
    for item in url_list:
        p = re.compile('type_name=([\u4e00-\u9fa5]{2,})&type=(.*?)&interval')
        type_name = p.search(item).group(1)
        type_=p.search(item).group(2)
        print(type_name,type_)
        get_movie_xapth(type_,type_name,100)

if __name__ == '__main__':
    main()



展开阅读全文

没有更多推荐了,返回首页

©️2019 CSDN 皮肤主题: 1024 设计师: 上身试试
应支付0元
点击重新获取
扫码支付

支付成功即可阅读