山东大学创新项目实训个人工作日志(十八)

景点门票爬取的速度提升
因为爬取的数据过多,所以速度较慢,这次完成的工作就是提高爬虫的速度。
采用多线程的方式:检索每一个平台时都单独创建一个线程,待所有线程的数据返回后,再统一进行合并。

def init():
    """
    Create one spider object per ticket site and publish them as
    module-level globals, then flag the module as initialised.

    The spiders are built once here so that later searches can reuse them
    instead of constructing new objects on every call.

    :return: None
    """
    global feizhu, xiecheng, tuniu, quna
    global lvmama, dahe, klook, tongcheng
    global inited

    # One spider instance per supported site, built in a fixed order.
    feizhu = menpiao.zwf_menpiao_feizhu.FeizhuSpider()
    xiecheng = menpiao.zwf_menpiao_xiecheng.XiechengSpider()
    tuniu = menpiao.zwf_menpiao_tuniu.TuNiuSpider()
    quna = menpiao.zwf_menpiao_qunar.QunaSpider()
    lvmama = menpiao.zwf_menpiao_lvmama.LvmamaSpider()
    dahe = menpiao.zwf_menpiao_dahe.DaheSpider()
    klook = menpiao.zwf_menpiao_klook.KlookSpider()
    tongcheng = menpiao.zwf_menpiao_tongcheng.TongchengSpider()

    # Only set once every spider above has been constructed successfully.
    inited = True


def searchSpots(keyword, city):
    """
    Search all supported ticket sites for attraction tickets matching
    ``keyword`` in ``city`` and record the merged result.

    Sites currently covered (per the original author's note): Feizhu,
    Qunar, Xiecheng (Ctrip), Tuniu, Klook, Lvmama, Tongcheng and Dahe.

    One thread is started per spider, each running
    ``run_search(spider, keyword, city)``. After every thread has been
    joined, ``merge(...)`` is called with all spiders and its result is
    appended to the module-level ``spotsInfo`` list; ``city + keyword`` is
    appended to the module-level ``city_keywords`` list.

    Per the original author's note, a ``spotsInfo`` entry holds the
    attraction name and its tickets (name, type, price, url, buy, from,
    isReturnable, bookTime, outTime, useTime, discription) -- TODO confirm
    against ``merge``.

    :param keyword: search keyword for the attraction
    :param city: city to search in
    :return: None; results are delivered via ``spotsInfo`` and
        ``city_keywords``
    """
    # Spiders are created lazily, once, and reused by every later search.
    if not inited:
        init()

    # Same order as the positional arguments merge() was called with before.
    spiders = (feizhu, xiecheng, tuniu, quna, lvmama, dahe, klook, tongcheng)

    try:
        # One worker thread per site so the searches overlap.
        workers = [
            threading.Thread(target=run_search, args=(spider, keyword, city))
            for spider in spiders
        ]
        for worker in workers:
            worker.start()

        # Wait for every site to finish before merging.
        for worker in workers:
            worker.join()

        # Merge the per-site results and record them with their key.
        spotsInfo.append(merge(*spiders))
        # Key is the plain concatenation of city and keyword (no separator).
        city_keywords.append(city + '' + keyword)
    except Exception as e:
        # Best effort: report the failure and return without a new entry.
        print(e)

因为创建爬虫对象耗时较长,所以在程序初始化时一次性创建好所有对象,之后每次搜索直接复用,而不用重复创建,可以大大缩短程序的运行时间。
此外,采用缓存的方式暂存用户在本次页面打开期间搜索过的关键词及其结果,再次搜索相同关键词时无需重新爬取,也可以提高搜索速度。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值