# (79) Scraping web page information

# Adds a search feature

import re
import urllib.request
import json

def InsertDict(D, key, value):
    """Store ``value`` under ``key`` in ``D``, replacing any existing entry.

    ``D`` is modified in place and nothing is returned.
    """
    # A plain assignment both creates a missing key and overwrites an existing one.
    D[key] = value

def GetHtml(url, encoding='gbk'):
    """Fetch ``url`` and return the response body decoded as text.

    Args:
        url: Address handed unchanged to ``urllib.request.urlopen``.
        encoding: Codec used to decode the body. Defaults to ``'gbk'``,
            the codec this function always used before the parameter existed.

    Returns:
        The decoded response body as ``str``.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` when the request fails.
        UnicodeDecodeError: If the body is not valid in ``encoding``.
    """
    # The context manager closes the response even when read()/decode() raises.
    with urllib.request.urlopen(url) as response:
        return response.read().decode(encoding)

def GetDict(s, D1):
    """Fill ``D1`` with name -> code pairs parsed from ``name(code)`` strings.

    Args:
        s: Iterable of strings shaped like ``'name(CODE)'``.
        D1: Dict updated in place; a name seen again overwrites its old code.

    Returns:
        None. The result is delivered through ``D1``.

    Entries that contain no ``'('`` carry no code and are skipped instead of
    raising ``IndexError``.
    """
    for entry in s:
        name, paren, remainder = entry.partition('(')
        if not paren:
            # No opening parenthesis: nothing to use as the code.
            continue
        # Keep only the text up to a possible second '(' and drop every ')'.
        code = remainder.split('(', 1)[0].replace(')', '')
        D1[name] = code


def GetCitycode(original_city, D):
    """Return the value stored for ``original_city`` in ``D``.

    Args:
        original_city: Key to look up.
        D: Dict to search.

    Returns:
        ``D[original_city]`` when the key is present, otherwise ``None``.
    """
    # A direct hash lookup replaces the former scan over every key.
    return D.get(original_city)

def UrlJoin(original_city_code, target_city_code, t):
    """Build the Get90DaysLowestPrice query URL.

    The three arguments are inserted verbatim (no URL escaping) as the
    ``dcity``, ``acity`` and ``ddate`` query parameters, in that order.
    """
    template = 'http://flights.ctrip.com/domestic/ajax/Get90DaysLowestPrice?dcity=%s&acity=%s&ddate=%s&searchType=S&r=0.18035678380179632'
    fields = (original_city_code, target_city_code, t)
    return template % fields

def GetLowestprice(lowerprice_html, D2):
    """Record the date -> price pairs found in a response and return the cheapest date.

    Dates are every ``YYYY-MM-DD`` substring of ``lowerprice_html``. Prices
    are every run of 3-4 digits in the text that is not part of one of those
    dates. The n-th date is paired with the n-th price.
    NOTE(review): a price outside 100-9999 will not be matched as a single
    number by the 3-4 digit pattern -- confirm against the real response.

    Args:
        lowerprice_html: Response body; it must be valid JSON.
        D2: Dict updated in place with ``date -> price`` entries, both kept
            as strings. Entries already present take part in the comparison.

    Returns:
        The date key whose price is lowest (the earliest-inserted one on a
        tie), or ``None`` when ``D2`` ends up empty.

    Raises:
        ValueError: If ``lowerprice_html`` is not valid JSON.
        IndexError: If fewer prices than dates were found.
    """
    # Parsed only to fail fast on a non-JSON body; the values themselves are
    # pulled out with the regular expressions below.
    json.loads(lowerprice_html)

    date_pattern = r'[0-9]{4}\-[0-9]{2}\-[0-9]{2}'
    dates = re.findall(date_pattern, lowerprice_html)
    # Blank out the dates before scanning for prices so that the year digits
    # are never mistaken for a price, whichever year the response covers.
    text_without_dates = re.sub(date_pattern, ' ', lowerprice_html)
    prices = re.findall(r'\d{3,4}', text_without_dates)

    if len(prices) < len(dates):
        raise IndexError(
            'found %d dates but only %d prices' % (len(dates), len(prices)))

    for date, price in zip(dates, prices):
        D2[date] = price

    if not D2:
        return None

    # Compare every entry; min() keeps the first key on equal prices.
    cheapest = min(D2, key=lambda date: int(D2[date]))
    print('最低票价为: %s 日期为: %s' % (D2[cheapest], cheapest))
    return cheapest
if __name__ == '__main__':
    # Script entry point. Every statement lives under the guard so that
    # importing this module performs no network requests or prompts.
    D1 = {}  # city name -> city code
    D2 = {}  # date -> price
    get_citycode_url = 'http://webresource.c-ctrip.com/code/cquery/resource/address/flight/flight_new_poi_gb2312.js?releaseno=?CR_2016_03_07_22_18_26'
    citycode_html = GetHtml(get_citycode_url)
    # Matches a run of Chinese characters followed by an upper-case code in
    # parentheses.
    s = re.findall(r'[\u4e00-\u9fa5]+\([A-Z]+\)', citycode_html)
    GetDict(s, D1)
    original_city = input('请输入出发地:')
    target_city = input('请输入目的地:')
    t = input('请输入时间:')
    original_city_code = GetCitycode(original_city, D1)
    target_city_code = GetCitycode(target_city, D1)
    # GetCitycode returns None for an unknown city; stop here rather than
    # sending a request with "None" in the URL.
    if original_city_code is None or target_city_code is None:
        raise SystemExit('未找到城市代码')
    get_lowerprice_url = UrlJoin(original_city_code, target_city_code, t)
    print(get_lowerprice_url)
    lowerprice_html = GetHtml(get_lowerprice_url)
    k = GetLowestprice(lowerprice_html, D2)
    # GetLowestprice can return None when it finds no usable price.
    if k is None:
        raise SystemExit('未找到票价信息')
    target_url = 'http://flights.ctrip.com/booking/%s-%s-day-1.html?DDate1=%s' % (original_city_code, target_city_code, k)
    print(target_url)

兄弟连学python


Python学习交流、资源共享群:563626388 QQ



  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值