Python 3: log in to a website and print the query results

The script below logs in to business.hcp66.com with urllib, submits a province / city / district query, and prints the installation locations parsed out of the result page with BeautifulSoup.

import urllib.request
import urllib.parse
import requests
import ast
import pickle
from bs4 import BeautifulSoup as bs

def urlopen(url):
    # Log in: POST the login form with the browser-like headers below

    head = {}

    head['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8'
    head['Accept-Language'] = 'zh-CN,zh;q=0.9'
    head['Cache-Control'] = 'no-cache'
    head['Connection'] = 'keep-alive'
    head['Content-Length']='97'
    head['Content-Type'] = 'application/x-www-form-urlencoded; charset=UTF-8'
    head['Cookie']='UM_distinctid=16579cf386494-0d95db621e53d2-454c092b-100200-16579cf38651a7; Hm_lvt_5d2a564b91009e38063616ec4b3d8311=1539494544,1539665344,1539919502,1540451788; PHPSESSID=4enbqpdlibic1t6q3ma6fnt4a5; Usercookie_username=%25E6%25B1%25BD%25E8%25BD%25A6%25E7%2594%25A8%25E5%2593%2581%25E6%25B7%2598%25E6%25B7%2598%25E5%25BA%2597; Usercookie_userid=527277; CNZZDATA155540=cnzz_eid%3D866609669-1503013385-http%253A%252F%252Fbusiness.hcp66.com%252F%26ntime%3D1540774055'
    head['Host']='business.hcp66.com'
    head['Pragma']='no-cache'
    head['Referer']='http://business.hcp66.com/member/index/login.html'
    head['User-Agent']='Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
    head['X-Requested-With'] = 'XMLHttpRequest'

    req = urllib.request.Request(url,headers = head)

    data = {}
    data['gotourl']=''
    data['member[username]'] = 'hcp.com'
    data['member[password]'] = '1456'
    data['member[code]']=''
    data = urllib.parse.urlencode(data).encode('utf-8')

    html = urllib.request.urlopen(req,data)

    html = html.read()

    return html
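
# Alternative: the same login POST could be made with requests.Session(),
# which tracks cookies and sets Content-Length automatically, so the
# hard-coded Cookie and Content-Length headers above would not be needed.
# A rough, untested sketch (not called anywhere in this script):
def login_with_requests():
    s = requests.Session()
    s.headers['User-Agent'] = ('Mozilla/5.0 (Windows NT 6.1; WOW64) '
                               'AppleWebKit/537.36 (KHTML, like Gecko) '
                               'Chrome/63.0.3239.132 Safari/537.36')
    r = s.post('http://business.hcp66.com/member/index/login.html',
               data={'gotourl': '', 'member[username]': 'hcp.com',
                     'member[password]': '1456', 'member[code]': ''})
    return s, r.text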


def chaurlopen(Length,city1,city2,city3):
    # Query page: POST the selected province / city / district codes and return the result HTML

    url = 'http://business.hcp66.com/member/index/shop.html'

    head = {}

    head['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8'
    head['Accept-Language'] = 'zh-CN,zh;q=0.9'
    head['Cache-Control'] = 'no-cache'
    head['Connection'] = 'keep-alive'
    head['Content-Length']=Length
    head['Content-Type'] = 'application/x-www-form-urlencoded'

    head['Cookie']='UM_distinctid=16579cf386494-0d95db621e53d2-454c092b-100200-16579cf38651a7; Hm_lvt_5d2a564b91009e38063616ec4b3d8311=1539494544,1539665344,1539919502,1540451788; PHPSESSID=4enbqpdlibic1t6q3ma6fnt4a5; Usercookie_username=%25E6%25B1%25BD%25E8%25BD%25A6%25E7%2594%25A8%25E5%2593%2581%25E6%25B7%2598%25E6%25B7%2598%25E5%25BA%2597; Usercookie_userid=527277; CNZZDATA155540=cnzz_eid%3D866609669-1503013385-http%253A%252F%252Fbusiness.hcp66.com%252F%26ntime%3D1540768648'
    head['Host']='business.hcp66.com'
    head['Pragma']='no-cache'
    head['Referer']='http://business.hcp66.com/member/index/shop.html'
    head['Upgrade-Insecure-Requests']='1'
    head['User-Agent']='Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'

    req = urllib.request.Request(url,headers = head)

    data ={}
    data['search[city1]'] = city1
    data['search[city2]'] = city2
    data['search[city3]'] = city3
    data['b1'] = '查询'  # value of the submit button ('查询' means "search")
    
    data = urllib.parse.urlencode(data).encode('utf-8')

    html = urllib.request.urlopen(req,data)

    html = html.read()

    return html
    

def length_(city1,city2,city3):
    # Work out the Content-Length of the query form by POSTing the same
    # fields to httpbin.org, which echoes the request headers back

    data = {}
    data['search[city1]'] = city1
    data['search[city2]'] = city2
    data['search[city3]'] = city3
    data['b1'] = '查询'

    req = requests.post('http://httpbin.org/post', data)

    length = req.json()['headers']['Content-Length']

    return length
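
# Note: the Content-Length could also be worked out locally, without the
# extra round trip to httpbin.org, because urlencode() builds the same body
# that requests/urllib send. A small, untested sketch kept for comparison
# (not called anywhere in this script):
def length_local(city1, city2, city3):
    data = {'search[city1]': city1, 'search[city2]': city2,
            'search[city3]': city3, 'b1': '查询'}
    return str(len(urllib.parse.urlencode(data).encode('utf-8')))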

def xia():

    url= 'http://business.hcp66.com/member/index/login.html'

    html = urlopen(url)
    # Log in first

    html = html.decode('utf-8')

    htmldic = ast.literal_eval(html)
    # The login response is a dict-like string; convert it to a real dict

    print(htmldic)

    dic = {
        '北京市': '1', '天津市': '2', '河北省': '3', '山西省': '4', '内蒙古自治区': '5',
        '辽宁省': '6', '吉林省': '7', '黑龙江省': '8', '上海市': '9', '江苏省': '10',
        '浙江省': '11', '安徽省': '12', '福建省': '13', '江西省': '14', '山东省': '15',
        '河南省': '16', '湖北省': '17', '湖南省': '18', '广东省': '19', '广西壮族自治区': '20',
        '海南省': '21', '重庆市': '22', '四川省': '23', '贵州省': '24', '云南省': '25',
        '西藏自治区': '26', '陕西省': '27', '甘肃省': '28', '青海省': '29', '宁夏回族自治区': '30',
        '新疆维吾尔自治区': '31', '台湾省': '32', '香港特别行政区': '33', '澳门特别行政区': '34',
    }
    # Province -> code mapping used later in the query form data

    file = open('name.pkl','rb')
    # name.pkl stores the saved nationwide city / district information

    dict_name = pickle.load(file)
    # Load that dictionary into dict_name

    dict_qu = input('请输入省 市 区(县)空格隔开:')
    # Prompt for province, city and district/county separated by spaces

    dict_qu = dict_qu.split()
    # Split the input into a list

    print(dict_qu)
    
    city1 = dic[dict_qu[0]]
    # Look the province name up in dic and pass its code to city1
    city2 = dict_name[dict_qu[1]]
    # Look the city name up in dict_name the same way

    if len(dict_qu) == 2:
        # If no district/county was entered, city3 defaults to '0'
        city3 = '0'

    else:
        # Otherwise look its code up and assign it to city3
        city3 = dict_name[dict_qu[2]]

    Length = length_(city1,city2,city3)
    # Work out the Content-Length for the query POST
    
    print(city1)
    print(city2)
    print(city3)
    print(Length)

    cont = chaurlopen(Length,city1,city2,city3)
    # All the form data is ready, so submit the query
    cont = cont.decode('utf-8')
    cont = bs(cont,'lxml')
    # Parse the result page

    # Select the div that holds the results, then its td cells
    list1 = cont.find_all('div', style="padding-top:50px;padding-left:15px;")

    list1 = list1[0]

    content = list1.find_all('td', height="30")

    if len(content)==0:
        print("这个地区暂时无安装网点")

    c = 0

    for i in content:
        i = i.text
        i = i.strip()
        if len(i) > 5:
            # Skip cells that contain '通用记录仪'
            cha = i.find('通用记录仪')
            if cha == -1:
                print(i)
                c = c + 1

        if c == 2:
            # Separate every two printed cells with a blank line
            print('\n')
            c = 0
                    
# Keep prompting for new queries until the user interrupts
while True:
    xia()
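
The script assumes a name.pkl file in the working directory that maps city and district names to the numeric codes the site expects; the post does not show how that file was built. A minimal sketch of how such a pickle could be produced is below; the two entries are placeholders, not the site's real codes, and the real mapping would have to be copied from the site's own city/district dropdowns:

import pickle

# Hypothetical example entries only; replace with the site's actual codes.
dict_name = {
    '郑州市': '101',
    '金水区': '1001',
}

with open('name.pkl', 'wb') as f:
    pickle.dump(dict_name, f)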

