import ast
import pickle
import re
import urllib.parse
import urllib.request
from urllib.request import quote

import requests
from bs4 import BeautifulSoup as bs
def urlopen(url):
    """POST the hard-coded login form to *url* and return the raw response bytes.

    The session is identified by the fixed Cookie header below (captured from a
    real browser session); the form fields mimic the site's login form.

    Args:
        url: Login endpoint, e.g. 'http://business.hcp66.com/member/index/login.html'.

    Returns:
        bytes: The undecoded HTTP response body.
    """
    # Login request headers captured from a browser session.
    # NOTE(review): the original also sent a hard-coded 'Content-Length: 97';
    # urllib computes the correct length from the body, and a stale manual
    # value would corrupt the request, so it is intentionally omitted.
    head = {}
    head['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8'
    head['Accept-Language'] = 'zh-CN,zh;q=0.9'
    head['Cache-Control'] = 'no-cache'
    head['Connection'] = 'keep-alive'
    head['Content-Type'] = 'application/x-www-form-urlencoded; charset=UTF-8'
    head['Cookie'] = 'UM_distinctid=16579cf386494-0d95db621e53d2-454c092b-100200-16579cf38651a7; Hm_lvt_5d2a564b91009e38063616ec4b3d8311=1539494544,1539665344,1539919502,1540451788; PHPSESSID=4enbqpdlibic1t6q3ma6fnt4a5; Usercookie_username=%25E6%25B1%25BD%25E8%25BD%25A6%25E7%2594%25A8%25E5%2593%2581%25E6%25B7%2598%25E6%25B7%2598%25E5%25BA%2597; Usercookie_userid=527277; CNZZDATA155540=cnzz_eid%3D866609669-1503013385-http%253A%252F%252Fbusiness.hcp66.com%252F%26ntime%3D1540774055'
    head['Host'] = 'business.hcp66.com'
    head['Pragma'] = 'no-cache'
    head['Referer'] = 'http://business.hcp66.com/member/index/login.html'
    head['User-Agent'] = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
    head['X-Requested-With'] = 'XMLHttpRequest'
    req = urllib.request.Request(url, headers=head)
    # Hard-coded credentials posted as the login form body.
    data = {}
    data['gotourl'] = ''
    data['member[username]'] = 'hcp.com'
    data['member[password]'] = '1456'
    data['member[code]'] = ''
    body = urllib.parse.urlencode(data).encode('utf-8')
    # Close the response object deterministically (the original leaked it).
    with urllib.request.urlopen(req, body) as resp:
        return resp.read()
def chaurlopen(Length, city1, city2, city3):
    """POST a region query to the shop-search page and return the raw response bytes.

    Args:
        Length: Kept for backward compatibility with existing callers.
            NOTE(review): previously sent as a manual 'Content-Length' header;
            urllib computes the exact length from the body, and a wrong manual
            value would break the request, so it is no longer sent.
        city1: Province id (value posted as 'search[city1]').
        city2: City id (value posted as 'search[city2]').
        city3: District/county id, or '0' for the whole city.

    Returns:
        bytes: The undecoded HTML of the search-results page.
    """
    url = 'http://business.hcp66.com/member/index/shop.html'
    # Query request headers captured from a browser session.
    head = {}
    head['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8'
    head['Accept-Language'] = 'zh-CN,zh;q=0.9'
    head['Cache-Control'] = 'no-cache'
    head['Connection'] = 'keep-alive'
    head['Content-Type'] = 'application/x-www-form-urlencoded'
    head['Cookie'] = 'UM_distinctid=16579cf386494-0d95db621e53d2-454c092b-100200-16579cf38651a7; Hm_lvt_5d2a564b91009e38063616ec4b3d8311=1539494544,1539665344,1539919502,1540451788; PHPSESSID=4enbqpdlibic1t6q3ma6fnt4a5; Usercookie_username=%25E6%25B1%25BD%25E8%25BD%25A6%25E7%2594%25A8%25E5%2593%2581%25E6%25B7%2598%25E6%25B7%2598%25E5%25BA%2597; Usercookie_userid=527277; CNZZDATA155540=cnzz_eid%3D866609669-1503013385-http%253A%252F%252Fbusiness.hcp66.com%252F%26ntime%3D1540768648'
    head['Host'] = 'business.hcp66.com'
    head['Pragma'] = 'no-cache'
    head['Referer'] = 'http://business.hcp66.com/member/index/shop.html'
    head['Upgrade-Insecure-Requests'] = '1'
    head['User-Agent'] = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
    req = urllib.request.Request(url, headers=head)
    # Search form fields; 'b1' is the submit button's label on the site.
    data = {}
    data['search[city1]'] = city1
    data['search[city2]'] = city2
    data['search[city3]'] = city3
    data['b1'] = '查询'
    body = urllib.parse.urlencode(data).encode('utf-8')
    # Close the response object deterministically (the original leaked it).
    with urllib.request.urlopen(req, body) as resp:
        return resp.read()
def length_(city1, city2, city3):
    """Return the Content-Length (as a numeric string) of the query form body.

    The original implementation POSTed the form to http://httpbin.org/post just
    to read back the Content-Length header that `requests` had computed — a
    needless network round-trip (it even computed a local estimate and then
    discarded it). The exact value is simply the byte length of the
    URL-encoded form, which we compute locally here.

    Args:
        city1: Province id.
        city2: City id.
        city3: District/county id ('0' for whole city).

    Returns:
        str: Decimal byte length of the UTF-8 URL-encoded form body,
        matching what the old httpbin round-trip returned.
    """
    data = {}
    data['search[city1]'] = city1
    data['search[city2]'] = city2
    data['search[city3]'] = city3
    data['b1'] = '查询'
    # Same encoding chaurlopen() uses, so the length matches the actual body.
    body = urllib.parse.urlencode(data).encode('utf-8')
    return str(len(body))
def xia():
    """Run one interactive query: log in, ask the user for a region, then
    fetch and print the installation shops the site lists for that region.

    Reads the city/district id mapping from 'name.pkl' in the working
    directory; prompts on stdin; prints results to stdout.
    """
    url = 'http://business.hcp66.com/member/index/login.html'
    # Log in first; the endpoint answers with a Python-literal-style dict.
    html = urlopen(url)
    html = html.decode('utf-8')
    htmldic = ast.literal_eval(html)
    # Echo the login result so failures are visible.
    print(htmldic)
    # Province name -> site-internal province id (posted as search[city1]).
    # NOTE(review): '陕西省' and '甘肃省' both map to '28' while '27' is unused;
    # '陕西省' is probably meant to be '27' — confirm against the site before changing.
    dic = {'北京市':'1','天津市':'2','河北省':'3','山西省':'4','内蒙古自治区':'5','辽宁省':'6','吉林省':'7','黑龙江省':'8','上海市':'9','江苏省':'10','浙江省':'11','安徽省':'12','福建省':'13','江西省':'14','山东省':'15','河南省':'16','湖北省':'17','湖南省':'18','广东省':'19','广西壮族自治区':'20','海南省':'21','重庆市':'22','四川省':'23','贵州省':'24','云南省':'25','西藏自治区':'26','陕西省':'28','甘肃省':'28','青海省':'29','宁夏回族自治区':'30','新疆维吾尔自治区':'31','台湾省':'32','香港特别行政区':'33','澳门特别行政区':'34',}
    # City/district name -> id mapping, previously scraped and pickled.
    # (with-block fixes the original's leaked file handle.)
    with open('name.pkl', 'rb') as file:
        dict_name = pickle.load(file)
    # Prompt: "enter province, city, district (county), space-separated".
    dict_qu = input('请输入省 市 区(县)空格隔开:')
    dict_qu = dict_qu.split()
    print(dict_qu)
    # Translate the typed names into the site's numeric ids.
    city1 = dic[dict_qu[0]]
    city2 = dict_name[dict_qu[1]]
    if len(dict_qu) == 2:
        # No district given: the site uses '0' to mean "whole city".
        city3 = '0'
    else:
        city3 = dict_name[dict_qu[2]]
    Length = length_(city1, city2, city3)
    print(city1)
    print(city2)
    print(city3)
    print(Length)
    # All form fields are ready; run the actual shop query.
    cont = chaurlopen(Length, city1, city2, city3)
    cont = cont.decode('utf-8')
    cont = bs(cont, 'lxml')
    # The results live in a div with this exact inline style on the page.
    list1 = cont.find_all('div', style="padding-top:50px;padding-left:15px;")
    # NOTE(review): raises IndexError if the page layout changed or login failed.
    list1 = list1[0]
    content = list1.find_all('td', height="30")
    if len(content) == 0:
        # "This area has no installation shops yet."
        print("这个地区暂时无安装网点")
    c = 0
    for i in content:
        i = i.text
        i = i.strip()
        if len(i) > 5:
            # Skip rows containing '通用记录仪' (a generic-recorder entry).
            cha = i.find('通用记录仪')
            if cha == -1:
                print(i)
                c = c + 1
                if c == 2:
                    # Blank line after every two printed shops for readability.
                    print('\n')
                    c = 0
if __name__ == '__main__':
    # The original used `x = 0; while x == 0:` with x never changing — an
    # obfuscated infinite loop. Say so directly, and guard with __main__ so
    # importing this module doesn't start prompting. Stop with Ctrl+C.
    while True:
        xia()