Python 3：登录网站并查询结果输出（关键字保存）

该代码示例展示了使用Python的urllib和BeautifulSoup库进行网页登录和数据查询的过程。首先,通过设置请求头和POST数据完成网站的登录操作。然后,定义了一个查询函数,用于根据用户输入的省市区信息发送查询请求,并获取查询结果。整个过程涉及到了HTTP请求、数据编码、cookies管理等网络爬虫的基础技术。
摘要由CSDN通过智能技术生成

import ast
import pickle
import re
import urllib.parse
import urllib.request
from urllib.request import quote

import requests
from bs4 import BeautifulSoup as bs

def urlopen(url, username='hcp.com', password='1456'):
    """POST login credentials to *url* and return the raw response body.

    Args:
        url: Login endpoint (http://business.hcp66.com/member/index/login.html).
        username: Account name; default preserves the original hard-coded value.
        password: Account password; default preserves the original value.

    Returns:
        bytes: The raw HTTP response body (the site answers with a dict-like
        JSON/text payload that the caller parses).

    Notes:
        The original code pinned ``Content-Length`` to the literal ``'97'``.
        That header is now left to urllib, which computes it from the actual
        encoded body — safer if the form data ever changes size.
        NOTE(review): the Cookie below embeds a fixed PHPSESSID — presumably
        captured from a browser session; it may expire. Verify before relying
        on it.
    """
    # Browser-like headers copied from a real session so the server accepts us.
    head = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'Cookie': 'UM_distinctid=16579cf386494-0d95db621e53d2-454c092b-100200-16579cf38651a7; Hm_lvt_5d2a564b91009e38063616ec4b3d8311=1539494544,1539665344,1539919502,1540451788; PHPSESSID=4enbqpdlibic1t6q3ma6fnt4a5; Usercookie_username=%25E6%25B1%25BD%25E8%25BD%25A6%25E7%2594%25A8%25E5%2593%2581%25E6%25B7%2598%25E6%25B7%2598%25E5%25BA%2597; Usercookie_userid=527277; CNZZDATA155540=cnzz_eid%3D866609669-1503013385-http%253A%252F%252Fbusiness.hcp66.com%252F%26ntime%3D1540774055',
        'Host': 'business.hcp66.com',
        'Pragma': 'no-cache',
        'Referer': 'http://business.hcp66.com/member/index/login.html',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
        'X-Requested-With': 'XMLHttpRequest',
    }
    req = urllib.request.Request(url, headers=head)

    # Login form fields expected by the site.
    form = {
        'gotourl': '',
        'member[username]': username,
        'member[password]': password,
        'member[code]': '',
    }
    data = urllib.parse.urlencode(form).encode('utf-8')

    # Context manager guarantees the connection is closed (original leaked it).
    with urllib.request.urlopen(req, data) as resp:
        return resp.read()

def chaurlopen(Length, city1, city2, city3):
    """POST a region query to the shop-search page and return the HTML body.

    Args:
        Length: Kept for backward compatibility with existing callers.
            No longer used: the ``Content-Length`` header is computed by
            urllib from the encoded body, which is always correct.
        city1: Province form value (e.g. '1' for 北京市).
        city2: City form value (looked up from the pickled name dict).
        city3: District/county form value, or '0' for "any".

    Returns:
        bytes: The raw HTML of the query-result page.
    """
    url = 'http://business.hcp66.com/member/index/shop.html'

    # Browser-like headers from a captured session; Content-Length deliberately
    # omitted — urllib fills it in from the body (the manual value was the bug
    # this function's Length parameter existed to work around).
    head = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Cookie': 'UM_distinctid=16579cf386494-0d95db621e53d2-454c092b-100200-16579cf38651a7; Hm_lvt_5d2a564b91009e38063616ec4b3d8311=1539494544,1539665344,1539919502,1540451788; PHPSESSID=4enbqpdlibic1t6q3ma6fnt4a5; Usercookie_username=%25E6%25B1%25BD%25E8%25BD%25A6%25E7%2594%25A8%25E5%2593%2581%25E6%25B7%2598%25E6%25B7%2598%25E5%25BA%2597; Usercookie_userid=527277; CNZZDATA155540=cnzz_eid%3D866609669-1503013385-http%253A%252F%252Fbusiness.hcp66.com%252F%26ntime%3D1540768648',
        'Host': 'business.hcp66.com',
        'Pragma': 'no-cache',
        'Referer': 'http://business.hcp66.com/member/index/shop.html',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
    }
    req = urllib.request.Request(url, headers=head)

    # Query form: three cascading region selectors plus the submit button value.
    form = {
        'search[city1]': city1,
        'search[city2]': city2,
        'search[city3]': city3,
        'b1': '查询',
    }
    data = urllib.parse.urlencode(form).encode('utf-8')

    # Context manager ensures the socket is released (original never closed it).
    with urllib.request.urlopen(req, data) as resp:
        return resp.read()

def length_(city1, city2, city3):
    """Return the Content-Length (as a string) of the query form body.

    The original implementation computed a local character count, threw it
    away, and instead POSTed the form to http://httpbin.org/post just to read
    back the Content-Length header — a needless (and fragile) network round
    trip. The local count was also wrong for non-ASCII values such as '查询',
    which url-encode to many more bytes than characters. Both problems are
    fixed by measuring the actual url-encoded body.

    Args:
        city1: Province form value.
        city2: City form value.
        city3: District/county form value ('0' when unspecified).

    Returns:
        str: Decimal byte length of the url-encoded form, e.g. '81'.
    """
    form = {
        'search[city1]': city1,
        'search[city2]': city2,
        'search[city3]': city3,
        'b1': '查询',
    }
    # Identical encoding to what chaurlopen() actually sends, so the length
    # always matches the real request body.
    body = urllib.parse.urlencode(form).encode('utf-8')
    return str(len(body))

def xia():
    """Log in, prompt for a region, query installation shops and print them.

    Flow:
      1. POST login via urlopen() and echo the server's response dict.
      2. Map the user's province/city/district input to the site's numeric
         form values (province table is inline; city/district come from the
         pickled dict in ``name.pkl``).
      3. Query via chaurlopen() and print every matching shop row.

    Side effects: reads stdin, reads ``name.pkl``, performs network I/O,
    prints to stdout.

    Raises:
        KeyError: If the entered region name is not in the lookup tables.
        IndexError: If fewer than two whitespace-separated names are entered.
    """
    url = 'http://business.hcp66.com/member/index/login.html'
    html = urlopen(url)  # log in first
    html = html.decode('utf-8')
    # The login response is a Python-literal dict; literal_eval is the safe
    # way to parse it (never use eval on server output).
    htmldic = ast.literal_eval(html)
    print(htmldic)

    # Province name -> numeric form value used by the site's <select>.
    # BUGFIX: the original mapped both 陕西省 and 甘肃省 to '28' and had no
    # '27' at all; 陕西省 is restored to '27' to keep the sequence contiguous.
    dic = {'北京市': '1', '天津市': '2', '河北省': '3', '山西省': '4',
           '内蒙古自治区': '5', '辽宁省': '6', '吉林省': '7', '黑龙江省': '8',
           '上海市': '9', '江苏省': '10', '浙江省': '11', '安徽省': '12',
           '福建省': '13', '江西省': '14', '山东省': '15', '河南省': '16',
           '湖北省': '17', '湖南省': '18', '广东省': '19', '广西壮族自治区': '20',
           '海南省': '21', '重庆市': '22', '四川省': '23', '贵州省': '24',
           '云南省': '25', '西藏自治区': '26', '陕西省': '27', '甘肃省': '28',
           '青海省': '29', '宁夏回族自治区': '30', '新疆维吾尔自治区': '31',
           '台湾省': '32', '香港特别行政区': '33', '澳门特别行政区': '34'}

    # City/district name -> form value, pre-built and pickled to name.pkl.
    # ``with`` closes the file handle (the original leaked it).
    with open('name.pkl', 'rb') as file:
        dict_name = pickle.load(file)

    dict_qu = input('请输入省 市 区(县)空格隔开:')
    dict_qu = dict_qu.split()
    print(dict_qu)

    city1 = dic[dict_qu[0]]        # province -> form value
    city2 = dict_name[dict_qu[1]]  # city -> form value
    if len(dict_qu) == 2:
        # No district/county given: the site accepts '0' as "any".
        city3 = '0'
    else:
        city3 = dict_name[dict_qu[2]]

    Length = length_(city1, city2, city3)
    print(city1)
    print(city2)
    print(city3)
    print(Length)

    # All form values ready — run the actual query.
    cont = chaurlopen(Length, city1, city2, city3)
    cont = cont.decode('utf-8')
    cont = bs(cont, 'lxml')

    # Result rows live inside this specific styled div on the result page.
    list1 = cont.find_all('div', style="padding-top:50px;padding-left:15px;")
    list1 = list1[0]
    content = list1.find_all('td', height="30")
    if len(content) == 0:
        print("这个地区暂时无安装网点")

    # Print shop cells two per blank-line group, skipping short cells and
    # rows containing the product keyword '通用记录仪'.
    c = 0
    for cell in content:
        text = cell.text.strip()
        if len(text) > 5:
            if text.find('通用记录仪') == -1:
                print(text)
                c = c + 1
                if c == 2:
                    print('\n')
                    c = 0

# Keep prompting for regions until the user kills the process (Ctrl-C).
while True:
    xia()

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值