Python 3：登录网站并查询结果输出（关键字保存）

该代码示例展示了使用Python的urllib和BeautifulSoup库进行网页登录和数据查询的过程。首先,通过设置请求头和POST数据完成网站的登录操作。然后,定义了一个查询函数,用于根据用户输入的省市区信息发送查询请求,并获取查询结果。整个过程涉及到了HTTP请求、数据编码、cookies管理等网络爬虫的基础技术。
摘要由CSDN通过智能技术生成

import ast
import pickle
import re
import urllib.parse
import urllib.request
from urllib.request import quote

import requests
from bs4 import BeautifulSoup as bs

def urlopen(url, username='hcp.com', password='1456'):
    """POST login credentials to *url* and return the raw response body.

    Args:
        url: Login endpoint (http://business.hcp66.com/member/index/login.html).
        username: Account name; default preserves the original hard-coded value.
        password: Account password; default preserves the original value.

    Returns:
        bytes: The raw HTTP response body (the site answers with a dict-like
        JSON/text payload that the caller parses).

    Notes:
        The original code pinned ``Content-Length`` to the literal ``'97'``.
        That header is now left to urllib, which computes it from the actual
        encoded body — safer if the form data ever changes size.
        NOTE(review): the Cookie below embeds a fixed PHPSESSID — presumably
        captured from a browser session; it may expire. Verify before relying
        on it.
    """
    # Browser-like headers copied from a real session so the server accepts us.
    head = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'Cookie': 'UM_distinctid=16579cf386494-0d95db621e53d2-454c092b-100200-16579cf38651a7; Hm_lvt_5d2a564b91009e38063616ec4b3d8311=1539494544,1539665344,1539919502,1540451788; PHPSESSID=4enbqpdlibic1t6q3ma6fnt4a5; Usercookie_username=%25E6%25B1%25BD%25E8%25BD%25A6%25E7%2594%25A8%25E5%2593%2581%25E6%25B7%2598%25E6%25B7%2598%25E5%25BA%2597; Usercookie_userid=527277; CNZZDATA155540=cnzz_eid%3D866609669-1503013385-http%253A%252F%252Fbusiness.hcp66.com%252F%26ntime%3D1540774055',
        'Host': 'business.hcp66.com',
        'Pragma': 'no-cache',
        'Referer': 'http://business.hcp66.com/member/index/login.html',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
        'X-Requested-With': 'XMLHttpRequest',
    }
    req = urllib.request.Request(url, headers=head)

    # Login form fields expected by the site.
    form = {
        'gotourl': '',
        'member[username]': username,
        'member[password]': password,
        'member[code]': '',
    }
    data = urllib.parse.urlencode(form).encode('utf-8')

    # Context manager guarantees the connection is closed (original leaked it).
    with urllib.request.urlopen(req, data) as resp:
        return resp.read()

def chaurlopen(Length, city1, city2, city3):
    """POST a region query to the shop-search page and return the HTML body.

    Args:
        Length: Kept for backward compatibility with existing callers.
            No longer used: the ``Content-Length`` header is computed by
            urllib from the encoded body, which is always correct.
        city1: Province form value (e.g. '1' for 北京市).
        city2: City form value (looked up from the pickled name dict).
        city3: District/county form value, or '0' for "any".

    Returns:
        bytes: The raw HTML of the query-result page.
    """
    url = 'http://business.hcp66.com/member/index/shop.html'

    # Browser-like headers from a captured session; Content-Length deliberately
    # omitted — urllib fills it in from the body (the manual value was the bug
    # this function's Length parameter existed to work around).
    head = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Cookie': 'UM_distinctid=16579cf386494-0d95db621e53d2-454c092b-100200-16579cf38651a7; Hm_lvt_5d2a564b91009e38063616ec4b3d8311=1539494544,1539665344,1539919502,1540451788; PHPSESSID=4enbqpdlibic1t6q3ma6fnt4a5; Usercookie_username=%25E6%25B1%25BD%25E8%25BD%25A6%25E7%2594%25A8%25E5%2593%2581%25E6%25B7%2598%25E6%25B7%2598%25E5%25BA%2597; Usercookie_userid=527277; CNZZDATA155540=cnzz_eid%3D866609669-1503013385-http%253A%252F%252Fbusiness.hcp66.com%252F%26ntime%3D1540768648',
        'Host': 'business.hcp66.com',
        'Pragma': 'no-cache',
        'Referer': 'http://business.hcp66.com/member/index/shop.html',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
    }
    req = urllib.request.Request(url, headers=head)

    # Query form: three cascading region selectors plus the submit button value.
    form = {
        'search[city1]': city1,
        'search[city2]': city2,
        'search[city3]': city3,
        'b1': '查询',
    }
    data = urllib.parse.urlencode(form).encode('utf-8')

    # Context manager ensures the socket is released (original never closed it).
    with urllib.request.urlopen(req, data) as resp:
        return resp.read()

def length_(city1, city2, city3):
    """Return the Content-Length (as a string) of the query form body.

    The original implementation computed a local character count, threw it
    away, and instead POSTed the form to http://httpbin.org/post just to read
    back the Content-Length header — a needless (and fragile) network round
    trip. The local count was also wrong for non-ASCII values such as '查询',
    which url-encode to many more bytes than characters. Both problems are
    fixed by measuring the actual url-encoded body.

    Args:
        city1: Province form value.
        city2: City form value.
        city3: District/county form value ('0' when unspecified).

    Returns:
        str: Decimal byte length of the url-encoded form, e.g. '81'.
    """
    form = {
        'search[city1]': city1,
        'search[city2]': city2,
        'search[city3]': city3,
        'b1': '查询',
    }
    # Identical encoding to what chaurlopen() actually sends, so the length
    # always matches the real request body.
    body = urllib.parse.urlencode(form).encode('utf-8')
    return str(len(body))

def xia():
    """Log in, prompt for a region, query installation shops and print them.

    Flow:
      1. POST login via urlopen() and echo the server's response dict.
      2. Map the user's province/city/district input to the site's numeric
         form values (province table is inline; city/district come from the
         pickled dict in ``name.pkl``).
      3. Query via chaurlopen() and print every matching shop row.

    Side effects: reads stdin, reads ``name.pkl``, performs network I/O,
    prints to stdout.

    Raises:
        KeyError: If the entered region name is not in the lookup tables.
        IndexError: If fewer than two whitespace-separated names are entered.
    """
    url = 'http://business.hcp66.com/member/index/login.html'
    html = urlopen(url)  # log in first
    html = html.decode('utf-8')
    # The login response is a Python-literal dict; literal_eval is the safe
    # way to parse it (never use eval on server output).
    htmldic = ast.literal_eval(html)
    print(htmldic)

    # Province name -> numeric form value used by the site's <select>.
    # BUGFIX: the original mapped both 陕西省 and 甘肃省 to '28' and had no
    # '27' at all; 陕西省 is restored to '27' to keep the sequence contiguous.
    dic = {'北京市': '1', '天津市': '2', '河北省': '3', '山西省': '4',
           '内蒙古自治区': '5', '辽宁省': '6', '吉林省': '7', '黑龙江省': '8',
           '上海市': '9', '江苏省': '10', '浙江省': '11', '安徽省': '12',
           '福建省': '13', '江西省': '14', '山东省': '15', '河南省': '16',
           '湖北省': '17', '湖南省': '18', '广东省': '19', '广西壮族自治区': '20',
           '海南省': '21', '重庆市': '22', '四川省': '23', '贵州省': '24',
           '云南省': '25', '西藏自治区': '26', '陕西省': '27', '甘肃省': '28',
           '青海省': '29', '宁夏回族自治区': '30', '新疆维吾尔自治区': '31',
           '台湾省': '32', '香港特别行政区': '33', '澳门特别行政区': '34'}

    # City/district name -> form value, pre-built and pickled to name.pkl.
    # ``with`` closes the file handle (the original leaked it).
    with open('name.pkl', 'rb') as file:
        dict_name = pickle.load(file)

    dict_qu = input('请输入省 市 区(县)空格隔开:')
    dict_qu = dict_qu.split()
    print(dict_qu)

    city1 = dic[dict_qu[0]]        # province -> form value
    city2 = dict_name[dict_qu[1]]  # city -> form value
    if len(dict_qu) == 2:
        # No district/county given: the site accepts '0' as "any".
        city3 = '0'
    else:
        city3 = dict_name[dict_qu[2]]

    Length = length_(city1, city2, city3)
    print(city1)
    print(city2)
    print(city3)
    print(Length)

    # All form values ready — run the actual query.
    cont = chaurlopen(Length, city1, city2, city3)
    cont = cont.decode('utf-8')
    cont = bs(cont, 'lxml')

    # Result rows live inside this specific styled div on the result page.
    list1 = cont.find_all('div', style="padding-top:50px;padding-left:15px;")
    list1 = list1[0]
    content = list1.find_all('td', height="30")
    if len(content) == 0:
        print("这个地区暂时无安装网点")

    # Print shop cells two per blank-line group, skipping short cells and
    # rows containing the product keyword '通用记录仪'.
    c = 0
    for cell in content:
        text = cell.text.strip()
        if len(text) > 5:
            if text.find('通用记录仪') == -1:
                print(text)
                c = c + 1
                if c == 2:
                    print('\n')
                    c = 0

# Keep prompting for regions until the user kills the process (Ctrl-C).
while True:
    xia()

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值