Python模拟浏览器登录

最新推荐文章于 2024-04-22 14:35:00 发布
tomWND
最新推荐文章于 2024-04-22 14:35:00 发布
阅读量1.4k
点赞数
本文链接：https://blog.csdn.net/tangwendi/article/details/71188869
版权
# (stray triple-quote delimiter removed: it opened a string literal that swallowed the code below)
import gzip
import sys
import re
import urllib.request
import urllib.parse
import urllib.error
import http.cookiejar
from bs4 import BeautifulSoup
import datetime
import random

# Module-level set; nothing in the visible code reads or fills it
# (presumably intended to track already-visited pages -- TODO confirm).
pages = set()
# random.seed() only accepts None, int, float, str, bytes or bytearray.
# Passing a datetime object raises TypeError on Python 3.11+, so seed with
# the current time expressed as a float timestamp instead.
random.seed(datetime.datetime.now().timestamp())

def getInternalLinks(bsObj, includeUrl):
    """Return the unique internal link targets found in a parsed page.

    A link counts as internal when its ``href`` starts with "/" or
    contains ``includeUrl``.

    Args:
        bsObj: Parsed document exposing ``findAll(name, href=pattern)``
            (a BeautifulSoup object in this script).
        includeUrl: Literal text (e.g. the site's host name) identifying
            links that belong to the site.

    Returns:
        A list of ``href`` strings, in document order, without duplicates.
    """
    # Escape includeUrl so characters such as "." are matched literally
    # instead of being interpreted as regex metacharacters.
    pattern = re.compile('^(/|.*' + re.escape(includeUrl) + ')')
    internalLinks = []
    seen = set()  # O(1) duplicate check; the list keeps document order
    for link in bsObj.findAll('a', href=pattern):
        href = link.attrs.get('href')
        if href is not None and href not in seen:
            seen.add(href)
            internalLinks.append(href)
    return internalLinks

'http 方式模拟登陆网站'
def ungzip(data):
    """Return ``data`` gzip-decompressed, or unchanged if it is not valid gzip.

    Args:
        data: Raw response body as bytes.

    Returns:
        The decompressed bytes when ``data`` is a valid gzip stream,
        otherwise ``data`` itself.
    """
    import zlib  # local import: only needed to name zlib.error below

    try:
        print("正在解压.....")
        data = gzip.decompress(data)
        print("解压成功")
    except (OSError, EOFError, zlib.error):
        # gzip.decompress raises OSError (BadGzipFile) for non-gzip input,
        # EOFError for a truncated stream and zlib.error for corrupt data.
        # Catching only these keeps KeyboardInterrupt and real programming
        # errors visible instead of silently swallowing them.
        print("未经压缩，无需解压")
    return data

# Login endpoint that receives the form POST.
LoginUrl = "http://网址.com:9090/jsFrame/login.aspx?login=login"

# Request headers sent with the login POST (IE10-style User-Agent).
# The Referer is the login page itself, so it reuses LoginUrl.
headers = {
    'Accept': 'text/html, application/xhtml+xml, */*',
    'Referer': LoginUrl,
    'User-Agent': 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)',
    'Content-Type': 'application/x-www-form-urlencoded',
    'Accept-Encoding': 'gzip, deflate',
    'Host': '网址.com:9090',
    'Connection': 'Keep-Alive',
    'Pragma': 'no-cache',
}

# Hard-coded ASP.NET form state values posted along with the credentials.
__VIEWSTATE = '/wEPDwUKMTgyNjAzNjE0MQ9kFgICAQ9kFgICCQ8QDxYCHgdWaXNpYmxlaGRkZGQYAQUeX19Db250cm9sc1JlcXVpcmVQb3N0QmFja0tleV9fFgEFEmNoa1JlbWViZXJQYXNzd29yZGHjnndASufNAaraxhc4Fq1KydHN'
__EVENTVALIDATION = '/wEWBgKthPnxBQLT8dy8BQKd+7qdDgK1qbSRCwLPx7zUAgLf2eqGAzVtS60EPvfNOGv+JEjkKNxzBqnS'

# Credential fields of the login form.
txtUserID = '123'
txtPwd = '密码'
txtPassword = '123'

# Form fields, in the order they are encoded into the request body.
postDict = {
    '__VIEWSTATE': __VIEWSTATE,
    '__EVENTVALIDATION': __EVENTVALIDATION,
    'txtUserID': txtUserID,
    'txtPwd': txtPwd,
    'txtPassword': txtPassword,
    'Image1': ' ',
}

# urlencode() returns an ASCII string; it has to be encoded to bytes before
# it can be used as the request's data parameter.
encoded_form = urllib.parse.urlencode(postDict)
postdata = encoded_form.encode()

# Cookie jar backed by cookie.txt, wrapped in an opener so every request
# made through the opener shares the same cookies.
cookie_filename = 'cookie.txt'
cookie = http.cookiejar.MozillaCookieJar(cookie_filename)
cookie_support = urllib.request.HTTPCookieProcessor(cookie)
opener = urllib.request.build_opener(cookie_support)

# Bundle URL, headers and form body into the login request.
request = urllib.request.Request(LoginUrl, data=postdata, headers=headers)

# Send the login request through the cookie-aware opener and list every
# <iframe> element of the returned page.
try:
    # Close the HTTP response as soon as its body has been read.
    with opener.open(request) as http_response:
        # Decompress the body if the server gzip-compressed it.
        response = ungzip(http_response.read())
    # Decode the body bytes to text (default UTF-8).
    page = response.decode()
    #print(page)
    bsObj = BeautifulSoup(page, "html.parser")

    for link in bsObj.findAll('iframe'):
        print(link)

except urllib.error.HTTPError as e:
    # HTTPError is the URLError subclass that carries an HTTP status code,
    # so it must be handled before the generic URLError clause.
    print(e.code, ':', e.reason)
except urllib.error.URLError as e:
    # A plain URLError (e.g. connection failure) has no .code attribute;
    # reading it here would raise AttributeError inside the handler.
    print(e.reason)


# Save the cookies to cookie.txt, including session-only (discardable) and
# already-expired ones.
cookie.save(ignore_discard=True, ignore_expires=True)

print(cookie)
for item in cookie:
    # Cookie.value may be None; str() avoids the TypeError that
    # concatenating None to a str would raise.
    print('Name = ' + str(item.name))
    print('Value = ' + str(item.value))


#$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
# Headers describing the notice-list AJAX request.
# NOTE(review): the visible code never passes these to a request.
t_headers = {
    'Accept': '*/*',
    'Accept-Language': 'zh-cn',
    'Referer': 'http://erp.sciyon.com:9090/NM/JsFrame/HomeShow/Inform.aspx?title=新闻公告&homeitemid=101',
    'x-requested-with': 'XMLHttpRequest',
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'Accept-Encoding': 'gzip, deflate',
    'User-Agent': 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)',
    'Host': 'erp.sciyon.com:9090',
    'DNT': '1',
    'Connection': 'Keep-Alive',
    'Pragma': 'no-cache',
}

# Endpoint that receives the XML payload built below.
t_url = 'http://erp.sciyon.com:9090/NM/Proxy/NoticeProxy.aspx'

# Opening/closing tag pairs of the XML payload.
Data, Data2 = '<Data>', '</Data>'
Action, Action2 = '<Action>', '</Action>'
TYPE, TYPE2 = '<TYPE>', '</TYPE>'
STATE, STATE2 = '<STATE>', '</STATE>'
AUTHORIZATION, AUTHORIZATION2 = '<AUTHORIZATION>', '</AUTHORIZATION>'
HOMEPAGEID, HOMEPAGEID2 = '<HOMEPAGEID>', '</HOMEPAGEID>'

# Raw XML request body (despite the name, a str rather than a dict).
# It is sent as-is, not urlencoded.
get_postDict = ''.join([
    Data,
    Action, 'GETNOTICEDATABYWHERE', Action2,
    TYPE, TYPE2,
    STATE, 'APPROVE', STATE2,
    AUTHORIZATION, '1', AUTHORIZATION2,
    HOMEPAGEID, '101', HOMEPAGEID2,
    Data2,
])

# The opener should still hold the cookies obtained by the login request,
# so this POST of the raw XML payload runs inside the logged-in session.
try:
    # Close the HTTP response as soon as its body has been read.
    with opener.open(t_url, get_postDict.encode('utf-8')) as http_response:
        # Decompress the body if the server gzip-compressed it.
        get_response = ungzip(http_response.read())
    # Decode the body bytes to text (default UTF-8) and show it.
    page = get_response.decode()
    print(page)
except urllib.error.HTTPError as e:
    # HTTPError is the URLError subclass that carries an HTTP status code,
    # so it must be handled before the generic URLError clause.
    print(e.code, ':', e.reason)
except urllib.error.URLError as e:
    # A plain URLError (e.g. connection failure) has no .code attribute;
    # reading it here would raise AttributeError inside the handler.
    print(e.reason)



print("*********************************************************")


#$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$




#----------------------------------------------------------------
# Use the cookie to request another URL (disabled: the code below is kept inside a string literal)
'''
get_headers = {
    'Accept':'*/*',
    'Accept-Language':'zh-cn',
    'Referer':'http://IP/ERP_OA/WorkTask/TaskQuery/ListPage.aspx',
    'x-requested-with':'Ext.basex',
    'Content-Type':'application/x-www-form-urlencoded; charset=UTF-8',
    'Accept-Encoding':'gzip, deflate',
    'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Trident/7.0; rv:11.0) like Gecko',
    'Host':'IP',
    'Connection':'Keep-Alive',
    'Pragma':'no-cache'   
    }


get_url = 'http://IP/ERP_OA/WorkTask/TaskQuery/ListPage.aspx?FlowGuid=8D11A66F0EAF44FCBBD9DEBEE6D45BFE'
start = 0
limit = 30
SORTFIELD = 'FTASKID'
SORTTYPE = 'DESC'
ACTION = 'GETWORKTASK'
QUERYPARA = '%3CData%3E%3CQueryPara%3E%3CBEGDATE%3E2017-01-01%3C%2FBEGDATE%3E%3CENEDATE%3E2017-01-13%3C%2FENEDATE%3E%3CFCLASS%3E%3C%2FFCLASS%3E%3CFTYPE%3E%3C%2FFTYPE%3E%3CFDELAY%3E%3C%2FFDELAY%3E%3CFTITLE%3E%3C%2FFTITLE%3E%3CFEEDBACKID%3E%3C%2FFEEDBACKID%3E%3CFSUSER%3E%3C%2FFSUSER%3E%3CFRUSER%3E%3C%2FFRUSER%3E%3CSTATE%3E%3C%2FSTATE%3E%3C%2FQueryPara%3E%3C%2FData%3E'
get_postDict = {
    'start':start,
    'limit':limit,
    'SORTFIELD':SORTFIELD,
    'SORTTYPE':SORTTYPE,
    'ACTION':ACTION,
    'QUERYPARA':QUERYPARA
    }
get_postdata=urllib.parse.urlencode(postDict).encode()

get_request = urllib.request.Request(get_url,get_postdata,headers=get_headers)
#这时openner对象中应该含有前面获取到的cookie信息
try:
    #模拟浏览器发送请求，并获取返回结果
    get_response = opener.open(get_request)
    #将返回结果解压
    get_response = ungzip(get_response.read())
    #将返回结果解码
    page = get_response.decode()
    print(page)
    #bsObj = BeautifulSoup(page,"html.parser")

    for link in bsObj.findAll('a'):
        print(link)

except urllib.error.URLError as e:
    print(e.code,':',e.reason)
#get_response = ungzip(opener.open(get_request).read())
#print(get_response.decode())
#------------------------------------------------------------------------------------------------------------------
'''
tomWND
关注
0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
Python模拟浏览器登录

'''import gzipimport sysimport reimport urllib.requestimport urllib.parseimport urllib.errorimport http.cookiejarfrom bs4 import BeautifulSoupimport datetimeimport randompages = set()rando
复制链接

扫一扫