python模拟登录网站_基于Python的HTTPS协议模拟登录+爬取页面

这篇博客详细介绍了如何使用Python的httplib和urllib库进行HTTPS模拟登录及页面GET操作。关键点包括设置正确的POST请求头中的cookie值,以及在GET请求中使用POST响应的set-cookie。示例代码展示了完整的登录、获取页面内容和解析HTML的过程。
摘要由CSDN通过智能技术生成

之前写的一直没成功,原因是用的不是HTTPS相关的函数。这次仔细研究了一下,有两个需要注意的点:一个是POST模拟登录的时候,header中的cookie值,不同的网站应该会有不同的要求;另一个是GET页面的时候,需要带上POST得到的response中的set-cookie,这样才能利用已经登录成功的状态。

写完POST和GET页面后,顺便写了个简单的命令行实现。

import httplib, urllib

import urllib2

import cookielib

import sys

# File that ParseHtml() reads; GetHtml() writes the downloaded page to a
# file of the same name.
file_text = "build_change.txt"

# Parsed entries keyed by a running integer index; filled by ParseHtml()
# and written out by GenerateResultTxt().
resultTable = {}

# Server that the HTTPS connections in this script are opened to.
host = 'buuuuuuu.knight.com'

def Login(username, password, csrf='Gy2O70iSjOTbWhWgBLvf4HDuf4jUe4RP'):
    """Log in with an HTTPS form POST and return the server's cookie header.

    Args:
        username: Value sent in the ``username`` form field.
        password: Value sent in the ``password`` form field.
        csrf: Token sent both as the ``csrfmiddlewaretoken`` form field and
            as the ``csrftoken`` request cookie.

    Returns:
        The value of the response's ``set-cookie`` header, or ``None`` when
        the response carries no such header.
    """
    url = '/login/'
    # Referer and Origin are derived from ``host`` so they cannot drift from
    # the server the connection is actually opened to.
    origin = 'https://%s' % host

    values = urllib.urlencode({
        'username': username,
        'password': password,
        'next': '',
        'csrfmiddlewaretoken': csrf,
    })

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.114 Safari/537.36',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Connection': 'keep-alive',
        'Cookie': 'csrftoken=%s' % csrf,
        'Referer': origin + url,
        'Origin': origin,
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    }

    conn = httplib.HTTPSConnection(host, 443)
    try:
        conn.request("POST", url, values, headers)
        response = conn.getresponse()
        # Single-argument print(...) emits the same text as the Python 2
        # print statement it replaces.
        print('Login:  %s %s' % (response.status, response.reason))
        return response.getheader("set-cookie")
    finally:
        # Release the socket whether or not the request succeeded.
        conn.close()

def GetHtml(_url, cookie):
    """Fetch ``_url`` from ``host`` over HTTPS and save the body to ``file_text``.

    Args:
        _url: Request path passed to the GET request.
        cookie: Value for the ``Cookie`` request header (the script passes
            the ``set-cookie`` value returned by ``Login``). When ``None``
            the header is omitted instead of being sent as the text "None".

    Returns:
        None. The response body is written to the file named by
        ``file_text``, replacing any previous content.
    """
    get_headers = {
        # NOTE(review): the original hard-coded 'xxxxx.knight.com' here,
        # which did not match ``host``; the connection host is used so the
        # Host header agrees with the server being contacted.
        'Host': host,
        'Connection': 'keep-alive',
        'Cache-Control': 'max-age=0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.114 Safari/537.36',
        'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6',
    }
    if cookie is not None:
        get_headers['Cookie'] = cookie

    conn = httplib.HTTPSConnection(host)
    try:
        conn.request("GET", _url, None, get_headers)
        res2 = conn.getresponse()
        print('Get %s: %s %s' % (_url, res2.status, res2.reason))
        # Read the body before the connection is closed.
        data = res2.read()
    finally:
        conn.close()

    # ``file_text`` holds the same name the original wrote as a literal
    # ("build_change.txt"), so ParseHtml() keeps finding the file.
    with open(file_text, "w") as fp:
        fp.write(data)

def ParseHtml(end_marker="</div>"):
    """Extract the ``change-body`` entries from ``file_text`` into ``resultTable``.

    Every line containing ``class="change-body"`` starts a new entry. The
    entry text is the part of that line between its first and second ``>``,
    followed by every subsequent line up to and including the first line
    that contains ``end_marker`` (or up to end of file). Entries are stored
    in ``resultTable`` under consecutive integer keys starting at 0.

    Args:
        end_marker: Substring that terminates an entry.
            NOTE(review): the original literal was lost when the post was
            rendered as HTML (the tag inside the quotes was swallowed);
            ``</div>`` is an assumption -- confirm against the real page.

    Returns:
        None. ``resultTable`` is updated in place.
    """
    with open(file_text, "r") as fp:
        _pos = 0
        content = fp.readline()
        while content:
            if content.find("class=\"change-body\"") >= 0:
                topic = content.split(">")
                # Guard against a start line without any '>' on it.
                resultTable[_pos] = topic[1] if len(topic) > 1 else ""
                while content:
                    content = fp.readline()
                    resultTable[_pos] = resultTable[_pos] + content
                    if content.find(end_marker) >= 0:
                        _pos = _pos + 1
                        break
            content = fp.readline()

    print("Parse html success.")

def GenerateResultTxt():
    """Write every entry of ``resultTable`` to ``build_change_result.txt``.

    Each entry is preceded by a separator line of dashes. Entries are
    written in ascending key order so the output follows the order in which
    they were stored, independent of dict iteration order.

    Returns:
        None. The result file is created or overwritten.
    """
    separator = "-------------------------------------------------------------------------------------------\n"
    with open("build_change_result.txt", "w") as f:
        for m in sorted(resultTable):
            f.write(separator)
            f.write(resultTable[m])

    print("Generate result success : build_change_result.txt .")

def Help():
    """Print the command-line usage summary to standard output.

    Examples [2] and [3] previously named the script ``BuildChang.py``;
    they now use ``BuildChange.py`` like example [1].
    """
    usage_lines = (
        '-h    :    help',
        '-u    :    username(must)',
        '-p    :    password(must)',
        '-c    :    csrftoken(optional)',
        '-s    :    sandbox build id(must)',
        'For example:',
        '[1]  python BuildChange.py -h',
        '[2]  python BuildChange.py -u u -p p -s s1 s2',
        '[3]  python BuildChange.py -u u -p p -c c -s s1 s2',
    )
    for line in usage_lines:
        # Single-argument print(...) prints the same text under Python 2.
        print(line)

def ParseParam(com):
    """Parse the command line and run login, download, parse and report.

    Recognised options: ``-h`` (help), ``-u <username>``, ``-p <password>``,
    ``-c <csrftoken>`` (optional) and ``-s <id1> <id2>``. Only the argument
    counts produced by the documented invocations are accepted (8 items
    without ``-c``, 10 with it, counting the script name).

    Args:
        com: Argument list in ``sys.argv`` layout (``com[0]`` is the script
            name).

    Returns:
        None. On invalid input an error message is printed and nothing else
        is done.
    """
    length = len(com)
    username = ""
    password = ""
    csrf = ""
    sid1 = ""
    sid2 = ""

    # Help is terminal: do not fall through to the parameter-error message.
    if length >= 2 and com[1] == '-h':
        Help()
        return

    if length == 8 or length == 10:
        # An explicit index lets each option consume its value(s), so a
        # value is never re-read as a flag on the next iteration.
        i = 1
        while i < length:
            flag = com[i]
            if flag == '-u' and i < (length - 1):
                username = com[i + 1]
                i += 2
            elif flag == '-p' and i < (length - 1):
                password = com[i + 1]
                i += 2
            elif flag == '-c' and i < (length - 1):
                csrf = com[i + 1]
                i += 2
            elif flag == '-s' and i < (length - 2):
                sid1 = com[i + 1]
                sid2 = com[i + 2]
                i += 3
            else:
                i += 1

    if username == "" or password == "" or sid1 == "" or sid2 == "":
        print('[Error] Parameter error!')
        print('[Error] You can use \"python BuildChange.py -h\" to see how can use this script. ')
        return

    # Without -c, Login() falls back to its built-in default token.
    if csrf == "":
        cookie = Login(username, password)
    else:
        cookie = Login(username, password, csrf)

    _url = "//changelog//between//%s//and//%s/" % (sid1, sid2)
    GetHtml(_url, cookie)
    ParseHtml()
    GenerateResultTxt()

# Example invocation:
#   C:\Python27\python.exe C:\Users\knight\Desktop\build\BuildChange.py -u xux -p KKKKKKKK -s 1859409 1858525
if __name__ == "__main__":
    # Hand the raw argument vector (script name included) to the parser.
    ParseParam(sys.argv)

©著作权归作者所有:来自51CTO博客作者风刃的原创作品,如需转载,请注明出处,否则将追究法律责任

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值