用requests先得到Session对象,用它去发送请求,会自动保存cookie。
模拟有验证码的登入步骤:
1.发送请求登入页面;
2.分析验证码的地址,以及要将登入请求发往的地址(可以先输入错的密码登入一次,抓包获取发送地址)
3.将返回的验证码存入文件,读取验证码,手动输入;
4.整合所有数据以及验证码,发往登入验证界面;
5.登入成功后即可访问只有登入后才能访问的页面了,比如个人主页
#_*_ coding: utf-8 _*_
'''
Created on 2018年7月16日
@author: sss
function: 登入ctgujwc
'''
from bs4 import BeautifulSoup
import requests
import random
from pip._vendor.distlib.compat import raw_input
def CheckCode(checkcode):
    """Save the captcha image to disk and ask the user to type what it shows.

    The image is written to ``checkcode.jpg`` in the current working
    directory so the user can open it and read the code.

    Args:
        checkcode: Raw bytes of the captcha image.

    Returns:
        The text the user typed at the prompt.
    """
    # Plain 'wb' is enough: the file is only written, never read back here.
    with open('checkcode.jpg', 'wb') as f:
        f.write(checkcode)
    # Use the builtin input() instead of the raw_input alias taken from pip's
    # private vendored distlib module, which is not a public, stable API.
    text = input('请输入验证码:')
    return text
def WriteFile(test):
    """Write the given page text to ``ys_person.html`` and report completion.

    The file is created (or overwritten) in the current working directory.

    Args:
        test: The text to store in the file.
    """
    # An explicit encoding keeps the write independent of the platform's
    # default codec, which can raise UnicodeEncodeError for characters it
    # cannot represent.
    with open('ys_person.html', 'w', encoding='utf-8') as f:
        f.write(test)
    print('已写入!')
def _hidden_form_fields(soup):
    """Return a ``{name: value}`` dict for every hidden <input> in the parsed page."""
    fields = {}
    for tag in soup.find_all('input', attrs={'type': 'hidden'}):
        name = tag.get('name')
        if name:
            fields[name] = tag.get('value', '')
    return fields


def jwcLogin():
    """Log in to the jwc_glxt site with a captcha, save the personal page, log out.

    Steps:
      1. Fetch the login page with a cookie-keeping session.
      2. Locate the captcha image, download it and ask the user to read it
         (via ``CheckCode``).
      3. POST the login form: hidden form fields, user name, password and
         the captcha text.
      4. Fetch the student-info page and store it with ``WriteFile``.
      5. Request the logout URL.

    Raises:
        RuntimeError: If the captcha image cannot be found on the login page.
        requests.RequestException: On network errors, timeouts, or HTTP
            error statuses.
    """
    from urllib.parse import urljoin

    login_url = 'http://210.42.38.26:84/jwc_glxt/Login.aspx'
    profile_url = 'http://210.42.38.26:84/jwc_glxt/Stu_Info/Stu_info.aspx'
    logout_url = 'http://210.42.38.26:84/jwc_glxt/Login.aspx?xttc=1'
    # Without a timeout a stalled server would block the script forever.
    timeout = 10

    ua_list = [
        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko",
        "Mozilla/5.0 (X11; CrOS i686 2268.111.0)like Gecko",
        "Mozilla/5.0 (Macintosh; U; PPC Mac OS X ",
        "Mozilla/5.0 (Macintosh; Intel Mac OS ",
    ]
    headers = {
        "Connection": "keep-alive",
        "Accept": "application/json, text/javascript, */*; q=0.01",
        "User-Agent": random.choice(ua_list),
    }

    # A Session keeps cookies between requests; the context manager closes
    # its connections when the flow is finished.
    with requests.Session() as sess:
        # Set the headers once on the session so every request (including
        # the profile and logout requests) is sent with the same headers.
        sess.headers.update(headers)

        # Fetch the login page first.
        login_page = sess.get(login_url, timeout=timeout)
        login_page.raise_for_status()
        bs = BeautifulSoup(login_page.text, 'lxml')

        # Locate the captcha image on the page.
        captcha_img = bs.find('img', attrs={"id": 'ImageCheck'})
        captcha_src = captcha_img.get('src') if captcha_img is not None else None
        if not captcha_src:
            raise RuntimeError('captcha image (id="ImageCheck") not found on login page')
        # Resolve the src relative to the page it came from.
        checkcodeUrl = urljoin(login_url, captcha_src)

        # Download the captcha, save it to a file and let the user read it.
        captcha_resp = sess.get(checkcodeUrl, timeout=timeout)
        captcha_resp.raise_for_status()
        text = CheckCode(captcha_resp.content)

        # Build the POST form. Sending only user name + password + captcha is
        # not enough: the hidden form fields must be sent as well. The
        # constants below are fallbacks; values found on the freshly fetched
        # page take precedence because such tokens can change between loads.
        data = {
            '__VIEWSTATE': '/wEPDwUKLTQ4NjU1OTA5NGQYAQUeX19Db250cm9sc1JlcXVpcmVQb3N0QmFja0tleV9fFgEFCGJ0bkxvZ2luMRg1SjrafPmtoydz1mPeR4vBlIE=',
            '__EVENTVALIDATION': '/wEWBQK8vuPMAgKl1bKzCQKC3IeGDAK1qbSRCwLO44u1DdFTNDJgcOwlCVJHcDBqwrj3IMXf',
        }
        data.update(_hidden_form_fields(bs))
        data.update({
            "txtUserName": "name",
            "txtPassword": "password",
            'btnLogin.x': '41',
            'btnLogin.y': '31',
            "CheckCode": text,
        })

        # Send the login request.
        login_resp = sess.post(login_url, data=data, timeout=timeout)
        login_resp.raise_for_status()

        try:
            # Fetch the personal page that is only reachable after login
            # and write it to a file.
            profile_resp = sess.get(profile_url, timeout=timeout)
            profile_resp.raise_for_status()
            WriteFile(profile_resp.text)
        finally:
            # Log out even if fetching or saving the personal page failed.
            sess.get(logout_url, timeout=timeout)
            print('退出!')
# Run the login flow only when this file is executed as a script.
if __name__ == '__main__':
    jwcLogin()