1.解析思路:
按照常规步骤进行抓包:输入错误的账号密码,找到post/get请求地址—>看请求表单数据是否加密—>全局搜索加密关键字—>找到与之相关联的js加密函数,扣出js代码
这里我们可以看到,有四个参数,其中_csrf是未知的,password是经过加密的,其余两个字段是很简单的。
2. _csrf关键字
我们先全局搜索一下_csrf,可以发现它其实就写在登录页的HTML里,是随网页一起生成的,所以需要先请求“login.jsp?$=deny”这个页面,再从返回的HTML中提取_csrf的值,这里是get请求
import re,requests
# Fetch the login page once to obtain the CSRF token embedded in its HTML.
def get_csrf():
    """Return the ``_csrf`` token values scraped from the login page.

    Sends a GET request for the login page and extracts the value of every
    ``name="_csrf" value="..."`` field found in the returned HTML.

    Returns:
        A list of token strings (empty when the field is not present) if
        the server answers with status 200; ``None`` if the status is not
        200 or the request itself fails.
    """
    csrfUrl = "http://openlaw.cn/login.jsp?$=deny"
    # re.S must be given at compile time; passing flags to findall() together
    # with a compiled pattern raises ValueError.
    pattern = re.compile(r'name="_csrf" value="(.*?)"/>', re.S)
    try:
        # The context manager closes the session's pooled connections so
        # they are not leaked once the page has been read.
        with requests.session() as session:
            response = session.get(csrfUrl, headers=headers, timeout=5)
            if response.status_code != 200:
                return None
            # A single capture group makes findall return a list of strings.
            return pattern.findall(response.text)
    except requests.RequestException as e:
        # Best effort: report the network failure and signal it with None.
        print(e)
        return None
拓展:我第一次写的时候就出现了如下的错误提示,原因是re.S没有在re.compile()编译时传入,而是传给了re.findall(),以后要注意这点
在爬取网页数据时,
# Deliberately incorrect example: the pattern is compiled without flags and
# re.S is then passed to findall() together with the compiled pattern, which
# produces the ValueError quoted below.
p=re.compile('<div class="card-title">(.*?)</div>')
data=re.findall(p,html.text,re.S)
出现错误:
ValueError: Cannot process flags argument with a compiled pattern
原因:如果re.findall()的第一个参数是已经编译好的正则对象,就不能再给它传flags项(如re.S),否则会抛出上面的ValueError,flags必须在re.compile()时指定。(正则中‘.’默认匹配除换行符‘\n’以外的任意字符,re.S的作用是让‘.’也能匹配‘\n’,这样就可以把整个html文档当成一个字符串来匹配。)
正确写法:
# Correct form: the re.S flag is supplied to re.compile(), and findall()
# receives only the compiled pattern and the text to search.
p=re.compile('<div class="card-title">(.*?)</div>',re.S)
data=re.findall(p,html.text)
3.password加密
依旧全局搜索password这个关键字,会发现generateEncryptPassword(生成加密用的随机口令)很可疑,不过它只负责生成随机口令,最终输出加密密码的函数其实是keyEncrypt函数(完整js加密代码,参考我的GitHub)
// PEM-formatted public key that is handed to JSEncrypt below. Each line ends
// with an escaped "\n" followed by a backslash line continuation, so the
// literal spans several source lines; the key text must stay byte-exact.
var $publicKey = '-----BEGIN PUBLIC KEY-----\n\
MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA0zI8aibR9ZN57QObFxvI\n\
wiRTmELItVVBLMrLd71ZqakR6oWUKkcAGgmxad2TCy3UeRe4A0Dduw97oXlbl5rK\n\
RGISzpLO8iMSYtsim5aXZX9SB5x3S9ees4CZ6MYD/4XQOTrU0r1TMT6wXlhVvwNb\n\
fMNYHm3vkY0rhfxBCVPFJoHjAGDFWNCAhf4KfalfvWsGL32p8N/exG2S4yXVHuV6\n\
cHDyFJAItKVmyuTmB62pnPs5KvNv6oPmtmhMxxsvBOyh7uLwB5TonxtZpWZ3A1wf\n\
43ByuU7F3qGnFqL0GeG/JuK+ZR40LARyevHy9OZ5pMa0Nwqb8PwfK810Bc8PxD8N\n\
EwIDAQAB\n\
-----END PUBLIC KEY-----\n\
';
// Alphabet from which generateEncryptPassword draws its random characters.
// NOTE(review): it has "T" where "Y" would be expected, omits "j" and lists
// "*" twice; presumably copied verbatim from the site, so left untouched.
var encryptPassChars = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXTZabcdefghiklmnopqrstuvwxyz*&-%/!?*+=()";
// JSEncrypt instance loaded with the public key above; keyEncrypt uses it to
// encrypt the AES key material.
var rsaEncrypt = new JSEncrypt();
rsaEncrypt.setPublicKey($publicKey);
/**
 * Encrypt `data` with a freshly derived AES key and wrap the key material
 * with the RSA public key.
 *
 * A random 32-character passphrase, a random 16-byte IV and a random 16-byte
 * salt are generated on every call. The AES key is derived from the
 * passphrase and salt with PBKDF2 (1000 iterations, keySize 4).
 *
 * @param {string} data Plain text to encrypt.
 * @returns {string} `<rsaEncrypt.encrypt("passphrase:::salt:::iv")>:::<Base64
 *     AES ciphertext>`.
 */
var keyEncrypt = function(data) {
    var hexCodec = CryptoJS.enc.Hex;
    var randomBytes = CryptoJS.lib.WordArray.random;

    // Fresh secrets for this call; IV and salt are kept as hex strings.
    var passPhrase = generateEncryptPassword(32);
    var ivHex = CryptoJS.lib.WordArray.random(16).toString(hexCodec);
    var saltHex = CryptoJS.lib.WordArray.random(16).toString(hexCodec);

    var pbkdf2Options = { keySize: 4, iterations: 1000 };
    var derivedKey = CryptoJS.PBKDF2(passPhrase, hexCodec.parse(saltHex), pbkdf2Options);

    var cipher = CryptoJS.AES.encrypt(data, derivedKey, { iv: hexCodec.parse(ivHex) });

    // Key material sent alongside the ciphertext; cipher.iv is stringified
    // by the concatenation itself.
    var keyMaterial = passPhrase + ":::" + saltHex + ":::" + cipher.iv;
    var cipherTextB64 = cipher.ciphertext.toString(CryptoJS.enc.Base64);
    var wrappedKey = rsaEncrypt.encrypt(keyMaterial);

    return wrappedKey + ":::" + cipherTextB64;
};
/**
 * Build a random string of `length` characters, each picked with
 * Math.random() from the `encryptPassChars` alphabet.
 *
 * @param {number} length Number of characters to generate.
 * @returns {string} The random string (empty when `length` is not positive).
 */
var generateEncryptPassword = function(length) {
    var pieces = [];
    for (var count = 0; count < length; count += 1) {
        // Math.random() is in [0, 1), so the index is always inside the alphabet.
        var index = Math.floor(Math.random() * encryptPassChars.length);
        pieces.push(encryptPassChars.charAt(index));
    }
    return pieces.join('');
};
4.完整代码
import re
import requests
import execjs
# Endpoint that receives the login form POST.
url = "http://openlaw.cn/login"

# Request headers sent with both the token-page GET and the login POST.
headers = {
    "Origin": "http://openlaw.cn",
    "Referer": "http://openlaw.cn/login.jsp",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/79.0.3945.88 Safari/537.36"
    ),
}
# Fetch the login page once to obtain the CSRF token embedded in its HTML.
def get_csrf():
    """Return the ``_csrf`` token values scraped from the login page.

    Sends a GET request for the login page and extracts the value of every
    ``name="_csrf" value="..."`` field found in the returned HTML.

    Returns:
        A list of token strings (empty when the field is not present) if
        the server answers with status 200; ``None`` if the status is not
        200 or the request itself fails.
    """
    csrfUrl = "http://openlaw.cn/login.jsp?$=deny"
    # re.S must be given at compile time; passing flags to findall() together
    # with a compiled pattern raises ValueError.
    pattern = re.compile(r'name="_csrf" value="(.*?)"/>', re.S)
    try:
        # The context manager closes the session's pooled connections so
        # they are not leaked once the page has been read.
        with requests.session() as session:
            response = session.get(csrfUrl, headers=headers, timeout=5)
            if response.status_code != 200:
                return None
            # A single capture group makes findall return a list of strings.
            return pattern.findall(response.text)
    except requests.RequestException as e:
        # Best effort: report the network failure and signal it with None.
        print(e)
        return None
def get_pwd(password='a123456', js_path='main.js'):
    """Return *password* as processed by the ``getPassword`` JavaScript function.

    Reads the JavaScript source at *js_path*, compiles it with ``execjs`` and
    calls its ``getPassword`` function with *password* as the only argument.

    Args:
        password: Plain-text password to pass to the JavaScript routine.
            Defaults to the previously hard-coded value, so ``get_pwd()``
            behaves exactly as before.
        js_path: Path of the JavaScript file that defines ``getPassword``.
            Defaults to ``'main.js'`` relative to the working directory.

    Returns:
        Whatever ``getPassword`` returns (presumably the encrypted password
        string expected by the login form; confirm against main.js).
    """
    with open(js_path, 'r', encoding='utf-8') as f:
        js_code = f.read()
    # Compile the JavaScript source into a callable execution context.
    ctx = execjs.compile(js_code)
    return ctx.call('getPassword', password)
# Second request: POST the login form to simulate signing in to openLaw.
def login():
    """Submit the login form and return the response body.

    Obtains the CSRF token with ``get_csrf()`` and the encrypted password
    with ``get_pwd()``, then POSTs them with the fixed username to ``url``.

    Returns:
        The response text when the server answers with status 200; ``None``
        when no CSRF token could be obtained, the request fails, or the
        status is not 200.

    NOTE(review): the POST is sent outside the session that fetched the
    token page. If the server ties the token to a session cookie the login
    would be rejected; confirm against the live site.
    """
    tokens = get_csrf()
    # get_csrf() yields None on failure and an empty list when the field is
    # missing; indexing either one would raise, so stop here instead.
    if not tokens:
        print("login aborted: could not obtain the _csrf token")
        return None

    data = {
        "_csrf": tokens[0],
        "username": "badwoman",
        "password": get_pwd(),
        "_spring_security_remember_me": "true",
    }
    try:
        response = requests.post(url=url, headers=headers, data=data, timeout=5)
    except requests.RequestException as e:
        # Best effort: report the network failure and signal it with None.
        print(e)
        return None

    if response.status_code == 200:
        return response.text
    return None
# Script entry point: perform one login attempt when run directly.
if __name__ == "__main__":
    login()