登录地址是:https://passport.yhd.com/m/login_input.do
使用httpclient4.4进行登录
中间比较折腾的是登录用户名和密码的加密过程:一号店在前端使用jsencrypt进行加密,误导性很强——我一直以为回传的pubkey是openssl生成的RSA key,需要先转换成java能支持的特定格式。实际上不需要转换:它就是Base64编码的X.509公钥,直接用X509EncodedKeySpec加载即可(见下面的代码)。
登录用户名和密码的加密代码如下(还需要一个base64的包:commons-codec):
/**
 * Encrypts a string with an RSA public key and returns the ciphertext as Base64.
 *
 * @param key Base64 text of an X.509-encoded RSA public key
 * @param src plaintext to encrypt; it is encoded as UTF-8 before encryption
 * @return Base64 (commons-codec, unchunked) encoding of the RSA ciphertext
 * @throws Exception if the key cannot be decoded or parsed, or if the cipher
 *         rejects the input (for example because it is too long for the key size)
 */
public static String encryptByPublicKeyN(String key, String src)
        throws Exception {
    byte[] keyBytes = Base64.decodeBase64(key);
    // Fixed charset: the platform default would make the ciphertext input
    // depend on the machine the code runs on.
    byte[] data = src.getBytes("UTF-8");
    X509EncodedKeySpec keySpec = new X509EncodedKeySpec(keyBytes);
    KeyFactory keyFactory = KeyFactory.getInstance("RSA");
    Key publicKey = keyFactory.generatePublic(keySpec);
    // Spell out mode and padding instead of a bare "RSA": the bare name lets the
    // first registered provider pick its own defaults, which are not guaranteed
    // to be PKCS#1 v1.5 padding.
    String transformation = "RSA/ECB/PKCS1Padding";
    Cipher cipher = Cipher.getInstance(transformation);
    cipher.init(Cipher.ENCRYPT_MODE, publicKey);
    byte[] result = cipher.doFinal(data);
    return Base64.encodeBase64String(result);
}
加密问题解决了,那么captchaToken是个什么鬼,是怎么生成的?
captcha看看百度的解释:
CAPTCHA项目是Completely Automated Public Turing Test to Tell Computers and Humans Apart (全自动区分计算机和人类的图灵测试)的简称,卡内基梅隆大学试图将其注册为商标,但2008年请求被驳回。 CAPTCHA的目的是区分计算机和人类的一种程序算法,是一种区分用户是计算机和人的计算程序,这种程序必须能生成并评价人类能很容易通过但计算机却通不过的测试。
所以,我的理解是:这个captchaToken的目的就是防止爬虫。
首先,登录时页面会访问https://captcha.yhd.com/public/getenv.do?callback=captchaCallback&t=0.537037932081148,这个地址会返回一段jsonp(没有看错,就是json+p)。这里是关键:数据返回后会回调captcha.js中的逻辑(该js经过hex混淆,需要先反解,再用java自动替换掉那些奇怪的数组引用,才便于阅读)。
jsonp这个实例为:
captchaCallback({"k":"wRgs3K4kWm9J1YN6fu3NLncz","m":"Y9dAINkMn5k=","o":"vemW1o0h+QM=","t":"9Njetlv/a9M="});
解释一下里面的参数:k为生成captchaToken用的key,也是解密后面m,o,t用的key;m为加密算法mode;o为加密captchaToken时用的一个中间值,其实是保证生成的参数至少大于某个值;t为加密captchaToken的算法。
captchaToken实际上是由上面的key,使用TripleDES或者DES或者AES对一个特殊值X加密得到,所以服务端肯定是可以解密校验正确性的。
这个特殊值X包含了浏览器附加信息的64hash128值、浏览器信息、系统及版本信息等,还有通过jsonp的o推算得到的某些值(这个校验能力有限,因为这个条件只能推算出登录时会传的最小值,也就是说,只要大于这个值都会被认为合法)。
captchaToken加密使用的js为开源项目CryptoJS(曾托管于Google Code)。
有用的java加密或者解密代码:
/**
 * Helpers for symmetric encryption and decryption with DES, AES or DESede in
 * ECB or CBC mode, always with PKCS5 padding.
 *
 * <p>Keys and IVs are the UTF-8 bytes of the strings passed in. Encryption
 * returns Base64 text; decryption expects Base64 text and returns the
 * plaintext decoded as UTF-8.
 */
public class CryptoUtils {
    public static final int MODE_ENCRYPT = Cipher.ENCRYPT_MODE;
    public static final int MODE_DECRYPT = Cipher.DECRYPT_MODE;
    /** Padding used by every transformation built here (the alternative would be NoPadding). */
    public static final String DEFAULT_PADING = "PKCS5Padding";
    public static final String DEFAULT_MODE_CBC = "CBC";
    public static final String DEFAULT_MODE_ECB = "ECB";
    public static final String ALGORITHM_DES = "DES";
    public static final String ALGORITHM_AES = "AES";
    public static final String ALGORITHM_TRIPLE = "DESede";
    public static final String DEFAULT_ENCODING = "UTF-8";

    /**
     * Same as {@link #ECB} but never throws.
     *
     * @return the result of {@link #ECB}, or {@code null} if it failed for any reason
     */
    public static String ECBWithNoEXP(String algorithm, String keyStr, String dataStr, int mode) {
        try {
            return ECB(algorithm, keyStr, dataStr, mode);
        } catch (Exception ignored) {
            // Deliberately best-effort: callers use a null result to mean
            // "this algorithm/key combination did not work".
            return null;
        }
    }

    /**
     * Same as {@link #CBC} but never throws; the key string is also used as the IV.
     *
     * <p>NOTE(review): because the IV is the key, this can only succeed when the
     * key's UTF-8 length is a valid IV length for the chosen cipher - confirm
     * that this is what callers intend.
     *
     * @return the result of {@link #CBC}, or {@code null} if it failed for any reason
     */
    public static String CBCWithNoEXP(String algorithm, String keyStr, String dataStr, int mode) {
        try {
            return CBC(algorithm, keyStr, dataStr, keyStr, mode);
        } catch (Exception ignored) {
            // Deliberately best-effort, see ECBWithNoEXP.
            return null;
        }
    }

    /**
     * Encrypts or decrypts in ECB mode.
     *
     * @param algorithm cipher algorithm: DES, AES or DESede
     * @param keyStr    key; its UTF-8 bytes are used as the raw key
     * @param dataStr   plaintext when encrypting, Base64 ciphertext when decrypting
     * @param mode      {@code Cipher.ENCRYPT_MODE} or {@code Cipher.DECRYPT_MODE}
     * @return Base64 ciphertext when encrypting, UTF-8 decoded plaintext when
     *         decrypting, or {@code null} if {@code dataStr} or {@code keyStr} is null
     * @throws Exception if the cipher cannot be created or initialised, or the data is invalid
     */
    public static String ECB(String algorithm, String keyStr, String dataStr, int mode) throws Exception {
        if (dataStr == null || keyStr == null) {
            return null;
        }
        return transform(algorithm, DEFAULT_MODE_ECB, keyStr, dataStr, null, mode);
    }

    /**
     * Encrypts or decrypts in CBC mode.
     *
     * @param algorithm cipher algorithm: DES, AES or DESede
     * @param keyStr    key; its UTF-8 bytes are used as the raw key
     * @param dataStr   plaintext when encrypting, Base64 ciphertext when decrypting
     * @param IV        initialisation vector; its UTF-8 bytes are used
     * @param mode      {@code Cipher.ENCRYPT_MODE} or {@code Cipher.DECRYPT_MODE}
     * @return Base64 ciphertext when encrypting, UTF-8 decoded plaintext when
     *         decrypting, or {@code null} if {@code dataStr} or {@code keyStr} is null
     * @throws IllegalArgumentException if {@code IV} is null
     * @throws Exception if the cipher cannot be created or initialised, or the data is invalid
     */
    public static String CBC(String algorithm, String keyStr, String dataStr, String IV, int mode) throws Exception {
        if (dataStr == null || keyStr == null) {
            return null;
        }
        if (IV == null) {
            // Previously surfaced as an unexplained NullPointerException.
            throw new IllegalArgumentException("IV must not be null for CBC mode");
        }
        return transform(algorithm, DEFAULT_MODE_CBC, keyStr, dataStr, IV, mode);
    }

    /** Runs one cipher operation; {@code iv} is null for modes that take no IV. */
    private static String transform(String algorithm, String blockMode, String keyStr,
            String dataStr, String iv, int mode) throws Exception {
        // Decryption input is Base64 text; anything else is treated as plaintext.
        byte[] input = (mode == Cipher.DECRYPT_MODE)
                ? Base64.decodeBase64(dataStr)
                : dataStr.getBytes(DEFAULT_ENCODING);

        Cipher cipher = Cipher.getInstance(algorithm + "/" + blockMode + "/" + DEFAULT_PADING);
        SecretKeySpec key = new SecretKeySpec(keyStr.getBytes(DEFAULT_ENCODING), algorithm);
        if (iv == null) {
            cipher.init(mode, key);
        } else {
            cipher.init(mode, key, new IvParameterSpec(iv.getBytes(DEFAULT_ENCODING)));
        }

        byte[] output = cipher.doFinal(input);
        return (mode == Cipher.ENCRYPT_MODE)
                ? Base64.encodeBase64String(output)
                : new String(output, DEFAULT_ENCODING);
    }

    /** Demo: DES/ECB-encrypts the string "1" with a sample key and prints the Base64 result. */
    public static void main(String[] args) {
        String key = "QDsCzTZK";
        String data = "1";
        try {
            String encrypted = ECB(ALGORITHM_DES, key, data, MODE_ENCRYPT);
            System.out.println(encrypted);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
下面是captchaToken的生成代码:
package com.source.crawler.yhd;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import org.jsoup.helper.StringUtil;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.source.CharAtCode;
import com.source.CryptoUtils;
/**
 * Builds the captchaToken value from the environment JSON object
 * (fields {@code k}, {@code m}, {@code o}, {@code t}).
 *
 * <p>All cryptography is delegated to {@code CryptoUtils} in ECB mode, with
 * {@code k} as the key.
 */
public class CaptachaUtils {
    // private static final String DEFAULT_X_FP = "01b987a67614c907b9e2075cc4f8c19f";
    /** JSON template of the plaintext X; the _kdc, _mdc, _mpt and _mp_ markers are filled in by wrapperX. */
    public static String X_templete = "{\"fp\":\"01b987a67614c907b9e2075cc4f8c19f\"," +
            "\"bowser\":\"{\\\"name\\\":\\\"iPhone\\\",\\\"version\\\":\\\"5.0\\\",\\\"webkit\\\":true,\\\"iphone\\\":true,\\\"ios\\\":true,\\\"osversion\\\":\\\"4.2.1\\\",\\\"mobile\\\":true,\\\"c\\\":true}\"," +
            "\"kdc\":\"_kdc\",\"mdc\":\"_mdc\",\"mpt\":\"_mpt\",\"mp\":\"_mp_\"}";

    /**
     * Produces the URL-encoded captcha token for one environment response.
     *
     * @param envJson JSON object text with the string fields k, m, o and t
     * @return the encrypted X value, percent-encoded by {@link #URLEncode}
     * @throws RuntimeException if the JSON parses to null, a field is blank,
     *         no algorithm can decrypt {@code t}, or any cipher step fails
     */
    public static String getCaptachaTokenByEnv(String envJson) {
        System.out.println("环境json为:" + envJson);
        JSONObject env = JSON.parseObject(envJson);
        if (env == null) {
            throw new RuntimeException("无法格式化环境数据:" + envJson);
        }

        String key = env.getString("k");
        String encryptedMode = env.getString("m");
        String encryptedOffset = env.getString("o");
        String encryptedType = env.getString("t");
        hasNull(key, encryptedMode, encryptedOffset, encryptedType);

        // The decrypted "t" field is used as the algorithm name for every later step.
        String algorithm = getEncryptAlgorithm(key, encryptedType);
        String plainMode = decrypt(algorithm, key, encryptedMode);
        String plainOffset = decrypt(algorithm, key, encryptedOffset);
        System.out.println("算法为:" + algorithm);
        System.out.println("模式为:" + plainMode);

        String plainX = wrapperX(plainOffset);
        System.out.println("X值为:\n" + plainX);
        String cipherX = encrypt(algorithm, key, plainX);
        System.out.println("加密的中间X值为:\n" + cipherX);
        // Round trip is only for the log line below.
        String roundTrip = decrypt(algorithm, key, cipherX);
        System.out.println("解密的中间X值为:\n" + roundTrip);
        return URLEncode(cipherX);
    }

    /**
     * Percent-encodes a string the way JavaScript's encodeURIComponent does.
     *
     * @param src text to encode
     * @return the encoded text, or {@code null} when {@code src} is null or empty
     */
    public static String URLEncode(String src) {
        if (src == null || src.isEmpty()) {
            return null;
        }
        String encoded;
        try {
            // URLEncoder follows form encoding; undo the differences from encodeURIComponent.
            encoded = URLEncoder.encode(src, "UTF-8")
                    .replace("+", "%20")
                    .replace("%21", "!")
                    .replace("%27", "'")
                    .replace("%28", "(")
                    .replace("%29", ")")
                    .replace("%7E", "~");
            System.out.println("加密最终值为:" + encoded);
        } catch (UnsupportedEncodingException e) {
            throw new RuntimeException(e);
        }
        return encoded;
    }

    /**
     * ECB-encrypts {@code v} with the given algorithm and key.
     *
     * @return whatever {@code CryptoUtils.ECB} returns in encrypt mode
     * @throws RuntimeException wrapping any exception from {@code CryptoUtils.ECB}
     */
    public static String encrypt(String algorithm, String key, String v) {
        return ecbUnchecked(algorithm, key, v, CryptoUtils.MODE_ENCRYPT);
    }

    /**
     * ECB-decrypts {@code v} with the given algorithm and key.
     *
     * @return whatever {@code CryptoUtils.ECB} returns in decrypt mode
     * @throws RuntimeException wrapping any exception from {@code CryptoUtils.ECB}
     */
    public static String decrypt(String algorithm, String key, String v) {
        return ecbUnchecked(algorithm, key, v, CryptoUtils.MODE_DECRYPT);
    }

    /** Calls CryptoUtils.ECB and rethrows any failure as an unchecked exception. */
    private static String ecbUnchecked(String algorithm, String key, String v, int cipherMode) {
        try {
            return CryptoUtils.ECB(algorithm, key, v, cipherMode);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Tries AES, DES and DESede in that order and returns the first non-null
     * decryption of {@code type}.
     *
     * @param key  cipher key
     * @param type encrypted value to probe with
     * @return the decrypted text of {@code type} (not the name of the algorithm that was tried)
     * @throws RuntimeException if every candidate yields null
     */
    public static String getEncryptAlgorithm(String key, String type) {
        String[] candidates = {
            CryptoUtils.ALGORITHM_AES, CryptoUtils.ALGORITHM_DES, CryptoUtils.ALGORITHM_TRIPLE
        };
        for (String candidate : candidates) {
            String plain = CryptoUtils.ECBWithNoEXP(candidate, key, type, CryptoUtils.MODE_DECRYPT);
            if (plain != null) {
                return plain;
            }
        }
        throw new RuntimeException("无法确定加密类型");
    }

    /**
     * Fills the X template's markers with values from {@code CharAtCode.getStr}.
     *
     * <p>NOTE(review): {@code CharAtCode} is defined outside this file, so what
     * the generated strings look like cannot be confirmed from here.
     *
     * @param offset decimal integer text; used both as a seed string and as the numeric argument
     * @return the template with all four markers replaced
     * @throws NumberFormatException if {@code offset} is not a valid integer
     */
    public static String wrapperX(String offset) {
        System.out.println("offsetVal =" + offset);
        Integer offsetValue = Integer.valueOf(offset);
        // Call order is kept in case getStr is not a pure function.
        String kdc = CharAtCode.getStr("3", offsetValue);
        String mpt = CharAtCode.getStr(offset, offsetValue);
        String mdc = CharAtCode.getStr(offset, offsetValue);
        String mp = CharAtCode.getStr("null", offsetValue);
        return X_templete
                .replace("_kdc", kdc)
                .replace("_mdc", mdc)
                .replace("_mpt", mpt)
                .replace("_mp_", mp);
    }

    /**
     * Rejects blank arguments.
     *
     * @param params values to check
     * @throws RuntimeException if any value is blank according to {@code StringUtil.isBlank}
     */
    public static void hasNull(String... params) {
        for (int i = 0; i < params.length; i++) {
            if (StringUtil.isBlank(params[i])) {
                throw new RuntimeException("存在为空的参数");
            }
        }
    }

    /** Demo: builds a token from a sample environment object and prints it. */
    public static void main(String[] args) {
        String sampleEnv = "{\"k\":\"QDsCzTZK\",\"m\":\"VE7O9YRo16I=\",\"o\":\"cDQDg4r2V14=\",\"t\":\"a5RXaswkhtI=\"}";
        String token = getCaptachaTokenByEnv(sampleEnv);
        System.out.println("最终加密结果为:" + token);
    }
}