// 从页面抓取数据 Java-Jsoup
package dex.gu.web.helper;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.MessageDigestSpi;
import java.security.NoSuchAlgorithmException;
import java.util.Base64;
import java.util.Base64.Encoder;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.jsoup.Connection;
import org.jsoup.Connection.Response;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
public class Main {
private static Map config;
private static String username="admin";
private static String userpwd="ad2312";
private static Encoder base64Encoder=Base64.getEncoder();
private static Pattern ptnKey=Pattern.compile("key=\'([0-9A-Z]+)\'");
public static String bytesToHexString(byte[] src){
StringBuilder stringBuilder = new StringBuilder("");
if (src == null || src.length <= 0) {
return null;
}
for (int i = 0; i
int v = src[i] & 0xFF;
String hv = Integer.toHexString(v);
if (hv.length()
stringBuilder.append(0);
}
stringBuilder.append(hv);
}
return stringBuilder.toString();
}
public static void main(String[] args) throws IOException, NoSuchAlgorithmException {
Logger log = LogManager.getLogger(Main.class);
//
// Document doc = Jsoup.connect("
url
")
// .data("username", "admin")
// .data("userpwd","a45611")
// .post();
//
//
Response resp = Jsoup.connect("
url
").execute();
Map cookies = resp.cookies();
Document loginDoc=Jsoup.parseBodyFragment(resp.body());
Element scriptDom = loginDoc.select("script").get(1);
String keyLine=scriptDom.data().split("\n")[1];
Matcher matcher = ptnKey.matcher(keyLine);
matcher.find();
String key=matcher.group(1);
Document docLoginDo = Jsoup.connect("
url
")
.cookies(cookies)
.data("username", username)
.data("psw",encodePassword(log, key))
.post();
Document doc = Jsoup.connect("
url
")
.cookies(cookies)
.get();
Element tableDom = doc.select("table").get(0);
Elements trDoms = tableDom.select("tbody>tr");
StringBuilder sb=new StringBuilder();
for(Element tr:trDoms){
for(Element td:tr.select("td")){
sb.append(td.html()+"\t");
}
sb.append("\n");
}
log.error(sb.toString());
/*
String key="50E8EAB7243BF34A513D26E51969362F";
String md5str = encodePassword(log, key);
String encodedPwd = md5str;
log.error(md5str);
*/
//log.error(doc.toString());
log.error("done!");
}
private static String encodePassword(Logger log, String key)
throws NoSuchAlgorithmException, UnsupportedEncodingException {
MessageDigest md5diges = MessageDigest.getInstance("MD5");
byte[] md5bytes = md5diges.digest(userpwd.getBytes("utf-8"));
String md5str = bytesToHexString(md5bytes);
log.error(md5str);
md5str=md5str+"_"+key;
md5bytes=md5diges.digest(md5str.getBytes("utf-8"));
md5str=bytesToHexString(md5bytes);
return md5str;
}
}