// This code is provided for personal study and research only; do not use it for commercial purposes.
import com.alibaba.fastjson.JSONObject;
import com.gargoylesoftware.htmlunit.*;
import com.gargoylesoftware.htmlunit.html.HtmlForm;
import com.gargoylesoftware.htmlunit.html.HtmlInput;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import com.gargoylesoftware.htmlunit.util.NameValuePair;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;
import javax.imageio.ImageIO;
import javax.net.ssl.SSLHandshakeException;
import javax.servlet.http.HttpServletRequest;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.*;
/**
* 学信网
*
*/
@Service("XueXingWangService")
public class XueXingWangServiceImpl implements XueXingWangService {
Logger log = LoggerFactory.getLogger(getClass());
private Map<String, String> heard = new HashMap<>();
private List<NameValuePair> reqParam = new ArrayList();
private String codeSrc = "https://account.chsi.com.cn";
private String lt = null;
private String execution = null;
private Document document = null;
private String _eventId = null;
private String uuid = null;
/**
* 获取学信网首页并获取登录验证码
*
* @return
* @throws Exception
*/
@Override
public String fetchLoginCode(HttpServletRequest request) throws Exception {
JSONObject json = new JSONObject();
try {
WebClient webClient = new WebClient(BrowserVersion.CHROME);
webClient.getOptions().setCssEnabled(true);
webClient.getOptions().setJavaScriptEnabled(true);
webClient.getOptions().setThrowExceptionOnScriptError(false);
webClient.getCookieManager().setCookiesEnabled(true);
webClient.getOptions().setUseInsecureSSL(true);
HtmlPage page = webClient.getPage("https://account.chsi.com.cn/passport/login");
if (page.asText().contains("登录_学信网")) {
document = Jsoup.parse(page.asXml());
lt = document.select("input[name=lt]").attr("value");
execution = document.select("input[name=execution]").attr("value");
_eventId = document.select("input[name=_eventId]").attr("value");
heard.clear();
heard.put("Content-Type", "application/x-www-form-urlencoded");
heard.put("Host", "account.chsi.com.cn");
heard.put("Referer", "https://account.chsi.com.cn/passport/login");
reqParam.clear();
reqParam.add(new NameValuePair("username", "10086"));
reqParam.add(new NameValuePair("password", "10086"));
reqParam.add(new NameValuePair("captcha", "10086"));
reqParam.add(new NameValuePair("submit", "登录"));
reqParam.add(new NameValuePair("lt", lt));
reqParam.add(new NameValuePair("execution", execution));
reqParam.add(new NameValuePair("_eventId", _eventId));
WebRequest webRequest = new WebRequest(new URL("https://account.chsi.com.cn/passport/login"));
webRequest.setAdditionalHeaders(heard);
webRequest.setRequestParameters(reqParam);
webRequest.setHttpMethod(HttpMethod.POST);
HtmlPage indexPage = webClient.getPage(webRequest);
if (indexPage.asText().contains("图片验证码输入有误")) {
json = getLoginCode(indexPage, request, webClient);
} else {
json.put("msg", "获取失败");
json.put("code", "0001");
json.put("data", "error");
}
}
} catch (Exception e) {
json.put("msg", "获取失败");
json.put("code", "0001");
json.put("data", "error");
}
return JSONObject.toJSONString(json);
}
@Override
public String loginXueXing(String data, String url, String img, HttpServletRequest request) throws Exception {
JSONObject returnJson = new JSONObject();
String jsonData = RsaUtil.privateDecrypt(data, RsaUtil.getPrivateKey(RsaUtil.PRIVATE_KEY));
JSONObject jsonObject = JSONObject.parseObject(jsonData);
WebClient webClient = (WebClient) request.getSession().getAttribute(uuid);
if (webClient == null) {
returnJson.put("msg", "非法请求");
returnJson.put("code", "1000");
returnJson.put("data", "error");
return JSONObject.toJSONString(returnJson);
}
String result = checkParameter(jsonObject);
if ("success".equals(result)) {
WebRequest webRequest = new WebRequest(new URL("https://account.chsi.com.cn/passport/login?service=https%3A%2F%2Fmy.chsi.com.cn%2Farchive%2Fj_spring_cas_security_check"));
heard.clear();
heard.put("Content-Type", "application/x-www-form-urlencoded");
heard.put("Host", "account.chsi.com.cn");
heard.put("Referer", "https://account.chsi.com.cn/passport/login");
reqParam.clear();
reqParam.add(new NameValuePair("username", jsonObject.getString("username")));
reqParam.add(new NameValuePair("password", jsonObject.getString("password")));
reqParam.add(new NameValuePair("captcha", jsonObject.getString("code")));
reqParam.add(new NameValuePair("submit", "登录"));
reqParam.add(new NameValuePair("lt", lt));
reqParam.add(new NameValuePair("execution", execution));
reqParam.add(new NameValuePair("_eventId", _eventId));
webRequest.setAdditionalHeaders(heard);
webRequest.setRequestParameters(reqParam);
webRequest.setHttpMethod(HttpMethod.POST);
try {
Page page = webClient.getPage(webRequest);
System.out.println(page);
if (null!=page){
Document doc = Jsoup.parse(page.getWebResponse().getContentAsString());
Element select = doc.select("div.ct_input errors").first();
returnJson.put("msg", select);
returnJson.put("code", "0001");
returnJson.put("data", "error");
return JSONObject.toJSONString(returnJson);
}
} catch (FailingHttpStatusCodeException e) {
} catch (Exception e) {
e.printStackTrace();
}
JSONObject crawlerResult = crawlerUserInfo(webClient, url, img, jsonObject.getString("custId"));
return JSONObject.toJSONString(crawlerResult);
} else {
return result;
}
}
private JSONObject crawlerUserInfo(WebClient webClient, String url, String img, String custId) {
JSONObject json = new JSONObject();
JSONObject jsonData = new JSONObject();
JSONObject errorMsg = new JSONObject();
JSONObject userInfo = getUserInfo(webClient);
JSONObject educationInfo = getEducationInfo(webClient, url, img, custId);
if (userInfo != null) {
jsonData.put("userInfo", userInfo);
json.put("msg", "采集成功");
json.put("code", "0000");
} else {
json.put("msg", "登录失败,请检查账号密码是否有误");
json.put("code", "9999");
errorMsg.put("error", "用户信息采集失败");
jsonData.put("userInfo", errorMsg);
}
if (educationInfo != null) {
jsonData.put("educationInfo", educationInfo);
} else {
errorMsg.put("error", "未采集到您的学历信息,请您登录官网查看是否绑定相关学历信息!");
jsonData.put("educationInfo", errorMsg);
}
json.put("custId",custId);
json.put("data", jsonData);
return json;
}
private String checkParameter(JSONObject jsonObject) {
JSONObject returnJson = new JSONObject();
String username = jsonObject.getString("username");
if (username == null) {
returnJson.put("msg", "username不能为空");
returnJson.put("code", "1000");
returnJson.put("data", "error");
return JSONObject.toJSONString(returnJson);
}
String password = jsonObject.getString("password");
if (password == null) {
returnJson.put("msg", "password不能为空");
returnJson.put("code", "1000");
returnJson.put("data", "error");
return JSONObject.toJSONString(returnJson);
}
String code = jsonObject.getString("code");
if (code == null) {
returnJson.put("msg", "code不能为空");
returnJson.put("code", "1000");
returnJson.put("data", "error");
return JSONObject.toJSONString(returnJson);
}
String custId = jsonObject.getString("custId");
if (custId == null) {
returnJson.put("msg", "custId不能为空");
returnJson.put("code", "1000");
returnJson.put("data", "error");
return JSONObject.toJSONString(returnJson);
}
return "success";
}
public JSONObject getLoginCode(HtmlPage indexPage, HttpServletRequest request, WebClient webClient) {
JSONObject json = new JSONObject();
JSONObject urljson = new JSONObject();
lt = null;
execution = null;
_eventId = null;
try {
HtmlForm fm = indexPage.getForms().get(0);
HtmlInput captcha = fm.getInputByName("captcha");
HtmlPage page = captcha.click();
Document doc = Jsoup.parse(page.asXml());
lt = doc.select("input[name=lt]").attr("value");
execution = doc.select("input[name=execution]").attr("value");
_eventId = doc.select("input[name=_eventId]").attr("value");
String codeUrl = codeSrc + doc.select("img").get(1).attr("src");
System.out.println(codeUrl);
uuid = UUID.randomUUID().toString().replaceAll("-", "");
urljson.put("url", codeUrl);
json.put("msg", "获取图片验证码成功");
json.put("code", "0000");
json.put("data", urljson);
request.getSession().setAttribute(uuid, webClient);
} catch (Exception e) {
e.printStackTrace();
log.info("获取页面表单异常");
json.put("msg", "获取图片验证码失败");
json.put("code", "0001");
json.put("data", "null");
}
log.info("获取验证码标签完成");
return json;
}
public JSONObject getUserInfo(WebClient webClient) {
JSONObject json = new JSONObject();
try {
HtmlPage page = webClient.getPage("https://account.chsi.com.cn/account/account!show");
if (page.asXml().contains("学信网账号-账号信息")) {
document = Jsoup.parse(page.asXml());
json.put("access", document.select("strong").get(0).text().replaceAll(" ", ""));
json.put("name", document.select("strong").get(2).text().trim().replaceAll(" ", "").trim());
json.put("country", document.select("strong").get(3).text().trim());
json.put("phone", document.select("strong").get(5).text().trim());
try {
json.put("email", document.select("strong").get(6).text());
} catch (IndexOutOfBoundsException e) {
json.put("email", "用户暂未绑定邮箱");
log.info("该用户暂未绑定邮箱");
}
} else {
log.info("获取用户基本信息异常");
}
} catch (Exception e) {
e.printStackTrace();
}
return json;
}
public JSONObject getEducationInfo(WebClient webClient, String url, String imgUrl, String custId) {
JSONObject json = new JSONObject();
try {
Thread.sleep(3000);
HtmlPage page = webClient.getPage("https://my.chsi.com.cn/archive/gdjy/xj/show.action");
document = Jsoup.parse(page.asXml());
Elements img = document.select("img");
String eduCationUrl ="";
try {
eduCationUrl = img.get(5).attr("src");
}catch (IndexOutOfBoundsException e){
return null;
}
UnexpectedPage imgPage = webClient.getPage(eduCationUrl);
BufferedImage bufferedImage = ImageIO.read(imgPage.getInputStream());
String fileName = System.currentTimeMillis() + ".png";
ImageIO.write(bufferedImage, "png", new File(imgUrl + "\\" + fileName));
String xlurl = "http://" + (url + fileName).replaceAll("\\\\", "/");
System.out.println("url" + imgUrl + "\\" + fileName);
System.out.println("xlurl:" + xlurl);
WebRequest webRequest = new WebRequest(new URL("http://27.45.147.9:8002/xuexin"), HttpMethod.POST);
webRequest.setAdditionalHeader("Content-type", "application/json; charset=utf-8");
webRequest.setRequestBody("{\"path\":\"" + xlurl + "\",\"custId\":\"" + custId + "\"}");
UnexpectedPage textPage = webClient.getPage(webRequest);
json = JSONObject.parseObject(textPage.getWebResponse().getContentAsString());
return json;
} catch (SSLHandshakeException e) {
System.out.println("PKIX路径\n构建失败:安全证书错误");
} catch (Exception e) {
e.printStackTrace();
}
return null;
}
}