Java 模拟某信网登录及信息采集

此代码仅供个人学习、研究之用，请勿用于商业用途。


import com.alibaba.fastjson.JSONObject;
import com.gargoylesoftware.htmlunit.*;
import com.gargoylesoftware.htmlunit.html.HtmlForm;
import com.gargoylesoftware.htmlunit.html.HtmlInput;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import com.gargoylesoftware.htmlunit.util.NameValuePair;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;

import javax.imageio.ImageIO;
import javax.net.ssl.SSLHandshakeException;
import javax.servlet.http.HttpServletRequest;
import java.awt.image.BufferedImage;
import java.io.File;
import java.net.URL;
import java.util.*;

/**
 * Service that drives a headless HtmlUnit browser against the CHSI (学信网)
 * account site: it obtains a login captcha, submits the user's credentials and
 * then scrapes the account page and the education-record page.
 *
 * <p>All per-user state (the {@link WebClient} holding the cookies and the
 * hidden login-form tokens) is kept in the caller's HTTP session. The class
 * itself holds no mutable state, because the {@code @Service} instance is
 * shared by every request.
 */
@Service("XueXingWangService")
public class XueXingWangServiceImpl implements XueXingWangService {

    final Logger log = LoggerFactory.getLogger(getClass());

    /** Base used to turn the captcha image's {@code src} into a full URL. */
    private static final String ACCOUNT_BASE_URL = "https://account.chsi.com.cn";
    private static final String LOGIN_URL = "https://account.chsi.com.cn/passport/login";
    private static final String CAS_LOGIN_URL =
            "https://account.chsi.com.cn/passport/login?service=https%3A%2F%2Fmy.chsi.com.cn%2Farchive%2Fj_spring_cas_security_check";
    private static final String ACCOUNT_INFO_URL = "https://account.chsi.com.cn/account/account!show";
    private static final String EDUCATION_URL = "https://my.chsi.com.cn/archive/gdjy/xj/show.action";
    /**
     * External endpoint that is sent the public URL of the saved education image.
     * NOTE(review): presumably an image-recognition service; confirm with its owner.
     */
    private static final String EDUCATION_IMAGE_ENDPOINT = "http://27.45.147.9:8002/xuexin";

    /** Session keys for the state shared between the captcha step and the login step. */
    private static final String SESSION_WEB_CLIENT = "XueXingWang.webClient";
    private static final String SESSION_LT = "XueXingWang.lt";
    private static final String SESSION_EXECUTION = "XueXingWang.execution";
    private static final String SESSION_EVENT_ID = "XueXingWang.eventId";

    /** Fields that must be present and non-empty in the decrypted login payload. */
    private static final String[] REQUIRED_FIELDS = {"username", "password", "code", "custId"};

    /**
     * Opens the login page, submits a deliberately wrong login so that the site
     * shows its image captcha, and returns the captcha URL.
     *
     * @param request current HTTP request; its session receives the browser state
     * @return JSON string with {@code msg}, {@code code} ("0000" on success,
     *         "0001" on failure) and {@code data}
     * @throws Exception declared for interface compatibility; failures are
     *                   reported through the returned JSON
     */
    @Override
    public String fetchLoginCode(HttpServletRequest request) throws Exception {
        JSONObject json;
        WebClient webClient = null;
        boolean storedInSession = false;
        try {
            webClient = new WebClient(BrowserVersion.CHROME);
            webClient.getOptions().setCssEnabled(true);
            webClient.getOptions().setJavaScriptEnabled(true);
            webClient.getOptions().setThrowExceptionOnScriptError(false);
            webClient.getCookieManager().setCookiesEnabled(true);
            // NOTE(review): disables TLS certificate validation while credentials are
            // transmitted; kept as in the original, but should be revisited.
            webClient.getOptions().setUseInsecureSSL(true);

            HtmlPage page = webClient.getPage(LOGIN_URL);
            if (!page.asText().contains("登录_学信网")) {
                log.warn("Unexpected content on login page {}", LOGIN_URL);
                json = result("获取失败", "0001", "error");
            } else {
                Document loginDoc = Jsoup.parse(page.asXml());
                // Dummy credentials: the only purpose is to trigger the captcha prompt.
                WebRequest probe = buildLoginRequest(LOGIN_URL, "10086", "10086", "10086",
                        loginDoc.select("input[name=lt]").attr("value"),
                        loginDoc.select("input[name=execution]").attr("value"),
                        loginDoc.select("input[name=_eventId]").attr("value"));
                HtmlPage indexPage = webClient.getPage(probe);
                if (indexPage.asText().contains("图片验证码输入有误")) {
                    json = getLoginCode(indexPage, request, webClient);
                    storedInSession = "0000".equals(json.getString("code"));
                } else {
                    json = result("获取失败", "0001", "error");
                }
            }
        } catch (Exception e) {
            log.error("Failed to fetch login captcha", e);
            json = result("获取失败", "0001", "error");
        } finally {
            // The client is only kept alive when it was handed over to the session.
            if (!storedInSession && webClient != null) {
                webClient.close();
            }
        }
        return JSONObject.toJSONString(json);
    }

    /**
     * Logs in with the user's credentials and, on success, scrapes the account
     * and education information.
     *
     * @param data    RSA-encrypted JSON containing {@code username}, {@code password},
     *                {@code code} (captcha) and {@code custId}
     * @param url     public URL prefix under which the saved image is reachable
     * @param img     local directory the education image is written to
     * @param request current HTTP request; its session must hold the state stored
     *                by {@link #fetchLoginCode(HttpServletRequest)}
     * @return JSON string describing either the error or the scraped data
     * @throws Exception if decrypting or parsing {@code data} fails
     */
    @Override
    public String loginXueXing(String data, String url, String img, HttpServletRequest request) throws Exception {
        String jsonData = RsaUtil.privateDecrypt(data, RsaUtil.getPrivateKey(RsaUtil.PRIVATE_KEY));
        JSONObject jsonObject = JSONObject.parseObject(jsonData);

        Object stored = request.getSession().getAttribute(SESSION_WEB_CLIENT);
        if (!(stored instanceof WebClient)) {
            return JSONObject.toJSONString(result("非法请求", "1000", "error"));
        }
        WebClient webClient = (WebClient) stored;

        String validation = checkParameter(jsonObject);
        if (!"success".equals(validation)) {
            return validation;
        }

        WebRequest webRequest = buildLoginRequest(CAS_LOGIN_URL,
                jsonObject.getString("username"),
                jsonObject.getString("password"),
                jsonObject.getString("code"),
                sessionString(request, SESSION_LT),
                sessionString(request, SESSION_EXECUTION),
                sessionString(request, SESSION_EVENT_ID));
        try {
            Page page = webClient.getPage(webRequest);
            log.debug("Login response page: {}", page);
            if (page != null) {
                Document doc = Jsoup.parse(page.getWebResponse().getContentAsString());
                // Matches an element carrying both classes, i.e. class="ct_input errors".
                Element error = doc.select("div.ct_input.errors").first();
                if (error != null) {
                    return JSONObject.toJSONString(result(error.text(), "0001", "error"));
                }
            }
        } catch (FailingHttpStatusCodeException e) {
            // Best effort, as in the original: continue and let the scrape decide.
            log.warn("Login request returned HTTP status {}", e.getStatusCode());
        } catch (Exception e) {
            log.error("Login request failed", e);
        }
        JSONObject crawlerResult = crawlerUserInfo(webClient, url, img, jsonObject.getString("custId"));
        return JSONObject.toJSONString(crawlerResult);
    }

    /**
     * Collects account and education information and wraps both in one response.
     * Each missing part gets its own error object.
     */
    private JSONObject crawlerUserInfo(WebClient webClient, String url, String img, String custId) {
        JSONObject json = new JSONObject();
        JSONObject jsonData = new JSONObject();
        JSONObject userInfo = getUserInfo(webClient);
        JSONObject educationInfo = getEducationInfo(webClient, url, img, custId);

        // getUserInfo never returns null; an empty object means nothing was scraped.
        if (userInfo != null && !userInfo.isEmpty()) {
            jsonData.put("userInfo", userInfo);
            json.put("msg", "采集成功");
            json.put("code", "0000");
        } else {
            json.put("msg", "登录失败,请检查账号密码是否有误");
            json.put("code", "9999");
            JSONObject userError = new JSONObject();
            userError.put("error", "用户信息采集失败");
            jsonData.put("userInfo", userError);
        }

        if (educationInfo != null) {
            jsonData.put("educationInfo", educationInfo);
        } else {
            JSONObject educationError = new JSONObject();
            educationError.put("error", "未采集到您的学历信息,请您登录官网查看是否绑定相关学历信息!");
            jsonData.put("educationInfo", educationError);
        }
        json.put("custId", custId);
        json.put("data", jsonData);
        return json;
    }

    /**
     * Validates the decrypted login payload.
     *
     * @return {@code "success"} when all required fields are present, otherwise a
     *         JSON error string naming the first missing field
     */
    private String checkParameter(JSONObject jsonObject) {
        if (jsonObject == null) {
            return JSONObject.toJSONString(result("请求参数不能为空", "1000", "error"));
        }
        for (String field : REQUIRED_FIELDS) {
            String value = jsonObject.getString(field);
            if (value == null || value.isEmpty()) {
                return JSONObject.toJSONString(result(field + "不能为空", "1000", "error"));
            }
        }
        return "success";
    }

    /**
     * Extracts the captcha image URL and the hidden form tokens from the login
     * page, then stores the tokens and the browser in the HTTP session.
     *
     * @param indexPage login page returned after the captcha was triggered
     * @param request   current HTTP request whose session receives the state
     * @param webClient browser instance that owns the login cookies
     * @return JSON with code "0000" and {@code data.url} on success, code "0001" otherwise
     */
    public JSONObject getLoginCode(HtmlPage indexPage, HttpServletRequest request, WebClient webClient) {
        JSONObject json = new JSONObject();
        try {
            HtmlForm form = indexPage.getForms().get(0);
            HtmlInput captcha = form.getInputByName("captcha");
            HtmlPage page = captcha.click();
            Document doc = Jsoup.parse(page.asXml());
            String lt = doc.select("input[name=lt]").attr("value");
            String execution = doc.select("input[name=execution]").attr("value");
            String eventId = doc.select("input[name=_eventId]").attr("value");
            // The second <img> on the page is taken to be the captcha, as in the original.
            String codeUrl = ACCOUNT_BASE_URL + doc.select("img").get(1).attr("src");
            log.debug("Captcha URL: {}", codeUrl);

            Object previous = request.getSession().getAttribute(SESSION_WEB_CLIENT);
            request.getSession().setAttribute(SESSION_LT, lt);
            request.getSession().setAttribute(SESSION_EXECUTION, execution);
            request.getSession().setAttribute(SESSION_EVENT_ID, eventId);
            request.getSession().setAttribute(SESSION_WEB_CLIENT, webClient);
            // Release the browser left over from an earlier captcha request.
            if (previous instanceof WebClient && previous != webClient) {
                ((WebClient) previous).close();
            }

            JSONObject urlJson = new JSONObject();
            urlJson.put("url", codeUrl);
            json.put("msg", "获取图片验证码成功");
            json.put("code", "0000");
            json.put("data", urlJson);
        } catch (Exception e) {
            log.error("获取页面表单异常", e);
            json.put("msg", "获取图片验证码失败");
            json.put("code", "0001");
            json.put("data", "null");
        }
        log.info("获取验证码标签完成");
        return json;
    }

    /**
     * Scrapes the account page. Values are read by position from the page's
     * {@code <strong>} elements.
     *
     * @param webClient logged-in browser instance
     * @return JSON with {@code access}, {@code name}, {@code country}, {@code phone}
     *         and {@code email}; empty (never {@code null}) when nothing was scraped
     */
    public JSONObject getUserInfo(WebClient webClient) {
        JSONObject json = new JSONObject();
        try {
            HtmlPage page = webClient.getPage(ACCOUNT_INFO_URL);
            String xml = page.asXml();
            if (xml.contains("学信网账号-账号信息")) {
                Elements strongs = Jsoup.parse(xml).select("strong");
                json.put("access", strongs.get(0).text().replaceAll(" ", ""));
                json.put("name", strongs.get(2).text().trim().replaceAll(" ", "").trim());
                json.put("country", strongs.get(3).text().trim());
                json.put("phone", strongs.get(5).text().trim());
                // The 7th <strong> only exists when an e-mail address is bound.
                if (strongs.size() > 6) {
                    json.put("email", strongs.get(6).text());
                } else {
                    json.put("email", "用户暂未绑定邮箱");
                    log.info("该用户暂未绑定邮箱");
                }
            } else {
                log.info("获取用户基本信息异常");
            }
        } catch (Exception e) {
            log.error("Failed to scrape account information", e);
        }
        return json;
    }

    /**
     * Downloads the education-record image, saves it as a PNG under
     * {@code imgUrl}, and posts its public URL to the external endpoint.
     *
     * @param webClient logged-in browser instance
     * @param url       public URL prefix (without scheme) for the saved file
     * @param imgUrl    local directory to write the PNG into
     * @param custId    customer id forwarded to the external endpoint
     * @return the endpoint's JSON response, or {@code null} when the image is
     *         missing or any step fails
     */
    public JSONObject getEducationInfo(WebClient webClient, String url, String imgUrl, String custId) {
        try {
            // Fixed pause carried over from the original before requesting the page.
            Thread.sleep(3000);
            HtmlPage page = webClient.getPage(EDUCATION_URL);
            Elements images = Jsoup.parse(page.asXml()).select("img");
            // The 6th <img> is taken to be the education record, as in the original.
            if (images.size() <= 5) {
                return null;
            }
            String educationUrl = images.get(5).attr("src");

            UnexpectedPage imgPage = webClient.getPage(educationUrl);
            BufferedImage bufferedImage;
            try (java.io.InputStream in = imgPage.getInputStream()) {
                bufferedImage = ImageIO.read(in);
            }
            if (bufferedImage == null) {
                log.warn("Education image at {} could not be decoded", educationUrl);
                return null;
            }

            String fileName = System.currentTimeMillis() + ".png";
            // File(parent, child) uses the platform separator instead of a fixed "\\".
            File target = new File(imgUrl, fileName);
            ImageIO.write(bufferedImage, "png", target);
            String xlurl = "http://" + (url + fileName).replaceAll("\\\\", "/");
            log.debug("Saved education image to {} (public URL {})", target, xlurl);

            // Build the body with a JSON library so the values are escaped correctly.
            JSONObject body = new JSONObject(true);
            body.put("path", xlurl);
            body.put("custId", custId);
            WebRequest webRequest = new WebRequest(new URL(EDUCATION_IMAGE_ENDPOINT), HttpMethod.POST);
            webRequest.setAdditionalHeader("Content-type", "application/json; charset=utf-8");
            webRequest.setRequestBody(body.toJSONString());
            UnexpectedPage textPage = webClient.getPage(webRequest);
            return JSONObject.parseObject(textPage.getWebResponse().getContentAsString());
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            log.warn("Interrupted while collecting education information");
        } catch (SSLHandshakeException e) {
            log.error("PKIX路径\n构建失败:安全证书错误", e);
        } catch (Exception e) {
            log.error("Failed to collect education information", e);
        }
        return null;
    }

    /** Builds the form POST used for both the captcha probe and the real login. */
    private WebRequest buildLoginRequest(String targetUrl, String username, String password, String captcha,
                                         String lt, String execution, String eventId)
            throws java.net.MalformedURLException {
        Map<String, String> headers = new HashMap<>();
        headers.put("Content-Type", "application/x-www-form-urlencoded");
        headers.put("Host", "account.chsi.com.cn");
        headers.put("Referer", "https://account.chsi.com.cn/passport/login");

        List<NameValuePair> params = new ArrayList<>();
        params.add(new NameValuePair("username", username));
        params.add(new NameValuePair("password", password));
        params.add(new NameValuePair("captcha", captcha));
        params.add(new NameValuePair("submit", "登录"));
        params.add(new NameValuePair("lt", lt));
        params.add(new NameValuePair("execution", execution));
        params.add(new NameValuePair("_eventId", eventId));

        WebRequest webRequest = new WebRequest(new URL(targetUrl));
        webRequest.setAdditionalHeaders(headers);
        webRequest.setRequestParameters(params);
        webRequest.setHttpMethod(HttpMethod.POST);
        return webRequest;
    }

    /** Creates the {@code msg}/{@code code}/{@code data} envelope used by every response. */
    private static JSONObject result(Object msg, String code, Object data) {
        JSONObject json = new JSONObject();
        json.put("msg", msg);
        json.put("code", code);
        json.put("data", data);
        return json;
    }

    /** Reads a session attribute as a string, or {@code null} when it is absent. */
    private static String sessionString(HttpServletRequest request, String key) {
        Object value = request.getSession().getAttribute(key);
        return value == null ? null : value.toString();
    }
}

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值