HtmlUnit爬虫模拟登录LinkedIn

public static void loginLinkedin(String URL) throws IOException,SQLException {

        try {

            WebClient webClient =new WebClient();//创建WebClient

            webClient.getOptions().setJavaScriptEnabled(false);

            webClient.getOptions().setCssEnabled(false);

            // 获取页面

            HtmlPage page = webClient.getPage("https://www.linkedin.com/uas/login");// 打开linkedin

 

            // 获得name"session_key"html元素

            HtmlElement usernameEle =page.getElementByName("session_key");

            // 获得id"session_password"html元素

            HtmlElement passwordEle = (HtmlElement)page.getElementById

 

            ("session_password-login");

            usernameEle.focus();// 设置输入焦点

            usernameEle.type("z_hao1975@hotmail.com");// 填写值

 

            passwordEle.focus();// 设置输入焦点

            passwordEle.type("XXXXX");// 填写值

            // 获得name"submit"的元素

            HtmlElement submitEle =page.getElementByName("signin");

            // 点击登陆

            page = submitEle.click();

            String result = page.asXml();// 获得click()后的html页面(包括标签)

            if (result.contains("Sign Out")){

                System.out.println("登陆成功");

                HtmlPage page2 =webClient.getPage(URL);

                String pageXml =page2.asXml();

                Document doc2 = Jsoup.parse(pageXml);

                Element background_text =doc2.getElementById("background");

                Elements name =doc2.getElementsByClass("full-name");

                if (background_text !=null) {

                    //System.out.println(background_text.toString());

 

                    // iMatch method to check if including sensitive content

                    booleaniMatch = iContentMatch(background_text.toString());

                    if (iMatch) {

                        // String res = downloadFromUrl(URL, getDir());

                        writefile(filePath("linkedin"),name.toString()+"<br>"+background_text.toString(),false);

                        //writefile(filePath("linkedin"),result, false);

                        Sub_ID++;

                    }

 

                    // write into database

                    /*

                     * str_SQL =

                     * " INSERT INTO waverly.tsearch(ID,cliuid_2,unedname,Sub_ID,SearchResult) VALUES (88,"

                     * + Integer.valueOf(cliuid_2) + ","+ "\"" +unedname +

                     * "\"" + "," + Sub_ID+ "," + "\"" +

                     *background_text.toString().replace("\"","\\\"") +"\"" +

                     * ")";

                     *

                     * try {Class.forName("com.mysql.jdbc.Driver");

                     * java.sql.Connectionconn =DriverManager.getConnection(

                     * "jdbc:mysql://localhost:3306/waverly?user=root&password=197544"

                     * ); java.sql.Statementstmt =conn.createStatement();

                     * stmt.executeUpdate(str_SQL); Sub_ID++; }catch

                     * (SQLException e) { e.printStackTrace(); }catch

                     * (ClassNotFoundException e) { //TODO Auto-generated catch

                     * block e.printStackTrace(); }

                     */

 

                    // 创建目录

                    // str_m=str_trim.substring(0,

                    // 4-cliuid_2.length())+cliuid_2;

                    // writefile("c:/1.txt",background_text.toString(), false);

                } else {

                    System.out.println("登陆失败");

                }

            }

        } catch (FailingHttpStatusCodeExceptione) {

            // TODO Auto-generated catch block

            e.printStackTrace();

        } catch (Exceptione) {

            // TODO Auto-generated catch block

            e.printStackTrace();

        }

    }


  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值