爬取校内网早操刷卡记录

httpClient方式

package com.jiuzhouchedai.qqhru;

import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.ProtocolException;
import java.net.URL;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;

import org.apache.http.HttpEntity;
import org.apache.http.NameValuePair;
import org.apache.http.client.HttpClient;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.conn.ClientConnectionRequest;
import org.apache.http.entity.ContentType;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.message.BasicNameValuePair;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;

import com.jiuzhouchedai.crawlerWX.Record;

/**
 * Command-line entry point (HttpClient variant): crawls the morning-exercise
 * card-swipe records through {@link QDUtil2} and uploads the formatted text to
 * the WeChat backend as an HTTP form POST.
 */
public class QDzaocao {

    /**
     * Fetches the formatted record text via {@code QDUtil2.zaocaoToWeixin},
     * prints it, and posts it as the form fields {@code zaocaoData} and
     * {@code student_id} (UTF-8 encoded) to the update endpoint.
     *
     * <p>If the crawl produced nothing ({@code zaocaoToWeixin} returned
     * {@code null}), no upload is attempted.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        QDUtil2 qdUtil2 = new QDUtil2();

        // Text produced by the crawler.
        String zaocaoData = qdUtil2.zaocaoToWeixin("2014094041", "2014094041");
        System.out.println(zaocaoData);

        if (zaocaoData == null) {
            // zaocaoToWeixin returns null when any crawl/parse step failed;
            // there is nothing meaningful to upload in that case.
            System.err.println("No data crawled; upload skipped.");
            return;
        }

        HttpClient httpclient = new DefaultHttpClient();
        try {
            HttpPost httpPost = new HttpPost("http://www.jiuzhouchedai.com/weixintest/zaocaoUpdate");

            List<NameValuePair> nvps = new ArrayList<NameValuePair>();
            nvps.add(new BasicNameValuePair("zaocaoData", zaocaoData));
            nvps.add(new BasicNameValuePair("student_id", "2014094041"));

            httpPost.setEntity(new UrlEncodedFormEntity(nvps, "UTF-8"));
            httpclient.execute(httpPost);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Release the connection even when execute() throws.
            httpclient.getConnectionManager().shutdown();
        }
    }
}




package com.jiuzhouchedai.qqhru;

import java.io.InputStream;
import java.util.List;

import org.w3c.dom.Element;

import com.jiuzhouchedai.crawlerWX.Record;

/**
 * Thin orchestration layer over the static helpers in {@link QDUtil}: runs the
 * fetch, extract, parse and format steps in order.
 */
public class QDUtil2 {

    /**
     * Runs the full crawl pipeline and returns the text intended for the
     * WeChat side.
     *
     * <p>Steps: fetch the page stream, read it into a string (which is also
     * printed to standard output), cut out the data table, parse it as XML,
     * map the rows to {@code Record} objects and format them.
     *
     * @param name login name forwarded to {@code QDUtil.getSreamHtml}
     * @param pwd  password forwarded to {@code QDUtil.getSreamHtml}
     * @return the formatted record text, or {@code null} if any step threw
     *         (the exception's stack trace is printed)
     */
    public String zaocaoToWeixin(String name, String pwd) {

        // The result needed by the WeChat side.
        String zaocaoResult = null;
        try {
            String strHtml;
            // try-with-resources so the response stream is always closed.
            try (InputStream sreamHtml = QDUtil.getSreamHtml(name, pwd)) {
                strHtml = QDUtil.streamToString(sreamHtml);
            }
            System.out.println(strHtml);

            String zaocaoTable = QDUtil.zaocaoTable(strHtml);

            Element rootNode;
            try (InputStream zaocaoTableStream = QDUtil.stringToStream(zaocaoTable)) {
                rootNode = QDUtil.getRootNode(zaocaoTableStream);
            }

            List<Record> zaocaoPoList = QDUtil.zaocaoToPoList(rootNode);

            zaocaoResult = QDUtil.zaocaoResult(zaocaoPoList);
        } catch (Exception e) {
            e.printStackTrace();
        }
        return zaocaoResult;
    }
}
package com.jiuzhouchedai.qqhru;

import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;

import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.client.HttpClient;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.message.BasicNameValuePair;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;

import com.jiuzhouchedai.crawlerWX.Record;
import com.jiuzhouchedai.crawlerWX.Test001;

/**
 * Static helpers for crawling the morning-exercise card-swipe page: fetch the
 * page, cut out the data table, parse it with the W3C DOM API, map rows to
 * {@code Record} objects and format them as text.
 */
public class QDUtil {

    /** Not read or written by any method in this class; kept for existing callers. */
    public static String jssonid=null;

    /**
     * Posts the login form to the detail-query page and returns the response body.
     *
     * <p>The body is read completely into memory before the connection manager
     * is shut down, so the returned stream stays readable after this method
     * returns.
     *
     * @param name value sent as the {@code loginName} form field
     * @param pwd  value sent as the {@code password} form field
     * @return an in-memory stream over the raw response bytes
     * @throws Exception if the request or reading the response fails
     */
    public static InputStream getSreamHtml(String name,String pwd) throws Exception {
        HttpClient httpclient = new DefaultHttpClient();
        try {
            HttpPost httpPost = new HttpPost("http://172.16.51.37/personQueryZC_personalDetailQuery.html");

            List<NameValuePair> nvps = new ArrayList<NameValuePair>();
            nvps.add(new BasicNameValuePair("loginName", name));
            nvps.add(new BasicNameValuePair("password", pwd));
            httpPost.setEntity(new UrlEncodedFormEntity(nvps, "UTF-8"));

            HttpResponse response = httpclient.execute(httpPost);
            InputStream content = response.getEntity().getContent();

            // Drain the body while the connection is still open.
            java.io.ByteArrayOutputStream body = new java.io.ByteArrayOutputStream();
            byte[] chunk = new byte[4096];
            int len;
            while ((len = content.read(chunk)) != -1) {
                body.write(chunk, 0, len);
            }
            return new ByteArrayInputStream(body.toByteArray());
        } finally {
            httpclient.getConnectionManager().shutdown();
        }
    }

    /**
     * Reads a stream to its end and decodes the bytes as UTF-8.
     *
     * <p>All bytes are collected before decoding so that a multi-byte
     * character split across two reads is not corrupted.
     *
     * @param ins the stream to read; it is not closed by this method
     * @return the decoded text
     * @throws Exception if reading fails
     */
    public static String streamToString(InputStream ins) throws Exception {
        java.io.ByteArrayOutputStream collected = new java.io.ByteArrayOutputStream();
        byte[] data = new byte[1024];
        int len;
        while ((len = ins.read(data)) != -1) {
            collected.write(data, 0, len);
        }
        return collected.toString("UTF-8");
    }

    /**
     * Cuts the {@code <table id="dataTable" ...>...</table>} fragment out of a page
     * and removes every {@code &nbsp;} entity from it.
     *
     * @param strHtml the full page markup
     * @return the table fragment, terminated by {@code "\n</table>"}
     * @throws IllegalArgumentException if the opening tag or a following
     *         {@code </table>} cannot be found
     */
    public static String zaocaoTable(String strHtml){
        int begin = strHtml.indexOf("<table id=\"dataTable\"");
        if (begin < 0) {
            throw new IllegalArgumentException("page does not contain <table id=\"dataTable\"");
        }
        int end = strHtml.indexOf("</table>", begin);
        if (end < 0) {
            throw new IllegalArgumentException("dataTable is not closed by </table>");
        }
        String tableHtml = strHtml.substring(begin, end) + "\n</table>";
        // Literal replacement; no regular expression is needed here.
        return tableHtml.replace("&nbsp;", "");
    }

    /**
     * Wraps a string in a stream of its UTF-8 bytes.
     *
     * @param str the text to wrap
     * @return the stream, or {@code null} when {@code str} is {@code null} or blank
     * @throws Exception declared for compatibility with existing callers
     */
    public static InputStream stringToStream(String str) throws Exception {
        if (str == null || str.trim().equals("")) {
            return null;
        }
        return new ByteArrayInputStream(str.getBytes("UTF-8"));
    }

    /**
     * Parses the stream as XML with the W3C DOM API and returns the document element.
     *
     * @param is the XML input
     * @return the root element of the parsed document
     * @throws Exception if the parser cannot be configured or the input is not well-formed
     */
    public static Element getRootNode(InputStream is) throws Exception {
        DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
        // The markup is scraped from a remote page: refuse DOCTYPE declarations
        // so external entities cannot be resolved (XXE).
        builderFactory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
        DocumentBuilder builder = builderFactory.newDocumentBuilder();
        Document document = builder.parse(is);
        return document.getDocumentElement();
    }

    /**
     * Maps every {@code tr} under the given node to a {@code Record}.
     *
     * <p>For each row the trimmed text of its direct {@code td} children is
     * collected; cells 0..5 become id, student id, name, ID number, time and
     * validity flag. Rows with fewer than six {@code td} cells are skipped.
     *
     * @param rootNode the element whose {@code tr} descendants are read
     * @return the records in document order
     */
    public static   List<Record> zaocaoToPoList(Element rootNode){
        NodeList trs = rootNode.getElementsByTagName("tr");
        List<Record> listRecord = new ArrayList<Record>();
        for (int i = 0; i < trs.getLength(); i++) {
            NodeList tds = trs.item(i).getChildNodes();

            List<String> cells = new ArrayList<String>();
            for (int j = 0; j < tds.getLength(); j++) {
                if ("td".equals(tds.item(j).getNodeName())) {
                    cells.add(tds.item(j).getTextContent().trim());
                }
            }
            if (cells.size() < 6) {
                // Not a data row (e.g. no td children); nothing to map.
                continue;
            }

            Record record = new Record();
            record.setId(cells.get(0));
            record.setStudentID(cells.get(1));
            record.setName(cells.get(2));
            record.setIDNumber(cells.get(3));
            record.setTime(cells.get(4));
            record.setIsValid(cells.get(5));
            listRecord.add(record);
        }
        return listRecord;
    }

    /**
     * Formats the records as text, six labelled lines per record, each line
     * terminated by {@code "\n"}.
     *
     * @param zaocaoPoList the records to format
     * @return the concatenated text; empty when the list is empty
     * @throws Exception declared for compatibility with existing callers
     */
    public static String zaocaoResult(List<Record> zaocaoPoList) throws Exception{
        StringBuilder result = new StringBuilder();
        for (Record record : zaocaoPoList) {
            result.append("记录号:").append(record.getId()).append("\n");
            result.append("学号:").append(record.getStudentID()).append("\n");
            result.append("姓名:").append(record.getName()).append("\n");
            result.append("卡号:").append(record.getIDNumber()).append("\n");
            result.append("刷卡时间:").append(record.getTime()).append("\n");
            result.append("是否有效:").append(record.getIsValid()).append("\n");
        }
        return result.toString();
    }

    /**
     * Submits a form body with {@code HttpURLConnection} using POST and returns
     * the response stream.
     *
     * @param path   the target URL
     * @param params the request body, e.g. {@code "name=zhangsan&age=21"}; written as UTF-8
     * @return the response body stream; the caller is responsible for closing it
     * @throws Exception if the connection or the write fails
     */
    public static InputStream sendPostRequestByForm(String path, String params) throws Exception{
        URL url = new URL(path);
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        conn.setRequestMethod("POST");
        conn.setDoOutput(true); // a request body will be written
        byte[] body = params.getBytes("UTF-8");
        try (java.io.OutputStream out = conn.getOutputStream()) {
            out.write(body);
        }
        return conn.getInputStream();
    }

}

WebClient

//爬取校内网数据,并上传到微信服务器
import java.util.ArrayList;
import java.util.List;



import org.apache.http.NameValuePair;
import org.apache.http.client.HttpClient;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.message.BasicNameValuePair;

import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.NicelyResynchronizingAjaxController;
import com.gargoylesoftware.htmlunit.ScriptResult;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.DomElement;
import com.gargoylesoftware.htmlunit.html.DomNodeList;
import com.gargoylesoftware.htmlunit.html.HtmlElement;
import com.gargoylesoftware.htmlunit.html.HtmlPage;

/**
 * HtmlUnit variant of the crawler: logs in through the campus login form,
 * opens the card-swipe detail tab via the page's own JavaScript, reads the
 * {@code dataTable} rows and uploads the formatted text with an HTTP form POST.
 */
public class TestC {

    /**
     * Opens the login page, types the credentials into the form and clicks the
     * third {@code input} element on the page.
     *
     * @param name text typed into the element named {@code loginName}
     * @param pwd  text typed into the element with id {@code password}
     * @return the page returned by the click
     * @throws Exception if the page cannot be loaded or an element is missing
     */
    public static HtmlPage home_index(String name,String pwd ) throws Exception {

        WebClient webClient = new WebClient(BrowserVersion.CHROME);

        webClient.setJavaScriptTimeout(5000);
        webClient.getOptions().setCssEnabled(false);
        webClient.getOptions().setJavaScriptEnabled(true);
        webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);
        webClient.getOptions().setThrowExceptionOnScriptError(false);
        webClient.setAjaxController(new NicelyResynchronizingAjaxController());

        // Load the login page.
        HtmlPage page = webClient.getPage("http://172.16.51.37/user_login.html");

        HtmlElement usernameEle = page.getElementByName("loginName");
        HtmlElement passwordEle = (HtmlElement) page.getElementById("password");

        usernameEle.focus();
        usernameEle.type(name);

        passwordEle.focus();
        passwordEle.type(pwd);

        // NOTE(review): the submit control is located by position (third <input>);
        // this presumably matches the current login form - confirm if the page changes.
        HtmlElement sub = (HtmlElement) page.getElementsByTagName("input").item(2);

        return sub.click();
    }

    /**
     * Crawls the records, prints them, and posts them as the form fields
     * {@code zaocaoData} and {@code student_id} to the update endpoint.
     * Any exception is caught and its stack trace printed.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        HttpClient httpclient = null;
        try {
            HtmlPage home_index = home_index("2015041001", "2015041001");

            ScriptResult zaocao_scriptResult = home_index.executeJavaScript("javascript:f_openTab('tabid_personaldetailZC','早操刷卡明细','personQueryZC_personalDetailQuery.html')");
            HtmlPage zaocao_html = (HtmlPage) zaocao_scriptResult.getNewPage();

            // One <tr> per record.
            DomNodeList<HtmlElement> zaocao_tr_html = zaocao_html.getElementById("dataTable").getElementsByTagName("tr");

            List<Record> record_list = new ArrayList<>();
            for (int i = 0; i < zaocao_tr_html.size(); i++) {

                // The <td> cells of one record.
                DomNodeList<HtmlElement> zaocao_td_html = ((DomElement) zaocao_tr_html.item(i)).getElementsByTagName("td");

                List<String> each_str = new ArrayList<>();
                for (int j = 0; j < zaocao_td_html.size(); j++) {
                    each_str.add(zaocao_td_html.item(j).getTextContent().trim());
                }
                if (each_str.size() < 6) {
                    // Not a data row (fewer than six <td> cells); skip it.
                    continue;
                }

                Record record = new Record();
                record.setId(each_str.get(0));
                record.setStudentID(firstChars(each_str.get(1), 10));
                record.setName(dropLastChar(each_str.get(2)));
                record.setIDNumber(dropLastChar(each_str.get(3)));
                record.setTime(dropLastChar(each_str.get(4)));
                record.setIsValid(each_str.get(5));

                record_list.add(record);
            }

            StringBuilder sbs = new StringBuilder();
            for (Record record : record_list) {

                System.out.println("========================");

                System.out.println("记录号:"+record.getId());
                System.out.println("学号:"+record.getStudentID());
                System.out.println("姓名:"+record.getName());
                System.out.println("卡号:"+record.getIDNumber());
                System.out.println("刷卡时间:"+record.getTime());
                System.out.println("是否有效:"+record.getIsValid());

                sbs.append("\n========================");
                sbs.append("\n记录号:").append(record.getId());
                sbs.append("\n学号:").append(record.getStudentID());
                sbs.append("\n姓名:").append(record.getName());
                sbs.append("\n卡号:").append(record.getIDNumber());
                sbs.append("\n刷卡时间:").append(record.getTime());
                sbs.append("\n是否有效:").append(record.getIsValid());
            }

            System.out.println(sbs.toString());

            httpclient = new DefaultHttpClient();
            HttpPost httpPost = new HttpPost("http://www.jiuzhouchedai.com/weixintest/zaocaoUpdate");
            List<NameValuePair> nvps = new ArrayList<NameValuePair>();

            nvps.add(new BasicNameValuePair("zaocaoData", sbs.toString()));
            // NOTE(review): this student_id differs from the login used above
            // ("2015041001") - confirm which one the server expects.
            nvps.add(new BasicNameValuePair("student_id", "2014094041"));
            httpPost.setEntity(new UrlEncodedFormEntity(nvps, "UTF-8"));
            httpclient.execute(httpPost);

        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Release the connection even when the upload throws.
            if (httpclient != null) {
                httpclient.getConnectionManager().shutdown();
            }
        }
    }

    /** Returns the first {@code n} characters of {@code s}, or all of {@code s} if it is shorter. */
    private static String firstChars(String s, int n) {
        return s.length() <= n ? s : s.substring(0, n);
    }

    /** Returns {@code s} without its last character; an empty string is returned unchanged. */
    private static String dropLastChar(String s) {
        return s.isEmpty() ? s : s.substring(0, s.length() - 1);
    }

}


//==================================================

/**
 * One card-swipe record. Every property is stored as a plain string and is
 * {@code null} until its setter is called.
 */
public class Record {

    private String id;
    private String studentID;
    private String name;
    private String idNumber;
    private String time;
    private String isValid;

    /** @return the value last passed to {@link #setId(String)} */
    public String getId() {
        return this.id;
    }

    /** @param id the record id to store */
    public void setId(String id) {
        this.id = id;
    }

    /** @return the value last passed to {@link #setStudentID(String)} */
    public String getStudentID() {
        return this.studentID;
    }

    /** @param studentID the student id to store */
    public void setStudentID(String studentID) {
        this.studentID = studentID;
    }

    /** @return the value last passed to {@link #setName(String)} */
    public String getName() {
        return this.name;
    }

    /** @param name the name to store */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the value last passed to {@link #setIDNumber(String)} */
    public String getIDNumber() {
        return this.idNumber;
    }

    /** @param iDNumber the ID number to store */
    public void setIDNumber(String iDNumber) {
        this.idNumber = iDNumber;
    }

    /** @return the value last passed to {@link #setTime(String)} */
    public String getTime() {
        return this.time;
    }

    /** @param time the time text to store */
    public void setTime(String time) {
        this.time = time;
    }

    /** @return the value last passed to {@link #setIsValid(String)} */
    public String getIsValid() {
        return this.isValid;
    }

    /** @param isValid the validity text to store */
    public void setIsValid(String isValid) {
        this.isValid = isValid;
    }
}


//微信服务器在 servlet 中通过 request.getParameter("xxx") 获取上传的数据。

火狐插件 selenium IDE

实现自动化爬虫,
自动生成代码,支持生成为java代码
http://www.cnblogs.com/fnng/archive/2011/10/23/2222157.html

https://addons.mozilla.org/en-US/firefox/addon/selenium-ide-button/

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值