httpClient方式
package com.jiuzhouchedai.qqhru;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.ProtocolException;
import java.net.URL;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;
import org.apache.http.HttpEntity;
import org.apache.http.NameValuePair;
import org.apache.http.client.HttpClient;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.conn.ClientConnectionRequest;
import org.apache.http.entity.ContentType;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.message.BasicNameValuePair;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import com.jiuzhouchedai.crawlerWX.Record;
/**
 * Command-line entry point: crawls one student's morning-exercise ("zaocao")
 * check-in records and uploads them to the WeChat backend as an HTTP form POST
 * using Apache HttpClient.
 */
public class QDzaocao {
    /** Account used for the crawl; the same value is passed as login name and password. */
    private static final String STUDENT_ID = "2014094041";

    /** Endpoint that receives the {@code zaocaoData} and {@code student_id} form fields. */
    private static final String UPLOAD_URL = "http://www.jiuzhouchedai.com/weixintest/zaocaoUpdate";

    /**
     * Crawls the records, prints them, and posts them to {@link #UPLOAD_URL}.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        QDUtil2 qdUtil2 = new QDUtil2();
        // Data collected by the crawler.
        String zaocaoData = qdUtil2.zaocaoToWeixin(STUDENT_ID, STUDENT_ID);
        System.out.println(zaocaoData);
        if (zaocaoData == null) {
            // Nothing was crawled; do not push an empty record set to the server.
            System.err.println("No zaocao data crawled; upload skipped.");
            return;
        }

        HttpClient httpclient = new DefaultHttpClient();
        try {
            HttpPost httpPost = new HttpPost(UPLOAD_URL);
            List<NameValuePair> nvps = new ArrayList<NameValuePair>();
            nvps.add(new BasicNameValuePair("zaocaoData", zaocaoData));
            nvps.add(new BasicNameValuePair("student_id", STUDENT_ID));
            // Form-encode as UTF-8 so the Chinese field labels survive the transfer.
            httpPost.setEntity(new UrlEncodedFormEntity(nvps, "UTF-8"));
            httpclient.execute(httpPost);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Release the connection even when the request fails.
            httpclient.getConnectionManager().shutdown();
        }
    }
}
package com.jiuzhouchedai.qqhru;
import java.io.InputStream;
import java.util.List;
import org.w3c.dom.Element;
import com.jiuzhouchedai.crawlerWX.Record;
/**
 * Runs the whole crawl pipeline built from the {@code QDUtil} helpers:
 * fetch page, cut out the records table, parse it, and format the records.
 */
public class QDUtil2 {
    /**
     * Produces the text that is sent to the WeChat side.
     *
     * @param name login name handed to {@code QDUtil.getSreamHtml}
     * @param pwd  password handed to {@code QDUtil.getSreamHtml}
     * @return the formatted records, or {@code null} when any step throws
     *         (the stack trace is printed and the exception is not propagated)
     */
    public String zaocaoToWeixin(String name, String pwd) {
        try {
            String pageHtml = QDUtil.streamToString(QDUtil.getSreamHtml(name, pwd));
            System.out.println(pageHtml);
            // Table fragment -> stream -> DOM root -> record list -> text.
            Element tableRoot = QDUtil.getRootNode(
                    QDUtil.stringToStream(QDUtil.zaocaoTable(pageHtml)));
            return QDUtil.zaocaoResult(QDUtil.zaocaoToPoList(tableRoot));
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }
}
package com.jiuzhouchedai.qqhru;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.client.HttpClient;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.message.BasicNameValuePair;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import com.jiuzhouchedai.crawlerWX.Record;
import com.jiuzhouchedai.crawlerWX.Test001;
/**
 * Static helpers for crawling the morning-exercise ("zaocao") check-in page
 * and turning its {@code dataTable} into text for upload.
 */
public class QDUtil {
    /** Session-cookie slot; nothing in this class assigns it. */
    public static String jssonid = null;

    /** Page that is POSTed to with the login form fields. */
    private static final String DETAIL_QUERY_URL =
            "http://172.16.51.37/personQueryZC_personalDetailQuery.html";

    /** Opening tag prefix of the records table inside the page. */
    private static final String TABLE_OPEN = "<table id=\"dataTable\"";

    /** Closing tag that ends the records table. */
    private static final String TABLE_CLOSE = "</table>";

    /** Number of {@code td} cells a row must have to be mapped onto a {@code Record}. */
    private static final int RECORD_COLUMNS = 6;

    /**
     * Fetches the check-in page by POSTing the login form.
     *
     * <p>The response body is copied into memory before the connection manager
     * is shut down, so the returned stream stays readable after this method
     * returns.
     *
     * @param name value sent as the {@code loginName} form field
     * @param pwd  value sent as the {@code password} form field
     * @return an in-memory stream over the raw response body
     * @throws Exception if the request fails or the response has no body
     */
    public static InputStream getSreamHtml(String name, String pwd) throws Exception {
        HttpClient httpclient = new DefaultHttpClient();
        try {
            HttpPost httpPost = new HttpPost(DETAIL_QUERY_URL);
            List<NameValuePair> nvps = new ArrayList<NameValuePair>();
            nvps.add(new BasicNameValuePair("loginName", name));
            nvps.add(new BasicNameValuePair("password", pwd));
            httpPost.setEntity(new UrlEncodedFormEntity(nvps, "UTF-8"));
            HttpResponse response = httpclient.execute(httpPost);
            if (response.getEntity() == null) {
                throw new IllegalStateException("No response body from " + DETAIL_QUERY_URL);
            }
            InputStream content = response.getEntity().getContent();
            try {
                return new ByteArrayInputStream(readFully(content));
            } finally {
                content.close();
            }
        } finally {
            // Safe to release now: the body has already been buffered.
            httpclient.getConnectionManager().shutdown();
        }
    }

    /** Reads {@code in} to end-of-stream and returns every byte read. */
    private static byte[] readFully(InputStream in) throws Exception {
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        byte[] chunk = new byte[4096];
        int len;
        while ((len = in.read(chunk)) != -1) {
            buffer.write(chunk, 0, len);
        }
        return buffer.toByteArray();
    }

    /**
     * Reads a stream completely and decodes it as UTF-8.
     *
     * <p>All bytes are collected before decoding so a multi-byte character that
     * straddles a read boundary is not corrupted.
     *
     * @param ins stream to drain; not closed by this method
     * @return the decoded text
     * @throws Exception if reading fails
     */
    public static String streamToString(InputStream ins) throws Exception {
        return new String(readFully(ins), "UTF-8");
    }

    /**
     * Cuts the {@code dataTable} element out of the page.
     *
     * <p>The fragment runs from {@code <table id="dataTable"} to the first
     * {@code </table>} after it. Non-breaking spaces ({@code &nbsp;} entities and
     * U+00A0 characters) are removed; ordinary spaces are kept because removing
     * them would fuse tag names with their attributes and make the fragment
     * unparsable as XML.
     * NOTE(review): the previous code passed a single blank to {@code replaceAll};
     * presumably {@code &nbsp;} was intended - confirm against the live page.
     *
     * @param strHtml full page markup
     * @return the table fragment, terminated with {@code </table>}
     * @throws IllegalArgumentException if the table start or end tag is missing
     */
    public static String zaocaoTable(String strHtml) {
        int begin = strHtml.indexOf(TABLE_OPEN);
        if (begin < 0) {
            throw new IllegalArgumentException("dataTable not found in page");
        }
        int end = strHtml.indexOf(TABLE_CLOSE, begin);
        if (end < 0) {
            throw new IllegalArgumentException("dataTable is not closed in page");
        }
        String tableHtml = strHtml.substring(begin, end) + "\n" + TABLE_CLOSE;
        return tableHtml.replace("&nbsp;", "").replace("\u00A0", "");
    }

    /**
     * Wraps a string in a UTF-8 byte stream.
     *
     * @param str text to wrap
     * @return the stream, or {@code null} when {@code str} is null or blank
     * @throws Exception if encoding fails
     */
    public static InputStream stringToStream(String str) throws Exception {
        if (str == null || str.trim().isEmpty()) {
            return null;
        }
        return new ByteArrayInputStream(str.getBytes("UTF-8"));
    }

    /**
     * Parses the stream with the W3C DOM parser and returns the document element.
     *
     * @param is XML input
     * @return the root element
     * @throws Exception if the parser cannot be configured or the input is not well-formed
     */
    public static Element getRootNode(InputStream is) throws Exception {
        DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
        // The input is crawled markup: refuse DOCTYPE declarations so external
        // entities can never be resolved.
        builderFactory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
        DocumentBuilder builder = builderFactory.newDocumentBuilder();
        Document document = builder.parse(is);
        return document.getDocumentElement();
    }

    /**
     * Maps every {@code tr} under {@code rootNode} onto a {@code Record}.
     *
     * <p>The trimmed text of the row's direct {@code td} children is assigned, in
     * order, to id, student id, name, card number, time and validity. Rows with
     * fewer than six {@code td} cells are skipped.
     *
     * @param rootNode root of the parsed table
     * @return one record per complete row, in document order
     */
    public static List<Record> zaocaoToPoList(Element rootNode) {
        NodeList trs = rootNode.getElementsByTagName("tr");
        List<Record> listRecord = new ArrayList<Record>();
        for (int i = 0; i < trs.getLength(); i++) {
            NodeList tds = trs.item(i).getChildNodes();
            List<String> cells = new ArrayList<String>();
            for (int j = 0; j < tds.getLength(); j++) {
                if ("td".equals(tds.item(j).getNodeName())) {
                    cells.add(tds.item(j).getTextContent().trim());
                }
            }
            if (cells.size() < RECORD_COLUMNS) {
                // Not a data row; indexing into it would throw.
                continue;
            }
            Record record = new Record();
            record.setId(cells.get(0));
            record.setStudentID(cells.get(1));
            record.setName(cells.get(2));
            record.setIDNumber(cells.get(3));
            record.setTime(cells.get(4));
            record.setIsValid(cells.get(5));
            listRecord.add(record);
        }
        return listRecord;
    }

    /**
     * Formats the records as labelled lines, six lines per record.
     *
     * @param zaocaoPoList records to format
     * @return the concatenated text; empty when the list is empty
     * @throws Exception declared for compatibility with existing callers
     */
    public static String zaocaoResult(List<Record> zaocaoPoList) throws Exception {
        StringBuilder result = new StringBuilder();
        for (Record record : zaocaoPoList) {
            result.append("记录号:").append(record.getId()).append("\n");
            result.append("学号:").append(record.getStudentID()).append("\n");
            result.append("姓名:").append(record.getName()).append("\n");
            result.append("卡号:").append(record.getIDNumber()).append("\n");
            result.append("刷卡时间:").append(record.getTime()).append("\n");
            result.append("是否有效:").append(record.getIsValid()).append("\n");
        }
        return result.toString();
    }

    /**
     * Submits a form body with {@code HttpURLConnection} using POST.
     *
     * @param path   target URL
     * @param params pre-built form body, e.g. {@code "name=zhangsan&age=21"};
     *               sent as UTF-8 bytes
     * @return the response body stream; the caller is responsible for closing it
     * @throws Exception if the connection or the transfer fails
     */
    public static InputStream sendPostRequestByForm(String path, String params) throws Exception {
        URL url = new URL(path);
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        conn.setRequestMethod("POST");
        conn.setDoOutput(true); // a request body will be written
        byte[] body = params.getBytes("UTF-8");
        OutputStream out = conn.getOutputStream();
        try {
            out.write(body);
            out.flush();
        } finally {
            out.close();
        }
        return conn.getInputStream();
    }
}
WebClient
//爬取校内网数据,并上传到微信服务器
import java.util.ArrayList;
import java.util.List;
import org.apache.http.NameValuePair;
import org.apache.http.client.HttpClient;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.message.BasicNameValuePair;
import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.NicelyResynchronizingAjaxController;
import com.gargoylesoftware.htmlunit.ScriptResult;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.DomElement;
import com.gargoylesoftware.htmlunit.html.DomNodeList;
import com.gargoylesoftware.htmlunit.html.HtmlElement;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
/**
 * HtmlUnit-based crawler: logs in through the login form, opens the
 * morning-exercise check-in tab via the page's own JavaScript, reads the
 * {@code dataTable} rows and uploads the formatted text to the WeChat backend.
 */
public class TestC {
    /** Number of {@code td} cells a row must have to be mapped onto a {@code Record}. */
    private static final int RECORD_COLUMNS = 6;

    /**
     * Logs in and returns the page produced by clicking the submit control.
     *
     * @param name text typed into the element named {@code loginName}
     * @param pwd  text typed into the element with id {@code password}
     * @return the page returned by the click
     * @throws Exception if the page cannot be loaded or an element is missing
     */
    public static HtmlPage home_index(String name, String pwd) throws Exception {
        WebClient webClient = new WebClient(BrowserVersion.CHROME);
        webClient.setJavaScriptTimeout(5000);
        webClient.getOptions().setCssEnabled(false);
        webClient.getOptions().setJavaScriptEnabled(true);
        webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);
        webClient.getOptions().setThrowExceptionOnScriptError(false);
        webClient.setAjaxController(new NicelyResynchronizingAjaxController());
        // Open the login page.
        HtmlPage page = webClient.getPage("http://172.16.51.37/user_login.html");
        HtmlElement usernameEle = page.getElementByName("loginName");
        HtmlElement passwordEle = (HtmlElement) page.getElementById("password");
        usernameEle.focus();
        usernameEle.type(name);
        passwordEle.focus();
        passwordEle.type(pwd);
        // The third <input> on the page is used as the submit control.
        HtmlElement sub = (HtmlElement) page.getElementsByTagName("input").item(2);
        page = sub.click();
        return page;
    }

    /**
     * Crawls the records of a fixed account, prints them and uploads them.
     * Any failure is printed and ends the run.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            HtmlPage homePage = home_index("2015041001", "2015041001");
            ScriptResult zaocao_scriptResult = homePage.executeJavaScript("javascript:f_openTab('tabid_personaldetailZC','早操刷卡明细','personQueryZC_personalDetailQuery.html')");
            HtmlPage zaocao_html = (HtmlPage) zaocao_scriptResult.getNewPage();
            List<Record> record_list = parseRecords(zaocao_html);
            String report = printAndFormat(record_list);
            System.out.println(report);
            upload(report);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Builds one Record per {@code tr} of {@code dataTable} that has at least six {@code td} cells. */
    private static List<Record> parseRecords(HtmlPage zaocao_html) {
        // Each <tr> is one record.
        DomNodeList<HtmlElement> rows = zaocao_html.getElementById("dataTable").getElementsByTagName("tr");
        List<Record> record_list = new ArrayList<>();
        for (HtmlElement row : rows) {
            // Each <td> is one attribute of the record.
            DomNodeList<HtmlElement> tds = row.getElementsByTagName("td");
            List<String> cells = new ArrayList<>();
            for (HtmlElement td : tds) {
                cells.add(td.getTextContent().trim());
            }
            if (cells.size() < RECORD_COLUMNS) {
                // Not a data row; indexing into it would throw.
                continue;
            }
            Record record = new Record();
            record.setId(cells.get(0));
            record.setStudentID(firstChars(cells.get(1), 10));
            // NOTE(review): the last character of these cells is dropped,
            // presumably a trailing separator in the rendered text - confirm.
            record.setName(dropLastChar(cells.get(2)));
            record.setIDNumber(dropLastChar(cells.get(3)));
            record.setTime(dropLastChar(cells.get(4)));
            record.setIsValid(cells.get(5));
            record_list.add(record);
        }
        return record_list;
    }

    /** Returns the first {@code max} characters of {@code s}, or all of {@code s} when it is shorter. */
    private static String firstChars(String s, int max) {
        return s.length() > max ? s.substring(0, max) : s;
    }

    /** Returns {@code s} without its last character; an empty string is returned unchanged. */
    private static String dropLastChar(String s) {
        return s.isEmpty() ? s : s.substring(0, s.length() - 1);
    }

    /** Prints every record to stdout and returns the same content as one upload string. */
    private static String printAndFormat(List<Record> record_list) {
        StringBuilder sbs = new StringBuilder();
        for (Record record : record_list) {
            System.out.println("========================");
            System.out.println("记录号:" + record.getId());
            System.out.println("学号:" + record.getStudentID());
            System.out.println("姓名:" + record.getName());
            System.out.println("卡号:" + record.getIDNumber());
            System.out.println("刷卡时间:" + record.getTime());
            System.out.println("是否有效:" + record.getIsValid());
            sbs.append("\n========================");
            sbs.append("\n记录号:").append(record.getId());
            sbs.append("\n学号:").append(record.getStudentID());
            sbs.append("\n姓名:").append(record.getName());
            sbs.append("\n卡号:").append(record.getIDNumber());
            sbs.append("\n刷卡时间:").append(record.getTime());
            sbs.append("\n是否有效:").append(record.getIsValid());
        }
        return sbs.toString();
    }

    /** Posts the formatted records as a UTF-8 form and always releases the connection. */
    private static void upload(String zaocaoData) throws Exception {
        HttpClient httpclient = new DefaultHttpClient();
        try {
            HttpPost httpPost = new HttpPost("http://www.jiuzhouchedai.com/weixintest/zaocaoUpdate");
            List<NameValuePair> nvps = new ArrayList<NameValuePair>();
            nvps.add(new BasicNameValuePair("zaocaoData", zaocaoData));
            // NOTE(review): this id differs from the account used to log in above - confirm it is intended.
            nvps.add(new BasicNameValuePair("student_id", "2014094041"));
            httpPost.setEntity(new UrlEncodedFormEntity(nvps, "UTF-8"));
            httpclient.execute(httpPost);
        } finally {
            httpclient.getConnectionManager().shutdown();
        }
    }
}
//==================================================
/**
 * One check-in record as plain strings. Every property starts as
 * {@code null} and is stored exactly as given; no validation is done.
 */
public class Record {
    private String id;
    private String studentID;
    private String name;
    private String IDNumber;
    private String time;
    private String isValid;

    /** @return the record number */
    public String getId() {
        return this.id;
    }

    /** @param value the record number */
    public void setId(String value) {
        this.id = value;
    }

    /** @return the student number */
    public String getStudentID() {
        return this.studentID;
    }

    /** @param value the student number */
    public void setStudentID(String value) {
        this.studentID = value;
    }

    /** @return the student's name */
    public String getName() {
        return this.name;
    }

    /** @param value the student's name */
    public void setName(String value) {
        this.name = value;
    }

    /** @return the card number */
    public String getIDNumber() {
        return this.IDNumber;
    }

    /** @param value the card number */
    public void setIDNumber(String value) {
        this.IDNumber = value;
    }

    /** @return the card-swipe time */
    public String getTime() {
        return this.time;
    }

    /** @param value the card-swipe time */
    public void setTime(String value) {
        this.time = value;
    }

    /** @return the validity flag text */
    public String getIsValid() {
        return this.isValid;
    }

    /** @param value the validity flag text */
    public void setIsValid(String value) {
        this.isValid = value;
    }
}
//微信服务器通过在 servlet 中调用 request.getParameter("xxx") 获取数据。
火狐插件 selenium IDE
实现自动化爬虫,
自动生成代码,支持生成为java代码
http://www.cnblogs.com/fnng/archive/2011/10/23/2222157.html
https://addons.mozilla.org/en-US/firefox/addon/selenium-ide-button/