package proxy;
import java.io.IOException;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.NameValuePair;
import org.apache.commons.httpclient.URIException;
import org.apache.commons.httpclient.methods.PostMethod;
/**
 * Minimal Commons HttpClient example: POSTs a form to a fixed blog URL and
 * prints the response body when the server answers with HTTP 200.
 */
public class bolg_Login {
    /**
     * Sends one form-encoded POST (with an empty parameter list) to the
     * hard-coded URL and prints the status line and body on HTTP 200.
     * I/O failures are reported on {@code System.err} rather than thrown, and
     * the connection is always released.
     */
    public static void login() {
        System.out.println("登陆开始");
        String url = "http://tianying-fly.blog.hexun.com/5558132_d.html";
        HttpClient client = new HttpClient();
        PostMethod method = new PostMethod(url);
        // Declare the charset of the form body; required when parameters contain Chinese text.
        method.addRequestHeader("Content-Type","application/x-www-form-urlencoded;charset=gb2312");
        try {
            // No form fields are sent here. To actually log in, put the credential
            // pairs into this array, e.g. new NameValuePair("j_username", ...)
            // and new NameValuePair("j_password", ...).
            NameValuePair[] pair = new NameValuePair[] { };
            method.setRequestBody(pair);
            // To stay in the same server session across requests, cookies returned
            // earlier can be re-added with client.getState().addCookies(...) here.
            client.executeMethod(method);
            if (method.getStatusCode() == HttpStatus.SC_OK) {
                System.out.println("method.getStatusCode()"+method.getStatusCode()+",HttpStatus.SC_OK="+HttpStatus.SC_OK);
                System.out.println(method.getResponseBodyAsString());
            }
        } catch (URIException e) {
            // Previously swallowed silently; report so failures are visible.
            System.err.println("Invalid request URI " + url + ": " + e);
        } catch (IOException e) {
            System.err.println("Request to " + url + " failed: " + e);
        } finally {
            // Always hand the connection back, even when the request failed.
            method.releaseConnection();
        }
        System.out.println("登陆结束");
    }
}
//特别注意的是:
1.在传参数的时候需要设置请求传送内容的字符集:method.addRequestHeader("Content-Type","application/x-www-form-urlencoded;charset=gb2312");不然服务器接收到的中文可能会是乱码。
2.对于重定向的问题需要根据返回的状态进行处理,
3.是session问题
这就需要cookie与session进行配合:如果每次访问要共享同一个session,则需要每次设置client.getState().addCookies(cookis);其中cookis为上次或者更早在同一session期间返回的cookie,即cookis = client.getState().getCookies();
4.返回的结果可能需要转码,即method.getResponseBodyAsString()的内容可能需要转码!
运用httpclient需要对http协议有一些深入的了解,同时需要明白session与cookie的关系。
1.想下载远程URL地址的内容,可以使用httpclient。现在整理一下相关的代码:
而且解决中文乱码问题
方法一:流转码
/**
 * Reads the whole stream as GBK-encoded text and returns it as one string.
 * Every line read is terminated with a single {@code '\n'}, whatever line
 * separator the input used.
 *
 * @param is the stream to read; it is closed before this method returns.
 *           {@code null} is accepted and yields an empty string.
 * @return the decoded text, or whatever was read before an I/O error occurred
 * @throws UnsupportedEncodingException if the "gbk" charset is not available
 */
public String convertStreamToString(InputStream is) throws UnsupportedEncodingException {
    if (is == null) {
        // A response without a body can hand us no stream at all; treat it as empty text.
        return "";
    }
    BufferedReader reader = new BufferedReader(new InputStreamReader(is, "gbk"));
    StringBuilder sb = new StringBuilder();
    try {
        String line;
        while ((line = reader.readLine()) != null) {
            sb.append(line).append('\n');
        }
    } catch (IOException e) {
        // Best effort: keep what was read so far and report the failure.
        e.printStackTrace();
    } finally {
        try {
            // Closing the reader also closes the wrapped input stream.
            reader.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    return sb.toString();
}
/**
 * Downloads the content at the given URL with an HTTP GET and returns it as
 * text decoded by {@code convertStreamToString}.
 *
 * @param urlString the address to fetch
 * @return the response body as a string
 * @throws HttpException on an HTTP protocol error
 * @throws IOException on a transport error
 */
private String urlContent(String urlString) throws HttpException, IOException {
    HttpClient client = new HttpClient();
    // Fetch the address the caller passed in; a hard-coded URL used to be
    // requested here and the parameter was ignored.
    GetMethod get = new GetMethod(urlString);
    try {
        client.executeMethod(get);
        // Prints the charset announced by the server (diagnostic output kept from the original).
        System.out.print(get.getResponseCharSet());
        InputStream iStream = get.getResponseBodyAsStream();
        return convertStreamToString(iStream);
    } finally {
        // Release the connection even when the request or decoding fails.
        get.releaseConnection();
    }
}
==========利用httpclient访问受保护的资源地址=============
模拟登录
一、session方式
package com.crawinfo.httpclient;
import java.io.IOException;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.NameValuePair;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.methods.PostMethod;
/**
 * Session-based login example: posts credentials to the login URL, then
 * requests a protected URL through the same {@code HttpClient} instance.
 */
public class HttpClientTest {
    /**
     * Runs the login POST followed by the GET of the protected address and
     * prints both response bodies.
     *
     * @param args unused
     * @throws IOException on a transport error
     * @throws HttpException on an HTTP protocol error
     */
    public static void main(String[] args) throws HttpException, IOException {
        // One client for both requests, comparable to keeping one browser open.
        HttpClient httpclient = new HttpClient();
        GetMethod getMethod = new GetMethod("受保护的地址");
        PostMethod postMethod = new PostMethod("登录url");
        NameValuePair[] postData = new NameValuePair[2];
        postData[0] = new NameValuePair("loginName", "***");
        postData[1] = new NameValuePair("loginPswd", "**");
        postMethod.addParameters(postData);

        // Read the login response and release its connection BEFORE issuing the
        // next request: the client's default connection manager expects one
        // method at a time, so reading the POST body after the GET has been
        // executed can yield nothing.
        String loginResponse;
        try {
            httpclient.executeMethod(postMethod);
            loginResponse = postMethod.getResponseBodyAsString();
        } finally {
            postMethod.releaseConnection();
        }

        String protectedResponse;
        try {
            httpclient.executeMethod(getMethod);
            protectedResponse = getMethod.getResponseBodyAsString();
        } finally {
            getMethod.releaseConnection();
        }

        // Besides the body, headers and cookies are also available on the methods/client.
        System.out.println("response1=" + loginResponse);
        System.out.println("response2=" + protectedResponse);
    }
}
二、cookie方式
package com.crawinfo.rencai;
import org.apache.commons.httpclient.Cookie;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpMethod;
import org.apache.commons.httpclient.methods.GetMethod;
/**
 * Cookie-based access example: pre-builds cookies by hand and attaches them
 * to every request instead of performing a login request.
 */
public class CrawTest {
    /** Cookies sent with each request; replaced by the client's cookies after a successful fetch. */
    static Cookie[] cookies = new Cookie[2];

    /**
     * Fills {@link #cookies} with two hand-made cookies (placeholder domain and
     * values) that stand in for a logged-in session.
     *
     * @throws Exception declared for interface compatibility
     */
    public static void login() throws Exception {
        cookies[0] = new Cookie("抓取信息网址", "SessionId",
                "0", "/", -1, false);
        cookies[1] = new Cookie("抓取信息网址", "otherproperty",
                "xxx", "/", -1, false);
    }

    /**
     * Fetches {@code path} with a GET that carries the stored cookies.
     *
     * @param path the address to fetch
     * @return the response body when the status is 200, otherwise {@code null}
     * @throws Exception if executing the request or reading the body fails
     */
    public static String getRes(String path) throws Exception {
        String res = null;
        HttpClient client = new HttpClient();
        HttpMethod method = new GetMethod(path);
        try {
            client.getState().addCookies(cookies);
            client.executeMethod(method);
            if (method.getStatusCode() == 200) {
                res = method.getResponseBodyAsString();
                // Remember the cookies now held by the client for the next call.
                cookies = client.getState().getCookies();
            }
        } finally {
            // Release the connection even when the request throws.
            method.releaseConnection();
        }
        return res;
    }

    /**
     * Prepares the cookies, fetches the placeholder address and prints the result.
     *
     * @param args unused
     * @throws Exception if the fetch fails
     */
    public static void main(String[] args) throws Exception {
        CrawTest.login();
        String info = CrawTest.getRes("抓取信息网址");
        System.out.println("info="+info);
    }
}
import java.io.IOException;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.NameValuePair;
import org.apache.commons.httpclient.URIException;
import org.apache.commons.httpclient.methods.PostMethod;
/**
 * Minimal Commons HttpClient example: POSTs a form to a fixed blog URL and
 * prints the response body when the server answers with HTTP 200.
 */
public class bolg_Login {
    /**
     * Sends one form-encoded POST (with an empty parameter list) to the
     * hard-coded URL and prints the status line and body on HTTP 200.
     * I/O failures are reported on {@code System.err} rather than thrown, and
     * the connection is always released.
     */
    public static void login() {
        System.out.println("登陆开始");
        String url = "http://tianying-fly.blog.hexun.com/5558132_d.html";
        HttpClient client = new HttpClient();
        PostMethod method = new PostMethod(url);
        // Declare the charset of the form body; required when parameters contain Chinese text.
        method.addRequestHeader("Content-Type","application/x-www-form-urlencoded;charset=gb2312");
        try {
            // No form fields are sent here. To actually log in, put the credential
            // pairs into this array, e.g. new NameValuePair("j_username", ...)
            // and new NameValuePair("j_password", ...).
            NameValuePair[] pair = new NameValuePair[] { };
            method.setRequestBody(pair);
            // To stay in the same server session across requests, cookies returned
            // earlier can be re-added with client.getState().addCookies(...) here.
            client.executeMethod(method);
            if (method.getStatusCode() == HttpStatus.SC_OK) {
                System.out.println("method.getStatusCode()"+method.getStatusCode()+",HttpStatus.SC_OK="+HttpStatus.SC_OK);
                System.out.println(method.getResponseBodyAsString());
            }
        } catch (URIException e) {
            // Previously swallowed silently; report so failures are visible.
            System.err.println("Invalid request URI " + url + ": " + e);
        } catch (IOException e) {
            System.err.println("Request to " + url + " failed: " + e);
        } finally {
            // Always hand the connection back, even when the request failed.
            method.releaseConnection();
        }
        System.out.println("登陆结束");
    }
}
//特别注意的是:
1.在传参数的时候需要设置请求传送内容的字符集:method.addRequestHeader("Content-Type","application/x-www-form-urlencoded;charset=gb2312");不然服务器接收到的中文可能会是乱码。
2.对于重定向的问题需要根据返回的状态进行处理,
3.是session问题
这就需要cookie与session进行配合:如果每次访问要共享同一个session,则需要每次设置client.getState().addCookies(cookis);其中cookis为上次或者更早在同一session期间返回的cookie,即cookis = client.getState().getCookies();
4.返回的结果可能需要转码,即method.getResponseBodyAsString()的内容可能需要转码!
运用httpclient需要对http协议有一些深入的了解,同时需要明白session与cookie的关系。
1.想下载远程URL地址的内容,可以使用httpclient。现在整理一下相关的代码:
而且解决中文乱码问题
方法一:流转码
/**
 * Reads the whole stream as GBK-encoded text and returns it as one string.
 * Every line read is terminated with a single {@code '\n'}, whatever line
 * separator the input used.
 *
 * @param is the stream to read; it is closed before this method returns.
 *           {@code null} is accepted and yields an empty string.
 * @return the decoded text, or whatever was read before an I/O error occurred
 * @throws UnsupportedEncodingException if the "gbk" charset is not available
 */
public String convertStreamToString(InputStream is) throws UnsupportedEncodingException {
    if (is == null) {
        // A response without a body can hand us no stream at all; treat it as empty text.
        return "";
    }
    BufferedReader reader = new BufferedReader(new InputStreamReader(is, "gbk"));
    StringBuilder sb = new StringBuilder();
    try {
        String line;
        while ((line = reader.readLine()) != null) {
            sb.append(line).append('\n');
        }
    } catch (IOException e) {
        // Best effort: keep what was read so far and report the failure.
        e.printStackTrace();
    } finally {
        try {
            // Closing the reader also closes the wrapped input stream.
            reader.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    return sb.toString();
}
/**
 * Downloads the content at the given URL with an HTTP GET and returns it as
 * text decoded by {@code convertStreamToString}.
 *
 * @param urlString the address to fetch
 * @return the response body as a string
 * @throws HttpException on an HTTP protocol error
 * @throws IOException on a transport error
 */
private String urlContent(String urlString) throws HttpException, IOException {
    HttpClient client = new HttpClient();
    // Fetch the address the caller passed in; a hard-coded URL used to be
    // requested here and the parameter was ignored.
    GetMethod get = new GetMethod(urlString);
    try {
        client.executeMethod(get);
        // Prints the charset announced by the server (diagnostic output kept from the original).
        System.out.print(get.getResponseCharSet());
        InputStream iStream = get.getResponseBodyAsStream();
        return convertStreamToString(iStream);
    } finally {
        // Release the connection even when the request or decoding fails.
        get.releaseConnection();
    }
}
==========利用httpclient访问受保护的资源地址=============
模拟登录
一、session方式
package com.crawinfo.httpclient;
import java.io.IOException;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.NameValuePair;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.methods.PostMethod;
/**
 * Session-based login example: posts credentials to the login URL, then
 * requests a protected URL through the same {@code HttpClient} instance.
 */
public class HttpClientTest {
    /**
     * Runs the login POST followed by the GET of the protected address and
     * prints both response bodies.
     *
     * @param args unused
     * @throws IOException on a transport error
     * @throws HttpException on an HTTP protocol error
     */
    public static void main(String[] args) throws HttpException, IOException {
        // One client for both requests, comparable to keeping one browser open.
        HttpClient httpclient = new HttpClient();
        GetMethod getMethod = new GetMethod("受保护的地址");
        PostMethod postMethod = new PostMethod("登录url");
        NameValuePair[] postData = new NameValuePair[2];
        postData[0] = new NameValuePair("loginName", "***");
        postData[1] = new NameValuePair("loginPswd", "**");
        postMethod.addParameters(postData);

        // Read the login response and release its connection BEFORE issuing the
        // next request: the client's default connection manager expects one
        // method at a time, so reading the POST body after the GET has been
        // executed can yield nothing.
        String loginResponse;
        try {
            httpclient.executeMethod(postMethod);
            loginResponse = postMethod.getResponseBodyAsString();
        } finally {
            postMethod.releaseConnection();
        }

        String protectedResponse;
        try {
            httpclient.executeMethod(getMethod);
            protectedResponse = getMethod.getResponseBodyAsString();
        } finally {
            getMethod.releaseConnection();
        }

        // Besides the body, headers and cookies are also available on the methods/client.
        System.out.println("response1=" + loginResponse);
        System.out.println("response2=" + protectedResponse);
    }
}
二、cookie方式
package com.crawinfo.rencai;
import org.apache.commons.httpclient.Cookie;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpMethod;
import org.apache.commons.httpclient.methods.GetMethod;
/**
 * Cookie-based access example: pre-builds cookies by hand and attaches them
 * to every request instead of performing a login request.
 */
public class CrawTest {
    /** Cookies sent with each request; replaced by the client's cookies after a successful fetch. */
    static Cookie[] cookies = new Cookie[2];

    /**
     * Fills {@link #cookies} with two hand-made cookies (placeholder domain and
     * values) that stand in for a logged-in session.
     *
     * @throws Exception declared for interface compatibility
     */
    public static void login() throws Exception {
        cookies[0] = new Cookie("抓取信息网址", "SessionId",
                "0", "/", -1, false);
        cookies[1] = new Cookie("抓取信息网址", "otherproperty",
                "xxx", "/", -1, false);
    }

    /**
     * Fetches {@code path} with a GET that carries the stored cookies.
     *
     * @param path the address to fetch
     * @return the response body when the status is 200, otherwise {@code null}
     * @throws Exception if executing the request or reading the body fails
     */
    public static String getRes(String path) throws Exception {
        String res = null;
        HttpClient client = new HttpClient();
        HttpMethod method = new GetMethod(path);
        try {
            client.getState().addCookies(cookies);
            client.executeMethod(method);
            if (method.getStatusCode() == 200) {
                res = method.getResponseBodyAsString();
                // Remember the cookies now held by the client for the next call.
                cookies = client.getState().getCookies();
            }
        } finally {
            // Release the connection even when the request throws.
            method.releaseConnection();
        }
        return res;
    }

    /**
     * Prepares the cookies, fetches the placeholder address and prints the result.
     *
     * @param args unused
     * @throws Exception if the fetch fails
     */
    public static void main(String[] args) throws Exception {
        CrawTest.login();
        String info = CrawTest.getRes("抓取信息网址");
        System.out.println("info="+info);
    }
}