先看代码,方便以后复制,就不作为code格式了。
package com.amazon.commons;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.commons.httpclient.DefaultHttpMethodRetryHandler;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.methods.PostMethod;
import org.apache.commons.httpclient.params.HttpMethodParams;
/**
 * Small helper around Commons HttpClient that fetches a URL with GET and
 * returns the decoded body together with the charset used to decode it.
 *
 * <p>The charset is returned alongside the text so that a caller (for example
 * a servlet or action) can set the same encoding on its own response before
 * writing the text out.
 */
public class HttpClientHelper {
    /** Key of the decoded response body in the map returned by {@link #get(String)}. */
    public static final String RESPONSE_TEXT = "responseText";
    /** Key of the charset name in the map returned by {@link #get(String)}. */
    public static final String CHARSET = "charset";

    /** Demo entry point: fetches a page and prints its decoded body. */
    public static void main(String[] args) {
        System.out.println(get("http://www.baidu.com/").get(HttpClientHelper.RESPONSE_TEXT));
    }

    /**
     * Executes a GET request against {@code url}.
     *
     * <p>A non-200 status is logged to stderr but the body is still read.
     * {@link HttpException} and {@link IOException} are logged and result in
     * an empty map; any other (unchecked) exception propagates to the caller.
     *
     * @param url the URL to fetch
     * @return a map holding {@link #RESPONSE_TEXT} and {@link #CHARSET}; empty
     *         when the request failed or the response had no body
     */
    public static Map<String, String> get(String url) {
        Map<String, String> result = new HashMap<String, String>();
        HttpClient client = new HttpClient();
        GetMethod method = new GetMethod(url);
        // Retry up to 3 times on recoverable errors.
        method.getParams().setParameter(HttpMethodParams.RETRY_HANDLER,
                new DefaultHttpMethodRetryHandler(3, false));
        try {
            int statusCode = client.executeMethod(method);
            // Charset reported by HttpClient for this response; it is used to
            // decode the body below and handed back to the caller.
            String charset = method.getResponseCharSet();
            // Printed to the console so the detected page encoding is visible.
            System.out.println(charset);
            method.getParams().setParameter(HttpMethodParams.HTTP_CONTENT_CHARSET, charset);
            if (statusCode != HttpStatus.SC_OK) {
                System.err.println("Method failed: " + method.getStatusLine());
            }
            /*
             * Note: getResponseBody() buffers the entire response body in
             * memory, so a malicious server may exhaust the VM memory. Prefer
             * getResponseBodyAsStream() when the content length is unknown or
             * reasonably large.
             */
            byte[] responseBody = method.getResponseBody();
            // The body may be absent; without this guard new String(...) would
            // throw a NullPointerException.
            if (responseBody != null) {
                // Use caution: this assumes textual (not binary) content.
                result.put(RESPONSE_TEXT, new String(responseBody, charset));
                result.put(CHARSET, charset);
            }
        } catch (HttpException e) {
            System.err.println("Fatal protocol violation: " + e.getMessage());
            e.printStackTrace();
        } catch (IOException e) {
            // Also covers UnsupportedEncodingException from new String(...).
            System.err.println("Fatal transport error: " + e.getMessage());
            e.printStackTrace();
        } finally {
            // Release the connection. Do NOT return from here: a return inside
            // finally silently discards any exception in flight.
            method.releaseConnection();
        }
        return result;
    }
}
查看控制台我们可以看到,百度的页面编码设置是gb2312.
核心代码:
这里我之所以把charset作为一个返回参数是有原因的:在action或者servlet调用完这个方法之后,如果只返回一个responseText,那么response在打印输出的时候是不知道这段text的编码的,所以需要把charset一并返回,由调用方设置到response上。
action代码:
response.setCharacterEncoding(result.get(HttpClientHelper.CHARSET)); 这句话如果不设置(并且必须在调用 response.getWriter() 之前设置),输出就可能出现乱码。
package com.amazon.commons;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.commons.httpclient.DefaultHttpMethodRetryHandler;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.methods.PostMethod;
import org.apache.commons.httpclient.params.HttpMethodParams;
/**
 * Small helper around Commons HttpClient that fetches a URL with GET and
 * returns the decoded body together with the charset used to decode it.
 *
 * <p>The charset is returned alongside the text so that a caller (for example
 * a servlet or action) can set the same encoding on its own response before
 * writing the text out.
 */
public class HttpClientHelper {
    /** Key of the decoded response body in the map returned by {@link #get(String)}. */
    public static final String RESPONSE_TEXT = "responseText";
    /** Key of the charset name in the map returned by {@link #get(String)}. */
    public static final String CHARSET = "charset";

    /** Demo entry point: fetches a page and prints its decoded body. */
    public static void main(String[] args) {
        System.out.println(get("http://www.baidu.com/").get(HttpClientHelper.RESPONSE_TEXT));
    }

    /**
     * Executes a GET request against {@code url}.
     *
     * <p>A non-200 status is logged to stderr but the body is still read.
     * {@link HttpException} and {@link IOException} are logged and result in
     * an empty map; any other (unchecked) exception propagates to the caller.
     *
     * @param url the URL to fetch
     * @return a map holding {@link #RESPONSE_TEXT} and {@link #CHARSET}; empty
     *         when the request failed or the response had no body
     */
    public static Map<String, String> get(String url) {
        Map<String, String> result = new HashMap<String, String>();
        HttpClient client = new HttpClient();
        GetMethod method = new GetMethod(url);
        // Retry up to 3 times on recoverable errors.
        method.getParams().setParameter(HttpMethodParams.RETRY_HANDLER,
                new DefaultHttpMethodRetryHandler(3, false));
        try {
            int statusCode = client.executeMethod(method);
            // Charset reported by HttpClient for this response; it is used to
            // decode the body below and handed back to the caller.
            String charset = method.getResponseCharSet();
            // Printed to the console so the detected page encoding is visible.
            System.out.println(charset);
            method.getParams().setParameter(HttpMethodParams.HTTP_CONTENT_CHARSET, charset);
            if (statusCode != HttpStatus.SC_OK) {
                System.err.println("Method failed: " + method.getStatusLine());
            }
            /*
             * Note: getResponseBody() buffers the entire response body in
             * memory, so a malicious server may exhaust the VM memory. Prefer
             * getResponseBodyAsStream() when the content length is unknown or
             * reasonably large.
             */
            byte[] responseBody = method.getResponseBody();
            // The body may be absent; without this guard new String(...) would
            // throw a NullPointerException.
            if (responseBody != null) {
                // Use caution: this assumes textual (not binary) content.
                result.put(RESPONSE_TEXT, new String(responseBody, charset));
                result.put(CHARSET, charset);
            }
        } catch (HttpException e) {
            System.err.println("Fatal protocol violation: " + e.getMessage());
            e.printStackTrace();
        } catch (IOException e) {
            // Also covers UnsupportedEncodingException from new String(...).
            System.err.println("Fatal transport error: " + e.getMessage());
            e.printStackTrace();
        } finally {
            // Release the connection. Do NOT return from here: a return inside
            // finally silently discards any exception in flight.
            method.releaseConnection();
        }
        return result;
    }
}
查看控制台我们可以看到,百度的页面编码设置是gb2312.
核心代码:
// Excerpt from HttpClientHelper.get(url): detect the response charset and
// return it together with the decoded text.
// Execute the request; the returned status code is checked in the full listing.
int statusCode = client.executeMethod(method);
// Charset that HttpClient reports for this response.
String charset=method.getResponseCharSet();
// Print it so the detected page encoding is visible on the console.
System.out.println(charset);
method.getParams().setParameter(HttpMethodParams.HTTP_CONTENT_CHARSET, charset);
// responseBody is the byte[] from method.getResponseBody(); that line of the
// full listing is omitted from this excerpt.
// Decode the bytes with the detected charset instead of the platform default.
result.put(HttpClientHelper.RESPONSE_TEXT, new String(responseBody,charset));
// Hand the charset back so the caller can set it on its own response.
result.put(HttpClientHelper.CHARSET, charset);
这里我之所以把charset作为一个返回参数是有原因的:在action或者servlet调用完这个方法之后,如果只返回一个responseText,那么response在打印输出的时候是不知道这段text的编码的,所以需要把charset一并返回,由调用方设置到response上。
action代码:
// Write the fetched page to the servlet response using the same charset that
// was used to decode it. `result` is the map returned by HttpClientHelper.get(url).
try {
    HttpServletResponse response = ServletActionContext.getResponse();
    // Both entries are missing when the fetch failed, so guard against null.
    String pageCharset = result.get(HttpClientHelper.CHARSET);
    String pageText = result.get(HttpClientHelper.RESPONSE_TEXT);
    // The encoding must be set BEFORE getWriter() is called; setting it
    // afterwards has no effect and the output may be garbled.
    if (pageCharset != null) {
        response.setCharacterEncoding(pageCharset);
    }
    PrintWriter writer = response.getWriter();
    try {
        // writer.write((String) null) would throw a NullPointerException.
        if (pageText != null) {
            writer.write(pageText);
        }
    } finally {
        // Close the writer even if writing fails.
        writer.close();
    }
} catch (IOException e) {
    // Obtaining the response writer failed; nothing more can be sent to the client.
    e.printStackTrace();
}
response.setCharacterEncoding(result.get(HttpClientHelper.CHARSET)); 这句话如果不设置(并且必须在调用 response.getWriter() 之前设置),输出就可能出现乱码。