闲暇来,想通过Http请求一直获取某个网站的公告文章列表,搜索和服务费有关的公告。
刚开始发送GET请求时,请求总是被aqyun的网关拦截,返回"非法请求"之类的提示。
后来尝试加上了User-Agent: Apache-HttpClient/4.2.6 (java 1.5),就可以获取返回的json数据。
猜测只要消息头部有User-Agent就能通过。
以下是我使用Java写的代码,需要的可以参考下。
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.List;
import org.apache.commons.io.IOUtils;
import org.apache.http.NameValuePair;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.util.CollectionUtils;
/**
 * Static helper for issuing simple HTTP GET requests with Apache HttpClient.
 *
 * <p>Not instantiable; all functionality is exposed through static methods.
 */
public final class HttpUtils {

    private static final Logger logger = LoggerFactory
            .getLogger(HttpUtils.class);

    private final static String ENCODER_UTF8 = "UTF-8";

    /** User-Agent value attached to every request issued by {@link #get}. */
    private static final String USER_AGENT = "Apache-HttpClient/4.2.6 (java 1.5)";

    private HttpUtils(){}

    /**
     * Performs an HTTP GET request and returns the response body decoded as UTF-8.
     *
     * <p>Testing showed that some sites reject requests that do not carry a
     * User-Agent header; whether the header value itself is validated is unknown.
     * Both of the following values were accepted:
     * <pre>
     * User-Agent: Apache-HttpClient/4.2.6 (java 1.5)
     * User-Agent: Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.108 Safari/537.36
     * </pre>
     * This method always sends the first one.
     *
     * <p>The HTTP status code is not inspected: the body is returned for any
     * response that could be read.
     *
     * @param url   target URL; may already contain a query string
     * @param pairs query parameters appended to {@code url} (names and values are
     *              URL-encoded as UTF-8; a {@code null} value is sent as an empty
     *              string); may be {@code null} or empty
     * @return the response body, or {@code null} if the request failed with an
     *         {@link IOException}, the parameters could not be encoded, or the
     *         response carried no entity
     */
    public static String get(String url, List<NameValuePair> pairs){
        String requestUrl;
        try {
            requestUrl = appendQuery(url, pairs);
        } catch (UnsupportedEncodingException e) {
            // UTF-8 is mandatory on every JVM, so this is not expected to happen.
            logger.error("encoder utf-8 not found", e);
            return null;
        }

        // The request must be created from the FINAL url: HttpGet captures its URI
        // at construction time, so building it before the query string is appended
        // would silently drop every parameter.
        HttpGet get = new HttpGet(requestUrl);
        get.addHeader("User-Agent", USER_AGENT);

        // Build the client only after the request object exists, so an invalid URL
        // cannot leave an unclosed client behind.
        CloseableHttpClient httpClient = HttpClients.custom().build();
        CloseableHttpResponse resp = null;
        try {
            resp = httpClient.execute(get);
            // Responses without a body (e.g. 204) have no entity; EntityUtils.toString
            // would throw IllegalArgumentException on a null entity.
            if (resp.getEntity() == null) {
                return null;
            }
            return EntityUtils.toString(resp.getEntity(), ENCODER_UTF8);
        } catch (IOException e) {
            logger.error("Http get request error", e);
            return null;
        } finally {
            IOUtils.closeQuietly(resp);
            get.releaseConnection();
            IOUtils.closeQuietly(httpClient);
        }
    }

    /**
     * Returns {@code url} with {@code pairs} appended as a URL-encoded query string.
     *
     * @param url   base URL, possibly already containing a query string
     * @param pairs parameters to append; {@code null} or empty leaves {@code url} unchanged
     * @return the URL including the encoded parameters
     * @throws UnsupportedEncodingException if UTF-8 is not available
     */
    private static String appendQuery(String url, List<NameValuePair> pairs)
            throws UnsupportedEncodingException {
        if (CollectionUtils.isEmpty(pairs)) {
            return url;
        }

        StringBuilder query = new StringBuilder();
        for (NameValuePair pair : pairs) {
            if (query.length() > 0) {
                query.append('&');
            }
            String value = pair.getValue();
            query.append(URLEncoder.encode(pair.getName(), ENCODER_UTF8))
                    .append('=')
                    // URLEncoder.encode rejects null, so a missing value becomes "name=".
                    .append(value == null ? "" : URLEncoder.encode(value, ENCODER_UTF8));
        }

        // Pick the joiner based on what the base URL already ends with / contains.
        String separator;
        if (url.endsWith("?") || url.endsWith("&")) {
            separator = "";
        } else if (url.indexOf('?') >= 0) {
            separator = "&";
        } else {
            separator = "?";
        }
        return url + separator + query;
    }
}