最近在做 http://qqma.sinaapp.com 的时候，用到了淘宝搜索关键字建议的抓取，
于是写了一个简单的例子。
下面直接贴代码。
package baidu;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.httpclient.DefaultHttpMethodRetryHandler;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.params.HttpConnectionManagerParams;
import org.apache.commons.httpclient.params.HttpMethodParams;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
/**
 * Fetches keyword suggestions from the Taobao suggestion endpoint and extracts
 * the suggested keywords from its JSONP response.
 *
 * <p>The endpoint is called as
 * {@code http://suggest.taobao.com/sug?extras=1&code=utf-8&callback=KISSY.Suggest.callback&q=hello}
 * and the parser expects a payload of the form
 * {@code KISSY.Suggest.callback({"result": [["keyword", "count"], ...]})}.
 */
public class TaobaoSuggestion {

    /** Endpoint URL; the URL-encoded keyword is appended to it. */
    private static final String SUGGEST_URL =
            "http://suggest.taobao.com/sug?extras=1&code=utf-8&callback=KISSY.Suggest.callback&q=";

    /** Connection timeout in milliseconds. */
    private static final int CONNECT_TIMEOUT_MS = 3000;

    /** Socket read timeout in milliseconds. */
    private static final int READ_TIMEOUT_MS = 2200;

    /** JSON key whose value is the array of suggestion entries. */
    private static final String RESULT_KEY = "\"result\"";

    /**
     * Requests suggestions for {@code keyword} and returns the suggested keywords.
     *
     * <p>This is a best-effort lookup: a {@code null} keyword, a non-200 status,
     * an empty body or an I/O failure all yield an empty list (failures are
     * reported on {@code System.err}) instead of an exception. The {@code throws}
     * clause is kept so existing callers continue to compile.
     *
     * @param keyword the search text to get suggestions for
     * @param limit   maximum number of suggestions to return; values {@code <= 0}
     *                mean "no limit"
     * @return the suggested keywords in response order, never {@code null}
     */
    public static List<String> getSuggestion(String keyword, int limit)
            throws HttpException, IOException, JSONException {
        List<String> suggestion = new ArrayList<String>();
        if (keyword == null) {
            return suggestion;
        }

        // Timeouts must be configured before the request is executed to have any effect.
        HttpClient httpClient = new HttpClient();
        HttpConnectionManagerParams managerParams = httpClient.getHttpConnectionManager().getParams();
        managerParams.setConnectionTimeout(CONNECT_TIMEOUT_MS);
        managerParams.setSoTimeout(READ_TIMEOUT_MS);

        GetMethod getMethod = null;
        try {
            getMethod = new GetMethod(SUGGEST_URL + URLEncoder.encode(keyword, "utf-8"));
            getMethod.getParams().setParameter(HttpMethodParams.RETRY_HANDLER,
                    new DefaultHttpMethodRetryHandler());

            // Execute the request exactly once.
            int statusCode = httpClient.executeMethod(getMethod);
            if (statusCode != HttpStatus.SC_OK) {
                System.err.println("Method failed: " + getMethod.getStatusLine());
                return suggestion;
            }

            byte[] responseBody = getMethod.getResponseBody();
            if (responseBody == null) {
                return suggestion;
            }

            List<String> parsed = parseTaobaoJson(new String(responseBody, "utf8"));
            if (limit > 0 && parsed.size() > limit) {
                // Copy so the caller gets an independent list, not a subList view.
                return new ArrayList<String>(parsed.subList(0, limit));
            }
            return parsed;
        } catch (IOException ex) {
            // Best effort: report the failure instead of hiding it, then fall back to empty.
            System.err.println("Taobao suggestion request failed: " + ex);
            return suggestion;
        } finally {
            // Always hand the connection back to the connection manager.
            if (getMethod != null) {
                getMethod.releaseConnection();
            }
        }
    }

    /**
     * Extracts the suggested keywords from a suggestion response.
     *
     * <p>The text is scanned for the {@code "result"} array and the first string
     * of every entry array inside it is collected (unescaped and trimmed). The
     * scan is aware of string literals, so keywords containing commas, brackets
     * or escaped quotes are handled, and anything after the result array is
     * ignored.
     *
     * @param txt the raw response text, with or without the JSONP wrapper; may be {@code null}
     * @return the keywords in response order; empty when {@code txt} is {@code null}
     *         or contains no result array
     */
    public static List<String> parseTaobaoJson(String txt) {
        List<String> keywords = new ArrayList<String>();
        if (txt == null) {
            return keywords;
        }
        int start = findResultArrayStart(txt);
        if (start < 0) {
            return keywords;
        }

        int depth = 0;                // 1 = inside the result array, 2 = inside one entry
        boolean atEntryStart = false; // true right after an entry's '[' until its first token
        for (int i = start; i < txt.length(); i++) {
            char c = txt.charAt(i);
            if (c == '[') {
                depth++;
                atEntryStart = (depth == 2);
            } else if (c == ']') {
                depth--;
                atEntryStart = false;
                if (depth == 0) {
                    break; // end of the result array
                }
            } else if (c == '"') {
                int close = findClosingQuote(txt, i + 1);
                if (close < 0) {
                    break; // truncated response: keep what was collected so far
                }
                if (atEntryStart) {
                    keywords.add(unescapeJson(txt.substring(i + 1, close)).trim());
                }
                atEntryStart = false;
                i = close; // skip the whole literal so its content is never read as structure
            } else if (!Character.isWhitespace(c)) {
                atEntryStart = false;
            }
        }
        return keywords;
    }

    /** Returns the index of the '[' that opens the "result" array, or -1 if there is none. */
    private static int findResultArrayStart(String txt) {
        int key = txt.indexOf(RESULT_KEY);
        if (key < 0) {
            return -1;
        }
        int i = skipWhitespace(txt, key + RESULT_KEY.length());
        if (i >= txt.length() || txt.charAt(i) != ':') {
            return -1;
        }
        i = skipWhitespace(txt, i + 1);
        if (i >= txt.length() || txt.charAt(i) != '[') {
            return -1;
        }
        return i;
    }

    /** Returns the index of the first non-whitespace character at or after {@code from}. */
    private static int skipWhitespace(String txt, int from) {
        int i = from;
        while (i < txt.length() && Character.isWhitespace(txt.charAt(i))) {
            i++;
        }
        return i;
    }

    /** Returns the index of the unescaped '"' closing a literal whose content starts at {@code from}, or -1. */
    private static int findClosingQuote(String txt, int from) {
        for (int i = from; i < txt.length(); i++) {
            char c = txt.charAt(i);
            if (c == '\\') {
                i++; // the escaped character can never close the literal
            } else if (c == '"') {
                return i;
            }
        }
        return -1;
    }

    /** Decodes JSON string escapes (\", \\, \/, \b, \f, \n, \r, \t, \\uXXXX) in {@code raw}. */
    private static String unescapeJson(String raw) {
        if (raw.indexOf('\\') < 0) {
            return raw;
        }
        StringBuilder out = new StringBuilder(raw.length());
        for (int i = 0; i < raw.length(); i++) {
            char c = raw.charAt(i);
            if (c != '\\' || i + 1 >= raw.length()) {
                out.append(c);
                continue;
            }
            char esc = raw.charAt(++i);
            switch (esc) {
                case 'b': out.append('\b'); break;
                case 'f': out.append('\f'); break;
                case 'n': out.append('\n'); break;
                case 'r': out.append('\r'); break;
                case 't': out.append('\t'); break;
                case 'u':
                    int code = 0;
                    boolean valid = i + 4 < raw.length();
                    for (int k = 1; valid && k <= 4; k++) {
                        int digit = Character.digit(raw.charAt(i + k), 16);
                        if (digit < 0) {
                            valid = false;
                        } else {
                            code = code * 16 + digit;
                        }
                    }
                    if (valid) {
                        out.append((char) code);
                        i += 4;
                    } else {
                        out.append("\\u"); // malformed escape: keep it verbatim
                    }
                    break;
                default:
                    out.append(esc); // covers \" \\ and \/
                    break;
            }
        }
        return out.toString();
    }

    /** Demo entry point: prints the suggestions for the keyword "hr". */
    public static void main(String[] args) {
        try {
            for (String sug : getSuggestion("hr", 10)) {
                System.out.println(sug);
            }
        } catch (HttpException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (JSONException e) {
            e.printStackTrace();
        }
    }
}
兔子的口袋 http://tzdkd.sinaapp.com