[原创] Google Custom Search & Yahoo Boss Search | Web Search API 使用

[b]引用本文时,请标明本文地址[/b]

话外音:前几天上网找资料时,发现自己写的文章被人转载了,心中暗喜。
可是我却发现,该转载者并未标明我的文章原文出处,不标明这个也罢了,
可是转载者竟然连标题都不改(我的文章标题已经写了【原创】二字)。
哎,哪怕你把"【原创】"两字给删了再转载也行啊!
嘿嘿!话外音就说到这吧!开始正文。

Google和Yahoo的新Web Search API已经升级了一段时间了,最近正好
有项目要做相关的API升级,因此就对新的API进行了调查,并写了perl和java
的例子。现将java例子的主要代码与大家分享下。

[b]API简介:[/b]
Google升级后的Web Search叫"Google Custom Search"(简称CSE),
Yahoo升级后的Web Search叫"Yahoo! Search BOSS"。

二者都采用REST的方式进行调用,并且都支持JSON格式的返回结果。
以下例子,就是对Google CSE的基本使用加以说明,并且处理JSON形式的返回结果。

由于两者都是RESTful的,因此Yahoo的例子这里就不贴出来了
(因为可以很方便地根据下面的例子,改成Yahoo的),大家可以参考下
Google的例子,自己改写成支持"Yahoo! Search BOSS"的版本。

本例子使用了httpclient4 + google cse api + json
httpclient4的使用入门,大家可以参考我写的如下文章:
[url]http://yhz61010.iteye.com/blog/654678[/url]

[b]本类功能说明:[/b]
1. 按指定关键字进行结果检索。
2. 查找指定的关键字或URL在Google上的排名。

以下是主类的详细source:

/**
 * Google Custom Search Engine client.
 *
 * <p>Builds a request for the Google Custom Search REST endpoint from the
 * configured keys and query settings, parses the JSON response into beans,
 * and can locate target keywords or URLs among the returned items.
 *
 * @author Michael Leo
 * @version 1.0 2011/01/24
 */
public class GoogleCSE {
    protected static final Class<GoogleCSE> clazz = GoogleCSE.class;

    private String cseKey = "Your custom search id";

    private String apiConsoleKey = "Your console api id";

    private String keyword = "Google";

    private String language;

    private int num = 10;

    private int start = 1;

    private String uri;

    /**
     * Finds the items of a search result that match any target word (in the
     * title or snippet) or any target URL (in the link).
     *
     * @param targetWords words to look for in each item's title and snippet; may be null
     * @param targetUrls  URL fragments to look for in each item's link; may be null
     * @param result      the map returned by {@link #execute()}
     * @return the matching items with their rank (request start index plus the
     *         item's position in the page), de-duplicated by link; an empty
     *         list when the result carries no response; {@code null} when both
     *         target arrays are null
     */
    public List<RankBean> getRank(String[] targetWords, String[] targetUrls,
            Map<String, Object> result) {
        if (targetWords == null && targetUrls == null) {
            Log.log(LogLevel.DEBUG, clazz,
                    "Both of target words and urls are null.");
            return null;
        }

        List<RankBean> rank = new ArrayList<RankBean>();

        // An error result from execute() has no "response" entry.
        ResponseBean res = result == null ? null
                : (ResponseBean) result.get("response");
        if (res == null) {
            Log.log(LogLevel.DEBUG, clazz, "Result has no response.");
            return rank;
        }
        int startIndex = res.getQueries().getRequest().get(0).getStartIndex();

        @SuppressWarnings("unchecked")
        List<ItemsBean> list = (List<ItemsBean>) result.get("items");

        // "items" can be missing (e.g. a query with no hits), so guard it.
        for (int i = 0; list != null && i < list.size(); i++) {
            ItemsBean item = list.get(i);
            boolean matchedByWord = containsAny(item.getTitle(), targetWords)
                    || containsAny(item.getSnippet(), targetWords);
            boolean matchedByUrl = !matchedByWord
                    && containsAny(item.getLink(), targetUrls);
            // Each item is added at most once, however many targets it matches.
            if (matchedByWord || matchedByUrl) {
                rank.add(toRankBean(item, startIndex + i,
                        matchedByWord ? "targetWords" : "targetUrls"));
            }
        }

        return RemoveDuplication.removeDuplication(rank, "link");
    }

    /** Returns true when {@code text} contains at least one non-null entry of {@code targets}. */
    private static boolean containsAny(String text, String[] targets) {
        if (text == null || targets == null) {
            return false;
        }
        for (String target : targets) {
            if (target != null && text.indexOf(target) > -1) {
                return true;
            }
        }
        return false;
    }

    /** Builds a RankBean with the given rank and the item's properties copied in (best effort). */
    private static RankBean toRankBean(ItemsBean item, int position,
            String matchedBy) {
        RankBean ranking = new RankBean();
        ranking.setRank(position);
        try {
            BeanUtils.copyProperties(ranking, item);
        } catch (Exception e) {
            // Best effort: keep the entry with only its rank set, but record why.
            Log.log(LogLevel.DEBUG, clazz,
                    "Can't copy properties: " + matchedBy + " - " + e);
        }
        return ranking;
    }

    /**
     * Extracts the next-page descriptor from a search result.
     *
     * @param result the map returned by {@link #execute()}
     * @return the first next-page entry, or {@code null} when the result has
     *         no response or the response lists no next page
     */
    public NextPageBean nextPageInfo(Map<String, Object> result) {
        ResponseBean res = result == null ? null
                : (ResponseBean) result.get("response");
        if (res == null || res.getQueries() == null
                || res.getQueries().getNextPage() == null
                || res.getQueries().getNextPage().isEmpty()) {
            return null;
        }
        return res.getQueries().getNextPage().get(0);
    }

    /**
     * Runs the search with the current settings and parses the JSON response.
     *
     * @return a map holding either an {@code "error"} entry, or the entries
     *         {@code "totalResults"}, {@code "count"}, {@code "startIndex"},
     *         {@code "items"} and {@code "response"}; {@code null} when the
     *         HTTP response carries no entity
     * @throws Exception if the request cannot be built, sent or parsed
     */
    public Map<String, Object> execute() throws Exception {
        Map<String, Object> result = new HashMap<String, Object>();

        DefaultHttpClient httpclient = new DefaultHttpClient();
        try {
            List<NameValuePair> params = new ArrayList<NameValuePair>();
            params.add(new BasicNameValuePair("alt", "json"));
            params.add(new BasicNameValuePair("cx", cseKey));
            params.add(new BasicNameValuePair("key", apiConsoleKey));
            params.add(new BasicNameValuePair("q", keyword));
            if (StringUtils.isNotBlank(language)) {
                params.add(new BasicNameValuePair("lr", language));
            }
            params.add(new BasicNameValuePair("num", String.valueOf(num)));
            params.add(new BasicNameValuePair("start", String.valueOf(start)));
            URI requestUri = URIUtils.createURI("https", "www.googleapis.com",
                    -1, "/customsearch/v1",
                    URLEncodedUtils.format(params, "UTF-8"), null);

            HttpGet httpget = new HttpGet(requestUri);
            this.uri = httpget.getURI().toString();
            Log.log(LogLevel.DEBUG, clazz, this.uri);

            HttpResponse response = httpclient.execute(httpget);
            HttpEntity entity = response.getEntity();
            if (entity == null) {
                Log.log(LogLevel.DEBUG, clazz, "Entity is null.");
                return null;
            }
            entity = new BufferedHttpEntity(entity);

            String strResponse = EntityUtils.toString(entity, HTTP.UTF_8);
            JSONObject json = JsonUtils.object2Json(strResponse);
            ResponseBean res = JsonUtils.json2Object(json, ResponseBean.class,
                    buildClassMap());

            if (res.getError() != null) {
                result.put("error", res.getError());
            } else {
                result.put("totalResults", res.getQueries().getRequest().get(0)
                        .getTotalResults());
                result.put("count", res.getQueries().getRequest().get(0)
                        .getCount());
                result.put("startIndex", res.getQueries().getRequest().get(0)
                        .getStartIndex());
                result.put("items", res.getItems());
                result.put("response", res);
            }
        } finally {
            // Release the client's connections on every exit path.
            httpclient.getConnectionManager().shutdown();
        }

        return result;
    }

    /** Maps JSON property names to the bean classes used when parsing the response. */
    private static Map<String, Class<?>> buildClassMap() {
        Map<String, Class<?>> classMap = new HashMap<String, Class<?>>();
        classMap.put("bodyLines", BodyLinesBean.class);
        classMap.put("context", ContextBean.class);
        classMap.put("items", ItemsBean.class);
        classMap.put("nextPage", NextPageBean.class);
        classMap.put("previousPage", PreviousPageBean.class);
        classMap.put("promotions", PromotionsBean.class);
        classMap.put("queries", QueriesBean.class);
        classMap.put("request", RequestBean.class);
        classMap.put("url", UrlBean.class);
        classMap.put("pagemap", PageMapBean.class);
        classMap.put("metatags", MetatagsBean.class);
        classMap.put("person", PersonBean.class);
        classMap.put("hcard", HcardBean.class);
        classMap.put("Movie", MovieBean.class);
        classMap.put("moviereview", MovieReviewBean.class);
        classMap.put("error", ErrorBean.class);
        classMap.put("errors", ErrorsBean.class);
        return classMap;
    }

    // Accessors called by GoogleCSETest.

    /** Sets the custom search engine id sent as the {@code cx} parameter. */
    public void setCseKey(String cseKey) {
        this.cseKey = cseKey;
    }

    /** Sets the API key sent as the {@code key} parameter. */
    public void setApiConsoleKey(String apiConsoleKey) {
        this.apiConsoleKey = apiConsoleKey;
    }

    /** Sets the query sent as the {@code q} parameter. */
    public void setKeyword(String keyword) {
        this.keyword = keyword;
    }

    /** Sets the value sent as the {@code lr} parameter; blank values are not sent. */
    public void setLanguage(String language) {
        this.language = language;
    }

    /** Sets the value sent as the {@code num} parameter. */
    public void setNum(int num) {
        this.num = num;
    }

    /** Sets the value sent as the {@code start} parameter. */
    public void setStart(int start) {
        this.start = start;
    }

    /** Returns the URI of the most recent request issued by {@link #execute()}. */
    public String getUri() {
        return uri;
    }
}


以下是JUnit的测试类:


/**
 * Google CSE Test.
 *
 * <p>Pages through the search results for a keyword, printing the ranked
 * matches and the time taken for each page and for the whole run.
 *
 * @author Michael Leo
 * @version 2011/01/25
 */
public class GoogleCSETest {
    /** Set to true to also print every returned item, not only the ranked matches. */
    private static final boolean PRINT_ALL_ITEMS = false;

    /**
     * Queries page after page until the response reports no next page or an
     * error is returned.
     *
     * @throws Exception if a search request fails
     */
    @Test
    public void case01() throws Exception {
        P.p("Google CSE - Start.");
        P.p();
        long ast = System.currentTimeMillis();
        long aed = 0;

        long st = 0;
        long ed = 0;

        GoogleCSE cse = new GoogleCSE();
        cse.setApiConsoleKey("Your console api key");

        cse.setCseKey("Your cse key");
        cse.setKeyword("Google");
        cse.setLanguage("lang_zh-CN");
        cse.setNum(10);

        NextPageBean np = null;
        int index = 1;
        ErrorBean err = null;
        do {
            st = System.currentTimeMillis();
            Map<String, Object> result = cse.execute();
            // execute() returns null when the HTTP response has no entity.
            if (result == null) {
                P.p("No result returned.");
                return;
            }
            if ((err = (ErrorBean) result.get("error")) != null) {
                P.p("Error code: " + err.getCode());
                P.p("Message: " + err.getMessage());
                return;
            }
            // Move the start position forward for the next loop iteration.
            np = cse.nextPageInfo(result);
            if (np != null) {
                cse.setStart(np.getStartIndex());
            }

            int startIndex = ((ResponseBean) result.get("response"))
                    .getQueries().getRequest().get(0).getStartIndex();
            P.p("Start index: " + startIndex);
            P.p("Query url:\n" + cse.getUri());
            P.p("totalResults: "
                    + MiscellaneousUtils.formatNumber(result
                            .get("totalResults")));

            if (PRINT_ALL_ITEMS) {
                @SuppressWarnings("unchecked")
                List<ItemsBean> list = (List<ItemsBean>) result.get("items");

                for (int i = 0; list != null && i < list.size(); i++) {
                    P.p(index++ + ": " + list.get(i).getTitle());
                    P.p(list.get(i).getSnippet());
                    P.p(list.get(i).getLink());
                    P.p();
                }
            }

            String[] targetWords = { "Google Chrome" };
            String[] targetUrls = { "google.com" };
            List<RankBean> ranking = cse.getRank(targetWords, targetUrls,
                    result);

            for (int i = 0; i < ranking.size(); i++) {
                P.p();
                P.p("Rank: " + ranking.get(i).getRank());
                P.p(ranking.get(i).getTitle());
                P.p(ranking.get(i).getSnippet());
                P.p(ranking.get(i).getLink());
            }

            ed = System.currentTimeMillis();
            P.p("Cost: " + (ed - st) / 1000.0 + "s");
            P.p();
        } while (np != null);
        aed = System.currentTimeMillis();
        P.p("Google CSE - Finished.");
        P.p("Cost: " + (aed - ast) / 1000.0 + "s");
    }
}
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值