nutch自带的搜索页面嵌入在jsp中,不好用。 自己重新写成servlet,用来返回搜索结果。去除了很多不需要的功能 package cn.net.nit.jpCourse.search.servlet; import java.io.IOException; import javax.servlet.RequestDispatcher; import javax.servlet.ServletConfig; import javax.servlet.ServletException; import javax.servlet.http.HttpServlet; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.nutch.searcher.Hit; import org.apache.nutch.searcher.HitDetails; import org.apache.nutch.searcher.Hits; import org.apache.nutch.searcher.NutchBean; import org.apache.nutch.searcher.Query; import org.apache.nutch.searcher.Summary; import org.apache.nutch.searcher.response.SearchResults; import org.apache.nutch.util.NutchConfiguration; import cn.net.nit.jpCourse.search.model.JpCourseQuery; import cn.net.nit.jpCourse.search.util.Paginator; import cn.net.nit.jpCourse.search.util.RequestUtils; /** * Servlet that allows returning search results in multiple different formats * through a ResponseWriter Nutch extension point. * * @see org.apache.nutch.searcher.response.ResponseWriter * @author yiihsia */ public class SearchServlet extends HttpServlet { private static final long serialVersionUID = 3758898038862511079L; public static final Log LOG = LogFactory.getLog(SearchServlet.class); private NutchBean bean; private Configuration conf; // private ResponseWriters writers; private int defaultNumRows = 10; private String defaultDedupField = "site"; private int defaultNumDupes = 4; public static final String RESPONSE_TYPE = "rt"; public static final String QUERY = "query"; public static final String LANG = "lang"; public static final String START = "start"; public static final String ROWS = "rows"; public static final String SORT = "sort"; public static final String DEDUPE = "ddf"; public static final String NUM_DUPES = "dupes"; public static final String SUMMARY = "summary"; public static final String FIELDS = "field"; /** * Initializes servlet configuration default values. Gets NutchBean and * ResponseWriters. */ public void init(ServletConfig config) throws ServletException { // set sensible defaults for response writer values and cache NutchBean. // Also get and cache all ResponseWriter implementations. super.init(config); try { this.conf = NutchConfiguration.get(config.getServletContext()); this.defaultNumRows = conf.getInt( "search.response.default.numrows", 10); this.defaultDedupField = conf.get( "search.response.default.dedupfield", "site"); this.defaultNumDupes = conf.getInt( "search.response.default.numdupes", 1); bean = NutchBean.get(config.getServletContext(), this.conf); // writers = new ResponseWriters(conf); NutchBean.LOG.info(SearchServlet.class.getName() + "加载"); } catch (IOException e) { throw new ServletException(e); } } /** * Forwards all responses to doGet. */ protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { doGet(request, response); } /** * Handles all search requests. Gets parameter input. Does the search and * gets Hits, details, and summaries. Passes off to ResponseWriter classes * to writer different output formats directly to HttpServletResponse. */ protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { double s = now(); if (NutchBean.LOG.isInfoEnabled()) { NutchBean.LOG.info("Query request from " + request.getRemoteAddr()); } JpCourseQuery myQuery = null; try { myQuery = new JpCourseQuery(request); } catch (IOException e) { response.sendRedirect("/jpsearch"); return; } // get the query // get various other search parameters, fields allows only returning a // given set of fields boolean withSummary = RequestUtils.getBooleanParameter(request, SUMMARY, true); int start = RequestUtils.getIntegerParameter(request, START, 0); int rows = RequestUtils.getIntegerParameter(request, ROWS, defaultNumRows); String dedup = RequestUtils.getStringParameter(request, DEDUPE, defaultDedupField); int numDupes = RequestUtils.getIntegerParameter(request, NUM_DUPES, defaultNumDupes); // parse out the query Query queryObj = null; try { queryObj = Query.parse(myQuery.getQuery(), "zh", this.conf); } catch (Exception e1) { // TODO Auto-generated catch block e1.printStackTrace(); } if (NutchBean.LOG.isInfoEnabled()) { NutchBean.LOG.info("query: " + myQuery.getQuery()); } SearchServlet.LOG.info("numdupes=" + numDupes); // search and return hits Hits hits; try { hits = bean.search(queryObj, start + rows, 0, dedup); } catch (IOException e) { if (NutchBean.LOG.isWarnEnabled()) { NutchBean.LOG.warn("Search Error", e); } hits = new Hits(0, new Hit[0]); } // get the total number of hits, the hits to show, and the hit details long totalHits = hits.getTotal(); NutchBean.LOG.info("totalHits = " + totalHits); int end = (int) Math.min(hits.getLength(), start + rows); NutchBean.LOG.info("getLength = " + hits.getLength()); int numHits = (end > start) ? (end - start) : 0; Hit[] show = hits.getHits(start, numHits); HitDetails[] details = bean.getDetails(show); // setup the SearchResults object, used in response writing SearchResults results = new SearchResults(); // results.setResponseType(respType); results.setQuery(myQuery.getShowQuery()); // results.setSort(sort); // results.setReverse(reverse); results.setStart(start); results.setRows(rows); results.setEnd(end); results.setTotalHits(totalHits); results.setHits(show); results.setDetails(details); // are we returning summaries with results, if not avoid network hit if (withSummary) { Summary[] summaries = bean.getSummary(details, queryObj); results.setSummaries(summaries); results.setWithSummary(true); } else { results.setWithSummary(false); } //构造分页类 Paginator自己编写的 Paginator paginator = new Paginator((int) totalHits, start, rows, myQuery.getUrlQuery() + "&rows=" + rows + "&",myQuery.getShowQuery()); request.setAttribute("results", results); request.setAttribute("stats", myQuery.getStats(totalHits, String .valueOf((now() - s) / 1000), start + 1, end)); request.setAttribute("paginator", paginator.getPaginatorString()); request.setAttribute("radio", myQuery.getRadioHtml()); RequestDispatcher view = request.getRequestDispatcher("search.jsp"); view.forward(request, response); } private double now() { return System.currentTimeMillis(); } }