Lucene使用笔记

最新推荐文章于 2014-07-08 14:59:39 发布

ixr_wang

最新推荐文章于 2014-07-08 14:59:39 发布

阅读量603

点赞数

分类专栏：笔记-这是笔记！~ 文章标签： lucene string properties join exception date

本文链接：https://blog.csdn.net/ixr_wang/article/details/6583697

版权

笔记-这是笔记！~ 专栏收录该内容

42 篇文章 0 订阅

订阅专栏

package name.ixr.service;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.sql.Statement;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Properties;

import name.ixr.collector.GroupingCollector;
import name.ixr.dao.SearchDao;
import name.ixr.util.SqlUtil;
import name.ixr.vo.SearchResultSet;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;

/**
 * 搜索引擎逻辑
 * 
 * @author IXR
 */
public class SearchService {

    private static final String CONF_PATH = "jdbc.properties"; // database configuration file (classpath resource name)
    /**
     * Date formatter applied to datetime column values before they are indexed.
     * NOTE(review): the original comment said Lucene only understands the
     * {@code yyyyMMddHHmmss} format, but the pattern configured here is
     * {@code yyyyMMdd} - confirm which one is intended.
     */
    public static final SimpleDateFormat dateFormat = new SimpleDateFormat("yyyyMMdd");
    /** Database query helper; assigned in the static initializer. */
    private static SqlUtil sqlUtil = null;
    /** Location of the index; assigned in the static initializer. */
    private static File INDEX_DIR = null;
    
    /*
     * Loads the classpath resource "/jdbc.properties", builds the SqlUtil from
     * it and resolves the index directory from the "index" property.
     *
     * The "index" value is either "classpath:<relative path>" (resolved
     * against the classpath root) or a URI accepted by File(URI).
     *
     * Configuration problems are reported as IllegalStateException with the
     * cause attached instead of surfacing later as an unexplained
     * NullPointerException. A missing JDBC driver stays non-fatal, as before.
     */
    static {
        Properties properties = new Properties();
        // Resolve through this class's own loader. Object.class is loaded by the
        // bootstrap loader, so the lookup would fall back to the system class
        // path and miss resources that only the application loader can see.
        InputStream inputStream = SearchService.class.getResourceAsStream("/" + CONF_PATH);
        if (inputStream == null) {
            throw new IllegalStateException("数据库配置文件加载失败: /" + CONF_PATH + " not found on classpath");
        }
        try {
            properties.load(inputStream);
        } catch (IOException e) {
            throw new IllegalStateException("数据库配置文件加载失败", e);
        } finally {
            try {
                inputStream.close();
            } catch (IOException ignored) {
                // closing a read-only stream failed; nothing useful can be done
            }
        }
        try {
            sqlUtil = new SqlUtil(properties);
        } catch (ClassNotFoundException e) {
            // Kept non-fatal: searching an existing index does not need the database.
            System.err.println("找不到数据库连接驱动: " + e.getMessage());
        }
        String path = properties.getProperty("index");
        if (path == null) {
            throw new IllegalStateException("索引目录构建失败: property 'index' is missing in " + CONF_PATH);
        }
        try {
            String starts = "classpath:";
            URI dirUri;
            if (path.startsWith(starts)) {
                java.net.URL classpathRoot = SearchService.class.getResource("/");
                if (classpathRoot == null) {
                    throw new IllegalStateException("索引目录构建失败: classpath root cannot be resolved for " + path);
                }
                dirUri = new URI(classpathRoot.toURI().toString() + path.substring(starts.length()));
            } else {
                dirUri = new URI(path);
            }
            INDEX_DIR = new File(dirUri);
        } catch (URISyntaxException e) {
            throw new IllegalStateException("索引目录构建失败: " + path, e);
        }
    }
    
    /**
     * Drops the index by deleting every entry directly inside the index
     * directory. Sub-directories are not descended into.
     * <p>
     * Does nothing when the index directory does not exist or cannot be listed
     * ({@code File.listFiles()} returns {@code null} in that case). Entries
     * that cannot be deleted are reported on standard error.
     */
    private static void dropIndex() {
        File[] entries = INDEX_DIR.listFiles();
        if (entries == null) {
            // Missing or unreadable directory: there is no index to drop.
            return;
        }
        for (File file : entries) {
            if (!file.delete()) {
                System.err.println("索引文件删除失败: " + file.getAbsolutePath());
            }
        }
    }

    /**
     * Opens the Lucene directory for the configured index location.
     * 
     * @return the directory returned by {@code FSDirectory.open} for
     *         {@code INDEX_DIR}
     * @throws IOException
     *             if the directory cannot be opened
     */
    private static Directory getDirectory() throws IOException {
        Directory indexDirectory = FSDirectory.open(INDEX_DIR);
        return indexDirectory;
    }

    /**
     * Creates the analyzer used for both indexing and query parsing.
     * 
     * @return a new {@code IKAnalyzer}
     */
    private static Analyzer getAnalyzer() {
        // IKAnalyzer is the one in use; StandardAnalyzer was noted as an alternative.
        return new IKAnalyzer();
    }

    /**
     * Adds the given documents to the index, then optimizes it.
     * <p>
     * The writer and the directory are closed in {@code finally} blocks, so a
     * failure while adding or optimizing does not leave the index write lock
     * held.
     * 
     * @param documents
     *            the documents to add to the index
     * 
     * @throws CorruptIndexException
     *             if the index is corrupt
     * @throws LockObtainFailedException
     *             if the index write lock cannot be obtained
     * @throws IOException
     *             on a low-level I/O error
     */
    private static void addDocument(Document... documents) throws CorruptIndexException, LockObtainFailedException, IOException {
        Directory directory = getDirectory();
        try {
            IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_32, getAnalyzer());
            IndexWriter writer = new IndexWriter(directory, indexWriterConfig);
            try {
                for (Document document : documents) {
                    writer.addDocument(document);
                }
                writer.optimize();
            } finally {
                // Always release the write lock, even when indexing failed.
                writer.close();
            }
        } finally {
            directory.close();
        }
    }

    /**
     * Rebuilds the documents of the search index from the database.
     * <p>
     * Every row returned by {@code SearchDao.searchTeam} becomes one document
     * with one stored field per non-null column. Columns whose type name is
     * {@code uniqueidentifier}, {@code int}, {@code decimal} or
     * {@code datetime} are indexed as a single un-analyzed term; every other
     * column is analyzed. {@code datetime} values are formatted with
     * {@link #dateFormat}.
     * <p>
     * The JDBC resources are released in {@code finally} blocks and before the
     * documents are written to the index.
     * 
     * @return the elapsed time in milliseconds
     * @throws SQLException
     *             if reading from the database fails
     * @throws CorruptIndexException
     *             if the index is corrupt
     * @throws LockObtainFailedException
     *             if the index write lock cannot be obtained
     * @throws IOException
     *             on a low-level I/O error while writing the index
     */
    public static long updateIndex() throws SQLException, CorruptIndexException, LockObtainFailedException, IOException {
        long beginMillis = System.currentTimeMillis();
        List<Document> documents = new ArrayList<Document>();
        Connection connection = sqlUtil.getConnection();
        try {
            Statement teamStatement = connection.createStatement();
            try {
                ResultSet teams = SearchDao.searchTeam(teamStatement);
                try {
                    ResultSetMetaData metaData = teams.getMetaData();
                    // column name -> database type name
                    // NOTE(review): the query aliases its columns; getColumnLabel may be
                    // the more portable accessor - confirm against the JDBC driver in use.
                    Map<String, String> fieldTypeMap = new HashMap<String, String>();
                    int columnCount = metaData.getColumnCount();
                    for (int column = 1; column <= columnCount; column++) {
                        fieldTypeMap.put(metaData.getColumnName(column), metaData.getColumnTypeName(column));
                    }
                    while (teams.next()) {
                        Document document = new Document();
                        for (Map.Entry<String, String> entry : fieldTypeMap.entrySet()) {
                            String field = entry.getKey();
                            String typeName = entry.getValue();
                            boolean isDate = "datetime".equals(typeName);
                            String value;
                            if (isDate) {
                                // A NULL datetime column yields null here; skip it
                                // instead of failing inside the formatter.
                                Date date = teams.getDate(field);
                                value = (date == null) ? null : dateFormat.format(date);
                            } else {
                                value = teams.getString(field);
                            }
                            if (value == null) {
                                continue;
                            }
                            boolean exactMatch = isDate || "uniqueidentifier".equals(typeName) || "int".equals(typeName) || "decimal".equals(typeName);
                            Index indexMode = exactMatch ? Index.NOT_ANALYZED_NO_NORMS : Index.ANALYZED;
                            document.add(new Field(field, value, Store.YES, indexMode));
                        }
                        documents.add(document);
                    }
                } finally {
                    teams.close();
                }
            } finally {
                teamStatement.close();
            }
        } finally {
            connection.close();
        }
        addDocument(documents.toArray(new Document[documents.size()]));
        return System.currentTimeMillis() - beginMillis;
    }

    /**
     * Searches the index across several fields and returns one page of hits.
     * <p>
     * The searcher and the directory are closed before returning, so repeated
     * searches do not accumulate open index files.
     * 
     * @param fields
     *            the fields to search in
     * @param keyword
     *            the query text, parsed with {@code MultiFieldQueryParser}
     * @param sortField
     *            the field to sort by, or {@code null} for the default order
     * @param desc
     *            {@code true} to reverse the sort order
     * @param groupField
     *            the field to group by, or {@code null} for no grouping
     * @param page
     *            the 1-based page number
     * @param size
     *            the number of hits per page
     * @return the requested page together with the total row and page counts
     *         and the elapsed time in milliseconds
     * @throws IllegalArgumentException
     *             if {@code page} or {@code size} is less than 1
     * @throws CorruptIndexException
     *             if the index is corrupt
     * @throws IOException
     *             on a low-level I/O error
     * @throws ParseException
     *             if {@code keyword} cannot be parsed
     */
    private static SearchResultSet search(String[] fields, String keyword, String sortField, boolean desc, String groupField, int page, int size) throws CorruptIndexException, IOException,
            ParseException {
        if (page < 1 || size < 1) {
            // Otherwise: negative start offset, or division by zero in the page count.
            throw new IllegalArgumentException("page and size must be >= 1 (page=" + page + ", size=" + size + ")");
        }
        long beginMillis = System.currentTimeMillis();
        SearchResultSet result = new SearchResultSet();
        result.setCurrentPage(page);
        result.setPageSize(size);
        List<Document> documents = new ArrayList<Document>();
        int start = (page - 1) * size;
        int end = page * size;
        // Parse first so that a malformed query never opens the index at all.
        MultiFieldQueryParser parser = new MultiFieldQueryParser(Version.LUCENE_32, fields, getAnalyzer());
        Query query = parser.parse(keyword);
        int total = 0;
        Directory directory = getDirectory();
        try {
            IndexSearcher searcher = new IndexSearcher(directory);
            try {
                if (groupField != null) {
                    GroupingCollector groupingCollector = new GroupingCollector(sortField, desc, groupField);
                    searcher.search(query, groupingCollector);
                    List<Document> grouped = groupingCollector.getDocuments();
                    total = grouped.size();
                    for (int i = start; i < end && i < total; i++) {
                        documents.add(grouped.get(i));
                    }
                } else {
                    TopDocs topDocs;
                    if (sortField != null) {
                        topDocs = searcher.search(query, end, new Sort(new SortField(sortField, Locale.CANADA, desc)));
                    } else {
                        topDocs = searcher.search(query, end);
                    }
                    total = topDocs.totalHits;
                    // Only the top "end" hits were collected, so bound by the array as well.
                    int limit = Math.min(Math.min(end, total), topDocs.scoreDocs.length);
                    for (int i = start; i < limit; i++) {
                        documents.add(searcher.doc(topDocs.scoreDocs[i].doc));
                    }
                }
            } finally {
                searcher.close();
            }
        } finally {
            directory.close();
        }
        result.setTotalRows(total);
        if (total > 0) {
            int totalPages = total / size + (total % size > 0 ? 1 : 0);
            result.setTotalPages(totalPages);
        }
        result.setList(documents);
        result.setHours(System.currentTimeMillis() - beginMillis);
        return result;
    }

    /**
     * Manual entry point: runs {@code testUpdateIndex()}.
     * 
     * @param args
     *            unused
     * @throws Exception
     *             whatever {@code testUpdateIndex()} throws
     */
    public static void main(String[] args) throws Exception {
        testUpdateIndex();
        /*
         * Disabled search checks, one call per result page:
         * testSeachIndex(1); testSeachIndex(2); testSeachIndex(3);
         * testSeachIndex(4);
         */
    }

    /**
     * Runs a fixed sample query against the {@code clinename} field, sorted by
     * {@code dbgndate} in reverse order with 30 hits per page, and prints a
     * summary line followed by one tab-separated line per hit.
     * 
     * @param page
     *            the page number passed on to {@code search}
     * @throws Exception
     *             whatever {@code search} throws
     */
    public static void testSeachIndex(int page) throws Exception {
        String[] searchFields = { "clinename" };
        String keyword = "漓江 AND \"六日\"";
        SearchResultSet found = search(searchFields, keyword, "dbgndate", true, null, page, 30);
        List<Document> hits = found.getList();
        String summary = String.format("About %s results (%s ms) ", found.getTotalRows(), found.getHours());
        System.out.println(summary);
        for (Document hit : hits) {
            StringBuilder row = new StringBuilder();
            row.append(hit.get("dbgndate"));
            row.append("\t").append(hit.get("clinename"));
            row.append("\t").append(hit.get("ulineid"));
            System.out.println(row.toString());
        }
    }

    /**
     * Drops the existing index, rebuilds it and prints the time reported by
     * {@code updateIndex()} in milliseconds.
     * 
     * @throws Exception
     *             whatever {@code updateIndex()} throws
     */
    public static void testUpdateIndex() throws Exception {
        dropIndex();
        long elapsedMillis = updateIndex();
        StringBuilder message = new StringBuilder("平均耗时：");
        message.append(elapsedMillis).append("ms");
        System.out.println(message.toString());
    }
}

我的SQL也很强大

package name.ixr.dao;

import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

/**
 * Supplies the data that is fed into the search index.
 * 
 * @author IXR
 */
public class SearchDao {
    /**
     * Runs the team query and returns its result set.
     * <p>
     * Each row is one {@code t_team} record joined with its picture path, line
     * price, department and three aggregated text columns ({@code themes},
     * {@code tags}, {@code knowledges}). The aggregated columns are built with
     * the T-SQL {@code STUFF(REPLACE(... FOR XML AUTO ...), 1, 1, '')} idiom.
     * <p>
     * The empty-string literals of that idiom are written as {@code ''}. With a
     * single quote, {@code STUFF} receives one argument instead of four and
     * the statement is rejected.
     * 
     * @param statement
     *            the statement used to execute the query; the caller owns and
     *            closes it
     * @return the result set produced by {@code statement.executeQuery}
     * @throws SQLException
     *             if the query fails
     */
    public static ResultSet searchTeam(Statement statement) throws SQLException {
        // Single-threaded local buffer, so the unsynchronized builder is enough.
        StringBuilder sql = new StringBuilder();
        sql.append("select team.uid uid,team.clinename clinename,team.ulineid ulineid ");// t_team
        sql.append(",team.cteamcode cteamcode,team.idays idays,team.coptype coptype");// t_team
        sql.append(",team.iplanqty-team.ikeepqty-team.iqty remainingseats,team.cnation cnation,team.cstartcity cstartcity ");// t_team
        sql.append(",team.dbgndate dbgndate,team.denddate denddate,team.cfeature cfeature ");// t_team
        sql.append(",lineprice.nprice nprice,lineprice.nprice1 nprice1 ");// t_line_price
        sql.append(",dept.ucorpid ucorpid ");// t_dept
        sql.append(",picture.cpicpath cpicpath ");// t_picture
        sql.append(",theme.[values] themes ");// t_theme
        sql.append(",tag.[values] tags ");// t_tag
        sql.append(",knowledge.knowledges knowledges ");// t_knowledge
        sql.append("from t_team team ");
        sql.append("left join (select picture.ulinkid,max(picture.cpicpath) cpicpath from t_picture picture ");
        sql.append("where picture.ctype='线路' and picture.cdisplaytype='查询' group by picture.ulinkid) ");
        sql.append("picture on picture.ulinkid=team.ulineid ");
        sql.append("left join t_line_price lineprice on lineprice.ulineid=team.ulineid and lineprice.ino=1 ");
        sql.append("left join t_dept dept on dept.uid=team.udeptid ");
        // themes: comma-separated theme names per line
        sql.append("left join (select * from (select distinct ulinkid ");
        sql.append("from t_theme_link)a outer apply(select [values]=");
        sql.append("stuff(replace(replace((select t.cname cname from ");
        sql.append("t_theme_link u left join t_theme t on u.uthemeid = ");
        sql.append("t.uid where u.ulinkid = a.ulinkid for xml auto ), ");
        sql.append("'<t cname=\"', ','), '\"/>', ''), 1, 1, ''))n) ");
        sql.append("theme on theme.ulinkid=team.ulineid ");
        // tags: comma-separated tag names per team
        sql.append("left join (select * from (select distinct uinfoid ");
        sql.append("from t_tag_link)a outer apply(select [values]=");
        sql.append("stuff(replace(replace((select t.ctagname ctagname from ");
        sql.append("t_tag_link l left join t_tag t on l.utagid = ");
        sql.append("t.uid where l.uinfoid = a.uinfoid for xml auto ), ");
        sql.append("'<t ctagname=\"', ','), '\"/>', ''), 1, 1, ''))n) ");
        sql.append("tag on tag.uinfoid=team.uid ");
        // knowledge: comma-separated {uid:..,ctitle:..,cpicpath:..} entries per team
        sql.append("left join (");
        sql.append("select * from (select distinct ulinkid ");
        sql.append("from t_knowledge_link)a outer apply(select [knowledges]=");
        sql.append("stuff(replace(replace(replace(replace((select k.uid uid,k.ctitle ctitle,k.cpicpath cpicpath  from ");
        sql.append("t_knowledge_link l left join t_knowledge k on l.uknowledgeid = ");
        sql.append("k.uid left join t_team_route route on route.uid=l.ulinkid ");
        sql.append("where l.ulinkid = a.ulinkid and l.ctype='3' or route.uteamid=a.ulinkid and l.ctype='4' for xml auto ), ");
        sql.append("'<k uid=', ',{uid:'), '\"/>', '}'),'ctitle=',',ctitle:'),'cpicpath=',',cpicpath:'), 1, 1, '')");
        sql.append(")n) ");
        sql.append("knowledge on knowledge.ulinkid=team.uid order by dbgndate");
        return statement.executeQuery(sql.toString());
    }
}

ixr_wang

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
Lucene使用笔记

package name.ixr.service;import java.io.File;import java.io.IOException;import java.io.InputStream;import java.net.URI;impor
复制链接

扫一扫

专栏目录