I recently built a "recommended for you" feature that guesses what a user wants to read and pushes articles accordingly. To rank the pushed articles, I count how often each subscribed keyword appears in an article (its term frequency) and sort by that score to build the final article list. In practice, hitting the database and recomputing the sort on every request took far too long and made for a poor user experience, so I turned to Lucene, a full-text search engine, to streamline the flow and cut the wait time. With a Linux cron job incrementally indexing the article tables, keyword lookups became dramatically faster. The rest of this post walks through the implementation.
First, download the Lucene source from the official site — the source, not a prebuilt jar — because everyone's requirements differ, and sometimes you need to modify the source to expose the parameters or results you want. Here we need to work with the freq (term frequency) value that Lucene computes internally, so we patch its interface to expose it for our own use.
/**
 * Modified ScoreDoc.java from the Lucene source to expose freq.
 */
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
/** Holds one hit in {@link TopDocs}. */
public class ScoreDoc {
/** The score of this document for the query. */
public float score;
/** A hit document's number.
* @see IndexSearcher#doc(int) */
public int doc;
/** Only set by {@link TopDocs#merge}*/
public int shardIndex;
/** Phrase frequency of the query term in this document. */
public int freq;
/** Constructs a ScoreDoc. */
public ScoreDoc(int doc, float score) {
this(doc, score, -1,0);
}
/** Constructs a ScoreDoc. */
public ScoreDoc(int doc, float score, int shardIndex) {
this.doc = doc;
this.score = score;
this.shardIndex = shardIndex;
this.freq = 0;
}
/** Constructs a ScoreDoc. */
public ScoreDoc(int doc, float score, int shardIndex, int freq) {
this.doc = doc;
this.score = score;
this.shardIndex = shardIndex;
this.freq = freq;
}
// A convenience method for debugging.
@Override
public String toString() {
return "doc=" + doc + " score=" + score + " shardIndex=" + shardIndex + " freq="+freq;
}
}
With freq exposed, we next write an ArticleElement class that holds the per-article fields we need downstream.
public class ArticleElement {
private String id = null;
private int titleFreq = 0;
private int contentFreq = 0;
private int titleLen = 0;
private int contentLen = 0;
private int type = 0;
private int time = 0;
private float article_weight = 0;
private String class_id = null;
public String getId() {
return id;
}
public void setId(String id) {
this.id = id;
}
public int getTitleFreq() {
return titleFreq;
}
public void setTitleFreq(int titleFreq) {
this.titleFreq = titleFreq;
}
public int getContentFreq() {
return contentFreq;
}
public void setContentFreq(int contentFreq) {
this.contentFreq = contentFreq;
}
public int getTitleLen() {
return titleLen;
}
public void setTitleLen(int titleLen) {
this.titleLen = titleLen;
}
public int getContentLen() {
return contentLen;
}
public void setContentLen(int contentLen) {
this.contentLen = contentLen;
}
public int getType() {
return type;
}
public void setType(int type) {
this.type = type;
}
public float getArticle_weight() {
return article_weight;
}
public void setArticle_weight(float article_weight) {
this.article_weight = article_weight;
}
public String getClass_id() {
return class_id;
}
public void setClass_id(String class_id) {
this.class_id = class_id;
}
public int getTime() {
return time;
}
public void setTime(int time) {
this.time = time;
}
}
Now we build an index over every article, so that subscribed keywords can later be matched against it when pushing articles.
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.nio.file.Paths;
import java.util.Date;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
/** Indexes articles into a Lucene index on disk so they can later be
 * searched by subscribed keywords.
 */
public class IndexArticle {
private IndexWriter writer = null;
public IndexArticle(boolean create,String path) {
this.initWriter(create,path);
}
/** Initialize the IndexWriter over the index directory at the given path. */
private void initWriter(boolean create,String path){
String indexPath = path;
Directory dir = null;
try {
dir = FSDirectory.open(Paths.get(indexPath));
Analyzer analyzer = new StandardAnalyzer();
IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
if (create) {
iwc.setOpenMode(OpenMode.CREATE);
} else {
iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
}
iwc.setMaxBufferedDocs(1000);
this.writer = new IndexWriter(dir, iwc);
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
public boolean createIndex(String article_id, String article_title, String article_detail, String article_class_id, int type, int article_time) {
try {
Document doc = new Document();
Field pathField = new StringField("path", article_id, Field.Store.YES);
doc.add(pathField);
doc.add(new StringField("type", Integer.toString(type), Field.Store.YES));
doc.add(new StringField("time", Integer.toString(article_time), Field.Store.YES));
doc.add(new StringField("titleLen", Integer.toString(article_title.length()), Field.Store.YES));
doc.add(new StringField("contentsLen", Integer.toString(article_detail.length()), Field.Store.YES));
if (article_class_id == null || article_class_id.isEmpty())
article_class_id = "-1";
doc.add(new TextField("class_id", article_class_id, Field.Store.YES));
doc.add(new TextField("title", article_title, Field.Store.NO));
doc.add(new TextField("contents", article_detail, Field.Store.NO));
this.writer.addDocument(doc);
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
return true;
}
public void closeWriter(){
try {
this.writer.close();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
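The `time` field above is stored as a yyyyMMdd integer derived from the article's unix timestamp (the conversion happens in the article-processing class further down). A minimal standalone sketch of that encoding — pinned to UTC here so the output is deterministic, whereas the original code uses the JVM default timezone:

```java
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.TimeZone;

public class TimeFieldDemo {
    // Encode a unix timestamp (seconds) as the yyyyMMdd integer indexed in the "time" field.
    static int toDateInt(long unixSeconds) {
        SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd");
        sdf.setTimeZone(TimeZone.getTimeZone("UTC"));
        return Integer.parseInt(sdf.format(new Date(unixSeconds * 1000L)));
    }

    public static void main(String[] args) {
        System.out.println(toDateInt(0L));      // 19700101
        System.out.println(toDateInt(86400L));  // 19700102
    }
}
```

Because the digits are fixed-width, these values also compare correctly in the lexicographic `time:[a TO b]` range queries used later.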
Next, we search the freshly built index with keywords to pull out the results we want.
import java.io.IOException;
import java.nio.file.Paths;
import java.sql.Date;
import java.text.SimpleDateFormat;
import java.util.HashMap;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
public class SearchKeyword {
private IndexSearcher searcher = null;
private Analyzer analyzer = null;
public SearchKeyword(String path) {
String index = path;
IndexReader reader = null;
try {
reader = DirectoryReader.open(FSDirectory.open(Paths.get(index)));
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
this.searcher = new IndexSearcher(reader);
this.analyzer = new StandardAnalyzer();
}
public TopDocs findKeyword(String field, String keyword) {
try {
QueryParser parser = new QueryParser(field, analyzer);
Query query = parser.parse(keyword);
return searcher.search(query, 100000000);
} catch (IOException | ParseException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
return null;
}
public HashMap<Integer,ArticleElement> getArticleInfo(String keyword){
return this.getArticleInfo(keyword, -1, -1);
}
public HashMap<Integer, ArticleElement> getArticleInfoAll(int timestamp_start, int timestamp_end){
String timeQuery = "";
if(timestamp_start!=-1 && timestamp_end != -1){
SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd");
timeQuery = String.format("time:[%s TO %s]", sdf.format(new Date(timestamp_start * 1000L)),sdf.format(new Date((timestamp_end - 60*60*12) * 1000L)));
}else
return null;
TopDocs tdocs = findKeyword("title",timeQuery);
HashMap<Integer, ArticleElement> articleMap = new HashMap<Integer,ArticleElement>();
if(tdocs!=null){
for(ScoreDoc hit: tdocs.scoreDocs){
ArticleElement element = new ArticleElement();
Document doc = null;
try {
doc = searcher.doc(hit.doc);
} catch (IOException e) {
continue;
}
element.setTitleFreq(0);
element.setId(doc.get("path"));
element.setClass_id(doc.get("class_id"));
element.setTitleLen(0);
element.setContentLen(0);
element.setArticle_weight(1);
element.setType(Integer.valueOf(doc.get("type")));
articleMap.put(hit.doc, element);
}
}
return articleMap;
}
public HashMap<Integer, ArticleElement> getArticleInfo(String keyword,int timestamp_start, int timestamp_end){
String timeQuery = "";
if(timestamp_start!=-1 && timestamp_end != -1){
SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd");
timeQuery = String.format(" AND time:[%s TO %s]", sdf.format(new Date(timestamp_start * 1000L)),sdf.format(new Date((timestamp_end - 60*60*12) * 1000L)));
}
TopDocs tdocs = findKeyword("title","title:\""+keyword+"\"" + timeQuery);
TopDocs cdocs = findKeyword("contents","contents:\""+keyword+"\"" + timeQuery);
HashMap<Integer, ArticleElement> articleMap = new HashMap<Integer,ArticleElement>();
if(tdocs!=null){
for(ScoreDoc hit: tdocs.scoreDocs){
ArticleElement element = new ArticleElement();
element.setTitleFreq(hit.freq);
articleMap.put(hit.doc, element);
}
}
if(cdocs!=null){
for(ScoreDoc hit: cdocs.scoreDocs){
ArticleElement element = articleMap.get(hit.doc);
if(element==null)
element = new ArticleElement();
element.setContentFreq(hit.freq);
articleMap.put(hit.doc, element);
}
}
for (HashMap.Entry<Integer, ArticleElement> entry : articleMap.entrySet()) {
try {
ArticleElement element = entry.getValue();
Document doc = searcher.doc(entry.getKey());
element.setId(doc.get("path"));
element.setClass_id(doc.get("class_id"));
element.setTitleLen(Integer.valueOf(doc.get("titleLen")));
element.setContentLen(Integer.valueOf(doc.get("contentsLen")));
element.setType(Integer.valueOf(doc.get("type")));
int titleLen = Math.max(element.getTitleLen(), 1);// guard against zero-length fields
int contentLen = Math.max(element.getContentLen(), 1);
float weight = 100 * ((float) keyword.length() * element.getTitleFreq() / titleLen
+ (float) keyword.length() * element.getContentFreq() / contentLen);
element.setArticle_weight(weight);
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
return articleMap;
}
}
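The weight computed in `getArticleInfo` is a keyword-density score: keyword length times term frequency, normalized by field length, summed over title and body, then scaled by 100. Pulled out as a self-contained sketch (class and method name hypothetical):

```java
public class WeightDemo {
    // Same formula as SearchKeyword.getArticleInfo:
    // 100 * (kwLen*titleFreq/titleLen + kwLen*contentFreq/contentLen)
    static float articleWeight(int kwLen, int titleFreq, int titleLen,
                               int contentFreq, int contentLen) {
        return 100 * ((float) kwLen * titleFreq / titleLen
                + (float) kwLen * contentFreq / contentLen);
    }

    public static void main(String[] args) {
        // A 2-character keyword hit twice in a 20-character title
        // and 3 times in a 400-character body: 100 * (0.2 + 0.015) = 21.5
        System.out.println(articleWeight(2, 2, 20, 3, 400));
    }
}
```

Normalizing by field length keeps long articles from winning simply by mentioning the keyword more often, and a hit in a short title counts heavily.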
Finally, an article-handling class ties these methods together: it reads articles and keywords from the database, and its main method serves as the program entry point, using command-line arguments to decide whether to build the article index or to generate the recommended-article list file from the keywords. (Because the test and production databases use different connection settings, a config.properties file is read to determine which database to connect to.)
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.sql.Connection;
import java.sql.Date;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import com.huaban.analysis.jieba.JiebaSegmenter;
import com.huaban.analysis.jieba.SegToken;
import com.huaban.analysis.jieba.JiebaSegmenter.SegMode;
public class ArticleDealer {
private String driver = "";
private String url = "";
private String password = "";
private String user = "";
private String configFilePath = "";
private int user_id;
private int timestamp; // end timestamp
private int timestamp_start; // start timestamp
private Statement statement;
private Statement statement_class;
private String filename; // output file name
private String indexPath;
private JiebaSegmenter segmenter = new JiebaSegmenter();
public ArticleDealer() {
this.readConfigInfo();
}
public ArticleDealer(String configFilePath) {
this.configFilePath = configFilePath;
this.readConfigInfo();
}
public ArticleDealer(int user_id, int timestamp_start,int timestamp,String filename,String indexPath){
this.readConfigInfo();
this.user_id = user_id;
this.timestamp = timestamp;
this.timestamp_start = timestamp_start;
this.filename = filename;
this.indexPath = indexPath;
}
public ArticleDealer(String configFilePath, int user_id, int timestamp_start,int timestamp,String filename,String indexPath){
this.configFilePath = configFilePath;
this.user_id = user_id;
this.timestamp = timestamp;
this.timestamp_start = timestamp_start;
this.filename = filename;
this.indexPath = indexPath;
this.readConfigInfo();
}
private void readConfigInfo(){
Properties properties = new Properties();
InputStream inputStream = null;
if(this.configFilePath.equals("")){
inputStream = Thread.currentThread().getContextClassLoader().getResourceAsStream("config.properties");
}else{
try {
inputStream = new FileInputStream(this.configFilePath);
} catch (FileNotFoundException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
try {
properties.load(inputStream);
this.driver = properties.getProperty("driver");
this.url = properties.getProperty("url");
this.password = properties.getProperty("password");
this.user = properties.getProperty("user");
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
public Statement getConnection(){
return this.getConnection(false);
}
public void closeConnection() {
if(this.statement!=null) {
try {
this.statement.close();
} catch (SQLException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
if(this.statement_class!=null) {
try {
this.statement_class.close();
} catch (SQLException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
public Statement getConnection(boolean isClass) {
if (!isClass && this.statement != null)
return this.statement;
if(isClass && this.statement_class !=null)
return this.statement_class;
Statement statement = null;
try {
Class.forName(driver);//new com.mysql.jdbc.Driver();
Connection conn = DriverManager.getConnection(this.url, this.user, this.password);
statement = conn.createStatement();
if (!conn.isClosed()) {
System.out.println("Succeeded connecting to the Database!");
}
} catch (SQLException | ClassNotFoundException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
if(!isClass)
this.statement = statement;
else
this.statement_class = statement;
return statement;
}
public ResultSet executeSql(Statement statement, String sql) {
ResultSet rs = null;
try {
rs = statement.executeQuery(sql);
} catch (SQLException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
return rs;
}
public boolean dealArticle(String path) {
IndexArticle indexer = new IndexArticle(true,path);
Statement statement = this.getConnection();
Map<String, String> map = new HashMap<String, String>();
map.put("article_details", "details");
map.put("industry_bidding", "content");
Iterator<String> iterator = map.keySet().iterator();
int loop = 0;
while (iterator.hasNext()) {
Object key = iterator.next();
int type = 1;// news/information
if (key.toString().equals("article_details"))
type = 0;// knowledge article
String sql = "select count(id) as article_num from " + key.toString() + " where state = 1";
ResultSet rs = this.executeSql(statement, sql);
try {
if (rs.next()) {
int totalnum = rs.getInt("article_num");
int pagesize = 50;
int count = 1;
while (true) {
int start = (count - 1) * pagesize;
if (start > totalnum)
break;
String sql1 = "select article.* from "
+ key.toString()
+ " as article where state = 1 order by article.id desc limit " + start + ", "
+ pagesize;
rs = this.executeSql(this.getConnection(true), sql1);
while (rs.next()) {
String article_id = rs.getString("id");
String sql2 = "select group_concat(article_class_id) as article_class_id from article_class_map where type="+(type+1)+" and article_id="+article_id+" group by article_id";
ResultSet rs2 = this.executeSql(statement, sql2);
String article_class_id = "";
if(rs2.next()){
article_class_id = rs2.getString("article_class_id");
}
String article_title = rs.getString("title");
String article_detail = rs.getString(map.get(key).toString());
article_detail = article_detail.replaceAll("</?\\w+[^>]*>", "").replaceAll("\\s+", "");
int article_time = rs.getInt("time");
SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd");
int date = Integer.parseInt(sdf.format(new Date(article_time * 1000L)));
indexer.createIndex(article_id, article_title, article_detail, article_class_id, type,
date);
if (++loop % 100 == 0)
System.out.println("Processed " + loop + " documents...");
}
count++;
}
}
} catch (SQLException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
indexer.closeWriter();
return true;
}
/**
 * Get the categories the current user belongs to.
 *
 * @return result set of weight_detail rows for this user, ordered by weight
 */
public ResultSet getUserClass(int type) {
Statement statement = this.getConnection();
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
String date = sdf.format(new Date(timestamp * 1000L));
String sql = "select * from weight_detail where user_id=" + this.user_id
+ " and FROM_UNIXTIME(time,'%Y-%m-%d') = '" + date + "' and type = " + type
+ " order by weight_score desc";
ResultSet rs = null;
try {
rs = statement.executeQuery(sql);
} catch (SQLException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
return rs;
}
public List<String> getKeyWord() {
List<String> result = new ArrayList<String>();
Statement statement = this.getConnection();
String sql = "select * from user_push_keyword where user_id=" + this.user_id + " and time <= " + timestamp
+ " and valid = 1 order by time desc limit 0, 5";
ResultSet rs = null;
int time = -1;
try {
rs = statement.executeQuery(sql);
if(!rs.next()) {
sql = "select * from user_push_keyword where user_id=" + this.user_id
+ " and valid = 1 and time>= " + timestamp +" and time<="+(timestamp+24*3600-1)+" order by time limit 0, 5";
rs = statement.executeQuery(sql);
if(!rs.next())
return result;
}
do{
if (time == -1) {
time = rs.getInt("time");
}
if (time != rs.getInt("time"))
break;
result.add(rs.getString("key_word"));
}while (rs.next()) ;
} catch (SQLException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
return result;
}
public void getUserArticle() {
List<String> keyWords = this.getKeyWord();
HashMap<Integer, ArticleElement> article_map = null;
for (String keyword : keyWords) {
HashMap<Integer, ArticleElement> map = this.dealKeyword(keyword);
if (article_map == null) {
article_map = map;
continue;
}
for (HashMap.Entry<Integer, ArticleElement> entry : map.entrySet()) {
ArticleElement element = entry.getValue();
ArticleElement element2 = article_map.get(entry.getKey());
if (element2 == null) {
article_map.put(entry.getKey(), element);
} else if (element2.getArticle_weight() < element.getArticle_weight()) {
element2.setArticle_weight(element.getArticle_weight());
}
}
}
List<ArticleElement> article_list = new ArrayList<ArticleElement>();
if(article_map!=null && article_map.size()!=0) {
for (HashMap.Entry<Integer, ArticleElement> entry : article_map.entrySet()) {
article_list.add(entry.getValue());
}
article_list.sort(new Comparator<ArticleElement>() {
@Override
public int compare(ArticleElement arg0, ArticleElement arg1) {
int byWeight = Float.compare(arg1.getArticle_weight(), arg0.getArticle_weight());
if (byWeight != 0)
return byWeight;
return Integer.compare(arg1.getTime(), arg0.getTime());
}
});
}else {
SearchKeyword search = new SearchKeyword(this.indexPath);
article_map = search.getArticleInfoAll(this.timestamp_start,this.timestamp);
for (HashMap.Entry<Integer, ArticleElement> entry : article_map.entrySet()) {
article_list.add(entry.getValue());
}
}
this.getArticleList(article_list);
}
private void getArticleList(List<ArticleElement> article_list) {
try {
FileWriter writer = new FileWriter(filename);
for (ArticleElement entry : article_list) {
String article_id = entry.getId();
int article_type = entry.getType();
String article_class_id = entry.getClass_id();
writer.write(article_id + " " + article_type + " " + article_class_id + "\n");
}
writer.close();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
/**
 * 1. Look up the user's categories by user id and date.
 * 2. Look up the user's subscribed keyword groups by user id and date.
 * 3. Compute article weights from the keyword groups.
 * 4. Group articles by category and sort them by weight.
 *
 * @return map from Lucene doc id to ArticleElement
 */
public HashMap<Integer, ArticleElement> dealKeyword(String keyword) {
if (keyword.length() > 10)
return null;
SearchKeyword search = new SearchKeyword(this.indexPath);
String[] keyword_list = keyword.split("[,;\\s'\\*\\+|\\^]+");
HashMap<Integer, ArticleElement> totalMap = null;
Set<String> keywordList = new LinkedHashSet<String>(Arrays.asList(keyword_list));
for(String l : keywordList){// split each stored keyword group into single keywords and compute keyword density per keyword
HashMap<Integer, ArticleElement> article_map = search.getArticleInfo(l,this.timestamp_start,this.timestamp);
if (totalMap == null) {
totalMap = article_map;
continue;
}
for (HashMap.Entry<Integer, ArticleElement> entry : article_map.entrySet()) {
int key = entry.getKey();
if (totalMap.get(key) != null) {
ArticleElement element = totalMap.get(key);
ArticleElement e = entry.getValue();
element.setArticle_weight(element.getArticle_weight() + e.getArticle_weight());// sum the densities of all keywords in the group to get this article's total density for the group
} else {
totalMap.put(entry.getKey(), entry.getValue());
}
}
}
return totalMap;
}
/**
 * Usage:
 * index &lt;indexPath&gt; [configPath]
 * search &lt;userId&gt; &lt;startTimestamp&gt; &lt;endTimestamp&gt; &lt;savePath&gt; &lt;indexPath&gt; [configPath]
 * @param args
 */
public static void main(String[] args) {
if(args[0].equalsIgnoreCase("index")) {
ArticleDealer ad = null;
if(args.length > 2){
ad = new ArticleDealer(args[2]);
}else{
ad = new ArticleDealer();
}
ad.dealArticle(args[1]);
ad.closeConnection();
}
else {
ArticleDealer ad = null;
if(args.length > 6){
ad = new ArticleDealer(args[6],Integer.valueOf(args[1]),Integer.valueOf(args[2]),Integer.valueOf(args[3]),args[4],args[5]);
}else{
ad = new ArticleDealer(Integer.valueOf(args[1]),Integer.valueOf(args[2]),Integer.valueOf(args[3]),args[4],args[5]);
}
ad.getUserArticle();
ad.closeConnection();
}
}
}
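`dealKeyword` above splits a stored keyword group on separator characters (comma, semicolon, whitespace, and a few operators) and dedupes the pieces with a `LinkedHashSet`, which keeps first-seen order. The split in isolation:

```java
import java.util.Arrays;
import java.util.LinkedHashSet;
import java.util.Set;

public class KeywordSplitDemo {
    // Split a keyword group exactly as dealKeyword does, removing duplicates
    // while preserving the order keywords first appear.
    static Set<String> splitKeywords(String keyword) {
        String[] parts = keyword.split("[,;\\s'\\*\\+|\\^]+");
        return new LinkedHashSet<String>(Arrays.asList(parts));
    }

    public static void main(String[] args) {
        System.out.println(splitKeywords("lucene,search lucene;index"));
        // [lucene, search, index]
    }
}
```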
That completes the article processing and the generation of each user's "recommended for you" article list.
From PHP, a command like the following generates the recommendation list for a given user:
$command="java -jar /data/article.jar search ".$this->user_id." ".$time_start." ".$time." ".$file_path." /data/index /data/config.properties";
exec($command);
Building this feature was a great hands-on way to learn Lucene — well worth the effort.