php通过调用lucene库实现检索应用。注意为下面安装的php,tomcat,java,apache添加path路径。
被检索的数据存放在mysql数据库中。这些数据是使用python从网络上爬取的,已存储为json格式,可以直接用java读取并插入数据库。
用到的jar包。
1、gson-2.2.1.jar(用于直接将json格式转化为java对象)
2、lucene-core-3.0.2.jar(lucene库)
3、mysql-connector-java-5.1.37-bin.jar (java连接mysql驱动库)
4、LuceneExample.jar(自己编写的使用Lucene的示例库)
第一步:
安装Java,配置好环境变量。将上面的jar包拷贝到java虚拟机运行环境jre\lib\ext目录下,这样在虚拟机运行时就会自动加载这些库了。
第二步:
安装tomcat。由于php调用java需要用到php-java-bridge,而下载下来的是JavaBridge.war,所以需要用tomcat将下载的JavaBridge.war文件解压,具体方法为:将JavaBridge.war放在tomcat的webapps\目录,启动tomcat,此时tomcat会自动解压webapps\下的JavaBridge.war,生成JavaBridge文件夹,将这个文件夹拷贝到第三步的apache运行目录。
第三步:
安装php,安装apache,将第二步中得到的JavaBridge文件夹拷贝到htdocs\目录。(JavaBridge文件夹里包含一些类似头文件的东西)
第四步:
环境已经基本建立好了,下面开始具体实施。
1、登录到mysql终端,利用SQL指令建立tiku数据库,建立math数据库表。
create database tiku;
use tiku;
create table math(index_num int(11) primary key not null auto_increment,question text not null,answer text);
建完表后可以 desc math; 查看一下。
2、终端中进入加载数据代码的目录D:\soft\yangyang\luc,数据文件为out.data,为json格式。
编译LoadData.java并执行,
javac LoadData.java
java LoadData
此时在终端中查看math数据库,即可看到数据已存入数据库表math中。
select * from math limit 2;
3、编译生成自己编写的使用Lucene的示例库
同样在上述目录中执行下面命令
javac LuceneExample.java 编译生成class文件
jar -cvf LuceneExample.jar LuceneExample.class 打包class文件
接着将LuceneExample.jar文件拷贝至上面说的jre\lib\ext目录下。
4、编写服务文件
进入apache运行目录htdocs\编写test.php文件来调用上面的LuceneExample.jar库实现检索。具体代码见附录
第五步:
开启服务环境,查看运行结果:
1、首先保证开启mysql服务,可以在cmd中运行命令 net start mysql
2、进入第三步的JavaBridge\WEB-INF\lib目录下,双击运行JavaBridge.jar程序(如果双击无法运行,可在该目录下手动执行 java -jar JavaBridge.jar),选择8080端口,确定。
3、进入apache的bin目录下双击ApacheMonitor.exe开启apache服务。
4、此时在浏览器中输入 http://localhost/test.php 即可看到查询结果(若为乱码,请修改编码方式为UTF-8)。
LoadData.java源码
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.io.FileInputStream;
import com.google.gson.reflect.TypeToken;
import java.lang.reflect.Type;
import com.google.gson.Gson;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.util.ArrayList;
import java.util.List;
/**
 * Command-line loader that copies question/answer pairs from a JSON file into the
 * {@code math} table of the {@code tiku} MySQL database.
 */
public class LoadData {
    /**
     * Parses {@code fileName} as a UTF-8 encoded JSON array of {@link TiMu} objects and
     * inserts one row per element into {@code math(question, answer)}.
     *
     * <p>Any failure (missing file, malformed JSON, database error) is reported as a
     * stack trace on standard error; no exception propagates to the caller.
     *
     * @param fileName path of the JSON data file to load
     */
    public static void loadJson(String fileName){
        try {
            List<TiMu> ps = readQuestions(fileName);
            // println(Object) prints "null" instead of throwing when the file was empty.
            System.out.println(ps);
            if (ps == null || ps.isEmpty()) {
                // Nothing to insert, so do not open a database connection at all.
                return;
            }
            insertQuestions(ps);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads the whole file as one JSON document. Gson consumes the reader directly, so
     * the JSON may span any number of lines.
     */
    private static List<TiMu> readQuestions(String fileName) throws Exception {
        BufferedReader br = new BufferedReader(
                new InputStreamReader(new FileInputStream(fileName), "utf-8"));
        try {
            Type type = new TypeToken<List<TiMu>>(){}.getType();
            List<TiMu> parsed = new Gson().fromJson(br, type);
            return parsed;
        } finally {
            br.close();
        }
    }

    /** Inserts every non-null entry into the math table, always releasing the JDBC resources. */
    private static void insertQuestions(List<TiMu> questions) throws Exception {
        Class.forName("com.mysql.jdbc.Driver").newInstance();
        Connection conn = DriverManager.getConnection("jdbc:mysql://localhost:3306/tiku", "root", "buptmm");
        try {
            String sql = "insert into math(question,answer) values(?,?)";
            PreparedStatement preStmt = conn.prepareStatement(sql);
            try {
                for (TiMu ti : questions) {
                    if (ti == null) {
                        // A literal null element in the JSON array carries no data.
                        continue;
                    }
                    preStmt.setString(1, ti.getQ());
                    preStmt.setString(2, ti.getA());
                    preStmt.executeUpdate();
                }
            } finally {
                preStmt.close();
            }
        } finally {
            conn.close();
        }
    }

    /** Loads the file {@code out.data} from the current working directory. */
    public static void main(String[] args) {
        LoadData.loadJson("out.data");
    }
}
/**
 * One question/answer pair.
 *
 * <p>The field names {@code Q} and {@code A} are deliberately upper-case: they match the
 * keys of the JSON data, so a reflection-based mapper that binds by field name needs no
 * extra configuration. Do not rename them without also changing the data format.
 *
 * <p>Note: as a public top-level class this type has to be stored in its own file,
 * {@code TiMu.java}.
 */
public class TiMu{
    /** Question text. */
    private String Q;
    /** Answer text (may be a URL pointing at the answer). */
    private String A;

    /** Creates an empty pair; both fields start as {@code null}. */
    public TiMu(){}

    /**
     * Creates a pair with the given contents.
     *
     * @param q question text
     * @param a answer text
     */
    public TiMu(String q, String a){
        this.Q = q;
        this.A = a;
    }

    /** @return the question text, or {@code null} if it was never set */
    public String getQ(){
        return Q;
    }

    /** @param Q the new question text */
    public void setQ(String Q){
        this.Q = Q;
    }

    /** @return the answer text, or {@code null} if it was never set */
    public String getA(){
        return A;
    }

    /** @param A the new answer text */
    public void setA(String A){
        this.A = A;
    }

    /** Returns a readable form so that printing a list of pairs shows their contents. */
    @Override
    public String toString(){
        return "TiMu{Q=" + Q + ", A=" + A + "}";
    }
}
LuceneExample.java源码
import java.io.File;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.Statement;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;
/**
 * Small Lucene 3.0 demo: indexes the {@code question} column of the {@code math} table in
 * the {@code tiku} MySQL database, finds the stored question that best matches a query,
 * and looks up the answer belonging to a question.
 *
 * <p>All public methods report failures as a stack trace on standard error and fall back
 * to an empty result instead of throwing.
 */
public class LuceneExample {
    /** Location of the Lucene index files: the working directory of the running process. */
    public static final File INDEX_DIRECTORY = new File("./");

    /** Opens a new connection to the tiku database; the caller is responsible for closing it. */
    private static Connection openConnection() throws Exception {
        Class.forName("com.mysql.jdbc.Driver").newInstance();
        return DriverManager.getConnection("jdbc:mysql://localhost:3306/tiku", "root", "buptmm");
    }

    /** Closes the given JDBC objects in order, skipping nulls; close failures are only logged. */
    private static void closeJdbc(ResultSet rs, Statement stmt, Connection conn) {
        if (rs != null) {
            try {
                rs.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
        if (stmt != null) {
            try {
                stmt.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
        if (conn != null) {
            try {
                conn.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Rebuilds the index from scratch: every row of {@code math} becomes one document with
     * a single stored, analyzed {@code question} field. An existing index in
     * {@link #INDEX_DIRECTORY} is overwritten.
     */
    public void createIndex() {
        System.out.println("-- Indexing --");
        Connection conn = null;
        Statement stmt = null;
        ResultSet rs = null;
        IndexWriter iWriter = null;
        try {
            // JDBC section: the tiku database and its math table must already exist.
            conn = openConnection();
            stmt = conn.createStatement();
            String sql = "select question from math";
            rs = stmt.executeQuery(sql);
            // Lucene section: "true" means create a new index, replacing any old one.
            Directory directory = new SimpleFSDirectory(INDEX_DIRECTORY);
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
            iWriter = new IndexWriter(directory, analyzer, true,MaxFieldLength.UNLIMITED);
            int count = 0;
            while(rs.next()) {
                Document doc = new Document();
                doc.add(new Field("question", rs.getString("question"), Field.Store.YES, Field.Index.ANALYZED ));
                iWriter.addDocument(doc);
                count++;
            }
            System.out.println(count+" record indexed");
            iWriter.optimize();
            iWriter.commit();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Always close the writer, even after a failure: an unclosed writer keeps
            // holding the index write lock inside a long-running process.
            if (iWriter != null) {
                try {
                    iWriter.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
            closeJdbc(rs, stmt, conn);
        }
    }

    /**
     * Runs {@code keyword} through the Lucene query parser against the {@code question}
     * field and returns the best-scoring stored question.
     *
     * <p>The keyword is interpreted as query syntax, so wildcards and operators such as
     * {@code AND}/{@code OR} are honoured.
     *
     * @param keyword query text in Lucene query-parser syntax
     * @return the text of the top hit, or an empty string when nothing matches or the
     *         search fails
     */
    public String search(String keyword) {
        System.out.println("-- Seaching --");
        String result = "";
        IndexReader reader = null;
        IndexSearcher searcher = null;
        try {
            reader = IndexReader.open(FSDirectory.open(INDEX_DIRECTORY), true);
            searcher = new IndexSearcher(reader);
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
            QueryParser parser = new QueryParser(Version.LUCENE_30, "question" , analyzer);
            Query query = parser.parse(keyword);
            System.out.println("query >> " + keyword);
            TopDocs hits = searcher.search(query, 5);
            System.out.println("Results found >> " + hits.totalHits);
            // totalHits counts every match, but scoreDocs holds at most the 5 requested
            // entries, so test the array itself before taking the best hit.
            if (hits.scoreDocs.length > 0) {
                Document doc = searcher.doc(hits.scoreDocs[0].doc);
                System.out.println(doc.get("question"));
                result = doc.get("question");
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // A searcher built from a reader does not close that reader, so close both.
            if (searcher != null) {
                try {
                    searcher.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
            if (reader != null) {
                try {
                    reader.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }
        return result;
    }

    /**
     * Looks up the answer stored for exactly the given question text.
     *
     * @param que full question text, typically the value returned by {@link #search}
     * @return the answer of the first matching row; an empty string when {@code que} is
     *         null or empty, no row matches, the stored answer is NULL, or the lookup fails
     */
    public String getResult(String que){
        if(que == null || que.equals(""))
            return "";
        Connection conn = null;
        PreparedStatement stmt = null;
        ResultSet rs = null;
        try{
            conn = openConnection();
            // Bind the question as a parameter: question texts routinely contain quotes
            // and other characters that would break (or subvert) a concatenated query.
            String sql = "select answer from math where question=?";
            stmt = conn.prepareStatement(sql);
            stmt.setString(1, que);
            System.out.println("sql = " + sql + " [question=" + que + "]");
            rs = stmt.executeQuery();
            if(rs.next()) {
                String answer = rs.getString("answer");
                return answer == null ? "" : answer;
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            closeJdbc(rs, stmt, conn);
        }
        return "";
    }

    /** Demo entry point: rebuilds the index, searches for "5" and prints the hit and its answer. */
    public static void main(String[] args) {
        LuceneExample obj = new LuceneExample();
        obj.createIndex();
        // Query-parser syntax also works here, e.g. "data1 OR data2" or "data1 AND data2".
        String question = obj.search("5");
        System.out.println("a1: " + question);
        System.out.println("a1: " + obj.getResult(question));
    }
}
test.php源码
<?php
// Demo page: asks the Java class LuceneExample (reached through PHP/Java Bridge) for the
// stored question that best matches a keyword, then prints that question and its answer.
require_once("JavaBridge/java/Java.inc");

// Declare the charset explicitly so the browser does not have to guess it; this assumes
// this file itself is saved as UTF-8.
header('Content-Type: text/html; charset=utf-8');

$tf = new Java('LuceneExample');
// Rebuilds the whole index on every request. Fine for a demo; for real use build the
// index once, outside the request path.
$tf->createIndex();

// The keyword is hard-coded for the demo; to accept user input, read it from
// $_POST['question'] instead.
$q = java_values($tf->search("王阿姨"));

// The texts come from data scraped off the web, so escape them before embedding in HTML.
// The unescaped $q is still what gets passed back to Java for the answer lookup.
print "题目:".htmlspecialchars((string)$q, ENT_QUOTES, 'UTF-8');
print "<br/>";
$a = java_values($tf->getResult($q));
print "答案:".htmlspecialchars((string)$a, ENT_QUOTES, 'UTF-8');
?>
json数据
[{"Q": "王阿姨买了3千克龙眼和8千克西瓜一共花了46元。已知1千克西瓜的价钱正好是1千克龙眼的1/5。龙眼和西瓜的单价分别是多少元?(5分)", "A": "设1千克西瓜的价钱是x,那么龙眼的价钱是5x,3*5x+8x=46,x=2,所以龙眼的单价是10元,西瓜的单价是2元"},{"Q": "有13个乒乓球,有12个质量相同,另有一个较轻一点,如果用天平称,至少称 次保证能找出这个乒乓球.", "A": "http://www.tiku.cn/q/1010405.html"}, {"Q": "有9瓶钙片,次品的一瓶少了4片.用天平至少称 次可以保证找出次品.", "A": "http://www.tiku.cn/q/1010406.html"}]