1. 在 eclipse 中新建 maven 项目 solr,并在 pom.xml 中加入 SolrJ 依赖(org.apache.solr:solr-solrj,版本与 solr 服务端保持一致,例如 7.5.0)
1.1 在项目下新建类 updoctest,代码如下:
package com.linbin.solr;
import java.io.File;
import java.io.IOException;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.request.AbstractUpdateRequest.ACTION;
import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
/**
 * Small SolrJ demo that talks to a single Solr core: it can list documents,
 * delete documents, and upload a rich document (doc, pdf, ...) through the
 * {@code /update/extract} request handler.
 *
 * <p>Every method builds its own short-lived client from {@link #solrUrl} and
 * closes it before returning.
 */
public class updoctest {
    /** Base URL of the Solr core used by every method in this class. */
    public static String solrUrl = "http://centos7:8983/solr/mycore";

    /**
     * Entry point: uploads one sample Word document to the core.
     *
     * @param args unused
     * @throws IOException         if the file cannot be read or the HTTP call fails
     * @throws SolrServerException if Solr reports an error
     */
    public static void main(String[] args) throws IOException, SolrServerException {
        // Query: findIndex1();
        // Delete: deleteIndexById();
        // Import a doc document.
        String fileName = "/home/linbin/文档/能工巧匠进校园.doc";
        String solrId = "能工巧匠进校园.doc";
        // The id doubles as the "file name" used to pick the content type.
        indexFilesSolrCell(solrId, solrId, fileName);
    }

    /**
     * Query test: runs a match-all query limited to 10 rows and prints the total
     * hit count followed by the id, author and text fields of each returned document.
     *
     * @throws IOException         on communication failure
     * @throws SolrServerException if Solr reports an error
     */
    public static void findIndex1() throws IOException, SolrServerException {
        // try-with-resources closes the client even when the query throws.
        try (HttpSolrClient solrClient = new HttpSolrClient.Builder(solrUrl).build()) {
            SolrQuery query = new SolrQuery(); // search object
            query.set("q", "*:*");             // search condition: match everything
            query.setRows(10);                 // rows per page
            QueryResponse response = solrClient.query(query); // send the search request
            SolrDocumentList docs = response.getResults();    // result list
            long cnt = docs.getNumFound();                    // total number of hits
            System.out.println("总条数为" + cnt + "条");
            for (SolrDocument doc : docs) {
                System.out.println("-------------\r\n");
                System.out.println("id:" + doc.get("id")
                        + ",author:" + doc.get("author")
                        + ",text:" + doc.get("text"));
            }
        }
    }

    /**
     * Delete test: deletes by the query {@code id:solr-word.pdf} and commits.
     *
     * @throws IOException         on communication failure
     * @throws SolrServerException if Solr reports an error
     */
    public static void deleteIndexById() throws IOException, SolrServerException {
        try (HttpSolrClient solrClient = new HttpSolrClient.Builder(solrUrl).build()) {
            // Delete everything: solrClient.deleteByQuery("*:*");
            // Delete by query (author's note: matching is subject to the field's analysis).
            solrClient.deleteByQuery("id:solr-word.pdf");
            // Delete one document by id: solrClient.deleteById("1");
            solrClient.commit();
        }
    }

    /**
     * Document import test: posts one local file to {@code /update/extract} and
     * commits.
     *
     * @param fileName name whose extension selects the content type
     *                 (see {@link #getFileContentType(String)})
     * @param solrId   value sent as {@code literal.id}, i.e. the id of the new document
     * @param path     location of the file to upload on the local file system
     * @throws IOException         if the file cannot be read or the HTTP call fails
     * @throws SolrServerException if Solr reports an error
     */
    public static void indexFilesSolrCell(String fileName, String solrId, String path)
            throws IOException, SolrServerException {
        try (SolrClient solr = new HttpSolrClient.Builder(solrUrl).build()) {
            ContentStreamUpdateRequest up = new ContentStreamUpdateRequest("/update/extract");
            up.addFile(new File(path), getFileContentType(fileName));
            up.setParam("literal.id", solrId);
            // Prefix applied to extracted fields that should not be mapped.
            up.setParam("uprefix", "ignored_");
            // Map the extracted file content to the "text" field.
            up.setParam("fmap.content", "text");
            // Commit as part of this request, waiting for flush and for the new searcher.
            up.setAction(ACTION.COMMIT, true, true);
            solr.request(up);
            System.out.println("upload ok! \r\n");
        }
    }

    /**
     * Returns the content type for a file name based on its extension.
     *
     * <p>The extension is the text after the last {@code '.'} and is compared
     * case-insensitively. Names without a dot, and unrecognised extensions,
     * yield {@code "othertype"}.
     *
     * @param filename file name or path; must not be {@code null}
     * @return the matching MIME type, or {@code "othertype"} when none matches
     */
    public static String getFileContentType(String filename) {
        int dot = filename.lastIndexOf('.');
        if (dot < 0) {
            // No extension at all: do not mistake the whole name for one.
            return "othertype";
        }
        String extension = filename.substring(dot + 1).toLowerCase(java.util.Locale.ROOT);
        switch (extension) {
            case "xlsx":
                return "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
            case "pdf":
                return "application/pdf";
            case "doc":
                return "application/msword";
            case "txt":
                return "text/plain";
            case "xls":
                return "application/vnd.ms-excel";
            case "docx":
                return "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
            case "ppt":
                return "application/vnd.ms-powerpoint";
            case "pptx":
                return "application/vnd.openxmlformats-officedocument.presentationml.presentation";
            default:
                return "othertype";
        }
    }
}
2. 在solr的core目录下的solrconfig.xml增加如下内容:
<requestHandler name="/update/extract" startup="lazy" class="solr.extraction.ExtractingRequestHandler">
<lst name="defaults">
<str name="lowernames">true</str>
<str name="uprefix">ignored_</str>
<str name="fmap.content">text</str>
</lst>
</requestHandler>
其中 <str name="uprefix">ignored_</str> 的作用是:读取文件时,凡是 schema 中没有定义的字段都加上 ignored_ 前缀,从而被忽略掉
<str name="fmap.content">text</str> 的作用是:把提取出来的 content 字段(即文件正文)映射为 solr 的 text 字段
3. 修改 managed-schema 文件,增加
<dynamicField name="ignored_*" type="ignored" multiValued="true"/>
<fieldType name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
这里定义了一个动态字段 ignored_*,类型为 ignored(不索引、不存储),用来承接上面被忽略的那些字段
4. 检查 solrconfig.xml
<lib dir="${solr.install.dir:../../..}/dist/" regex="solr-dataimporthandler-.*\.jar"/>
<lib dir="${solr.install.dir:../../..}/contrib/extraction/lib" regex=".*\.jar"/>
检查以上路径是否与实际安装目录匹配;其中的相对路径是相对于所建立的 mycore 目录来解析的
5. 在mycore目录下建立lib目录(如果没有)
复制 solr-7.5.0/contrib/extraction/lib下的所有文件 到mycore/lib目录
复制 solr-7.5.0/dist/solr-cell-7.5.0.jar 到mycore/lib目录
6. 重新启动 solr;如能正常启动,再在 eclipse 中运行第 1 步建立的 java 程序
7. 在solr网页查询可以检查到已上传doc文件的索引