Solr将数据库中table中的数据导入到索引

Solr将数据库中table中的数据导入到索引

版权信息: 可以任意转载, 转载时请务必以超链接形式标明文章原文出处, 即下面的声明.
原文出处:http://tbwuming.iteye.com/blog/1152333

1、首先在MySQL中创建表,并写入数据

-- ----------------------------
-- Table structure for course
-- Sample table whose rows are later imported into Solr. The column
-- names (id, user, title, content, time) are the same as the field
-- names declared in the schema.xml shown in this article.
-- ----------------------------
CREATE TABLE `course` (
-- Primary key; also used as the Solr uniqueKey.
`id` int(11) NOT NULL,
`user` varchar(20) NOT NULL,
`title` varchar(50) NOT NULL,
`content` varchar(200) NOT NULL,
-- NOTE(review): a zero-date default is rejected when the server runs with
-- the NO_ZERO_DATE / strict sql_mode; confirm the target server's sql_mode.
`time` datetime NOT NULL DEFAULT '0000-00-00 00:00:00',
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;



-- ----------------------------
-- Records: three sample rows for `course`.
-- Columns are listed explicitly so each value is tied to its column by name.
-- ----------------------------
INSERT INTO `course` (`id`, `user`, `title`, `content`, `time`)
VALUES ('1', 'tiehong', '搜索', '这一小节讲搜索引擎,可以通过搜索引擎查找一些数据', '2011-08-18 10:41:35');
INSERT INTO `course` (`id`, `user`, `title`, `content`, `time`)
VALUES ('2', 'chenyun', '存储', '这一小节讲分布式存储,可以存储很多数据', '2011-08-18 10:42:19');
INSERT INTO `course` (`id`, `user`, `title`, `content`, `time`)
VALUES ('3', 'lengda', '数据导入', '这一小节讲数据导入,可以从mysql数据库中导入数据', '2011-08-18 10:42:59');


2、修改apache-solr-3.3.0\example\solr\conf\schema.xml文件
<?xml version="1.0" encoding="UTF-8" ?>
<!-- Schema for the course index: one Solr field per column of the course table. -->
<schema name="example" version="1.4">
  <types>
    <!-- Integer type, used by the id field. -->
    <fieldType name="tint"
               class="solr.TrieIntField"
               precisionStep="8"
               omitNorms="true"
               positionIncrementGap="0"/>

    <!-- Untokenized string type, used by the user field. -->
    <fieldType name="string"
               class="solr.StrField"
               sortMissingLast="true"
               omitNorms="true"/>

    <!-- Analyzed text type, used by the title and content fields. -->
    <fieldType name="text" class="solr.TextField" positionIncrementGap="100">
      <!-- Index time: tokenize, drop stop words listed in stopwords.txt, lowercase. -->
      <analyzer type="index">
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.StopFilterFactory"
                ignoreCase="true"
                words="stopwords.txt"
                enablePositionIncrements="true"/>
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>
      <!-- Query time: same chain as index time, plus synonyms from synonyms.txt. -->
      <analyzer type="query">
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.StopFilterFactory"
                ignoreCase="true"
                words="stopwords.txt"
                enablePositionIncrements="true"/>
        <filter class="solr.SynonymFilterFactory"
                synonyms="synonyms.txt"
                ignoreCase="true"
                expand="true"/>
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>
    </fieldType>

    <!-- Date type, used by the time field. -->
    <fieldType name="date"
               class="solr.TrieDateField"
               omitNorms="true"
               precisionStep="0"
               positionIncrementGap="0"/>
  </types>

  <!-- Field names are identical to the column names of the course table. -->
  <fields>
    <field name="id" type="tint" indexed="true" stored="true" required="true"/>
    <field name="user" type="string" indexed="true" stored="true"/>
    <field name="title" type="text" indexed="true" stored="true"/>
    <field name="content" type="text" indexed="true" stored="true"/>
    <field name="time" type="date" indexed="true" stored="true" default="NOW"/>
  </fields>

  <!-- The unique key is an integer field, not a StrField; this is what the
       QueryElevationComponent startup error shown later in this article complains about. -->
  <uniqueKey>id</uniqueKey>
  <defaultSearchField>title</defaultSearchField>
  <solrQueryParser defaultOperator="AND"/>

</schema>



3、编辑apache-solr-3.3.0\example\solr\conf\solrconfig.xml文件,添加内容如下:
<!-- Registers the DataImportHandler under /dataimport; its import settings
     are read from data-config.xml. -->
<requestHandler name="/dataimport"
                class="org.apache.solr.handler.dataimport.DataImportHandler">
  <lst name="defaults">
    <str name="config">data-config.xml</str>
  </lst>
</requestHandler>



4、创建data-config.xml文件,放在apache-solr-3.3.0\example\solr\conf\目录下,文件内容如下:
<dataConfig>
  <!-- JDBC connection to MySQL; dbname, user-name and password are
       placeholders to be replaced with real values. -->
  <dataSource type="JdbcDataSource"
              driver="com.mysql.jdbc.Driver"
              url="jdbc:mysql://localhost/dbname"
              user="user-name"
              password="password"/>
  <document>
    <!-- Imports every row of the course table. No field mappings are given,
         so the column names must match the field names in schema.xml. -->
    <entity name="id" query="select * from course"/>
  </document>
</dataConfig>


5、将MySQL的JDBC驱动放在apache-solr-3.3.0\example\lib目录下

6、启动Solr服务,命令行进入apache-solr-3.3.0\example\目录,运行 java -jar start.jar
出现问题:
严重: org.apache.solr.common.SolrException: QueryElevationComponent requires the
schema to have a uniqueKeyField implemented using StrField at org.apache.solr.handler.component.QueryElevationComponent.inform(QueryElevationComponent.java:158)
at org.apache.solr.core.SolrResourceLoader.inform(SolrResourceLoader.java:522)
at org.apache.solr.core.SolrCore.<init>(SolrCore.java:594)
at org.apache.solr.core.CoreContainer.create(CoreContainer.java:463)
at org.apache.solr.core.CoreContainer.load(CoreContainer.java:316)
at org.apache.solr.core.CoreContainer.load(CoreContainer.java:207)
at org.apache.solr.core.CoreContainer$Initializer.initialize(CoreContainer.java:130)
at org.apache.solr.servlet.SolrDispatchFilter.init(SolrDispatchFilter.java:94)
at org.mortbay.jetty.servlet.FilterHolder.doStart(FilterHolder.java:97)
at org.mortbay.component.AbstractLifeCycle.start(AbstractLifeCycle.java:50)
at org.mortbay.jetty.servlet.ServletHandler.initialize(ServletHandler.java:713)
at org.mortbay.jetty.servlet.Context.startContext(Context.java:140)
at org.mortbay.jetty.webapp.WebAppContext.startContext(WebAppContext.java:1282)
at org.mortbay.jetty.handler.ContextHandler.doStart(ContextHandler.java:518)
at org.mortbay.jetty.webapp.WebAppContext.doStart(WebAppContext.java:499)
at org.mortbay.component.AbstractLifeCycle.start(AbstractLifeCycle.java:50)
at org.mortbay.jetty.handler.HandlerCollection.doStart(HandlerCollection.java:152)
at org.mortbay.jetty.handler.ContextHandlerCollection.doStart(ContextHandlerCollection.java:156)
at org.mortbay.component.AbstractLifeCycle.start(AbstractLifeCycle.java:50)
at org.mortbay.jetty.handler.HandlerCollection.doStart(HandlerCollection.java:152)
at org.mortbay.component.AbstractLifeCycle.start(AbstractLifeCycle.java:50)
at org.mortbay.jetty.handler.HandlerWrapper.doStart(HandlerWrapper.java:130)
at org.mortbay.jetty.Server.doStart(Server.java:224)
at org.mortbay.component.AbstractLifeCycle.start(AbstractLifeCycle.java:50)
at org.mortbay.xml.XmlConfiguration.main(XmlConfiguration.java:985)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
at java.lang.reflect.Method.invoke(Method.java:597)
at org.mortbay.start.Main.invokeMain(Main.java:194)
at org.mortbay.start.Main.start(Main.java:534)
at org.mortbay.start.Main.start(Main.java:441)
at org.mortbay.start.Main.main(Main.java:119)

解决:
修改apache-solr-3.3.0\example\solr\conf\solrconfig.xml文件,删除内容如下:

<!-- Query Elevation Component
     http://wiki.apache.org/solr/QueryElevationComponent

     a search component that enables you to configure the top
     results for a given query regardless of the normal lucene
     scoring.
  -->
<searchComponent name="elevator" class="solr.QueryElevationComponent">
  <!-- pick a fieldType to analyze queries -->
  <str name="queryFieldType">string</str>
  <str name="config-file">elevate.xml</str>
</searchComponent>

<!-- A request handler for demonstrating the elevator component -->
<requestHandler name="/elevate" class="solr.SearchHandler" startup="lazy">
  <lst name="defaults">
    <str name="echoParams">explicit</str>
  </lst>
  <arr name="last-components">
    <str>elevator</str>
  </arr>
</requestHandler>


wiki上的解释:
[quote]The QueryElevationComponent enables you to configure the top results for a given query regardless of the normal lucene scoring. This is sometimes called "sponsored search", "editorial boosting" or "best bets". This component matches the user query text to a configured Map of top results. Although this component will work with any QueryParser, it makes the most sense to use with DisMax style queries.[/quote]
我的理解:相当于竞价排名,无视搜索的排序,可以额外在顶部配置自定义的结果


7、运行命令:在浏览器中输入:[url]http://localhost:8983/solr/dataimport?command=full-import[/url] 来完成全量数据导入,在每次全量数据导入执行的时候,原有索引会被删除,如果不想删除原有索引,可以运行如下命令:[url]http://localhost:8983/solr/dataimport?command=full-import&clean=false[/url] ,全量数据导入在浏览器中显示的效果如下:

This XML file does not appear to have any style information associated with it. The document tree is shown below.
<response>
<lst name="responseHeader">
<int name="status">0</int>
<int name="QTime">5</int>
</lst>
<lst name="initArgs">
<lst name="defaults">
<str name="config">data-config.xml</str>
</lst>
</lst>
<str name="command">full-import</str>
<str name="status">idle</str>
<str name="importResponse"/>
<lst name="statusMessages"/>
<str name="WARNING">
This response format is experimental. It is likely to change in the future.
</str>
</response>


8、进行搜索,在浏览器中输入[url]http://localhost:8983/solr/admin/[/url]

[img]http://dl.iteye.com/upload/attachment/538748/3a0c7fc6-96d5-3fe0-9e65-9fb5612c6ef9.png[/img]


搜索结果如下:
This XML file does not appear to have any style information associated with it. The document tree is shown below.
<response>
<lst name="responseHeader">
<int name="status">0</int>
<int name="QTime">0</int>
<lst name="params">
<str name="indent">on</str>
<str name="start">0</str>
<str name="q">*:*</str>
<str name="version">2.2</str>
<str name="rows">10</str>
</lst>
</lst>
<result name="response" numFound="3" start="0">
<doc>
<str name="content">这一小节讲搜索引擎,可以通过搜索引擎查找一些数据</str>
<int name="id">1</int>
<date name="time">2011-08-18T02:41:35Z</date>
<str name="title">搜索</str>
<str name="user">tiehong</str>
</doc>
<doc>
<str name="content">这一小节讲分布式存储,可以存储很多数据</str>
<int name="id">2</int>
<date name="time">2011-08-18T02:42:19Z</date>
<str name="title">存储</str>
<str name="user">chenyun</str>
</doc>
<doc>
<str name="content">这一小节讲数据导入,可以从mysql数据库中导入数据</str>
<int name="id">3</int>
<date name="time">2011-08-18T02:42:59Z</date>
<str name="title">数据导入</str>
<str name="user">lengda</str>
</doc>
</result>
</response>


9、以不同的名字索引域
上面data-config.xml配置中,MySQL表的列名与schema.xml中域(field)的名字相同,因此无需显式映射;也可以通过field元素把列映射到不同名字的域,如下:
<dataConfig>
  <!-- JDBC connection to MySQL; dbname, user-name and password are
       placeholders to be replaced with real values. -->
  <dataSource type="JdbcDataSource"
              driver="com.mysql.jdbc.Driver"
              url="jdbc:mysql://localhost/dbname"
              user="user-name"
              password="password"/>
  <document>
    <!-- desc is a reserved word in MySQL, so it is quoted with backticks;
         unquoted, the query fails with a SQL syntax error. -->
    <entity name="id"
            query="select id, name, `desc` from mytable">
      <!-- Each field element maps a result-set column to a differently named Solr field. -->
      <field column="id" name="solr_id"/>
      <field column="name" name="solr_name"/>
      <field column="desc" name="solr_desc"/>
    </entity>
  </document>
</dataConfig>

此时,solr_id、solr_name、solr_desc将被写入Solr索引中对应的域,务必注意要在schema.xml文件中配置好这些域

10、从多个表索引数据到Solr
修改data-config.xml文件
<dataConfig>
  <!-- JDBC connection to MySQL; dbname, user-name and password are
       placeholders to be replaced with real values. -->
  <dataSource type="JdbcDataSource"
              driver="com.mysql.jdbc.Driver"
              url="jdbc:mysql://localhost/dbname"
              user="user-name"
              password="password"/>
  <document>
    <!-- Outer entity: one Solr document per row of mytable.
         desc is a reserved word in MySQL, so it is quoted with backticks;
         unquoted, the query fails with a SQL syntax error. -->
    <entity name="outer"
            query="select id, name, `desc` from mytable">
      <field column="id" name="solr_id"/>
      <field column="name" name="solr_name"/>
      <field column="desc" name="solr_desc"/>
      <!-- Inner entity: queried once per outer row, using that row's id,
           and adds its details column to the same Solr document. -->
      <entity name="inner"
              query="select details from another_table where id ='${outer.id}'">
        <field column="details" name="solr_details"/>
      </entity>
    </entity>
  </document>
</dataConfig>

注意:schema.xml中还应该有solr_details域

11、参考:
Solr Data Import 快速入门:[url]http://blog.chenlb.com/2010/03/solr-data-import-quick-start.html[/url]
Index a DB table directly into Solr:[url]http://wiki.apache.org/solr/DIHQuickStart[/url]
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值