Configuration file for a simple index

Index mode (stable, fast, able to resynchronize and recover quickly; data within a single node can also be synchronized quickly)
0. Works against the database
1. Indexes data normally
2. Pushes indexed data incrementally to the cluster nodes
3. When the master index system fails, it can recover quickly after a restart and synchronize its data to the cluster nodes in time
4. When the master server of a cluster node fails, the system quickly probes the availability of the other machines in that node and temporarily designates one of them as the node's master server; the index server records this failure, and once the original master recovers, its index data is synchronized back (see the sketch below)
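Requirement 4 amounts to a per-node failover check. The article does not include an implementation, so the following is only a minimal Java sketch under assumed names (NodeFailoverMonitor, Address): it probes each <address> of a cluster node over TCP, and if the configured master is unreachable it temporarily promotes the first reachable replica and records the event so the original master can be resynchronized later.

```java
import java.io.IOException;
import java.net.InetSocketAddress;
import java.net.Socket;
import java.util.List;

// Hypothetical sketch only; these are not the article's actual classes.
public class NodeFailoverMonitor {

    /** One <address> entry from the config: seq, ip, port, master flag. */
    public static class Address {
        final int seq;
        final String ip;
        final int port;
        boolean master;

        Address(int seq, String ip, int port, boolean master) {
            this.seq = seq;
            this.ip = ip;
            this.port = port;
            this.master = master;
        }
    }

    /** Returns true if a TCP connection to ip:port succeeds within the timeout. */
    static boolean isReachable(Address a, int timeoutMs) {
        try (Socket s = new Socket()) {
            s.connect(new InetSocketAddress(a.ip, a.port), timeoutMs);
            return true;
        } catch (IOException e) {
            return false;
        }
    }

    /**
     * If the current master is down, promote the first reachable replica
     * and record the event so the index can be synced back later.
     */
    static void checkAndFailover(List<Address> node) {
        Address master = node.stream().filter(a -> a.master).findFirst().orElse(null);
        if (master == null || isReachable(master, 2000)) {
            return; // no master configured, or master still alive: nothing to do
        }
        for (Address candidate : node) {
            if (!candidate.master && isReachable(candidate, 2000)) {
                master.master = false;
                candidate.master = true;
                // The real system would also record this in the log database
                // (dbdata.log) so the old master can be resynchronized on return.
                System.out.printf("master %s:%d down, promoted %s:%d%n",
                        master.ip, master.port, candidate.ip, candidate.port);
                return;
            }
        }
    }
}
```

A plain TCP connect is used here only as a stand-in for whatever health check the real index servers expose.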


```xml
<?xml version="1.0" encoding="UTF-8"?>
<kelo.com>
<Index.search-ref>
<businessid>1</businessid>
<description>Architecture: an index system for distributed search (assume a distributed search system made up of three nodes (which can be scaled out linearly), each node consisting of three machines (also scalable linearly) for redundancy)</description>
<!--2*60*1000 ms-->
<intervaltime>120000</intervaltime>
<recordnum>1000</recordnum>
<cyclecount>20</cyclecount>
<!-- unit: ms -->
<sleeptime>10000</sleeptime>
<!-- retry increment when no data can be fetched; unit: ms (2*24*60*60*1000 ms = two days) -->
<increatment>172800000</increatment>
<!-- delete data older than this many days; unit: hours (default 10*24, here 220) -->
<ndaybeferorenotetable>220</ndaybeferorenotetable>
<clusters>
<description>A cluster node may contain multiple servers that perform the same function, and one of them must be designated as the master server</description>
<cluster>
<name>Distributed search cluster node 1</name>
<node>1</node>
<!-- enabled or not -->
<use>1</use>
<addresss>
<address>
<seq>1</seq>
<ip>192.168.1.1</ip>
<port>8089</port>
<master>1</master>
</address>
<address>
<seq>2</seq>
<ip>192.168.1.2</ip>
<port>8089</port>
<master>0</master>
</address>
<address>
<seq>3</seq>
<ip>192.168.1.3</ip>
<port>8089</port>
<master>0</master>
</address>
</addresss>
<!-- the data range is approximate; e.g. 0,0 means no ID range limit -->
<idrange>0,10000000</idrange>
<!-- the idrange can be split into sub-ranges to speed up indexing -->
<shard>10</shard>

<!-- start time for incremental indexing; 0 means start from the smallest ID in the database -->
<indexstarttime_update>2010-06-09 14:06:11.387</indexstarttime_update>
<!-- database connection settings -->
<dbdata.business>
<dbid>1</dbid>
<dbname>测试</dbname>
<serverip>10.1.3.252</serverip>
<db-driver>com.microsoft.sqlserver.jdbc.SQLServerDriver</db-driver>
<db-url>jdbc:sqlserver://10.1.3.252;databaseName=xxxxxx;SelectMethod=cursor</db-url>
<set-tran-level>false</set-tran-level>
<select-tran-level>1</select-tran-level>
<update-tran-level>2</update-tran-level>
<max-connections>14</max-connections>
<db-user>xxx</db-user>
<db-password>xxx</db-password>
</dbdata.business>
<dbdata.indexsystem>
<dbid>1</dbid>
<dbname>测试</dbname>
<serverip>10.1.3.252</serverip>
<db-driver>com.microsoft.sqlserver.jdbc.SQLServerDriver</db-driver>
<db-url>jdbc:sqlserver://10.1.3.252;databaseName=xxxxxx;SelectMethod=cursor</db-url>
<set-tran-level>false</set-tran-level>
<select-tran-level>1</select-tran-level>
<update-tran-level>2</update-tran-level>
<max-connections>14</max-connections>
<db-user>xxx</db-user>
<db-password>xxx</db-password>
</dbdata.indexsystem>
<dbdata.log>
<dbid>17</dbid>
<dbname>日志相关</dbname>
<serverip>10.1.3.252</serverip>
<db-driver>com.microsoft.sqlserver.jdbc.SQLServerDriver</db-driver>
<db-url>jdbc:sqlserver://10.1.3.252;databaseName=数据库;SelectMethod=cursor</db-url>
<set-tran-level>false</set-tran-level>
<select-tran-level>1</select-tran-level>
<update-tran-level>2</update-tran-level>
<max-connections>16</max-connections>
<db-user>xxx</db-user>
<db-password>xxx</db-password>
</dbdata.log>
<keloIndex>
<businessseq>1</businessseq>
<businessname>业务名</businessname>
<businessindexpath>F:\file\index\业务名代号\结点\业务表\index</businessindexpath>
<businesslogfilename>F:\file\index\业务名代号\结点\业务表\Log\index.txt</businesslogfilename>
<businesslogpath>F:\file\index\业务名代号\结点\业务表\Log</businesslogpath>
</keloIndex>
</cluster>
<cluster>
<name>Distributed search cluster node 2</name>
<node>2</node>
<!-- enabled or not -->
<use>1</use>
<addresss>
<address>
<seq>1</seq>
<ip>192.168.1.21</ip>
<port>8089</port>
<master>1</master>
</address>
<address>
<seq>2</seq>
<ip>192.168.1.22</ip>
<port>8089</port>
<master>0</master>
</address>
<address>
<seq>3</seq>
<ip>192.168.1.23</ip>
<port>8089</port>
<master>0</master>
</address>
</addresss>
<idrange>10000000,20000000</idrange>
<!-- the idrange can be split into sub-ranges to speed up indexing -->
<shard>10</shard>

<!-- start time for incremental indexing; 0 means start from the smallest ID in the database -->
<indexstarttime_update>2010-06-09 14:06:11.387</indexstarttime_update>
<!-- database connection settings -->
<dbdata.business>
<dbid>1</dbid>
<dbname>测试</dbname>
<serverip>10.1.3.252</serverip>
<db-driver>com.microsoft.sqlserver.jdbc.SQLServerDriver</db-driver>
<db-url>jdbc:sqlserver://10.1.3.252;databaseName=xxxxxx;SelectMethod=cursor</db-url>
<set-tran-level>false</set-tran-level>
<select-tran-level>1</select-tran-level>
<update-tran-level>2</update-tran-level>
<max-connections>14</max-connections>
<db-user>xxx</db-user>
<db-password>xxx</db-password>
</dbdata.business>
<dbdata.indexsystem>
<dbid>1</dbid>
<dbname>测试</dbname>
<serverip>10.1.3.252</serverip>
<db-driver>com.microsoft.sqlserver.jdbc.SQLServerDriver</db-driver>
<db-url>jdbc:sqlserver://10.1.3.252;databaseName=xxxxxx;SelectMethod=cursor</db-url>
<set-tran-level>false</set-tran-level>
<select-tran-level>1</select-tran-level>
<update-tran-level>2</update-tran-level>
<max-connections>14</max-connections>
<db-user>xxx</db-user>
<db-password>xxx</db-password>
</dbdata.indexsystem>
<dbdata.log>
<dbid>17</dbid>
<dbname>日志相关</dbname>
<serverip>10.1.3.252</serverip>
<db-driver>com.microsoft.sqlserver.jdbc.SQLServerDriver</db-driver>
<db-url>jdbc:sqlserver://10.1.3.252;databaseName=数据库;SelectMethod=cursor</db-url>
<set-tran-level>false</set-tran-level>
<select-tran-level>1</select-tran-level>
<update-tran-level>2</update-tran-level>
<max-connections>16</max-connections>
<db-user>xxx</db-user>
<db-password>xxx</db-password>
</dbdata.log>
<keloIndex>
<businessseq>1</businessseq>
<businessname>业务名</businessname>
<businessindexpath>F:\file\index\业务名代号\结点\业务表\index</businessindexpath>
<businesslogfilename>F:\file\index\业务名代号\结点\业务表\Log\index.txt</businesslogfilename>
<businesslogpath>F:\file\index\业务名代号\结点\业务表\Log</businesslogpath>
</keloIndex>

</cluster>

<cluster>
<name>Distributed search cluster node 3</name>
<node>3</node>
<!-- enabled or not -->
<use>1</use>
<addresss>
<address>
<seq>1</seq>
<ip>192.168.1.41</ip>
<port>8089</port>
<master>1</master>
</address>
<address>
<seq>2</seq>
<ip>192.168.1.42</ip>
<port>8089</port>
<master>0</master>
</address>
<address>
<seq>3</seq>
<ip>192.168.1.43</ip>
<port>8089</port>
<master>0</master>
</address>
</addresss>
<idrange>20000000,30000000</idrange>
<!-- the idrange can be split into sub-ranges to speed up indexing -->
<shard>10</shard>

<!-- start time for incremental indexing; 0 means start from the smallest ID in the database -->
<indexstarttime_update>2010-06-09 14:06:11.387</indexstarttime_update>
<!-- database connection settings -->
<dbdata.business>
<dbid>1</dbid>
<dbname>测试</dbname>
<serverip>10.1.3.252</serverip>
<db-driver>com.microsoft.sqlserver.jdbc.SQLServerDriver</db-driver>
<db-url>jdbc:sqlserver://10.1.3.252;databaseName=xxxxxx;SelectMethod=cursor</db-url>
<set-tran-level>false</set-tran-level>
<select-tran-level>1</select-tran-level>
<update-tran-level>2</update-tran-level>
<max-connections>14</max-connections>
<db-user>xxx</db-user>
<db-password>xxx</db-password>
</dbdata.business>
<dbdata.indexsystem>
<dbid>1</dbid>
<dbname>测试</dbname>
<serverip>10.1.3.252</serverip>
<db-driver>com.microsoft.sqlserver.jdbc.SQLServerDriver</db-driver>
<db-url>jdbc:sqlserver://10.1.3.252;databaseName=xxxxxx;SelectMethod=cursor</db-url>
<set-tran-level>false</set-tran-level>
<select-tran-level>1</select-tran-level>
<update-tran-level>2</update-tran-level>
<max-connections>14</max-connections>
<db-user>xxx</db-user>
<db-password>xxx</db-password>
</dbdata.indexsystem>
<dbdata.log>
<dbid>17</dbid>
<dbname>日志相关</dbname>
<serverip>10.1.3.252</serverip>
<db-driver>com.microsoft.sqlserver.jdbc.SQLServerDriver</db-driver>
<db-url>jdbc:sqlserver://10.1.3.252;databaseName=数据库;SelectMethod=cursor</db-url>
<set-tran-level>false</set-tran-level>
<select-tran-level>1</select-tran-level>
<update-tran-level>2</update-tran-level>
<max-connections>16</max-connections>
<db-user>xxx</db-user>
<db-password>xxx</db-password>
</dbdata.log>
<keloIndex>
<businessseq>1</businessseq>
<businessname>业务名</businessname>
<businessindexpath>F:\file\index\业务名代号\结点\业务表\index</businessindexpath>
<businesslogfilename>F:\file\index\业务名代号\结点\业务表\Log\index.txt</businesslogfilename>
<businesslogpath>F:\file\index\业务名代号\结点\业务表\Log</businesslogpath>
</keloIndex>
</cluster>

</clusters>
</Index.search-ref>
</kelo.com>
```
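The article does not show how this configuration is consumed. As a hedged illustration only, the sketch below (class and method names such as ClusterConfigReader and splitRange are assumed, not from the article) reads the enabled <cluster> entries with the JDK's built-in DOM parser and splits each idrange into <shard> sub-ranges, which is what the "split the idrange to speed up indexing" comment describes.

```java
import java.io.File;
import java.util.ArrayList;
import java.util.List;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;

// Hypothetical reader for the config above; element names follow the XML,
// everything else is assumed for illustration.
public class ClusterConfigReader {

    public static void main(String[] args) throws Exception {
        Document doc = DocumentBuilderFactory.newInstance()
                .newDocumentBuilder()
                .parse(new File(args[0])); // path to the config file

        NodeList clusters = doc.getElementsByTagName("cluster");
        for (int i = 0; i < clusters.getLength(); i++) {
            Element cluster = (Element) clusters.item(i);
            if (!"1".equals(text(cluster, "use"))) {
                continue; // cluster disabled
            }
            String name = text(cluster, "name");
            String[] range = text(cluster, "idrange").split(",");
            long lo = Long.parseLong(range[0].trim());
            long hi = Long.parseLong(range[1].trim());
            int shards = Integer.parseInt(text(cluster, "shard"));

            System.out.println(name + " idrange=[" + lo + "," + hi + ") shards=" + shards);
            for (long[] sub : splitRange(lo, hi, shards)) {
                System.out.println("  shard [" + sub[0] + "," + sub[1] + ")");
            }
        }
    }

    /** Splits [lo, hi) into roughly equal sub-ranges, as the <shard> comment describes. */
    static List<long[]> splitRange(long lo, long hi, int n) {
        List<long[]> parts = new ArrayList<>();
        long step = Math.max(1, (hi - lo) / n);
        for (long start = lo; start < hi; start += step) {
            parts.add(new long[] { start, Math.min(start + step, hi) });
        }
        return parts;
    }

    /** Text of the first matching child element, or an empty string if absent. */
    static String text(Element parent, String tag) {
        NodeList list = parent.getElementsByTagName(tag);
        return list.getLength() == 0 ? "" : list.item(0).getTextContent().trim();
    }
}
```

Run as `java ClusterConfigReader path/to/config.xml`; with `<idrange>0,10000000</idrange>` and `<shard>10</shard>` it prints ten sub-ranges of one million IDs each.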


Monitoring mode
0. Detect, in a timely manner, the state of the index data on the master index and on each cluster node (see the sketch below)
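The article only states the goal of monitoring mode, so the following is a purely illustrative Java sketch (all names are assumed; how the per-node counts are actually obtained is not described): it compares the master index's document count against each cluster node replica and flags replicas that are out of sync.

```java
import java.util.Map;

// Illustrative only; the real system's monitoring interface is not shown in the article.
public class IndexMonitor {

    /** Compare the master index's document count with each cluster node replica. */
    static void report(long masterCount, Map<String, Long> nodeCounts) {
        for (Map.Entry<String, Long> e : nodeCounts.entrySet()) {
            long lag = masterCount - e.getValue();
            System.out.printf("%s: %d docs, lag=%d %s%n",
                    e.getKey(), e.getValue(), lag, lag == 0 ? "OK" : "OUT OF SYNC");
        }
    }

    public static void main(String[] args) {
        // Example values only.
        report(1_000_000L, Map.of(
                "192.168.1.1:8089", 1_000_000L,
                "192.168.1.2:8089", 999_500L));
    }
}
```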
An inverted index is a classic information-retrieval structure that maps each term to the list of documents containing it. In Hadoop, an inverted index can be built with MapReduce. Below is a simple Hadoop-based inverted index example.

1. First, write a Mapper class that processes the input and emits intermediate results: for every word it emits the ID of the document containing it. Here we assume each input line consists of a document ID, a tab, and the document text.

```java
// Shared imports; the three snippets below are assumed to live in one InvertedIndex class.
import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public static class InvertedIndexMapper extends Mapper<LongWritable, Text, Text, Text> {
    private Text word = new Text();
    private Text docId = new Text();

    public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // get the input line
        String line = value.toString();
        // extract the document ID (before the tab)
        int pos = line.indexOf('\t');
        String docIdStr = line.substring(0, pos);
        docId.set(docIdStr);
        // extract the text content (after the tab)
        String text = line.substring(pos + 1);
        // split the text into words and emit (word, docId) pairs
        StringTokenizer tokenizer = new StringTokenizer(text);
        while (tokenizer.hasMoreTokens()) {
            String token = tokenizer.nextToken();
            word.set(token);
            context.write(word, docId);
        }
    }
}
```

2. Next, write a Reducer class that merges the Mapper's intermediate output into the final inverted index: for each word, it builds the list of documents containing it.

```java
public static class InvertedIndexReducer extends Reducer<Text, Text, Text, Text> {
    private Text result = new Text();

    public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        // build the comma-separated document ID list
        StringBuilder sb = new StringBuilder();
        for (Text val : values) {
            sb.append(val.toString());
            sb.append(",");
        }
        String docList = sb.toString();
        docList = docList.substring(0, docList.length() - 1);
        // emit the inverted index entry
        result.set(docList);
        context.write(key, result);
    }
}
```

3. Finally, write a Driver class to configure and launch the MapReduce job.

```java
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "Inverted Index");
    job.setJarByClass(InvertedIndex.class);
    job.setMapperClass(InvertedIndexMapper.class);
    job.setReducerClass(InvertedIndexReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
```

The code above implements a Hadoop-based inverted index and can be run with:

```
hadoop jar inverted-index.jar input output
```

where `input` is the input path and `output` is the output path. The output files contain the inverted index, mapping each word to the list of documents that contain it.