20161216-solr cloud 集群数据导入(dataimport)笔记

在前两篇blog的基础上,进行数据的导入,连接mysql数据库。


在192.168.244.201(Master)机器上,复制cloud_conf并重命名为col1216_cloud_conf,修改其中的solrconfig.xml和data-config.xml,再复制一份配置并重命名为delta-data-config.xml。

solrconfig.xml加上内容如下:

  <!-- Admin UI section (legacy): the default query is the match-all query. -->
  <admin>
    <defaultQuery>*:*</defaultQuery>
  </admin>

  <!-- NOTE(review): unlockOnStartup is conventionally nested inside <indexConfig>
       and may be rejected by newer Solr releases; confirm it is still honoured
       by the Solr version in use. -->
  <unlockOnStartup>true</unlockOnStartup>

 <!-- Update endpoint: sets "dispup" as the default update.chain for requests
      handled by /update. -->
 <requestHandler name="/update" class="solr.UpdateRequestHandler">
   <lst name="defaults">
     <str name="update.chain">dispup</str>
   </lst>
 </requestHandler>

 <!-- Update chain "dispup". Processors are declared in this order:
      UUID, Log, Distributed, Run. -->
 <updateRequestProcessorChain name="dispup">
   <!-- UUID processor bound to the "id" field; presumably it fills in a UUID
        when an incoming document has no id (TODO confirm). -->
   <processor class="solr.UUIDUpdateProcessorFactory">
     <str name="fieldName">id</str>
   </processor>
   <processor class="solr.LogUpdateProcessorFactory"/>
   <processor class="solr.DistributedUpdateProcessorFactory"/>
   <processor class="solr.RunUpdateProcessorFactory"/>
 </updateRequestProcessorChain>

 <!-- DataImportHandler endpoint; its default "config" is data-config.xml. -->
 <requestHandler name="/dataimport"
                 class="org.apache.solr.handler.dataimport.DataImportHandler">
   <lst name="defaults">
     <str name="config">data-config.xml</str>
   </lst>
 </requestHandler>

<!--
 Disabled alternative for the /dataimport handler: it points "config" at
 delta-data-config.xml and sets update.chain to "dispup". It uses the same
 handler name as the active /dataimport definition, so presumably only one
 of the two should be enabled at a time (TODO confirm).

 <requestHandler name="/dataimport" class="org.apache.solr.handler.dataimport.DataImportHandler">
	<lst name="defaults">
		<str name="config">delta-data-config.xml</str>
		<str name="update.chain">dispup</str>
	</lst>
 </requestHandler>
-->

data-config.xml

<!--
  Licensed to the Apache Software Foundation (ASF) under one or more
  contributor license agreements.  See the NOTICE file distributed with
  this work for additional information regarding copyright ownership.
  The ASF licenses this file to You under the Apache License, Version 2.0
  (the "License"); you may not use this file except in compliance with
  the License.  You may obtain a copy of the License at

      http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
  -->

<dataConfig>
 <!-- JDBC data source referenced by the entity via dataSource="gic_read".
      The separator between URL parameters must be written as an escaped
      ampersand (amp entity) inside an XML attribute; a bare ampersand makes
      the file not well-formed and the config cannot be parsed.
      NOTE(review): batchSize="-1" is presumably there so MySQL results are
      streamed instead of buffered; confirm against the DIH documentation. -->
 <dataSource name="gic_read" type="JdbcDataSource" driver="com.mysql.jdbc.Driver"
  url="jdbc:mysql://172.30.2.28:18806/gic?useUnicode=true&amp;characterEncoding=utf-8"
      user="xxx" password="xxxxx" batchSize="-1" />

<document>
	<!--
	  Entity "platformgoods" (full import), read through data source "gic_read".
	  The query selects from t_goods_shop (g) joined to v_goods_product (p),
	  v_ywd_goods_catalog_full (vc), t_area_full (area) and t_goods_tag (t),
	  and aliases each column to the name used by the field mappings below.
	  * id is goodscode wrapped in CONCAT('', ...); presumably to force a
	    string value (TODO confirm).
	  * shopId and goodsType are selected but have no field mapping in this
	    entity. NOTE(review): presumably they rely on DIH matching column
	    names to schema fields; confirm.
	  * t_goods_tag (t) is joined but none of its columns are selected.
	    NOTE(review): confirm the join cannot multiply rows per goodscode.
	-->
	<entity name="platformgoods" dataSource="gic_read" pk="id" query="SELECT
			CONCAT('', g.goodscode) AS id,
			g.goodscode AS goodsId,
			p.id AS productId,
			p.goods_name AS goodsName,
			IFNULL(p.logourl, '') AS goodsLogo,
			p.weixin_shop_price AS goodsPrice,
			g.goods_salenum AS goodsSales,
			p.goods_inventory AS goodsInventory,
			g.`status` AS `status`,
			g.shopid AS shopId,
			'1' AS goodsType,
			vc.fid AS fCateId,
			vc.fcatalog AS fCateName,
			vc.sid AS sCateId,
			vc.scatalog AS sCateName,
			vc.tid AS tCateId,
			vc.tcatalog AS tCateName,
			p.goods_brand_id AS brandId,
			p.`name` AS brandName,
			area.pid AS provId,
			area.proname AS provName,
			area.cid AS cityId,
			area.citname AS cityName,
			area.id AS areaId,
			area.areaName AS areaName,
			p.goods_transfee AS transFee,
			g.cdate,
			g.udate,
			DATE_FORMAT(SYSDATE(), '%Y-%m-%d %H:%i:%S') AS indexDate
		FROM
			t_goods_shop g
		INNER JOIN v_goods_product p ON g.productid = p.id
		LEFT JOIN v_ywd_goods_catalog_full vc ON p.gc_id = vc.gcid
		LEFT JOIN t_area_full area ON p.delivery_area_id = area.id
		LEFT JOIN t_goods_tag t ON g.goodscode = t.goodscode">

		<!-- Column-to-field mappings; every column alias maps to the Solr field of the same name. -->
		<field column="id" name="id"  />
		<field column="goodsId" name="goodsId"  />
		<field column="productId" name="productId"  />
		<field column="goodsName" name="goodsName"  />
		<field column="goodsLogo" name="goodsLogo"  />
		<field column="goodsPrice" name="goodsPrice"  />
		<field column="goodsSales" name="goodsSales"  />
		<field column="goodsInventory" name="goodsInventory"  />
		<field column="status" name="status"  />
		<field column="fCateId" name="fCateId"  />
		<field column="fCateName" name="fCateName"  />
		<field column="sCateId" name="sCateId"  />
		<field column="sCateName" name="sCateName"  />
		<field column="tCateId" name="tCateId"  />
		<field column="tCateName" name="tCateName"  />
		<field column="provId" name="provId"  />
		<field column="provName" name="provName"  />
		<field column="cityId" name="cityId"  />
		<field column="cityName" name="cityName"  />
		<field column="areaId" name="areaId"  />
		<field column="areaName" name="areaName"  />
		<field column="brandId" name="brandId"  />
		<field column="brandName" name="brandName"  />
		<field column="transFee" name="transFee"  />
		<field column="cdate" name="cdate"  />
		<field column="udate" name="udate"  />
		<field column="indexDate" name="indexDate"  />
	</entity> 
  </document>
</dataConfig>

delta-data-config.xml

<!--
  Licensed to the Apache Software Foundation (ASF) under one or more
  contributor license agreements.  See the NOTICE file distributed with
  this work for additional information regarding copyright ownership.
  The ASF licenses this file to You under the Apache License, Version 2.0
  (the "License"); you may not use this file except in compliance with
  the License.  You may obtain a copy of the License at

      http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
  -->

<dataConfig>
 <!-- JDBC data source referenced by the entity via dataSource="gic_read".
      The separator between URL parameters must be written as an escaped
      ampersand (amp entity) inside an XML attribute; a bare ampersand makes
      the file not well-formed and the config cannot be parsed.
      NOTE(review): batchSize="-1" is presumably there so MySQL results are
      streamed instead of buffered; confirm against the DIH documentation. -->
 <dataSource name="gic_read" type="JdbcDataSource" driver="com.mysql.jdbc.Driver"
  url="jdbc:mysql://172.30.2.28:18806/gic?useUnicode=true&amp;characterEncoding=utf-8"
      user="xxx" password="xxxx" batchSize="-1" />

 <document>
	<!--
	  Entity "platformgoods" with full and delta import queries, read through
	  data source "gic_read".
	  * query: full-import SELECT over t_goods_shop (g) joined to
	    v_goods_product (p), v_ywd_goods_catalog_full (vc), t_area_full (area)
	    and t_goods_tag (t).
	  * deltaImportQuery: the same SELECT restricted by
	    WHERE g.goodscode = '${dih.delta.id}'.
	  * deltaQuery: returns g.goodscode AS id for rows where g.udate, p.udate,
	    vc.udate or t.udate is >= ${dih.last_index_time} minus 3 seconds.
	  NOTE(review): deltaQuery joins t_product on p.hele_id, while query and
	  deltaImportQuery join v_goods_product on p.id; confirm both select the
	  same products.
	  NOTE(review): t_area_full is joined in deltaQuery but no area column is
	  part of its WHERE clause; confirm area changes need no re-index.
	  NOTE(review): transformer="RegexTransformer" is declared, but none of the
	  field mappings below carry any attribute besides column and name;
	  confirm the transformer is needed.
	-->
	<entity name="platformgoods" dataSource="gic_read" pk="id" query="SELECT
			CONCAT('', g.goodscode) AS id,
			g.goodscode AS goodsId,
			p.id AS productId,
			p.goods_name AS goodsName,
			IFNULL(p.logourl, '') AS goodsLogo,
			p.weixin_shop_price AS goodsPrice,
			g.goods_salenum AS goodsSales,
			p.goods_inventory AS goodsInventory,
			g.`status` AS `status`,
			g.shopid AS shopId,
			'1' AS goodsType,
			vc.fid AS fCateId,
			vc.fcatalog AS fCateName,
			vc.sid AS sCateId,
			vc.scatalog AS sCateName,
			vc.tid AS tCateId,
			vc.tcatalog AS tCateName,
			p.goods_brand_id AS brandId,
			p.`name` AS brandName,
			area.pid AS provId,
			area.proname AS provName,
			area.cid AS cityId,
			area.citname AS cityName,
			area.id AS areaId,
			area.areaName AS areaName,
			p.goods_transfee AS transFee,
			g.cdate,
			g.udate,
			DATE_FORMAT(SYSDATE(), '%Y-%m-%d %H:%i:%S') AS indexDate
		FROM
			t_goods_shop g
		INNER JOIN v_goods_product p ON g.productid = p.id
		LEFT JOIN v_ywd_goods_catalog_full vc ON p.gc_id = vc.gcid
		LEFT JOIN t_area_full area ON p.delivery_area_id = area.id
		LEFT JOIN t_goods_tag t ON g.goodscode = t.goodscode"
		
		deltaImportQuery="SELECT
			CONCAT('', g.goodscode) AS id,
			g.goodscode AS goodsId,
			p.id AS productId,
			p.goods_name AS goodsName,
			IFNULL(p.logourl, '') AS goodsLogo,
			p.weixin_shop_price AS goodsPrice,
			g.goods_salenum AS goodsSales,
			p.goods_inventory AS goodsInventory,
			g.`status` AS `status`,
			g.shopid AS shopId,
			'1' AS goodsType,
			vc.fid AS fCateId,
			vc.fcatalog AS fCateName,
			vc.sid AS sCateId,
			vc.scatalog AS sCateName,
			vc.tid AS tCateId,
			vc.tcatalog AS tCateName,
			p.goods_brand_id AS brandId,
			p.`name` AS brandName,
			area.pid AS provId,
			area.proname AS provName,
			area.cid AS cityId,
			area.citname AS cityName,
			area.id AS areaId,
			area.areaName AS areaName,
			p.goods_transfee AS transFee,
			g.cdate,
			g.udate,
			DATE_FORMAT(SYSDATE(), '%Y-%m-%d %H:%i:%S') AS indexDate
		FROM
			t_goods_shop g
		INNER JOIN v_goods_product p ON g.productid = p.id
		LEFT JOIN v_ywd_goods_catalog_full vc ON p.gc_id = vc.gcid
		LEFT JOIN t_area_full area ON p.delivery_area_id = area.id
		LEFT JOIN t_goods_tag t ON g.goodscode = t.goodscode
		WHERE g.goodscode = '${dih.delta.id}'"

		deltaQuery="SELECT
			g.goodscode AS id
		FROM
			t_goods_shop g
		INNER JOIN t_product p ON g.productid = p.hele_id
		LEFT JOIN v_ywd_goods_catalog_full vc ON p.gc_id = vc.gcid
		LEFT JOIN t_area_full area ON p.delivery_area_id = area.id
		LEFT JOIN t_goods_tag t ON g.goodscode = t.goodscode
		WHERE
			g.udate >= DATE_ADD('${dih.last_index_time}', INTERVAL - 3 SECOND)
		OR p.udate >= DATE_ADD('${dih.last_index_time}', INTERVAL - 3 SECOND)
		OR vc.udate >= DATE_ADD('${dih.last_index_time}', INTERVAL - 3 SECOND)
		OR t.udate >= DATE_ADD('${dih.last_index_time}', INTERVAL - 3 SECOND)"
		transformer="RegexTransformer">

		<!-- Column-to-field mappings; every column alias maps to the Solr field of the same name. -->
		<field column="id" name="id"  />
		<field column="goodsId" name="goodsId"  />
		<field column="productId" name="productId"  />
		<field column="goodsName" name="goodsName"  />
		<field column="goodsLogo" name="goodsLogo"  />
		<field column="goodsPrice" name="goodsPrice"  />
		<field column="goodsSales" name="goodsSales"  />
		<field column="goodsInventory" name="goodsInventory"  />
		<field column="status" name="status"  />
		<field column="fCateId" name="fCateId"  />
		<field column="fCateName" name="fCateName"  />
		<field column="sCateId" name="sCateId"  />
		<field column="sCateName" name="sCateName"  />
		<field column="tCateId" name="tCateId"  />
		<field column="tCateName" name="tCateName"  />
		<field column="provId" name="provId"  />
		<field column="provName" name="provName"  />
		<field column="cityId" name="cityId"  />
		<field column="cityName" name="cityName"  />
		<field column="areaId" name="areaId"  />
		<field column="areaName" name="areaName"  />
		<field column="brandId" name="brandId"  />
		<field column="brandName" name="brandName"  />
		<field column="transFee" name="transFee"  />
		<field column="cdate" name="cdate"  />
		<field column="udate" name="udate"  />
		<field column="indexDate" name="indexDate"  />
	</entity> 
 </document>
</dataConfig>


补充:前面漏了一步,还需要添加schema.xml,可以复制managed-schema来修改。

注意:solr 6.3直接使用managed-schema,不再使用schema.xml。


<!-- Field type "uuid", backed by solr.UUIDField. -->
<fieldType name="uuid"
           class="solr.UUIDField"
           sortMissingLast="true"
           indexed="true"/>

配置UUID,并删除原先的id域声明(<field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false" />)

<!-- Declaration of the "id" field using the uuid type.
     NOTE(review): the field list further down in this post declares "id"
     with type="string", and the import queries supply id from goodscode;
     only one declaration of "id" should remain. Confirm which one is intended. -->
<field name="id"
       type="uuid"
       indexed="true"
       stored="true"
       required="true"
       multiValued="false"/>

改变 id 生成策略


 <fieldType name="text_ik" class="solr.TextField" positionIncrementGap="100">
	<analyzer type="index" >
		<charFilter class="solr.HTMLStripCharFilterFactory"/>
		<tokenizer class="org.wltea.analyzer.lucene.IKTokenizerFactory" useSmart="false" />
		<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
		<filter class="solr.LowerCaseFilterFactory"/>
	</analyzer>
	<analyzer type="query">
        <charFilter class="solr.HTMLStripCharFilterFactory"/>
        <tokenizer class="org.wltea.analyzer.lucene.IKTokenizerFactory" useSmart="true" dicPath="my_dic.dic"  />
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
        <filter class="solr.LowerCaseFilterFactory" />
	</analyzer> 
 </fieldType>


 <field name="fieldA" type="text_ik" indexed="false" stored="true"/>
<field name="id" type="string" indexed="true" stored="true" required="true" />
 <field name="goodsId" type="text_ws" indexed="true" stored="true" required="true" />
 <field name="productId" type="text_ws" indexed="true" stored="true" />
 <field name="goodsName" type="text_ws" indexed="false" stored="true" />
 <field name="goodsNameCp" type="lowercase" indexed="true" stored="true" />
 <copyField source="goodsName" dest="goodsNameCp"/>
 <field name="goodsLogo" type="text_ws" indexed="false" stored="true" />
 <field name="goodsPrice" type="tdouble" indexed="true" stored="true" />
 <field name="goodsSales" type="tint" indexed="true" stored="true" />
 <field name="goodsInventory" type="tint" indexed="true" stored="true" />
 <field name="status" type="text_ws" indexed="true" stored="true" />
 <field name="goodsType" type="text_ws" indexed="true" stored="true" />

 <field name="shopId" type="text_ws" indexed="true" stored="true" />

 <field name="fCateId" type="text_ws" indexed="true" stored="true" />
 <field name="fCateName" type="text_ws" indexed="false" stored="true" />
 <field name="sCateId" type="text_ws" indexed="true" stored="true" />
 <field name="sCateName" type="text_ws" indexed="false" stored="true" />
 <field name="tCateId" type="text_ws" indexed="true" stored="true" />
 <field name="tCateName" type="text_ws" indexed="false" stored="true" />

 <field name="provId" type="text_ws" indexed="true" stored="true" />
 <field name="provName" type="text_ws" indexed="false" stored="true" />
 <field name="cityId" type="text_ws" indexed="true" stored="true" />
 <field name="cityName" type="text_ws" indexed="true" stored="true" />
 <field name="areaId" type="text_ws" indexed="true" stored="true" />
 <field name="areaName" type="text_ws" indexed="false" stored="true" />
 
 <field name="brandId" type="text_ws" indexed="true" stored="true" />
 <field name="brandName" type="text_ws" indexed="false" stored="true" />
 <field name="transFee" type="int" indexed="true" stored="true" />
 <field name="cdate" type="tdate" indexed="false" stored="true" />
 <field name="udate" type="tdate" indexed="true" stored="true" />

 <field name="indexDate" type="text_ws" indexed="true" stored="true" />


在tomcat加上mysql数据库连接驱动jar


修改tomcat启动变量,创建一个新的配置col1216_cloud_conf到zookeeper配置管理




重启tomcat。以后再修改col1216_cloud_conf里面的配置文件就不需要重启tomcat了,直接在solr管理页面点击collection的reload按钮即可;其它两台机器也这样操作,就可以同步配置文件夹。



导入时如果不选择platformgoods这个entity,导入的数据就只有id、_version_两个字段


评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值