solrj使用demo(PersonSolrServer)

最新推荐文章于 2024-10-14 09:07:49 发布

Y叔输得起

最新推荐文章于 2024-10-14 09:07:49 发布

阅读量1.3k

点赞数

分类专栏： solr 文章标签： class filter string query integer solr

本文链接：https://blog.csdn.net/ssh_Kobe/article/details/6781773

版权

solr 专栏收录该内容

6 篇文章 0 订阅

订阅专栏

solr服务器类PersonSolrServer.java:

import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.List;

import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.impl.BinaryRequestWriter;
import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrInputDocument;
import org.nstcrm.person.model.PersonDetail;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * @author Sam 时间：2011-9-16 下午3:25:15
 */
/**
 * Thin SolrJ client wrapper around the "person" Solr core.
 *
 * <p>It configures a {@link CommonsHttpSolrServer} for the core URL and offers
 * helpers to index a single {@code PersonDetail}, wipe the index, and run a
 * paged keyword search that returns the matching document ids.
 *
 * @author Sam 2011-9-16 15:25:15
 */
public class PersonSolrServer {

	/** Base URL of the "person" core. */
	private final static String URL = "http://localhost/solr/person";
	/** Socket read timeout passed to {@code setSoTimeout}. */
	private final static int SOCKET_TIMEOUT = 1000;
	/** Connection timeout passed to {@code setConnectionTimeout}. */
	private final static int CONN_TIMEOUT = 100;
	private final static int MAXCONN_DEFAULT = 100;
	private final static int MAXCONN_TOTAL = 100;
	/** Retry count; SolrJ defaults to 0 and values above 1 are not recommended. */
	private final static int MAXRETRIES = 1;
	/** Query used when the caller supplies no usable keyword: match every document. */
	private final static String MATCH_ALL = "*:*";

	private final Logger logger = LoggerFactory.getLogger(this.getClass());
	private CommonsHttpSolrServer server = null;

	/**
	 * Creates the underlying HTTP client and applies timeout, connection-pool
	 * and retry settings.
	 *
	 * @throws MalformedURLException if the configured core URL is not a valid URL
	 */
	public PersonSolrServer() throws MalformedURLException {
		logger.info("初始化solr服务..");
		server = new CommonsHttpSolrServer(URL); // talks to Solr through HttpClient
		server.setRequestWriter(new BinaryRequestWriter()); // send updates in binary format
		server.setSoTimeout(SOCKET_TIMEOUT);
		server.setConnectionTimeout(CONN_TIMEOUT);
		server.setDefaultMaxConnectionsPerHost(MAXCONN_DEFAULT);
		server.setMaxTotalConnections(MAXCONN_TOTAL);
		server.setFollowRedirects(false);
		server.setAllowCompression(true);
		server.setMaxRetries(MAXRETRIES);
	}

	/**
	 * Indexes the id and language of one person and commits the change.
	 *
	 * <p>No {@code optimize()} is issued here: optimizing forces a full segment
	 * merge, which is far too expensive to run after every single-document add.
	 * Callers that want an optimized index can invoke it once after a bulk load
	 * through {@link #getServer()}.
	 *
	 * @param pd the person to index; must not be {@code null}
	 * @throws IllegalArgumentException if {@code pd} is {@code null}
	 * @throws Exception if the add or commit request fails
	 */
	public void createIndex(PersonDetail pd) throws Exception {
		if (pd == null) {
			throw new IllegalArgumentException("pd must not be null");
		}
		SolrInputDocument doc = new SolrInputDocument();
		doc.addField("id", pd.getId());
		doc.addField("language", pd.getLanguage());

		server.add(doc);
		server.commit();
		logger.info("----索引创建完毕!!!");
	}

	/**
	 * Deletes every document in the core and commits.
	 *
	 * @author Sam 2011-9-16 15:32:55
	 * @throws Exception if the delete or commit request fails
	 */
	public void delIndex() throws Exception {
		server.deleteByQuery(MATCH_ALL);
		server.commit();
		logger.info("----索引清除完毕!!!");
	}

	/**
	 * Runs a paged keyword search and returns the ids of the matching documents.
	 *
	 * <p>Highlighting parameters are sent with the request, but the highlighting
	 * section of the response is not read by this method.
	 *
	 * @author Sam 2011-9-16 15:33:14
	 * @param key   user keyword(s); expanded by {@link #getkey(String)}
	 * @param start zero-based offset of the first hit to return
	 * @param rows  maximum number of hits to return
	 * @return ids of the hits on the requested page, in result order
	 * @throws NumberFormatException if a hit's {@code id} value is not an integer
	 * @throws Exception if the query request fails
	 */
	public List<Integer> queryList(String key, Integer start, Integer rows) throws Exception {
		SolrQuery query = new SolrQuery(getkey(key));
		query.setHighlight(true); // enable the highlighting component
		query.addHighlightField("id");
		query.addHighlightField("chName");
		query.addHighlightField("enName");
		query.setHighlightSimplePre("<font color='red'>"); // markup before a hit
		query.setHighlightSimplePost("</font>"); // markup after a hit
		query.set("hl.usePhraseHighlighter", true);
		query.set("hl.highlightMultiTerm", true);
		query.set("hl.snippets", 3); // three snippets per field instead of the default 1
		query.set("hl.fragsize", 50); // 50 characters per snippet instead of the default 100
		query.setStart(start); // paging offset
		query.setRows(rows); // page size

		QueryResponse rep = server.query(query);
		List<SolrDocument> docs = rep.getResults();
		List<Integer> idList = new ArrayList<Integer>(docs.size());
		for (SolrDocument doc : docs) {
			// String.valueOf instead of a (String) cast: tolerates an id that is
			// returned as a non-String value.
			idList.add(Integer.parseInt(String.valueOf(doc.getFieldValue("id"))));
			logger.info("{}|{}", doc.getFieldValue("chName"), doc.getFieldValue("enName"));
		}
		return idList;
	}

	/**
	 * Expands user input into a query string that combines, for terms
	 * {@code t1 .. tn}: an all-terms wildcard clause
	 * {@code (*t1* AND ... AND *tn*)}, one wildcard clause {@code *ti*} per term,
	 * and the plain terms themselves.
	 *
	 * <p>Input is trimmed and split on any run of whitespace, so leading,
	 * trailing or repeated blanks no longer produce empty {@code **} or bare
	 * {@code *} clauses. A {@code null} or blank input yields the match-all query
	 * {@code *:*}.
	 *
	 * <p>NOTE(review): query-syntax characters in the input (e.g. {@code : ( ) "})
	 * are passed through unescaped, exactly as before.
	 *
	 * @param strWord raw user input; may be {@code null}
	 * @return the expanded query string, never {@code null}
	 */
	public String getkey(String strWord) {
		if (strWord == null) {
			return MATCH_ALL;
		}
		String trimmed = strWord.trim();
		if (trimmed.length() == 0) {
			return MATCH_ALL;
		}
		String[] terms = trimmed.split("\\s+");
		if (terms.length == 1) {
			return "*" + terms[0] + "* " + terms[0];
		}
		String wordAnd = join(terms, "* AND *");
		String wordOr = join(terms, "* *");
		String plain = join(terms, " ");
		return "(*" + wordAnd + "*) *" + wordOr + "* " + plain;
	}

	/** Concatenates {@code parts}, placing {@code separator} between neighbours. */
	private static String join(String[] parts, String separator) {
		StringBuilder joined = new StringBuilder();
		for (int i = 0; i < parts.length; i++) {
			if (i > 0) {
				joined.append(separator);
			}
			joined.append(parts[i]);
		}
		return joined.toString();
	}

	/** @return the underlying SolrJ server instance */
	public CommonsHttpSolrServer getServer() {
		return server;
	}

	/** @param server the SolrJ server instance to use for all subsequent requests */
	public void setServer(CommonsHttpSolrServer server) {
		this.server = server;
	}

}

配置文件E:\SolrHome\solr\person\conf\data-config.xml:

<!-- Data import configuration for the "person" core: reads every row of the
     pm_person table from the MySQL database "nstcrm" over JDBC and maps its
     columns (plus a few looked-up names) onto Solr fields. -->
<dataConfig> 
 <!-- NOTE(review): the database user and password are stored here in plain
      text; confirm this file is not published with real credentials. -->
 <dataSource 
		type="JdbcDataSource" 
		driver="com.mysql.jdbc.Driver" 
		url="jdbc:mysql://localhost:3306/nstcrm" 
		user="root" 
		password="admin"/> 
 <document name="pm_person"> 
    <!-- Root entity: one Solr document per pm_person row, keyed by "id". -->
    <entity name="person" pk="id" query="select * from pm_person">   
		<field column="id" name="id"/>
		<field column="code" name="code"/>
		<field column="chName" name="chName"/>
		<field column="enName" name="enName"/>
		<field column="nickName" name="nickName"/>	
		<field column="birthdate" name="birthdate"/>
		<field column="height" name="height"/>
		<field column="email" name="email"/>
		<field column="mobile" name="mobile"/>
		<field column="workPhone" name="workPhone"/>
		<field column="homePhone" name="homePhone"/>
		<field column="address" name="address"/>
		<field column="postal" name="postal"/>	
		<field column="idNumber" name="idNumber"/>
		 <!-- Each nested entity below looks up the Chinese/English name for an id
		      held in the parent "person" row (referenced as ${person.<column>})
		      and stores it under a field name prefixed with that column name. -->
		 <entity name="workProv" query="select chName,enName from sm_province where id='${person.workProv}'">
			<field column="chName" name="workProvchName"/>
			<field column="enName" name="workProvenName"/>
		 </entity>
		 <entity name="workCity" query="select chName,enName from sm_city where id='${person.workCity}'">
			<field column="chName" name="workCitychName"/>
			<field column="enName" name="workCityenName"/>
		 </entity>
		 <entity name="houseProv" query="select chName,enName from sm_province where id='${person.houseProv}'">
			<field column="chName" name="houseProvchName"/>
			<field column="enName" name="houseProvenName"/>
		 </entity>
		 <entity name="houseCity" query="select chName,enName from sm_city where id='${person.houseCity}'">
			<field column="chName" name="houseCitychName"/>
			<field column="enName" name="houseCityenName"/>
		 </entity>
		  <entity name="origProv" query="select chName,enName from sm_province where id='${person.origProv}'">
			<field column="chName" name="origProvchName"/>
			<field column="enName" name="origProvenName"/>
		 </entity>
		 <entity name="origCity" query="select chName,enName from sm_city where id='${person.origCity}'">
			<field column="chName" name="origCitychName"/>
			<field column="enName" name="origCityenName"/>
		 </entity>
		<field column="experience" name="experience"/>
		<!-- Company lookup: cm_company exposes chFname/enFname rather than chName/enName. -->
		<entity name="compId" query="select chFname,enFname from cm_company where id='${person.compId}'">
			<field column="chFname" name="compIdchFname"/>
			<field column="enFname" name="compIdenFname"/>
		 </entity>
		 <entity name="industry" query="select chName,enName from sm_industry where id='${person.industry}'">
			<field column="chName" name="industrychName"/>
			<field column="enName" name="industryenName"/>
		 </entity>
		 <entity name="post" query="select chName,enName from sm_post where id='${person.post}'">
			<field column="chName" name="postchName"/>
			<field column="enName" name="postenName"/>
		 </entity>
		 <entity name="subPost" query="select chName,enName from sm_subpost where id='${person.subPost}'">
			<field column="chName" name="subPostchName"/>
			<field column="enName" name="subPostenName"/>
		 </entity>
		<field column="postRemark" name="postRemark"/>
		<field column="departRemark" name="departRemark"/>
		<field column="salaryMonth" name="salaryMonth"/>
		<field column="salaryOfYear" name="salaryOfYear"/>
		<field column="salaryOfMonth" name="salaryOfMonth"/>
		<field column="bonus" name="bonus"/>
		<field column="allowance" name="allowance"/>
		<field column="stock" name="stock"/>
		<field column="salaryRemark" name="salaryRemark"/>
		<field column="expectLocation" name="expectLocation"/>
		<field column="expectIndustry" name="expectIndustry"/>
		<field column="expectPost" name="expectPost"/>
		<field column="selfAssessment" name="selfAssessment"/>
		<field column="completion" name="completion"/>
		<field column="addressEn" name="addressEn"/>
		<field column="departRemarkEn" name="departRemarkEn"/>
		<field column="postRemarkEn" name="postRemarkEn"/>
		<field column="salaryRemarkEn" name="salaryRemarkEn"/>
		<field column="language" name="language"/>
		<field column="selfAssessmentEn" name="selfAssessmentEn"/>
		<field column="overseaLocation" name="overseaLocation"/>
    </entity> 
  </document> 
</dataConfig>

配置文件E:\SolrHome\solr\person\conf\schema.xml:

<?xml version="1.0" encoding="UTF-8" ?>

<schema name="example" version="1.4">
  
  <types>
   
    <!-- The StrField type is not analyzed, but indexed/stored verbatim. -->
    <fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/>

    <!-- boolean type: "true" or "false" -->
    <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" omitNorms="true"/>
    <!-- Binary data type. The data should be sent/retrieved as Base64 encoded Strings -->
    <fieldtype name="binary" class="solr.BinaryField"/>

    <!--
      Default numeric field types. For faster range queries, consider the tint/tfloat/tlong/tdouble types.
    -->
    <fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
    <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
    <fieldType name="long" class="solr.TrieLongField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
    <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>

   
    <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
    <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
    <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
    <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>

   
    <fieldType name="date" class="solr.TrieDateField" omitNorms="true" precisionStep="0" positionIncrementGap="0"/>

    <!-- A Trie based date field for faster date range queries and date faceting. -->
    <fieldType name="tdate" class="solr.TrieDateField" omitNorms="true" precisionStep="6" positionIncrementGap="0"/>


    <fieldType name="pint" class="solr.IntField" omitNorms="true"/>
    <fieldType name="plong" class="solr.LongField" omitNorms="true"/>
    <fieldType name="pfloat" class="solr.FloatField" omitNorms="true"/>
    <fieldType name="pdouble" class="solr.DoubleField" omitNorms="true"/>
    <fieldType name="pdate" class="solr.DateField" sortMissingLast="true" omitNorms="true"/>


    <fieldType name="sint" class="solr.SortableIntField" sortMissingLast="true" omitNorms="true"/>
    <fieldType name="slong" class="solr.SortableLongField" sortMissingLast="true" omitNorms="true"/>
    <fieldType name="sfloat" class="solr.SortableFloatField" sortMissingLast="true" omitNorms="true"/>
    <fieldType name="sdouble" class="solr.SortableDoubleField" sortMissingLast="true" omitNorms="true"/>

    <fieldType name="random" class="solr.RandomSortField" indexed="true" />

    <!-- A text field that only splits on whitespace for exact matching of words -->
    <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
      <analyzer>
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      </analyzer>
    </fieldType>

    <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
      <analyzer type="index">
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
        <!-- in this example, we will only use synonyms at query time
        <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
        -->
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>
      <analyzer type="query">
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>
    </fieldType>

    <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
      <analyzer type="index">
        <tokenizer class="solr.StandardTokenizerFactory"/>
       
        <filter class="solr.StopFilterFactory"
                ignoreCase="true"
                words="stopwords_en.txt"
                enablePositionIncrements="true"
                />
        <filter class="solr.LowerCaseFilterFactory"/>
	<filter class="solr.EnglishPossessiveFilterFactory"/>
        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
	<!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
        <filter class="solr.EnglishMinimalStemFilterFactory"/>
	-->
        <filter class="solr.PorterStemFilterFactory"/>
      </analyzer>
      <analyzer type="query">
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
        <filter class="solr.StopFilterFactory"
                ignoreCase="true"
                words="stopwords_en.txt"
                enablePositionIncrements="true"
                />
        <filter class="solr.LowerCaseFilterFactory"/>
	<filter class="solr.EnglishPossessiveFilterFactory"/>
        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
	<!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
        <filter class="solr.EnglishMinimalStemFilterFactory"/>
	-->
        <filter class="solr.PorterStemFilterFactory"/>
      </analyzer>
    </fieldType>

    <fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
      <analyzer type="index">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
       
        <filter class="solr.StopFilterFactory"
                ignoreCase="true"
                words="stopwords_en.txt"
                enablePositionIncrements="true"
                />
        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
        <filter class="solr.PorterStemFilterFactory"/>
      </analyzer>
      <analyzer type="query">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
        <filter class="solr.StopFilterFactory"
                ignoreCase="true"
                words="stopwords_en.txt"
                enablePositionIncrements="true"
                />
        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
        <filter class="solr.PorterStemFilterFactory"/>
      </analyzer>
    </fieldType>

    <!-- Less flexible matching, but less false matches.  Probably not ideal for product names,
         but may be good for SKUs.  Can insert dashes in the wrong place and still match. -->
    <fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
      <analyzer>
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_en.txt"/>
        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
        <filter class="solr.EnglishMinimalStemFilterFactory"/>
        <!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
             possible with WordDelimiterFilter in conjunction with stemming. -->
        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
      </analyzer>
    </fieldType>

    <!-- Just like text_general except it reverses the characters of
	 each token, to enable more efficient leading wildcard queries. -->
    <fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100">
      <analyzer type="index">
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
           maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
      </analyzer>
      <analyzer type="query">
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>
    </fieldType>

   
    <fieldType name="alphaOnlySort" class="solr.TextField" sortMissingLast="true" omitNorms="true">
      <analyzer>
        <!-- KeywordTokenizer does no actual tokenizing, so the entire
             input string is preserved as a single token
          -->
        <tokenizer class="solr.KeywordTokenizerFactory"/>
        <!-- The LowerCase TokenFilter does what you expect, which can be
             useful when you want your sorting to be case insensitive
          -->
        <filter class="solr.LowerCaseFilterFactory" />
        <!-- The TrimFilter removes any leading or trailing whitespace -->
        <filter class="solr.TrimFilterFactory" />
       
        <filter class="solr.PatternReplaceFilterFactory"
                pattern="([^a-z])" replacement="" replace="all"
        />
      </analyzer>
    </fieldType>
    
    <fieldtype name="phonetic" stored="false" indexed="true" class="solr.TextField" >
      <analyzer>
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/>
      </analyzer>
    </fieldtype>

    <fieldtype name="payloads" stored="false" indexed="true" class="solr.TextField" >
      <analyzer>
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
       
        <filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/>
      </analyzer>
    </fieldtype>

    <!-- lowercases the entire field value, keeping it as a single token.  -->
    <fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">
      <analyzer>
        <tokenizer class="solr.KeywordTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory" />
      </analyzer>
    </fieldType>

    <fieldType name="text_path" class="solr.TextField" positionIncrementGap="100">
      <analyzer>
        <tokenizer class="solr.PathHierarchyTokenizerFactory"/>
      </analyzer>
    </fieldType>
	
	<!-- Chinese/English text type built on the IK tokenizer; it is the type of the
	     catch-all "persondetail" search field.
	     The index and query analyzers are declared explicitly and use the same
	     filter chain, so terms are normalized identically on both sides. An
	     additional untyped <analyzer class="org.wltea.analyzer.lucene.IKAnalyzer"/>
	     used to be declared here as well; an untyped analyzer competes with the
	     type="index" one, which left index-time and query-time analysis out of
	     sync, so it has been removed. -->
	<fieldType name="textik" class="solr.TextField" >
	   <analyzer type="index">
		   <tokenizer class="org.wltea.analyzer.solr.IKTokenizerFactory" isMaxWordLength="false"/>
		   <filter class="solr.StopFilterFactory"
				   ignoreCase="true" words="stopwords.txt"/>
		   <filter class="solr.WordDelimiterFilterFactory"
				   generateWordParts="1"
				   generateNumberParts="1"
				   catenateWords="1"
				   catenateNumbers="1"
				   catenateAll="0"
				   splitOnCaseChange="1"/>
		   <filter class="solr.LowerCaseFilterFactory"/>
		   <filter class="solr.EnglishPorterFilterFactory"
			   protected="protwords.txt"/>
		   <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
	   </analyzer>
	   <analyzer type="query">
		   <tokenizer class="org.wltea.analyzer.solr.IKTokenizerFactory" isMaxWordLength="false"/>
		   <filter class="solr.StopFilterFactory"
				   ignoreCase="true" words="stopwords.txt"/>
		   <filter class="solr.WordDelimiterFilterFactory"
				   generateWordParts="1"
				   generateNumberParts="1"
				   catenateWords="1"
				   catenateNumbers="1"
				   catenateAll="0"
				   splitOnCaseChange="1"/>
		   <filter class="solr.LowerCaseFilterFactory"/>
		   <filter class="solr.EnglishPorterFilterFactory"
			   protected="protwords.txt"/>
		   <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
	   </analyzer>
	</fieldType>
	
    <!-- since fields of this type are by default not stored or indexed,
         any data added to them will be ignored outright.  --> 
    <fieldtype name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />

    <fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>

    <!-- A specialized field for geospatial search. If indexed, this fieldType must not be multivalued. -->
    <fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>

   <!--
    A Geohash is a compact representation of a latitude longitude pair in a single field.
    See http://wiki.apache.org/solr/SpatialSearch
   -->
    <fieldtype name="geohash" class="solr.GeoHashField"/>
 </types>


 <fields>

    <field name="id" type="string" indexed="true" stored="true" multiValued="false" required="true"/>
	 <field name="code" type="string" indexed="true" stored="true" multiValued="false" /> 
	 <field name="chName" type="string" indexed="true" stored="true" multiValued="false" /> 
	 <field name="enName" type="string" indexed="true" stored="true" multiValued="false" /> 
	 <field name="nickName" type="string" indexed="true" stored="true" multiValued="false" />
	 <field name="birthdate" type="date" indexed="true" stored="true" multiValued="false" /> 
	 <field name="height" type="tfloat" indexed="true" stored="true" multiValued="false" /> 
	 <field name="email" type="string" indexed="true" stored="true" multiValued="false" /> 
	 <field name="mobile" type="string" indexed="true" stored="true" multiValued="false" /> 
	 <field name="workPhone" type="string" indexed="true" stored="true" multiValued="false" /> 
	 <field name="homePhone" type="string" indexed="true" stored="true" multiValued="false" /> 
	 <field name="address" type="string" indexed="true" stored="true" multiValued="false" />
	<field name="postal" type="string" indexed="true" stored="true" multiValued="false"/>
	 <field name="idNumber" type="string" indexed="true" stored="true" multiValued="false" /> 
	 <field name="workProvchName" type="string" indexed="true" stored="true" multiValued="false" /> 
	 <field name="workProvenName" type="string" indexed="true" stored="true" multiValued="false" /> 
	 <field name="workCitychName" type="string" indexed="true" stored="true" multiValued="false" />
	 <field name="workCityenName" type="string" indexed="true" stored="true" multiValued="false" /> 
	 <field name="houseProvchName" type="string" indexed="true" stored="true" multiValued="false" /> 
	 <field name="houseProvenName" type="string" indexed="true" stored="true" multiValued="false" /> 
	 <field name="houseCitychName" type="string" indexed="true" stored="true" multiValued="false" /> 
	 <field name="houseCityenName" type="string" indexed="true" stored="true" multiValued="false" /> 
	 <field name="origProvchName" type="string" indexed="true" stored="true" multiValued="false" /> 
	 <field name="origProvenName" type="string" indexed="true" stored="true" multiValued="false" /> 
	<field name="origCitychName" type="string" indexed="true" stored="true" multiValued="false"/>
	 <field name="origCityenName" type="string" indexed="true" stored="true" multiValued="false" />
	<field name="experience" type="string" indexed="true" stored="true" multiValued="false" /> 	 
	 <field name="compIdchFname" type="string" indexed="true" stored="true" multiValued="false" /> 
	 <field name="compIdenFname" type="string" indexed="true" stored="true" multiValued="false" /> 
	 <field name="industrychName" type="string" indexed="true" stored="true" multiValued="false" />
	 <field name="industryenName" type="string" indexed="true" stored="true" multiValued="false" /> 
	 <field name="postchName" type="string" indexed="true" stored="true" multiValued="false" /> 
	 <field name="postenName" type="string" indexed="true" stored="true" multiValued="false" /> 
	 <field name="subPostchName" type="string" indexed="true" stored="true" multiValued="false" /> 
	 <field name="subPostenName" type="string" indexed="true" stored="true" multiValued="false" /> 
	 <field name="postRemark" type="string" indexed="true" stored="true" multiValued="false" />
	<field name="departRemark" type="string" indexed="true" stored="true" multiValued="false" /> 
	 <field name="salaryMonth" type="string" indexed="true" stored="true" multiValued="false" /> 
	<field name="salaryOfYear" type="string" indexed="true" stored="true" multiValued="false"/>
	 <field name="salaryOfMonth" type="string" indexed="true" stored="true" multiValued="false" /> 
	 <field name="bonus" type="string" indexed="true" stored="true" multiValued="false" /> 
	 <field name="allowance" type="string" indexed="true" stored="true" multiValued="false" /> 
	 <field name="stock" type="string" indexed="true" stored="true" multiValued="false" />
	 <field name="salaryRemark" type="string" indexed="true" stored="true" multiValued="false" /> 
	 <field name="expectLocation" type="string" indexed="true" stored="true" multiValued="false" /> 
	 <field name="expectIndustry" type="string" indexed="true" stored="true" multiValued="false" /> 
	 <field name="expectPost" type="string" indexed="true" stored="true" multiValued="false" /> 
	 <field name="selfAssessment" type="string" indexed="true" stored="true" multiValued="false" /> 
	 <field name="completion" type="string" indexed="true" stored="true" multiValued="false" /> 
	 <field name="addressEn" type="string" indexed="true" stored="true" multiValued="false" /> 
	<field name="departRemarkEn" type="string" indexed="true" stored="true" multiValued="false" /> 
	 <field name="postRemarkEn" type="string" indexed="true" stored="true" multiValued="false" /> 
	 <field name="salaryRemarkEn" type="string" indexed="true" stored="true" multiValued="false" /> 
	 <field name="language" type="string" indexed="true" stored="true" multiValued="false" /> 
	 <field name="selfAssessmentEn" type="string" indexed="true" stored="true" multiValued="false" /> 
	 <field name="overseaLocation" type="string" indexed="true" stored="true" multiValued="false" /> 	 

	<field name="persondetail" type="textik" indexed="true" stored="false" multiValued="true" />
	<copyField source="code" dest="persondetail" />
	<copyField source="chName" dest="persondetail" />
	<copyField source="enName" dest="persondetail" />
	<copyField source="nickName" dest="persondetail" />
	<copyField source="birthdate" dest="persondetail" />
	<copyField source="height" dest="persondetail" />
	<copyField source="email" dest="persondetail" />
	<copyField source="mobile" dest="persondetail" />
	<copyField source="workPhone" dest="persondetail" />
	<copyField source="homePhone" dest="persondetail" />
	<copyField source="address" dest="persondetail" />
	<copyField source="postal" dest="persondetail" />
	<copyField source="idNumber" dest="persondetail" />
	<copyField source="workProvchName" dest="persondetail" />
	<copyField source="workProvenName" dest="persondetail" />
	<copyField source="workCitychName" dest="persondetail" />
	<copyField source="workCityenName" dest="persondetail" />
	<copyField source="houseProvchName" dest="persondetail" />
	<copyField source="houseProvenName" dest="persondetail" />
	<copyField source="houseCitychName" dest="persondetail" />
	<copyField source="houseCityenName" dest="persondetail" />
	<copyField source="origProvchName" dest="persondetail" />
	<copyField source="origProvenName" dest="persondetail" />
	<copyField source="origCitychName" dest="persondetail" />
	<copyField source="origCityenName" dest="persondetail" />
	<copyField source="experience" dest="persondetail" />
	<copyField source="compIdchFname" dest="persondetail" />
	<copyField source="compIdenFname" dest="persondetail" />
	<copyField source="industrychName" dest="persondetail" />
	<copyField source="industryenName" dest="persondetail" />
	<copyField source="postchName" dest="persondetail" />
	<copyField source="postenName" dest="persondetail" />
	<copyField source="subPostchName" dest="persondetail" />
	<copyField source="subPostenName" dest="persondetail" />
	<copyField source="postRemark" dest="persondetail" />
	<copyField source="departRemark" dest="persondetail" />
	<copyField source="salaryMonth" dest="persondetail" />
	<copyField source="salaryOfYear" dest="persondetail" />
	<copyField source="salaryOfMonth" dest="persondetail" />
	<copyField source="bonus" dest="persondetail" />
	<copyField source="allowance" dest="persondetail" />
	<copyField source="stock" dest="persondetail" />
	<copyField source="salaryRemark" dest="persondetail" />
	<copyField source="expectLocation" dest="persondetail" />
	<copyField source="expectIndustry" dest="persondetail" />
	<copyField source="expectPost" dest="persondetail" />
	<copyField source="selfAssessment" dest="persondetail" />
	<copyField source="completion" dest="persondetail" />
	<copyField source="addressEn" dest="persondetail" />
	<copyField source="departRemarkEn" dest="persondetail" />
	<copyField source="postRemarkEn" dest="persondetail" />
	<copyField source="salaryRemarkEn" dest="persondetail" />
	<copyField source="language" dest="persondetail" />
	<copyField source="selfAssessmentEn" dest="persondetail" />
	<copyField source="overseaLocation" dest="persondetail" />
	
 </fields>
 
 <uniqueKey>id</uniqueKey>

 <!-- field for the QueryParser to use when an explicit fieldname is absent -->
 <defaultSearchField>persondetail</defaultSearchField>

 <!-- SolrQueryParser configuration: defaultOperator="AND|OR" -->
 <solrQueryParser defaultOperator="OR"/>
</schema>

配置文件E:\SolrHome\solr\person\conf\solrconfig.xml:

<?xml version="1.0" encoding="UTF-8" ?>
<config>
  
  <abortOnConfigurationError>${solr.abortOnConfigurationError:true}</abortOnConfigurationError>
  
  <luceneMatchVersion>LUCENE_33</luceneMatchVersion>

  <lib dir="../../contrib/extraction/lib" />
  
  <lib dir="../../dist/" regex="apache-solr-cell-\d.*\.jar" />
  <lib dir="../../dist/" regex="apache-solr-clustering-\d.*\.jar" />
  <lib dir="../../dist/" regex="apache-solr-dataimporthandler-\d.*\.jar" />

  <!-- If a dir option (with or without a regex) is used and nothing
       is found that matches, it will be ignored.

       Commented-out default data directory, superseded by the explicit
       <dataDir> element further down:
	   <dataDir>${solr.data.dir:solr/home/person/data}</dataDir>
    -->
  <lib dir="../../contrib/clustering/lib/" />
  <lib dir="/total/crap/dir/ignored" /> 
  
  <dataDir>E:/SolrHome/solr/person/data</dataDir>

  <directoryFactory name="DirectoryFactory" 
                    class="${solr.directoryFactory:solr.StandardDirectoryFactory}"/>


  <!-- Default index settings; mainIndex repeats useCompoundFile,
       ramBufferSizeMB and mergeFactor with the same values. -->
  <indexDefaults>
    <!-- Segment files and merging -->
    <useCompoundFile>false</useCompoundFile>
    <mergeFactor>10</mergeFactor>

    <!-- Buffer size and per-field length limit -->
    <ramBufferSizeMB>32</ramBufferSizeMB>
    <maxFieldLength>10000</maxFieldLength>

    <!-- Index locking -->
    <writeLockTimeout>1000</writeLockTimeout>
    <commitLockTimeout>10000</commitLockTimeout>
    <lockType>native</lockType>
  </indexDefaults>

 
  <!-- Settings for the main index. -->
  <mainIndex>
    <!-- These three carry the same values as in indexDefaults. -->
    <useCompoundFile>false</useCompoundFile>
    <mergeFactor>10</mergeFactor>
    <ramBufferSizeMB>32</ramBufferSizeMB>

    <unlockOnStartup>false</unlockOnStartup>

    <!-- If true, IndexReaders will be reopened (often more efficient)
         instead of closed and then opened. -->
    <reopenReaders>true</reopenReaders>

    <!-- Keep one commit point and zero optimized commit points. -->
    <deletionPolicy class="solr.SolrDeletionPolicy">
      <str name="maxCommitsToKeep">1</str>
      <str name="maxOptimizedCommitsToKeep">0</str>
    </deletionPolicy>

    <!-- Info stream targeting INFOSTREAM.txt; currently set to false. -->
    <infoStream file="INFOSTREAM.txt">false</infoStream>
  </mainIndex>

 
  <!-- JMX element with no attributes, i.e. default JMX settings. -->
  <jmx />

  <!-- Update handler implementation; no child settings are configured. -->
  <updateHandler class="solr.DirectUpdateHandler2" />
  
   <!-- Search-side settings: clause limit, caches and searcher warming. -->
   <query>
    <maxBooleanClauses>1024</maxBooleanClauses>

    <!-- All three caches use size 512, initialSize 512 and autowarmCount 0. -->
    <filterCache class="solr.FastLRUCache" size="512" initialSize="512" autowarmCount="0"/>
    <queryResultCache class="solr.LRUCache" size="512" initialSize="512" autowarmCount="0"/>
    <documentCache class="solr.LRUCache" size="512" initialSize="512" autowarmCount="0"/>

    <enableLazyFieldLoading>true</enableLazyFieldLoading>

    <queryResultWindowSize>20</queryResultWindowSize>

    <!-- Maximum number of documents to cache for any entry in the
         queryResultCache. -->
    <queryResultMaxDocsCached>200</queryResultMaxDocsCached>

    <!-- QuerySenderListener takes an array of NamedList and executes a
         local query request for each NamedList in sequence.
         For newSearcher the list is empty: both sample queries are commented out. -->
    <listener event="newSearcher" class="solr.QuerySenderListener">
      <arr name="queries">
        <!--
           <lst><str name="q">solr</str><str name="sort">price asc</str></lst>
           <lst><str name="q">rocks</str><str name="sort">weight asc</str></lst>
          -->
      </arr>
    </listener>

    <!-- For firstSearcher a single placeholder query is sent. -->
    <listener event="firstSearcher" class="solr.QuerySenderListener">
      <arr name="queries">
        <lst>
          <str name="q">static firstSearcher warming in solrconfig.xml</str>
        </lst>
      </arr>
    </listener>

    <useColdSearcher>false</useColdSearcher>
    <maxWarmingSearchers>2</maxWarmingSearchers>
  </query>


  <!-- HTTP request dispatching; handleSelect is switched on. -->
  <requestDispatcher handleSelect="true">
    <!-- NOTE(review): enableRemoteStreaming is on, which presumably lets a
         request make Solr read content from a file path or URL it names.
         Confirm this instance is not reachable by untrusted clients, or turn
         it off. The multipart upload limit is 2048000 KB. -->
    <requestParsers enableRemoteStreaming="true" multipartUploadLimitInKB="2048000" />

    <!-- never304 is set to true. -->
    <httpCaching never304="true" />
  </requestDispatcher>

  <!-- Default search handler (default="true"). -->
  <requestHandler name="search" class="solr.SearchHandler" default="true">
    <!-- Default values for query parameters; parameters supplied in the
         request override them. -->
    <lst name="defaults">
      <str name="echoParams">explicit</str>
      <int name="rows">10</int>
    </lst>
  </requestHandler>
	
	<!-- /browse: search handler whose defaults select the "velocity" response
	     writer, edismax parsing, faceting and highlighting, and which appends
	     the spellcheck component.
	     NOTE(review): no queryResponseWriter named "velocity" is declared in
	     this file (only "xslt" is). The field names used below (text, features,
	     name, sku, manu, cat, price, inStock, manufacturedate_dt) look like
	     the stock Solr example schema; confirm they exist in the person schema
	     before relying on this handler. -->
	<requestHandler name="/browse" class="solr.SearchHandler">
     <lst name="defaults">
       <str name="echoParams">explicit</str>

       <!-- VelocityResponseWriter settings -->
       <str name="wt">velocity</str>

       <str name="v.template">browse</str>
       <str name="v.layout">layout</str>
       <str name="title">Solritas</str>

       <!-- Query parsing and result defaults -->
       <str name="defType">edismax</str>
       <str name="q.alt">*:*</str>
       <str name="rows">10</str>
       <str name="fl">*,score</str>
       <!-- MoreLikeThis settings -->
       <str name="mlt.qf">
         text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
       </str>
       <str name="mlt.fl">text,features,name,sku,id,manu,cat</str>
       <int name="mlt.count">3</int>

       <!-- Per-field boosts for edismax -->
       <str name="qf">
          text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
       </str>

       <!-- Faceting defaults: field, query, pivot and range facets -->
       <str name="facet">on</str>
       <str name="facet.field">cat</str>
       <str name="facet.field">manu_exact</str>
       <str name="facet.query">ipod</str>
       <str name="facet.query">GB</str>
       <str name="facet.mincount">1</str>
       <str name="facet.pivot">cat,inStock</str>
       <str name="facet.range">price</str>
       <int name="f.price.facet.range.start">0</int>
       <int name="f.price.facet.range.end">600</int>
       <int name="f.price.facet.range.gap">50</int>
       <str name="f.price.facet.range.other">after</str>
       <str name="facet.range">manufacturedate_dt</str>
       <str name="f.manufacturedate_dt.facet.range.start">NOW/YEAR-10YEARS</str>
       <str name="f.manufacturedate_dt.facet.range.end">NOW</str>
       <str name="f.manufacturedate_dt.facet.range.gap">+1YEAR</str>
       <str name="f.manufacturedate_dt.facet.range.other">before</str>
       <str name="f.manufacturedate_dt.facet.range.other">after</str>


       <!-- Highlighting defaults -->
       <str name="hl">on</str>
       <str name="hl.fl">text features name</str>
       <str name="f.name.hl.fragsize">0</str>
       <str name="f.name.hl.alternateField">name</str>
     </lst>
     <!-- Components appended after the handler's regular components -->
     <arr name="last-components">
       <str>spellcheck</str>
     </arr>
     <!--
     <str name="url-scheme">httpx</str>
     -->
  </requestHandler>

  <!-- XML Update Request Handler -->
  <requestHandler name="/update" class="solr.XmlUpdateRequestHandler" />

  <!-- Binary Update Request Handler
       http://wiki.apache.org/solr/javabin
       Presumably the endpoint the SolrJ client's BinaryRequestWriter posts
       to; verify against the client code.
    -->
  <requestHandler name="/update/javabin" class="solr.BinaryUpdateRequestHandler" />

  <!-- CSV Update Request Handler (lazy startup)
       http://wiki.apache.org/solr/UpdateCSV
    -->
  <requestHandler name="/update/csv" class="solr.CSVRequestHandler" startup="lazy" />

  <!-- JSON Update Request Handler (lazy startup)
       http://wiki.apache.org/solr/UpdateJSON
    -->
  <requestHandler name="/update/json" class="solr.JsonUpdateRequestHandler" startup="lazy" />

  <!-- Solr Cell Update Request Handler (lazy startup)
       http://wiki.apache.org/solr/ExtractingRequestHandler
       NOTE(review): the targets "text" and "links" and the "ignored_" prefix
       are assumed to match fields of the person schema; verify.
    -->
  <requestHandler name="/update/extract" class="solr.extraction.ExtractingRequestHandler" startup="lazy">
    <lst name="defaults">
      <!-- All the main content goes into "text"; use a stored field if the
           extracted text must be returned or highlighted. -->
      <str name="fmap.content">text</str>
      <str name="lowernames">true</str>
      <str name="uprefix">ignored_</str>

      <!-- Capture link hrefs but ignore div attributes. -->
      <str name="captureAttr">true</str>
      <str name="fmap.a">links</str>
      <str name="fmap.div">ignored_</str>
    </lst>
  </requestHandler>

  <!-- Field analysis handler (lazy startup) -->
  <requestHandler name="/analysis/field" class="solr.FieldAnalysisRequestHandler" startup="lazy" />

  <!-- Document analysis handler (lazy startup) -->
  <requestHandler name="/analysis/document" class="solr.DocumentAnalysisRequestHandler" startup="lazy" />

  <!-- Admin Handlers: this registers all the standard admin RequestHandlers. -->
  <requestHandler name="/admin/" class="solr.admin.AdminHandlers" />

  <!-- Ping / healthcheck: runs the query "solrpingquery" through the handler
       named "search". -->
  <requestHandler name="/admin/ping" class="solr.PingRequestHandler">
    <lst name="defaults">
      <str name="qt">search</str>
      <str name="q">solrpingquery</str>
      <str name="echoParams">all</str>
    </lst>
  </requestHandler>

  <!-- Echo the request contents back to the client. -->
  <requestHandler name="/debug/dump" class="solr.DumpRequestHandler">
    <lst name="defaults">
      <str name="echoParams">explicit</str>
      <str name="echoHandler">true</str>
    </lst>
  </requestHandler>
  
  
  <!-- Spell check component, referenced by name from the /browse and /spell
       handlers.
       NOTE(review): the field type "textSpell" and the field "name" must be
       defined in the schema; that part of the schema is not visible here. -->
  <searchComponent name="spellcheck" class="solr.SpellCheckComponent">

    <!-- Field type used to analyze the query for spell checking -->
    <str name="queryAnalyzerFieldType">textSpell</str>

    <!-- Multiple "Spell Checkers" can be declared and used by this
         component
      -->

    <!-- a spellchecker built from a field of the main index, and
         written to disk
      -->
    <lst name="spellchecker">
      <str name="name">default</str>
      <str name="field">name</str>
      <str name="spellcheckIndexDir">spellchecker</str>
      <!-- uncomment this to require terms to occur in 1% of the documents in order to be included in the dictionary
      	<float name="thresholdTokenFrequency">.01</float>
      -->
    </lst>

   
  </searchComponent>

 
  <!-- /spell: lazily started search handler that appends the spellcheck
       component and asks for a single suggestion without extended results. -->
  <requestHandler name="/spell" class="solr.SearchHandler" startup="lazy">
    <lst name="defaults">
      <str name="spellcheck.onlyMorePopular">false</str>
      <str name="spellcheck.extendedResults">false</str>
      <str name="spellcheck.count">1</str>
    </lst>
    <arr name="last-components">
      <str>spellcheck</str>
    </arr>
  </requestHandler>

  <!-- Term Vector Component

       http://wiki.apache.org/solr/TermVectorComponent
    -->
  <searchComponent name="tvComponent" class="solr.TermVectorComponent"/>

  <!-- tvrh: lazily started search handler that appends tvComponent and sets
       tv=true by default. -->
  <requestHandler name="tvrh" class="solr.SearchHandler" startup="lazy">
    <lst name="defaults">
      <bool name="tv">true</bool>
    </lst>
    <arr name="last-components">
      <str>tvComponent</str>
    </arr>
  </requestHandler>

 
  <!-- Clustering component; enabled only when the solr.clustering.enabled
       system property is set, since the fallback value is false. Two engines
       are declared: "default" (Lingo) and "stc" (STC). -->
  <searchComponent name="clustering" 
                   enable="${solr.clustering.enabled:false}"
                   class="solr.clustering.ClusteringComponent" >
    <!-- Declare an engine -->
    <lst name="engine">
      <!-- The name, only one can be named "default" -->
      <str name="name">default</str>

      <str name="carrot.algorithm">org.carrot2.clustering.lingo.LingoClusteringAlgorithm</str>

      <str name="LingoClusteringAlgorithm.desiredClusterCountBase">20</str>
     
      <str name="carrot.lexicalResourcesDir">clustering/carrot2</str>

      <str name="MultilingualClustering.defaultLanguage">ENGLISH</str>
    </lst>
    <!-- Second engine, selectable by the name "stc" -->
    <lst name="engine">
      <str name="name">stc</str>
      <str name="carrot.algorithm">org.carrot2.clustering.stc.STCClusteringAlgorithm</str>
    </lst>
  </searchComponent>

  <!-- /clustering: lazily started search handler that appends the clustering
       component; enabled under the same solr.clustering.enabled property.
       NOTE(review): name, features, text, sku, manu and cat look like fields
       of the stock Solr example schema; confirm they exist in the person
       schema before enabling this handler. -->
  <requestHandler name="/clustering"
                  startup="lazy"
                  enable="${solr.clustering.enabled:false}"
                  class="solr.SearchHandler">
    <lst name="defaults">
      <bool name="clustering">true</bool>
      <str name="clustering.engine">default</str>
      <bool name="clustering.results">true</bool>
      <!-- The title field -->
      <str name="carrot.title">name</str>
      <!-- The field used as the document URL -->
      <str name="carrot.url">id</str>
      <!-- The field to cluster on -->
       <str name="carrot.snippet">features</str>
       <!-- produce summaries -->
       <bool name="carrot.produceSummary">true</bool>
       <!-- the maximum number of labels per cluster -->
       <!--<int name="carrot.numDescriptions">5</int>-->
       <!-- produce sub clusters -->
       <bool name="carrot.outputSubClusters">false</bool>
       
       <!-- Query parsing and result defaults -->
       <str name="defType">edismax</str>
       <str name="qf">
          text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
       </str>
       <str name="q.alt">*:*</str>
       <str name="rows">10</str>
       <str name="fl">*,score</str>
    </lst>     
    <arr name="last-components">
      <str>clustering</str>
    </arr>
  </requestHandler>
  
  <!-- DataImportHandler endpoint. The "config" default names the import
       definition file, given here as an absolute path on drive E:. -->
  <requestHandler name="/dataimport" class="org.apache.solr.handler.dataimport.DataImportHandler">
    <lst name="defaults">
      <str name="config">E:/SolrHome/solr/person/conf/data-config.xml</str>
    </lst>
  </requestHandler>
  
 
  <!-- Terms component -->
  <searchComponent name="terms" class="solr.TermsComponent"/>

  <!-- A request handler for demonstrating the terms component; "components"
       lists terms as its only entry and startup is lazy. -->
  <requestHandler name="/terms" class="solr.SearchHandler" startup="lazy">
    <lst name="defaults">
      <bool name="terms">true</bool>
    </lst>
    <arr name="components">
      <str>terms</str>
    </arr>
  </requestHandler>

  <!-- Query elevation component; its rules are read from elevate.xml. -->
  <searchComponent name="elevator" class="solr.QueryElevationComponent" >
    <!-- pick a fieldType to analyze queries -->
    <str name="queryFieldType">string</str>
    <str name="config-file">elevate.xml</str>
  </searchComponent>

  <!-- A request handler for demonstrating the elevator component;
       it appends "elevator" and is started lazily. -->
  <requestHandler name="/elevate" class="solr.SearchHandler" startup="lazy">
    <lst name="defaults">
      <str name="echoParams">explicit</str>
    </lst>
    <arr name="last-components">
      <str>elevator</str>
    </arr>
  </requestHandler>

  <!-- Highlighting Component

       http://wiki.apache.org/solr/HighlightingParameters

       Declares the named fragmenters, formatter, encoder, fragListBuilders
       and fragmentsBuilders; entries marked default="true" are the defaults.
    -->
  <searchComponent class="solr.HighlightComponent" name="highlight">
    <highlighting>
      <!-- Configure the standard fragmenter -->
      <!-- This could most likely be commented out in the "default" case -->
      <fragmenter name="gap" 
                  default="true"
                  class="solr.highlight.GapFragmenter">
        <lst name="defaults">
          <int name="hl.fragsize">100</int>
        </lst>
      </fragmenter>

      <!-- A regular-expression-based fragmenter 
           (for sentence extraction) 
        -->
      <fragmenter name="regex" 
                  class="solr.highlight.RegexFragmenter">
        <lst name="defaults">
          <!-- slightly smaller fragsizes work better because of slop -->
          <int name="hl.fragsize">70</int>
          <!-- allow 50% slop on fragment sizes -->
          <float name="hl.regex.slop">0.5</float>
          <!-- a basic sentence pattern -->
          <str name="hl.regex.pattern">[-\w ,/\n\"']{20,200}</str>
        </lst>
      </fragmenter>

      <!-- Configure the standard formatter: matches are wrapped in <em> tags -->
      <formatter name="html" 
                 default="true"
                 class="solr.highlight.HtmlFormatter">
        <lst name="defaults">
          <str name="hl.simple.pre"><![CDATA[<em>]]></str>
          <str name="hl.simple.post"><![CDATA[</em>]]></str>
        </lst>
      </formatter>

      <!-- Configure the standard encoder -->
      <encoder name="html" 
               class="solr.highlight.HtmlEncoder" />

      <!-- Configure the standard fragListBuilder -->
      <fragListBuilder name="simple" 
                       default="true"
                       class="solr.highlight.SimpleFragListBuilder"/>

      <!-- Configure the single fragListBuilder -->
      <fragListBuilder name="single" 
                       class="solr.highlight.SingleFragListBuilder"/>

      <!-- default tag FragmentsBuilder -->
      <fragmentsBuilder name="default" 
                        default="true"
                        class="solr.highlight.ScoreOrderFragmentsBuilder">
        <!-- 
        <lst name="defaults">
          <str name="hl.multiValuedSeparatorChar">/</str>
        </lst>
        -->
      </fragmentsBuilder>

      <!-- multi-colored tag FragmentsBuilder: hl.tag.pre is a comma-separated
           list of opening tags, all closed by hl.tag.post. Every background
           value must be a valid CSS colour keyword, otherwise browsers ignore
           the declaration and that highlight is shown without a colour. -->
      <fragmentsBuilder name="colored" 
                        class="solr.highlight.ScoreOrderFragmentsBuilder">
        <lst name="defaults">
          <str name="hl.tag.pre"><![CDATA[
               <b style="background:yellow">,<b style="background:lawngreen">,
               <b style="background:aquamarine">,<b style="background:magenta">,
               <b style="background:palegreen">,<b style="background:coral">,
               <b style="background:wheat">,<b style="background:khaki">,
               <b style="background:lime">,<b style="background:deepskyblue">]]></str>
          <str name="hl.tag.post"><![CDATA[</b>]]></str>
        </lst>
      </fragmentsBuilder>
    </highlighting>
  </searchComponent>

  <!-- XSLT response writer; xsltCacheLifetimeSeconds is set to 5. -->
  <queryResponseWriter name="xslt" class="solr.XSLTResponseWriter">
    <int name="xsltCacheLifetimeSeconds">5</int>
  </queryResponseWriter>


  <!-- Admin settings: the default query is "solr". -->
  <admin>
    <defaultQuery>solr</defaultQuery>
  </admin>

</config>

使用到的jar包，路径：E:\apache-tomcat-6.0.29\webapps\solr\WEB-INF\lib