在 Elasticsearch 服务器上创建索引库

1 构建工程 

// Step 2: the HtmlBean entity.
/**
 * Plain data holder for one parsed HTML page: a numeric id plus the page's
 * title, text content and URL.
 */
public class HtmlBean {

    private int id;

    private String title;

    private String content;

    private String url;

    /** @return the numeric id of this page */
    public int getId() {
        return id;
    }

    /** @param id the numeric id to store */
    public void setId(int id) {
        this.id = id;
    }

    /** @return the page title, or {@code null} if none was set */
    public String getTitle() {
        return title;
    }

    /** @param title the page title to store */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @return the page content, or {@code null} if none was set */
    public String getContent() {
        return content;
    }

    /** @param content the page content to store */
    public void setContent(String content) {
        this.content = content;
    }

    /** @return the page URL, or {@code null} if none was set */
    public String getUrl() {
        return url;
    }

    /** @param url the page URL to store */
    public void setUrl(String url) {
        this.url = url;
    }

}

package com.sxt.es;

import java.util.ArrayList;

import java.util.List;

/**
 * Pagination state for a result list: page size, current page number, record
 * and page totals, the window of page numbers to show in a pager, and the rows
 * of the current page.
 *
 * <p>The page total and the page-number window are derived values. They are
 * recomputed whenever the total count changes, and also when the page size or
 * current page changes while a positive total count is known, so the setters
 * may be called in any order.
 *
 * @param <T> type of the rows held for the current page
 */
public class PageBean<T> {

    /** Maximum number of page numbers placed in the pager window. */
    private static final int MAX_VISIBLE_PAGES = 10;

    private int size = 10; // records shown per page

    private int index = 1; // current page number (1-based)

    private int totalCount = 0; // total number of records

    private int totalPageCount = 1; // total number of pages

    private int[] numbers; // page numbers to display in the pager

    protected List<T> list; // rows to render on the page

    /**
     * @return zero-based offset of the first record of the current page,
     *         i.e. {@code (index - 1) * size}
     */
    public int getStartRow() {
        return (index - 1) * size;
    }

    /**
     * @return {@code index * size}; this is not capped at the total count
     */
    public int getEndRow() {
        return index * size;
    }

    /**
     * @return the page size
     */
    public int getSize() {
        return size;
    }

    /**
     * Sets the page size. Non-positive values are ignored.
     *
     * @param size records per page
     */
    public void setSize(int size) {
        if (size > 0) {
            this.size = size;
            // The page total depends on the page size; refresh it once a
            // positive total count is known so call order does not matter.
            if (totalCount > 0) {
                setTotalPageCountByRs();
            }
        }
    }

    /**
     * @return the current page number, or 0 when there are no pages
     */
    public int getIndex() {
        if (totalPageCount == 0) {
            return 0;
        }
        return index;
    }

    /**
     * Sets the current page number. Non-positive values are ignored.
     *
     * <p>When a positive total count is known, the page-number window is
     * recomputed around the new page; this replaces a window previously
     * supplied through {@link #setNumbers(int[])}.
     *
     * @param index the 1-based page number
     */
    public void setIndex(int index) {
        if (index > 0) {
            this.index = index;
            if (totalCount > 0) {
                setNumbers(totalPageCount);
            }
        }
    }

    /**
     * @return the total number of records
     */
    public int getTotalCount() {
        return totalCount;
    }

    /**
     * Sets the total number of records and recomputes the page total and the
     * page-number window. Negative values are ignored.
     *
     * @param totalCount the total number of records
     */
    public void setTotalCount(int totalCount) {
        if (totalCount >= 0) {
            this.totalCount = totalCount;
            setTotalPageCountByRs();
        }
    }

    /**
     * @return the total number of pages
     */
    public int getTotalPageCount() {
        return this.totalPageCount;
    }

    /**
     * Derives the page total from the record total (rounding up, 0 when there
     * are no records) and refreshes the page-number window.
     */
    private void setTotalPageCountByRs() {
        if (this.size > 0 && this.totalCount > 0) {
            int fullPages = this.totalCount / this.size;
            boolean hasPartialPage = this.totalCount % this.size != 0;
            this.totalPageCount = hasPartialPage ? fullPages + 1 : fullPages;
        } else {
            this.totalPageCount = 0;
        }
        setNumbers(totalPageCount);
    }

    /**
     * @return the page numbers to display; {@code null} until a window has
     *         been computed or supplied
     */
    public int[] getNumbers() {
        return numbers;
    }

    /**
     * Computes the window of page numbers to display.
     *
     * <p>The window holds at most {@value #MAX_VISIBLE_PAGES} consecutive page
     * numbers. With fewer pages than that, every page is listed. Otherwise the
     * window starts at page 1 for the first pages, ends at the last page for
     * the final pages, and in between starts {@code length / 2} pages before
     * the current page. A non-positive page total yields an empty window so
     * that no stale page numbers remain.
     *
     * @param totalPageCount the total number of pages
     */
    public void setNumbers(int totalPageCount) {
        if (totalPageCount <= 0) {
            this.numbers = new int[0];
            return;
        }
        int length = Math.min(totalPageCount, MAX_VISIBLE_PAGES);
        // Zero-based position of the first page shown: follow the current page,
        // but never run past the last full window and never start before page 1.
        int followCurrent = index - (length / 2 + 1);
        int lastWindowStart = totalPageCount - length;
        int first = Math.max(0, Math.min(followCurrent, lastWindowStart));

        int[] window = new int[length];
        for (int k = 0; k < length; k++) {
            window[k] = first + k + 1;
        }
        this.numbers = window;
    }

    /**
     * Replaces the page-number window with the given array.
     *
     * @param numbers the page numbers to display
     */
    public void setNumbers(int[] numbers) {
        this.numbers = numbers;
    }

    /**
     * @return the rows of the current page; {@code null} if none were set
     */
    public List<T> getList() {
        return list;
    }

    /**
     * @param list the rows of the current page
     */
    public void setList(List<T> list) {
        this.list = list;
    }

    /**
     * Appends one row to the current page, creating the list on first use.
     *
     * @param bean the row to add
     */
    public void setBean(T bean) {
        if (this.list == null) {
            list = new ArrayList<T>();
        }
        list.add(bean);
    }

}

 

/**
 * Creates the "bjsxt" Elasticsearch index with its "htmlbean" mapping and
 * loads the HTML files found under {@link #DATA_DIR} into it.
 */
public class IndexService {

    // Directory that holds the HTML files to index
    public static String DATA_DIR = "d:\\data\\";

    private static Client client;

    static {
        Settings settings = Settings.settingsBuilder()
                .put("cluster.name", "bjsxt-es").build();
        try {
            client = TransportClient
                    .builder()
                    .settings(settings)
                    .build()
                    .addTransportAddress(
                            new InetSocketTransportAddress(InetAddress.getByName("node01"), 9300))
                    .addTransportAddress(
                            new InetSocketTransportAddress(InetAddress.getByName("node02"), 9300))
                    .addTransportAddress(
                            new InetSocketTransportAddress(InetAddress.getByName("node03"), 9300));
        } catch (Exception e) {
            // NOTE(review): on failure the client stays null and later calls will fail.
            e.printStackTrace();
        }
    }

    /**
     * Creates the "bjsxt" index and registers the "htmlbean" mapping, in which
     * "title" and "content" are stored strings analyzed with "ik_max_word".
     *
     * <p>If the index already exists it is deleted first, so any documents in
     * it are lost. NOTE(review): delete-then-recreate is inferred from the
     * original "if exists" branch, which performed no action; confirm that
     * dropping existing data is acceptable.
     *
     * @throws Exception if any Elasticsearch request fails
     */
    public void createIndex() throws Exception {
        IndicesExistsResponse resp = client.admin().indices().prepareExists("bjsxt")
                .execute().actionGet();
        if (resp.isExists()) {
            client.admin().indices().prepareDelete("bjsxt").execute().actionGet();
        }
        // The index must exist before a mapping can be put on it.
        client.admin().indices().prepareCreate("bjsxt").execute().actionGet();

        XContentBuilder builder = XContentFactory.jsonBuilder().startObject()
                .startObject("htmlbean").startObject("properties")
                .startObject("title").field("type", "string")
                .field("store", "yes").field("analyzer", "ik_max_word")
                .field("search_analyzer", "ik_max_word").endObject()
                .startObject("content").field("type", "string")
                .field("store", "yes").field("analyzer", "ik_max_word")
                .field("search_analyzer", "ik_max_word").endObject()
                // The "url" field is deliberately left without an explicit mapping:
                // .startObject("url").field("type", "string")
                // .field("store", "yes").field("analyzer", "ik_max_word")
                // .field("search_analyzer", "ik_max_word").endObject()
                .endObject().endObject().endObject();

        PutMappingRequest mapping = Requests.putMappingRequest("bjsxt").type("htmlbean").source(builder);
        client.admin().indices().putMapping(mapping).actionGet();
    }

    /**
     * Adds the HTML files under {@link #DATA_DIR} to the index.
     */
    @Test
    public void addHtmlToES() {
        readHtml(new File(DATA_DIR));
    }

    /**
     * Recursively walks {@code file}. Every regular file is parsed with
     * {@code HtmlTool.parserHtml}; when that yields a bean, its title, content
     * and url are indexed as one "htmlbean" document in "bjsxt". A failure on
     * one file is printed and does not stop the walk.
     *
     * @param file a directory to descend into, or a single file to index
     */
    public void readHtml(File file) {
        if (file.isDirectory()) {
            File[] children = file.listFiles();
            // listFiles() returns null when the directory cannot be read.
            if (children == null) {
                return;
            }
            for (File child : children) {
                readHtml(child);
            }
        } else {
            try {
                HtmlBean bean = HtmlTool.parserHtml(file.getPath());
                if (bean != null) {
                    Map<String, String> dataMap = new HashMap<String, String>();
                    dataMap.put("title", bean.getTitle());
                    dataMap.put("content", bean.getContent());
                    dataMap.put("url", bean.getUrl());
                    // Write the document to the index
                    client.prepareIndex("bjsxt", "htmlbean").setSource(dataMap).execute().actionGet();
                }
            } catch (Throwable e) {
                e.printStackTrace();
            }
        }
    }

}

package com.sxt.util;

import java.io.File;

import com.sxt.es.HtmlBean;

import com.sxt.es.IndexService;

import net.htmlparser.jericho.CharacterReference;

import net.htmlparser.jericho.Element;

import net.htmlparser.jericho.HTMLElementName;

import net.htmlparser.jericho.Source;

public class HtmlTool {

/**

*

* @param path html 文件路径

*/

public static HtmlBean parserHtml(String path)throws Throwable{

HtmlBean bean = new HtmlBean();

Source source=new Source(new File(path));

// Call fullSequentialParse manually as most of the source will be parsed.

source.fullSequentialParse();

Element titleElement=source.getFirstElement(HTMLElementName.TITLE);

if(titleElement==null){

return null;

}else{

String title=CharacterReference.decodeCollapseWhiteSpace(titleElement.getContent());

bean.setTitle(title);

}

String content =source.getTextExtractor().setIncludeAttributes(true).toString();

String url =path.substring(IndexService.DATA_DIR.length());

bean.setContent(content);

bean.setUrl(url);

return bean;

  }

public static void main(String[] args) {

try {

System.out.println(parserHtml("d:\\data\\news.cctv.com\\2019\\01\\02\\ARTIr9zKNrYdpwG0N8d1fkpC190102.shtml").getContent());

} catch (Throwable e) {

// TODO Auto-generated catch block

e.printStackTrace();

}

}

}

JUnit 测试如下:

访问es服务  http://node03:9200/_plugin/head/

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值