ES笔记_通过java代码往ElasticSearch批量造数

ES中数据量的大小会影响性能结果,为了模拟真实的环境,需要保证ES中的数据量。

之前写了一篇通过hive往ES中造数的文章(https://blog.csdn.net/yiqin3399/article/details/99638411),但是如果hive中没有底层数据,大量插数就不方便了。这里提供另外一种方法:通过java代码批量插数。

一、首先要知道ES的index/type,以及各字段的映射

curl -X GET 'http://ip:9200/courier_growth_system_result/_mapping/courier_growth_system_result?pretty'

二、IDEA中写一个javabean,CourierGrowthBean.java,与ES中的字段对应

import com.alibaba.fastjson.annotation.JSONField;
import lombok.Data;

/**
 * Document bean whose fields correspond to the fields of the
 * {@code courier_growth_system_result} Elasticsearch mapping.
 *
 * <p>Lombok's {@code @Data} generates the getters and setters, so the fields
 * themselves are private and are only reached through those accessors.
 *
 * @author yiqin
 * @date 2019/10/16
 */
@Data
public class CourierGrowthBean {
    private long badReviewCount;
    private long calTime;
    private long cityId;
    private long competitionId;
    private long courierId;
    private long deliveriesCount;
    private long grade;
    private long growValue;
    /** Written to JSON under the key {@code isProtect}, as configured by the annotation. */
    @JSONField(name = "isProtect")
    private boolean protect;
    private long protectGrade;
    private long refuseCount;
    private long rewardGrowValue;
    private long workDayCount;
}

三、IDEA中写一个ES通用类,ElasticSearchUtil.java

import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.InetSocketTransportAddress;
import org.elasticsearch.transport.client.PreBuiltTransportClient;

import java.net.InetAddress;
import java.net.UnknownHostException;

/**
 * Helper for building and closing an Elasticsearch {@link TransportClient}.
 *
 * @author yiqin
 * @date 2019/10/16
 */
public class ElasticSearchUtil {

    /**
     * Builds a transport client with one transport address per entry of {@code hostPorts}.
     *
     * <p>The client is created with {@code client.transport.ignore_cluster_name} and
     * {@code client.transport.sniff} both set to {@code true}. If any entry is rejected
     * or cannot be resolved, the partially built client is closed before the exception
     * propagates, so no client is leaked.
     *
     * @param hostPorts comma-separated {@code host:port} entries, for example
     *                  {@code "es1:9300,es2:9300"}; blank entries (such as the one
     *                  produced by a trailing comma) are skipped
     * @return a client holding every listed address; the caller is responsible for
     *         closing it, for example via {@link #close(TransportClient)}
     * @throws UnknownHostException     if a host name cannot be resolved
     * @throws IllegalArgumentException if {@code hostPorts} is {@code null}, contains no
     *                                  usable entry, or an entry is not of the form
     *                                  {@code host:port} with a numeric port
     */
    public static TransportClient getTransportClient(String hostPorts) throws UnknownHostException {
        if (hostPorts == null) {
            // Fail before a client (and its thread pools) is created.
            throw new IllegalArgumentException("hostPorts must not be null");
        }

        Settings settings = Settings.builder()
                .put("client.transport.ignore_cluster_name", true)
                .put("client.transport.sniff", true)
                .build();

        PreBuiltTransportClient client = new PreBuiltTransportClient(settings);
        try {
            int addressCount = 0;
            for (String rawEntry : hostPorts.split(",")) {
                String entry = rawEntry.trim();
                if (entry.isEmpty()) {
                    continue;
                }

                String[] parts = entry.split(":", -1);
                if (parts.length != 2 || parts[0].trim().isEmpty() || parts[1].trim().isEmpty()) {
                    throw new IllegalArgumentException(
                            "Expected host:port but got '" + entry + "' in '" + hostPorts + "'");
                }

                // A non-numeric port surfaces as NumberFormatException, an IllegalArgumentException subtype.
                int port = Integer.parseInt(parts[1].trim());
                client.addTransportAddress(
                        new InetSocketTransportAddress(InetAddress.getByName(parts[0].trim()), port));
                addressCount++;
            }

            if (addressCount == 0) {
                throw new IllegalArgumentException("No host:port entry found in '" + hostPorts + "'");
            }
            return client;
        } catch (RuntimeException | UnknownHostException e) {
            // Release the client created above; the caller never receives a reference to it.
            client.close();
            throw e;
        }
    }

    /**
     * Closes the given client; does nothing when it is {@code null}.
     *
     * @param transportClient the client to close, may be {@code null}
     */
    public static void close(TransportClient transportClient) {
        if (null != transportClient) {
            transportClient.close();
        }
    }
}

四、MockData.java,往ES中批量插数。这里遇到一个坑:之前性能不好,是因为始终复用同一个bulk,每次执行后没有重新创建,见代码注释

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.serializer.SerializerFeature;
import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.xcontent.XContentType;

import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.net.UnknownHostException;
import java.text.DecimalFormat;
import java.text.NumberFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Random;

/**
 * @author yiqin
 * @date 2019/10/16
 */
public class MockData {

    private static TransportClient transportClient;
    private static BulkRequestBuilder bulkRequest;
    private static String uuid;
    static FileWriter fw;


    static{
        String hostPort = "es-ip1:9300,es-ip2:9300,es-ip3:9300,es-ip4:9300";
        try {
            transportClient = ElasticSearchUtil.getTransportClient(hostPort);
        } catch (UnknownHostException e) {
            e.printStackTrace();
        }

        try {
            fw = new FileWriter("/Users/yiqin/Desktop/courierGrowthId.txt",true);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    private static String getCourierGrowthBean(){
        Random random = new Random();
        CourierGrowthBean courierGrowthBean = new CourierGrowthBean();
        courierGrowthBean.setBadReviewCount(random.nextInt(100));
        courierGrowthBean.setCalTime(random.nextInt(20000000)+1570723200000l);
        courierGrowthBean.setCityId(1101);
        courierGrowthBean.setCompetitionId(26);
        long courierId = getCourierId();
        uuid = String.valueOf(courierId);
        writeToTxt(uuid);
        courierGrowthBean.setCourierId(courierId);
        courierGrowthBean.setDeliveriesCount(random.nextInt(1000)+1);
        courierGrowthBean.setGrade(random.nextInt(8)+1);
        courierGrowthBean.setProtect(true);
        courierGrowthBean.setProtectGrade(6);
        courierGrowthBean.setRefuseCount(random.nextInt(100));
        courierGrowthBean.setGrowValue(random.nextInt(90000));
        courierGrowthBean.setWorkDayCount(random.nextInt(200));
        return JSON.toJSONString(courierGrowthBean, SerializerFeature.WriteMapNullValue);

    }

    /**
     * 生成唯一的闪送员id
     */
    public static Long getCourierId() {
        SimpleDateFormat df = new SimpleDateFormat("HHmmssSSS");
        String date = df.format(new Date());

        NumberFormat f = new DecimalFormat("00000");
        Random r = new Random();
        String number = f.format(r.nextInt(10000));

        Long orderNumber = Long.parseLong(date + number);
        return orderNumber;
    }


    public static void main(String[] args) {
        String index = "courier_growth_system_result";
        String type = "courier_growth_system_result";

        bulkRequest = transportClient.prepareBulk();

        long a = System.currentTimeMillis(); //开始时间

        int num = 0;
        for(int i=0; i< 20000;i++) {//总共往ES里放的数据量,2万条

            bulkRequest.add(transportClient.prepareIndex(index, type)
                    .setSource(getCourierGrowthBean(), XContentType.JSON)
                    .setId(uuid));
            num ++;
            if (num >= 1000) {//1次1000条
                if (bulkRequest.numberOfActions() > 0) {
                    bulkRequest.execute().actionGet();
                }
                num = 0;
                //批量执行后要重新创新一个bulk,不然会一次一次重新索引相同的第一次数据,之前没写这个耗时95s,现在耗时7s
                bulkRequest = transportClient.prepareBulk();
            }

        }

        if (bulkRequest.numberOfActions() > 0) {
            bulkRequest.execute().actionGet();
        }


        long b = System.currentTimeMillis();
        System.out.println(b-a);//打印总共花费时间

        if(fw!=null){
            try {
                fw.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    static int numTemp =0;//一行写十个参数,用于jmeter参数
    public static void writeToTxt(String courierId){
        PrintWriter pw = new PrintWriter(fw);
        if(numTemp<10){
            pw.print(courierId);
            pw.print(",");
            numTemp++;
        }else if(numTemp==10){
            numTemp=0;
            pw.println();
        }
        pw.flush();
    }

}

五、ES的依赖(代码中还用到了fastjson和lombok,需另行引入)

<dependency>
    <groupId>org.elasticsearch.client</groupId>
    <artifactId>transport</artifactId>
    <version>5.5.2</version>
</dependency>

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值