ES中数据量的大小会影响性能结果,为了模拟真实的环境,需要保证ES中的数据量。
之前写过一篇通过hive往ES中造数的文章(https://blog.csdn.net/yiqin3399/article/details/99638411),但是如果hive中没有底层数据,大量插数就不方便了。这里提供另外一种方法:通过java代码批量插数。
一、首先要知道ES的index/type,以及各字段的映射
curl -X GET 'http://ip:9200/courier_growth_system_result/_mapping/courier_growth_system_result?pretty'
二、IDEA中写一个javabean,CourierGrowthBean.java,与ES中的字段对应
import com.alibaba.fastjson.annotation.JSONField;
import lombok.Data;
/**
* Data bean whose properties mirror the fields of the Elasticsearch
* index/type {@code courier_growth_system_result}.
* <p>
* Lombok's {@code @Data} generates the getters and setters; MockData
* fills an instance through those setters and serializes it to JSON with fastjson.
*
* @author yiqin
* @date 2019/10/16
*/
@Data
public class CourierGrowthBean {
long badReviewCount;
// NOTE(review): presumably epoch milliseconds -- MockData fills it with values
// starting at 1570723200000; confirm against the index mapping.
long calTime;
long cityId;
long competitionId;
// MockData also uses this value as the Elasticsearch document id.
long courierId;
long deliveriesCount;
long grade;
long growValue;
// Serialized under the JSON key "isProtect" rather than the property name "protect".
@JSONField(name="isProtect")
boolean protect;
long protectGrade;
long refuseCount;
// Never assigned by MockData, so generated documents carry the default value 0.
long rewardGrowValue;
long workDayCount;
}
三、IDEA中写一个ES通用类,ElasticSearchUtil.java
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.InetSocketTransportAddress;
import org.elasticsearch.transport.client.PreBuiltTransportClient;
import java.net.InetAddress;
import java.net.UnknownHostException;
/**
 * Helper for creating and closing Elasticsearch {@link TransportClient} instances.
 *
 * @author yiqin
 * @date 2019/10/16
 */
public class ElasticSearchUtil {

    /**
     * Builds a transport client connected to every node listed in {@code hostPorts}.
     * <p>
     * The client is configured with {@code client.transport.ignore_cluster_name=true}
     * and {@code client.transport.sniff=true}.
     * <p>
     * If any address cannot be parsed or resolved, the partially configured client is
     * closed before the exception propagates, so a failed call does not leak it.
     *
     * @param hostPorts comma-separated {@code host:port} entries, e.g.
     *                  {@code "es1:9300,es2:9300"}; surrounding whitespace and empty
     *                  entries (such as a trailing comma) are ignored
     * @return a client with one transport address per entry; the caller owns it and
     *         should release it with {@link #close(TransportClient)}
     * @throws UnknownHostException     if a host name cannot be resolved
     * @throws NullPointerException     if {@code hostPorts} is {@code null}
     * @throws IllegalArgumentException if an entry is not of the form {@code host:port},
     *                                  the port is not a number, or no entry is given
     */
    public static TransportClient getTransportClient(String hostPorts) throws UnknownHostException {
        // Fail before allocating the client so a null argument cannot leak it.
        if (hostPorts == null) {
            throw new NullPointerException("hostPorts must not be null");
        }
        Settings settings = Settings.builder()
                .put("client.transport.ignore_cluster_name", true)
                .put("client.transport.sniff", true)
                .build();
        PreBuiltTransportClient client = new PreBuiltTransportClient(settings);
        try {
            int added = 0;
            for (String rawEntry : hostPorts.split(",", -1)) {
                String entry = rawEntry.trim();
                if (entry.isEmpty()) {
                    continue;
                }
                String[] parts = entry.split(":", -1);
                if (parts.length < 2 || parts[0].isEmpty()) {
                    throw new IllegalArgumentException(
                            "Expected an entry of the form host:port but got '" + entry + "'");
                }
                // NumberFormatException is an IllegalArgumentException, as documented above.
                int port = Integer.parseInt(parts[1].trim());
                client.addTransportAddress(
                        new InetSocketTransportAddress(InetAddress.getByName(parts[0]), port));
                added++;
            }
            if (added == 0) {
                throw new IllegalArgumentException("hostPorts contains no host:port entry");
            }
        } catch (UnknownHostException | RuntimeException e) {
            // The client was already created; release it before reporting the failure.
            client.close();
            throw e;
        }
        return client;
    }

    /**
     * Closes the given client; does nothing when it is {@code null}.
     *
     * @param transportClient the client to close, may be {@code null}
     */
    public static void close(TransportClient transportClient) {
        if (null != transportClient) {
            transportClient.close();
        }
    }
}
四、MockData.java,往ES中批量插数。这里遇到一个坑:之前性能不好,是因为始终复用同一个bulk请求,每次执行后没有重新创建,导致已提交的数据被反复重新索引,详见代码注释
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.serializer.SerializerFeature;
import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.xcontent.XContentType;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.net.UnknownHostException;
import java.text.DecimalFormat;
import java.text.NumberFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Random;
/**
* @author yiqin
* @date 2019/10/16
*/
public class MockData {
private static TransportClient transportClient;
private static BulkRequestBuilder bulkRequest;
private static String uuid;
static FileWriter fw;
static{
String hostPort = "es-ip1:9300,es-ip2:9300,es-ip3:9300,es-ip4:9300";
try {
transportClient = ElasticSearchUtil.getTransportClient(hostPort);
} catch (UnknownHostException e) {
e.printStackTrace();
}
try {
fw = new FileWriter("/Users/yiqin/Desktop/courierGrowthId.txt",true);
} catch (IOException e) {
e.printStackTrace();
}
}
private static String getCourierGrowthBean(){
Random random = new Random();
CourierGrowthBean courierGrowthBean = new CourierGrowthBean();
courierGrowthBean.setBadReviewCount(random.nextInt(100));
courierGrowthBean.setCalTime(random.nextInt(20000000)+1570723200000l);
courierGrowthBean.setCityId(1101);
courierGrowthBean.setCompetitionId(26);
long courierId = getCourierId();
uuid = String.valueOf(courierId);
writeToTxt(uuid);
courierGrowthBean.setCourierId(courierId);
courierGrowthBean.setDeliveriesCount(random.nextInt(1000)+1);
courierGrowthBean.setGrade(random.nextInt(8)+1);
courierGrowthBean.setProtect(true);
courierGrowthBean.setProtectGrade(6);
courierGrowthBean.setRefuseCount(random.nextInt(100));
courierGrowthBean.setGrowValue(random.nextInt(90000));
courierGrowthBean.setWorkDayCount(random.nextInt(200));
return JSON.toJSONString(courierGrowthBean, SerializerFeature.WriteMapNullValue);
}
/**
* 生成唯一的闪送员id
*/
public static Long getCourierId() {
SimpleDateFormat df = new SimpleDateFormat("HHmmssSSS");
String date = df.format(new Date());
NumberFormat f = new DecimalFormat("00000");
Random r = new Random();
String number = f.format(r.nextInt(10000));
Long orderNumber = Long.parseLong(date + number);
return orderNumber;
}
public static void main(String[] args) {
String index = "courier_growth_system_result";
String type = "courier_growth_system_result";
bulkRequest = transportClient.prepareBulk();
long a = System.currentTimeMillis(); //开始时间
int num = 0;
for(int i=0; i< 20000;i++) {//总共往ES里放的数据量,2万条
bulkRequest.add(transportClient.prepareIndex(index, type)
.setSource(getCourierGrowthBean(), XContentType.JSON)
.setId(uuid));
num ++;
if (num >= 1000) {//1次1000条
if (bulkRequest.numberOfActions() > 0) {
bulkRequest.execute().actionGet();
}
num = 0;
//批量执行后要重新创新一个bulk,不然会一次一次重新索引相同的第一次数据,之前没写这个耗时95s,现在耗时7s
bulkRequest = transportClient.prepareBulk();
}
}
if (bulkRequest.numberOfActions() > 0) {
bulkRequest.execute().actionGet();
}
long b = System.currentTimeMillis();
System.out.println(b-a);//打印总共花费时间
if(fw!=null){
try {
fw.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
static int numTemp =0;//一行写十个参数,用于jmeter参数
public static void writeToTxt(String courierId){
PrintWriter pw = new PrintWriter(fw);
if(numTemp<10){
pw.print(courierId);
pw.print(",");
numTemp++;
}else if(numTemp==10){
numTemp=0;
pw.println();
}
pw.flush();
}
}
五、ES的依赖
<dependency>
<groupId>org.elasticsearch.client</groupId>
<artifactId>transport</artifactId>
<version>5.5.2</version>
</dependency>