前段时间需要配合 Kylin 的流式计算,所以用 Java 写了一个 Kafka producer 程序:它每秒向 topic 发送数据,Kylin 则作为该 topic 的 consumer。程序的主要功能是随机生成一些数据;个人认为其中的要点有两个:时间戳的随机生成,以及以 JSON 格式发送数据。下面直接上干货,有任何问题都可以在下方评论,小厨会尽全力解决。
package com.bigdata.kylin;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Properties;
import java.util.Random;
import java.util.concurrent.TimeUnit;
import net.sf.json.JSONObject;
import kafka.javaapi.producer.Producer;
import kafka.producer.KeyedMessage;
import kafka.producer.ProducerConfig;
import kafka.serializer.StringEncoder;
//模拟产生网站点击数据:包括day,regionIdArray等
//2019-07-12|GBSHD87JD6HDKI98|G03|G0302|810|Mac OS|2
public class CreateData extends Thread{
private String topic;
/**
 * Creates a data-generating thread and remembers the topic name it was given.
 *
 * <p>The superclass ({@link Thread}) no-arg constructor is invoked implicitly.
 *
 * @param topic the topic name to store in this instance's {@code topic} field
 */
public CreateData(String topic) {
this.topic = topic;
}
@Override
public void run() {
//创建一个producer对象
Producer producer = createProduce();
//度量指标Cookieid
String cookieIdArray[] = {"0","1","2","3","4","5","6","7","8","9",
"A","B","C","D","E","F","G","H","I","J","K","L","M",
"N","O","P","Q","R","S","T","U","V","W","