学完大数据的基本组件后,需要通过项目来实践一下。电信客服项目是对用户通话记录进行数据分析的项目。数据内容包括:呼叫方手机号,呼叫方姓名,接收方手机号,接收方姓名,通话建立时间,通话时长
1、模拟通话记录数据,用java程序生产数据
package producer;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import java.text.DecimalFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.*;
/**
 * Generates simulated call-log records and appends them to a file.
 *
 * <p>Each record is one comma-separated line of the form
 * {@code caller,callee,buildTime,duration}. Instances are not thread-safe.
 *
 * @author tony
 * @version v1.0.0 2019-02-26 10:56 PM
 */
public class ProductLog {
    /** Timestamp layout of the window bounds and of the emitted call setup time. */
    private static final String TIME_PATTERN = "yyyy-MM-dd HH:mm:ss";

    /** Exclusive upper bound of a generated call duration, in seconds (30 minutes). */
    private static final int MAX_DURATION_SECONDS = 30 * 60;

    /** Pause between two generated records, in milliseconds. */
    private static final long WRITE_INTERVAL_MILLIS = 500L;

    /** Sample subscribers as {phone number, name} pairs. */
    private static final String[][] CONTACTS = {
        {"17078388295", "李雁"},
        {"13980337439", "卫艺"},
        {"14575535933", "仰莉"},
        {"19902496992", "陶欣悦"},
        {"18549641558", "施梅梅"},
        {"17005930322", "金虹霖"},
        {"18468618874", "魏明艳"},
        {"18576581848", "华贞"},
        {"15978226424", "华啟倩"},
        {"15542823911", "仲采绿"},
        {"17526304161", "卫丹"},
        {"15422018558", "戚丽红"},
        {"17269452013", "何翠柔"},
        {"17764278604", "钱溶艳"},
        {"15711910344", "钱琳"},
        {"15714728273", "缪静欣"},
        {"16061028454", "焦秋菊"},
        {"16264433631", "吕访琴"},
        {"17601615878", "沈丹"},
        {"15897468949", "褚美丽"},
    };

    /** Inclusive lower bound of the window from which call setup times are drawn. */
    private final String startTime = "2019-01-01 00:00:00";

    /** Exclusive upper bound of the window from which call setup times are drawn. */
    private final String endTime = "2019-02-27 00:00:00";

    /** Phone numbers that can appear as caller or callee. */
    private final List<String> phoneList = new ArrayList<String>();

    /** Phone number to subscriber name lookup. */
    private final Map<String, String> phoneNameMap = new HashMap<String, String>();

    private final Random random = new Random();

    /**
     * Loads the built-in sample subscribers.
     *
     * <p>Previously loaded entries are discarded first, so calling this method
     * more than once does not duplicate phone numbers.
     */
    public void initPhone() {
        phoneList.clear();
        phoneNameMap.clear();
        for (String[] contact : CONTACTS) {
            phoneList.add(contact[0]);
            phoneNameMap.put(contact[0], contact[1]);
        }
    }

    /**
     * Builds one random call record.
     *
     * <p>Record layout: {@code caller,callee,buildTime,duration}, for example
     * {@code 17078388295,13980337439,2019-01-19 11:13:34,0025}. The duration is
     * a zero-padded number of seconds in the range 0000..1799.
     *
     * @return the record, without a trailing line separator
     * @throws IllegalStateException if fewer than two phone numbers are loaded
     *         (for example because {@link #initPhone()} has not been called)
     */
    public String product() {
        int size = phoneList.size();
        if (size < 2) {
            // With 0 entries the lookup would fail; with 1 the callee search would never end.
            throw new IllegalStateException(
                    "at least two phone numbers are required; call initPhone() first");
        }

        String caller = phoneList.get(random.nextInt(size));
        String callee;
        do {
            // Re-draw until the callee differs from the caller.
            callee = phoneList.get(random.nextInt(size));
        } while (caller.equals(callee));

        String buildTime = randomBuildTime(startTime, endTime);

        // Draw a whole number of seconds so the formatted value can never round up to 1800.
        String duration = new DecimalFormat("0000").format(random.nextInt(MAX_DURATION_SECONDS));

        return caller + "," + callee + "," + buildTime + "," + duration;
    }

    /**
     * Picks a random instant in {@code [startTime, endTime)} and formats it.
     *
     * @param startTime inclusive lower bound, formatted as {@code yyyy-MM-dd HH:mm:ss}
     * @param endTime exclusive upper bound, formatted as {@code yyyy-MM-dd HH:mm:ss}
     * @return the chosen instant, formatted as {@code yyyy-MM-dd HH:mm:ss}
     * @throws IllegalArgumentException if a bound cannot be parsed or the window is empty
     */
    private String randomBuildTime(String startTime, String endTime) {
        SimpleDateFormat format = new SimpleDateFormat(TIME_PATTERN);
        long startMillis;
        long endMillis;
        try {
            startMillis = format.parse(startTime).getTime();
            endMillis = format.parse(endTime).getTime();
        } catch (ParseException e) {
            // Fail loudly instead of letting the text "null" end up in a record.
            throw new IllegalArgumentException(
                    "time window must use the pattern " + TIME_PATTERN
                            + ": [" + startTime + ", " + endTime + "]", e);
        }
        if (endMillis <= startMillis) {
            throw new IllegalArgumentException(
                    "endTime must be after startTime: [" + startTime + ", " + endTime + "]");
        }

        long offset = (long) ((endMillis - startMillis) * random.nextDouble());
        return format.format(new Date(startMillis + offset));
    }

    /**
     * Appends a new record to the given file every 500 ms until the thread is
     * interrupted or writing fails. Each record is also echoed to standard output.
     *
     * @param filePath file to append to; it is created if it does not exist
     */
    public void writeLog(String filePath) {
        // try-with-resources closes the writer on every exit path.
        try (OutputStreamWriter osw = new OutputStreamWriter(
                new FileOutputStream(filePath, true), StandardCharsets.UTF_8)) {
            while (true) {
                Thread.sleep(WRITE_INTERVAL_MILLIS);
                String log = product();
                System.out.println(log);
                osw.write(log + "\n");
                // Flush per record so a tailing reader sees it immediately.
                osw.flush();
            }
        } catch (InterruptedException e) {
            // Restore the interrupt flag so the caller can observe the stop request.
            Thread.currentThread().interrupt();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Entry point.
     *
     * @param args {@code args[0]} is the output file path,
     *        e.g. {@code E:\bigdata\HDProject\calllog.csv}
     */
    public static void main(String[] args) {
        if (args == null || args.length == 0) {
            System.out.println("没有路径");
            return;
        }
        ProductLog productLog = new ProductLog();
        productLog.initPhone();
        productLog.writeLog(args[0]);
    }
}
将工程打成jar包上传至linux服务器上
2、先启动zk集群
几台服务器都执行zkServer.sh start
3、再启动kafka集群
几台服务器都执行bin/kafka-server-start.sh config/server.properties &
&:后台启动的意思
创建主题calllog:
bin/kafka-topics.sh --zookeeper bigdata121:2181 --create --replication-factor 3 --partitions 5 --topic calllog
查看主题列表:
bin/kafka-topics.sh --zookeeper bigdata121:2181 --list
成功创建主题calllog后,启动控制台消费者:
bin/kafka-console-consumer.sh --bootstrap-server bigdata121:9092 --topic calllog --from-beginning
4、创建flume配置文件
flume可以采集数据并发送到kafka的主题(topic)中,再由kafka消费者读取
新建配置文件:flume-kafka.conf
# 1 agent: declare the source, sink and channel that make up agent a1
a1.sources = r1
a1.sinks = k1
a1.channels = c1
# 2 source: exec source that runs the command below and turns each output line into an event.
# tail -F keeps following the file by name; -c +0 starts output at the beginning
# of the file (the offset is counted in bytes, not lines).
a1.sources.r1.type = exec
a1.sources.r1.command = tail -F -c +0 /root/calllog.csv
a1.sources.r1.shell = /bin/bash -c
# 3 sink: publish the events to the Kafka topic calllog through the listed brokers.
# batchSize is the number of messages sent per batch; requiredAcks = 1 waits for
# the partition leader's acknowledgement only.
# NOTE(review): newer Flume releases name these properties kafka.bootstrap.servers,
# kafka.topic, flumeBatchSize and kafka.producer.acks - confirm against the Flume version in use.
a1.sinks.k1.type = org.apache.flume.sink.kafka.KafkaSink
a1.sinks.k1.brokerList = bigdata121:9092,bigdata122:9092,bigdata123:9092,bigdata124:9092,bigdata125:9092
a1.sinks.k1.topic = calllog
a1.sinks.k1.batchSize = 20
a1.sinks.k1.requiredAcks = 1
# 4 channel: in-memory buffer between source and sink; holds at most 1000 events,
# with at most 100 events per transaction
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
# 5 bind: connect the source and the sink to channel c1
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
启动flume:
/root/flume/bin/flume-ng agent --conf /root/flume/conf/ --name a1 --conf-file /root/flume-kafka.conf
这样flume就时刻监听/root/calllog.csv文件的数据了。
5、使用jar包生产数据
java -cp /root/ct_producer-1.0-SNAPSHOT.jar producer.ProductLog /root/calllog.csv
或者把命令写入脚本文件后,直接执行脚本
data.sh
#!/bin/bash
# Start the call-log generator, which appends records to /root/calllog.csv.
java -cp /root/ct_producer-1.0-SNAPSHOT.jar producer.ProductLog /root/calllog.csv
当执行脚本后,Kafka的消息消费者也在不断的读取数据就说明成功了。
[root@bigdata121 ~]# . ./data.sh
16061028454,16264433631,2019-02-11 21:40:00,0287
15711910344,14575535933,2019-02-15 18:01:23,0447
18549641558,15711910344,2019-01-19 11:13:34,0228
15897468949,17269452013,2019-01-08 11:36:50,0093
18549641558,18576581848,2019-01-25 16:13:55,0134
15422018558,15714728273,2019-02-26 05:59:26,0871
17764278604,17601615878,2019-02-15 23:13:53,0469
17601615878,15714728273,2019-01-15 20:32:06,0061
17269452013,18468618874,2019-02-07 05:32:47,0031
16264433631,17005930322,2019-01-29 08:15:42,0980
15714728273,17601615878,2019-02-11 22:39:27,0200
[root@bigdata121 ~]# bin/kafka-console-consumer.sh --bootstrap-server bigdata121:9092 --topic calllog --from-beginning
16061028454,16264433631,2019-02-11 21:40:00,0287
15711910344,14575535933,2019-02-15 18:01:23,0447
18549641558,15711910344,2019-01-19 11:13:34,0228
15897468949,17269452013,2019-01-08 11:36:50,0093
18549641558,18576581848,2019-01-25 16:13:55,0134
15422018558,15714728273,2019-02-26 05:59:26,0871
17764278604,17601615878,2019-02-15 23:13:53,0469
17601615878,15714728273,2019-01-15 20:32:06,0061
17269452013,18468618874,2019-02-07 05:32:47,0031
16264433631,17005930322,2019-01-29 08:15:42,0980
15714728273,17601615878,2019-02-11 22:39:27,0200