1、电信客服项目之数据采集

学完大数据的基本组件后,需要通过项目进行实践。电信客服项目是对用户通话记录的数据分析,数据内容包括:呼叫方手机号、呼叫方姓名、接收方手机号、接收方姓名、通话建立时间、通话时长。

1、模拟通话记录数据,用java程序生产数据

package producer;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.text.DecimalFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.*;
/**
 * Generates simulated call-log records and appends them to a file.
 *
 * <p>Each record is one CSV line of the form
 * {@code caller,callee,buildTime,duration}, for example
 * {@code 17078388295,13980337439,2019-02-11 21:40:00,0287}.
 *
 * @author tony
 * @version v1.0.0 2019-02-26 10:56 PM
 */
public class ProductLog {
    /** Timestamp pattern used for the configured time window and for emitted records. */
    private static final String TIME_PATTERN = "yyyy-MM-dd HH:mm:ss";

    /** Exclusive upper bound of the random call duration, in seconds (30 minutes). */
    private static final int MAX_DURATION_SECONDS = 30 * 60;

    /** Pause between two generated records, in milliseconds. */
    private static final long WRITE_INTERVAL_MILLIS = 500L;

    /** Phone number / subscriber name pairs loaded by {@link #initPhone()}. */
    private static final String[][] CONTACTS = {
        {"17078388295", "李雁"},
        {"13980337439", "卫艺"},
        {"14575535933", "仰莉"},
        {"19902496992", "陶欣悦"},
        {"18549641558", "施梅梅"},
        {"17005930322", "金虹霖"},
        {"18468618874", "魏明艳"},
        {"18576581848", "华贞"},
        {"15978226424", "华啟倩"},
        {"15542823911", "仲采绿"},
        {"17526304161", "卫丹"},
        {"15422018558", "戚丽红"},
        {"17269452013", "何翠柔"},
        {"17764278604", "钱溶艳"},
        {"15711910344", "钱琳"},
        {"15714728273", "缪静欣"},
        {"16061028454", "焦秋菊"},
        {"16264433631", "吕访琴"},
        {"17601615878", "沈丹"},
        {"15897468949", "褚美丽"},
    };

    /** Inclusive lower bound of the random call setup time. */
    private final String startTime = "2019-01-01 00:00:00";

    /** Exclusive upper bound of the random call setup time. */
    private final String endTime = "2019-02-27 00:00:00";

    /** Phone numbers that can take part in a call. */
    private final List<String> phoneList = new ArrayList<String>();

    /** Phone number to subscriber name; filled by initPhone() but not written into the records. */
    private final Map<String, String> phoneNameMap = new HashMap<String, String>();

    /**
     * Loads the built-in phone numbers and subscriber names.
     *
     * <p>Existing entries are cleared first, so calling this method more than
     * once does not duplicate numbers in the list.
     */
    public void initPhone() {
        phoneList.clear();
        phoneNameMap.clear();
        for (String[] contact : CONTACTS) {
            phoneList.add(contact[0]);
            phoneNameMap.put(contact[0], contact[1]);
        }
    }

    /**
     * Builds one random call record.
     *
     * <p>Record layout: {@code caller,callee,buildTime,duration}, e.g.
     * {@code 17078388295,13980337439,2018-09-03 03:04:43,0025}. The caller and
     * callee are always two different numbers, {@code buildTime} lies inside
     * the configured time window, and {@code duration} is a zero-padded number
     * of seconds in the range {@code 0000}..{@code 1799}.
     *
     * @return the record as a single comma-separated line without a line terminator
     * @throws IllegalStateException if fewer than two phone numbers are loaded,
     *         e.g. because {@link #initPhone()} has not been called
     */
    public String product() {
        int phoneCount = phoneList.size();
        if (phoneCount < 2) {
            throw new IllegalStateException(
                    "At least two phone numbers are required; call initPhone() first");
        }

        // Pick the caller, then pick the callee among the remaining entries:
        // drawing from phoneCount - 1 slots and skipping over the caller's
        // index yields a different number without a retry loop.
        int callerIndex = (int) (Math.random() * phoneCount);
        int calleeIndex = (int) (Math.random() * (phoneCount - 1));
        if (calleeIndex >= callerIndex) {
            calleeIndex++;
        }
        String caller = phoneList.get(callerIndex);
        String callee = phoneList.get(calleeIndex);

        // Third field: random call setup time.
        String buildTime = randomBuildTime(startTime, endTime);

        // Fourth field: duration in whole seconds, zero-padded to four digits.
        // The cast must wrap the whole product; otherwise a fractional value
        // is handed to DecimalFormat and rounded instead of truncated.
        DecimalFormat fourDigits = new DecimalFormat("0000");
        String duration = fourDigits.format((int) (MAX_DURATION_SECONDS * Math.random()));

        return caller + "," + callee + "," + buildTime + "," + duration;
    }

    /**
     * Picks a random timestamp in {@code [startTime, endTime)}.
     *
     * @param startTime lower bound, formatted as {@code yyyy-MM-dd HH:mm:ss}
     * @param endTime upper bound, formatted as {@code yyyy-MM-dd HH:mm:ss}
     * @return the random timestamp, formatted as {@code yyyy-MM-dd HH:mm:ss}
     * @throws IllegalArgumentException if either bound cannot be parsed or the
     *         end is not after the start
     */
    private String randomBuildTime(String startTime, String endTime) {
        SimpleDateFormat format = new SimpleDateFormat(TIME_PATTERN);
        long startMillis;
        long endMillis;
        try {
            startMillis = format.parse(startTime).getTime();
            endMillis = format.parse(endTime).getTime();
        } catch (ParseException e) {
            // Fail loudly: returning null here would put the text "null" into the record.
            throw new IllegalArgumentException(
                    "Time window must match " + TIME_PATTERN + ": " + startTime + " .. " + endTime, e);
        }
        if (endMillis <= startMillis) {
            throw new IllegalArgumentException(
                    "End time must be after start time: " + startTime + " .. " + endTime);
        }

        long randomMillis = startMillis + (long) ((endMillis - startMillis) * Math.random());
        return format.format(new Date(randomMillis));
    }

    /**
     * Appends one generated record to the file every 500 ms until the thread
     * is interrupted or writing fails.
     *
     * <p>Each record is also echoed to standard output and flushed to the file
     * immediately. The writer is always closed before the method returns.
     *
     * @param filePath path of the file to append to
     */
    public void writeLog(String filePath) {
        OutputStreamWriter writer = null;
        try {
            writer = new OutputStreamWriter(new FileOutputStream(filePath, true), "UTF-8");
            while (true) {
                Thread.sleep(WRITE_INTERVAL_MILLIS);
                String log = product();
                System.out.println(log);
                writer.write(log + "\n");
                writer.flush();
            }
        } catch (InterruptedException e) {
            // Restore the interrupt flag so the caller can see that a stop was requested.
            Thread.currentThread().interrupt();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (writer != null) {
                try {
                    writer.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Command-line entry point.
     *
     * @param args {@code args[0]} is the path of the output file
     */
    public static void main(String[] args) {
        // Output path, e.g. args = new String[]{"E:\\bigdata\\HDProject\\calllog.csv"};
        if (args == null || args.length <= 0) {
            System.out.println("没有路径");
            return;
        }
        ProductLog productLog = new ProductLog();
        productLog.initPhone();
        productLog.writeLog(args[0]);
    }
}

将工程打成jar包上传至linux服务器上

2、先启动zk集群
几台服务器都执行zkServer.sh start
3、再启动kafka集群
几台服务器都执行bin/kafka-server-start.sh config/server.properties &
&:后台启动的意思
创建主题calllog:
bin/kafka-topics.sh --zookeeper bigdata121:2181 --create --replication-factor 3 --partitions 5 --topic calllog
查看主题列表:
bin/kafka-topics.sh --zookeeper bigdata121:2181 --list

创建成功主题calllog后,启动消费者:
bin/kafka-console-consumer.sh --bootstrap-server bigdata121:9092 --topic calllog --from-beginning

4、创建flume配置文件
flume可以采集数据并发送到kafka的主题(topic)中,再由kafka消费者读取
新建配置文件:flume-kafka.conf

# 1 agent: name the source, sink and channel that make up agent a1
a1.sources = r1
a1.sinks = k1
a1.channels = c1

# 2 source: exec source that runs the tail command below via "/bin/bash -c".
#   "-c +0" counts bytes (not lines), so tail starts output at the beginning
#   of /root/calllog.csv; "-F" keeps following the file as it grows.
a1.sources.r1.type = exec
a1.sources.r1.command = tail -F -c +0 /root/calllog.csv
a1.sources.r1.shell = /bin/bash -c

# 3 sink: Kafka sink that publishes events to topic "calllog" on the listed brokers
a1.sinks.k1.type = org.apache.flume.sink.kafka.KafkaSink
a1.sinks.k1.brokerList = bigdata121:9092,bigdata122:9092,bigdata123:9092,bigdata124:9092,bigdata125:9092
a1.sinks.k1.topic = calllog
a1.sinks.k1.batchSize = 20
a1.sinks.k1.requiredAcks = 1

# 4 channel: in-memory channel buffering events between source r1 and sink k1
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100

# 5 bind: connect source r1 and sink k1 to channel c1
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1

启动flume:
/root/flume/bin/flume-ng agent --conf /root/flume/conf/ --name a1 --conf-file /root/flume-kafka.conf

这样flume就时刻监听/root/calllog.csv文件的数据了。

5、使用jar包生产数据
java -cp /root/ct_producer-1.0-SNAPSHOT.jar producer.ProductLog /root/calllog.csv
或者把命令写入脚本文件后,直接执行脚本
data.sh

#!/bin/bash
# Start the call-log generator; it appends records to /root/calllog.csv.
java -cp /root/ct_producer-1.0-SNAPSHOT.jar producer.ProductLog /root/calllog.csv

当执行脚本后,Kafka的消息消费者也在不断的读取数据就说明成功了。

[root@bigdata121 ~]# . ./data.sh
16061028454,16264433631,2019-02-11 21:40:00,0287
15711910344,14575535933,2019-02-15 18:01:23,0447
18549641558,15711910344,2019-01-19 11:13:34,0228
15897468949,17269452013,2019-01-08 11:36:50,0093
18549641558,18576581848,2019-01-25 16:13:55,0134
15422018558,15714728273,2019-02-26 05:59:26,0871
17764278604,17601615878,2019-02-15 23:13:53,0469
17601615878,15714728273,2019-01-15 20:32:06,0061
17269452013,18468618874,2019-02-07 05:32:47,0031
16264433631,17005930322,2019-01-29 08:15:42,0980
15714728273,17601615878,2019-02-11 22:39:27,0200
[root@bigdata121 ~]# bin/kafka-console-consumer.sh --bootstrap-server 
bigdata121:9092 --topic calllog --from-beginning

16061028454,16264433631,2019-02-11 21:40:00,0287
15711910344,14575535933,2019-02-15 18:01:23,0447
18549641558,15711910344,2019-01-19 11:13:34,0228
15897468949,17269452013,2019-01-08 11:36:50,0093
18549641558,18576581848,2019-01-25 16:13:55,0134
15422018558,15714728273,2019-02-26 05:59:26,0871
17764278604,17601615878,2019-02-15 23:13:53,0469
17601615878,15714728273,2019-01-15 20:32:06,0061
17269452013,18468618874,2019-02-07 05:32:47,0031
16264433631,17005930322,2019-01-29 08:15:42,0980
15714728273,17601615878,2019-02-11 22:39:27,0200

  • 1
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值