Spark SQL基于网站Log的综合案例实战

本篇博文我们将通过代码手动生成模拟的网站日志数据。这个功能非常有用:我们可以根据自己的需要,创造出格式和内容都符合要求的数据,而且生成的数据无需再做清洗。话不多说,直接进入代码实战。

package com.dt.spark.SparkApps.sql.project;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.charset.StandardCharsets;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import java.util.Random;

/**
 * 论坛数据自动生成代码,数据格式如下:
 * date: 日期 格式为:yy-MM-dd
 * timestamp: 时间戳
 * userID: 用户ID
 * pageID: 页面ID
 * chanelID: 板块的ID
 * action: 点击和注册
 *
 */
public class SparkSQLDataManually {

    /**
     * Forum channel names a record can be assigned to. In a real project this
     * list could be loaded from a file instead of being hard-coded.
     */
    static String[] channelNames = new String[]{
        "Spark","Scala","Kafka","Flink","Hadoop","Storm",
        "HBase","Impala","Java","Kudu","ML","DL"
    };

    /** User actions a record can carry. */
    static String[] actionNames = new String[]{"View","Register"};

    /** Yesterday's date formatted as {@code yy-MM-dd}; set by {@link #main} before records are generated. */
    static String yesterdayFormated;

    /** Name of the generated log file inside the output directory. */
    private static final String LOG_FILE_NAME = "userLog.log";

    /**
     * Generates a forum user log file.
     *
     * @param args optional arguments: {@code args[0]} is the number of records
     *             to generate (default 5000) and {@code args[1]} is the output
     *             directory (default {@code "."}). Either may be omitted.
     * @throws IOException if the log file cannot be created or written
     * @throws NumberFormatException if {@code args[0]} is not a valid integer
     */
    public static void main(String[] args) throws IOException {
        long numberItems = 5000;
        String path = ".";
        // The two arguments are independent: supplying only the record count
        // must not try to read a missing output directory.
        if (args.length > 0) {
            numberItems = Long.parseLong(args[0]);
        }
        if (args.length > 1) {
            path = args[1];
        }
        System.out.println("User log number is :" + numberItems);

        // Every generated record is stamped with yesterday's date.
        yesterdayFormated = yesterday();

        userlogs(numberItems, path);
    }

    /**
     * Writes {@code numberItems} tab-separated records to {@code userLog.log}
     * inside {@code path} and echoes them to standard output.
     *
     * <p>Each record has the form
     * {@code date \t timestamp \t userID \t pageID \t channel \t action \n}.
     * Records are streamed one at a time so memory use does not grow with the
     * number of records.
     *
     * @param numberItems number of records to generate; values {@code <= 0}
     *                    produce an empty file
     * @param path        directory that receives the log file; an empty string
     *                    means the current working directory
     * @throws IOException if the file cannot be opened or written
     */
    private static void userlogs(long numberItems, String path) throws IOException {
        Random random = new Random();

        // IDs are drawn from [0, numberItems); clamp so the bound always fits
        // in an int and is never below 1, which Random.nextInt requires.
        int idBound = (int) Math.max(1L, Math.min(numberItems, (long) Integer.MAX_VALUE));

        // new File(parent, child) inserts the separator only when needed, so
        // both "/tmp/out" and "/tmp/out/" resolve to "/tmp/out/userLog.log".
        // An empty parent would resolve against the filesystem root, so it is
        // treated as the current directory instead.
        File target = path.isEmpty() ? new File(LOG_FILE_NAME) : new File(path, LOG_FILE_NAME);

        // try-with-resources closes the file even when a write fails, and
        // writing through OutputStreamWriter (not PrintWriter) lets I/O
        // errors surface as IOException instead of being swallowed.
        try (OutputStreamWriter writer = new OutputStreamWriter(
                new FileOutputStream(target), StandardCharsets.UTF_8)) {
            for (long i = 0; i < numberItems; i++) {
                String record = buildRecord(random, idBound);
                System.out.print(record);
                writer.write(record);
            }
        }
        // Blank line terminating the echoed records on standard output.
        System.out.println();
    }

    /** Builds a single newline-terminated, tab-separated log record with random field values. */
    private static String buildRecord(Random random, int idBound) {
        long timestamp = System.currentTimeMillis();
        long userID = random.nextInt(idBound);
        long pageID = random.nextInt(idBound);
        // Bound by the array lengths so adding a channel or action needs no
        // further change here.
        String channel = channelNames[random.nextInt(channelNames.length)];
        String action = actionNames[random.nextInt(actionNames.length)];

        return new StringBuilder()
                .append(yesterdayFormated).append('\t')
                .append(timestamp).append('\t')
                .append(userID).append('\t')
                .append(pageID).append('\t')
                .append(channel).append('\t')
                .append(action).append('\n')
                .toString();
    }

    /**
     * Returns yesterday's date in the default time zone.
     *
     * @return the date formatted as {@code yy-MM-dd}
     */
    private static String yesterday() {
        SimpleDateFormat date = new SimpleDateFormat("yy-MM-dd");
        // Calendar.getInstance() is already initialised to the current time.
        Calendar cal = Calendar.getInstance();
        cal.add(Calendar.DATE, -1);
        return date.format(cal.getTime());
    }

}

实验结果如下:

16-05-02    1462244815099   3978    1325    Scala   View
16-05-02    1462244815099   7833    4357    Hadoop  View
16-05-02    1462244815099   3148    44  HBase   Register
16-05-02    1462244815099   1012    4215    HBase   Register
16-05-02    1462244815099   9679    5166    HBase   View
16-05-02    1462244815099   7755    7357    DL  Register
16-05-02    1462244815099   8790    7303    Kudu    View
16-05-02    1462244815099   8354    456 Java    Register
16-05-02    1462244815100   1412    6501    Storm   View
16-05-02    1462244815100   6523    7202    Flink   View
16-05-02    1462244815100   4701    3808    Storm   Register
.......................
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值