Big Data Architecture Comprehensive Training: Transaction Data Analysis
1 Raw file processing
1.1 Creating the JDBCUtils class
Importing the file into the database requires basic CRUD operations, so a simple hand-written JDBCUtils class is created to encapsulate them.
(1) First, create a jdbc.properties file that holds the database connection settings
user=root
password=yourpwd
url=jdbc:mysql://localhost:3306/yourDataBase?useUnicode=true&characterEncoding=utf8&allowMultiQueries=true&rewriteBatchedStatements=true
driverClass=com.mysql.cj.jdbc.Driver
(2) A getConnection method that obtains a database connection
public static Connection getConnection() throws Exception{
    //1. Read the four basic settings from the configuration file
    InputStream is = ClassLoader.getSystemClassLoader().getResourceAsStream("jdbc.properties");
    Properties properties = new Properties();
    properties.load(is);
    String user = properties.getProperty("user");
    String password = properties.getProperty("password");
    String url = properties.getProperty("url");
    String driverClass = properties.getProperty("driverClass");
    //2. Load the JDBC driver
    Class.forName(driverClass);
    //3. Obtain the connection
    Connection connection = DriverManager.getConnection(url, user, password);
    return connection;
}
(3) A closeResource method that releases the connection and statement
public static void closeResource(Connection conn, Statement ps){
try {
if (ps != null){
ps.close();
}
} catch (SQLException e) {
throw new RuntimeException(e);
}
try {
if (conn != null){
conn.close();
}
} catch (SQLException e) {
throw new RuntimeException(e);
}
}
(4) A createTable method used to create tables
public static void createTable(String sql){
    Connection conn = null;
    PreparedStatement ps = null;
    boolean success = false;
    try {
        conn = JDBCUtils.getConnection();
        ps = conn.prepareStatement(sql);
        //execute() throws an exception if the DDL fails, so reaching the next line means it ran
        ps.execute();
        success = true;
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        JDBCUtils.closeResource(conn,ps);
    }
    if (success){
        System.out.println("Table created");
    } else {
        System.out.println("Table creation failed");
    }
}
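For reference, a minimal sketch of how these helpers can be used together; the table name and query below are only illustrative and do not appear in the actual project:
//Hypothetical usage of JDBCUtils: create a throwaway table, then run a simple count query
public static void main(String[] args) throws Exception {
    JDBCUtils.createTable("create table if not exists demo_table (id INT(11), note varchar(255))");
    Connection conn = JDBCUtils.getConnection();
    PreparedStatement ps = conn.prepareStatement("select count(*) from demo_table");
    ResultSet rs = ps.executeQuery();
    if (rs.next()) {
        System.out.println("rows: " + rs.getInt(1));
    }
    rs.close();
    JDBCUtils.closeResource(conn, ps);
}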
1.2 Creating the profileUtils class
This class contains the file-handling operations.
(1) A getFile method that reads the raw file from the desktop
public static List<String> getFile(){
    List<String> data = new ArrayList<>();
    //Read the raw file from the desktop; it is encoded in GB2312
    try (FileInputStream fis = new FileInputStream("C:\\Users\\23694\\Desktop\\1-14new.txt");
         InputStreamReader isr = new InputStreamReader(fis, "GB2312");
         BufferedReader bufferedReader = new BufferedReader(isr)) {
        String str;
        while ((str = bufferedReader.readLine()) != null){
            //Skip blank lines
            if (str.trim().length() > 0){
                data.add(str);
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return data;
}
(2) A getNewFile method, which calls getFile to load the file and encapsulates the data-cleaning steps, so the result can be used directly for keyword analysis and for insertion into the database.
public static List<String> getNewFile(){
    //Clean the raw lines and flatten them into a list of user_id, payment, description, time fields
    List<String> file = profileUtils.getFile();
    List<String> newFile = new ArrayList<>();   //one cleaned record per element
    List<String> newFile1 = new ArrayList<>();  //flattened fields, four per record
    String[] newStrings;
    String[] temp;
for (String s : file) {
if (s.trim().length() > 10) {
s = s.replaceAll("\\(\\(", "");
s = s.replaceAll("\\)\\)", "");
s = s.replaceAll("None", "'None'");
s = s.replaceAll("\"", "'");
newStrings = s.split("\\), \\(");
for (String s1 : newStrings) {
temp = s1.split(", '");
newFile.add(Arrays.toString(temp).replaceAll("'", ""));
}
}
}
    //A record can split into more than 4 pieces when its description itself contains commas;
    //the extra pieces are merged back into the description field below.
    String[] strings1 = null;
for (int j = 0; j < newFile.size(); j++) {
strings1 = newFile.get(j).replaceAll("\\[", "").replaceAll("\\]", "").split(",");
if (strings1.length == 4) {
for (int k = 0; k < 4; k++) {
newFile1.add(strings1[k]);
}
}
if (strings1.length == 5) {
for (int k = 0; k < 5; k++) {
if (k == 2) {
strings1[2] = strings1[2].concat(strings1[3]).replaceAll(",", "");
}
if (k != 3) {
newFile1.add(strings1[k]);
}
}
}
if (strings1.length == 6) {
for (int k = 0; k < 6; k++) {
if (k == 2) {
strings1[2] = strings1[2].concat(strings1[3]).concat(strings1[4].replaceAll(",", ""));
}
if (k != 4 && k != 3) {
newFile1.add(strings1[k]);
}
}
}
if (strings1.length == 7) {
for (int k = 0; k < 7; k++) {
if (k == 2) {
strings1[2] = strings1[2].concat(strings1[3]).concat(strings1[4].concat(strings1[5]).replaceAll(",", ""));
}
if (k != 4 && k != 3 && k != 5) {
newFile1.add(strings1[k]);
}
}
}
if (strings1.length == 8) {
for (int k = 0; k < 8; k++) {
if (k == 2) {
strings1[2] = strings1[2].concat(strings1[3]).concat(strings1[4].concat(strings1[5]).concat(strings1[6]).replaceAll(",", ""));
}
if (k != 4 && k != 3 && k != 5 && k != 6) {
newFile1.add(strings1[k]);
}
}
}
}
return newFile1;
}
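To make the cleaning steps concrete, here is a hypothetical raw line (invented purely for illustration; the real file may differ in detail) and what getNewFile produces from it:
//Hypothetical raw line (one line of the source file):
//((1001, -25.5, '取款', 1398522840), (1002, 300.0, '网银转账', 1398530000))
//After cleaning and flattening, newFile1 would contain these eight elements in order
//(some still carrying a leading space): 1001, -25.5, 取款, 1398522840, 1002, 300.0, 网银转账, 1398530000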
(3) A sample of the processed text is shown below
1.3 Creating the insertIntoMysql class
Before inserting, the flow_data table is created. The dataset is fairly large (roughly 140,000 records), and executing and committing after every single PreparedStatement would mean far too many round trips to the database. Instead, addBatch is used to accumulate a number of statements and submit them together, which improves throughput considerably. For this to work, rewriteBatchedStatements=true must be appended to the JDBC URL in the configuration.
(1) The fileInsert method
//Insert the cleaned data into the database
public static void fileInsert() {
    //Create the table (run only once)
    JDBCUtils.createTable("create table if not exists flow_data (user_id INT(11),payment FLOAT,describe_info varchar(255),time INT(11))");
    List<String> newFile = profileUtils.getNewFile();
    Object[] objects = newFile.toArray();
    Connection conn = null;
    PreparedStatement ps = null;
    try {
        conn = JDBCUtils.getConnection();
        String sql = "insert into flow_data(user_id,payment,describe_info,time)values(?,?,?,?)";
        ps = conn.prepareStatement(sql);
        //Disable auto-commit so the whole import is one transaction
        conn.setAutoCommit(false);
        //Batch the inserts: four consecutive array elements form one record
        for (int o = 0; o + 3 < objects.length; o = o + 4) {
            ps.setObject(1, objects[o]);        //user_id
            ps.setObject(2, objects[o + 1]);    //payment
            ps.setObject(3, objects[o + 2]);    //describe_info
            ps.setObject(4, objects[o + 3]);    //time
            ps.addBatch();
            //Flush the batch every 500 records to limit memory use
            if (o % 2000 == 0) {
                ps.executeBatch();
            }
        }
        //Execute whatever is left in the batch, then commit everything
        ps.executeBatch();
        conn.commit();
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        JDBCUtils.closeResource(conn, ps);
    }
}
(2) Result screenshot
(3) Export the data to CSV directly with SQLyog
1.4 Creating the keyWord class
Configure pom.xml to pull in the jieba word-segmentation dependency used for keyword extraction.
(1) pom.xml
<dependency>
<groupId>com.huaban</groupId>
<artifactId>jieba-analysis</artifactId>
<version>1.0.2</version>
</dependency>
(2) Add the two required classes, Keyword and TFIDFAnalyzer
(3) The keyWord class
public class keyWord {
public static List<String> getKeyword(){
TFIDFAnalyzer tfi = new TFIDFAnalyzer();
List<String> newFile = profileUtils.getNewFile();
Object[] array = newFile.toArray();
        //Each record occupies four consecutive elements; index 2 of every record is the description.
        //Concatenate the descriptions (skipping ones that still contain spaces) into one text blob.
        int a = 2;
        StringBuilder builder = new StringBuilder();
        for (int i = 0; i < array.length; i++) {
            if (i == a){
                if (!String.valueOf(array[i]).trim().contains(" ")){
                    builder.append(array[i]);
                }
                a = a + 4;
            }
        }
        //Extract the 20 highest-scoring keywords with TF-IDF
        List<Keyword> anaRe = tfi.analyze(builder.toString(), 20);
List<String> result = new ArrayList<>();
for (Keyword keyword : anaRe){
result.add(keyword.getName());
}
return result;
}
}
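A minimal sketch of calling this class from a test class (purely illustrative):
//Hypothetical usage: print the extracted keywords
public static void main(String[] args) {
    List<String> keywords = keyWord.getKeyword();
    for (String kw : keywords) {
        System.out.println(kw);
    }
}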
(4) Part of the result for the 20 selected keywords is shown below
2 Writing the HdfsClient class
It contains operations such as put, close, and mkdir.
public class HdfsClient {
    private FileSystem fs;

    //Initialization
    public void init() throws URISyntaxException, IOException, InterruptedException {
        //NameNode address of the cluster to connect to
        URI uri = new URI("hdfs://hadoop102:8020");
        //Create a configuration object
        Configuration configuration = new Configuration();
        //User to connect as
        String user = "atguigu";
        //Obtain the client (FileSystem) object
        fs = FileSystem.get(uri, configuration, user);
    }

    public void close() throws IOException {
        fs.close();
    }

    public void Mkdir(String path) throws URISyntaxException, IOException, InterruptedException {
        fs.mkdirs(new Path(path));
    }

    //Upload
    public void Put(String localPath, String remotePath) throws IOException {
        //arg1: delete the local source file; arg2: allow overwriting; arg3: source path; arg4: destination path
        fs.copyFromLocalFile(true, true, new Path(localPath), new Path(remotePath));
    }
}
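As a rough sketch of how this client is driven from a test class (the directory and local paths below are assumptions, not values taken from the project):
//Hypothetical driver for HdfsClient: upload the exported CSV and the MapReduce jar to HDFS
public static void main(String[] args) throws Exception {
    HdfsClient client = new HdfsClient();
    client.init();                              //connects to hdfs://hadoop102:8020 as user atguigu
    client.Mkdir("/flow");                      //target directory name is an assumption
    client.Put("D:\\data\\data.csv", "/flow");  //note: Put deletes the local source because delSrc is true
    client.Put("D:\\data\\flow-analysis.jar", "/flow");
    client.close();
}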
3 MapReduce
3.1 Total spending per customer
(1) Add the Hadoop dependency to pom.xml
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>3.1.3</version>
</dependency>
(2) mapper
public class mapper extends Mapper<LongWritable,Text, IntWritable, FloatWritable> {
@Override
protected void map(LongWritable k1, Text v1, Mapper<LongWritable, Text, IntWritable, FloatWritable>.Context context) throws IOException, InterruptedException {
String[] data = v1.toString().split(",");
String userID = data[0];
String payMent = data[1];
context.write(
new IntWritable(Integer.parseInt(userID)),
new FloatWritable(Float.parseFloat(payMent))
);
}
}
(3) reducer
public class reducer extends Reducer<IntWritable, FloatWritable,IntWritable,FloatWritable> {
@Override
protected void reduce(IntWritable k3, Iterable<FloatWritable> v3, Reducer<IntWritable, FloatWritable, IntWritable, FloatWritable>.Context context) throws IOException, InterruptedException {
float sumPayment = 0f;
for (FloatWritable fw : v3){
sumPayment += fw.get();
}
context.write(
k3,new FloatWritable(sumPayment)
);
}
}
(4) aMain
public class aMain {
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setJarByClass(aMain.class);
job.setMapperClass(mapper.class);
job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(FloatWritable.class);
job.setReducerClass(reducer.class);
job.setOutputKeyClass(IntWritable.class);
job.setOutputValueClass(FloatWritable.class);
Path out = new Path(args[1]);
FileInputFormat.setInputPaths(job,new Path(args[0]));
FileOutputFormat.setOutputPath(job,out);
FileSystem fs = FileSystem.get(conf);
if (fs.exists(out)) {
fs.delete(out, true);
}
job.waitForCompletion(true);
}
}
(5) Start the virtual machines and bring up the required Hadoop services
(6) Use the HDFS API (the HdfsClient class from section 2) to upload data.csv and the job jar to HDFS
(7) Run the jar to process the data (a sample command is sketched below)
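The job can be launched with the hadoop jar command, roughly as follows; the jar name, the main class (qualify it with its package if one is used), and the HDFS paths are placeholders rather than the exact values from the project:
hadoop jar flow-analysis.jar aMain /flow/data.csv /flow/output1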
3.2 Spending per customer per year
(1) mapper
public class mapper extends Mapper<LongWritable,Text, Text, FloatWritable> {
@Override
protected void map(LongWritable k1, Text v1, Mapper<LongWritable, Text, Text, FloatWritable>.Context context) throws IOException, InterruptedException {
String[] data = v1.toString().split(",");
String userID = data[0];
String payMent = data[1];
int time = Integer.parseInt(data[3]);
Calendar calendar = Calendar.getInstance();
calendar.setTime(new Date(time * 1000L));
int year = calendar.get(Calendar.YEAR);
String a = userID+" "+year;
context.write(
new Text(a),
new FloatWritable(Float.parseFloat(payMent))
);
}
}
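The mapper treats the time column as a Unix timestamp in seconds, hence the multiplication by 1000L. A quick sanity check of that conversion, using an arbitrary sample value:
//Sanity check of the seconds-to-year conversion (sample value chosen arbitrarily)
Calendar calendar = Calendar.getInstance();
calendar.setTime(new Date(1400000000 * 1000L)); //1,400,000,000 seconds after the epoch falls in May 2014
System.out.println(calendar.get(Calendar.YEAR)); //prints 2014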
(2) reducer
public class reducer extends Reducer<Text, FloatWritable,Text,FloatWritable> {
@Override
protected void reduce(Text k3, Iterable<FloatWritable> v3, Reducer<Text, FloatWritable, Text, FloatWritable>.Context context) throws IOException, InterruptedException {
float sumPayment = 0f;
for (FloatWritable fw : v3){
sumPayment += fw.get();
}
context.write(
k3,new FloatWritable(sumPayment)
);
}
}
(3) aMain
public class aMain {
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setJarByClass(aMain.class);
job.setMapperClass(mapper.class);
job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(FloatWritable.class);
job.setReducerClass(reducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(FloatWritable.class);
Path out = new Path(args[1]);
FileInputFormat.setInputPaths(job,new Path(args[0]));
FileOutputFormat.setOutputPath(job,out);
FileSystem fs = FileSystem.get(conf);
if (fs.exists(out)) {
fs.delete(out, true);
}
job.waitForCompletion(true);
}
}
(4) Run
3.3 Composition of total spending
(1) mapper
public class mapper extends Mapper<LongWritable,Text, Text, FloatWritable> {
    @Override
    protected void map(LongWritable k1, Text v1, Mapper<LongWritable, Text, Text, FloatWritable>.Context context) throws IOException, InterruptedException {
        String[] keywords = new String[]{"中袖","一网打尽","客杯","热卖","客来","长方","金城","嘉福元","玻璃瓶",
                "BBQ","极睿","ipad2","ipad3","ipad4","快抢","肌漾","取款","网银","网络","跨行"};
        String[] data = v1.toString().split(",");
        String desc = data[2].trim();
        String payMent = data[1];
        //Classify the record by the first keyword its description contains;
        //records matching no keyword fall into the "其他" (other) category.
        for (int i = 0; i < keywords.length; i++) {
            if (desc.contains(keywords[i])){
                desc = keywords[i];
                context.write(
                        new Text(desc),
                        new FloatWritable(Math.abs(Float.parseFloat(payMent)))
                );
                //Stop after the first match so each record is counted only once
                break;
            } else if (i == keywords.length - 1) {
                desc = "其他";
                context.write(
                        new Text(desc),
                        new FloatWritable(Math.abs(Float.parseFloat(payMent)))
                );
            }
        }
    }
}
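For instance, with hypothetical descriptions:
//Hypothetical classification examples for the mapper above:
//  desc = "跨行转账手续费" -> first matching keyword is "跨行", so the record is written under "跨行"
//  desc = "超市购物"       -> matches none of the 20 keywords, so it is written under "其他"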
(2) reducer
public class reducer extends Reducer<Text, FloatWritable,Text,FloatWritable> {
@Override
protected void reduce(Text k3, Iterable<FloatWritable> v3, Reducer<Text, FloatWritable, Text, FloatWritable>.Context context) throws IOException, InterruptedException {
float sumPayment = 0f;
for (FloatWritable fw : v3){
sumPayment += fw.get();
}
context.write(
k3,new FloatWritable(sumPayment)
);
}
}
(3) aMain
public class aMain {
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setJarByClass(aMain.class);
job.setMapperClass(mapper.class);
job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(FloatWritable.class);
job.setReducerClass(reducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(FloatWritable.class);
Path out = new Path(args[1]);
FileInputFormat.setInputPaths(job,new Path(args[0]));
FileOutputFormat.setOutputPath(job,out);
FileSystem fs = FileSystem.get(conf);
if (fs.exists(out)) {
fs.delete(out, true);
}
job.waitForCompletion(true);
}
}
(4) Execution
3.4 Composition of spending per year
(1) mapper
public class mapper extends Mapper<LongWritable,Text, Text, FloatWritable> {
    @Override
    protected void map(LongWritable k1, Text v1, Mapper<LongWritable, Text, Text, FloatWritable>.Context context) throws IOException, InterruptedException {
        String[] keywords = new String[]{"中袖","一网打尽","客杯","热卖","客来","长方","金城","嘉福元","玻璃瓶",
                "BBQ","极睿","ipad2","ipad3","ipad4","快抢","肌漾","取款","网银","网络","跨行"};
        String[] data = v1.toString().split(",");
        String desc = data[2].trim();
        String payMent = data[1];
        //Derive the year from the Unix timestamp (seconds) in the time column
        int time = Integer.parseInt(data[3]);
        Calendar calendar = Calendar.getInstance();
        calendar.setTime(new Date(time * 1000L));
        int year = calendar.get(Calendar.YEAR);
        //Classify the record by the first keyword its description contains;
        //records matching no keyword fall into the "其他" (other) category.
        for (int i = 0; i < keywords.length; i++) {
            if (desc.contains(keywords[i])){
                desc = keywords[i];
                context.write(
                        new Text(desc + " " + year),
                        new FloatWritable(Math.abs(Float.parseFloat(payMent)))
                );
                //Stop after the first match so each record is counted only once
                break;
            } else if (i == keywords.length - 1) {
                desc = "其他";
                context.write(
                        new Text(desc + " " + year),
                        new FloatWritable(Math.abs(Float.parseFloat(payMent)))
                );
            }
        }
    }
}
(2) reducer
public class reducer extends Reducer<Text, FloatWritable,Text,FloatWritable> {
@Override
protected void reduce(Text k3, Iterable<FloatWritable> v3, Reducer<Text, FloatWritable, Text, FloatWritable>.Context context) throws IOException, InterruptedException {
float sumPayment = 0f;
for (FloatWritable fw : v3){
sumPayment += fw.get();
}
context.write(
k3,new FloatWritable(sumPayment)
);
}
}
(3) aMain
public class aMain {
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setJarByClass(aMain.class);
job.setMapperClass(mapper.class);
job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(FloatWritable.class);
job.setReducerClass(reducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(FloatWritable.class);
Path out = new Path(args[1]);
FileInputFormat.setInputPaths(job,new Path(args[0]));
FileOutputFormat.setOutputPath(job,out);
FileSystem fs = FileSystem.get(conf);
if (fs.exists(out)) {
fs.delete(out, true);
}
job.waitForCompletion(true);
}
}
(4) Execution