The code is still organized into the three main classes, and this time there is also a special custom Writable class to simplify the database operations.
First, the data. This demo is only meant to verify that the whole pipeline works, so finishing without exceptions is good enough. The data looks like this:
2018-06-02 11:12:21,12321323423,裤子和衣服和洗漱品,陕西省
2018-06-01 11:12:21,12321323423,裤子和衣服和洗漱品,陕西省
2018-01-03 11:12:21,12321323423,裤子和衣服和洗漱品,陕西省
2018-06-02 11:12:21,12321323423,裤子和衣服和洗漱品,甘肃省
2016-06-01 11:12:21,12321323423,裤子和衣服和洗漱品,陕西省
2018-02-02 11:12:21,12321323423,裤子和衣服和洗漱品,陕西省
2018-06-02 11:12:21,12321323423,裤子和衣服和洗漱品,陕西省
2018-11-11 11:12:21,12321323423,裤子和衣服和洗漱品,甘肃省
2018-01-02 11:12:21,12321323423,裤子和衣服和洗漱品,陕西省
2018-11-11 11:12:21,12321323423,裤子和衣服和洗漱品,陕西省
2018-03-02 11:12:21,12321323423,裤子和衣服和洗漱品,陕西省
2011-01-02 11:12:21,12321323423,裤子和衣服和洗漱品,陕西省
2014-01-02 11:12:21,12321323423,裤子和衣服和洗漱品,陕西省
2018-11-11 11:12:21,12321323423,裤子和衣服和洗漱品,陕西省
Of course the data carries much more information, and the output could have been made richer, but to keep up with my study schedule (things have genuinely been busy lately) I'll just use part of it for now. With this sample, three orders fall on 2018-11-11, so the expected result is 陕西省 = 2 and 甘肃省 = 1.
Enough talk, here's the code:
Mapper class:
package com.fyg.bigdata.shopcount;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class ShopCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // each line looks like: timestamp,order id,item description,province
        String[] fields = value.toString().split(",");
        // the first 10 characters of the timestamp are the date, e.g. "2018-11-11"
        String dateStr = fields[0].substring(0, 10);
        // only count orders placed on Singles' Day 2018; emit (province, 1)
        if (dateStr.equals("2018-11-11")) {
            context.write(new Text(fields[3]), new IntWritable(1));
        }
    }
}
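As a quick aside, here's a throwaway snippet (plain Java, no Hadoop runtime; the ParseCheck class is just my own sanity check, not part of the job) that confirms the split/substring logic on one sample line:
package com.fyg.bigdata.shopcount;

public class ParseCheck {
    public static void main(String[] args) {
        String line = "2018-11-11 11:12:21,12321323423,裤子和衣服和洗漱品,陕西省";
        String[] fields = line.split(",");
        String dateStr = fields[0].substring(0, 10);
        // prints: true -> 陕西省
        System.out.println(dateStr.equals("2018-11-11") + " -> " + fields[3]);
    }
}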
Reducer class:
package com.fyg.bigdata.shopcount;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

// input shape: <陕西省,[1,1,...]> <甘肃省,[1,1,...]>
public class ShopCountReducer extends Reducer<Text, IntWritable, MyDBWritable, NullWritable> {
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable value : values) {
            sum += value.get();
        }
        // DBOutputFormat only persists the key, so the value slot can stay empty
        context.write(new MyDBWritable(key.toString(), sum), NullWritable.get());
    }
}
Job class:
package com.fyg.bigdata.shopcount;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.db.DBConfiguration;
import org.apache.hadoop.mapreduce.lib.db.DBOutputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

public class ShopCountJob {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // database connection settings; note these classes come from
        // org.apache.hadoop.mapreduce.lib.db, not the old mapred package
        DBConfiguration.configureDB(conf, "com.mysql.jdbc.Driver",
                "jdbc:mysql://demo05:3306/d1?characterEncoding=utf-8", "root", "root");
        // leave the line below commented out to run against the local filesystem instead of HDFS
        // conf.set("fs.defaultFS","nn1");
        Job job = Job.getInstance(conf);
        job.setJarByClass(ShopCountJob.class);
        // set the mapper and reducer classes
        job.setMapperClass(ShopCountMapper.class);
        job.setReducerClass(ShopCountReducer.class);
        // set the types of the map output
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        // set the types of the reduce output (needed here because they differ from the map output)
        job.setOutputKeyClass(MyDBWritable.class);
        job.setOutputValueClass(NullWritable.class);
        // input/output format classes; TextInputFormat is the default and feeds the map phase,
        // while the output format handles the reduce phase's output
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(DBOutputFormat.class);
        // set the input path and the target table/columns
        TextInputFormat.setInputPaths(job, new Path("/data/input"));
        DBOutputFormat.setOutput(job, "shops", "name", "count");
        // submit the job and wait for it to finish
        job.waitForCompletion(true);
    }
}
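One thing to remember: DBOutputFormat expects the target table to exist before the job runs. Below is a minimal one-off sketch that creates it; the table and column names are fixed by the setOutput call above, while the VARCHAR(64)/INT column types and the CreateShopsTable class itself are my own assumptions:
package com.fyg.bigdata.shopcount;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.Statement;

public class CreateShopsTable {
    public static void main(String[] args) throws Exception {
        Class.forName("com.mysql.jdbc.Driver");
        Connection conn = DriverManager.getConnection(
                "jdbc:mysql://demo05:3306/d1?characterEncoding=utf-8", "root", "root");
        Statement st = conn.createStatement();
        // names must match DBOutputFormat.setOutput(job, "shops", "name", "count");
        // the column types here are assumed
        st.execute("CREATE TABLE IF NOT EXISTS shops (name VARCHAR(64), `count` INT)");
        st.close();
        conn.close();
    }
}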
MyDBWritable class:
package com.fyg.bigdata.shopcount;

import org.apache.hadoop.mapreduce.lib.db.DBWritable;

import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;

public class MyDBWritable implements DBWritable {
    private String name;
    private Integer count;

    public MyDBWritable(String name, Integer count) {
        this.name = name;
        this.count = count;
    }

    @Override
    public void write(PreparedStatement preparedStatement) throws SQLException {
        // JDBC parameter indexes start at 1, not 0
        preparedStatement.setString(1, name);
        preparedStatement.setInt(2, count);
    }

    @Override
    public void readFields(ResultSet resultSet) throws SQLException {
        // nothing to do: this job only writes to the database, never reads from it
    }
}
Although the custom Writable class exists only to simplify the database output, it was quite an eye-opener for me. In larger real-world projects this kind of design lets Java's elegance shine through even more, and I hope to keep this habit of encapsulating custom classes in mind as I keep learning.
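Just to sketch where that idea could go (this ShopRecord class is my own speculative extension, not something this demo needs): implementing Hadoop's Writable interface alongside DBWritable would let the same object travel between map and reduce as well as into the database:
package com.fyg.bigdata.shopcount;

import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.lib.db.DBWritable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;

// hypothetical fuller version: usable both as an intermediate key/value and as a DB record
public class ShopRecord implements Writable, DBWritable {
    private String name;
    private int count;

    public ShopRecord() { }  // Hadoop's serialization needs a no-arg constructor

    public ShopRecord(String name, int count) {
        this.name = name;
        this.count = count;
    }

    // Writable: binary (de)serialization between map and reduce
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(name);
        out.writeInt(count);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        name = in.readUTF();
        count = in.readInt();
    }

    // DBWritable: mapping to and from SQL statements
    @Override
    public void write(PreparedStatement ps) throws SQLException {
        ps.setString(1, name);
        ps.setInt(2, count);
    }

    @Override
    public void readFields(ResultSet rs) throws SQLException {
        name = rs.getString(1);
        count = rs.getInt(2);
    }
}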
For reference, here's my pom configuration:
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.fyg.homework</groupId>
    <artifactId>ShopCount</artifactId>
    <version>1.0-SNAPSHOT</version>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>6</source>
                    <target>6</target>
                </configuration>
            </plugin>
        </plugins>
    </build>

    <properties>
        <!-- Hadoop version -->
        <hadoop.version>2.6.4</hadoop.version>
    </properties>

    <dependencies>
        <!-- Hadoop common components -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <!-- Hadoop client -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <!-- HDFS -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <!-- YARN common components -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-yarn-common</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <!-- YARN client components -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-yarn-client</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <!-- YARN server-side components -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-yarn-server-common</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <!-- YARN ResourceManager components -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-yarn-server-resourcemanager</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <!-- YARN NodeManager components -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-yarn-server-nodemanager</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-yarn-server-applicationhistoryservice</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <!-- MySQL JDBC driver -->
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.39</version>
        </dependency>
    </dependencies>
</project>
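Once the job finishes, a quick query (again just a throwaway check of mine, reusing the same connection settings as the job) should show 陕西省 = 2 and 甘肃省 = 1 for the sample data:
package com.fyg.bigdata.shopcount;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

public class CheckResult {
    public static void main(String[] args) throws Exception {
        Class.forName("com.mysql.jdbc.Driver");
        Connection conn = DriverManager.getConnection(
                "jdbc:mysql://demo05:3306/d1?characterEncoding=utf-8", "root", "root");
        Statement st = conn.createStatement();
        ResultSet rs = st.executeQuery("SELECT name, `count` FROM shops");
        while (rs.next()) {
            // expected with the sample data: 陕西省 2, 甘肃省 1
            System.out.println(rs.getString("name") + " " + rs.getInt("count"));
        }
        rs.close();
        st.close();
        conn.close();
    }
}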
This demo wasn't particularly hard; it's basically a simple task with a few extra features bolted on. Once I've finished learning Hive, I plan to build a more complete project combining Hive and HBase.
See ya~