Hadoop实战学习（4）-数据库的读写

最新推荐文章于 2022-12-08 16:01:10 发布

兵工厂三剑客

最新推荐文章于 2022-12-08 16:01:10 发布

阅读量512

点赞数

分类专栏： hadoop 文章标签： hadoop 数据库

本文链接：https://blog.csdn.net/SCGH_Fx/article/details/80503268

版权

hadoop 专栏收录该内容

17 篇文章 0 订阅

订阅专栏

该文讲述Hadoop读取数据库中表的数据，并将计算结果写入到另一张表。

要读写数据库中的数据，首先需要实现一个实体类，该实体类的字段映射数据库中要查询的表的（部分）字段。该实体类需要同时实现Writable与DBWritable两个接口：DBWritable负责从查询结果集中读取数据以及向数据库写入数据，Writable负责对象在Hadoop内部的序列化与反序列化。

代码：

package com.readdb;

import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.lib.db.DBWritable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;

/**
 * 该类在Mapper前执行
 */
public class MyDBWriteable implements DBWritable,Writable {

    private String name,sex,remark;

    private String word;

    private int count;

    private int id;

    public int getId() {
        return id;
    }

    public void setId(int id) {
        this.id = id;
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public String getSex() {
        return sex;
    }

    public void setSex(String sex) {
        this.sex = sex;
    }

    public String getRemark() {
        return remark;
    }

    public void setRemark(String remark) {
        this.remark = remark;
    }

    public String getWord() {
        return word;
    }

    public void setWord(String word) {
        this.word = word;
    }

    public int getCount() {
        return count;
    }

    public void setCount(int count) {
        this.count = count;
    }

    /**
     * 序列化输出对象字段,将查询结果作为mapper的输入
     * 即将查询结果写入到Mapper的输入数据
     * @param dataOutput
     * @throws IOException
     */
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeInt(id);
        dataOutput.writeUTF(sex);
        dataOutput.writeUTF(name);
        dataOutput.writeUTF(remark);
    }

    /**
     * 读取向数据库写入输入字段
     * @param dataInput
     * @throws IOException
     */
    public void readFields(DataInput dataInput) throws IOException {
        word=dataInput.readUTF();
        count=dataInput.readInt();
    }

    /**
     * 向数据库写入数据
     * @param statement
     * @throws SQLException
     */
    public void write(PreparedStatement statement) throws SQLException {
        //写入顺序要与列顺序一致
        statement.setString(1,word);
        statement.setInt(2,count);
    }

    /**
     * 读取查询结果集
     * @param resultSet
     * @throws SQLException
     */

    public void readFields(ResultSet resultSet) throws SQLException {
        id=resultSet.getInt(1);
        name=resultSet.getString(2);
        sex=resultSet.getString(3);
        remark=resultSet.getString(4);
    }
}

Mapper:

package com.readdb;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;
import java.util.StringTokenizer;

/**
 * Splits the {@code remark} text of each input row into whitespace-separated
 * tokens and emits {@code (token, 1)} for every token.
 */
public class WCMapper extends Mapper<LongWritable,MyDBWriteable,Text,IntWritable> {

    // Reused output objects: avoids allocating two objects per emitted token.
    private static final IntWritable ONE = new IntWritable(1);
    private final Text word = new Text();

    /**
     * Emits one {@code (token, 1)} pair per token found in the row's remark.
     *
     * @param key     input key supplied by the input format; not used here
     * @param value   the row whose remark is tokenized
     * @param context sink for the emitted pairs
     * @throws IOException          if emitting a pair fails
     * @throws InterruptedException if the task is interrupted while emitting
     */
    @Override
    protected void map(LongWritable key, MyDBWriteable value, Context context) throws IOException, InterruptedException {
        String remark = value.getRemark();
        if (remark == null) {
            // A NULL remark column yields a null string; StringTokenizer would throw
            // NullPointerException on it, so such rows simply contribute no tokens.
            return;
        }
        StringTokenizer tokens = new StringTokenizer(remark);
        while (tokens.hasMoreTokens()) {
            word.set(tokens.nextToken());
            context.write(word, ONE);
        }
    }
}

Reducer:

package com.readdb;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * Adds up the counts emitted for one word and emits a single
 * {@link MyDBWriteable} record holding the word and its total.
 */
public class WCReducer extends Reducer<Text,IntWritable,MyDBWriteable,NullWritable> {

    /**
     * Produces one output record for the given word.
     *
     * @param key     the word being reduced
     * @param values  the partial counts collected for the word
     * @param context sink for the output record
     * @throws IOException          if emitting the record fails
     * @throws InterruptedException if the task is interrupted while emitting
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        int total = sum(values);

        MyDBWriteable record = new MyDBWriteable();
        record.setWord(key.toString());
        record.setCount(total);

        // The record itself is the output key; there is no meaningful value.
        context.write(record, NullWritable.get());
    }

    /** Returns the sum of all partial counts. */
    private static int sum(Iterable<IntWritable> partials) {
        int total = 0;
        for (IntWritable partial : partials) {
            total = total + partial.get();
        }
        return total;
    }
}

Job提交：

package com.readdb;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.db.DBConfiguration;
import org.apache.hadoop.mapreduce.lib.db.DBInputFormat;
import org.apache.hadoop.mapreduce.lib.db.DBOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

import java.io.IOException;

public class HDFSDemo {

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration configuration=new Configuration();
        //配置作业
        Job job=Job.getInstance(configuration,"readdb");
        String[] otherArgs = new GenericOptionsParser(configuration, args).getRemainingArgs();
//        if (otherArgs.length != 1) {
//            System.err.println("Usage: wordcount <out>");
//            System.exit(2);
//        }else {
//            FileSystem.get(configuration).delete(new Path(otherArgs[0]));
//        }
        //配置数据库信息
        String driverClass="com.mysql.jdbc.Driver";
        String url="jdbc:mysql://192.168.244.3:3306/BigData?useUnicode=true&characterEncoding=utf8&useSSL=false";
        String userName="root";
        String password="123456";
        String querySelect="select * from author";
        String queryCount="select count(*) from author";
        //配置数据库
        DBConfiguration.configureDB(job.getConfiguration(),driverClass,url,userName,password);
        //配置Mapper数据输入
        DBInputFormat.setInput(job,MyDBWriteable.class,querySelect,queryCount);
        //向数据库表写入数据
        DBOutputFormat.setOutput(job,"wr_record","word","count");
        //设置搜索类
        job.setJarByClass(HDFSDemo.class);
        //设置输入格式,TextInputFormat是默认输入格式,不能设置成FileInputFormat.Class,该惨数在当前情况下可以不设置
        job.setInputFormatClass(DBInputFormat.class);
        //设置Mapper类
        job.setMapperClass(WCMapper.class);
        //设置Reducer类
        job.setReducerClass(WCReducer.class);
        //设置Reducer个数
        //job.setNumReduceTasks(1);
        //设置maper端单词输出格式
        job.setMapOutputKeyClass(Text.class);
        //设置mapper端单词输出个数格式
        job.setMapOutputValueClass(IntWritable.class);
        //设置Reducer端单词输出格式
        job.setOutputKeyClass(Text.class);
        //设置Reducer单词输出个数格式
        job.setOutputValueClass(IntWritable.class);
        //设置job的输入路径,多次add可以设置多个输入路径
        //FileInputFormat.addInputPath(job,new Path(otherArgs[0]));
        //设置输出路径
        //FileOutputFormat.setOutputPath(job,new Path(otherArgs[0]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

}

由于是从数据库读取数据，并将计算结果写入到数据表里，因此该工程不用配置输入与输出路径。

编译并运行工程，然后进入MySQL数据库查看输出结果，可以看到它与《hadoop读取数据库内容》一文中的读取结果一致。

该工程计算结果：