Storing MapReduce Results in MySQL

In a small demo from a couple of days ago I ran into the following requirement: store the results of a MapReduce job in MySQL.
Normally I would simply write the results to a file and inspect that file to see the outcome of the analysis.

// Specify where the output goes (a file on HDFS)
FileOutputFormat.setOutputPath(job, new Path("hdfs://master:9000/out.txt"));

This time, however, the results need to go into a database so that other programs can read them later, and the database we agreed on is MySQL, so the job setup has to change.

First, configure the database connection in your job:

DBConfiguration.configureDB(conf, "com.mysql.jdbc.Driver",  
                "jdbc:mysql://localhost:3306/test_mr?characterEncoding=UTF-8", "root", "password");

Next, provide your own OutputFormat and DBWritable implementations.
LoginOutputFormat:

package cloud.login;

import java.io.IOException;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;

import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.db.DBConfiguration;
import org.apache.hadoop.mapreduce.lib.db.DBWritable;
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;


// Modeled on Hadoop's DBOutputFormat, but issues REPLACE INTO statements so that
// re-running the job overwrites existing rows for the same key.
public class LoginOutputFormat<K extends DBWritable, V> extends
        OutputFormat<K, V> {

    public void checkOutputSpecs(JobContext context) throws IOException,
            InterruptedException {
        // Nothing to validate up front: the output target is a database table, not a path.
    }

    public OutputCommitter getOutputCommitter(TaskAttemptContext context)
            throws IOException, InterruptedException {
        // No output path is configured, so this FileOutputCommitter is effectively a no-op.
        return new FileOutputCommitter(FileOutputFormat.getOutputPath(context),
                context);
    }

    public class DBRecordWriter extends RecordWriter<K, V> {
        private Connection connection;
        private PreparedStatement statement;

        public DBRecordWriter() throws SQLException {
        }

        // Auto-commit is disabled so the whole batch is committed once in close().
        public DBRecordWriter(Connection connection, PreparedStatement statement) throws SQLException {
            this.connection = connection;
            this.statement = statement;
            this.connection.setAutoCommit(false);
        }

        public Connection getConnection() {
            return connection;
        }

        public PreparedStatement getStatement() {
            return statement;
        }

        public void close(TaskAttemptContext context) throws IOException {
            try {
                // Flush all batched rows and commit once per task attempt.
                statement.executeBatch();
                connection.commit();
            } catch (SQLException e) {
                try {
                    connection.rollback();
                } catch (SQLException ex) {
                }
                throw new IOException(e.getMessage());
            } finally {
                try {
                    statement.close();
                    connection.close();
                } catch (SQLException ex) {
                    throw new IOException(ex.getMessage());
                }
            }
        }

        public void write(K key, V value) throws IOException {
            try {
                // The key (a DBWritable) binds its fields to the prepared statement.
                key.write(statement);
                statement.addBatch();
            } catch (SQLException e) {
                // Fail the task instead of silently dropping the record.
                throw new IOException(e.getMessage(), e);
            }
        }
    }

    /**
     * Builds a "replace into table (f1,f2,...) values (?,?,...)" statement.
     * REPLACE INTO lets a re-run of the job overwrite existing rows instead of
     * failing on duplicate keys.
     */
    public String constructQuery(String table, String[] fieldNames) {
        if (fieldNames == null) {
            throw new IllegalArgumentException("Field names may not be null");
        }
        StringBuilder query = new StringBuilder();
        query.append("replace into ").append(table);
        if (fieldNames.length > 0 && fieldNames[0] != null) {
            query.append(" (");
            for (int i = 0; i < fieldNames.length; i++) {
                query.append(fieldNames[i]);
                if (i != fieldNames.length - 1) {
                    query.append(",");
                }
            }
            query.append(")");
        }
        query.append(" values (");
        for (int i = 0; i < fieldNames.length; i++) {
            query.append("?");
            if (i != fieldNames.length - 1) {
                query.append(",");
            }
        }
        query.append(")");
        return query.toString();
    }
    public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context) throws IOException {
        DBConfiguration dbConf = new DBConfiguration(context.getConfiguration());
        String tableName = dbConf.getOutputTableName();
        String[] fieldNames = dbConf.getOutputFieldNames();
        if (fieldNames == null) {
            fieldNames = new String[dbConf.getOutputFieldCount()];
        }
        try {
            Connection connection = dbConf.getConnection();
            PreparedStatement statement = null;
            statement = connection.prepareStatement(constructQuery(tableName, fieldNames));
            return new DBRecordWriter(connection, statement);
        } catch (Exception ex) {
            throw new IOException(ex.getMessage());
        }
    }

    public static void setOutput(Job job, String tableName, String... fieldNames) throws IOException {
        if (fieldNames.length > 0 && fieldNames[0] != null) {
            DBConfiguration dbConf = setOutput(job, tableName);
            dbConf.setOutputFieldNames(fieldNames);
        } else {
            if (fieldNames.length > 0) {
                setOutput(job, tableName, fieldNames.length);
            } else {
                throw new IllegalArgumentException("Field names must be greater than 0");
            }
        }
    }

    public static void setOutput(Job job, String tableName, int fieldCount) throws IOException {
        DBConfiguration dbConf = setOutput(job, tableName);
        dbConf.setOutputFieldCount(fieldCount);
    }

    private static DBConfiguration setOutput(Job job, String tableName) throws IOException {
        job.setOutputFormatClass(LoginOutputFormat.class);
        job.setReduceSpeculativeExecution(false);
        DBConfiguration dbConf = new DBConfiguration(job.getConfiguration());
        dbConf.setOutputTableName(tableName);
        return dbConf;
    }
}

LoginWritable:

package cloud.login;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;

import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.lib.db.DBWritable;

public class LoginWritable implements Writable, DBWritable {  
    String tbl_month_time;
    String tbl_day_time;
    int tbl_number;

    public LoginWritable() {  
    }  
    public LoginWritable(String month, String day, int number) {  
        this.tbl_day_time = day; 
        this.tbl_month_time = month;
        this.tbl_number = number;  
    }   
    // Called by the RecordWriter: bind the fields in the same order as the
    // column names passed to LoginOutputFormat.setOutput().
    public void write(PreparedStatement statement) throws SQLException {
        statement.setString(1, this.tbl_month_time);
        statement.setString(2, this.tbl_day_time);
        statement.setInt(3, this.tbl_number);  
    }  
    public void readFields(ResultSet resultSet) throws SQLException {  
        this.tbl_month_time = resultSet.getString(1);
        this.tbl_day_time = resultSet.getString(2); 
        this.tbl_number = resultSet.getInt(3);  
    }  
    public void write(DataOutput out) throws IOException {  
        out.writeUTF(this.tbl_month_time);
        out.writeUTF(this.tbl_day_time);
        out.writeInt(this.tbl_number);  
    }  
    public void readFields(DataInput in) throws IOException {  
        this.tbl_month_time = in.readUTF();
        this.tbl_day_time = in.readUTF();
        this.tbl_number = in.readInt();  
    }  
    public String toString() {
        return this.tbl_month_time + " " + this.tbl_day_time + " " + this.tbl_number;
    }
}  
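
The post does not include the Mapper and Reducer, but for rows to reach MySQL the Reducer must emit a LoginWritable as its output key, because the RecordWriter above calls key.write(statement). Below is a minimal sketch of such a Reducer; the class name LoginReducer and the assumption that the map phase emits a Text key of the form "month day" with IntWritable counts are illustrative guesses, not the original demo's code.

package cloud.login;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class LoginReducer extends Reducer<Text, IntWritable, LoginWritable, NullWritable> {

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        // Sum the login count for this month/day combination.
        int sum = 0;
        for (IntWritable value : values) {
            sum += value.get();
        }
        // Assumed key layout: "month day", e.g. "2017-06 15".
        String[] parts = key.toString().split(" ");
        // Emit the LoginWritable as the key; LoginOutputFormat binds it to the
        // REPLACE INTO statement, and the value is ignored.
        context.write(new LoginWritable(parts[0], parts[1], sum), NullWritable.get());
    }
}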

Set the output format in the job:

// Output format
job.setOutputFormatClass(LoginOutputFormat.class); 
// Specify the table name and the field names
LoginOutputFormat.setOutput(job, "day_number", "month", "day", "login");

That's it: run the program and the results are stored in MySQL.
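
Note that the target table has to exist before the job runs; since the OutputFormat issues REPLACE INTO, giving day_number a primary key (typically on month and day) lets repeated runs overwrite old results instead of accumulating duplicates. To double-check that the rows landed, a throwaway JDBC query like the following works (the connection settings mirror the configureDB call above):

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

public class CheckResults {
    public static void main(String[] args) throws Exception {
        // Older MySQL drivers need to be loaded explicitly; harmless with newer ones.
        Class.forName("com.mysql.jdbc.Driver");
        try (Connection conn = DriverManager.getConnection(
                     "jdbc:mysql://localhost:3306/test_mr?characterEncoding=UTF-8", "root", "password");
             Statement stmt = conn.createStatement();
             ResultSet rs = stmt.executeQuery("SELECT month, day, login FROM day_number")) {
            while (rs.next()) {
                System.out.println(rs.getString("month") + " " + rs.getString("day")
                        + " " + rs.getInt("login"));
            }
        }
    }
}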

If you want the complete code and test data, head to:
Gitee: trendhu
GitHub: trendhu
