二十(续)、自定义Inputformat读取mysql中的数据

分为两种加载方式:一种整个数据切片,一种是按照指定数据条数切片有点类似NLine方式

下面就来进行代码实操:读取mysql数据写入写入本地文件不做计算,不需要用到redece阶段,只需map即可

1、既然是加载mysql必然少不了数据库访问
添加mysql连接依赖
   <dependency>
       <groupId>mysql</groupId>
       <artifactId>mysql-connector-java</artifactId>
       <version>5.1.47</version>
   </dependency>
2、JDBC编写
public class JDBCUtil {
    private static final String DRIVER = "com.mysql.jdbc.Driver";
    private static final String URL = "jdbc:mysql://localhost:3306/xxx?useUnicode=true&characterEncoding=UTF-8&autoReconnect=true";
    private static final String USERNAME = "root";
    private static final String PASSWORD = "root";

    public static Connection getConnection() {
        Connection conn = null;
        try {
            Class.forName(DRIVER);
            conn = DriverManager.getConnection(URL, USERNAME, PASSWORD);
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        } catch (SQLException e) {
            e.printStackTrace();
        }
        return conn;
    }

    public static Integer queryCount(){
        Connection conn = JDBCUtil.getConnection();
        Integer count = 0;
        String sql = "SELECT COUNT(*) FROM t_order";
        try {
            PreparedStatement pre = conn.prepareStatement(sql);
            ResultSet res = pre.executeQuery();
            res.next();
            count = res.getInt(1);
        } catch (SQLException e) {
            e.printStackTrace();
        }finally {
            try {
                conn.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
        return count;
    }
}
3、数据封装实体类-序列化
package com.cjy.mr.mysqlformat;

import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

public class Order implements WritableComparable<Order> {

    private String id;
    private String userCode;
    private String productCode;
    private Integer value;

    public Order() {

    }

    public Order(String id, String userCode, String productCode, Integer value) {
        this.id = id;
        this.userCode = userCode;
        this.productCode = productCode;
        this.value = value;
    }


    public void setInfo(String id, String userCode, String productCode, Integer value){
        this.id = id;
        this.userCode = userCode;
        this.productCode = productCode;
        this.value = value;
    }
    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public String getUserCode() {
        return userCode;
    }

    public void setUserCode(String userCode) {
        this.userCode = userCode;
    }

    public String getProductCode() {
        return productCode;
    }

    public void setProductCode(String productCode) {
        this.productCode = productCode;
    }

    public Integer getValue() {
        return value;
    }

    public void setValue(Integer value) {
        this.value = value;
    }



    //排序
    @Override
    public int compareTo(Order o) {
        int res=0;
        if(userCode.compareTo(o.getUserCode()) > 0){
            res= 1;
        }else if(userCode.compareTo(o.getUserCode()) < 0){
            res= -1;
        }else{
            if(value > o.getValue()){
                res= 1;
            }
        }
        return res;
    }

    //序列化操作
    @Override
    public void write(DataOutput out) throws IOException {
            out.writeUTF(id);
            out.writeUTF(userCode);
            out.writeUTF(productCode);
            out.writeInt(value);
    }
    @Override
    public void readFields(DataInput in) throws IOException {
        id = in.readUTF();
        userCode = in.readUTF();
        productCode = in.readUTF();
        value = in.readInt();
    }

    @Override
    public String toString() {
        return id+"\t"+userCode+"\t"+productCode+"\t"+value;
    }
}

4、自定义Inputformat
package com.cjy.mr.mysqlformat;

import com.sun.tools.corba.se.idl.constExpr.Or;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.*;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;

//1.继承InputFormat
public class MysqlInputFormat extends InputFormat<Order, NullWritable> {


    //2.修改切片规则,这里是全加载
    public static class MySqlInputSplit extends InputSplit implements Writable {

        private long start;
        private long end;

        public MySqlInputSplit() {

        }

        public MySqlInputSplit(long start, long end) {
            this.start = start;
            this.end = end;
        }

        public long getStart() {
            return start;
        }

        public void setStart(long start) {
            this.start = start;
        }

        public long getEnd() {
            return end;
        }

        public void setEnd(long end) {
            this.end = end;
        }

        //先写出,先读进
        @Override
        public void write(DataOutput out) throws IOException {
            out.writeLong(this.start);
            out.writeLong(this.end);
        }

        //先写出,先读进
        @Override
        public void readFields(DataInput in) throws IOException {
            this.start = in.readLong();
            this.end = in.readLong();
        }

        @Override
        public long getLength() throws IOException, InterruptedException {
            return this.end - this.start;
        }

        @Override
        public String[] getLocations() throws IOException, InterruptedException {
            return new String[0];
        }


    }

    //3,定义数据加载规则
    private class RecordReaderMysqlData extends RecordReader<Order, NullWritable> {

        private Connection conn = null; //数据库连接
        private List<Order> list = null; //初始化数据,从mysql中读取数据
        private Order key = null; //每次写出的key
        private NullWritable value = null; //每次写出的v
        private Integer index = 0; //用于终止迭代数据

        public RecordReaderMysqlData() throws IOException, InterruptedException {

        }

        public RecordReaderMysqlData(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
            super();
            initialize(split, context);
        }

        @Override
        public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
            //初始化,由于测试一次数据加载,所以不用你切片信息
            this.key =  new Order();
            this.value = NullWritable.get();
        }
        //将数据写出
        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            //加载数据
            if(list == null){
                list = new ArrayList();
                conn = JDBCUtil.getConnection();
                //从mysql中加载数据
                String sql = "SELECT id,user_code,product_code,value FROM t_order;";
                try {
   PreparedStatement pre = conn.prepareStatement(sql);
       ResultSet resultSet = pre.executeQuery();
       while (resultSet.next()) {
           String id = resultSet.getString(1);
           String usercode = resultSet.getString(2);
           String productCode = resultSet.getString(3);
           String value = resultSet.getString(4);
           Order  order = new Order(id, usercode,productCode,Integer.parseInt(value));
           list.add(order);
       }
                    resultSet.close();
                    pre.close();
                } catch (SQLException e) {
                    e.printStackTrace();
                }
            }
            
            //将数据写出
            if (index < list.size()) {
                key = list.get(index);
                index++;
                return true;
            }
            //返回结果等于false时,数据读入结束,所以需要list中所有数据取出才行
            return false;
        }

        @Override
        public Order getCurrentKey() throws IOException, InterruptedException {
            return key;
        }

        @Override
        public NullWritable getCurrentValue() throws IOException, InterruptedException {
            return value;
        }

        @Override
        public float getProgress() throws IOException, InterruptedException {
            return 0;
        }
        //关闭资源
        @Override
        public void close() throws IOException {
            if (conn != null) {
                try {
                    conn.close();
                } catch (SQLException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    //重写切片回去函数
    @Override
    public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
        List<InputSplit> splits = new ArrayList<InputSplit>();
        MySqlInputSplit inputSplit = new MySqlInputSplit();
        splits.add(inputSplit);
        return splits;
    }
    //重写数据加载方式
    @Override
    public RecordReader<Order, NullWritable> createRecordReader(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
        return new RecordReaderMysqlData(split, context);
    }
}

5、Mapper
public class MysqlMapper extends Mapper<Order,NullWritable,Order,NullWritable> {
    @Override
    protected void map(Order key, NullWritable value, Context context) throws IOException, InterruptedException {
        context.write(key,value);
        }
}

7、driver

public class MysqlDriver {

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // 1 获取配置信息以及封装任务
        Configuration configuration = new Configuration();
        Job job = Job.getInstance(configuration);

        // 2 设置jar加载路径
        job.setJarByClass(MysqlDriver.class);

        // 3 设置map和reduce类
        job.setMapperClass(MysqlMapper.class);

        // 4 设置map输出
        job.setMapOutputKeyClass(Order.class);
        job.setMapOutputValueClass(NullWritable.class);

        // 6 设置输入和输出路径
        //读取mysql中的数据写入到文件中
job.setInputFormatClass(MysqlInputFormatSplit.class);

        FileOutputFormat.setOutputPath(job, new Path("d:/out2"));

        // 7 提交
        boolean result = job.waitForCompletion(true);
        System.exit(result ? 0 : 1);
    }
}
6、测试结果
切片信息:
2020-05-14 14:42:46,942 WARN [org.apache.hadoop.mapreduce.JobResourceUploader] - No job jar file set.  User classes may not be found. See Job or Job#setJar(String).
2020-05-14 14:42:46,981 INFO [org.apache.hadoop.mapreduce.JobSubmitter] - number of splits:1

输入数据
Map-Reduce Framework
Map input records=20

在这里插入图片描述

处理结果

在这里插入图片描述

7、切片方式的MysqlInputformap
import com.sun.tools.corba.se.idl.constExpr.Or;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.*;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;

//1.继承InputFormat
public class MysqlInputFormat extends InputFormat<Order, NullWritable> {


    //2.修改切片规则,这里是全加载
    public static class MySqlInputSplit extends InputSplit implements Writable {

        private long start;
        private long end;

        public MySqlInputSplit() {

        }

        public MySqlInputSplit(long start, long end) {
            this.start = start;
            this.end = end;
        }

        public long getStart() {
            return start;
        }

        public void setStart(long start) {
            this.start = start;
        }

        public long getEnd() {
            return end;
        }

        public void setEnd(long end) {
            this.end = end;
        }

        //先写出,先读进
        @Override
        public void write(DataOutput out) throws IOException {
            out.writeLong(this.start);
            out.writeLong(this.end);
        }

        //先写出,先读进
        @Override
        public void readFields(DataInput in) throws IOException {
            this.start = in.readLong();
            this.end = in.readLong();
        }

        @Override
        public long getLength() throws IOException, InterruptedException {
            return this.end - this.start;
        }

        @Override
        public String[] getLocations() throws IOException, InterruptedException {
            return new String[0];
        }


    }

    //3,定义数据加载规则
    private class RecordReaderMysqlData extends RecordReader<Order, NullWritable> {

        private Connection conn = null; //数据库连接
        private List<Order> list = null; //初始化数据,从mysql中读取数据
        private Order key = null; //每次写出的key
        private NullWritable value = null; //每次写出的v
        private Integer index = 0; //用于终止迭代数据

        public RecordReaderMysqlData() throws IOException, InterruptedException {

        }

        public RecordReaderMysqlData(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
            super();
            initialize(split, context);
        }

        @Override
        public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
            //初始化,由于测试一次数据加载,所以不用切片信息
            //假设没有数据时,null会报错,所以初始化kv
            this.key =  new Order();
            this.value = NullWritable.get();
        }
        //将数据写出
        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            //加载数据
            if(list == null){
                list = new ArrayList();
                conn = JDBCUtil.getConnection();
                //从mysql中加载数据
                String sql = "SELECT id,user_code,product_code,value FROM t_order;";
                try {
                    PreparedStatement pre = conn.prepareStatement(sql);
                    ResultSet resultSet = pre.executeQuery();
                    while (resultSet.next()) {
                        String id = resultSet.getString(1);
                        String usercode = resultSet.getString(2);
                        String productCode = resultSet.getString(3);
                        String value = resultSet.getString(4);
                        Order  order = new Order(id, usercode,productCode,Integer.parseInt(value));
                        list.add(order);
                    }
                    resultSet.close();
                    pre.close();
                } catch (SQLException e) {
                    e.printStackTrace();
                }
            }

            //将数据写出
            if (index < list.size()) {
                key = list.get(index);
                index++;
                return true;
            }
            //返回结果等于false时,数据读入结束,所以需要list中所有数据取出才行
            return false;
        }

        @Override
        public Order getCurrentKey() throws IOException, InterruptedException {
            return key;
        }

        @Override
        public NullWritable getCurrentValue() throws IOException, InterruptedException {
            return value;
        }

        @Override
        public float getProgress() throws IOException, InterruptedException {
            return 0;
        }
        //关闭资源
        @Override
        public void close() throws IOException {
            if (conn != null) {
                try {
                    conn.close();
                } catch (SQLException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    //重写切片回去函数
    @Override
    public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
        List<InputSplit> splits = new ArrayList<InputSplit>();
        MySqlInputSplit inputSplit = new MySqlInputSplit();
        splits.add(inputSplit);
        return splits;
    }
    //重写数据加载方式
    @Override
    public RecordReader<Order, NullWritable> createRecordReader(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
        return new RecordReaderMysqlData(split, context);
    }
}

8、切片调用

使用时,只需将此位置改变即可

// 6 设置输入和输出路径
//读取mysql中的数据写入到文件中
//        job.setInputFormatClass(MysqlInputFormat.class);
//带切片格式
job.setInputFormatClass(MysqlInputFormatSplit.class);

FileOutputFormat.setOutputPath(job, new Path("d:/out2"));

9、结果
一共二十条数据,按照十条一个切片,如下信息开启了两个maptask

Thu May 14 15:16:20 CST 2020 WARN: Establishing SSL connection without server's identity verification is not recommended. According to MySQL 5.5.45+, 5.6.26+ and 5.7.6+ requirements SSL connection must be established by default if explicit option isn't set. For compliance with existing applications not using SSL the verifyServerCertificate property is set to 'false'. You need either to explicitly disable SSL by setting useSSL=false, or set useSSL=true and provide truststore for server certificate verification.
2020-05-14 15:16:20,691 INFO [org.apache.hadoop.mapreduce.JobSubmitter] - number of splits:2
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值