【第22期】观点:IT 行业加班,到底有没有价值?

MapReduce直接连接Mysql获取数据 (新API写法)

原创 2013年12月04日 18:09:43

创建表:

DROP TABLE IF EXISTS `sqooptest`.`lxw_tabls`;
CREATE TABLE `sqooptest`.`lxw_tabls` (
`TBL_NAME` varchar(20) default NULL,
`TBL_TYPE` varchar(20) default NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

创建hdfs输入文件:

[hadoop@slave-245 file]$ vi test3.txt
abc x
def y
chd z

[hadoop@slave-245 file]$ hadoop fs -mkdir in
Warning: $HADOOP_HOME is deprecated.

[hadoop@slave-245 file]$ hadoop fs -ls
Warning: $HADOOP_HOME is deprecated.

Found 1 items
drwxr-xr-x   - hadoop supergroup          0 2013-12-04 17:03 /user/hadoop/in
[hadoop@slave-245 file]$ hadoop fs -put test3.txt in
Warning: $HADOOP_HOME is deprecated.

[hadoop@slave-245 file]$ hadoop fs -ls
Warning: $HADOOP_HOME is deprecated.

Found 1 items
drwxr-xr-x   - hadoop supergroup          0 2013-12-04 17:03 /user/hadoop/in
[hadoop@slave-245 file]$ hadoop fs -ls in
Warning: $HADOOP_HOME is deprecated.

Found 1 items
-rw-r--r--   1 hadoop supergroup         18 2013-12-04 17:03 /user/hadoop/in/test3.txt
[hadoop@slave-245 file]$ hadoop fs -mkdir jar
Warning: $HADOOP_HOME is deprecated.

[hadoop@slave-245 file]$ hadoop fs -mkdir ls
Warning: $HADOOP_HOME is deprecated.

[hadoop@slave-245 file]$ hadoop fs -ls
Warning: $HADOOP_HOME is deprecated.

Found 3 items
drwxr-xr-x   - hadoop supergroup          0 2013-12-04 17:03 /user/hadoop/in
drwxr-xr-x   - hadoop supergroup          0 2013-12-04 17:47 /user/hadoop/jar
drwxr-xr-x   - hadoop supergroup          0 2013-12-04 17:47 /user/hadoop/ls
[hadoop@slave-245 file]$ hadoop fs -rmr ls
Warning: $HADOOP_HOME is deprecated.

Deleted hdfs://slave-245:9000/user/hadoop/ls
[hadoop@slave-245 file]$ hadoop fs -ls
Warning: $HADOOP_HOME is deprecated.

Found 2 items
drwxr-xr-x   - hadoop supergroup          0 2013-12-04 17:03 /user/hadoop/in
drwxr-xr-x   - hadoop supergroup          0 2013-12-04 17:47 /user/hadoop/jar
[hadoop@slave-245 file]$ hadoop fs -put /usr/hadoop/lib/mysql-connector-java-5.1.6-bin.jar jar
Warning: $HADOOP_HOME is deprecated.

[hadoop@slave-245 file]$ hadoop fs -ls jar
Warning: $HADOOP_HOME is deprecated.

Found 1 items
-rw-r--r--   1 hadoop supergroup     703265 2013-12-04 17:48 /user/hadoop/jar/mysql-connector-java-5.1.6-bin.jar

程序代码:

package dbinput;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.File;
import java.io.IOException;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.Iterator;

import mapr.EJob;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.db.DBConfiguration;
import org.apache.hadoop.mapreduce.lib.db.DBOutputFormat;
import org.apache.hadoop.mapreduce.lib.db.DBWritable;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

/**
 * 将mapreduce的结果数据写入mysql中
 * 
 * @author administrator
 * 
 */
public class WriteDataToMysql {
	/**
	 * Writes MapReduce results into MySQL via {@code DBOutputFormat}.
	 *
	 * Input: whitespace-separated lines ("name type") read from HDFS.
	 * Output: one row per (name, type) pair inserted into table
	 * {@code lxw_tabls(TBL_NAME, TBL_TYPE)}.
	 */

	/**
	 * Record type written to MySQL. Implements both {@link Writable}
	 * (for Hadoop serialization) and {@link DBWritable} (for JDBC binding);
	 * the two read/write pairs must serialize the fields in the same order.
	 */
	public static class TblsWritable implements Writable, DBWritable {
		String tbl_name;
		String tbl_type;

		/** No-arg constructor required by Hadoop's reflection-based instantiation. */
		public TblsWritable() {
		}

		public TblsWritable(String tbl_name, String tbl_type) {
			this.tbl_name = tbl_name;
			this.tbl_type = tbl_type;
		}

		/** Populates the fields from a JDBC result row (columns 1 and 2). */
		@Override
		public void readFields(ResultSet resultSet) throws SQLException {
			this.tbl_name = resultSet.getString(1);
			this.tbl_type = resultSet.getString(2);
		}

		/** Binds the fields to the INSERT statement's two placeholders. */
		@Override
		public void write(PreparedStatement statement) throws SQLException {
			statement.setString(1, this.tbl_name);
			statement.setString(2, this.tbl_type);
		}

		/** Hadoop deserialization; order must match {@link #write(DataOutput)}. */
		@Override
		public void readFields(DataInput in) throws IOException {
			this.tbl_name = in.readUTF();
			this.tbl_type = in.readUTF();
		}

		/** Hadoop serialization; order must match {@link #readFields(DataInput)}. */
		@Override
		public void write(DataOutput out) throws IOException {
			out.writeUTF(this.tbl_name);
			out.writeUTF(this.tbl_type);
		}

		@Override
		public String toString() {
			// No need to wrap concatenation in new String(...).
			return this.tbl_name + " " + this.tbl_type;
		}
	}

	/**
	 * Splits each input line ("name type") and emits (name, type).
	 * The byte-offset key is unused.
	 */
	public static class ConnMysqlMapper extends
			Mapper<LongWritable, Text, Text, Text> {
		@Override
		public void map(LongWritable key, Text value, Context context)
				throws IOException, InterruptedException {
			// Split once (the original split the line twice), and skip
			// malformed lines that would otherwise throw
			// ArrayIndexOutOfBoundsException on fields[1].
			String[] fields = value.toString().split(" ");
			if (fields.length < 2) {
				return;
			}
			context.write(new Text(fields[0]), new Text(fields[1]));
		}
	}

	/**
	 * Wraps each (name, type) pair into a TblsWritable, which DBOutputFormat
	 * turns into one INSERT per record. The output value is unused (null):
	 * the key alone carries all the columns to write.
	 */
	public static class ConnMysqlReducer extends
			Reducer<Text, Text, TblsWritable, TblsWritable> {
		@Override
		public void reduce(Text key, Iterable<Text> values, Context context)
				throws IOException, InterruptedException {
			// Enhanced for is equivalent to the explicit Iterator loop.
			for (Text value : values) {
				context.write(new TblsWritable(key.toString(), value.toString()), null);
			}
		}
	}

	/**
	 * Job driver: packages the classes into a temp jar (needed when submitting
	 * from an IDE), ships the MySQL JDBC driver to the cluster via the
	 * distributed cache, and configures the DB sink.
	 */
	public static void main(String[] args) throws IOException,
			InterruptedException, ClassNotFoundException {
		File jarfile = EJob.createTempJar("bin");
		// Fixed: the original "usr/hadoop/conf" was a relative path; the
		// cluster files live under /usr/hadoop (see the transcript above).
		EJob.addClasspath("/usr/hadoop/conf");

		ClassLoader classLoader = EJob.getClassLoader();
		Thread.currentThread().setContextClassLoader(classLoader);

		Configuration conf = new Configuration();
		// Ship the JDBC driver jar (pre-uploaded to HDFS) to every task's
		// classpath; without it the tasks fail with ClassNotFoundException.
		DistributedCache.addFileToClassPath(new Path(
				"hdfs://172.30.1.245:9000/user/hadoop/jar/mysql-connector-java-5.1.6-bin.jar"), conf);
		// Required so the job is submitted to the remote JobTracker instead
		// of running in the local runner.
		conf.set("mapred.job.tracker", "172.30.1.245:9001");
		DBConfiguration.configureDB(conf, "com.mysql.jdbc.Driver",
				"jdbc:mysql://172.30.1.245:3306/sqooptest", "sqoop", "sqoop");
		Job job = new Job(conf, "test mysql connection");
		// Point the job at the temp jar built above (IDE submission); this
		// replaces the usual job.setJarByClass(WriteDataToMysql.class).
		((JobConf) job.getConfiguration()).setJar(jarfile.toString());

		job.setMapperClass(ConnMysqlMapper.class);
		job.setReducerClass(ConnMysqlReducer.class);

		// NOTE(review): these set the map-side output types here; the reduce
		// output goes through DBOutputFormat, which uses the key's DBWritable.
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Text.class);

		job.setInputFormatClass(TextInputFormat.class);
		job.setOutputFormatClass(DBOutputFormat.class);
		FileInputFormat.addInputPath(job, new Path("hdfs://172.30.1.245:9000/user/hadoop/in"));

		// Target table and columns for the generated INSERT statements.
		DBOutputFormat.setOutput(job, "lxw_tabls", "TBL_NAME", "TBL_TYPE");
		System.exit(job.waitForCompletion(true) ? 0 : 1);
	}
}


运行结果:

mysql> use sqooptest;
Reading table information for completion of table and column names
You can turn off this feature to get a quicker startup with -A

Database changed
mysql> show tables;
+---------------------+
| Tables_in_sqooptest |
+---------------------+
| lxw_tabls           |
| tb1                 |
| wordcount           |
+---------------------+
3 rows in set (0.00 sec)

mysql> select * from lxw_tables;
ERROR 1146 (42S02): Table 'sqooptest.lxw_tables' doesn't exist
mysql> select * from lxw_tabls;
+----------+----------+
| TBL_NAME | TBL_TYPE |
+----------+----------+
| abc      | x        |
| chd      | z        |
| def      | y        |
+----------+----------+
3 rows in set (0.00 sec)


版权声明:本文为博主原创文章,未经博主允许不得转载。 举报

相关文章推荐

Hadoop 中利用 mapreduce 读写 mysql 数据

有时候我们在项目中会遇到输入结果集很大,但是输出结果很小,比如一些 pv、uv 数据,然后为了实时查询的需求,或者一些 OLAP 的需求,我们需要 mapreduce 与 mysql 进行数据的交互,...

一个程序员多年的收藏

2010 - 01 - 15 缩略显示 [置顶] 一个程序员的多年珍藏(1月23日最新更新) 文章分类:Java编程 程序员珍藏的东西会是什么?呵呵,除了平时写的代码,就是那些百看不...

程序员升职加薪指南!还缺一个“证”!

CSDN出品,立即查看!

MapReduce直接连接Mysql获取数据

Mysql中数据:   mysql&gt; select * from lxw_tbls; +---------------------+----------------+ | TBL_NAME | TBL_TYPE | +-------...

我的json获取数据实例,直接运行就可以看到了

开始一直用jq的ajax接收值也是字符但是没用过json数据。(所以一直都用切割的方法来做)听朋友们说JSon处理数据比较好,所以也加入了json这个行列了。快来看看吧。对AJAX比较接收处理数据比较好!力荐!... 来自( http://www.ok22.org/art_detail.aspx...

转的一杂谈

网站架构(页面静态化,图片服务器分离,负载均衡)方案全解析 文章分类:综合技术 1、HTML静态化其实大家都知道,效率最高、消耗最小的就是纯静态化的html页面,所以我们尽可能使我们的网站上的页面采用...
收藏助手
不良信息举报
您举报文章:深度学习:神经网络中的前向传播和反向传播算法推导
举报原因:
原因补充:

(最多只允许输入30个字)