【第22期】观点:IT 行业加班,到底有没有价值?

MapReduce直接连接Mysql获取数据 (新API写法)

原创 2013年12月04日 18:09:43

创建表:

DROP TABLE IF EXISTS `sqooptest`.`lxw_tabls`;
CREATE TABLE `sqooptest`.`lxw_tabls` (
`TBL_NAME` varchar(20) default NULL,
`TBL_TYPE` varchar(20) default NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

创建hdfs输入文件:

[hadoop@slave-245 file]$ vi test3.txt
abc x
def y
chd z

[hadoop@slave-245 file]$ hadoop fs -mkdir in
Warning: $HADOOP_HOME is deprecated.

[hadoop@slave-245 file]$ hadoop fs -ls
Warning: $HADOOP_HOME is deprecated.

Found 1 items
drwxr-xr-x   - hadoop supergroup          0 2013-12-04 17:03 /user/hadoop/in
[hadoop@slave-245 file]$ hadoop fs -put test3.txt in
Warning: $HADOOP_HOME is deprecated.

[hadoop@slave-245 file]$ hadoop fs -ls
Warning: $HADOOP_HOME is deprecated.

Found 1 items
drwxr-xr-x   - hadoop supergroup          0 2013-12-04 17:03 /user/hadoop/in
[hadoop@slave-245 file]$ hadoop fs -ls in
Warning: $HADOOP_HOME is deprecated.

Found 1 items
-rw-r--r--   1 hadoop supergroup         18 2013-12-04 17:03 /user/hadoop/in/test3.txt
[hadoop@slave-245 file]$ hadoop fs -mkdir jar
Warning: $HADOOP_HOME is deprecated.

[hadoop@slave-245 file]$ hadoop fs -mkdir ls
Warning: $HADOOP_HOME is deprecated.

[hadoop@slave-245 file]$ hadoop fs -ls
Warning: $HADOOP_HOME is deprecated.

Found 3 items
drwxr-xr-x   - hadoop supergroup          0 2013-12-04 17:03 /user/hadoop/in
drwxr-xr-x   - hadoop supergroup          0 2013-12-04 17:47 /user/hadoop/jar
drwxr-xr-x   - hadoop supergroup          0 2013-12-04 17:47 /user/hadoop/ls
[hadoop@slave-245 file]$ hadoop fs -rmr ls
Warning: $HADOOP_HOME is deprecated.

Deleted hdfs://slave-245:9000/user/hadoop/ls
[hadoop@slave-245 file]$ hadoop fs -ls
Warning: $HADOOP_HOME is deprecated.

Found 2 items
drwxr-xr-x   - hadoop supergroup          0 2013-12-04 17:03 /user/hadoop/in
drwxr-xr-x   - hadoop supergroup          0 2013-12-04 17:47 /user/hadoop/jar
[hadoop@slave-245 file]$ hadoop fs -put /usr/hadoop/lib/mysql-connector-java-5.1.6-bin.jar jar
Warning: $HADOOP_HOME is deprecated.

[hadoop@slave-245 file]$ hadoop fs -ls jar
Warning: $HADOOP_HOME is deprecated.

Found 1 items
-rw-r--r--   1 hadoop supergroup     703265 2013-12-04 17:48 /user/hadoop/jar/mysql-connector-java-5.1.6-bin.jar

程序代码:

package dbinput;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.File;
import java.io.IOException;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.Iterator;

import mapr.EJob;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.db.DBConfiguration;
import org.apache.hadoop.mapreduce.lib.db.DBOutputFormat;
import org.apache.hadoop.mapreduce.lib.db.DBWritable;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

/**
 * 将mapreduce的结果数据写入mysql中
 * 
 * @author administrator
 * 
 */
public class WriteDataToMysql {
	/**
	 * Writes MapReduce results into MySQL via {@code DBOutputFormat}.
	 *
	 * Input: whitespace-separated lines ("name type") read from HDFS.
	 * Output: one row per (name, type) pair inserted into table
	 * {@code lxw_tabls(TBL_NAME, TBL_TYPE)}.
	 */

	/**
	 * Record type written to MySQL. Implements both {@link Writable}
	 * (for Hadoop serialization) and {@link DBWritable} (for JDBC binding);
	 * the two read/write pairs must serialize the fields in the same order.
	 */
	public static class TblsWritable implements Writable, DBWritable {
		String tbl_name;
		String tbl_type;

		/** No-arg constructor required by Hadoop's reflection-based instantiation. */
		public TblsWritable() {
		}

		public TblsWritable(String tbl_name, String tbl_type) {
			this.tbl_name = tbl_name;
			this.tbl_type = tbl_type;
		}

		/** Populates the fields from a JDBC result row (columns 1 and 2). */
		@Override
		public void readFields(ResultSet resultSet) throws SQLException {
			this.tbl_name = resultSet.getString(1);
			this.tbl_type = resultSet.getString(2);
		}

		/** Binds the fields to the INSERT statement's two placeholders. */
		@Override
		public void write(PreparedStatement statement) throws SQLException {
			statement.setString(1, this.tbl_name);
			statement.setString(2, this.tbl_type);
		}

		/** Hadoop deserialization; order must match {@link #write(DataOutput)}. */
		@Override
		public void readFields(DataInput in) throws IOException {
			this.tbl_name = in.readUTF();
			this.tbl_type = in.readUTF();
		}

		/** Hadoop serialization; order must match {@link #readFields(DataInput)}. */
		@Override
		public void write(DataOutput out) throws IOException {
			out.writeUTF(this.tbl_name);
			out.writeUTF(this.tbl_type);
		}

		@Override
		public String toString() {
			// No need to wrap concatenation in new String(...).
			return this.tbl_name + " " + this.tbl_type;
		}
	}

	/**
	 * Splits each input line ("name type") and emits (name, type).
	 * The byte-offset key is unused.
	 */
	public static class ConnMysqlMapper extends
			Mapper<LongWritable, Text, Text, Text> {
		@Override
		public void map(LongWritable key, Text value, Context context)
				throws IOException, InterruptedException {
			// Split once (the original split the line twice), and skip
			// malformed lines that would otherwise throw
			// ArrayIndexOutOfBoundsException on fields[1].
			String[] fields = value.toString().split(" ");
			if (fields.length < 2) {
				return;
			}
			context.write(new Text(fields[0]), new Text(fields[1]));
		}
	}

	/**
	 * Wraps each (name, type) pair into a TblsWritable, which DBOutputFormat
	 * turns into one INSERT per record. The output value is unused (null):
	 * the key alone carries all the columns to write.
	 */
	public static class ConnMysqlReducer extends
			Reducer<Text, Text, TblsWritable, TblsWritable> {
		@Override
		public void reduce(Text key, Iterable<Text> values, Context context)
				throws IOException, InterruptedException {
			// Enhanced for is equivalent to the explicit Iterator loop.
			for (Text value : values) {
				context.write(new TblsWritable(key.toString(), value.toString()), null);
			}
		}
	}

	/**
	 * Job driver: packages the classes into a temp jar (needed when submitting
	 * from an IDE), ships the MySQL JDBC driver to the cluster via the
	 * distributed cache, and configures the DB sink.
	 */
	public static void main(String[] args) throws IOException,
			InterruptedException, ClassNotFoundException {
		File jarfile = EJob.createTempJar("bin");
		// Fixed: the original "usr/hadoop/conf" was a relative path; the
		// cluster files live under /usr/hadoop (see the transcript above).
		EJob.addClasspath("/usr/hadoop/conf");

		ClassLoader classLoader = EJob.getClassLoader();
		Thread.currentThread().setContextClassLoader(classLoader);

		Configuration conf = new Configuration();
		// Ship the JDBC driver jar (pre-uploaded to HDFS) to every task's
		// classpath; without it the tasks fail with ClassNotFoundException.
		DistributedCache.addFileToClassPath(new Path(
				"hdfs://172.30.1.245:9000/user/hadoop/jar/mysql-connector-java-5.1.6-bin.jar"), conf);
		// Required so the job is submitted to the remote JobTracker instead
		// of running in the local runner.
		conf.set("mapred.job.tracker", "172.30.1.245:9001");
		DBConfiguration.configureDB(conf, "com.mysql.jdbc.Driver",
				"jdbc:mysql://172.30.1.245:3306/sqooptest", "sqoop", "sqoop");
		Job job = new Job(conf, "test mysql connection");
		// Point the job at the temp jar built above (IDE submission); this
		// replaces the usual job.setJarByClass(WriteDataToMysql.class).
		((JobConf) job.getConfiguration()).setJar(jarfile.toString());

		job.setMapperClass(ConnMysqlMapper.class);
		job.setReducerClass(ConnMysqlReducer.class);

		// NOTE(review): these set the map-side output types here; the reduce
		// output goes through DBOutputFormat, which uses the key's DBWritable.
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Text.class);

		job.setInputFormatClass(TextInputFormat.class);
		job.setOutputFormatClass(DBOutputFormat.class);
		FileInputFormat.addInputPath(job, new Path("hdfs://172.30.1.245:9000/user/hadoop/in"));

		// Target table and columns for the generated INSERT statements.
		DBOutputFormat.setOutput(job, "lxw_tabls", "TBL_NAME", "TBL_TYPE");
		System.exit(job.waitForCompletion(true) ? 0 : 1);
	}
}


运行结果:

mysql> use sqooptest;
Reading table information for completion of table and column names
You can turn off this feature to get a quicker startup with -A

Database changed
mysql> show tables;
+---------------------+
| Tables_in_sqooptest |
+---------------------+
| lxw_tabls           |
| tb1                 |
| wordcount           |
+---------------------+
3 rows in set (0.00 sec)

mysql> select * from lxw_tables;
ERROR 1146 (42S02): Table 'sqooptest.lxw_tables' doesn't exist
mysql> select * from lxw_tabls;
+----------+----------+
| TBL_NAME | TBL_TYPE |
+----------+----------+
| abc      | x        |
| chd      | z        |
| def      | y        |
+----------+----------+
3 rows in set (0.00 sec)


版权声明:本文为博主原创文章,未经博主允许不得转载。 举报

相关文章推荐

Hadoop 中利用 mapreduce 读写 mysql 数据

有时候我们在项目中会遇到输入结果集很大,但是输出结果很小,比如一些 pv、uv 数据,然后为了实时查询的需求,或者一些 OLAP 的需求,我们需要 mapreduce 与 mysql 进行数据的交互,...

一个程序员多年的收藏

2010 - 01 - 15 缩略显示 [置顶] 一个程序员的多年珍藏(1月23日最新更新) 文章分类:Java编程 程序员珍藏的东西会是什么?呵呵,除了平时写的代码,就是那些百看不...

程序员升职加薪指南!还缺一个“证”!

CSDN出品,立即查看!

MapReduce直接连接Mysql获取数据

Mysql中数据:   mysql&gt; select * from lxw_tbls; +---------------------+----------------+ | TBL_NAME | TBL_TYPE | +-------...

我的json获取数据实例,直接运行就可以看到了

开始一直用jq的ajax接收值也是字符但是没用过json数据。(所以一直都用切割的方法来做)听朋友们说JSon处理数据比较好,所以也加入了json这个行列了。快来看看吧。对AJAX比较接收处理数据比较好!力荐!... 来自( http://www.ok22.org/art_detail.aspx...

转的一杂谈

网站架构(页面静态化,图片服务器分离,负载均衡)方案全解析 文章分类:综合技术 1、HTML静态化其实大家都知道,效率最高、消耗最小的就是纯静态化的html页面,所以我们尽可能使我们的网站上的页面采用...
收藏助手
不良信息举报
您举报文章:深度学习:神经网络中的前向传播和反向传播算法推导
举报原因:
原因补充:

(最多只允许输入30个字)