创建表:
-- Target table for the MapReduce job's DBOutputFormat sink.
DROP TABLE IF EXISTS `sqooptest`.`lxw_tabls`;
CREATE TABLE `sqooptest`.`lxw_tabls` (
    `TBL_NAME` VARCHAR(20) DEFAULT NULL,
    `TBL_TYPE` VARCHAR(20) DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
创建hdfs输入文件:
[hadoop@slave-245 file]$ vi test3.txt
abc x
def y
chd z
[hadoop@slave-245 file]$ hadoop fs -mkdir in
Warning: $HADOOP_HOME is deprecated.
[hadoop@slave-245 file]$ hadoop fs -ls
Warning: $HADOOP_HOME is deprecated.
Found 1 items
drwxr-xr-x - hadoop supergroup 0 2013-12-04 17:03 /user/hadoop/in
[hadoop@slave-245 file]$ hadoop fs -put test3.txt in
Warning: $HADOOP_HOME is deprecated.
[hadoop@slave-245 file]$ hadoop fs -ls
Warning: $HADOOP_HOME is deprecated.
Found 1 items
drwxr-xr-x - hadoop supergroup 0 2013-12-04 17:03 /user/hadoop/in
[hadoop@slave-245 file]$ hadoop fs -ls in
Warning: $HADOOP_HOME is deprecated.
Found 1 items
-rw-r--r-- 1 hadoop supergroup 18 2013-12-04 17:03 /user/hadoop/in/test3.txt
[hadoop@slave-245 file]$ hadoop fs -mkdir jar
Warning: $HADOOP_HOME is deprecated.
[hadoop@slave-245 file]$ hadoop fs -mkdir ls
Warning: $HADOOP_HOME is deprecated.
[hadoop@slave-245 file]$ hadoop fs -ls
Warning: $HADOOP_HOME is deprecated.
Found 3 items
drwxr-xr-x - hadoop supergroup 0 2013-12-04 17:03 /user/hadoop/in
drwxr-xr-x - hadoop supergroup 0 2013-12-04 17:47 /user/hadoop/jar
drwxr-xr-x - hadoop supergroup 0 2013-12-04 17:47 /user/hadoop/ls
[hadoop@slave-245 file]$ hadoop fs -rmr ls
Warning: $HADOOP_HOME is deprecated.
Deleted hdfs://slave-245:9000/user/hadoop/ls
[hadoop@slave-245 file]$ hadoop fs -ls
Warning: $HADOOP_HOME is deprecated.
Found 2 items
drwxr-xr-x - hadoop supergroup 0 2013-12-04 17:03 /user/hadoop/in
drwxr-xr-x - hadoop supergroup 0 2013-12-04 17:47 /user/hadoop/jar
[hadoop@slave-245 file]$ hadoop fs -put /usr/hadoop/lib/mysql-connector-java-5.1.6-bin.jar jar
Warning: $HADOOP_HOME is deprecated.
[hadoop@slave-245 file]$ hadoop fs -ls jar
Warning: $HADOOP_HOME is deprecated.
Found 1 items
-rw-r--r-- 1 hadoop supergroup 703265 2013-12-04 17:48 /user/hadoop/jar/mysql-connector-java-5.1.6-bin.jar
程序代码:
package dbinput;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.File;
import java.io.IOException;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.Iterator;
import mapr.EJob;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.db.DBConfiguration;
import org.apache.hadoop.mapreduce.lib.db.DBOutputFormat;
import org.apache.hadoop.mapreduce.lib.db.DBWritable;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
/**
 * Writes MapReduce results into the MySQL table sqooptest.lxw_tabls
 * via DBOutputFormat.
 *
 * Input: HDFS text files whose lines look like "name type" (single
 * space separated). Output: one row per (name, type) pair, inserted
 * through the JDBC driver shipped via DistributedCache.
 *
 * @author administrator
 */
public class WriteDataToMysql {

    /**
     * Record type handed to DBOutputFormat. Implements Writable (so it
     * can travel through the Hadoop serialization machinery) and
     * DBWritable (so DBOutputFormat can bind it onto the JDBC
     * PreparedStatement).
     */
    public static class TblsWritable implements Writable, DBWritable {
        String tbl_name;
        String tbl_type;

        public TblsWritable() {
        }

        public TblsWritable(String tbl_name, String tbl_type) {
            this.tbl_name = tbl_name;
            this.tbl_type = tbl_type;
        }

        @Override
        public void readFields(ResultSet resultSet) throws SQLException {
            // Column order must match DBOutputFormat.setOutput(...,
            // "TBL_NAME", "TBL_TYPE") in main().
            this.tbl_name = resultSet.getString(1);
            this.tbl_type = resultSet.getString(2);
        }

        @Override
        public void write(PreparedStatement statement) throws SQLException {
            statement.setString(1, this.tbl_name);
            statement.setString(2, this.tbl_type);
        }

        @Override
        public void readFields(DataInput in) throws IOException {
            this.tbl_name = in.readUTF();
            this.tbl_type = in.readUTF();
        }

        @Override
        public void write(DataOutput out) throws IOException {
            out.writeUTF(this.tbl_name);
            out.writeUTF(this.tbl_type);
        }

        @Override
        public String toString() {
            return this.tbl_name + " " + this.tbl_type;
        }
    }

    /**
     * Splits each input line "name type" and emits (name, type).
     * The byte-offset key supplied by TextInputFormat is unused.
     */
    public static class ConnMysqlMapper extends
            Mapper<LongWritable, Text, Text, Text> {
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Split once (the original split the same line twice) and
            // skip malformed lines instead of throwing
            // ArrayIndexOutOfBoundsException.
            String[] fields = value.toString().split(" ");
            if (fields.length < 2) {
                return;
            }
            context.write(new Text(fields[0]), new Text(fields[1]));
        }
    }

    /**
     * Wraps each (key, value) pair into a TblsWritable and writes it as
     * the output key. DBOutputFormat only uses the key, so the output
     * value is passed as null.
     */
    public static class ConnMysqlReducer extends
            Reducer<Text, Text, TblsWritable, TblsWritable> {
        public void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            for (Text value : values) {
                context.write(new TblsWritable(key.toString(), value.toString()), null);
            }
        }
    }

    /**
     * Configures and submits the job against the remote cluster at
     * 172.30.1.245, then exits with the job's success status.
     */
    public static void main(String[] args) throws IOException,
            InterruptedException, ClassNotFoundException {
        // Package the compiled classes into a temporary jar so the
        // remote JobTracker can load the mapper/reducer classes.
        File jarFile = EJob.createTempJar("bin");
        // Fixed: was "usr/hadoop/conf" (relative, resolved against the
        // CWD); the install lives at /usr/hadoop per the session above.
        EJob.addClasspath("/usr/hadoop/conf");
        ClassLoader classLoader = EJob.getClassLoader();
        Thread.currentThread().setContextClassLoader(classLoader);

        Configuration conf = new Configuration();
        // Ship the MySQL JDBC driver from HDFS onto every task's
        // classpath; tasks run on remote nodes without the jar locally.
        DistributedCache.addFileToClassPath(new Path(
                "hdfs://172.30.1.245:9000/user/hadoop/jar/mysql-connector-java-5.1.6-bin.jar"), conf);
        // Essential: submit to the remote JobTracker instead of running
        // in the local runner (where the DistributedCache jar would not
        // be needed and the cluster would never see the job).
        conf.set("mapred.job.tracker", "172.30.1.245:9001");
        DBConfiguration.configureDB(conf, "com.mysql.jdbc.Driver",
                "jdbc:mysql://172.30.1.245:3306/sqooptest", "sqoop", "sqoop");

        Job job = new Job(conf, "test mysql connection");
        // setJar (instead of setJarByClass) points the job at the temp
        // jar built above, since this runs from an IDE, not a packaged jar.
        ((JobConf) job.getConfiguration()).setJar(jarFile.toString());
        job.setMapperClass(ConnMysqlMapper.class);
        job.setReducerClass(ConnMysqlReducer.class);
        // With no explicit map-output classes, these also define the
        // shuffle types (Text/Text); the reducer's TblsWritable key goes
        // straight to DBOutputFormat, which does not type-check it.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(DBOutputFormat.class);
        FileInputFormat.addInputPath(job, new Path("hdfs://172.30.1.245:9000/user/hadoop/in"));
        // Maps TblsWritable's two PreparedStatement slots onto these columns.
        DBOutputFormat.setOutput(job, "lxw_tabls", "TBL_NAME", "TBL_TYPE");
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
运行结果:
mysql> use sqooptest;
Reading table information for completion of table and column names
You can turn off this feature to get a quicker startup with -A
Database changed
mysql> show tables;
+---------------------+
| Tables_in_sqooptest |
+---------------------+
| lxw_tabls |
| tb1 |
| wordcount |
+---------------------+
3 rows in set (0.00 sec)
mysql> select * from lxw_tables;
ERROR 1146 (42S02): Table 'sqooptest.lxw_tables' doesn't exist
mysql> select * from lxw_tabls;
+----------+----------+
| TBL_NAME | TBL_TYPE |
+----------+----------+
| abc | x |
| chd | z |
| def | y |
+----------+----------+
3 rows in set (0.00 sec)