Reading data from a database into HDFS with the Hadoop API
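The class below is a complete single-file MapReduce job: DBInputFormat pulls rows from an Oracle table over JDBC, the built-in identity mapper and reducer pass them through as AccessRecord values, and FileOutputFormat writes each record's toString() form to the output directory. AccessRecord implements both DBWritable (mapping the record to JDBC statements and result sets) and Writable (Hadoop's own serialization between map and reduce). The Oracle JDBC driver jar must be on the classpath at runtime.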
package com.avgdate;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.Date;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.db.DBConfiguration;
import org.apache.hadoop.mapreduce.lib.db.DBInputFormat;
import org.apache.hadoop.mapreduce.lib.db.DBOutputFormat;
import org.apache.hadoop.mapreduce.lib.db.DBWritable;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class T extends Configured {

    // JDBC connection settings for the source Oracle database.
    static String driverClassName = "oracle.jdbc.driver.OracleDriver";
    static String url = "jdbc:oracle:thin:@10.10.35.65:1521/crm_standby";
    static String username = "crmdev";
    static String password = "crmdev";

    // A record that is both DBWritable (maps to JDBC statements/result sets)
    // and Writable (Hadoop serialization between map and reduce).
    static class AccessRecord implements DBWritable, Writable {
        long clubid;
        String clubname;
        Date daytime;
        @Override
        public void write(PreparedStatement statement) throws SQLException {
            // Fills the INSERT placeholders when writing back to a database
            // (only invoked by DBOutputFormat).
            statement.setLong(1, clubid);
            statement.setString(2, clubname);
            // JDBC needs java.sql.Date, not java.util.Date.
            statement.setDate(3, new java.sql.Date(daytime.getTime()));
            System.out.println("PreparedStatement");
        }
        @Override
        public void readFields(ResultSet resultSet) throws SQLException { // (1) populate the record from a row of the SQL query
            this.clubid = resultSet.getLong(1);
            this.clubname = resultSet.getString(2); // column 2 is nickname in the query below
            this.daytime = resultSet.getDate(3);
            System.out.println("ResultSet");
        }
        @Override
        public void write(DataOutput out) throws IOException { // (2) serialize the record when the mapper emits it
            out.writeLong(clubid);
            Text.writeString(out, clubname);
            out.writeLong(daytime.getTime()); // the date travels as an epoch timestamp
            System.out.println("DataOutput");
        }
        @Override
        public void readFields(DataInput in) throws IOException { // (3) deserialize the record on the reduce side
            this.clubid = in.readLong();
            this.clubname = Text.readString(in);
            this.daytime = new Date(in.readLong());
            System.out.println("DataInput");
        }
        @Override
        public String toString() { // (4) the text form written to the HDFS output file
            System.out.println("===");
            return this.clubid + "\t" + this.clubname + "\t" + this.daytime;
        }
    }
    /**
     * @param args args[1] is the output directory
     * @throws Exception
     */
    public static void main(String[] args) throws Exception {
        long timestamp = new Date().getTime();
        if (args.length != 2) {
            args = new String[2];
            args[0] = " "; // unused placeholder
            args[1] = "d:\\output\\db\\" + timestamp;
        }
        Configuration conf = new Configuration();
        DBConfiguration.configureDB(conf, driverClassName, url, username, password);
        Job job = new Job(conf, "t");
        // Read input splits from the database; the default identity mapper
        // and reducer pass the records straight through.
        job.setInputFormatClass(DBInputFormat.class);
        DBInputFormat.setInput(job, AccessRecord.class,
                "SELECT clubid, nickname, daytime FROM CUST_CLUB WHERE rownum < 100",
                "SELECT count(clubid) FROM CUST_CLUB"); // the count query sizes the splits
        // Alternative, table-based form:
        // DBInputFormat.setInput(job, AccessRecord.class, "CUST_CLUB",
        //         "rownum < 10", "clubid", new String[] { "clubid" });
        // Output types: DBInputFormat keys are LongWritable row ids,
        // values are AccessRecord instances.
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(AccessRecord.class);
        // Output path: a "tmp" subdirectory under args[1].
        Path path = new Path(args[1], "tmp");
        FileOutputFormat.setOutputPath(job, path);
        // To write back to a database table instead:
        // DBOutputFormat.setOutput(job, "tableName", "fieldNames");
        job.waitForCompletion(true);
    }
}
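The two commented-out calls above hint at useful variants. DBInputFormat.setInput also has a table-based overload that generates the SELECT for you, and DBOutputFormat can send job output back into a table instead of HDFS. A minimal sketch of both, assuming the same CUST_CLUB columns; the target table CUST_CLUB_COPY is hypothetical:

        // Table-based input: DBInputFormat builds
        // "SELECT clubid, clubname, daytime FROM CUST_CLUB WHERE rownum < 100 ORDER BY clubid"
        DBInputFormat.setInput(job, AccessRecord.class,
                "CUST_CLUB",                       // table name
                "rownum < 100",                    // WHERE conditions
                "clubid",                          // ORDER BY column
                "clubid", "clubname", "daytime");  // columns to read

        // Writing back to a database instead of HDFS. CUST_CLUB_COPY is a
        // hypothetical target table; the job's output key must implement
        // DBWritable, and its write(PreparedStatement) fills the placeholders
        // of the INSERT statement that DBOutputFormat generates.
        job.setOutputFormatClass(DBOutputFormat.class);
        DBOutputFormat.setOutput(job, "CUST_CLUB_COPY", "clubid", "clubname", "daytime");

In the DBOutputFormat case the records would need to be emitted as keys (with any value type such as NullWritable), since DBOutputFormat writes only the key side of each reduce output pair.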