public class MYSQL_TO_HDFS
{
private final static Logger log = Logger.getLogger(KC86DATA_HDFS.class);
/*private static final String out_path = "hdfs:// IP:9000/home/kc86_intput";
private static final String driver = "oracle.jdbc.driver.OracleDriver";
private static final String url = "jdbc:oracle:thin:@IP:1521:orcl";
private static final String username = "CZ_ALL";
private static final String password = "123456";*/
//连接数据库的参数
private static final String out_path = "hdfs:// IP:9000/home/kc86_intput";
private static final String driver = "com.mysql.jdbc.Driver";
private static final String url = "jdbc:mysql://localhost:3306/zicontd";
private static final String username = "root";
private static final String password = "root";
/**
* 读取数据库表中返回的实体类
* */
public static class KC86Entity implements Writable, DBWritable
{
private String AAZ217;
private String AAC001;
/**
* 无参构造器
*/
public KC86Entity() {
super();
// TODO Auto-generated constructor stub
}
public KC86Entity(String aAZ217, String aAC001) {
super();
AAZ217 = aAZ217;
AAC001 = aAC001;
}
@Override
public String toString()
{
return "KC86Entity [AAZ217=" + AAZ217 + ", AAC001=" + AAC001 + "]";
}
@Override
public void write(PreparedStatement statement) throws SQLException {
// TODO Auto-generated method stub
statement.setString(1, AAZ217);
statement.setString(2, AAC001);
}
@Override
public void readFields(ResultSet resultSet) throws SQLException {
// TODO Auto-generated method stub
AAZ217 = resultSet.getString(1);
AAC001 = resultSet.getString(2);
}
@Override
public void readFields(DataInput in) throws IOException {
// TODO Auto-generated method stub
AAZ217 =in.readUTF();
AAC001 =in.readUTF();
}
}
/**
* Mapper函数,LongWritable, KC86Entity, NullWritable, KC86Entity为输入键、输入值;输出键,输出值
* */
static class MyDBInputFormatMRMapper extends Mapper<LongWritable, KC86Entity, NullWritable, KC86Entity>
{
@Override
protected void map(LongWritable key, KC86Entity value,Context context)
throws IOException, InterruptedException {
context.write(NullWritable.get(), value);
}
}
/**
* main的主函数
* @throws IOException
* @throws InterruptedException
* @throws ClassNotFoundException
* */
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException
{
//当前运行环境中配置的HADOOP_USER_NAME属性值
String hadoop_user_name = System.getenv("HADOOP_USER_NAME");
System.setProperty("HADOOP_USER_NAME", hadoop_user_name);
Configuration conf = new Configuration();
//在获取job对象之前,一定要先指定数据库的链接信息
DBConfiguration.configureDB(conf, driver, url, username, password);
Job job = Job.getInstance(conf);
String[] fieldNames = new String[] {"AAZ217","AAC001"};
DBInputFormat.setInput(job, KC86Entity.class, "KC86_test", null, null, fieldNames);
job.setJarByClass(MYSQL_TO_HDFS .class);
job.setMapperClass(MyDBInputFormatMRMapper.class);
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(KC86Entity.class);
job.setNumReduceTasks(0);
job.setInputFormatClass(DBInputFormat.class);
//FileSystem fs = FileSystem.get(conf);
FileSystem fs = FileSystem.get(URI.create(out_path), conf,"root");
Path p = new Path(out_path);
if(fs.exists(p)){
fs.delete(p,true);
//System.out.println("输出路径存在,已删除!");
log.info("输出路径存在,已删除!");
}
FileOutputFormat.setOutputPath(job,p);
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
===============================================================
以下是maven的依赖pom
javax.activation activation 1.1 org.apache.directory.server apacheds-i18n 2.0.0-M15 org.apache.directory.server apacheds-kerberos-codec 2.0.0-M15 org.apache.directory.api api-asn1-api 1.0.0-M20 org.apache.directory.api api-util 1.0.0-M20 asm asm 3.2 org.apache.avro avro 1.7.4 <dependency>
<groupId>commons-beanutils</groupId>
<artifactId>commons-beanutils-core</artifactId>
<version>1.8.0</version>
</dependency>
<dependency>
<groupId>commons-cli</groupId>
<artifactId>commons-cli</artifactId>
<version>1.2</version>
</dependency>
<dependency>
<groupId>commons-codec</groupId>
<artifactId>commons-codec</artifactId>
</dependency>
<dependency>
<groupId>commons-collections</groupId>
<artifactId>commons-collections</artifactId>
<version>3.2.2</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-compress</artifactId>
<version>1.4.1</version>
</dependency>
<dependency>
<groupId>commons-configuration</groupId>
<artifactId>commons-configuration</artifactId>
<version>1.6</version>
</dependency>
<dependency>
<groupId>commons-daemon</groupId>
<artifactId>commons-daemon</artifactId>
<version>1.0.13</version>
</dependency>
<dependency>
<groupId>commons-digester</groupId>
<artifactId>commons-digester</artifactId>
<version>1.8</version>
</dependency>
<dependency>
<groupId>commons-httpclient</groupId>
<artifactId>commons-httpclient</artifactId>
<version>3.1</version>
</dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
</dependency>
<dependency>
<groupId>commons-lang</groupId>
<artifactId>commons-lang</artifactId>
<version>2.6</version>
</dependency>
<dependency>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
<version>1.1.3</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-math3</artifactId>
<version>3.1.1</version>
</dependency>
<dependency>
<groupId>commons-net</groupId>
<artifactId>commons-net</artifactId>
<version>3.1</version>
</dependency>
<dependency>
<groupId>org.apache.curator</groupId>
<artifactId>curator-client</artifactId>
<version>2.7.1</version>
</dependency>
<dependency>
<groupId>org.apache.curator</groupId>
<artifactId>curator-framework</artifactId>
<version>2.7.1</version>
</dependency>
<dependency>
<groupId>org.apache.curator</groupId>
<artifactId>curator-recipes</artifactId>
<version>2.7.1</version>
</dependency>
<dependency>
<groupId>com.google.code.gson</groupId>
<artifactId>gson</artifactId>
<version>2.2.4</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
<version>2.7.2</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-common</artifactId>
<version>2.7.2</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-jobclient</artifactId>
<version>2.7.2</version>
</dependency>
<!--不能用hadoop-core 1.2.1包 -->
<!-- <dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-core</artifactId>
<version>1.2.1</version>
</dependency> -->
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>11.0.2</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-annotations</artifactId>
<version>2.7.2</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-auth</artifactId>
<version>2.7.2</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.7.2</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>2.7.2</version>
</dependency>
<dependency>
<groupId>org.hamcrest</groupId>
<artifactId>hamcrest-core</artifactId>
<version>1.3</version>
</dependency>
<dependency>
<groupId>org.apache.htrace</groupId>
<artifactId>htrace-core</artifactId>
<version>3.1.0-incubating</version>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.2.5</version>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpcore</artifactId>
<version>4.2.5</version>
</dependency>
<dependency>
<groupId>org.codehaus.jackson</groupId>
<artifactId>jackson-core-asl</artifactId>
<version>1.9.13</version>
</dependency>
<dependency>
<groupId>org.codehaus.jackson</groupId>
<artifactId>jackson-jaxrs</artifactId>
<version>1.9.13</version>
</dependency>
<dependency>
<groupId>org.codehaus.jackson</groupId>
<artifactId>jackson-mapper-asl</artifactId>
<version>1.9.13</version>
</dependency>
<dependency>
<groupId>org.codehaus.jackson</groupId>
<artifactId>jackson-xc</artifactId>
<version>1.9.13</version>
</dependency>
<dependency>
<groupId>com.jamesmurty.utils</groupId>
<artifactId>java-xmlbuilder</artifactId>
<version>0.4</version>
</dependency>
<dependency>
<groupId>javax.xml.bind</groupId>
<artifactId>jaxb-api</artifactId>
<version>2.2.2</version>
</dependency>
<dependency>
<groupId>com.sun.xml.bind</groupId>
<artifactId>jaxb-impl</artifactId>
<version>2.2.3</version>
</dependency>
<dependency>
<groupId>com.sun.jersey</groupId>
<artifactId>jersey-core</artifactId>
<version>1.9</version>
</dependency>
<dependency>
<groupId>com.sun.jersey</groupId>
<artifactId>jersey-json</artifactId>
<version>1.9</version>
</dependency>
<dependency>
<groupId>com.sun.jersey</groupId>
<artifactId>jersey-server</artifactId>
<version>1.9</version>
</dependency>
<dependency>
<groupId>net.java.dev.jets3t</groupId>
<artifactId>jets3t</artifactId>
<version>0.9.0</version>
</dependency>
<dependency>
<groupId>org.codehaus.jettison</groupId>
<artifactId>jettison</artifactId>
<version>1.1</version>
</dependency>
<dependency>
<groupId>org.mortbay.jetty</groupId>
<artifactId>jetty</artifactId>
<version>6.1.26</version>
</dependency>
<dependency>
<groupId>org.mortbay.jetty</groupId>
<artifactId>jetty-util</artifactId>
<version>6.1.26</version>
</dependency>
<dependency>
<groupId>com.jcraft</groupId>
<artifactId>jsch</artifactId>
<version>0.1.42</version>
</dependency>
<dependency>
<groupId>javax.servlet.jsp</groupId>
<artifactId>jsp-api</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>com.google.code.findbugs</groupId>
<artifactId>jsr305</artifactId>
<version>3.0.0</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.fusesource.leveldbjni</groupId>
<artifactId>leveldbjni-all</artifactId>
<version>1.8</version>
</dependency>
<dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
</dependency>
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-all</artifactId>
<version>1.8.5</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>io.netty</groupId>
<artifactId>netty</artifactId>
<version>3.6.2.Final</version>
</dependency>
<dependency>
<groupId>io.netty</groupId>
<artifactId>netty-all</artifactId>
<version>4.0.23.Final</version>
</dependency>
<dependency>
<groupId>com.thoughtworks.paranamer</groupId>
<artifactId>paranamer</artifactId>
<version>2.3</version>
</dependency>
<dependency>
<groupId>com.google.protobuf</groupId>
<artifactId>protobuf-java</artifactId>
<version>2.5.0</version>
</dependency>
<dependency>
<groupId>javax.servlet</groupId>
<artifactId>servlet-api</artifactId>
<version>2.5</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.xerial.snappy</groupId>
<artifactId>snappy-java</artifactId>
<version>1.0.4.1</version>
</dependency>
<dependency>
<groupId>javax.xml.stream</groupId>
<artifactId>stax-api</artifactId>
<version>1.0-2</version>
</dependency>
<dependency>
<groupId>xerces</groupId>
<artifactId>xercesImpl</artifactId>
<version>2.9.1</version>
</dependency>
<dependency>
<groupId>xml-apis</groupId>
<artifactId>xml-apis</artifactId>
<version>1.3.04</version>
</dependency>
<dependency>
<groupId>xmlenc</groupId>
<artifactId>xmlenc</artifactId>
<version>0.52</version>
</dependency>
<dependency>
<groupId>org.tukaani</groupId>
<artifactId>xz</artifactId>
<version>1.0</version>
</dependency>
<dependency>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
<version>3.4.6</version>
<type>pom</type>
</dependency>
<dependency>
<groupId>com.oracle</groupId>
<artifactId>ojdbc6</artifactId>
</dependency>
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>5.1.44</version>
</dependency>
</dependencies>