package com.peidw.hive2es;
import com.alibaba.fastjson.JSONObject;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.util.StringUtils;
import org.apache.htrace.commons.logging.Log;
import org.apache.htrace.commons.logging.LogFactory;
import org.elasticsearch.hadoop.mr.EsOutputFormat;
import java.io.IOException;
/**
 * MapReduce driver that syncs the Hive table mydb.tmp_stud1 into Elasticsearch.
 *
 * <p>The table's text files are read from
 * hdfs://192.168.177.139:8888/user/hive/warehouse/mydb.db/tmp_stud1. Each
 * tab-separated row is turned into a JSON document and written to the
 * tmp_stud1/tmp_stud1 resource through {@link EsOutputFormat}.
 *
 * <p>NOTE(review): Log/LogFactory are imported from htrace's shaded
 * commons-logging package; consider switching the imports to
 * org.apache.commons.logging.
 */
public class Hive2Es2TmpStud1 {
    private static final Log LOG = LogFactory.getLog(Hive2Es2TmpStud1.class);

    /** JSON field names, listed in the column order of the source rows. */
    private static final String[] FIELD_NAMES = {"name", "vage", "height", "isok"};

    /** Counter group and name used to report rows that could not be converted. */
    private static final String COUNTER_GROUP = "Hive2Es2TmpStud1";
    private static final String MALFORMED_ROWS = "MALFORMED_ROWS";

    /**
     * Converts one tab-separated text row into a JSON document for Elasticsearch.
     */
    public static class MyMapper extends Mapper<LongWritable, Text, NullWritable, Text> {

        /**
         * Splits the row on tabs and emits a JSON object with the fields
         * name, vage, height and isok (taken from columns 0..3).
         *
         * <p>Rows with fewer than four columns (for example blank lines) are
         * skipped and counted instead of failing the task with an
         * ArrayIndexOutOfBoundsException.
         *
         * @param key     byte offset of the line within the input split
         * @param value   the raw text line
         * @param context task context used to emit the document and update counters
         * @throws IOException          if writing the output record fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString();

            // Per-record logging belongs at DEBUG; at INFO it floods the task logs.
            if (LOG.isDebugEnabled()) {
                LOG.debug("value.toString().trim().getBytes()--->" + line.trim());
            }

            String[] columns = StringUtils.split(line, '\t');
            if (columns == null || columns.length < FIELD_NAMES.length) {
                // Make the skipped row visible in the job counters and task log.
                context.getCounter(COUNTER_GROUP, MALFORMED_ROWS).increment(1);
                LOG.warn("Skipping malformed row at offset " + key.get()
                        + ": expected at least " + FIELD_NAMES.length + " columns but found "
                        + (columns == null ? 0 : columns.length));
                return;
            }

            JSONObject document = new JSONObject();
            for (int i = 0; i < FIELD_NAMES.length; i++) {
                document.put(FIELD_NAMES[i], columns[i]);
            }
            context.write(NullWritable.get(), new Text(document.toJSONString()));
        }
    }

    /**
     * Configures and submits the job, then exits with status 0 on success and 1 on failure.
     *
     * @param args command-line arguments (unused)
     * @throws IOException            if the job cannot be configured or submitted
     * @throws ClassNotFoundException if a job class cannot be resolved
     * @throws InterruptedException   if the wait for job completion is interrupted
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();

        // Speculative execution would run duplicate attempts that all write to Elasticsearch.
        conf.setBoolean("mapred.map.tasks.speculative.execution", false);
        conf.setBoolean("mapred.reduce.tasks.speculative.execution", false);

        // Elasticsearch target: node address, index/type, document id field, JSON pass-through.
        conf.set("es.nodes", "centos.hadoop:9200");
        conf.set("es.resource", "tmp_stud1/tmp_stud1");
        conf.set("es.mapping.id", "name");
        conf.set("es.input.json", "true");
        conf.setInt("es.mapred.number_of_shards", 2);
        conf.setInt("es.mapred.number_of_replicas", 0);

        Job job = Job.getInstance(conf, "hadoop es write test");
        // Without this the job jar is not shipped and tasks cannot load MyMapper on a cluster.
        job.setJarByClass(Hive2Es2TmpStud1.class);
        job.setMapperClass(Hive2Es2TmpStud1.MyMapper.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(EsOutputFormat.class);
        job.setMapOutputKeyClass(NullWritable.class);
        job.setMapOutputValueClass(Text.class);
        // Map-only job: mapper output goes straight to the output format, no shuffle needed.
        job.setNumReduceTasks(0);

        // Input path of the Hive table's files.
        FileInputFormat.setInputPaths(job, new Path("hdfs://192.168.177.139:8888/user/hive/warehouse/mydb.db/tmp_stud1"));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build for com.peidw:mr. Declares the Hadoop, HBase, elasticsearch-hadoop and
fastjson dependencies and builds an assembly during the package phase. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.peidw</groupId>
<artifactId>mr</artifactId>
<version>1.0-SNAPSHOT</version>
<properties>
<!-- Shared version numbers referenced by the Hadoop and HBase dependencies below. -->
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<hadoop.version>2.7.7</hadoop.version>
<hbase.version>1.4.13</hbase.version>
</properties>
<dependencies>
<!-- Hadoop client, common, HDFS and YARN artifacts, all pinned to hadoop.version. -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>${hadoop.version}</version>
</dependency>
<!-- Test scope only, so it is not part of the runtime dependency set. -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-minicluster</artifactId>
<version>${hadoop.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-api</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-client</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-common</artifactId>
<version>${hadoop.version}</version>
</dependency>
<!-- HBase client libraries, pinned to hbase.version. -->
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-client</artifactId>
<version>${hbase.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-common</artifactId>
<version>${hbase.version}</version>
</dependency>
<!-- Elasticsearch connector for Hadoop; the two cascading artifacts are excluded from its
transitive dependencies. -->
<dependency>
<groupId>org.elasticsearch</groupId>
<artifactId>elasticsearch-hadoop</artifactId>
<version>5.5.0</version>
<exclusions>
<exclusion>
<artifactId>cascading-hadoop</artifactId>
<groupId>cascading</groupId>
</exclusion>
<exclusion>
<artifactId>cascading-local</artifactId>
<groupId>cascading</groupId>
</exclusion>
</exclusions>
</dependency>
<!-- JSON library.
NOTE(review): 1.2.5 is a very old fastjson release; later releases contain security fixes
for deserialization issues. Consider upgrading and confirm compatibility. -->
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.5</version>
</dependency>
</dependencies>
<build>
<plugins>
<!-- Builds the assembly described by src/main/resources/assembly.xml by running the
"single" goal in the package phase, and writes the mainClass below into the manifest. -->
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<version>3.1.1</version>
<configuration>
<descriptors>
<descriptor>src/main/resources/assembly.xml</descriptor>
</descriptors>
<archive>
<manifest>
<!--<mainClass>com.peidw.ch1.Driver</mainClass> -->
<!-- NOTE(review): confirm this is the intended entry point; the Hive to Elasticsearch
driver for table tmp_stud1 is named com.peidw.hive2es.Hive2Es2TmpStud1. -->
<mainClass>com.peidw.hive2es.Hive2Es3</mainClass>
</manifest>
</archive>
</configuration>
<executions>
<execution>
<id>make-assembly</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
<!-- Compiles sources with Java 1.8 source and target levels. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.8.1</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
</plugins>
</build>
</project>
assembly.xml
<assembly>
<!-- Assembly descriptor with id "job": produces a single jar without a base directory. -->
<id>job</id>
<formats>
<format>jar</format>
</formats>
<includeBaseDirectory>false</includeBaseDirectory>
<dependencySets>
<!-- Runtime-scope dependencies are copied as unopened jars into lib/; the artifact matching
${groupId}:${artifactId} (presumably this project's own artifact) is left out of lib/. -->
<dependencySet>
<unpack>false</unpack>
<scope>runtime</scope>
<outputDirectory>lib</outputDirectory>
<excludes>
<exclude>${groupId}:${artifactId}</exclude>
</excludes>
</dependencySet>
<!-- The artifact matching ${groupId}:${artifactId} is unpacked; no outputDirectory is set
for this set, so its contents presumably land at the root of the jar. -->
<dependencySet>
<unpack>true</unpack>
<includes>
<include>${groupId}:${artifactId}</include>
</includes>
</dependencySet>
</dependencySets>
</assembly>