1. Producer main class: read HBase data and send it as Kafka messages
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.concurrent.Future;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
import org.apache.hadoop.hbase.util.Base64;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.clients.producer.RecordMetadata;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.VoidFunction;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import scala.Tuple2;
public class SendKafkaMsg {
static final Logger logger = LogManager.getLogger(SendKafkaMsg.class);
public final static String KAFKA_TOPICS = "kafka.update.topic";// target topic
public static void main(String[] args) throws IOException {
HashMap<String, String> argsMap = getArgsMap(args);
// argsMap.put("tableName", "SRV:XIAJIA_TEST");
String tableName = argsMap.get("tableName");
SparkConf conf = new SparkConf();
conf.setAppName("kafka_bulk_insert_ES7_" + tableName);
conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
// conf.setMaster("local");
SparkContext sc = SparkContext.getOrCreate(conf);
JavaSparkContext jsc = JavaSparkContext.fromSparkContext(sc);
JavaPairRDD<ImmutableBytesWritable, Result> newAPIHadoopRDD = getRddData(jsc, tableName);
newAPIHadoopRDD.foreachPartition(new VoidFunction<Iterator<Tuple2<ImmutableBytesWritable, Result>>>() {
private static final long serialVersionUID = 8229313151000588858L;
@Override
public void call(Iterator<Tuple2<ImmutableBytesWritable, Result>> t) throws Exception {
KafkaProducer<String, String> producer = KafkaUtil.getKafkaProducer();
JSONObject json;
String key;
while (t.hasNext()) {
json = new JSONObject();
Tuple2<ImmutableBytesWritable, Result> next = t.next();
Result r = next._2;
byte[] rowKey = r.getRow();
key = Bytes.toString(rowKey);
json.put("entryId", key);
json.put("tableName", tableName);
try {
Future<RecordMetadata> result = producer
.send(new ProducerRecord<String, String>(KAFKA_TOPICS, key, json.toString()));
// get() blocks until the broker acknowledges the record; a failed send is re-thrown here
result.get();
} catch (Exception e) {
logger.error(e);
// TODO: persist the failed record (rowkey + table) so it can be retried later
}
}
}
});
}
private static JavaPairRDD<ImmutableBytesWritable, Result> getRddData(JavaSparkContext jsc, String sourceHbase)
throws IOException {
// initialize HBase connection settings
String zookeeperQuorum = PropertyManager.getInstance().getValue("hbase.zookeeper.quorum");
String zookeeperPort = PropertyManager.getInstance().getValue("hbase.zookeeper.clientPort");
System.out.println(zookeeperQuorum);
System.out.println(zookeeperPort);
// read data from the source HBase table
Configuration readConf = HBaseConfiguration.create();
readConf.set("hbase.zookeeper.quorum", zookeeperQuorum);
readConf.set("hbase.zookeeper.property.clientPort", zookeeperPort);
readConf.set(TableInputFormat.INPUT_TABLE, sourceHbase);
// scan (add filters here if needed)
Scan scan = new Scan();
ClientProtos.Scan proto = ProtobufUtil.toScan(scan);
String scanToString = Base64.encodeBytes(proto.toByteArray(), Base64.DONT_BREAK_LINES);
readConf.set(TableInputFormat.SCAN, scanToString);
return jsc.newAPIHadoopRDD(readConf, TableInputFormat.class, ImmutableBytesWritable.class, Result.class);
}
private static HashMap<String, String> getArgsMap(String[] args) {
HashMap<String, String> result = new HashMap<String, String>();
if (args != null && args.length != 0) {
for (String a : args) {
String[] split = a.split("=");
result.put(split[0], split[1]);
}
}
logger.info("parse args as ==>" + JSON.toJSONString(result));
return result;
}
}
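Note: the classes in this post read their connection settings through a PropertyManager helper that is not shown. Below is a minimal sketch of what it might look like, assuming the settings (hbase.zookeeper.quorum, hbase.zookeeper.clientPort, kafka.broker) live in a config.properties file on the classpath; the class name matches the calls above, but the file name and loading strategy are assumptions, not the original implementation.

import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;

public class PropertyManager {
    private static final PropertyManager INSTANCE = new PropertyManager();
    private final Properties props = new Properties();

    private PropertyManager() {
        // load once from the classpath; "config.properties" is an assumed file name
        try (InputStream in = PropertyManager.class.getClassLoader()
                .getResourceAsStream("config.properties")) {
            if (in != null) {
                props.load(in);
            }
        } catch (IOException e) {
            throw new IllegalStateException("failed to load config.properties", e);
        }
    }

    public static PropertyManager getInstance() {
        return INSTANCE;
    }

    public String getValue(String key) {
        return props.getProperty(key);
    }
}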
- Kafka utility class: pooled producers and topic creation
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.kafka.clients.admin.AdminClient;
import org.apache.kafka.clients.admin.CreateTopicsResult;
import org.apache.kafka.clients.admin.DeleteTopicsResult;
import org.apache.kafka.clients.admin.NewTopic;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.common.serialization.StringSerializer;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
public class KafkaUtil {
static final Logger logger = LogManager.getLogger(KafkaUtil.class);
public static AdminClient adminClient;
public static final int MAX_NUM = 8;
private static volatile List<KafkaProducer<String, String>> PRODUCERS = new CopyOnWriteArrayList<>();// safe for lock-free reads on the fast path
private static AtomicInteger PRODUCERS_GET_INDEX = new AtomicInteger(0);
private static final Object PRODUCER_LOCK = new Object();
private KafkaUtil() {
}
static {
//close the producers and the admin client when the JVM exits
Runtime.getRuntime().addShutdownHook(new Thread(() -> {
long time = System.currentTimeMillis();
if(!PRODUCERS.isEmpty()) {
for(KafkaProducer<String,String> p : PRODUCERS) {
try {
p.close();
}catch (Exception e) {
logger.error("error closing kafka producer",e);
}
}
}
logger.info("kafka producers closed");
if(adminClient != null) {
try {
adminClient.close();
}catch (Exception e) {
logger.error("error closing kafka admin client",e);
}
}
logger.info("kafka adminClient closed");
logger.info("kafka resources closed, took(ms):"+(System.currentTimeMillis()-time));
}));
}
/**
* Returns a pooled producer. Callers must NOT close it; all producers are closed by the shutdown hook.
*
* @return a shared KafkaProducer
*/
public static KafkaProducer<String, String> getKafkaProducer() {
if(PRODUCERS.size() >= MAX_NUM) {
return PRODUCERS.get(PRODUCERS_GET_INDEX.getAndIncrement() & (MAX_NUM-1));
}
Map<String, Object> props = new HashMap<String, Object>();
String broker = PropertyManager.getInstance().getValue("kafka.broker");
props.put("bootstrap.servers", broker);
props.put("acks", "all");
props.put("retries", 0);
props.put("batch.size", 16384);
props.put("linger.ms", 1);
props.put("buffer.memory", 33554432);
props.put("key.serializer", StringSerializer.class);
props.put("value.serializer", StringSerializer.class);
synchronized (PRODUCER_LOCK) {
if(PRODUCERS.size() >= MAX_NUM) {
return PRODUCERS.get(PRODUCERS_GET_INDEX.getAndIncrement() & (MAX_NUM-1));
}
PRODUCERS.add(new KafkaProducer<String,String>(props));
}
return PRODUCERS.get(PRODUCERS_GET_INDEX.getAndIncrement() & (MAX_NUM-1));
}
/**
* Create a topic
* @param name
* @param numPartitions
* @param replicationFactor
*/
public static CreateTopicsResult createTopic(String name,int numPartitions, short replicationFactor) {
return createTopic(new NewTopic(name,numPartitions,replicationFactor));
}
public static CreateTopicsResult createTopic(String name) {
return createTopic(new NewTopic(name,8,(short) 1));
}
/**
* Create a topic
* @param topic
* @return
*/
public static CreateTopicsResult createTopic(NewTopic topic) {
if(adminClient == null) {
adminClient = AdminClientHolder.getAdminClient();
}
List<NewTopic> newTopics = new ArrayList<NewTopic>(4);
newTopics.add(topic);
return adminClient.createTopics(newTopics);
}
public static boolean existTopic(String topic) {
if(adminClient == null) {
adminClient = AdminClientHolder.getAdminClient();
}
try {
return adminClient.listTopics().names().get().contains(topic);
} catch (InterruptedException | ExecutionException e) {
logger.error("[ERROR]",e);
}
return false;
}
/**
* Delete a topic
* @param topicName
* @return
*/
public static DeleteTopicsResult deleteTopic(String topicName) {
if(adminClient == null) {
adminClient = AdminClientHolder.getAdminClient();
}
List<String> topics = new ArrayList<>();
topics.add(topicName);
return adminClient.deleteTopics(topics);
}
private static class AdminClientHolder{
private static final AdminClient client;
static {
Map<String, Object> props = new HashMap<String, Object>();
String broker = PropertyManager.getInstance().getValue("kafka.broker");
props.put("bootstrap.servers", broker);
client = AdminClient.create(props);
}
public static AdminClient getAdminClient() {
return client;
}
}
public static void main(String[] args) {
new KafkaUtil().dowork();
}
private void dowork() {// create the topic
createTopic("SRV_UPDATE_ES",4,(short)1);
}
}
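Topic creation through the AdminClient is asynchronous: the returned CreateTopicsResult only means the request was submitted. A small usage sketch that checks for the producer's topic before the Spark job starts and blocks until creation completes; the partition count and replication factor are examples to tune for your cluster.

public class EnsureTopic {
    public static void main(String[] args) throws Exception {
        // make sure the target topic exists before the producer job starts
        String topic = "kafka.update.topic";
        if (!KafkaUtil.existTopic(topic)) {
            // 8 partitions, replication factor 1; adjust for your cluster
            KafkaUtil.createTopic(topic, 8, (short) 1).all().get();
        }
    }
}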
- HBase utility class
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.CompareOperator;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionConfiguration;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.filter.BinaryComparator;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.FilterList.Operator;
import org.apache.hadoop.hbase.filter.QualifierFilter;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.log4j.Logger;
public class HBaseUtil {
private static final Logger LOGGER = Logger.getLogger(HBaseUtil.class);
private static Connection CONN;
private static Map<String, Table> TABLES;
static {
init();
// close the connection when the JVM exits
Runtime.getRuntime().addShutdownHook(new Thread(() -> {
long time = System.currentTimeMillis();
closeAllTable();
LOGGER.info("hbase tables closed");
if (CONN != null) {
try {
CONN.close();
} catch (IOException e) {
LOGGER.error("error closing hbase connection", e);
}
}
LOGGER.info("hbase connection closed");
LOGGER.info("hbase resources closed, took(ms):" + (System.currentTimeMillis() - time));
}));
}
private static void init() {
try {
String zookeeperQuorum = PropertyManager.getInstance().getValue("hbase.zookeeper.quorum");
String zookeeperPort = PropertyManager.getInstance().getValue("hbase.zookeeper.clientPort");
// build the HBase connection configuration
Configuration readConf = HBaseConfiguration.create();
readConf.set("hbase.zookeeper.quorum", zookeeperQuorum);
readConf.set("hbase.zookeeper.property.clientPort", zookeeperPort);
readConf.set(ConnectionConfiguration.MAX_KEYVALUE_SIZE_KEY, String.valueOf((1 << 30)));// 1 GB
CONN = ConnectionFactory.createConnection(readConf);
TABLES = new ConcurrentHashMap<>();
} catch (IOException e) {
CONN = null;
LOGGER.error(e);
}
}
/**
* Get the shared HBase connection. Do not close it; a single connection is shared globally.
*
* @return
*/
public static Connection getConnection() {
if (CONN == null || CONN.isClosed()) {
synchronized (HBaseUtil.class) {
if (CONN == null || CONN.isClosed()) {
init();
}
}
}
return CONN;
}
/**
* Save data (batch)
*
* @param tableName
* @param puts
* @throws IOException
*/
public static void saveData(String tableName, List<Put> puts) throws IOException {
Table table = getTable(tableName);
table.put(puts);
}
/**
* Save data (single put)
*
* @param tableName
* @param put
* @throws IOException
*/
public static void saveData(String tableName, Put put) throws IOException {
Table table = getTable(tableName);
table.put(put);
}
/**
* Get a row by rowkey
*
* @param tableName
* @param rowKey
* @return
* @throws IOException
*/
public static Result getData(String tableName, String rowKey) throws IOException {
Table table = getTable(tableName);
Get get = new Get(Bytes.toBytes(rowKey));
get.setCacheBlocks(false);
return table.get(get);
}
/**
*
* Execute an Admin-based action
*
* @param action
* @throws IOException
*/
public static <T> T admin(AdminAction<T> action) {
Admin admin = null;
T result = null;
try {
admin = CONN.getAdmin();
result = action.exec(admin);
} catch (IOException e) {
throw new RuntimeException(e);
} finally {
try {
if (admin != null)
admin.close();
} catch (IOException e) {
LOGGER.error("关闭admin报错。", e);
}
}
return result;
}
@FunctionalInterface
public static interface AdminAction<T> {
T exec(Admin admin);
}
public static <T> void scanForEach(String tableName, Scan scan, ScanForEachAction action) {
Table table = null;
try {
table = getTable(tableName);
} catch (IOException e) {
LOGGER.error(e);
return;// without a table handle there is nothing to scan
}
ResultScanner scanner = null;
try {
scanner = table.getScanner(scan);
boolean flg=true;
while (flg) {
Result next = null;
try {
next = scanner.next();
} catch (IOException e) {
LOGGER.error(e);
}
if (next == null) {
break;
}
flg=action.foreach(next);
}
} catch (IOException e) {
LOGGER.error(e);
}finally {
if(scanner!=null) {
scanner.close();
}
}
}
@FunctionalInterface
public static interface ScanForEachAction {
boolean foreach(Result result);
}
public static Table getTable(String tableName) throws IOException {
Table table = TABLES.get(tableName);
if (table == null) {
synchronized (HBaseUtil.class) {
// re-read inside the lock so only one Table instance is created per table
table = TABLES.get(tableName);
if (table == null) {
table = CONN.getTable(TableName.valueOf(tableName));
TABLES.put(tableName, table);
}
}
}
return table;
}
/**
* Close all cached table handles
*/
private static void closeAllTable() {
if (TABLES != null && !TABLES.isEmpty()) {
TABLES.values().forEach(t -> {
try {
t.close();
} catch (IOException e) {
LOGGER.error("error closing hbase table", e);
}
});
TABLES.clear();
}
}
public static void main(String[] args) throws Exception {
// e.g. createTable(); or showAllTable();
}
/**
* Create a table
*/
private static void createTable() {
try {
Admin admin = getConnection().getAdmin();
TableName tableName = TableName.valueOf("ENT:GS_ITJUZI");
if (admin.tableExists(tableName)) {
System.out.println("table already exists!");
} else {
//table descriptor builder
TableDescriptorBuilder tdb = TableDescriptorBuilder.newBuilder(tableName);
//column family descriptor builder
ColumnFamilyDescriptorBuilder cdb = ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes("base_info"));
//build the column family descriptor
ColumnFamilyDescriptor cfd = cdb.build();
//add the column family
tdb.setColumnFamily(cfd);
//build the table descriptor
TableDescriptor td = tdb.build();
//create the table
admin.createTable(td);
}
} catch (IOException e) {
e.printStackTrace();
}
}
/*
* List all HBase tables
*/
private static void showAllTable() {
try {
Admin admin = getConnection().getAdmin();
TableName[] listTableNames = admin.listTableNames();
System.out.println("==========all table here============");
for (TableName tn : listTableNames) {
System.out.println(tn.getNameAsString());
}
} catch (IOException e) {
e.printStackTrace();
}
}
public static FilterList getColFilterList(String ...cols ) {
FilterList list=new FilterList(Operator.MUST_PASS_ONE);
for(String col:cols) {
list.addFilter(new QualifierFilter(CompareOperator.EQUAL, new BinaryComparator(Bytes.toBytes(col))));
}
return list;
}
}
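A short usage sketch of the scan helpers above, combining scanForEach with getColFilterList; the table name reuses the example from the producer section, and the column qualifiers (name, update_time) are placeholders.

import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;

public class ScanExample {
    public static void main(String[] args) {
        Scan scan = new Scan();
        scan.setCaching(500);
        // only keep the listed qualifiers (MUST_PASS_ONE over QualifierFilters)
        scan.setFilter(HBaseUtil.getColFilterList("name", "update_time"));
        HBaseUtil.scanForEach("SRV:XIAJIA_TEST", scan, result -> {
            System.out.println(Bytes.toString(result.getRow()));
            return true; // return false to stop the scan early
        });
    }
}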
2. Consumer
Main class to run: consuming the data
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.common.serialization.StringDeserializer;
import org.apache.log4j.Logger;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.VoidFunction;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.kafka010.CanCommitOffsets;
import org.apache.spark.streaming.kafka010.ConsumerStrategies;
import org.apache.spark.streaming.kafka010.HasOffsetRanges;
import org.apache.spark.streaming.kafka010.KafkaUtils;
import org.apache.spark.streaming.kafka010.LocationStrategies;
import org.apache.spark.streaming.kafka010.OffsetRange;
public class TestRun {
private static final String KAFKA_BOOTSTRAP_SERVERS = "ip:port";
private static final String GROUP_ID = "consumer-group-id";
private static final String KAFKA_TOPICS = "topic-to-subscribe";
public final static Logger logger = Logger.getLogger(TestRun.class);
public static void main(String[] args) {
String taskId = "spark-kafka-taskId-" + System.currentTimeMillis();
SparkConf conf = new SparkConf().setAppName("spark-kafka-test");
// conf.setMaster("local");
// cap the number of messages read from each Kafka partition per second;
// e.g. with a 10-second batch and a 3-partition topic, each batch receives at most 500*3*10 = 15000 messages
conf.set("spark.streaming.kafka.maxRatePerPartition", "500");
// the second argument is the batch interval (how often a micro-batch is pulled)
JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(Long.valueOf(60)));
// build the Kafka parameter map
Map<String, Object> kafkaParams = new HashMap<>();
kafkaParams.put("bootstrap.servers", KAFKA_BOOTSTRAP_SERVERS);
kafkaParams.put("key.deserializer", StringDeserializer.class);
kafkaParams.put("value.deserializer", StringDeserializer.class);
kafkaParams.put("group.id", GROUP_ID);
// where to start when there is no committed offset: earliest or latest
// (older consumer versions used smallest/largest instead)
kafkaParams.put("auto.offset.reset", "earliest"); // earliest latest
// if true, the consumer periodically auto-commits offsets; disabled here so offsets
// are committed manually after each batch is processed
kafkaParams.put("enable.auto.commit", false);
// build the topic set
String kafkaTopics = KAFKA_TOPICS;
String[] kafkaTopicsSplited = kafkaTopics.split(",");
Collection<String> topics = new HashSet<>();
for (String kafkaTopic : kafkaTopicsSplited) {
topics.add(kafkaTopic);
}
final JavaInputDStream<ConsumerRecord<String, String>> stream = KafkaUtils.createDirectStream(jssc,
LocationStrategies.PreferConsistent(),
ConsumerStrategies.<String, String>Subscribe(topics, kafkaParams));
stream.foreachRDD(new VoidFunction<JavaRDD<ConsumerRecord<String, String>>>() {
private static final long serialVersionUID = -1942055064446124058L;
@Override
public void call(JavaRDD<ConsumerRecord<String, String>> t) throws Exception {
long count = t.count();
logger.info("stream rdd id [" + t.id() + "] count[" + count + "]");
if (count > 0) {
OffsetRange[] offsetRanges = ((HasOffsetRanges) t.rdd()).offsetRanges();
// process the partitions first, so offsets are only committed after the batch has been handled
t.foreachPartition(new HandleFunction(taskId));
((CanCommitOffsets) stream.inputDStream()).commitAsync(offsetRanges);
}
}
});
jssc.start();
// block until the job terminates, periodically checking for an external stop signal
boolean isStop = false;
while (!isStop) {
isStop = jssc.awaitTerminationOrTimeout(10 * 1000);
// TODO: set isStop = true here when the stop marker (e.g. /secordIndex/stop) is detected
}
logger.info("stop signal /secordIndex/stop detected, stopping the program");
jssc.stop(true, true);
logger.info("JavaStreamingContext stopped");
logger.info("[realtime] kafka coprocessor message handler exited :-) " + taskId);
}
}
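As an alternative to polling for a stop marker, Spark Streaming can also be told to finish in-flight batches when the driver receives a shutdown signal (for example from yarn application -kill). A sketch of the extra SparkConf setting, to be applied where the conf is built in main():

import org.apache.spark.SparkConf;

// in main(), before creating the JavaStreamingContext:
SparkConf conf = new SparkConf()
        .setAppName("spark-kafka-test")
        // finish in-flight batches before shutting down on a termination signal
        .set("spark.streaming.stopGracefullyOnShutdown", "true");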
Message handling class: processing the consumed data
import java.util.Iterator;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.log4j.Logger;
import org.apache.spark.api.java.function.VoidFunction;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
public class HandleFunction implements VoidFunction<Iterator<ConsumerRecord<String, String>>> {
private static final long serialVersionUID = -7710721110303417789L;
private static final Logger LOGGER = Logger.getLogger(HandleFunction.class);
private String taskId;
public HandleFunction(String taskId) {
this.taskId = taskId;
}
@Override
public void call(Iterator<ConsumerRecord<String, String>> t) throws Exception {
LOGGER.info("开始处理kafka消息!");
ConsumerRecord<String, String> record;
JSONObject data;
while (t.hasNext()) {
record = t.next();
// kafka:key
String key = record.key();
try {
// kafka:value
data = JSON.parseObject(record.value());
} catch (Exception e) {
LOGGER.error("failed to parse the message value as JSON, key=" + key, e);
continue;
}
System.out.println("kafka-key:" + key);
System.out.println("kafka-value:" + data);
}
}
}
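The message value produced in the first section only carries entryId and tableName, so a typical next step inside the while loop is to load the full row back from HBase before further processing (for example indexing into ES). A minimal sketch reusing HBaseUtil.getData from above; the base_info column family and name qualifier are assumptions, not part of the original code.

// inside the while loop of call(), after parsing `data`
// (requires: import java.io.IOException; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.util.Bytes;)
String entryId = data.getString("entryId");
String tableName = data.getString("tableName");
try {
    // fetch the full row that the producer referenced
    Result row = HBaseUtil.getData(tableName, entryId);
    byte[] name = row.getValue(Bytes.toBytes("base_info"), Bytes.toBytes("name")); // assumed family/qualifier
    LOGGER.info("loaded row " + entryId + " from " + tableName
            + (name == null ? "" : ", name=" + Bytes.toString(name)));
} catch (IOException e) {
    LOGGER.error("failed to load row " + entryId + " from " + tableName, e);
}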
3. Required dependencies
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<env>dev</env>
<hadoop.version>3.0.0</hadoop.version>
<hbase.version>2.1.0</hbase.version>
<spark-scala.version>2.11</spark-scala.version>
<spark.version>2.4.0</spark.version>
<kafka.version>2.1.0</kafka.version>
<hive.version>2.1.1-cdh6.2.0</hive.version>
</properties>
<dependencies>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.12</version>
</dependency>
<dependency>
<groupId>org.glassfish</groupId>
<artifactId>javax.el</artifactId>
<version>3.0.1-b06</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>${hadoop.version}</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-common -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-client</artifactId>
<version>${hbase.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-server</artifactId>
<version>${hbase.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-mapreduce</artifactId>
<version>${hbase.version}</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_${spark-scala.version}</artifactId>
<version>${spark.version}</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_${spark-scala.version}</artifactId>
<version>${spark.version}</version>
</dependency>
<!-- spark-sql already declares this dependency, but it is not pulled in here, so add spark-catalyst explicitly -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-catalyst_${spark-scala.version}</artifactId>
<version>${spark.version}</version>
</dependency>
<dependency>
<groupId>org.elasticsearch.client</groupId>
<artifactId>transport</artifactId>
<version>7.2.0</version>
</dependency>
<dependency>
<groupId>org.elasticsearch.client</groupId>
<artifactId>elasticsearch-rest-high-level-client</artifactId>
<version>7.2.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/com.alibaba/fastjson -->
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.58</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.httpcomponents/httpclient -->
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.5.9</version>
</dependency>
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka_${spark-scala.version}</artifactId>
<version>${kafka.version}</version>
</dependency>
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka-clients</artifactId>
<version>${kafka.version}</version>
</dependency>
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka-streams</artifactId>
<version>${kafka.version}</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming_${spark-scala.version}</artifactId>
<version>${spark.version}</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming-kafka-0-10_${spark-scala.version}</artifactId>
<version>${spark.version}</version>
</dependency>
<dependency>
<groupId>org.mongodb.mongo-hadoop</groupId>
<artifactId>mongo-hadoop-core</artifactId>
<version>1.4.1</version>
</dependency>
<dependency>
<groupId>org.mongodb</groupId>
<artifactId>mongo-java-driver</artifactId>
<version>3.6.3</version>
</dependency>
<dependency>
<groupId>org.mongodb.spark</groupId>
<artifactId>mongo-spark-connector_${spark-scala.version}</artifactId>
<version>2.2.2</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-core</artifactId>
<!--<version>2.7.9</version> -->
<version>2.6.6</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
<!--<version>2.7.9.4</version> -->
<version>2.6.5</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-annotations</artifactId>
<!--<version>2.7.9</version> -->
<version>2.6.5</version>
</dependency>
<!-- https://mvnrepository.com/artifact/mysql/mysql-connector-java -->
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>8.0.16</version>
</dependency>
<!-- email -->
<dependency>
<groupId>com.sun.mail</groupId>
<artifactId>javax.mail</artifactId>
<version>1.6.0</version>
</dependency>
<dependency>
<groupId>dom4j</groupId>
<artifactId>dom4j</artifactId>
<version>1.6.1</version>
</dependency>
<dependency>
<groupId>jaxen</groupId>
<artifactId>jaxen</artifactId>
<version>1.1.6</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-mapreduce-client-jobclient -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-jobclient</artifactId>
<version>3.0.0-cdh6.2.0</version>
</dependency>
<!-- hive -->
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<version>${hive.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-jdbc</artifactId>
<version>${hive.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-cli</artifactId>
<version>${hive.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-common</artifactId>
<version>${hive.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-metastore</artifactId>
<version>${hive.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-service</artifactId>
<version>${hive.version}</version>
</dependency>
<!-- Apache POI -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.17</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml-schemas</artifactId>
<version>3.17</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>3.17</version>
</dependency>
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.7.3</version>
</dependency>
<dependency>
<groupId>com.monitorjbl</groupId>
<artifactId>xlsx-streamer</artifactId>
<version>1.2.0</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>openxml4j</artifactId>
<version>1.0-beta</version>
</dependency>
<dependency>
<groupId>com.belerweb</groupId>
<artifactId>pinyin4j</artifactId>
<version>2.5.1</version>
</dependency>
<dependency>
<groupId>xml-apis</groupId>
<artifactId>xml-apis</artifactId>
<version>1.4.01</version>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-api</artifactId>
<version>2.8.2</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-hbase-handler</artifactId>
<version>${hive.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hbase.connectors.spark</groupId>
<artifactId>hbase-spark</artifactId>
<version>1.0.0</version>
</dependency>
<dependency>
<groupId>org.elasticsearch</groupId>
<artifactId>elasticsearch-spark-20_2.11</artifactId>
<version>7.3.2</version>
</dependency>
</dependencies>