java向kafka写数据_数据从kafka写入hdfs的java笔记

最新推荐文章于 2023-09-07 14:49:45 发布

十年君子剑

最新推荐文章于 2023-09-07 14:49:45 发布

阅读量386

点赞数

文章标签： java向kafka写数据

本文链接：https://blog.csdn.net/weixin_34307867/article/details/114763922

版权

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.FSDataOutputStream;

import org.apache.hadoop.fs.FileSystem;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.IOUtils;

import java.io.*;

import java.text.SimpleDateFormat;

import java.util.*;

import kafka.consumer.*;

import kafka.javaapi.consumer.ConsumerConnector;

public class KafkaToHdfs extends Thread {

private static String kafkaHost = null;

private static String kafkaGroup = null;

private static String kafkaTopic = null;

private static String hdfsUri = null;

private static String hdfsDir = null;

private static String hadoopUser = null;

private static Boolean isDebug = false;

private ConsumerConnector consumer = null;

private static Configuration hdfsConf = null;

private static FileSystem hadoopFS = null;

public static void main(String[] args) {

if (args.length < 6) {

useage();

System.exit(0);

}

Map user = new HashMap();

user = System.getenv();

if (user.get("HADOOP_USER_NAME") == null) {

System.out.println("请设定hadoop的启动的用户名，环境变量名称：HADOOP_USER_NAME，对应的值是hadoop的启动的用户名");

System.exit(0);

} else {

hadoopUser = user.get("HADOOP_USER_NAME");

}

init(args);

System.out.println("开始启动服务...");

hdfsConf = new Configuration();

hdfsConf.set("fs.defaultFS", hdfsUri);

hdfsConf.set("dfs.support.append", "true");

hdfsConf.set("dfs.client.block.write.replace-datanode-on-failure.policy", "NEVER");

hdfsConf.set("dfs.client.block.write.replace-datanode-on-failure.enable", "true");

//创建好相应的目录

try {

hadoopFS = FileSystem.get(hdfsConf);

//如果hdfs的对应的目录不存在，则进行创建

if (!hadoopFS.exists(new Path("/" + hdfsDir))) {

hadoopFS.mkdirs(new Path("/" + hdfsDir));

}

hadoopFS.close();

} catch (IOException e) {

// TODO Auto-generated catch block

e.printStackTrace();

}

KafkaToHdfs selfObj = new KafkaToHdfs();

selfObj.start();

System.out.println("服务启动完毕，监听执行中");

}

public void run() {

Properties props = new Properties();

props.put("zookeeper.connect", kafkaHost);

props.put("group.id", kafkaGroup);

props.put("zookeeper.session.timeout.ms", "4000");

props.put("zookeeper.sync.time.ms", "200");

props.put("auto.commit.interval.ms", "1000");

props.put("auto.offset.reset", "smallest");

props.put("format", "binary");

props.put("auto.commit.enable", "true");

props.put("serializer.class", "kafka.serializer.StringEncoder");

ConsumerConfig consumerConfig = new ConsumerConfig(props);

this.consumer = Consumer.createJavaConsumerConnector(consumerConfig);

Map topicCountMap = new HashMap();

topicCountMap.put(kafkaTopic, new Integer(1));

Map>> consumerMap = consumer.createMessageStreams(topicCountMap);

KafkaStream stream = consumerMap.get(kafkaTopic).get(0);

ConsumerIterator it = stream.iterator();

while (it.hasNext()) {

String tmp = new String(it.next().message());

String fileContent = null;

if (!tmp.endsWith("\n"))

fileContent = new String (tmp + "\n");

else

fileContent = tmp;

if (isDebug) {

debug("receive: " + fileContent);

}

try {

hadoopFS = FileSystem.get(hdfsConf);

String fileName = "/" + hdfsDir + "/" +

(new SimpleDateFormat("yyyy-MM-dd").format(Calendar.getInstance().getTime())) + ".txt";

Path dst = new Path(fileName);

if (!hadoopFS.exists(dst)) {

FSDataOutputStream output = hadoopFS.create(dst);

output.close();

}

InputStream in = new ByteArrayInputStream(fileContent.getBytes("UTF-8"));

OutputStream out = hadoopFS.append(dst);

IOUtils.copyBytes(in, out, 4096, true);

} catch (IOException e) {

e.printStackTrace();

} finally {

try {

hadoopFS.close();

} catch (IOException e) {

e.printStackTrace();

}

consumer.shutdown();

}

private static void init(String[] args) {

kafkaHost = args[0];

kafkaGroup = args[1];

kafkaTopic = args[2];

hdfsUri = args[3];

hdfsDir = args[4];

if (args.length > 5) {

if (args[5].equals("true")) {

isDebug = true;

}

debug("初始化服务参数完毕,参数信息如下");

debug("KAFKA_HOST: " + kafkaHost);

debug("KAFKA_GROUP: " + kafkaGroup);

debug("KAFKA_TOPIC: " + kafkaTopic);

debug("HDFS_URI: " + hdfsUri);

debug("HDFS_DIRECTORY: " + hdfsDir);

debug("HADOOP_USER: " + hadoopUser);

debug("IS_DEBUG: " + isDebug);

}

private static void debug(String str) {

if (isDebug) {

System.out.println(str);

}

private static void useage() {

System.out.println("* kafka写入到hdfs的Java工具使用说明 ");

System.out.println("# java -cp kafkatohdfs.jar KafkaToHdfs KAFKA_HOST KAFKA_GROUP KAFKA_TOPIC HDFS_URI HDFS_DIRECTORY HADOOP_USER");

System.out.println("* 参数说明:");

System.out.println("* KAFKA_HOST : 代表kafka的主机名或IP:port，例如：namenode:2181,datanode1:2181,datanode2:2181");

System.out.println("* KAFKA_GROUP : 代表kafka的组，例如：test-consumer-group");

System.out.println("* KAFKA_TOPIC : 代表kafka的topic名称，例如：usertags");

System.out.println("* HDFS_URI : 代表hdfs链接uri ，例如：hdfs://namenode:9000");

System.out.println("* HDFS_DIRECTORY : 代表hdfs目录名称，例如：usertags");

System.out.println("* 可选参数:");

System.out.println("* IS_DEBUG : 代表是否开启调试模式，true是，false否，默认为false");

}

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.FSDataOutputStream;

import org.apache.hadoop.fs.FileSystem;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.IOUtils;

import java.io.*;

import java.text.SimpleDateFormat;

import java.util.*;

import kafka.consumer.*;

import kafka.javaapi.consumer.ConsumerConnector;

public class KafkaToHdfs extends Thread {

private static String kafkaHost = null;

private static String kafkaGroup = null;

private static String kafkaTopic = null;

private static String hdfsUri = null;

private static String hdfsDir = null;

private static String hadoopUser = null;

private static Boolean isDebug = false;

private ConsumerConnector consumer = null;

private static Configuration hdfsConf = null;

private static FileSystem hadoopFS = null;

public static void main(String[] args) {

if (args.length < 6) {

useage();

System.exit(0);

}

Map user = new HashMap();

user = System.getenv();

if (user.get("HADOOP_USER_NAME") == null) {

System.out.println("请设定hadoop的启动的用户名，环境变量名称：HADOOP_USER_NAME，对应的值是hadoop的启动的用户名");

System.exit(0);

} else {

hadoopUser = user.get("HADOOP_USER_NAME");

}

init(args);

System.out.println("开始启动服务...");

hdfsConf = new Configuration();

hdfsConf.set("fs.defaultFS", hdfsUri);

hdfsConf.set("dfs.support.append", "true");

hdfsConf.set("dfs.client.block.write.replace-datanode-on-failure.policy", "NEVER");

hdfsConf.set("dfs.client.block.write.replace-datanode-on-failure.enable", "true");

//创建好相应的目录

try {

hadoopFS = FileSystem.get(hdfsConf);

//如果hdfs的对应的目录不存在，则进行创建

if (!hadoopFS.exists(new Path("/" + hdfsDir))) {

hadoopFS.mkdirs(new Path("/" + hdfsDir));

}

hadoopFS.close();

} catch (IOException e) {

// TODO Auto-generated catch block

e.printStackTrace();

}

KafkaToHdfs selfObj = new KafkaToHdfs();

selfObj.start();

System.out.println("服务启动完毕，监听执行中");

}

public void run() {

Properties props = new Properties();

props.put("zookeeper.connect", kafkaHost);

props.put("group.id", kafkaGroup);

props.put("zookeeper.session.timeout.ms", "4000");

props.put("zookeeper.sync.time.ms", "200");

props.put("auto.commit.interval.ms", "1000");

props.put("auto.offset.reset", "smallest");

props.put("format", "binary");

props.put("auto.commit.enable", "true");

props.put("serializer.class", "kafka.serializer.StringEncoder");

ConsumerConfig consumerConfig = new ConsumerConfig(props);

this.consumer = Consumer.createJavaConsumerConnector(consumerConfig);

Map topicCountMap = new HashMap();

topicCountMap.put(kafkaTopic, new Integer(1));

Map>> consumerMap = consumer.createMessageStreams(topicCountMap);

KafkaStream stream = consumerMap.get(kafkaTopic).get(0);

ConsumerIterator it = stream.iterator();

while (it.hasNext()) {

String tmp = new String(it.next().message());

String fileContent = null;

if (!tmp.endsWith("\n"))

fileContent = new String (tmp + "\n");

else

fileContent = tmp;

if (isDebug) {

debug("receive: " + fileContent);

}

try {

hadoopFS = FileSystem.get(hdfsConf);

String fileName = "/" + hdfsDir + "/" +

(new SimpleDateFormat("yyyy-MM-dd").format(Calendar.getInstance().getTime())) + ".txt";

Path dst = new Path(fileName);

if (!hadoopFS.exists(dst)) {

FSDataOutputStream output = hadoopFS.create(dst);

output.close();

}

InputStream in = new ByteArrayInputStream(fileContent.getBytes("UTF-8"));

OutputStream out = hadoopFS.append(dst);

IOUtils.copyBytes(in, out, 4096, true);

} catch (IOException e) {

e.printStackTrace();

} finally {

try {

hadoopFS.close();

} catch (IOException e) {

e.printStackTrace();

}

consumer.shutdown();

}

private static void init(String[] args) {

kafkaHost = args[0];

kafkaGroup = args[1];

kafkaTopic = args[2];

hdfsUri = args[3];

hdfsDir = args[4];

if (args.length > 5) {

if (args[5].equals("true")) {

isDebug = true;

}

debug("初始化服务参数完毕,参数信息如下");

debug("KAFKA_HOST: " + kafkaHost);

debug("KAFKA_GROUP: " + kafkaGroup);

debug("KAFKA_TOPIC: " + kafkaTopic);

debug("HDFS_URI: " + hdfsUri);

debug("HDFS_DIRECTORY: " + hdfsDir);

debug("HADOOP_USER: " + hadoopUser);

debug("IS_DEBUG: " + isDebug);

}

private static void debug(String str) {

if (isDebug) {

System.out.println(str);

}

private static void useage() {

System.out.println("* kafka写入到hdfs的Java工具使用说明 ");

System.out.println("# java -cp kafkatohdfs.jar KafkaToHdfs KAFKA_HOST KAFKA_GROUP KAFKA_TOPIC HDFS_URI HDFS_DIRECTORY HADOOP_USER");

System.out.println("* 参数说明:");

System.out.println("* KAFKA_HOST : 代表kafka的主机名或IP:port，例如：namenode:2181,datanode1:2181,datanode2:2181");

System.out.println("* KAFKA_GROUP : 代表kafka的组，例如：test-consumer-group");

System.out.println("* KAFKA_TOPIC : 代表kafka的topic名称，例如：usertags");

System.out.println("* HDFS_URI : 代表hdfs链接uri ，例如：hdfs://namenode:9000");

System.out.println("* HDFS_DIRECTORY : 代表hdfs目录名称，例如：usertags");

System.out.println("* 可选参数:");

System.out.println("* IS_DEBUG : 代表是否开启调试模式，true是，false否，默认为false");

}

十年君子剑

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
java向kafka写数据_数据从kafka写入hdfs的java笔记

import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.FSDataOutputStream;import org.apache.hadoop.fs.FileSystem;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.IOUtils;i...
复制链接

扫一扫