Java notes: writing data from Kafka into HDFS

The class below starts a thread that consumes messages from a Kafka topic and appends each message to a daily file on HDFS.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

import java.io.*;
import java.text.SimpleDateFormat;
import java.util.*;

import kafka.consumer.*;
import kafka.javaapi.consumer.ConsumerConnector;

public class KafkaToHdfs extends Thread {

    private static String kafkaHost = null;
    private static String kafkaGroup = null;
    private static String kafkaTopic = null;
    private static String hdfsUri = null;
    private static String hdfsDir = null;
    private static String hadoopUser = null;
    private static Boolean isDebug = false;

    private ConsumerConnector consumer = null;

    private static Configuration hdfsConf = null;
    private static FileSystem hadoopFS = null;

    public static void main(String[] args) {
        // Five positional arguments are required; IS_DEBUG is optional.
        if (args.length < 5) {
            usage();
            System.exit(0);
        }
        // The HDFS user is taken from the HADOOP_USER_NAME environment variable.
        Map<String, String> env = System.getenv();
        if (env.get("HADOOP_USER_NAME") == null) {
            System.out.println("Please set the HADOOP_USER_NAME environment variable to the user that Hadoop runs as.");
            System.exit(0);
        } else {
            hadoopUser = env.get("HADOOP_USER_NAME");
        }
        init(args);
        System.out.println("Starting the service...");

        hdfsConf = new Configuration();
        hdfsConf.set("fs.defaultFS", hdfsUri);
        hdfsConf.set("dfs.support.append", "true");
        hdfsConf.set("dfs.client.block.write.replace-datanode-on-failure.policy", "NEVER");
        hdfsConf.set("dfs.client.block.write.replace-datanode-on-failure.enable", "true");

        // Create the target directory on HDFS if it does not exist yet.
        try {
            hadoopFS = FileSystem.get(hdfsConf);
            if (!hadoopFS.exists(new Path("/" + hdfsDir))) {
                hadoopFS.mkdirs(new Path("/" + hdfsDir));
            }
            hadoopFS.close();
        } catch (IOException e) {
            e.printStackTrace();
        }

        KafkaToHdfs selfObj = new KafkaToHdfs();
        selfObj.start();
        System.out.println("Service started, listening for messages.");
    }

    public void run() {
        // Configuration for the old high-level (ZooKeeper based) Kafka consumer.
        Properties props = new Properties();
        props.put("zookeeper.connect", kafkaHost);
        props.put("group.id", kafkaGroup);
        props.put("zookeeper.session.timeout.ms", "4000");
        props.put("zookeeper.sync.time.ms", "200");
        props.put("auto.commit.interval.ms", "1000");
        props.put("auto.offset.reset", "smallest");
        props.put("format", "binary");
        props.put("auto.commit.enable", "true");
        props.put("serializer.class", "kafka.serializer.StringEncoder");

        ConsumerConfig consumerConfig = new ConsumerConfig(props);
        this.consumer = Consumer.createJavaConsumerConnector(consumerConfig);

        // Request a single stream for the target topic.
        Map<String, Integer> topicCountMap = new HashMap<String, Integer>();
        topicCountMap.put(kafkaTopic, Integer.valueOf(1));
        Map<String, List<KafkaStream<byte[], byte[]>>> consumerMap = consumer.createMessageStreams(topicCountMap);
        KafkaStream<byte[], byte[]> stream = consumerMap.get(kafkaTopic).get(0);
        ConsumerIterator<byte[], byte[]> it = stream.iterator();

        while (it.hasNext()) {
            String tmp = new String(it.next().message());
            // Make sure every message ends with a newline before appending it.
            String fileContent = tmp.endsWith("\n") ? tmp : tmp + "\n";
            if (isDebug) {
                debug("receive: " + fileContent);
            }
            try {
                hadoopFS = FileSystem.get(hdfsConf);
                // One file per day, named yyyy-MM-dd.txt, under /<hdfsDir>/.
                String fileName = "/" + hdfsDir + "/"
                        + new SimpleDateFormat("yyyy-MM-dd").format(Calendar.getInstance().getTime()) + ".txt";
                Path dst = new Path(fileName);
                if (!hadoopFS.exists(dst)) {
                    // Create an empty file before the first append of the day.
                    FSDataOutputStream output = hadoopFS.create(dst);
                    output.close();
                }
                InputStream in = new ByteArrayInputStream(fileContent.getBytes("UTF-8"));
                OutputStream out = hadoopFS.append(dst);
                IOUtils.copyBytes(in, out, 4096, true);
            } catch (IOException e) {
                e.printStackTrace();
            } finally {
                try {
                    hadoopFS.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        consumer.shutdown();
    }

    private static void init(String[] args) {
        kafkaHost = args[0];
        kafkaGroup = args[1];
        kafkaTopic = args[2];
        hdfsUri = args[3];
        hdfsDir = args[4];
        if (args.length > 5 && args[5].equals("true")) {
            isDebug = true;
        }
        debug("Service parameters initialized:");
        debug("KAFKA_HOST: " + kafkaHost);
        debug("KAFKA_GROUP: " + kafkaGroup);
        debug("KAFKA_TOPIC: " + kafkaTopic);
        debug("HDFS_URI: " + hdfsUri);
        debug("HDFS_DIRECTORY: " + hdfsDir);
        debug("HADOOP_USER: " + hadoopUser);
        debug("IS_DEBUG: " + isDebug);
    }

    private static void debug(String str) {
        if (isDebug) {
            System.out.println(str);
        }
    }

    private static void usage() {
        System.out.println("* Usage: Kafka-to-HDFS Java tool");
        System.out.println("# java -cp kafkatohdfs.jar KafkaToHdfs KAFKA_HOST KAFKA_GROUP KAFKA_TOPIC HDFS_URI HDFS_DIRECTORY [IS_DEBUG]");
        System.out.println("* Parameters:");
        System.out.println("* KAFKA_HOST     : ZooKeeper connect string (host:port list) used by the Kafka consumer, e.g. namenode:2181,datanode1:2181,datanode2:2181");
        System.out.println("* KAFKA_GROUP    : Kafka consumer group, e.g. test-consumer-group");
        System.out.println("* KAFKA_TOPIC    : Kafka topic name, e.g. usertags");
        System.out.println("* HDFS_URI       : HDFS connection URI, e.g. hdfs://namenode:9000");
        System.out.println("* HDFS_DIRECTORY : HDFS directory name, e.g. usertags");
        System.out.println("* Optional:");
        System.out.println("* IS_DEBUG       : enable debug mode, true or false, defaults to false");
        System.out.println("* The HDFS user is read from the HADOOP_USER_NAME environment variable.");
    }

}
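
For reference, here is a minimal sketch of the same consume-and-append loop written against the newer org.apache.kafka.clients.consumer.KafkaConsumer API, since the kafka.consumer / ConsumerConnector high-level consumer used above is deprecated and no longer shipped with recent Kafka releases. The broker list, topic and HDFS paths below are placeholder assumptions, not values from the original notes, and this is a sketch rather than a drop-in replacement for the class above.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.StringDeserializer;

import java.nio.charset.StandardCharsets;
import java.time.Duration;
import java.util.Collections;
import java.util.Properties;

public class ModernKafkaToHdfs {

    public static void main(String[] args) throws Exception {
        // Kafka consumer configuration; bootstrap.servers replaces zookeeper.connect.
        Properties props = new Properties();
        props.put("bootstrap.servers", "broker1:9092");        // placeholder broker list
        props.put("group.id", "test-consumer-group");
        props.put("key.deserializer", StringDeserializer.class.getName());
        props.put("value.deserializer", StringDeserializer.class.getName());
        props.put("auto.offset.reset", "earliest");             // replaces "smallest"

        // HDFS client configuration, same append-related settings as above.
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://namenode:9000");       // placeholder HDFS URI
        conf.set("dfs.client.block.write.replace-datanode-on-failure.policy", "NEVER");

        try (KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);
             FileSystem fs = FileSystem.get(conf)) {
            consumer.subscribe(Collections.singletonList("usertags"));   // placeholder topic
            Path dst = new Path("/usertags/data.txt");                   // placeholder file
            if (!fs.exists(dst)) {
                fs.create(dst).close();                         // create an empty file once
            }
            while (true) {
                ConsumerRecords<String, String> records = consumer.poll(Duration.ofSeconds(1));
                if (records.isEmpty()) {
                    continue;
                }
                // Open the appender once per batch instead of once per message.
                try (FSDataOutputStream out = fs.append(dst)) {
                    for (ConsumerRecord<String, String> record : records) {
                        out.write((record.value() + "\n").getBytes(StandardCharsets.UTF_8));
                    }
                }
            }
        }
    }
}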

