flume-NG采用syslogtcp方式监听端口

一、采集说明 
本采集方式使用socket通信的方式,向flume发送数据进行采集,采集后的数据存储到hdfs和kafka。 
二、配置文件 
1、source采集层 

port.properties

# Agent name is "agent": every key below must start with that prefix.
agent.sources = r1
agent.channels = c_kafka c_hdfs  
agent.sinks = s_kafka_k1 s_kafka_k2 s_kafka_k3 s_hdfs_k1 s_hdfs_k2

# Sink groups (fixed: was "agent1.sinkgroups", which belongs to a
# non-existent agent, so the groups were never created)
agent.sinkgroups = g_kafka g_hdfs

# Source: collect data over TCP using the syslogtcp source
agent.sources.r1.type =syslogtcp
agent.sources.r1.bind=10.0.2.6
agent.sources.r1.port=44444
agent.sources.r1.channels =c_kafka c_hdfs

# Kafka-bound channel: file channel for durability
agent.channels.c_kafka.type = file
agent.channels.c_kafka.checkpointDir = /usr/local/flume-1.7.0-bin/spool/checkpoint
agent.channels.c_kafka.dataDirs = /usr/local/flume-1.7.0-bin/spool/data
agent.channels.c_kafka.capacity = 200000000
agent.channels.c_kafka.keep-alive = 30
agent.channels.c_kafka.write-timeout = 30
agent.channels.c_kafka.checkpoint-timeout=600

# HDFS-bound channel: memory channel for throughput
agent.channels.c_hdfs.type = memory
agent.channels.c_hdfs.capacity = 100000
agent.channels.c_hdfs.transactionCapacity = 1000

# Kafka avro sink 1
agent.sinks.s_kafka_k1.channel = c_kafka
agent.sinks.s_kafka_k1.type = avro
agent.sinks.s_kafka_k1.hostname = 10.0.2.8
agent.sinks.s_kafka_k1.port = 52021

# Kafka avro sink 2
agent.sinks.s_kafka_k2.channel = c_kafka
agent.sinks.s_kafka_k2.type = avro
agent.sinks.s_kafka_k2.hostname = 10.0.2.9
agent.sinks.s_kafka_k2.port = 52021

# Kafka avro sink 3
agent.sinks.s_kafka_k3.channel = c_kafka
agent.sinks.s_kafka_k3.type = avro
agent.sinks.s_kafka_k3.hostname = 10.0.2.10
agent.sinks.s_kafka_k3.port = 52021

# HDFS avro sink 1
agent.sinks.s_hdfs_k1.channel = c_hdfs
agent.sinks.s_hdfs_k1.type = avro
agent.sinks.s_hdfs_k1.hostname = 10.0.2.7
agent.sinks.s_hdfs_k1.port = 52020

# HDFS avro sink 2 (fixed: this section previously repeated the
# s_hdfs_k1 keys, clobbering sink1 and leaving s_hdfs_k2 unconfigured)
agent.sinks.s_hdfs_k2.channel = c_hdfs
agent.sinks.s_hdfs_k2.type = avro
agent.sinks.s_hdfs_k2.hostname = 10.0.2.8
agent.sinks.s_hdfs_k2.port = 52020

# Sink-group membership
agent.sinkgroups.g_kafka.sinks = s_kafka_k1 s_kafka_k2 s_kafka_k3
agent.sinkgroups.g_hdfs.sinks = s_hdfs_k1 s_hdfs_k2

# Failover policy for the kafka group (higher priority wins)
agent.sinkgroups.g_kafka.processor.type = failover
agent.sinkgroups.g_kafka.processor.priority.s_kafka_k1 = 1
agent.sinkgroups.g_kafka.processor.priority.s_kafka_k2 = 10
agent.sinkgroups.g_kafka.processor.priority.s_kafka_k3 = 100
agent.sinkgroups.g_kafka.processor.maxpenalty = 10000

# Failover policy for the hdfs group
# (fixed: priority was set for s_kafka_k2, which is not in this group)
agent.sinkgroups.g_hdfs.processor.type = failover
agent.sinkgroups.g_hdfs.processor.priority.s_hdfs_k1 = 1
agent.sinkgroups.g_hdfs.processor.priority.s_hdfs_k2 = 10
agent.sinkgroups.g_hdfs.processor.maxpenalty = 10000

2、sink数据落地层 
配置文件详见我的其它博客

三、java发送数据代码

import java.io.IOException;
import java.io.OutputStream;
import java.net.Socket;
import java.nio.charset.StandardCharsets;

/**
 * 数据写入flume
 * Created by home on 2017/7/18.
 */
public class Flume {
    // Kept package-visible for backward compatibility; both are
    // reassigned on every send attempt.
    static Socket s = null;
    static OutputStream out = null;

    /** Flume syslogtcp endpoints, tried in order until one accepts the line. */
    private static final String[] HOSTS = {
            "192.168.183.101", "192.168.183.102", "192.168.183.103"
    };
    private static final int PORT = 44444;

    /**
     * Sends one line of data to Flume, failing over across {@link #HOSTS}
     * in order. The original version duplicated this logic in three nested
     * methods and leaked the socket when the write (not the connect)
     * failed; the socket is now always closed in a finally block.
     *
     * @param line payload to deliver; a trailing "\n" is appended, which
     *             the syslogtcp source uses as the event delimiter
     */
    public void dataToFlume(String line) {
        for (String host : HOSTS) {
            try {
                s = new Socket(host, PORT);
                try {
                    writeData(line);
                    return; // delivered — stop failing over
                } finally {
                    s.close(); // close on both success and write failure
                }
            } catch (Exception e) {
                // Connect or write failed: log and try the next host.
                System.out.println("----------------------------------------------------");
                System.out.println(host + " lost connect!");
                e.printStackTrace();
                System.out.println("----------------------------------------------------");
            }
        }
    }

    /**
     * Writes the line (newline-terminated, UTF-8) to the current socket.
     * Propagates failures so the caller can fail over to the next host.
     */
    private void writeData(String line) throws IOException {
        out = s.getOutputStream();
        out.write((line + "\n").getBytes(StandardCharsets.UTF_8));
        out.flush();
    }
}

测试代码

/**
 * Created by home on 2017/7/19.
 */
public class TestFlume {
    public static void main(String args[]){
        String line="hello word!";
        Flume fl = new Flume();
        fl.dataToFlume(line);
    }
}

如果你想防止flume宕机时丢失数据，可以在java数据写入flume失败的情况下，使用java代码将数据写到本地磁盘上，再搭配spooldir或exec的flume采集方式进行补采。

阅读更多

扫码向博主提问

菜鸟级的IT之路

非学,无以致疑;非问,无以广识
  • 擅长领域:
  • Hadoop
  • Spark
  • Java后端
  • HBase
去开通我的Chat快问
想对作者说点什么?

博主推荐

换一批

没有更多推荐了,返回首页