Flume + Kafka + Flink real-time log processing (single machine)

I. Prepare the environment

Download apache-flume-1.8.0-bin.tar.gz, kafka_2.11-0.11.0.3.tgz, flink-1.6.1-bin-hadoop26-scala_2.11.tgz, and zookeeper-3.4.13.tar.gz.
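
If the machine has internet access, the tarballs can be pulled from the Apache archive; the URLs below follow the standard archive.apache.org layout (verify them before use):

cd /usr/local
wget https://archive.apache.org/dist/flume/1.8.0/apache-flume-1.8.0-bin.tar.gz
wget https://archive.apache.org/dist/kafka/0.11.0.3/kafka_2.11-0.11.0.3.tgz
wget https://archive.apache.org/dist/flink/flink-1.6.1/flink-1.6.1-bin-hadoop26-scala_2.11.tgz
wget https://archive.apache.org/dist/zookeeper/zookeeper-3.4.13/zookeeper-3.4.13.tar.gz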

II. Configure the environment

1. Install Flume

cd /usr/local
tar -zxvf apache-flume-1.8.0-bin.tar.gz
vim apache-flume-1.8.0-bin/conf/kafka.properties

agent.sources = s1
agent.channels = c1
agent.sinks = k1

# Read data from the specified file (exec source)
agent.sources.s1.type = exec
agent.sources.s1.command = tail -f /usr/local/test.log
agent.sources.s1.channels = c1

# Configure the channel
agent.channels.c1.type = memory
agent.channels.c1.capacity = 10000
agent.channels.c1.transactionCapacity = 100

# Configure the Kafka sink
agent.sinks.k1.type = org.apache.flume.sink.kafka.KafkaSink
agent.sinks.k1.brokerList = 192.168.56.101:9092
agent.sinks.k1.topic = test
agent.sinks.k1.serializer.class = kafka.serializer.StringEncoder
agent.sinks.k1.channel = c1
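
Note: brokerList, topic, and serializer.class are legacy property names kept only for backward compatibility in Flume 1.8 (serializer.class is an old Kafka 0.8 setting that the modern sink ignores). The current equivalents, if you prefer them, are:

agent.sinks.k1.kafka.bootstrap.servers = 192.168.56.101:9092
agent.sinks.k1.kafka.topic = test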

2. Install Kafka

cd /usr/local
tar -zxvf kafka_2.11-0.11.0.3.tgz
vim kafka_2.11-0.11.0.3/config/server.properties

broker.id=1
advertised.listeners=PLAINTEXT://192.168.56.101:9092
zookeeper.connect=192.168.56.101:2181
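
Everything else can stay at its default. If clients on other machines cannot reach the broker, explicitly binding the listener can help (an optional line, not part of the original setup):

listeners=PLAINTEXT://0.0.0.0:9092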

3. Install Flink

cd /usr/local
tar -zxvf flink-1.6.1-bin-hadoop26-scala_2.11.tgz

4. Install ZooKeeper

cd /usr/local
tar -zxvf zookeeper-3.4.13.tar.gz
cp zookeeper-3.4.13/conf/zoo_sample.cfg zookeeper-3.4.13/conf/zoo.cfg
vim zookeeper-3.4.13/conf/zoo.cfg

tickTime=2000
initLimit=10
syncLimit=5
dataDir=/tmp/zookeeper
clientPort=2181
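
Since dataDir points at /tmp/zookeeper, it does no harm to create the directory up front:

mkdir -p /tmp/zookeeper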

III. Start the services

1. Start ZooKeeper

cd /usr/local/zookeeper-3.4.13/bin
./zkServer.sh start
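
To confirm ZooKeeper is up:

./zkServer.sh status
# a single node should report: Mode: standalone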

2. Start Kafka

cd /usr/local/kafka_2.11-0.11.0.3
./bin/kafka-server-start.sh -daemon config/server.properties
./bin/kafka-console-producer.sh --broker-list 192.168.56.101:9092 --topic test
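
The console producer above is only for manual smoke testing. Kafka 0.11 auto-creates the topic on first use by default, but it can also be created explicitly, and a console consumer verifies that messages flow end to end:

# create the topic explicitly (0.11 still administers topics via ZooKeeper)
./bin/kafka-topics.sh --create --zookeeper 192.168.56.101:2181 --replication-factor 1 --partitions 1 --topic test

# in another terminal, watch the topic
./bin/kafka-console-consumer.sh --bootstrap-server 192.168.56.101:9092 --topic test --from-beginning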

3. Start Flume

cd /usr/local/apache-flume-1.8.0-bin
./bin/flume-ng agent --conf-file ./conf/kafka.properties -c conf/ --name agent -Dflume.root.logger=DEBUG,console
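
With the agent running, a line appended to the tailed file should show up in the Kafka console consumer. The leading "timestamp," prefix below is a made-up sample that matches the format the Flink job in step 5 parses:

echo "1540000000000,hello flume" >> /usr/local/test.log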

4. Start Flink

cd /usr/local/flink-1.6.1
./bin/start-cluster.sh

Open http://192.168.56.101:8081 in a browser and you should see the Flink web UI.
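
jps can confirm both Flink daemons are alive (process names as of Flink 1.6):

jps
# expect StandaloneSessionClusterEntrypoint (the JobManager) and TaskManagerRunner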

5. Develop the program in IDEA: create a new Maven project

pom.xml

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.tonytaotao</groupId>
    <artifactId>flink</artifactId>
    <version>1.0-SNAPSHOT</version>

    <description>Flink hands-on</description>

    <dependencies>

        <!-- https://mvnrepository.com/artifact/org.apache.flink/flink-java -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-java</artifactId>
            <version>1.6.1</version>
        </dependency>

        <!-- https://mvnrepository.com/artifact/org.apache.flink/flink-clients -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients_2.11</artifactId>
            <version>1.6.1</version>
        </dependency>

        <!-- https://mvnrepository.com/artifact/org.apache.flink/flink-streaming-java -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java_2.11</artifactId>
            <version>1.6.1</version>
           <!-- <scope>provided</scope>-->
        </dependency>

        <!-- https://mvnrepository.com/artifact/org.apache.flink/flink-connector-kafka-0.11 -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka-0.11_2.11</artifactId>
            <version>1.6.1</version>
        </dependency>


        <!-- https://mvnrepository.com/artifact/org.slf4j/slf4j-log4j12 -->
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-log4j12</artifactId>
            <version>1.7.25</version>
        </dependency>

        <!-- https://mvnrepository.com/artifact/log4j/log4j -->
        <dependency>
            <groupId>log4j</groupId>
            <artifactId>log4j</artifactId>
            <version>1.2.17</version>
        </dependency>

    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.2</version>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                </configuration>
            </plugin>


            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-dependency-plugin</artifactId>
                <executions>
                    <execution>
                        <id>copy-dependencies</id>
                        <phase>test</phase>
                        <goals>
                            <goal>copy-dependencies</goal>
                        </goals>
                        <configuration>
                            <outputDirectory>
                                target/classes/lib
                            </outputDirectory>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-jar-plugin</artifactId>
                <configuration>
                    <archive>
                        <manifest>
                            <addClasspath>true</addClasspath>
                            <mainClass>
                                com.tonytaotao.flink.FlinkKafka
                            </mainClass>
                            <classpathPrefix>lib/</classpathPrefix>
                        </manifest>
                        <manifestEntries>
                            <Class-Path>.</Class-Path>
                        </manifestEntries>
                    </archive>
                </configuration>
            </plugin>
        </plugins>
    </build>

</project>

FlinkKafka.java

package com.tonytaotao.flink;

import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.AssignerWithPunctuatedWatermarks;
import org.apache.flink.streaming.api.watermark.Watermark;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011;

import javax.annotation.Nullable;
import java.util.Properties;

public class FlinkKafka {

    public static void main(String[] args) throws Exception{

        // Get the Flink streaming execution environment
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Enable checkpointing every 1000 ms (the docs call this state & checkpointing)
        env.enableCheckpointing(1000);

        // Configure the Kafka and ZooKeeper addresses and ports
        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers", "192.168.56.101:9092");
        properties.setProperty("zookeeper.connect", "192.168.56.101:2181");
        properties.setProperty("group.id", "test");

        // Create the Kafka consumer for topic "test" with the configuration above
        FlinkKafkaConsumer011<String> consumer = new FlinkKafkaConsumer011<String>("test", new SimpleStringSchema(), properties);

        consumer.assignTimestampsAndWatermarks(new LineSplitter());

        // The Kafka source becomes a Flink DataStream
        DataStream<String> stream = env.addSource(consumer);
        stream.print();
        env.execute("WordCount from kafka data");
    }

    /**
     * Punctuated watermark assigner. Each record is expected to look like
     * "<timestampMillis>,<payload>"; the leading field is used as the event time.
     */
    public static final class LineSplitter implements AssignerWithPunctuatedWatermarks<String> {

        private static final long serialVersionUID = 1L;

        @Nullable
        @Override
        public Watermark checkAndGetNextWatermark(String lastElement, long extractedTimestamp) {
            // Emit a watermark whenever the record carries a parsable timestamp
            if (null != lastElement && lastElement.contains(",")) {
                String[] parts = lastElement.split(",");
                return new Watermark(Long.parseLong(parts[0]));
            }
            return null;
        }

        @Override
        public long extractTimestamp(String element, long previousElementTimestamp) {
            // Use the leading field as the event-time timestamp, defaulting to 0
            if (null != element && element.contains(",")) {
                String[] parts = element.split(",");
                return Long.parseLong(parts[0]);
            }
            return 0;
        }
    }

}
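
The job name mentions WordCount, but the pipeline above only prints the raw stream. A minimal word-count extension (a sketch, not from the original post: it splits records on whitespace and keeps a running count) could be dropped into main() before env.execute():

// Additional imports needed for this sketch:
// import org.apache.flink.api.common.functions.FlatMapFunction;
// import org.apache.flink.api.java.tuple.Tuple2;
// import org.apache.flink.util.Collector;

DataStream<Tuple2<String, Integer>> counts = stream
        .flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
            @Override
            public void flatMap(String line, Collector<Tuple2<String, Integer>> out) {
                // split each record into words and emit (word, 1) pairs
                for (String word : line.toLowerCase().split("\\s+")) {
                    if (!word.isEmpty()) {
                        out.collect(new Tuple2<>(word, 1));
                    }
                }
            }
        })
        .keyBy(0)  // group by the word (tuple field 0)
        .sum(1);   // running count in tuple field 1

counts.print();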

6. Package the program as a jar, upload it to the server, and submit it
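
Build with Maven on the development machine and copy the jar over (the scp host and target directory below are just this walkthrough's values):

mvn clean package
scp target/flink-1.0-SNAPSHOT.jar root@192.168.56.101:/usr/local/flink-1.6.1/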

cd /usr/local/flink-1.6.1
./bin/flink run -c com.tonytaotao.flink.FlinkKafka flink-1.0-SNAPSHOT.jar

7. Create a test.log file under /usr/local/ and a script kafkaoutput.sh with the following content:

#!/bin/bash
for ((i=0; i<=1000; i++)); do
    echo "kafka_test-$i" >> /usr/local/test.log
done

Then make the script executable: chmod +x kafkaoutput.sh

8. Run the script. If everything is wired up correctly, the generated lines will appear in /usr/local/flink-1.6.1/log/flink-root-taskexecutor-0-master.out (the TaskManager stdout file that stream.print() writes to).
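
To watch the job output while the script runs:

cd /usr/local
./kafkaoutput.sh
tail -f /usr/local/flink-1.6.1/log/flink-root-taskexecutor-0-master.out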

Reposted from: https://my.oschina.net/TonyTaotao/blog/2979089
