Flink Quick Start (Part 2)

1. Create a Maven project and add the following to pom.xml

<properties>
    <maven.compiler.source>8</maven.compiler.source>
    <maven.compiler.target>8</maven.compiler.target>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <flink-version>1.13.0</flink-version>
    <java-version>1.8</java-version>
    <scala.binary.version>2.12</scala.binary.version>
    <slf4j.version>1.7.30</slf4j.version>
</properties>

<dependencies>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-java</artifactId>
        <version>${flink-version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
        <version>${flink-version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-clients_${scala.binary.version}</artifactId>
        <version>${flink-version}</version>
    </dependency>
    <dependency>
        <groupId>org.slf4j</groupId>
        <artifactId>slf4j-log4j12</artifactId>
        <version>${slf4j.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.logging.log4j</groupId>
        <artifactId>log4j-to-slf4j</artifactId>
        <version>2.14.0</version>
    </dependency>
</dependencies>

2. Add the configuration file log4j.properties (under src/main/resources)

log4j.rootLogger=error, stdout
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%-4r [%t] %-5p %c %x -  %m%n
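
With this layout, each log line starts with the milliseconds since startup (%r), then the thread name (%t), the level (%p), the logger name (%c), and the message (%m). An illustrative error line (the values here are invented) would look like:

1024 [main] ERROR org.apache.flink.runtime.taskmanager.Task  -  something went wrong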

3. Create an input folder in the project root, containing a words.txt file

hello world
hello flink
hello java

4. Batch processing (note which packages are imported)

package com.lx.wc;

import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.AggregateOperator;
import org.apache.flink.api.java.operators.DataSource;
import org.apache.flink.api.java.operators.FlatMapOperator;
import org.apache.flink.api.java.operators.UnsortedGrouping;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;

/**
 * Batch processing
 */
public class BatchWordCount {
    public static void main(String[] args) throws Exception {
        // 1. Create the execution environment
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        // 2. Read the file to obtain the data source
        DataSource<String> lineDataSource = env.readTextFile("input/words.txt");
        // 3. Split each line into words and convert each word into a two-element tuple
        FlatMapOperator<String, Tuple2<String, Long>> wordAndOneTuple = lineDataSource.flatMap((String line, Collector<Tuple2<String, Long>> out) -> {
            // Split one line of text into words
            String[] words = line.split(" ");
            for (String word : words) {
                // Emit each word as a (word, 1) tuple
                out.collect(Tuple2.of(word, 1L));
            }
        }).returns(Types.TUPLE(Types.STRING, Types.LONG));
        // 4. Group by word (tuple field 0)
        UnsortedGrouping<Tuple2<String, Long>> wordAndOneGroup = wordAndOneTuple.groupBy(0);
        // 5. Aggregate within each group (sum tuple field 1)
        AggregateOperator<Tuple2<String, Long>> sum = wordAndOneGroup.sum(1);
        sum.print();
    }
}
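
Running this in the IDE prints one (word, count) tuple per distinct word. With the words.txt above, the result contains the following tuples (the order of the groups may vary):

(flink,1)
(world,1)
(hello,3)
(java,1)

A note on .returns(Types.TUPLE(Types.STRING, Types.LONG)): Java erases the lambda's generic type parameters at compile time, so Flink cannot infer that the flatMap emits Tuple2<String, Long>, and the explicit type hint is required. An anonymous FlatMapFunction keeps the type information, so no hint is needed; a minimal sketch of that alternative (it additionally requires importing org.apache.flink.api.common.functions.FlatMapFunction):

FlatMapOperator<String, Tuple2<String, Long>> wordAndOneTuple = lineDataSource
        .flatMap(new FlatMapFunction<String, Tuple2<String, Long>>() {
            @Override
            public void flatMap(String line, Collector<Tuple2<String, Long>> out) {
                // Same splitting logic as the lambda version above
                for (String word : line.split(" ")) {
                    out.collect(Tuple2.of(word, 1L));
                }
            }
        });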

5. Stream processing (bounded)

package com.lx.wc;

import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;

/**
 * Stream processing
 * (bounded)
 */
public class BoundedStreamWordCount {
    public static void main(String[] args) throws Exception {
        // 1. Create the streaming execution environment (the only difference from
        //    the batch version is how the environment is created)
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // 2. Read the file
        DataStreamSource<String> lineDataStreamSource = env.readTextFile("input/words.txt");
        // 3. Transform each line into (word, 1) tuples
        SingleOutputStreamOperator<Tuple2<String, Long>> wordAndOneTuple = lineDataStreamSource
                .flatMap((String line, Collector<Tuple2<String, Long>> out) -> {
                    // Split one line of text into words
                    String[] words = line.split(" ");
                    for (String word : words) {
                        // Emit each word as a (word, 1) tuple
                        out.collect(Tuple2.of(word, 1L));
                    }
                })
                .returns(Types.TUPLE(Types.STRING, Types.LONG));
        // 4. Group by word
        KeyedStream<Tuple2<String, Long>, String> wordAndOneKeyedStream = wordAndOneTuple.keyBy(data -> data.f0);
        // 5. Sum the counts
        SingleOutputStreamOperator<Tuple2<String, Long>> sum = wordAndOneKeyedStream.sum(1);
        // 6. Print the results (this fires once for every incoming record)
        sum.print();
        // 7. Trigger execution
        env.execute();
        // The number prefixed to each output line is the index of the parallel
        // subtask that produced it; the default parallelism equals the number of
        // CPU cores, so on an 8-core machine the prefix is a number from 1 to 8.
    }
}
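
Because this runs as a stream, the sum is emitted incrementally: every incoming record produces an updated cumulative count for its key. Illustrative output for the same words.txt (the subtask prefixes and the interleaving depend on your machine; all records with the same key go to the same subtask, so they share a prefix):

3> (hello,1)
5> (world,1)
3> (hello,2)
8> (flink,1)
3> (hello,3)
2> (java,1)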

6. Stream processing (unbounded)

package com.lx.wc;

import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;

/**
 * True stream processing
 * (unbounded)
 */
public class StreamWordCount {
    public static void main(String[] args) throws Exception {
        // 1. Create the execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // 2. Create a text stream from a socket. On the server, open the port with
        //    netcat so the job has something to connect to (not shown here; only
        //    the core code is). The host and port could be hard-coded, as in the
        //    commented-out line below, but in a real project they are usually read
        //    from a config file or passed in as parameters:
        // DataStreamSource<String> lineDataStream = env.socketTextStream("hadoop02", 222);
        //    Extract the host and port from the program arguments; configure
        //    Program arguments as: --host hadoop02 --port 222
        ParameterTool parameterTool = ParameterTool.fromArgs(args);
        String hostName = parameterTool.get("host");
        Integer port = parameterTool.getInt("port");
        DataStreamSource<String> lineDataStream = env.socketTextStream(hostName, port);

        // 3. Transform each line into (word, 1) tuples
        SingleOutputStreamOperator<Tuple2<String, Long>> wordAndOneTuple = lineDataStream
                .flatMap((String line, Collector<Tuple2<String, Long>> out) -> {
                    // Split one line of text into words
                    String[] words = line.split(" ");
                    for (String word : words) {
                        // Emit each word as a (word, 1) tuple
                        out.collect(Tuple2.of(word, 1L));
                    }
                })
                .returns(Types.TUPLE(Types.STRING, Types.LONG));
        // 4. Group by word
        KeyedStream<Tuple2<String, Long>, String> wordAndOneKeyedStream = wordAndOneTuple.keyBy(data -> data.f0);
        // 5. Sum the counts
        SingleOutputStreamOperator<Tuple2<String, Long>> sum = wordAndOneKeyedStream.sum(1);
        // 6. Print the results (this fires once for every incoming record)
        sum.print();
        // 7. Trigger execution
        env.execute();
        // Once the job is running, type lines such as "hello world" into the netcat
        // session on the server; the console here prints the parsed counts in the
        // same format as the bounded-stream output.
    }
}
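
To try it out, first open the port on the server with netcat (the host hadoop02 and port 222 match the arguments above; on Linux, ports below 1024 require root, so any free higher port such as 7777 also works as long as the nc port and the --port argument stay in sync):

nc -lk 222

Then start the job with Program arguments --host hadoop02 --port 222 and type lines such as "hello world" into the netcat session; the job's console prints the running counts, e.g. 2> (hello,1) and 4> (world,1), exactly as in the bounded-stream example.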
