(二) flink 快速入门之 wordcount scala/java 代码

目录

flink wordcount scala代码

pom配置

批处理wordcount

流处理wordcount

flink wordcount java代码

pom依赖

批处理wordcount

流处理wordCount


flink wordcount scala代码

pom配置

<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-scala_2.12</artifactId>
    <version>1.12.1</version>
</dependency>

<!-- https://mvnrepository.com/artifact/org.apache.flink/flink-streaming-scala -->
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-streaming-scala_2.12</artifactId>
    <version>1.12.1</version>
    <scope>provided</scope>
</dependency>
注:flink1.11及以上版本,需要加上flink-clients依赖,不然执行DataStream API时会报错 java.lang.IllegalStateException: No ExecutorFactory found to execute the application
<!-- https://mvnrepository.com/artifact/org.apache.flink/flink-clients -->
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-clients_2.12</artifactId>
    <version>1.12.1</version>
</dependency>

批处理wordcount

import org.apache.flink.api.scala.ExecutionEnvironment
import org.apache.flink.api.scala._
/** Batch word count: reads a text file and prints how often each space-separated word occurs. */
object WordCount {

  def main(args: Array[String]): Unit = {
    val batchEnv = ExecutionEnvironment.getExecutionEnvironment
    val lines = batchEnv.readTextFile("src\\main\\resources\\hadoop.txt")

    // Break every line into its words, one record per word.
    val words = lines.flatMap(line => line.split(" "))

    // groupBy/sum address tuple fields by position, so the bare words must first
    // be turned into (word, 1) pairs; grouping directly after flatMap is not possible.
    val pairs = words.map(word => (word, 1))

    // Field 0 is the word (grouping key), field 1 is the count to add up.
    val counts = pairs.groupBy(0).sum(1)

    counts.print()
  }
}

流处理wordcount

import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.api.scala._
/**
 * Streaming word count: reads comma-separated words from a socket and prints
 * a running count per word.
 */
object StreamWordCount {

  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    val lines = env.socketTextStream("192.168.101.51", 7777)

    val counts = lines
      .flatMap(_.split(","))
      .map((_, 1))
      // Key by the word itself through a typed selector; the positional
      // keyBy(0) variant is deprecated and produces an untyped Tuple key.
      .keyBy(_._1)
      .sum(1)

    counts.print()

    // A streaming job only starts once execute() is called; without it nothing runs.
    env.execute("STREAM")
  }

}

输出:

flink wordcount java代码

pom依赖

 <properties>
        <flink.version>1.12.0</flink.version>
        <java.version>1.8</java.version>
        <scala.binary.version>2.11</scala.binary.version>
        <slf4j.version>1.7.30</slf4j.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-java</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-runtime-web_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-api</artifactId>
            <version>${slf4j.version}</version>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-log4j12</artifactId>
            <version>${slf4j.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-to-slf4j</artifactId>
            <version>2.14.0</version>
        </dependency>
    </dependencies>

批处理wordcount

    /**
     * Batch word count: reads the text file, splits each line into words with
     * {@code myFlatMapFunction}, and prints the number of occurrences per word.
     *
     * @throws Exception propagated from the Flink calls
     */
    public static void getWordCountBatch() throws Exception {
        final ExecutionEnvironment batchEnv = ExecutionEnvironment.getExecutionEnvironment();

        DataSource<String> lines = batchEnv.readTextFile("D:\\flink2021\\src\\main\\resources\\wordcount");
        FlatMapOperator<String, String> words = lines.flatMap(new myFlatMapFunction());

        // Pair every word with an initial count of 1. The result type is declared
        // explicitly through returns() because it is produced by a lambda.
        MapOperator<String, Tuple2<String, Integer>> pairs = words
                .map((MapFunction<String, Tuple2<String, Integer>>) word -> Tuple2.of(word, 1))
                .returns(Types.TUPLE(Types.STRING, Types.INT));

        // Group on tuple field 0 (the word), then add up field 1 (the count).
        UnsortedGrouping<Tuple2<String, Integer>> byWord = pairs.groupBy(0);
        AggregateOperator<Tuple2<String, Integer>> totals = byWord.sum(1);

        totals.print();
    }

    /**
     * Splits an input line on single spaces and emits each non-empty token.
     *
     * Blank lines and leading or repeated spaces make {@code split(" ")} return
     * empty strings; those are skipped so they are not counted as words.
     */
    public static class myFlatMapFunction implements FlatMapFunction<String,String>{
        @Override
        public void flatMap(String in, Collector<String> out) throws Exception {
            for (String key : in.split(" ")) {
                if (!key.isEmpty()) {
                    out.collect(key);
                }
            }
        }
    }

流处理wordCount

  /**
   * Streaming word count: reads the text file as a stream, turns each line into
   * (word, 1) pairs with {@code flatMapFunc}, keys the stream by word and prints
   * the running sum of the counts.
   *
   * @throws Exception propagated from the Flink calls
   */
  public static void getWordCountStream() throws Exception {
      StreamExecutionEnvironment streamEnv = StreamExecutionEnvironment.getExecutionEnvironment();

      DataStreamSource<String> lines = streamEnv.readTextFile("D:\\flink2021\\src\\main\\resources\\wordcount");
      SingleOutputStreamOperator<Tuple2<String, Integer>> pairs = lines.flatMap(new flatMapFunc());

      // The key is the word, i.e. field f0 of each tuple.
      KeyedStream<Tuple2<String, Integer>, String> byWord = pairs.keyBy(pair -> pair.f0);

      // Sum tuple position 1 (the count) per key and print every update.
      byWord.sum(1).print();

      streamEnv.execute();
  }

    /**
     * Splits an input line on single spaces and emits a (word, 1) tuple for
     * each non-empty token.
     *
     * Blank lines and leading or repeated spaces make {@code split(" ")} return
     * empty strings; those are skipped so they are not counted as words.
     */
    public static class flatMapFunc implements  FlatMapFunction<String,Tuple2<String,Integer>>{

        @Override
        public void flatMap(String in, Collector<Tuple2<String, Integer>> out) throws Exception {
            for (String word : in.split(" ")) {
                if (!word.isEmpty()) {
                    out.collect(new Tuple2<>(word, 1));
                }
            }
        }
    }

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值