Flink批处理练习

开发环境:IntelliJ IDEA、Flink 1.7.2、Maven

 

 

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <!-- Build file for the Flink batch word-count exercise. -->
    <modelVersion>4.0.0</modelVersion>

    <groupId>groupId</groupId>
    <artifactId>Flink</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <!-- Single place to bump the Flink version; every Flink artifact below refers to it. -->
        <flink.version>1.7.2</flink.version>
        <java.version>1.8</java.version>
        <scala.binary.version>2.11</scala.binary.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.11</version>
            <scope>test</scope>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.flink/flink-java -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-java</artifactId>
            <!-- Use the shared property so flink-java and flink-streaming-java cannot drift apart. -->
            <version>${flink.version}</version>
            <!-- With "provided" scope the dependency is only on the compile classpath; it is not
                 available at run time and is not packaged into the jar. Left disabled so the job
                 can be run directly from the IDE. -->
            <!--<scope>provided</scope>-->
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.flink/flink-streaming-java -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
            <!--<scope>provided</scope>-->
        </dependency>
    </dependencies>

    <build>
        <!-- Plugins are declared under <plugins>, not <pluginManagement>: pluginManagement only
             supplies default configuration and does not bind a plugin to the build, so the
             assembly execution below would never run from there. -->
        <plugins>
            <!-- Java compiler plugin -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.6.0</version>
                <configuration>
                    <!-- JDK source/target level used for compilation -->
                    <source>${java.version}</source>
                    <target>${java.version}</target>
                    <encoding>UTF-8</encoding>
                </configuration>
            </plugin>
            <!-- Builds an additional jar that bundles all dependencies -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-assembly-plugin</artifactId>
                <version>2.6</version>
                <configuration>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                    <archive>
                        <manifest>
                            <!-- Entry class written to the jar manifest (optional); set to the
                                 batch word-count job of this project. -->
                            <mainClass>Flink_Batch.FlinkBach</mainClass>
                        </manifest>
                    </archive>
                </configuration>
                <executions>
                    <execution>
                        <id>make-assembly</id>
                        <!-- Run the assembly as part of "mvn package" -->
                        <phase>package</phase>
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>

代码:

package Flink_Batch;

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.AggregateOperator;
import org.apache.flink.api.java.operators.DataSource;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;

/**
 * Batch word count on the Flink DataSet API.
 *
 * <p>Reads a text file line by line, splits every line on runs of non-word characters,
 * counts how often each word occurs, prints the counts and writes them to a file with one
 * {@code word<TAB>count} record per line.
 *
 * <p>Usage: {@code FlinkBach [inputPath [outputPath]]}. Any argument that is omitted falls
 * back to the default path defined in this class.
 */
public class FlinkBach {
    /** Input file read when no input path is passed on the command line. */
    private static final String DEFAULT_IN_PATH = "E:\\Flink\\src\\main\\java\\Flink_Batch\\word.txt";

    /** Output file written when no output path is passed on the command line. */
    private static final String DEFAULT_OUT_PATH = "E:\\Flink\\src\\main\\java\\Flink_Batch\\out.txt";

    /**
     * Runs the word-count job.
     *
     * @param args optional {@code [inputPath, outputPath]}; missing entries use the defaults
     * @throws Exception if reading the input, running the job or writing the output fails
     */
    public static void main(String[] args) throws Exception {
        // Obtain the Flink batch execution environment.
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // Resolve input and output locations, preferring command-line arguments.
        String inPath = (args != null && args.length > 0) ? args[0] : DEFAULT_IN_PATH;
        String outPath = (args != null && args.length > 1) ? args[1] : DEFAULT_OUT_PATH;

        DataSource<String> textFile = env.readTextFile(inPath);
        AggregateOperator<Tuple2<String, Integer>> sumData = textFile.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
            @Override
            public void flatMap(String s, Collector<Tuple2<String, Integer>> collector) throws Exception {
                for (String word : s.split("\\W+")) {
                    // split() yields an empty first token for blank lines and for lines that
                    // start with a non-word character; do not count "" as a word.
                    if (!word.isEmpty()) {
                        collector.collect(new Tuple2<>(word, 1));
                    }
                }
            }
        }).groupBy(0).sum(1); // group by the word (field 0) and sum the ones (field 1)

        // NOTE: in the DataSet API print() runs the job immediately on its own; the CSV sink
        // registered afterwards is executed by the env.execute() call below.
        sumData.print();

        // Persist the result: rows separated by "\n", fields by "\t", replacing any existing
        // file; parallelism 1 so the sink writes through a single task.
        sumData.writeAsCsv(outPath, "\n", "\t", org.apache.flink.core.fs.FileSystem.WriteMode.OVERWRITE).setParallelism(1);

        env.execute("FlinkBach");
    }
}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值