一、Maven 依赖
<!-- Shared build properties: source encoding (UTF-8), the Flink, Java, Scala and HBase versions, the Scala binary version used as an artifact-id suffix, and the compiler source/target levels, which both follow java.version. NOTE(review): hbase.version is not referenced by the dependency shown in this snippet; confirm it is used elsewhere in the POM. -->
<properties> <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> <flink.version>1.6.3</flink.version> <java.version>1.8</java.version> <scala.version>2.11.8</scala.version> <hbase.version>1.2.4</hbase.version> <scala.binary.version>2.11</scala.binary.version> <maven.compiler.source>${java.version}</maven.compiler.source> <maven.compiler.target>${java.version}</maven.compiler.target> </properties>
<!-- Flink client library; the artifact id carries the Scala binary version suffix and the version is taken from flink.version. NOTE(review): the Scala streaming example later in this file imports org.apache.flink.streaming.api.scala._, which presumably also needs the flink-streaming-scala_${scala.binary.version} artifact; confirm against the full POM. -->
<dependency> <groupId>org.apache.flink</groupId> <artifactId>flink-clients_${scala.binary.version}</artifactId> <version>${flink.version}</version> </dependency>
二、本地执行
import org.apache.flink.api.common.functions.FilterFunction; import org.apache.flink.api.java.DataSet; import org.apache.flink.api.common.JobExecutionResult; import org.apache.flink.api.java.ExecutionEnvironment;
/**
 * Minimal Flink batch job run in a local execution environment: reads a text
 * file, keeps only the lines that start with a fixed prefix, and writes the
 * surviving lines back out as text.
 */
public class FlinkReadTextFile {

    /**
     * Builds and runs the job.
     *
     * @param args command-line arguments; not used
     * @throws Exception if building or executing the job fails
     */
    public static void main(String[] args) throws Exception {
        final String inputPath = "file:///Users/***/Documents/test.txt";
        final String outputPath = "file:///Users/***/Documents/test01.txt";

        ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment();

        // Typed as FilterFunction so the lambda has an explicit target type.
        FilterFunction<String> hasPrefix = line -> line.startsWith("五芳斋美");

        DataSet<String> lines = env.readTextFile(inputPath);
        DataSet<String> matching = lines.filter(hasPrefix);
        matching.writeAsText(outputPath);

        // The job result is not needed here, so it is not kept.
        env.execute();
    }
}
三、流式执行（Socket 窗口单词计数）
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.api.scala._

/**
 * Streaming word count over text received from a socket.
 *
 * Connects to localhost:9000, splits every incoming line into words, and
 * prints per-word counts computed with `timeWindow(Time.seconds(5), Time.seconds(1))`.
 */
object SocketWindowWordCount {

  /**
   * Main program method.
   *
   * @param args command-line arguments; currently unused because the
   *             `--port` parsing below is disabled
   */
  def main(args: Array[String]): Unit = {

    // Reading the port from the command line is disabled; the port is fixed
    // at 9000 in the socketTextStream call below.
    // val port: Int = try {
    //   ParameterTool.fromArgs(args).getInt("port")
    // } catch {
    //   case e: Exception => {
    //     System.err.println("No port specified. Please run 'SocketWindowWordCount --port <port>'")
    //     return
    //   }
    // }

    // get the execution environment
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment

    // get input data by connecting to the socket
    val text = env.socketTextStream("localhost", 9000, '\n')

    // parse the data, group it, window it, and aggregate the counts
    val windowCounts = text
      // Split on runs of whitespace and drop empty tokens so that blank lines
      // and repeated/leading whitespace are not counted as an empty "word".
      .flatMap { line => line.split("\\s+") }
      .filter { word => word.nonEmpty }
      .map { word => WordWithCount(word, 1) }
      .keyBy("word")
      .timeWindow(Time.seconds(5), Time.seconds(1))
      .sum("count")

    // print the results with a single thread, rather than in parallel
    windowCounts.print().setParallelism(1)

    env.execute("Socket Window WordCount")
  }

  /** Data type for words with count; the field names are referenced by `keyBy("word")` and `sum("count")`. */
  case class WordWithCount(word: String, count: Long)
}