Requirement: implement the nc -lk 9999 command with Java's ServerSocket. That is, a Java program listens on port 9999 and acts as the socket source for Spark Streaming, so that lines the user types into the console are forwarded to Spark Streaming for processing.
import java.io.*;
import java.net.ServerSocket;
import java.net.Socket;

public class SparkStreamingTest {
    public static void main(String[] args) {
        sendTCP();
    }

    public static void sendTCP() {
        int port = 9999;
        // try-with-resources closes the sockets when the method exits
        try (ServerSocket server = new ServerSocket(port)) {
            System.out.println("Server is listening on port " + port);
            // accept() blocks: nothing below runs until a client
            // (the Spark Streaming receiver) connects
            try (Socket client = server.accept()) {
                System.out.println(client.getInetAddress() + " connected!");
                // read lines typed into this console via System.in
                BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
                // auto-flushing writer over the client's output stream
                PrintWriter pw = new PrintWriter(client.getOutputStream(), true);
                System.out.println("Enter lines of text (\"end\" to quit).");
                String str;
                do {
                    str = br.readLine();
                    if (str == null) break;  // stdin closed (EOF)
                    pw.println(str);         // forward the line to the client
                    System.out.println(str); // echo locally
                } while (!str.equals("end"));
            }
        } catch (Exception e) {
            System.out.println("connection exited: " + e.getMessage());
        }
    }
}
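Before moving on, the server can be sanity-checked without Spark. The minimal client below is a sketch for local testing only; the class name SocketProbe and the loopback host are assumptions, not part of the original. It connects to port 9999 and prints every line the server forwards, the same way the Spark receiver would read them.

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.Socket;

// Hypothetical test client: connects to the server above and echoes
// each line it receives, as the Spark receiver would.
public class SocketProbe {
    public static void main(String[] args) throws Exception {
        try (Socket socket = new Socket("127.0.0.1", 9999);
             BufferedReader in = new BufferedReader(
                     new InputStreamReader(socket.getInputStream()))) {
            String line;
            while ((line = in.readLine()) != null) {
                System.out.println("received: " + line);
            }
        }
    }
}

With the server verified, the Spark Streaming job below consumes the same socket: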
import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.ReceiverInputDStream
import org.apache.spark.streaming.{Seconds, StreamingContext}

/**
 * Official quick-start example:
 * http://spark.apache.org/docs/latest/streaming-programming-guide.html#a-quick-example
 */
object NetworkWordCount {
  def main(args: Array[String]): Unit = {
    // 0. Entry point: a StreamingContext object can be created from a SparkConf object.
    val conf = new SparkConf().setMaster("local[2]").setAppName("NetworkWordCount")
    // The streaming environment object; data is collected in batches at the given interval
    val ssc = new StreamingContext(conf, Seconds(4))

    // 1. Define the input sources by creating input DStreams.
    // Create a DStream that will connect to hostname:port, like localhost:9999
    val lines: ReceiverInputDStream[String] = ssc.socketTextStream("127.0.0.1", 9999)

    // 2. Define the streaming computations by applying transformation and output operations to DStreams.
    // Split each line into words
    val words = lines.flatMap(_.split(" "))
    import org.apache.spark.streaming.StreamingContext._ // not necessary since Spark 1.3

    // Count each word in each batch: map each word to (word, 1), then aggregate by key
    val pairs = words.map(word => (word, 1))
    val wordCounts = pairs.reduceByKey(_ + _)
    // Print the first ten elements of each RDD generated in this DStream to the console
    wordCounts.print()

    // 3. Start receiving data and processing it using streamingContext.start().
    ssc.start() // Start the computation
    // 4. Wait for the processing to be stopped (manually or due to any error) using streamingContext.awaitTermination().
    ssc.awaitTermination() // Wait for the computation to terminate
    // 5. Not reached, since awaitTermination blocks: processing can be manually stopped using streamingContext.stop().
    ssc.stop(true)
  }
}
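To run the example, start SparkStreamingTest first so that port 9999 is already listening, then launch NetworkWordCount and type lines into the server's console. For every 4-second batch, wordCounts.print() writes a block like the following to the console (the timestamp and counts are illustrative):

-------------------------------------------
Time: 1609459200000 ms
-------------------------------------------
(hello,2)
(spark,1)

Startup order matters: socketTextStream is a client, so the Java server must be up before the streaming job starts, just as nc -lk had to be running first in the original recipe.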