注意事项
* 1、监控目录下的文件的格式必须是统一的
* 2、不支持嵌套目录(监控目录下子目录中的文件不会被读取)
* 3、文件一旦移动到这个监控目录下,就不能再修改;之后往文件中追加的数据不会被读取
编写spark程序,实时读取某个目录的数据
package cn.taobao;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaPairDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import scala.Tuple2;
import java.util.Arrays;
import java.util.Iterator;
import java.util.regex.Pattern;
/**
 * Streaming word count over a monitored directory.
 *
 * <p>Every 3 seconds the job reads the text files that newly appeared in the
 * monitored directory, splits each line on single spaces, counts the
 * occurrences of every token within that batch and prints the counts to the
 * console.
 */
public class hdfs_streaming_Test {

    /** Token separator, compiled once instead of once per input line. */
    private static final Pattern SPACE = Pattern.compile(" ");

    /**
     * Builds the streaming pipeline, starts it and blocks until it terminates.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // Entry point of the streaming application: local mode, 2 threads, 3-second batches.
        JavaStreamingContext ssc = new JavaStreamingContext(
                "local[2]",
                "JavaLocalNetworkWordCount",
                Durations.seconds(3),
                System.getenv("SPARK_HOME"),
                // Pass the class literal itself: calling getClass() on it yields
                // java.lang.Class, which would resolve to the wrong jar.
                JavaStreamingContext.jarOfClass(hdfs_streaming_Test.class));
        ssc.sparkContext().setLogLevel("ERROR");

        // Read the text files that show up in the monitored directory.
        JavaDStream<String> lines = ssc.textFileStream("file:///C:/Users/need/Desktop/ha");

        // Split every line into space-separated tokens.
        JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
            @Override
            public Iterator<String> call(String line) throws Exception {
                return Arrays.asList(SPACE.split(line)).iterator();
            }
        });

        // Pair every token with an initial count of 1.
        JavaPairDStream<String, Integer> pairs = words.mapToPair(new PairFunction<String, String, Integer>() {
            @Override
            public Tuple2<String, Integer> call(String word) throws Exception {
                return new Tuple2<>(word, 1);
            }
        });

        // Sum the counts per token within each batch.
        JavaPairDStream<String, Integer> counts = pairs.reduceByKey(new Function2<Integer, Integer, Integer>() {
            @Override
            public Integer call(Integer left, Integer right) throws Exception {
                return left + right;
            }
        });

        // Print the per-batch result to the console.
        counts.print();

        // Explicitly start receiving and processing data.
        ssc.start();
        try {
            // Block until the computation terminates.
            ssc.awaitTermination();
        } catch (InterruptedException e) {
            // Restore the interrupt flag so code further up the stack can observe it.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        }
    }
}
用另一个程序模拟往目录中添加文件【可选,方便测试】
package cn.taobao;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;
/**
 * Test helper that drops a new, timestamp-named text file into the monitored
 * directory so the streaming job has something to pick up.
 */
public class XieFile {

    /**
     * Creates {@code runoob<timestamp>.txt} in the target directory and fills it
     * with 999 space-separated tokens.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        SimpleDateFormat simpleDateFormat = new SimpleDateFormat("yyyy-MM-dd_HHmmss");
        String fileName = "C:\\Users\\need\\Desktop\\ha\\runoob" + simpleDateFormat.format(new Date()) + ".txt";
        System.out.println(fileName);

        File file = new File(fileName);
        // FileWriter creates the file when it does not exist yet, so no separate
        // exists()/createNewFile() step is required. try-with-resources closes
        // (and flushes) the writer even when a write fails part-way through.
        try (BufferedWriter out = new BufferedWriter(new FileWriter(file, true))) {
            for (int i = 1; i < 1000; i++) {
                out.write("菜鸟教程" + (i % 10) + " ");
            }
        } catch (IOException e) {
            e.printStackTrace();
            return;
        }
        // Only reached when the file was written and closed without error.
        System.out.println("文件创建成功!");
    }
}