import java.io.PrintWriter
import java.text.SimpleDateFormat
import java.util.Date
import scala.collection.mutable.ArrayBuffer
import scala.io.Source
import scala.util.Random
/**
 * Test-data generator for the Spark Streaming job: once per second it picks a
 * random number of distinct random lines from a source file and writes them to
 * a timestamp-named file under the directory watched by textFileStream.
 *
 * Created by sicong on 16/9/19.
 */
object my {

  // Single shared RNG instead of allocating a new Random on every draw.
  private val rng = new Random

  def main(args: Array[String]): Unit = {
    // Read the whole source file once, then close the handle
    // (the original leaked it).
    val source = Source.fromFile("/Users/sicong/sparkStream/data.txt")
    val dataLines = try source.getLines().toList finally source.close()
    val dataLength = dataLines.length
    while (true) {
      // Choose how many lines this batch gets, then which (distinct) lines.
      val sampleSize = rng.nextInt(dataLength)
      val indices = randomNew(sampleSize, dataLength)
      val batch = indices.map(dataLines(_) + "\n").mkString
      // Timestamped name so every batch is a brand-new file for the stream to
      // pick up. NOTE(review): the original name has no "." before "txt";
      // kept byte-identical for compatibility.
      val writer = new PrintWriter("/Users/sicong/sparkStream/data/" + getNowDate() + "txt")
      try writer.print(batch) finally writer.close()
      Thread.sleep(1000)
    }
  }

  /** Current time formatted as a 14-digit "yyyyMMddHHmmss" string. */
  def getNowDate(): String = {
    val formatter = new SimpleDateFormat("yyyyMMddHHmmss")
    formatter.format(new Date())
  }

  /**
   * Returns `n` distinct random indices drawn from `0 until x`, in random order.
   *
   * Replaces the original rejection-sampling loop (O(n^2) membership checks and
   * potentially very slow as n approaches x) with a single shuffle, which has
   * the same result distribution.
   */
  def randomNew(n: Int, x: Int): List[Int] =
    rng.shuffle((0 until x).toList).take(n)
}
// Spark Streaming job below: watches the data directory for new files to monitor the volume of incoming IP requests.
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
import scala.util.parsing.json.JSON

/*
 * Created by sicong on 16/9/19.
 */
/**
 * Spark Streaming job that watches a directory for new files and counts
 * occurrences per IP (the first whitespace-separated field of each line),
 * writing the counts sorted in descending order for every batch.
 */
object sparkHDFLdata {
  def main(args: Array[String]): Unit = {
    val sparkconf = new SparkConf().setAppName("hello").setMaster("local[4]")
    // 10-second micro-batches: an IP reaching n requests inside one batch can
    // be flagged as an attacker.
    val scc = new StreamingContext(sparkconf, Seconds(10))
    // Fixed: the original URI "file:Users/..." was missing "///" for an
    // absolute path and did not match the absolute directory
    // "/Users/sicong/sparkStream/data/" that the generator writes into.
    val data = scc.textFileStream("file:///Users/sicong/sparkStream/data/")
    val lines = data.flatMap(_.split("\n"))                               // one record per line
    val dataOfRaw = lines.map(_.split(" ")).map(fields => (fields(0), 1)) // key on the IP field
    val dataOfReduce = dataOfRaw.reduceByKey(_ + _).map(_.swap)           // (count, ip) so we can sort by count
    val dataOfSort = dataOfReduce.transform(_.sortByKey(false))           // descending by count
    dataOfSort.saveAsTextFiles("file:///Users/sicong/sparkStream/out")    // persist each batch to the output dir
    scc.start()
    scc.awaitTermination()
  }
}