项目支持实时处理:主要是将数据实时存入 Redis 中,并可以进行 GEO(地理位置)查询。
环境(maven管理):
<dependency> <groupId>org.scala-lang</groupId> <artifactId>scala-library</artifactId> <version>2.10.6</version> </dependency> <dependency> <groupId>org.apache.spark</groupId> <artifactId>spark-core_2.10</artifactId><version>1.6.1</version></dependency><dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-client</artifactId> <version>2.6.4</version></dependency><dependency> <groupId>org.apache.spark</groupId> <artifactId>spark-streaming_2.10</artifactId> <version>1.6.1</version>
</dependency><dependency> <groupId>org.apache.spark</groupId> <artifactId>spark-streaming-kafka_2.10</artifactId> <version>1.6.1</version></dependency><dependency> <groupId>redis.clients</groupId> <artifactId>jedis</artifactId> <version>2.8.1</version></dependency>
代码如下:
import redis.clients.jedis.{Jedis, JedisPool, JedisPoolConfig}

/**
 * Pooled Jedis connection factory.
 *
 * Fix: the original built a brand-new JedisPool on every getJedis() call —
 * `pool` was a local var initialized to null, so the `pool == null` guard
 * always succeeded and connection pooling was effectively disabled. The pool
 * is now created once, lazily, and shared by all callers (`lazy val`
 * initialization is thread-safe in Scala).
 */
object RedisUtils {

  // Single shared pool, created on first use.
  private lazy val pool: JedisPool = {
    val config = new JedisPoolConfig
    config.setMaxIdle(1000)
    config.setMaxTotal(10240)
    // host, port, timeout(ms), password — NOTE(review): credentials are
    // hard-coded; consider moving them to external configuration.
    new JedisPool(config, "192.168.199.7", 6379, 0, "123")
  }

  /**
   * Borrow a Jedis connection from the shared pool.
   *
   * @return a pooled [[Jedis]] instance; callers MUST call close() on it
   *         when done so the connection is returned to the pool.
   */
  def getJedis(): Jedis = pool.getResource
}
// Note (translated): Redis GEO only supports simple spatial-distance queries;
// the hope is to make use of that capability here.
import org.apache.spark.{HashPartitioner, SparkConf}
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.dstream.{DStream, ReceiverInputDStream}
import org.apache.spark.streaming.kafka.KafkaUtils
import org.apache.spark.streaming.{Seconds, StreamingContext}

/**
 * Consumes records from Kafka via Spark Streaming and indexes each record's
 * coordinates into a Redis GEO set ("Guangdong") so they can later be
 * queried by distance (GEORADIUS etc.).
 *
 * Fixes relative to the original:
 *  - GEOADD takes LONGITUDE first, then latitude; the original passed them
 *    swapped (lat in the longitude slot). The commented reference call in the
 *    original already showed the correct order.
 *  - A Jedis connection was borrowed per RECORD and never closed — now one
 *    connection is borrowed per partition and returned in a finally block.
 *  - `var Array(...)` destructuring replaced with `val` (it is never reassigned).
 *  - Each record was split three times; it is now split once.
 */
object KafkaStreamingHandle {

  // Running-count state updater for updateStateByKey:
  // new total = sum of this batch's counts + previous state (0 if absent).
  val updateFunc = (iter: Iterator[(String, Seq[Int], Option[Int])]) => {
    iter.flatMap { case (key, counts, prev) =>
      Some(counts.sum + prev.getOrElse(0)).map(total => (key, total))
    }
  }

  def main(args: Array[String]) {
    LoggerLevels.setStreamingLogLevels()
    // Expected args: zkQuorum group topic numThreads
    // e.g. shizhanmini:2181,mini02:2181,mini03:2181 pp4 test3 2
    val Array(zkQuorum, group, topic, numThreads) = args

    // Local-mode SparkConf for testing; 3-second micro-batches.
    val sparkConf = new SparkConf().setAppName("ksh").setMaster("local[2]")
    val ssc = new StreamingContext(sparkConf, Seconds(3))
    ssc.checkpoint("c://ck2")

    // topic string may be comma-separated: each topic gets numThreads consumers.
    val topicMap = topic.split(",").map((_, numThreads.toInt)).toMap
    val lines = KafkaUtils
      .createStream(ssc, zkQuorum, group, topicMap, StorageLevel.MEMORY_AND_DISK)
      .map(_._2)

    // Record layout (comma-separated): field 3 = id?, field 7 = name,
    // field 8 = "lat lon" (space-separated) — TODO confirm against producer.
    val users = lines.map { line =>
      val fields = line.split(",")
      (fields(3), fields(7), fields(8))
    }
    users.print()

    users.foreachRDD { rdd =>
      rdd.foreachPartition { partition =>
        if (!partition.isEmpty) {
          // One connection per partition (was: one per record, never returned).
          val jedis = RedisUtils.getJedis()
          try {
            partition.foreach { case (_, vname, coordsStr) =>
              val coords = coordsStr.split(" ")
              val lat = coords(0)
              val lon = coords(1)
              // GEOADD key longitude latitude member — longitude MUST come first.
              jedis.eval(
                "return redis.call('GEOADD',KEYS[1],KEYS[2],KEYS[3],KEYS[4])",
                4, "Guangdong", String.valueOf(lon), String.valueOf(lat), vname)
            }
          } finally {
            // Returns the connection to the pool (jedis 2.8+ close semantics).
            jedis.close()
          }
        }
      }
    }

    ssc.start()
    ssc.awaitTermination()
  }
}