这里用到了json4s,具体的用法,可以参考github中的说明,地址https://github.com/json4s/json4s
还用到了Scala操作redis的一个库,具体的用法,可以参考github中的说明,地址https://github.com/etaty/rediscala
测试环境如果需要安装redis,可以用yum进行安装,安装方式说明如下
# Install Redis on a test machine via yum (tcl is a redis build/test
# dependency; epel-release provides the redis package on CentOS/RHEL).
yum install tcl
yum install epel-release
yum install redis
# Edit /etc/redis.conf and set "bind" to the machine's real IP instead of
# 127.0.0.1, otherwise remote nodes in the cluster cannot connect.
service redis start
# NOTE(review): the five lines below duplicate the five above — this looks
# like a copy/paste artifact in the original write-up; running them twice
# is harmless but unnecessary.
yum install tcl
yum install epel-release
yum install redis
# Edit /etc/redis.conf and set "bind" to the machine's real IP instead of
# 127.0.0.1, otherwise remote nodes in the cluster cannot connect.
service redis start
然后附上对应的 sbt 编译配置 build.sbt。由于它不需要经常修改,这里顺带包含了一些本项目并未用到的依赖 jar 包。
// sbt build definition for the Spark SQL -> Redis demo job.
name := "SparkSqlToRedis"
version := "1.0"
// Spark 1.5.1 artifacts are published for Scala 2.10, so the Scala version
// must stay on the 2.10.x line.
scalaVersion := "2.10.4"
// NOTE(review): plain-HTTP Bintray resolver. Bintray has since been shut
// down and newer sbt versions reject http:// resolvers — confirm this URL
// still resolves in your environment before building.
resolvers += "rediscala" at "http://dl.bintray.com/etaty/maven"
// Spark modules are "provided" (supplied by the cluster at runtime) so they
// are excluded from the assembly jar; only the Kafka connector, rediscala,
// the MySQL driver and json4s are bundled.
libraryDependencies ++= Seq(
"org.apache.spark" %% "spark-core" % "1.5.1" % "provided",
"org.apache.spark" %% "spark-sql" % "1.5.1" % "provided",
"org.apache.spark" %% "spark-hive" % "1.5.1" % "provided",
"org.apache.spark" %% "spark-streaming" % "1.5.1" % "provided",
"org.apache.spark" %% "spark-streaming-kafka" % "1.5.1",
"com.etaty.rediscala" %% "rediscala" % "1.5.0",
"mysql" % "mysql-connector-java" % "5.1.37",
"org.json4s" %% "json4s-native" % "3.2.10",
"org.json4s" %% "json4s-jackson" % "3.2.10"
)
// sbt-assembly merge rules for duplicate files across dependency jars:
// drop manifests, jar signatures and stray log4j configs; merge
// service-loader registrations and Typesafe reference.conf; otherwise keep
// the first copy encountered.
mergeStrategy in assembly := {
case m if m.toLowerCase.endsWith("manifest.mf") => MergeStrategy.discard
case m if m.toLowerCase.matches("meta-inf.*\\.sf$") => MergeStrategy.discard
case "log4j.properties" => MergeStrategy.discard
case m if m.toLowerCase.startsWith("meta-inf/services/") => MergeStrategy.filterDistinctLines
case "reference.conf" => MergeStrategy.concat
case _ => MergeStrategy.first
}
// NOTE(review): this entire build.sbt listing is a verbatim duplicate of the
// listing earlier in the document — likely a copy/paste artifact; only one
// copy belongs in the actual build.sbt file.
name := "SparkSqlToRedis"
version := "1.0"
// Spark 1.5.1 artifacts are published for Scala 2.10.
scalaVersion := "2.10.4"
// NOTE(review): plain-HTTP Bintray resolver; Bintray has been shut down —
// verify this URL still resolves.
resolvers += "rediscala" at "http://dl.bintray.com/etaty/maven"
// Spark modules are "provided" by the cluster; only the remaining libraries
// end up inside the assembly jar.
libraryDependencies ++= Seq(
"org.apache.spark" %% "spark-core" % "1.5.1" % "provided",
"org.apache.spark" %% "spark-sql" % "1.5.1" % "provided",
"org.apache.spark" %% "spark-hive" % "1.5.1" % "provided",
"org.apache.spark" %% "spark-streaming" % "1.5.1" % "provided",
"org.apache.spark" %% "spark-streaming-kafka" % "1.5.1",
"com.etaty.rediscala" %% "rediscala" % "1.5.0",
"mysql" % "mysql-connector-java" % "5.1.37",
"org.json4s" %% "json4s-native" % "3.2.10",
"org.json4s" %% "json4s-jackson" % "3.2.10"
)
// sbt-assembly merge rules: discard manifests/signatures/log4j configs,
// merge service-loader files and reference.conf, take the first copy of
// everything else.
mergeStrategy in assembly := {
case m if m.toLowerCase.endsWith("manifest.mf") => MergeStrategy.discard
case m if m.toLowerCase.matches("meta-inf.*\\.sf$") => MergeStrategy.discard
case "log4j.properties" => MergeStrategy.discard
case m if m.toLowerCase.startsWith("meta-inf/services/") => MergeStrategy.filterDistinctLines
case "reference.conf" => MergeStrategy.concat
case _ => MergeStrategy.first
}
然后是对应的具体统计代码实现,也非常简单:HiveFromSparkToRedis.scala
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql._
import org.apache.spark.sql.hive.HiveContext
import redis.RedisClient
import scala.concurrent.Await
import scala.concurrent.duration._
import scala.concurrent.ExecutionContext.Implicits.global
import org.json4s._
import org.json4s.jackson.JsonMethods._
import org.json4s.JsonDSL._
/** Runs a Hive query through Spark SQL and writes each result row to Redis
  * as a small JSON document.
  */
object HiveFromSparkToRedis {

  // Shared Akka actor system and Redis connection, created lazily once.
  // The original code created and shut down a fresh ActorSystem and
  // RedisClient inside insertRedis for EVERY row, which is extremely
  // expensive (a new thread pool and TCP connection per record).
  private implicit lazy val akkaSystem: akka.actor.ActorSystem = akka.actor.ActorSystem()
  private lazy val redis = RedisClient("192.168.163.214", 6379)

  def main(args: Array[String]) {
    val sparkConf = new SparkConf().setAppName("HiveFromSparkToRedis")
    val sc = new SparkContext(sparkConf)
    try {
      val hiveContext = new HiveContext(sc)
      import hiveContext.implicits._
      import hiveContext.sql
      // collect() pulls every matching row to the driver; acceptable for a
      // small demo table, but would not scale to large result sets.
      sql("SELECT name,age FROM wyp where age>10")
        .collect()
        .foreach(rs => insertRedis(rs.getString(0), rs.getInt(1)))
    } finally {
      // Always release the Spark context and the actor system, even if the
      // query or a Redis write fails.
      sc.stop()
      akkaSystem.shutdown()
    }
  }

  /** Serializes (name, age) as JSON and stores it in Redis.
    *
    * NOTE(review): every call writes to the SAME key "dbsave", so each row
    * overwrites the previous one and only the last row survives — confirm
    * this is intended; a per-name key (e.g. s"dbsave:$name") seems more
    * likely to be what was meant.
    *
    * @param name row's name column
    * @param age  row's age column (stored as a JSON string)
    */
  def insertRedis(name: String, age: Int) {
    val json = ("name" -> name) ~ ("age" -> age.toString)
    val pending = redis.set("dbsave", compact(render(json)))
    // Blocking at the program edge only; one second per write.
    Await.result(pending, 1 seconds)
  }
}
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql._
import org.apache.spark.sql.hive.HiveContext
import redis.RedisClient
import scala.concurrent.Await
import scala.concurrent.duration._
import scala.concurrent.ExecutionContext.Implicits.global
import org.json4s._
import org.json4s.jackson.JsonMethods._
import org.json4s.JsonDSL._
// NOTE(review): this object is a verbatim duplicate of the
// HiveFromSparkToRedis listing earlier in the document — a copy/paste
// artifact; only one copy belongs in the actual source file.
object HiveFromSparkToRedis {
// Entry point: runs a Hive query via Spark SQL on the driver and pushes
// each result row to Redis.
def main(args: Array[String]) {
val sparkConf = new SparkConf().setAppName("HiveFromSparkToRedis")
val sc = new SparkContext(sparkConf)
val hiveContext = new HiveContext(sc)
import hiveContext.implicits._
import hiveContext.sql
// collect() brings all matching rows to the driver; fine for a demo table.
sql("SELECT name,age FROM wyp where age>10").collect().foreach(rs => insertRedis(rs.getString(0), rs.getInt(1)))
sc.stop()
}
// Serializes (name, age) as JSON and stores it in Redis.
// NOTE(review): a new ActorSystem and RedisClient are created and torn down
// on EVERY call — very expensive per row; they should be shared. Also every
// call writes the same key "dbsave", so each row overwrites the previous
// one — confirm that is intended.
def insertRedis(name: String, age: Int) {
implicit val akkaSystem = akka.actor.ActorSystem()
val redis = RedisClient("192.168.163.214", 6379)
val json = ("name" -> name) ~ ("age" -> age.toString)
val futurePong = redis.set("dbsave", compact(render(json)))
Await.result(futurePong, 1 seconds)
akkaSystem.shutdown()
}
}
然后是相关的提交到分布式系统的命令
/usr/local/spark-1.5.1-bin-hadoop2.6/bin/spark-submit --class HiveFromSparkToRedis --master spark://namenode:7077 --executor-memory 1g /home/hadoop/test_sparktoredis/target/scala-2.10/SparkSqlToRedis-assembly-1.0.jar
最后附上一个完整的测试代码,包含目录结构的test_sparktoredis.tar
做一个简单的笔记