SparkSQL 到 Redis:Spark 查询 Hive 的结果 JSON 化后保存到 Redis,并提供接口查询

这里用到了json4s,具体的用法,可以参考github中的说明,地址https://github.com/json4s/json4s

还用到了Scala操作redis的一个库,具体的用法,可以参考github中的说明,地址https://github.com/etaty/rediscala

测试环境如果需要安装redis,可以用yum进行安装,安装方式说明如下

yum install tcl

yum install epel-release

yum install redis

#修改一下/etc/redis.conf将bind设置为ip,而不是127.0.0.1否则分布式上不好用哦

service redis start

yum install tcl

yum install epel-release

yum install redis

#修改一下/etc/redis.conf将bind设置为ip,而不是127.0.0.1否则分布式上不好用哦

service redis start

然后附上对应的 sbt 构建文件 build.sbt。由于它不需要经常修改,这里顺带包含了很多本例用不到的依赖 jar 包

// --- Build definition for the Spark SQL -> Redis demo job (build.sbt) ---

name := "SparkSqlToRedis"

version := "1.0"

// Scala 2.10.x is required to match the _2.10 Spark 1.5.1 artifacts below.
scalaVersion := "2.10.4"

// Resolver for the rediscala client.
// NOTE(review): Bintray hosting was sunset in 2021, so this URL is very likely
// dead — confirm where com.etaty.rediscala artifacts are served from today.
resolvers += "rediscala" at "http://dl.bintray.com/etaty/maven"

libraryDependencies ++= Seq(
  // Spark modules are "provided": the cluster supplies them at runtime, so
  // they are excluded from the assembly (fat) jar.
  "org.apache.spark" %% "spark-core" % "1.5.1" % "provided",
  "org.apache.spark" %% "spark-sql" % "1.5.1" % "provided",
  "org.apache.spark" %% "spark-hive" % "1.5.1" % "provided",
  "org.apache.spark" %% "spark-streaming" % "1.5.1" % "provided",
  // NOTE(review): the Kafka and MySQL dependencies are not used by
  // HiveFromSparkToRedis itself; the article keeps them for convenience.
  "org.apache.spark" %% "spark-streaming-kafka" % "1.5.1",
  "com.etaty.rediscala" %% "rediscala" % "1.5.0",
  "mysql" % "mysql-connector-java" % "5.1.37",
  "org.json4s" %% "json4s-native" % "3.2.10",
  "org.json4s" %% "json4s-jackson" % "3.2.10"
)

// sbt-assembly: how to resolve files that appear in more than one jar
// when building the fat jar.
mergeStrategy in assembly := {
  // Drop manifests and signature files — stale signatures would make the
  // merged jar fail verification.
  case m if m.toLowerCase.endsWith("manifest.mf") => MergeStrategy.discard
  case m if m.toLowerCase.matches("meta-inf.*\\.sf$") => MergeStrategy.discard
  case "log4j.properties" => MergeStrategy.discard
  // Keep the union of service-loader entries from all jars.
  case m if m.toLowerCase.startsWith("meta-inf/services/") => MergeStrategy.filterDistinctLines
  // Typesafe config fragments must be concatenated, not overwritten.
  case "reference.conf" => MergeStrategy.concat
  case _ => MergeStrategy.first
}

// NOTE(review): this listing is a verbatim duplicate of the build.sbt shown
// directly above — the page apparently pasted it twice. Only ONE copy belongs
// in the actual build.sbt file.

name := "SparkSqlToRedis"

version := "1.0"

scalaVersion := "2.10.4"

resolvers += "rediscala" at "http://dl.bintray.com/etaty/maven"

libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-core" % "1.5.1" % "provided",
  "org.apache.spark" %% "spark-sql" % "1.5.1" % "provided",
  "org.apache.spark" %% "spark-hive" % "1.5.1" % "provided",
  "org.apache.spark" %% "spark-streaming" % "1.5.1" % "provided",
  "org.apache.spark" %% "spark-streaming-kafka" % "1.5.1",
  "com.etaty.rediscala" %% "rediscala" % "1.5.0",
  "mysql" % "mysql-connector-java" % "5.1.37",
  "org.json4s" %% "json4s-native" % "3.2.10",
  "org.json4s" %% "json4s-jackson" % "3.2.10"
)

mergeStrategy in assembly := {
  case m if m.toLowerCase.endsWith("manifest.mf") => MergeStrategy.discard
  case m if m.toLowerCase.matches("meta-inf.*\\.sf$") => MergeStrategy.discard
  case "log4j.properties" => MergeStrategy.discard
  case m if m.toLowerCase.startsWith("meta-inf/services/") => MergeStrategy.filterDistinctLines
  case "reference.conf" => MergeStrategy.concat
  case _ => MergeStrategy.first
}

然后是对应的具体的统计的代码实现,也非常的简单HiveFromSparkToRedis.scala

import org.apache.spark.{SparkConf, SparkContext}

import org.apache.spark.sql._

import org.apache.spark.sql.hive.HiveContext

import redis.RedisClient

import scala.concurrent.Await

import scala.concurrent.duration._

import scala.concurrent.ExecutionContext.Implicits.global

import org.json4s._

import org.json4s.jackson.JsonMethods._

import org.json4s.JsonDSL._

/** Queries Hive through Spark SQL and writes each matching row to Redis as a
  * small JSON document ({"name": ..., "age": ...}).
  */
object HiveFromSparkToRedis {

  /** Entry point: runs the Hive query on the cluster, collects the (assumed
    * small) result set to the driver, and pushes each row to Redis.
    */
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setAppName("HiveFromSparkToRedis")
    val sc = new SparkContext(sparkConf)
    try {
      val hiveContext = new HiveContext(sc)
      import hiveContext.implicits._
      import hiveContext.sql

      // collect() brings all rows to the driver, so insertRedis runs
      // driver-side, never on executors.
      sql("SELECT name,age FROM wyp where age>10")
        .collect()
        .foreach(rs => insertRedis(rs.getString(0), rs.getInt(1)))
    } finally {
      // Bug fix: stop the SparkContext even if the query or a Redis write throws.
      sc.stop()
    }
  }

  /** Serializes one (name, age) record to JSON and stores it in Redis,
    * blocking briefly so the write completes before the client is torn down.
    *
    * NOTE(review): every call writes to the SAME key "dbsave", so each record
    * overwrites the previous one and only the last row survives. If all rows
    * should be kept, key by name (or use a Redis hash/list) — confirm intent.
    *
    * NOTE(review): creating an ActorSystem (thread pools and all) per record
    * is very heavyweight; for real workloads build one system/client and
    * reuse it across calls.
    */
  def insertRedis(name: String, age: Int): Unit = {
    implicit val akkaSystem = akka.actor.ActorSystem()
    try {
      val redis = RedisClient("192.168.163.214", 6379)
      val json = ("name" -> name) ~ ("age" -> age.toString)
      val writeDone = redis.set("dbsave", compact(render(json)))
      // `1.second` instead of postfix `1 seconds` (avoids postfixOps).
      Await.result(writeDone, 1.second)
    } finally {
      // Bug fix: shutdown used to be skipped when Await.result timed out or
      // failed, leaking the ActorSystem and its threads on every error.
      akkaSystem.shutdown()
    }
  }
}

import org.apache.spark.{SparkConf, SparkContext}

import org.apache.spark.sql._

import org.apache.spark.sql.hive.HiveContext

import redis.RedisClient

import scala.concurrent.Await

import scala.concurrent.duration._

import scala.concurrent.ExecutionContext.Implicits.global

import org.json4s._

import org.json4s.jackson.JsonMethods._

import org.json4s.JsonDSL._

// NOTE(review): this is a verbatim duplicate of the HiveFromSparkToRedis
// object defined earlier on this page — the listing was pasted twice. Two
// definitions of the same object would not compile in one source file; keep
// only one copy in HiveFromSparkToRedis.scala.
object HiveFromSparkToRedis {

  // Entry point: run the Hive query, collect results to the driver, and push
  // each (name, age) row to Redis.
  def main(args: Array[String]) {
    val sparkConf = new SparkConf().setAppName("HiveFromSparkToRedis")
    val sc = new SparkContext(sparkConf)
    val hiveContext = new HiveContext(sc)
    import hiveContext.implicits._
    import hiveContext.sql

    // collect() pulls all rows driver-side, so insertRedis runs on the driver.
    sql("SELECT name,age FROM wyp where age>10").collect().foreach(rs => insertRedis(rs.getString(0), rs.getInt(1)))

    sc.stop()
  }

  // Serializes one record as JSON and writes it to the fixed Redis key
  // "dbsave" — each call overwrites the previous record.
  def insertRedis(name: String, age: Int) {
    // rediscala requires an implicit ActorSystem; one is created per call.
    implicit val akkaSystem = akka.actor.ActorSystem()
    val redis = RedisClient("192.168.163.214", 6379)
    val json = ("name" -> name) ~ ("age" -> age.toString)
    val futurePong = redis.set("dbsave", compact(render(json)))
    // Block so the async write finishes before the system is shut down.
    Await.result(futurePong, 1 seconds)
    akkaSystem.shutdown()
  }
}

然后是相关的提交到分布式系统的命令

/usr/local/spark-1.5.1-bin-hadoop2.6/bin/spark-submit --class HiveFromSparkToRedis --master spark://namenode:7077 --executor-memory 1g /home/hadoop/test_sparktoredis/target/scala-2.10/SparkSqlToRedis-assembly-1.0.jar

最后附上一个完整的测试代码,包含目录结构的test_sparktoredis.tar

做一个简单的笔记

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值