Kafka + Spark + Phoenix data transfer

1. Create a new Maven project in IDEA:
pom.xml

<properties>
    <scala.version>2.11.8</scala.version>
    <spark.version>2.3.0</spark.version>
    <spark.artifact>2.11</spark.artifact>
    <dependency.scope>compile</dependency.scope>
</properties>

<repositories>
    <repository>
        <id>scala-tools.org</id>
        <name>Scala-Tools Maven2 Repository</name>
        <url>http://scala-tools.org/repo-releases</url>
    </repository>
</repositories>

<pluginRepositories>
    <pluginRepository>
        <id>scala-tools.org</id>
        <name>Scala-Tools Maven2 Repository</name>
        <url>http://scala-tools.org/repo-releases</url>
    </pluginRepository>
</pluginRepositories>

<dependencies>
    <dependency>
        <groupId>log4j</groupId>
        <artifactId>log4j</artifactId>
        <version>1.2.17</version>
    </dependency>
    <dependency>
        <groupId>org.scala-lang</groupId>
        <artifactId>scala-library</artifactId>
        <version>${scala.version}</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.spark/spark-core -->
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-core_2.11</artifactId>
        <version>2.3.0</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.spark/spark-sql -->
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-sql_2.11</artifactId>
        <version>2.3.0</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.spark/spark-hive -->
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-hive_2.11</artifactId>
        <version>2.3.0</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.spark/spark-mllib -->
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-mllib_2.11</artifactId>
        <version>2.3.0</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.spark/spark-streaming-kafka-0-8 -->
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-streaming-kafka-0-8_2.11</artifactId>
        <version>2.3.0</version>
    </dependency>

    <dependency>
        <groupId>org.apache.kafka</groupId>
        <artifactId>kafka-clients</artifactId>
        <version>0.9.0.1</version>
    </dependency>

    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>4.4</version>
        <scope>test</scope>
    </dependency>
    <dependency>
        <groupId>org.specs</groupId>
        <artifactId>specs</artifactId>
        <version>1.2.5</version>
        <scope>test</scope>
    </dependency>
    <dependency>
        <groupId>io.spray</groupId>
        <artifactId>spray-json_2.11</artifactId>
        <version>1.3.2</version>
    </dependency>
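    <!-- Assumption (not in the original post): step 4 below imports org.apache.phoenix.spark._,
         so the Phoenix Spark connector must also be on the classpath. Pick the version that
         matches your Phoenix/HBase installation; 4.14.1-HBase-1.2 is only an example. -->
    <dependency>
        <groupId>org.apache.phoenix</groupId>
        <artifactId>phoenix-spark</artifactId>
        <version>4.14.1-HBase-1.2</version>
    </dependency>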
</dependencies>

<build>
    <sourceDirectory>src/main/scala</sourceDirectory>
    <testSourceDirectory>src/test/scala</testSourceDirectory>
    <plugins>
        <plugin>
            <groupId>org.scala-tools</groupId>
            <artifactId>maven-scala-plugin</artifactId>
            <executions>
                <execution>
                    <goals>
                        <goal>compile</goal>
                        <goal>testCompile</goal>
                    </goals>
                </execution>
            </executions>
            <configuration>
                <scalaVersion>${scala.version}</scalaVersion>
                <args>
                    <arg>-target:jvm-1.8</arg>
                </args>
            </configuration>
        </plugin>
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-eclipse-plugin</artifactId>
            <configuration>
                <downloadSources>true</downloadSources>
                <buildcommands>
                    <buildcommand>ch.epfl.lamp.sdt.core.scalabuilder</buildcommand>
                </buildcommands>
                <additionalProjectnatures>
                    <projectnature>ch.epfl.lamp.sdt.core.scalanature</projectnature>
                </additionalProjectnatures>
                <classpathContainers>
                    <classpathContainer>org.eclipse.jdt.launching.JRE_CONTAINER</classpathContainer>
                    <classpathContainer>ch.epfl.lamp.sdt.launching.SCALA_CONTAINER</classpathContainer>
                </classpathContainers>
            </configuration>
        </plugin>
    </plugins>
</build>
<reporting>
    <plugins>
        <plugin>
            <groupId>org.scala-tools</groupId>
            <artifactId>maven-scala-plugin</artifactId>
            <configuration>
                <scalaVersion>${scala.version}</scalaVersion>
            </configuration>
        </plugin>
    </plugins>
</reporting>

2. Create the main class

import kafka.serializer.StringDecoder
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.kafka._

object SparkKafkascamer {

  def main(args: Array[String]): Unit = {
    // Run in local mode and enable backpressure so the ingest rate adapts to processing speed
    val sparkConf = new SparkConf()
      .setMaster("local[5]")
      .setAppName("SparkStreamingKafka_direct")
      .set("spark.streaming.backpressure.enabled", "true")

    // Batch interval of 5 seconds
    val ssc = new StreamingContext(sparkConf, Seconds(5))
    val topics = Set("sptest")

    // Consume the data coming from Kafka on node1, specifying the group id
    val kafkaParams = Map(
      "bootstrap.servers" -> "node1.etonedu.cn:6667",
      "group.id" -> "sptest"
    )

    // Call createDirectStream (Kafka 0.8 direct API: key/value types and their decoders)
    val messages = KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](ssc, kafkaParams, topics)

    // Keep only the message value
    val lines = messages.map(_._2)

    // Hand every record to SparkConnectionScalaNew.runmain
    lines.foreachRDD { rdd =>
      rdd.collect().foreach(record => SparkConnectionScalaNew.runmain(record, ssc))
    }

    ssc.start()            // actually start the job
    ssc.awaitTermination() // block until terminated
  }
}
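
To push a test message into the "sptest" topic, a minimal producer sketch using the kafka-clients dependency already declared in the pom could look like the following (the object name TestProducer is made up for illustration; broker address, topic and JSON payload are the ones used elsewhere in this post):

import java.util.Properties

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

object TestProducer {
  def main(args: Array[String]): Unit = {
    val props = new Properties()
    props.put("bootstrap.servers", "node1.etonedu.cn:6667")
    props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer")

    val producer = new KafkaProducer[String, String](props)
    // Same JSON format that SparkKafkaMessNew.messMain expects in step 4
    val json = """{"UID":"2019","FAMILY":"new","NAME":"2019-03-15 15:05:19","VALUE1":"12421312421"}"""
    producer.send(new ProducerRecord[String, String]("sptest", json))
    producer.close()
  }
}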

3. Create a class: SparkConnectionScalaNew

import java.util.UUID

import org.apache.spark.streaming.StreamingContext

object SparkConnectionScalaNew {
  // Generate a UUID as the row key for this record, then hand it on for parsing and writing
  def runmain(keyValues: String, ssc: StreamingContext): Unit = {
    val uuid = UUID.randomUUID().toString
    SparkKafkaMessNew.messMain(keyValues, ssc, uuid)
  }
}

4. Create a class: SparkKafkaMessNew

import java.util.Date
import java.text.SimpleDateFormat

import org.apache.spark.streaming.StreamingContext

import scala.util.parsing.json.JSON
import org.apache.phoenix.spark._

object SparkKafkaMessNew {

  // Unwrap the Option returned by JSON.parseFull into a Map; fall back to an empty Map on bad input
  def regJson(json: Option[Any]): Map[String, Any] = json match {
    case Some(map: Map[String, Any] @unchecked) => map
    case _ => Map.empty[String, Any]
  }

  // Parse the JSON message, e.g.
  //   mess  = {"UID":"2019","FAMILY":"new","NAME":"2019-03-15 15:05:19","VALUE1":"12421312421"}
  //   first = Map(UID -> 2019, FAMILY -> new, NAME -> 2019-03-15 15:05:19, VALUE1 -> 12421312421)
  def messMain(mess: String, ssc: StreamingContext, uuid: String): Unit = {
    val jsonS = JSON.parseFull(mess)
    val first = regJson(jsonS)

    println(mess)
    println(first)

    // Pull the columns out of the parsed map; the keys must match the column names in the JSON message.
    // first.get(...) returns an Option, so unwrap it instead of string-stripping "Some(...)".
    val name   = first.get("NAME").map(_.toString).getOrElse("").trim
    val family = first.get("FAMILY").map(_.toString).getOrElse("").trim

    // Timestamp written into the VALUE1 column
    val date   = new Date()
    val df     = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
    val dateRe = df.format(date)

    val sc = ssc.sparkContext

    // One tuple per row; the elements map positionally onto the table columns listed below
    val dataSet = List((uuid, family, name, dateRe))
    sc.parallelize(dataSet).saveToPhoenix(
      "LOG_ANA.LOGFILE_TEST",
      Seq("UID", "FAMILY", "NAME", "VALUE1"),
      zkUrl = Some("node1.etonedu.cn:2181")
    )
  }
}
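
Note that saveToPhoenix does not create the table: LOG_ANA.LOGFILE_TEST with the columns UID, FAMILY, NAME and VALUE1 must already exist in Phoenix before the job runs. As a quick sanity check that rows are arriving, a separate read-back sketch using the phoenix-spark DataFrame source could look like this (the object name PhoenixReadCheck is made up for illustration; the table name and ZooKeeper address are the ones used above):

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext

object PhoenixReadCheck {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[2]").setAppName("PhoenixReadCheck")
    val sc = new SparkContext(conf)
    val sqlContext = new SQLContext(sc)

    // Load the Phoenix table written by SparkKafkaMessNew as a DataFrame and print a few rows
    val df = sqlContext.read
      .format("org.apache.phoenix.spark")
      .options(Map("table" -> "LOG_ANA.LOGFILE_TEST", "zkUrl" -> "node1.etonedu.cn:2181"))
      .load()

    df.show()
    sc.stop()
  }
}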

Reference: https://www.cnblogs.com/zzmmyy/p/10338261.html
