Saving Data to MySQL in Real Time (Receiver-Based Approach)

The cluster is allocated as follows:

192.168.58.11        spark01
192.168.58.12        spark02
192.168.58.13        spark03
Spark version: spark-2.1.0-bin-hadoop2.7
Kafka version: kafka_2.11-2.0.0

The Spark Streaming Program

package com.kk.sparkstreaming.kafka

import org.apache.spark.SparkConf
import org.apache.spark.streaming.Seconds
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.kafka.KafkaUtils
import org.apache.log4j.Level
import org.apache.log4j.Logger
import java.sql.DriverManager
import java.sql.PreparedStatement
import java.sql.Connection

object KafkaReceiver {
   def main(args: Array[String]): Unit = {

     // Reduce log output
     Logger.getLogger("org.apache.spark").setLevel(Level.ERROR)
     Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.OFF)

     // local[2]: the receiver occupies one thread, so at least two are needed locally
     val sparkConf = new SparkConf().setAppName("KafkaReceiver").setMaster("local[2]")

     // Batch interval of 3 seconds
     val sparkStreaming = new StreamingContext(sparkConf, Seconds(3))

     // Topic map: the value (1) is the number of receiver threads for this topic, not a record count
     val topics = Map("kevin" -> 1)

     // Create the Kafka input stream, specifying the ZooKeeper address
     val kafkaStream = KafkaUtils.createStream(sparkStreaming, "192.168.58.11:2181", "mygroup", topics)

     // Debug stream: render each received (key, value) pair as a string
     val lineDStream = kafkaStream.map(_.toString())

     // Kafka delivers records as <key, value> pairs; the key is null here, so keep only the value
     val logRDD = kafkaStream.map(_._2)

     lineDStream.print()

     // Split each received line on commas and extract the IP field
     val datas = logRDD.map(line => {
        // Sample record: 1,201.105.101.108,http://mystore.jsp/?productid=1,2017020029,2,1
        val fields: Array[String] = line.split(",")
        val ip = fields(1)
        (ip, 1)
     })

    // Print to the console
    datas.print()
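
    // Optional variant (an assumption, not part of the original program): datas
    // emits one (ip, 1) pair per record, so one row is inserted per record.
    // To write a single aggregated count per IP per batch instead, reduce
    // before saving:
    // val ipCounts = datas.reduceByKey(_ + _)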

    // Save the data to MySQL. JDBC objects are not serializable, so the
    // connection and statement must be created inside foreachPartition,
    // i.e. on the executor that actually writes the data, and closed there.
    datas.foreachRDD(rdd => {
      rdd.foreachPartition(partition => {
        var conn: Connection = null
        var ps: PreparedStatement = null
        try {
          Class.forName("com.mysql.jdbc.Driver")
          conn = DriverManager.getConnection("jdbc:mysql://192.168.58.14:3306/storm?useUnicode=true&characterEncoding=utf8", "root", "kevin")
          ps = conn.prepareStatement("insert into result values(?,?)")
          partition.foreach(record => {
            ps.setString(1, record._1)
            ps.setInt(2, record._2)
            ps.executeUpdate()
          })
        } catch {
          case t: Throwable => t.printStackTrace()
        } finally {
          if (ps != null) ps.close()
          if (conn != null) conn.close()
        }
      })
    })

    sparkStreaming.start()
    sparkStreaming.awaitTermination()

   }
}
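
The job assumes a table named result already exists in the storm database. A minimal sketch of the DDL (the column names ip and cnt are assumptions; only the types are implied by the setString/setInt calls):

create table result (
    ip  varchar(50),
    cnt int
);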

The pom.xml File

<properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <spark.version>2.2.1</spark.version>
        <scala.version>2.11.1</scala.version>
</properties>

<dependencies>

        <dependency>
                <groupId>org.scala-lang</groupId>
                <artifactId>scala-library</artifactId>
                <version>${scala.version}</version>
        </dependency>
        
        <dependency>
                <groupId>org.scala-lang</groupId>
                <artifactId>scala-compiler</artifactId>
                <version>${scala.version}</version>
        </dependency>
        
        <dependency>
                <groupId>org.scala-lang</groupId>
                <artifactId>scala-reflect</artifactId>
                <version>${scala.version}</version>
        </dependency>
        
        <dependency>
                <groupId>org.apache.spark</groupId>
                <artifactId>spark-core_2.11</artifactId>
                <version>${spark.version}</version>
        </dependency>
        
        <dependency>
                <groupId>org.apache.spark</groupId>
                <artifactId>spark-streaming_2.11</artifactId>
                <version>${spark.version}</version>
        </dependency>
        
        <dependency>
                <groupId>org.apache.spark</groupId>
                <artifactId>spark-sql_2.11</artifactId>
                <version>${spark.version}</version>
        </dependency>
        
        <dependency>
                <groupId>org.apache.spark</groupId>
                <artifactId>spark-streaming-kafka-0-8_2.11</artifactId>
                <version>2.1.1</version>
        </dependency>

        <dependency>
                <groupId>mysql</groupId>
                <artifactId>mysql-connector-java</artifactId>
                <version>5.1.8</version>
        </dependency>

</dependencies>
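
One caveat worth noting: the pom pins spark.version to 2.2.1 and spark-streaming-kafka-0-8 to 2.1.1, while the cluster runs spark-2.1.0-bin-hadoop2.7. Keeping all three aligned with the Spark version actually installed on the cluster (e.g. setting <spark.version>2.1.0</spark.version> and the matching connector version) is the safer choice.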

Testing
1. Start the Kafka cluster and create a message producer (the commands are sketched below).
2. Submit the program to the cluster.
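
For step 1, a minimal sketch of the commands, assuming a standard Kafka 2.0 installation and the ZooKeeper/broker addresses from the cluster layout above (the 9092 broker port is an assumption):

# Create the topic the program subscribes to
bin/kafka-topics.sh --create --zookeeper 192.168.58.11:2181 --replication-factor 1 --partitions 3 --topic kevin

# Start a console producer and type test records such as:
# 1,201.105.101.108,http://mystore.jsp/?productid=1,2017020029,2,1
bin/kafka-console-producer.sh --broker-list 192.168.58.11:9092 --topic kevin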
Running the job on the cluster throws an exception caused by a jar conflict:

Exception in thread "main" java.lang.ClassCastException: kafka.cluster.BrokerEndPoint cannot be cast to kafka.cluster.Broker
        at org.apache.spark.streaming.kafka.KafkaCluster$$anonfun$2$$anonfun$3$$anonfun$apply$6$$anonfun$apply$7.apply(KafkaCluster.scala:90)
        at scala.Option.map(Option.scala:145)
        at org.apache.spark.streaming.kafka.KafkaCluster$$anonfun$2$$anonfun$3$$anonfun$apply$6.apply(KafkaCluster.scala:90)
        at org.apache.spark.streaming.kafka.KafkaCluster$$anonfun$2$$anonfun$3$$anonfun$apply$6.apply(KafkaCluster.scala:87)
        at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:251)
        at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:251)
        at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
        at scala.collection.mutable.WrappedArray.foreach(WrappedArray.scala:34)
        at scala.collection.TraversableLike$class.flatMap(TraversableLike.scala:251)
        at scala.collection.AbstractTraversable.flatMap(Traversable.scala:105)
        at org.apache.spark.streaming.kafka.KafkaCluster$$anonfun$2$$anonfun$3.apply(KafkaCluster.scala:87)
        at org.apache.spark.streaming.kafka.KafkaCluster$$anonfun$2$$anonfun$3.apply(KafkaCluster.scala:86)
        at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:251)
        at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:251)
        at scala.collection.immutable.Set$Set1.foreach(Set.scala:74)
        at scala.collection.TraversableLike$class.flatMap(TraversableLike.scala:251)
        at scala.collection.AbstractTraversable.flatMap(Traversable.scala:105)
        at org.apache.spark.streaming.kafka.KafkaCluster$$anonfun$2.apply(KafkaCluster.scala:86)
        at org.apache.spark.streaming.kafka.KafkaCluster$$anonfun$2.apply(KafkaCluster.scala:85)
        at scala.util.Either$RightProjection.flatMap(Either.scala:523)

Solution
The spark-streaming-kafka-0-8 connector is compiled against the Kafka 0.8 client API, so a newer Kafka jar on the classpath (here from kafka_2.11-2.0.0) makes the cast from BrokerEndPoint to Broker fail. Replace the Kafka jar with kafka_2.11-0.8.2.1.jar and copy it into the jars directory on every Spark node.
---------------------
Author: m0_37723298
Source: CSDN
Original: https://blog.csdn.net/m0_37723298/article/details/84750555
Copyright notice: this is the blogger's original article; please include a link to the original post when reposting.
