Spark: reading data from MySQL and HBase

Import the dependencies

        <!-- MySQL dependency -->
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.38</version>
        </dependency>
        <!-- HBase dependency -->
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>1.2.4</version>
        </dependency>

        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-server</artifactId>
            <version>1.2.4</version>
        </dependency>
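
The examples also assume Spark core itself is on the classpath; a minimal sketch of that dependency (the Scala suffix and version here are assumptions not taken from the original project, adjust them to your environment):

        <!-- Assumed: Spark core, not shown in the original dependency list -->
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.11</artifactId>
            <version>2.4.0</version>
        </dependency>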

Reading data from MySQL

Code

package com.baizhi.jsy.createRDD
import java.sql.{PreparedStatement, ResultSet}
import java.util.Date
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.io.LongWritable
import org.apache.hadoop.mapred.lib.db.DBWritable
import org.apache.hadoop.mapreduce.lib.db.{DBConfiguration, DBInputFormat}
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
object WorldCountMysql {
  def main(args: Array[String]): Unit = {
    // Create the SparkContext
    val conf = new SparkConf()
      .setMaster("local[*]")
      .setAppName("WorldCountMysql ")
    val context = new SparkContext(conf)

    val hadoopConfiguration = new Configuration()

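    // Register the JDBC driver class, connection URL, username and password on the Hadoop Configuration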
    DBConfiguration.configureDB(hadoopConfiguration,
      "com.mysql.jdbc.Driver",
    "jdbc:mysql://Centos:3306/mysql",
      "root",
      "root"
    )
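    // The row query, the count query (used to compute input splits), and the DBWritable class that maps each row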
    hadoopConfiguration.set(DBConfiguration.INPUT_QUERY,"SELECT id,name,birthday from student")
    hadoopConfiguration.set(DBConfiguration.INPUT_COUNT_QUERY,"SELECT count(id) from student")
    hadoopConfiguration.set(DBConfiguration.INPUT_CLASS_PROPERTY,"com.baizhi.jsy.createRDD.Student")
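    // Each RDD element is a (record index, Student) pair produced by DBInputFormat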
    val studentRDD:RDD[(LongWritable,Student)] = context.newAPIHadoopRDD[LongWritable, Student, DBInputFormat[Student]](
      hadoopConfiguration,
      classOf[DBInputFormat[Student]],
      classOf[LongWritable],
      classOf[Student]
    )
    studentRDD.map(t=>(t._2.id,t._2.name,t._2.birthday)).collect().foreach(println)

    // Stop the SparkContext
    context.stop()
  }
}
class Student extends DBWritable {
  var id:String=_
  var name:String=_
  var birthday:Date=_

  override def write(preparedStatement: PreparedStatement): Unit = {}

  override def readFields(resultSet: ResultSet): Unit = {
      id=resultSet.getString("id")
      name=resultSet.getString("name")
      birthday=resultSet.getDate("birthday")
  }
}

MySQL table

(screenshot of the student table)

Read result

(screenshot of the output)

Printing the RDD elements directly:

      studentRDD.map(t=>(t._2)).collect().foreach(println)

This produces the following error, because Student is not serializable:

(screenshot of the serialization error)

Implement Serializable

class Student extends DBWritable with Serializable {
  var id:String=_
  var name:String=_
  var birthday:Date=_

  override def write(preparedStatement: PreparedStatement): Unit = {}

  override def readFields(resultSet: ResultSet): Unit = {
      id=resultSet.getString("id")
      name=resultSet.getString("name")
      birthday=resultSet.getDate("birthday")
  }
}

Result

(screenshot of the output)

Reading data from HBase

package com.baizhi.jsy.createRDD
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.hbase.HConstants
import org.apache.hadoop.hbase.client.{Result, Scan}
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.protobuf.ProtobufUtil
import org.apache.hadoop.hbase.util.{Base64, Bytes}
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
object WorldCountHbase {
  def main(args: Array[String]): Unit = {
    // Create the SparkContext
    val conf = new SparkConf()
      .setMaster("local[*]")
      .setAppName("WorldCount")
    val context = new SparkContext(conf)

    val hadoopConfiguration = new Configuration()
    // HBase connection parameter: the ZooKeeper quorum
    hadoopConfiguration.set(HConstants.ZOOKEEPER_QUORUM,"Centos")

    hadoopConfiguration.set(TableInputFormat.INPUT_TABLE,"WorldCountHbase:t_user")
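    // TableInputFormat expects the Scan serialized as a Base64-encoded protobuf string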
    val scan = new Scan()
    val pro = ProtobufUtil.toScan(scan)
    val str = Base64.encodeBytes(pro.toByteArray)
    hadoopConfiguration.set(TableInputFormat.SCAN,str)

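    // Each RDD element is the row key (ImmutableBytesWritable) plus the full Result for that row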
    val hbaseRDD:RDD[(ImmutableBytesWritable,Result)] = context.newAPIHadoopRDD(
      hadoopConfiguration,
      classOf[TableInputFormat],
      classOf[ImmutableBytesWritable],
      classOf[Result]
    )

    hbaseRDD.map(t => {
      val rowkey = Bytes.toString(t._1.get())
      val name = Bytes.toString(t._2.getValue("cf1".getBytes(), "name".getBytes()))
      (rowkey, name)
    }).foreach(println)

    // Stop the SparkContext
    context.stop()
  }
}
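
Note that foreach(println) runs on the executors; the rows only show up in the console here because the job uses local[*]. A minimal variant of the printing step (same table, column family and qualifier as above) that collects the pairs back to the driver first:

    hbaseRDD.map(t => {
      val rowkey = Bytes.toString(t._1.get())
      val name = Bytes.toString(t._2.getValue("cf1".getBytes(), "name".getBytes()))
      (rowkey, name)
    }).collect().foreach(println)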

Running this fails with the following error:
(screenshot of the error)

Fix: add the hadoop-auth dependency

		<dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-auth</artifactId>
            <version>2.9.2</version>
        </dependency>