Import the dependencies
<!-- MySQL dependency -->
<dependency>
    <groupId>mysql</groupId>
    <artifactId>mysql-connector-java</artifactId>
    <version>5.1.38</version>
</dependency>
<!-- HBase dependencies -->
<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-client</artifactId>
    <version>1.2.4</version>
</dependency>
<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-server</artifactId>
    <version>1.2.4</version>
</dependency>
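The Spark programs below also need spark-core on the classpath. If the project does not already declare it, a dependency along these lines works; the version and Scala suffix here are assumptions and should match your project:

<!-- Spark core (version and Scala suffix are assumptions; match your project) -->
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-core_2.11</artifactId>
    <version>2.4.5</version>
</dependency>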
MySQL
Code
package com.baizhi.jsy.createRDD

import java.sql.{PreparedStatement, ResultSet}
import java.util.Date

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.io.LongWritable
import org.apache.hadoop.mapred.lib.db.DBWritable
import org.apache.hadoop.mapreduce.lib.db.{DBConfiguration, DBInputFormat}
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object WorldCountMysql {
  def main(args: Array[String]): Unit = {
    // Create the SparkContext
    val conf = new SparkConf()
      .setMaster("local[*]")
      .setAppName("WorldCountMysql")
    val context = new SparkContext(conf)

    // JDBC connection parameters read by DBInputFormat
    val hadoopConfiguration = new Configuration()
    DBConfiguration.configureDB(hadoopConfiguration,
      "com.mysql.jdbc.Driver",
      "jdbc:mysql://Centos:3306/mysql",
      "root",
      "root"
    )
    // The row query, the matching count query, and the value class
    hadoopConfiguration.set(DBConfiguration.INPUT_QUERY, "SELECT id,name,birthday from student")
    hadoopConfiguration.set(DBConfiguration.INPUT_COUNT_QUERY, "SELECT count(id) from student")
    hadoopConfiguration.set(DBConfiguration.INPUT_CLASS_PROPERTY, "com.baizhi.jsy.createRDD.Student")

    val studentRDD: RDD[(LongWritable, Student)] = context.newAPIHadoopRDD[LongWritable, Student, DBInputFormat[Student]](
      hadoopConfiguration,
      classOf[DBInputFormat[Student]],
      classOf[LongWritable],
      classOf[Student]
    )
    studentRDD.map(t => (t._2.id, t._2.name, t._2.birthday)).collect().foreach(println)

    // Shut down
    context.stop()
  }
}

class Student extends DBWritable {
  var id: String = _
  var name: String = _
  var birthday: Date = _

  // Only reading is needed here, so write() is left empty
  override def write(preparedStatement: PreparedStatement): Unit = {}

  override def readFields(resultSet: ResultSet): Unit = {
    id = resultSet.getString("id")
    name = resultSet.getString("name")
    birthday = resultSet.getDate("birthday")
  }
}
The MySQL student table:
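The job reads a student table with columns id, name and birthday. A minimal sketch for creating and filling such a table from Scala could look like the following; the column types and the sample row are assumptions, and the connection settings simply reuse the ones in the job above.

import java.sql.DriverManager

object PrepareStudentTable {
  def main(args: Array[String]): Unit = {
    // Connection settings copied from WorldCountMysql above
    Class.forName("com.mysql.jdbc.Driver")
    val conn = DriverManager.getConnection("jdbc:mysql://Centos:3306/mysql", "root", "root")
    try {
      val stmt = conn.createStatement()
      // Schema inferred from the SELECT in the job; column types are an assumption
      stmt.execute(
        """CREATE TABLE IF NOT EXISTS student(
          |  id       VARCHAR(32) PRIMARY KEY,
          |  name     VARCHAR(64),
          |  birthday DATE
          |)""".stripMargin)
      // One illustrative row so the job has something to read
      stmt.execute("INSERT INTO student(id, name, birthday) VALUES ('1', 'zhangsan', '1990-01-01')")
    } finally {
      conn.close()
    }
  }
}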
Read result: the job prints the (id, name, birthday) tuples.
If the print statement is changed to return the Student objects themselves:
studentRDD.map(t => t._2).collect().foreach(println)
running it fails with a serialization error: collect() must serialize each Student to ship it back to the driver, but Student does not implement Serializable.
Implement Serializable
class Student extends DBWritable with Serializable {
  var id: String = _
  var name: String = _
  var birthday: Date = _

  override def write(preparedStatement: PreparedStatement): Unit = {}

  override def readFields(resultSet: ResultSet): Unit = {
    id = resultSet.getString("id")
    name = resultSet.getString("name")
    birthday = resultSet.getDate("birthday")
  }
}
Result: the Student objects now print without the serialization error.
Reading data from HBase
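The job below scans the table WorldCountHbase:t_user and reads the name column from the column family cf1, so that namespace, table and column family must already exist. A minimal sketch for creating and seeding it with the HBase 1.2.x client API could look like this; the layout is inferred from the code below, and the sample row is an assumption.

import org.apache.hadoop.hbase.{HBaseConfiguration, HColumnDescriptor, HTableDescriptor, NamespaceDescriptor, TableName}
import org.apache.hadoop.hbase.client.{ConnectionFactory, Put}
import org.apache.hadoop.hbase.util.Bytes

object PrepareHbaseTable {
  def main(args: Array[String]): Unit = {
    val conf = HBaseConfiguration.create()
    conf.set("hbase.zookeeper.quorum", "Centos")
    val connection = ConnectionFactory.createConnection(conf)
    try {
      val admin = connection.getAdmin
      // Namespace and table inferred from "WorldCountHbase:t_user" in the job below
      // (createNamespace fails if the namespace already exists)
      admin.createNamespace(NamespaceDescriptor.create("WorldCountHbase").build())
      val descriptor = new HTableDescriptor(TableName.valueOf("WorldCountHbase:t_user"))
      descriptor.addFamily(new HColumnDescriptor("cf1"))
      admin.createTable(descriptor)

      // One illustrative row so the scan has something to return
      val table = connection.getTable(TableName.valueOf("WorldCountHbase:t_user"))
      val put = new Put(Bytes.toBytes("001"))
      put.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("name"), Bytes.toBytes("zhangsan"))
      table.put(put)
      table.close()
    } finally {
      connection.close()
    }
  }
}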
package com.baizhi.jsy.createRDD

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.hbase.HConstants
import org.apache.hadoop.hbase.client.{Result, Scan}
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.protobuf.ProtobufUtil
import org.apache.hadoop.hbase.util.{Base64, Bytes}
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object WorldCountHbase {
  def main(args: Array[String]): Unit = {
    // Create the SparkContext
    val conf = new SparkConf()
      .setMaster("local[*]")
      .setAppName("WorldCount")
    val context = new SparkContext(conf)

    // HBase connection parameters
    val hadoopConfiguration = new Configuration()
    hadoopConfiguration.set(HConstants.ZOOKEEPER_QUORUM, "Centos")
    hadoopConfiguration.set(TableInputFormat.INPUT_TABLE, "WorldCountHbase:t_user")

    // TableInputFormat expects the Scan serialized to a Base64 string
    val scan = new Scan()
    val pro = ProtobufUtil.toScan(scan)
    val str = Base64.encodeBytes(pro.toByteArray)
    hadoopConfiguration.set(TableInputFormat.SCAN, str)

    val hbaseRDD: RDD[(ImmutableBytesWritable, Result)] = context.newAPIHadoopRDD(
      hadoopConfiguration,
      classOf[TableInputFormat],
      classOf[ImmutableBytesWritable],
      classOf[Result]
    )
    hbaseRDD.map(t => {
      val rowkey = Bytes.toString(t._1.get())
      val name = Bytes.toString(t._2.getValue("cf1".getBytes(), "name".getBytes()))
      (rowkey, name)
    }).foreach(println)

    // Shut down
    context.stop()
  }
}
Running it fails with an error.
Solution: add the hadoop-auth dependency (it supplies the Hadoop authentication classes the HBase client needs at runtime):
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-auth</artifactId>
    <version>2.9.2</version>
</dependency>