Spark Read/Write Data Demo

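Both examples need spark-core plus the HBase client and MapReduce integration classes (TableInputFormat / TableOutputFormat) on the classpath. A minimal build.sbt sketch; the version numbers are assumptions and should be matched to your cluster:

libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-core"   % "2.4.8" % "provided",  // assumed Spark version
  "org.apache.hbase" %  "hbase-client" % "1.4.13",              // assumed HBase version
  "org.apache.hbase" %  "hbase-server" % "1.4.13"               // TableInputFormat/TableOutputFormat live here in HBase 1.x
)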

Reading HBase data

import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark._


object HbaseSparkRead {
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setAppName("HBaseTest").setMaster("local")
    val sc = new SparkContext(sparkConf)

    // HBase connection information (ZooKeeper quorum, client port, and source table)
    val conf = HBaseConfiguration.create()
    conf.set("hbase.zookeeper.quorum", "ht05")
    conf.set("hbase.zookeeper.property.clientPort", "2181")
    conf.set(TableInputFormat.INPUT_TABLE, "spark_hbase")


    // Read the table into an RDD of (ImmutableBytesWritable, Result) pairs
    val hBaseRDD = sc.newAPIHadoopRDD(conf, classOf[TableInputFormat],
      classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable],
      classOf[org.apache.hadoop.hbase.client.Result])

    //val count = hBaseRDD.count()
    //println(count)
    hBaseRDD.foreach { case (_, result) =>
      // Get the row key
      val key = Bytes.toString(result.getRow)
      // Get the cell value by column family and column qualifier
      val name = Bytes.toString(result.getValue(Bytes.toBytes("cf"), Bytes.toBytes("name")))
      println("Row key:" + key + " Name:" + name)
    }

    // Save the row keys of the HBase table to a text file on HDFS
    hBaseRDD.map(x => Bytes.toString(x._2.getRow)).saveAsTextFile("hdfs://ht05:9000/test1")
  }
}
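TableInputFormat also understands a few scan properties, so the RDD does not have to pull the whole table. A hedged sketch (the row-key range and column list below are made-up example values), set on conf before calling newAPIHadoopRDD:

    // Optional: narrow the scan before building the RDD
    conf.set(TableInputFormat.SCAN_ROW_START, "row-000") // first row key to scan (example value)
    conf.set(TableInputFormat.SCAN_ROW_STOP, "row-100")  // stop row key, exclusive (example value)
    conf.set(TableInputFormat.SCAN_COLUMNS, "cf:name")   // only fetch this family:qualifier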

Writing HBase data

import org.apache.hadoop.hbase.client.{Put, Result}
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.mapreduce.Job
import org.apache.spark._

object HbaseSparkWrite {
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setAppName("HBaseTest").setMaster("local")
    val sc = new SparkContext(sparkConf)

    // Source data: one row key per line, read from a text file on HDFS
    val dataRdd = sc.textFile("hdfs://ht05:9000/zhaow/hotle0.txt")

    // HBase connection information (ZooKeeper quorum, client port, and target table)
    sc.hadoopConfiguration.set("hbase.zookeeper.quorum", "ht05")
    sc.hadoopConfiguration.set("hbase.zookeeper.property.clientPort", "2181")
    sc.hadoopConfiguration.set(TableOutputFormat.OUTPUT_TABLE, "spark_test0")

    // Use lazy here: when this code is run in spark-shell, every statement is evaluated (and its
    // result printed) immediately, so the Job should only be created at the point it is used.
    // Job.getInstance replaces the deprecated new Job(Configuration) constructor.
    lazy val job = Job.getInstance(sc.hadoopConfiguration)
    job.setOutputFormatClass(classOf[TableOutputFormat[ImmutableBytesWritable]])
    job.setOutputKeyClass(classOf[ImmutableBytesWritable])
    job.setOutputValueClass(classOf[Result])

    val rdd = dataRdd.filter(_.nonEmpty).map { line =>
      val rowkey: String = line
      val put = new Put(Bytes.toBytes(rowkey))
      // Write the row key itself into column cf:name
      put.add(Bytes.toBytes("cf"), Bytes.toBytes("name"), Bytes.toBytes(rowkey))
      (new ImmutableBytesWritable, put)
    }
    rdd.saveAsNewAPIHadoopDataset(job.getConfiguration)
  }
}
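If each input line carries more than a row key, the same pattern extends to several columns. A sketch assuming a made-up line format of rowkey,name,age; addColumn requires HBase 1.0+, older releases use put.add as in the example above:

    val multiColRdd = dataRdd.filter(_.nonEmpty).map { line =>
      val Array(rowkey, name, age) = line.split(",", 3) // assumed format: rowkey,name,age
      val put = new Put(Bytes.toBytes(rowkey))
      put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("name"), Bytes.toBytes(name))
      put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("age"), Bytes.toBytes(age))
      (new ImmutableBytesWritable, put)
    }
    multiColRdd.saveAsNewAPIHadoopDataset(job.getConfiguration)

In both cases the target table (spark_test0) and its column family cf must already exist in HBase before the job runs.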

 
