基于 HBase 的 Spark SQL 示例(一)

使用 Spark SQL 读取 HBase 数据的简单示例:
package enn.cn.dataimport


/**
  * Created by Administrator 
  */

import java.io.Serializable
import java.util.logging.Logger

import enn.cn.util.{CommonContent, KerberosUtil}
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.spark.{SparkConf, SparkContext}

/**
  * Minimal example: scan an HBase table into an RDD, map every row to a
  * [[MySparkSql.Score]] and query the rows with Spark SQL.
  *
  * The job logs in through Kerberos, runs Spark in `local` mode, reads the
  * HBase connection settings from `hbase-site.xml` on the classpath and prints
  * the rows twice: once from the raw RDD and once from the SQL query result.
  */
object MySparkSql extends Serializable {

  // Local imports keep this object self-contained; all of these packages are
  // already dependencies of the project (hbase-client/hbase-common, spark-sql).
  import org.apache.hadoop.hbase.CellUtil
  import org.apache.hadoop.hbase.client.Result
  import org.apache.hadoop.hbase.io.ImmutableBytesWritable
  import org.apache.hadoop.hbase.util.Bytes
  import org.apache.spark.sql.SQLContext

  /**
    * One HBase row. `name` is the row key; the remaining fields are filled
    * positionally from the row's cells, in the order `Result.listCells()`
    * returns them.
    */
  case class Score(name: String,
                   metric: String,
                   value: String,
                   timestamp: String,
                   deviceTypeId: String,
                   deviceNo: String,
                   deviceId: String,
                   gatewayId: String)

  val logger: Logger = Logger.getLogger(MySparkSql.getClass.getName)

  /** HBase table to scan, in `namespace:table` form. */
  private val HBaseTable = "test:iot_electric_sale"

  /**
    * Name of the Spark SQL temp table. It must be a plain SQL identifier: the
    * HBase name cannot be reused because ':' is not valid in an unquoted
    * table name and makes the query fail to parse.
    */
  private val TempTable = "iot_electric_sale"

  /** Number of cells (columns) a row must have to populate a [[Score]]. */
  private val CellsPerRow = 7

  /**
    * Converts one HBase row into a [[Score]].
    *
    * @param result the row returned by the scan
    * @return `Some(score)` when the row has at least [[CellsPerRow]] cells;
    *         `None` (after logging a warning) when the row is empty or
    *         incomplete, instead of failing the job with an
    *         IndexOutOfBoundsException / NullPointerException
    */
  private def toScore(result: Result): Option[Score] = {
    // listCells() returns null for an empty Result.
    val cells = Option(result.listCells())
    val cellCount = cells.map(_.size).getOrElse(0)
    if (cellCount < CellsPerRow) {
      logger.warning(
        s"Skipping row ${Bytes.toStringBinary(result.getRow)}: " +
          s"expected at least $CellsPerRow cells but found $cellCount")
      None
    } else {
      cells.map { cs =>
        // Bytes.toString decodes as UTF-8 regardless of the JVM default
        // charset; CellUtil.cloneValue replaces the deprecated Cell.getValue.
        def column(index: Int): String = Bytes.toString(CellUtil.cloneValue(cs.get(index)))

        Score(
          Bytes.toString(result.getRow),
          column(0),
          column(1),
          column(2),
          column(3),
          column(4),
          column(5),
          column(6))
      }
    }
  }

  /**
    * Entry point: runs the scan, prints the mapped rows, registers them as a
    * temp table and prints the result of `SELECT *` over that table.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {

    // When running on Windows 7, hadoop.home.dir must be set first, e.g.
    //   System.setProperty("hadoop.home.dir", "E:\\Program Files\\hadoop-2.7.0")
    KerberosUtil.loginUserFromKeytab(CommonContent.KERBEROS_NAME, CommonContent.KERBEROS_KEYTAB)

    val sconf = new SparkConf()
      // For a cluster run use e.g. .setMaster("spark://h230:7077") (replace
      // h230 with your own master host) and ship the application jar with
      // .setJars(Array("D:\\workspace\\mysparksql_2.10-1.0.jar")).
      .setMaster("local")
      .setAppName("MySparkSql")
      .set("spark.executor.memory", "1g")
      .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
    val sc = new SparkContext(sconf)

    try {
      val conf = HBaseConfiguration.create()
      // hbase-site.xml replaces setting hbase.zookeeper.property.clientPort,
      // hbase.zookeeper.quorum and hbase.master by hand.
      conf.addResource("hbase-site.xml")
      conf.set(TableInputFormat.INPUT_TABLE, HBaseTable)

      // Full-table scan: one (row key, Result) pair per HBase row.
      val hBaseRDD = sc.newAPIHadoopRDD(
        conf,
        classOf[TableInputFormat],
        classOf[ImmutableBytesWritable],
        classOf[Result])

      val score = hBaseRDD.flatMap { case (_, row) => toScore(row) }
      score.foreach(println)

      val sqlContext = new SQLContext(sc)
      val scoreSchema = sqlContext.createDataFrame(score)
      scoreSchema.registerTempTable(TempTable)
      val result = sqlContext.sql(s"SELECT * FROM $TempTable")
      result.collect().foreach(println)
    } finally {
      // Always release the Spark context, even when the job fails.
      sc.stop()
    }
  }
}
package enn.cn.dataimport;

/**
 * Plain Java bean holding one record with eight string fields.
 *
 * <p>The fields are public (kept for backward compatibility with code that
 * accesses them directly) and each one also has a getter/setter pair.
 */
public class Score {
    public String name;

    public String metric;

    public String value;

    public String timestamp;

    public String deviceTypeId;

    public String deviceNo;

    public String deviceId;

    public String gatewayId;

    /**
     * Creates a fully populated record.
     *
     * @param name         value for {@link #name}
     * @param metric       value for {@link #metric}
     * @param value        value for {@link #value}
     * @param timestamp    value for {@link #timestamp}
     * @param deviceTypeId value for {@link #deviceTypeId}
     * @param deviceNo     value for {@link #deviceNo}
     * @param deviceId     value for {@link #deviceId}
     * @param gatewayId    value for {@link #gatewayId}
     */
    public Score(String name, String metric, String value, String timestamp, String deviceTypeId, String deviceNo, String deviceId, String gatewayId){
        this.name=name;
        this.metric=metric;
        this.value=value;
        this.timestamp=timestamp;
        this.deviceTypeId=deviceTypeId;
        this.deviceNo=deviceNo;
        this.deviceId=deviceId;
        this.gatewayId=gatewayId;
    }

    // The accessor pair for `name` was missing although every other field had
    // one; without it bean-style consumers cannot see the property.
    /** @return the current {@link #name} */
    public String getName() {
        return name;
    }

    /** @param name new value for {@link #name} */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the current {@link #metric} */
    public String getMetric() {
        return metric;
    }

    /** @param metric new value for {@link #metric} */
    public void setMetric(String metric) {
        this.metric = metric;
    }

    /** @return the current {@link #value} */
    public String getValue() {
        return value;
    }

    /** @param value new value for {@link #value} */
    public void setValue(String value) {
        this.value = value;
    }

    /** @return the current {@link #timestamp} */
    public String getTimestamp() {
        return timestamp;
    }

    /** @param timestamp new value for {@link #timestamp} */
    public void setTimestamp(String timestamp) {
        this.timestamp = timestamp;
    }

    /** @return the current {@link #deviceTypeId} */
    public String getDeviceTypeId() {
        return deviceTypeId;
    }

    /** @param deviceTypeId new value for {@link #deviceTypeId} */
    public void setDeviceTypeId(String deviceTypeId) {
        this.deviceTypeId = deviceTypeId;
    }

    /** @return the current {@link #deviceNo} */
    public String getDeviceNo() {
        return deviceNo;
    }

    /** @param deviceNo new value for {@link #deviceNo} */
    public void setDeviceNo(String deviceNo) {
        this.deviceNo = deviceNo;
    }

    /** @return the current {@link #deviceId} */
    public String getDeviceId() {
        return deviceId;
    }

    /** @param deviceId new value for {@link #deviceId} */
    public void setDeviceId(String deviceId) {
        this.deviceId = deviceId;
    }

    /** @return the current {@link #gatewayId} */
    public String getGatewayId() {
        return gatewayId;
    }

    /** @param gatewayId new value for {@link #gatewayId} */
    public void setGatewayId(String gatewayId) {
        this.gatewayId = gatewayId;
    }
}
依赖包(pom.xml)如下,实际使用时只需摘出其中与 HBase、Spark 相关的依赖即可:
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
       xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
   <modelVersion>4.0.0</modelVersion>

   <groupId>enn.cn</groupId>
   <artifactId>dataimport</artifactId>
   <version>0.0.1-SNAPSHOT</version>
   <packaging>jar</packaging>

   <name>dataimport</name>
   <url>http://maven.apache.org</url>

   <properties>
      <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
      <elasticsearch.version>2.1.2</elasticsearch.version>
   </properties>

   <dependencies>
      <!-- https://mvnrepository.com/artifact/com.metamx/scala-util_2.10 -->
      <dependency>
         <groupId>com.metamx</groupId>
         <artifactId>scala-util_2.10</artifactId>
         <version>1.11.6</version>
      </dependency>

      <dependency>
         <groupId>org.apache.spark</groupId>
         <artifactId>spark-core_2.10</artifactId>
         <version>1.6.0-cdh5.10.0</version>
         <exclusions>
            <exclusion>
               <artifactId>jackson-annotations</artifactId>
               <groupId>com.fasterxml.jackson.core</groupId>
            </exclusion>
            <exclusion>
               <artifactId>jackson-databind</artifactId>
               <groupId>com.fasterxml.jackson.core</groupId>
            </exclusion>
         </exclusions>
      </dependency>
      <dependency>
         <groupId>org.apache.spark</groupId>
         <artifactId>spark-streaming-kafka_2.10</artifactId>
         <version>1.6.0-cdh5.10.0</version>
      </dependency>
      <dependency>
         <groupId>org.apache.spark</groupId>
         <artifactId>spark-streaming_2.10</artifactId>
         <version>1.6.0-cdh5.10.0</version>
      </dependency>
      <dependency>
         <groupId>unknown</groupId>
         <artifactId>PspaceClientSDK</artifactId>
         <version>1.4</version>
         <scope>system</scope>
         <systemPath>${basedir}/libs/PspaceClientSDK-1.4.jar</systemPath>
      </dependency>
      <dependency>
         <groupId>com.likong</groupId>
         <artifactId>pspace1.0</artifactId>
         <version>1.0.0</version>
      </dependency>
      <dependency>
         <groupId>org.apache.kafka</groupId>
         <artifactId>kafka_2.10</artifactId>
         <version>0.10.0.0</version>
      </dependency>

      <dependency>
         <groupId>org.apache.hbase</groupId>
         <artifactId>hbase-checkstyle</artifactId>
         <version>1.2.0-cdh5.10.0</version>
      </dependency>
      <dependency>
         <groupId>org.apache.hbase</groupId>
         <artifactId>hbase-client</artifactId>
         <version>1.2.0-cdh5.10.0</version>
      </dependency>
      <dependency>
         <groupId>org.apache.hbase</groupId>
         <artifactId>hbase-common</artifactId>
         <version>1.2.0-cdh5.10.0</version>
      </dependency>
      <dependency>
         <groupId>org.apache.hbase</groupId>
         <artifactId>hbase-hadoop2-compat</artifactId>
         <version>1.2.0-cdh5.10.0</version>
      </dependency>
      <dependency>
         <groupId>org.apache.hbase</groupId>
         <artifactId>hbase-protocol</artifactId>
         <version>1.2.0-cdh5.10.0</version>
      </dependency>
      <dependency>
         <groupId>org.apache.hadoop</groupId>
         <artifactId>hadoop-hdfs</artifactId>
         <version>2.6.0-cdh5.10.0</version>
         <exclusions>
            <exclusion>
               <groupId>tomcat</groupId>
               <artifactId>jasper-runtime</artifactId>
            </exclusion>
            <exclusion>
               <groupId>tomcat</groupId>
               <artifactId>jasper-compiler</artifactId>
            </exclusion>
         </exclusions>
      </dependency>

      <dependency>
         <groupId>commons-dbcp</groupId>
         <artifactId>commons-dbcp</artifactId>
         <version>1.4</version>
      </dependency>
      <dependency>
         <groupId>junit</groupId>
         <artifactId>junit</artifactId>
         <version>3.8.1</version>
         <scope>test</scope>
      </dependency>
      <!-- https://mvnrepository.com/artifact/javax.servlet/javax.servlet-api -->
      <dependency>
         <groupId>javax.servlet</groupId>
         <artifactId>javax.servlet-api</artifactId>
         <version>3.1.0</version>
      </dependency>

      <dependency>
         <groupId>org.springframework.data</groupId>
         <artifactId>spring-data-jpa</artifactId>
         <version>1.5.0.RELEASE</version>
      </dependency>
      <dependency>
         <groupId>org.springframework</groupId>
         <artifactId>spring-tx</artifactId>
         <version>3.2.3.RELEASE</version>
      </dependency>
      <dependency>
         <groupId>org.hibernate</groupId>
         <artifactId>hibernate-entitymanager</artifactId>
         <version>4.2.20.Final</version>
      </dependency>
      <dependency>
         <groupId>c3p0</groupId>
         <artifactId>c3p0</artifactId>
         <version>0.9.1.2</version>
      </dependency>
      <!-- Keep hibernate-c3p0 on the same release as hibernate-entitymanager and
           hibernate-ehcache (4.2.20.Final); the previous 3.5.6-Final mixed Hibernate 3
           and Hibernate 4 artifacts in one build. -->
      <dependency>
         <groupId>org.hibernate</groupId>
         <artifactId>hibernate-c3p0</artifactId>
         <version>4.2.20.Final</version>
      </dependency>

      <dependency>
         <groupId>org.springframework</groupId>
         <artifactId>spring-webmvc</artifactId>
         <version>3.2.3.RELEASE</version>
      </dependency>
      <dependency>
         <groupId>org.springframework</groupId>
         <artifactId>spring-webmvc-portlet</artifactId>
         <version>3.2.3.RELEASE</version>
      </dependency>
      <dependency>
         <groupId>org.springframework</groupId>
         <artifactId>spring-expression</artifactId>
         <version>3.2.3.RELEASE</version>
      </dependency>
      <dependency>
         <groupId>org.springframework</groupId>
         <artifactId>spring-aspects</artifactId>
         <version>3.2.3.RELEASE</version>
      </dependency>
      <dependency>
         <groupId>org.springframework</groupId>
         <artifactId>spring-oxm</artifactId>
         <version>3.2.3.RELEASE</version>
      </dependency>
      <dependency>
         <groupId>org.apache.commons</groupId>
         <artifactId>commons-lang3</artifactId>
         <version>3.1</version>
      </dependency>
      <dependency>
         <groupId>org.apache.shiro</groupId>
         <artifactId>shiro-all</artifactId>
         <version>1.2.4</version>
      </dependency>
      <dependency>
         <groupId>commons-codec</groupId>
         <artifactId>commons-codec</artifactId>
         <version>1.10</version>
      </dependency>
      <dependency>
         <groupId>mysql</groupId>
         <artifactId>mysql-connector-java</artifactId>
         <version>5.1.25</version>
      </dependency>
      <dependency>
         <groupId>com.google.guava</groupId>
         <artifactId>guava-base</artifactId>
         <version>r03</version>
      </dependency>
      <dependency>
         <groupId>com.google.guava</groupId>
         <artifactId>guava-collections</artifactId>
         <version>r03</version>
      </dependency>
      <dependency>
         <groupId>org.hibernate</groupId>
         <artifactId>hibernate-ehcache</artifactId>
         <version>4.2.20.Final</version>
      </dependency>
      <!-- http://mvnrepository.com/artifact/net.sf.ehcache/ehcache-core -->
      <dependency>
         <groupId>net.sf.ehcache</groupId>
         <artifactId>ehcache-core</artifactId>
         <version>2.6.11</version>
      </dependency>

      <dependency>
         <groupId>com.fasterxml.jackson.core</groupId>
         <artifactId>jackson-annotations</artifactId>
         <version>2.7.5</version>
      </dependency>

      <dependency>
         <groupId>cglib</groupId>
         <artifactId>cglib</artifactId>
         <version>3.2.2</version>
      </dependency>
      <dependency>
         <groupId>org.codehaus.jackson</groupId>
         <artifactId>jackson-mapper-asl</artifactId>
         <version>1.8.6</version>
      </dependency>
      <dependency>
         <groupId>commons-fileupload</groupId>
         <artifactId>commons-fileupload</artifactId>
         <version>1.3</version>
      </dependency>
      <dependency>
         <groupId>org.slf4j</groupId>
         <artifactId>slf4j-log4j12</artifactId>
         <version>1.7.6</version>
      </dependency>
      <dependency>
         <groupId>javax.servlet</groupId>
         <artifactId>jstl</artifactId>
         <version>1.2</version>
      </dependency>
      <dependency>
         <groupId>com.alibaba</groupId>
         <artifactId>fastjson</artifactId>
         <version>1.2.10</version>
      </dependency>
      <dependency>
         <groupId>javax.mail</groupId>
         <artifactId>mail</artifactId>
         <version>1.4.7</version>
      </dependency>
      <dependency>
         <groupId>org.opensymphony.quartz</groupId>
         <artifactId>quartz-all</artifactId>
         <version>1.6.1</version>
      </dependency>
      <dependency>
         <groupId>commons-collections</groupId>
         <artifactId>commons-collections</artifactId>
         <version>3.2.1</version>
      </dependency>
      <dependency>
         <groupId>com.belerweb</groupId>
         <artifactId>pinyin4j</artifactId>
         <version>2.5.0</version>
      </dependency>
      <dependency>
         <groupId>org.apache.commons</groupId>
         <artifactId>commons-pool2</artifactId>
         <version>2.2</version>
      </dependency>
      <dependency>
         <groupId>commons-pool</groupId>
         <artifactId>commons-pool</artifactId>
         <version>1.6</version>
      </dependency>
      <dependency>
         <groupId>org.ow2.asm</groupId>
         <artifactId>asm-util</artifactId>
         <version>4.0</version>
      </dependency>
      <dependency>
         <groupId>commons-net</groupId>
         <artifactId>commons-net</artifactId>
         <version>3.3</version>
      </dependency>
      <dependency>
         <groupId>commons-lang</groupId>
         <artifactId>commons-lang</artifactId>
         <version>2.4</version>
      </dependency>
      <!-- http://mvnrepository.com/artifact/commons-beanutils/commons-beanutils -->
      <dependency>
         <groupId>commons-beanutils</groupId>
         <artifactId>commons-beanutils</artifactId>
         <version>1.9.2</version>
      </dependency>
      <!-- http://mvnrepository.com/artifact/commons-beanutils/commons-beanutils-core -->
      <dependency>
         <groupId>commons-beanutils</groupId>
         <artifactId>commons-beanutils-core</artifactId>
         <version>1.8.3</version>
      </dependency>
      <!-- http://mvnrepository.com/artifact/commons-beanutils/commons-beanutils-bean-collections -->
      <dependency>
         <groupId>commons-beanutils</groupId>
         <artifactId>commons-beanutils-bean-collections</artifactId>
         <version>1.8.3</version>
      </dependency>
      <!-- http://mvnrepository.com/artifact/redis.clients/jedis -->
      <dependency>
         <groupId>redis.clients</groupId>
         <artifactId>jedis</artifactId>
         <version>2.7.0</version>
      </dependency>
      <!-- http://mvnrepository.com/artifact/net.heartsavior/jedis <dependency>
         <groupId>net.heartsavior</groupId> <artifactId>jedis</artifactId> <version>2.2.1.1</version>
         </dependency> -->
      <!-- shiro 分布式session及cache管理 -->
      <!-- https://mvnrepository.com/artifact/org.bouncycastle/bcprov-jdk16 -->
      <dependency>
         <groupId>org.bouncycastle</groupId>
         <artifactId>bcprov-jdk16</artifactId>
         <version>1.45</version>
      </dependency>

      <dependency>
         <groupId>org.crazycake</groupId>
         <artifactId>shiro-redis</artifactId>
         <version>2.4.2.1-RELEASE</version>
      </dependency>
      <!-- https://mvnrepository.com/artifact/net.sf.json-lib/json-lib-ext-spring -->
      <dependency>
         <groupId>net.sf.json-lib</groupId>
         <artifactId>json-lib-ext-spring</artifactId>
         <version>1.0.2</version>
      </dependency>
      <!-- https://mvnrepository.com/artifact/commons-httpclient/commons-httpclient -->
      <dependency>
         <groupId>commons-httpclient</groupId>
         <artifactId>commons-httpclient</artifactId>
         <version>3.0</version>
      </dependency>
      <!-- https://mvnrepository.com/artifact/com.thoughtworks.xstream/xstream -->
      <dependency>
         <groupId>com.thoughtworks.xstream</groupId>
         <artifactId>xstream</artifactId>
         <version>1.4.8</version>
      </dependency>
      <dependency>
         <groupId>jaxen</groupId>
         <artifactId>jaxen</artifactId>
         <version>1.1.6</version>
      </dependency>
      <dependency>
         <groupId>dom4j</groupId>
         <artifactId>dom4j</artifactId>
         <version>1.6.1</version>
      </dependency>
      <dependency>
         <groupId>org.apache.httpcomponents</groupId>
         <artifactId>httpcore</artifactId>
         <version>4.4.6</version>
      </dependency>
      <!-- Duplicate declaration removed: commons-codec:commons-codec is already declared
           earlier in this dependency list with version 1.10. Maven requires each
           groupId:artifactId to be declared only once per POM and warns otherwise. -->
      <dependency>
         <groupId>org.apache.httpcomponents</groupId>
         <artifactId>httpclient</artifactId>
         <version>4.4</version>
      </dependency>
      <!-- https://mvnrepository.com/artifact/org.apache.zookeeper/zookeeper -->
      <dependency>
         <groupId>org.apache.zookeeper</groupId>
         <artifactId>zookeeper</artifactId>
         <version>3.4.10</version>
         <type>pom</type>
      </dependency>
      <!-- https://mvnrepository.com/artifact/org.apache.hive/hive-jdbc -->
      <dependency>
         <groupId>org.apache.hive</groupId>
         <artifactId>hive-jdbc</artifactId>
         <version>1.1.0-cdh5.10.0</version>
      </dependency>
      <dependency>
         <groupId>org.apache.spark</groupId>
         <artifactId>spark-sql_2.10</artifactId>
         <version>1.6.0-cdh5.10.0</version>
      </dependency>
      <dependency>
         <groupId>org.apache.hbase</groupId>
         <artifactId>hbase-server</artifactId>
         <version>1.2.0-cdh5.10.0</version>
      </dependency>
   </dependencies>
   <build>
      <plugins>
         <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-jar-plugin</artifactId>
            <version>2.6</version>
            <configuration>
               <archive>
                  <manifest>
                     <addClasspath>true</addClasspath>
                     <classpathPrefix>lib/</classpathPrefix>
                     <!--<mainClass>enn.cn.pspace.EnnPspaceConn</mainClass>-->
                  </manifest>
               </archive>
            </configuration>
         </plugin>
         <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-dependency-plugin</artifactId>
            <version>2.10</version>
            <executions>
               <execution>
                  <id>copy-dependencies</id>
                  <phase>package</phase>
                  <goals>
                     <goal>copy-dependencies</goal>
                  </goals>
                  <configuration>
                     <outputDirectory>${project.build.directory}/lib</outputDirectory>
                  </configuration>
               </execution>
            </executions>
         </plugin>
         <!-- Compiles the Scala sources (main and test). The plugin version is pinned so
              the build is reproducible; Maven warns when a plugin has no version. -->
         <plugin>
            <groupId>org.scala-tools</groupId>
            <artifactId>maven-scala-plugin</artifactId>
            <version>2.15.2</version>
            <executions>
               <execution>
                  <goals>
                     <goal>compile</goal>
                     <goal>testCompile</goal>
                  </goals>
               </execution>
            </executions>
            <configuration>
               <scalaVersion>2.10.6</scalaVersion>
            </configuration>
         </plugin>
         <!-- Builds the shaded (fat) jar at package time. The plugin version is pinned so
              the build is reproducible; Maven warns when a plugin has no version. -->
         <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-shade-plugin</artifactId>
            <version>2.4.3</version>
            <executions>
               <execution>
                  <phase>package</phase>
                  <goals>
                     <goal>shade</goal>
                  </goals>
                  <configuration>
                     <!-- Relocate org.apache.http inside the shaded jar. -->
                     <relocations>
                        <relocation>
                           <pattern>org.apache.http</pattern>
                           <shadedPattern>shade.org.apache.http</shadedPattern>
                        </relocation>
                     </relocations>
                     <artifactSet>
                        <includes>
                           <include>*:*</include>
                        </includes>
                     </artifactSet>
                     <!-- Drop signature files of signed dependencies from the shaded jar. -->
                     <filters>
                        <filter>
                           <artifact>*:*</artifact>
                           <excludes>
                              <exclude>META-INF/*.SF</exclude>
                              <exclude>META-INF/*.DSA</exclude>
                              <exclude>META-INF/*.RSA</exclude>
                           </excludes>
                        </filter>
                     </filters>
                      <transformers>
                            <transformer
                                implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                              <!--  <mainClass>enn.cn.reports.IotToHiveMain</mainClass>-->
                            </transformer>
                            <!-- Append (rather than overwrite) resources that several jars provide. -->
                            <transformer
                                implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
                                <resource>META-INF/spring.handlers</resource>
                            </transformer>
                            <transformer
                                implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
                                <resource>META-INF/spring.schemas</resource>
                            </transformer>
                            <transformer
                                    implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
                                <resource>META-INF/services/org.apache.hadoop.fs.FileSystem</resource>
                            </transformer>
                            </transformers>
                  </configuration>
               </execution>
            </executions>
         </plugin>
      </plugins>
   </build>
</project>

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值