spark sql读取hbase数据简单实例:
package enn.cn.dataimport /** * Created by Administrator */ import java.io.Serializable import java.util.logging.Logger import enn.cn.util.{CommonContent, KerberosUtil} import org.apache.hadoop.hbase.HBaseConfiguration import org.apache.hadoop.hbase.mapreduce.TableInputFormat import org.apache.spark.{SparkConf, SparkContext} object MySparkSql extends Serializable { case class Score(name: String,metric: String, value: String, timestamp: String, deviceTypeId: String, deviceNo: String, deviceId: String, gatewayId: String) val logger = Logger.getLogger(MySparkSql.getClass.getName) def main(args: Array[String]) { /*val jars: Array[String] = Array("D:\\workspace\\mysparksql_2.10-1.0.jar") System.setProperty("hadoop.home.dir", "E:\\Program Files\\hadoop-2.7.0") //win7环境下运行须加*/ KerberosUtil.loginUserFromKeytab(CommonContent.KERBEROS_NAME, CommonContent.KERBEROS_KEYTAB) val sconf = new SparkConf() .setMaster("local") // .setMaster("spark://h230:7077")//在集群测试下设置,h230是我的hostname,须自行修改 .setAppName("MySparkSql") //win7环境下设置 .set("spark.executor.memory", "1g") .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer") // .setJars(jars)//在集群测试下,设置应用所依赖的jar包 val sc = new SparkContext(sconf) val conf = HBaseConfiguration.create() /* conf.set("hbase.zookeeper.property.clientPort", "2181") conf.set("hbase.zookeeper.quorum", "h230") conf.set("hbase.master", "h230:60000")*/ conf.addResource("hbase-site.xml")//替代以上三条配置信息 conf.set(TableInputFormat.INPUT_TABLE, "test:iot_electric_sale") // Scan操作 val hBaseRDD = sc.newAPIHadoopRDD(conf, classOf[TableInputFormat], classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable], classOf[org.apache.hadoop.hbase.client.Result]) val score = hBaseRDD.map(m => m._2.listCells()).map(c => Score(new String(c.get(0).getRow()), new String(c.get(0).getValue), new String(c.get(1).getValue), new String(c.get(2).getValue), new String(c.get(3).getValue), new String(c.get(4).getValue), new String(c.get(5).getValue), new String(c.get(6).getValue)) ) score.foreach(println) val sqlContext = new org.apache.spark.sql.SQLContext(sc) val scoreSchema = sqlContext.createDataFrame(score) scoreSchema.registerTempTable("test:iot_electric_sale") var result = sqlContext.sql("SELECT * FROM test:iot_electric_sale") result.collect().foreach(println) } }package enn.cn.dataimport;
/** * Created by Administrator */ public class Score { public String name; public String metric; public String value; public String timestamp; public String deviceTypeId; public String deviceNo; public String deviceId; public String gatewayId; public Score(String name, String metric, String value, String timestamp, String deviceTypeId, String deviceNo, String deviceId, String gatewayId){ this.name=name; this.metric=metric; this.value=value; this.timestamp=timestamp; this.deviceTypeId=deviceTypeId; this.deviceNo=deviceNo; this.deviceId=deviceId; this.gatewayId=gatewayId; } public String getMetric() { return metric; } public void setMetric(String metric) { this.metric = metric; } public String getValue() { return value; } public void setValue(String value) { this.value = value; } public String getTimestamp() { return timestamp; } public void setTimestamp(String timestamp) { this.timestamp = timestamp; } public String getDeviceTypeId() { return deviceTypeId; } public void setDeviceTypeId(String deviceTypeId) { this.deviceTypeId = deviceTypeId; } public String getDeviceNo() { return deviceNo; } public void setDeviceNo(String deviceNo) { this.deviceNo = deviceNo; } public String getDeviceId() { return deviceId; } public void setDeviceId(String deviceId) { this.deviceId = deviceId; } public String getGatewayId() { return gatewayId; } public void setGatewayId(String gatewayId) { this.gatewayId = gatewayId; } }依赖包,摘出hbase, spark相关的包即可:<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> <modelVersion>4.0.0</modelVersion> <groupId>enn.cn</groupId> <artifactId>dataimport</artifactId> <version>0.0.1-SNAPSHOT</version> <packaging>jar</packaging> <name>dataimport</name> <url>http://maven.apache.org</url> <properties> <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> <elasticsearch.version>2.1.2</elasticsearch.version> </properties> <dependencies> <!-- https://mvnrepository.com/artifact/com.metamx/scala-util_2.10 --> <dependency> <groupId>com.metamx</groupId> <artifactId>scala-util_2.10</artifactId> <version>1.11.6</version> </dependency> <dependency> <groupId>org.apache.spark</groupId> <artifactId>spark-core_2.10</artifactId> <version>1.6.0-cdh5.10.0</version> <exclusions> <exclusion> <artifactId>jackson-annotations</artifactId> <groupId>com.fasterxml.jackson.core</groupId> </exclusion> <exclusion> <artifactId>jackson-databind</artifactId> <groupId>com.fasterxml.jackson.core</groupId> </exclusion> </exclusions> </dependency> <dependency> <groupId>org.apache.spark</groupId> <artifactId>spark-streaming-kafka_2.10</artifactId> <version>1.6.0-cdh5.10.0</version> </dependency> <dependency> <groupId>org.apache.spark</groupId> <artifactId>spark-streaming_2.10</artifactId> <version>1.6.0-cdh5.10.0</version> </dependency> <dependency> <groupId>unknown</groupId> <artifactId>PspaceClientSDK</artifactId> <version>1.4</version> <scope>system</scope> <systemPath>${basedir}/libs/PspaceClientSDK-1.4.jar</systemPath> </dependency> <dependency> <groupId>com.likong</groupId> <artifactId>pspace1.0</artifactId> <version>1.0.0</version> </dependency> <dependency> <groupId>org.apache.kafka</groupId> <artifactId>kafka_2.10</artifactId> <version>0.10.0.0</version> </dependency> <dependency> <groupId>org.apache.hbase</groupId> <artifactId>hbase-checkstyle</artifactId> <version>1.2.0-cdh5.10.0</version> </dependency> <dependency> <groupId>org.apache.hbase</groupId> <artifactId>hbase-client</artifactId> <version>1.2.0-cdh5.10.0</version> </dependency> <dependency> <groupId>org.apache.hbase</groupId> <artifactId>hbase-common</artifactId> <version>1.2.0-cdh5.10.0</version> </dependency> <dependency> <groupId>org.apache.hbase</groupId> <artifactId>hbase-hadoop2-compat</artifactId> <version>1.2.0-cdh5.10.0</version> </dependency> <dependency> <groupId>org.apache.hbase</groupId> <artifactId>hbase-protocol</artifactId> <version>1.2.0-cdh5.10.0</version> </dependency> <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-hdfs</artifactId> <version>2.6.0-cdh5.10.0</version> <exclusions> <exclusion> <groupId>tomcat</groupId> <artifactId>jasper-runtime</artifactId> </exclusion> <exclusion> <groupId>tomcat</groupId> <artifactId>jasper-compiler</artifactId> </exclusion> </exclusions> </dependency> <dependency> <groupId>commons-dbcp</groupId> <artifactId>commons-dbcp</artifactId> <version>1.4</version> </dependency> <dependency> <groupId>junit</groupId> <artifactId>junit</artifactId> <version>3.8.1</version> <scope>test</scope> </dependency> <!-- https://mvnrepository.com/artifact/javax.servlet/javax.servlet-api --> <dependency> <groupId>javax.servlet</groupId> <artifactId>javax.servlet-api</artifactId> <version>3.1.0</version> </dependency> <dependency> <groupId>org.springframework.data</groupId> <artifactId>spring-data-jpa</artifactId> <version>1.5.0.RELEASE</version> </dependency> <dependency> <groupId>org.springframework</groupId> <artifactId>spring-tx</artifactId> <version>3.2.3.RELEASE</version> </dependency> <dependency> <groupId>org.hibernate</groupId> <artifactId>hibernate-entitymanager</artifactId> <version>4.2.20.Final</version> </dependency> <dependency> <groupId>c3p0</groupId> <artifactId>c3p0</artifactId> <version>0.9.1.2</version> </dependency> <dependency> <groupId>org.hibernate</groupId> <artifactId>hibernate-c3p0</artifactId> <version>3.5.6-Final</version> </dependency> <dependency> <groupId>org.springframework</groupId> <artifactId>spring-webmvc</artifactId> <version>3.2.3.RELEASE</version> </dependency> <dependency> <groupId>org.springframework</groupId> <artifactId>spring-webmvc-portlet</artifactId> <version>3.2.3.RELEASE</version> </dependency> <dependency> <groupId>org.springframework</groupId> <artifactId>spring-expression</artifactId> <version>3.2.3.RELEASE</version> </dependency> <dependency> <groupId>org.springframework</groupId> <artifactId>spring-aspects</artifactId> <version>3.2.3.RELEASE</version> </dependency> <dependency> <groupId>org.springframework</groupId> <artifactId>spring-oxm</artifactId> <version>3.2.3.RELEASE</version> </dependency> <dependency> <groupId>org.apache.commons</groupId> <artifactId>commons-lang3</artifactId> <version>3.1</version> </dependency> <dependency> <groupId>org.apache.shiro</groupId> <artifactId>shiro-all</artifactId> <version>1.2.4</version> </dependency> <dependency> <groupId>commons-codec</groupId> <artifactId>commons-codec</artifactId> <version>1.10</version> </dependency> <dependency> <groupId>mysql</groupId> <artifactId>mysql-connector-java</artifactId> <version>5.1.25</version> </dependency> <dependency> <groupId>com.google.guava</groupId> <artifactId>guava-base</artifactId> <version>r03</version> </dependency> <dependency> <groupId>com.google.guava</groupId> <artifactId>guava-collections</artifactId> <version>r03</version> </dependency> <dependency> <groupId>org.hibernate</groupId> <artifactId>hibernate-ehcache</artifactId> <version>4.2.20.Final</version> </dependency> <!-- http://mvnrepository.com/artifact/net.sf.ehcache/ehcache-core --> <dependency> <groupId>net.sf.ehcache</groupId> <artifactId>ehcache-core</artifactId> <version>2.6.11</version> </dependency> <dependency> <groupId>com.fasterxml.jackson.core</groupId> <artifactId>jackson-annotations</artifactId> <version>2.7.5</version> </dependency> <dependency> <groupId>cglib</groupId> <artifactId>cglib</artifactId> <version>3.2.2</version> </dependency> <dependency> <groupId>org.codehaus.jackson</groupId> <artifactId>jackson-mapper-asl</artifactId> <version>1.8.6</version> </dependency> <dependency> <groupId>commons-fileupload</groupId> <artifactId>commons-fileupload</artifactId> <version>1.3</version> </dependency> <dependency> <groupId>org.slf4j</groupId> <artifactId>slf4j-log4j12</artifactId> <version>1.7.6</version> </dependency> <dependency> <groupId>javax.servlet</groupId> <artifactId>jstl</artifactId> <version>1.2</version> </dependency> <dependency> <groupId>com.alibaba</groupId> <artifactId>fastjson</artifactId> <version>1.2.10</version> </dependency> <dependency> <groupId>javax.mail</groupId> <artifactId>mail</artifactId> <version>1.4.7</version> </dependency> <dependency> <groupId>org.opensymphony.quartz</groupId> <artifactId>quartz-all</artifactId> <version>1.6.1</version> </dependency> <dependency> <groupId>commons-collections</groupId> <artifactId>commons-collections</artifactId> <version>3.2.1</version> </dependency> <dependency> <groupId>com.belerweb</groupId> <artifactId>pinyin4j</artifactId> <version>2.5.0</version> </dependency> <dependency> <groupId>org.apache.commons</groupId> <artifactId>commons-pool2</artifactId> <version>2.2</version> </dependency> <dependency> <groupId>commons-pool</groupId> <artifactId>commons-pool</artifactId> <version>1.6</version> </dependency> <dependency> <groupId>org.ow2.asm</groupId> <artifactId>asm-util</artifactId> <version>4.0</version> </dependency> <dependency> <groupId>commons-net</groupId> <artifactId>commons-net</artifactId> <version>3.3</version> </dependency> <dependency> <groupId>commons-lang</groupId> <artifactId>commons-lang</artifactId> <version>2.4</version> </dependency> <!-- http://mvnrepository.com/artifact/commons-beanutils/commons-beanutils --> <dependency> <groupId>commons-beanutils</groupId> <artifactId>commons-beanutils</artifactId> <version>1.9.2</version> </dependency> <!-- http://mvnrepository.com/artifact/commons-beanutils/commons-beanutils-core --> <dependency> <groupId>commons-beanutils</groupId> <artifactId>commons-beanutils-core</artifactId> <version>1.8.3</version> </dependency> <!-- http://mvnrepository.com/artifact/commons-beanutils/commons-beanutils-bean-collections --> <dependency> <groupId>commons-beanutils</groupId> <artifactId>commons-beanutils-bean-collections</artifactId> <version>1.8.3</version> </dependency> <!-- http://mvnrepository.com/artifact/redis.clients/jedis --> <dependency> <groupId>redis.clients</groupId> <artifactId>jedis</artifactId> <version>2.7.0</version> </dependency> <!-- http://mvnrepository.com/artifact/net.heartsavior/jedis <dependency> <groupId>net.heartsavior</groupId> <artifactId>jedis</artifactId> <version>2.2.1.1</version> </dependency> --> <!-- shiro 分布式session及cache管理 --> <!-- https://mvnrepository.com/artifact/org.bouncycastle/bcprov-jdk16 --> <dependency> <groupId>org.bouncycastle</groupId> <artifactId>bcprov-jdk16</artifactId> <version>1.45</version> </dependency> <dependency> <groupId>org.crazycake</groupId> <artifactId>shiro-redis</artifactId> <version>2.4.2.1-RELEASE</version> </dependency> <!-- https://mvnrepository.com/artifact/net.sf.json-lib/json-lib-ext-spring --> <dependency> <groupId>net.sf.json-lib</groupId> <artifactId>json-lib-ext-spring</artifactId> <version>1.0.2</version> </dependency> <!-- https://mvnrepository.com/artifact/commons-httpclient/commons-httpclient --> <dependency> <groupId>commons-httpclient</groupId> <artifactId>commons-httpclient</artifactId> <version>3.0</version> </dependency> <!-- https://mvnrepository.com/artifact/com.thoughtworks.xstream/xstream --> <dependency> <groupId>com.thoughtworks.xstream</groupId> <artifactId>xstream</artifactId> <version>1.4.8</version> </dependency> <dependency> <groupId>jaxen</groupId> <artifactId>jaxen</artifactId> <version>1.1.6</version> </dependency> <dependency> <groupId>dom4j</groupId> <artifactId>dom4j</artifactId> <version>1.6.1</version> </dependency> <dependency> <groupId>org.apache.httpcomponents</groupId> <artifactId>httpcore</artifactId> <version>4.4.6</version> </dependency> <dependency> <groupId>commons-codec</groupId> <artifactId>commons-codec</artifactId> <version>1.9</version> </dependency> <dependency> <groupId>org.apache.httpcomponents</groupId> <artifactId>httpclient</artifactId> <version>4.4</version> </dependency> <!-- https://mvnrepository.com/artifact/org.apache.zookeeper/zookeeper --> <dependency> <groupId>org.apache.zookeeper</groupId> <artifactId>zookeeper</artifactId> <version>3.4.10</version> <type>pom</type> </dependency> <!-- https://mvnrepository.com/artifact/org.apache.hive/hive-jdbc --> <dependency> <groupId>org.apache.hive</groupId> <artifactId>hive-jdbc</artifactId> <version>1.1.0-cdh5.10.0</version> </dependency> <dependency> <groupId>org.apache.spark</groupId> <artifactId>spark-sql_2.10</artifactId> <version>1.6.0-cdh5.10.0</version> </dependency> <dependency> <groupId>org.apache.hbase</groupId> <artifactId>hbase-server</artifactId> <version>1.2.0-cdh5.10.0</version> </dependency> </dependencies> <build> <plugins> <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-jar-plugin</artifactId> <version>2.6</version> <configuration> <archive> <manifest> <addClasspath>true</addClasspath> <classpathPrefix>lib/</classpathPrefix> <!--<mainClass>enn.cn.pspace.EnnPspaceConn</mainClass>--> </manifest> </archive> </configuration> </plugin> <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-dependency-plugin</artifactId> <version>2.10</version> <executions> <execution> <id>copy-dependencies</id> <phase>package</phase> <goals> <goal>copy-dependencies</goal> </goals> <configuration> <outputDirectory>${project.build.directory}/lib</outputDirectory> </configuration> </execution> </executions> </plugin> <plugin> <groupId>org.scala-tools</groupId> <artifactId>maven-scala-plugin</artifactId> <executions> <execution> <goals> <goal>compile</goal> <goal>testCompile</goal> </goals> </execution> </executions> <configuration> <scalaVersion>2.10.6</scalaVersion> </configuration> </plugin> <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-shade-plugin</artifactId> <executions> <execution> <phase>package</phase> <goals> <goal>shade</goal> </goals> <configuration> <relocations> <relocation> <pattern>org.apache.http</pattern> <shadedPattern>shade.org.apache.http</shadedPattern> </relocation> </relocations> <artifactSet> <includes> <include>*:*</include> </includes> </artifactSet> <filters> <filter> <artifact>*:*</artifact> <excludes> <exclude>META-INF/*.SF</exclude> <exclude>META-INF/*.DSA</exclude> <exclude>META-INF/*.RSA</exclude> </excludes> </filter> </filters> <transformers> <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer"> <!-- <mainClass>enn.cn.reports.IotToHiveMain</mainClass>--> </transformer> <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer"> <resource>META-INF/spring.handlers</resource> </transformer> <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer"> <resource>META-INF/spring.schemas</resource> </transformer> <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer"> <resource>META-INF/services/org.apache.hadoop.fs.FileSystem</resource> </transformer> </transformers> </configuration> </execution> </executions> </plugin> </plugins> </build> </project>