Spark Notes: A Simple Example of Reading Hive Data with a Local Spark Program

This post shows how to read Hive data with Spark (the example code is in Scala). It covers setting up the Spark configuration, creating a SparkContext, and using a HiveContext to work with a Hive database: selecting a database, creating a table, loading data, and querying it. The example walks through reading a Hive table and handling possible exceptions. It also notes that the MySQL driver and the hive-site.xml file must be placed in the appropriate directories.

Note: copy the MySQL driver jar into spark/lib, copy hive-site.xml into the project's resources directory, and when debugging against a remote cluster use IP addresses rather than hostnames.
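If copying files around is not convenient, the same information can also be supplied programmatically on the SparkConf; the full example below keeps these as commented-out options. A minimal sketch, with the metastore address and jar path as placeholders for your own environment:

import org.apache.spark.SparkConf

object HiveConfSketch {
  // Sketch only: point Spark at the remote Hive metastore and the MySQL JDBC driver
  // directly on the SparkConf instead of editing spark/lib and hive-site.xml.
  // The address and jar path are placeholders for your own environment.
  val conf = new SparkConf()
    .setAppName("HiveApp")
    .set("hive.metastore.uris", "thrift://192.168.66.66:9083")                       // remote Hive metastore
    .set("spark.driver.extraClassPath", "D:\\json\\mysql-connector-java-5.1.39.jar") // MySQL driver on the driver classpath
}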

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.hive.HiveContext
import java.io.{FileNotFoundException, IOException}

object HiveSelect {

  def main(args: Array[String]): Unit = {
    System.setProperty("hadoop.home.dir", "D:\\hadoop") // local Hadoop home (winutils on Windows)

    val conf = new SparkConf()
      .setAppName("HiveApp")
      .setMaster("spark://192.168.66.66:7077")
      .set("spark.executor.memory", "1g")
      .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .setJars(Seq("D:\\workspace\\scala\\out\\scala.jar")) // ship the application jar to the remote Spark cluster
      //.set("hive.metastore.uris", "thrift://192.168.66.66:9083") // remote Hive metastore address
      //.set("spark.driver.extraClassPath", "D:\\json\\mysql-connector-java-5.1.39.jar")

    val sparkContext = new SparkContext(conf)

    try {
      val hiveContext = new HiveContext(sparkContext)

      hiveContext.sql("USE siat") // switch to the target database
      hiveContext.sql("DROP TABLE IF EXISTS src") // drop the table if it already exists
      hiveContext.sql("CREATE TABLE IF NOT EXISTS src (key INT, value STRING) " +
        "ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'") // create the table
      hiveContext.sql("LOAD DATA LOCAL INPATH 'D:\\workspace\\scala\\src.txt' INTO TABLE src") // load the data file
      hiveContext.sql("SELECT * FROM src").collect().foreach(println) // query the data and print each row
    } catch {
      // more specific exceptions must come before the generic ones, otherwise they are unreachable
      case e: FileNotFoundException => println("Missing file exception: " + e)
      case e: IOException => println("IO exception: " + e)
      case e: NumberFormatException => println(e)
      case e: IllegalArgumentException => println("Illegal argument exception: " + e)
      case e: IllegalStateException => println("Illegal state exception: " + e)
      case e: ArithmeticException => println(e)
      case e: Exception => println(e)
      case e: Throwable => println("Found an unknown exception: " + e)
    } finally {
      sparkContext.stop()
    }
  }
}
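For reference: in Spark 2.x and later, HiveContext is deprecated in favour of SparkSession with Hive support enabled. A minimal sketch of the same query using SparkSession (assuming a Spark 2.x+ dependency and the same siat database; the object name simply mirrors the example above):

import org.apache.spark.sql.SparkSession

object HiveSelectWithSession {
  def main(args: Array[String]): Unit = {
    // SparkSession replaces SQLContext/HiveContext in Spark 2.x+
    val spark = SparkSession.builder()
      .appName("HiveApp")
      .master("spark://192.168.66.66:7077") // same cluster as the example above
      .enableHiveSupport()                  // picks up hive-site.xml from the classpath
      .getOrCreate()

    try {
      spark.sql("USE siat")
      spark.sql("SELECT * FROM src").show() // show() prints a tabular preview of the rows
    } finally {
      spark.stop()
    }
  }
}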

Appendix 1: Spark Scala API - http://spark.apache.org/docs/latest/api/scala/index.html#org.apache.spark.package

org.apache.spark

org.apache.spark.api.java

org.apache.spark.api.java.function

org.apache.spark.broadcast

org.apache.spark.graphx

org.apache.spark.graphx.impl

org.apache.spark.graphx.lib

org.apache.spark.graphx.util

org.apache.spark.input

org.apache.spark.internal

org.apache.spark.internal.io

org.apache.spark.io

org.apache.spark.launcher

org.apache.spark.mapred

org.apache.spark.metrics.source

org.apache.spark.ml

org.apache.spark.ml.attribute

org.apache.spark.ml.classification

org.apache.spark.ml.clustering

org.apache.spark.ml.evaluation

org.apache.spark.ml.feature

org.apache.spark.ml.fpm

org.apache.spark.ml.linalg

org.apache.spark.ml.param

org.apache.spark.ml.recommendation

org.apache.spark.ml.regression

org.apache.spark.ml.source.libsvm

org.apache.spark.ml.stat

org.apache.spark.ml.stat.distribution

org.apache.spark.ml.tree

org.apache.spark.ml.tuning

org.apache.spark.ml.util

org.apache.spark.mllib

org.apache.spark.mllib.classification

org.apache.spark.mllib.clustering

org.apache.spark.mllib.evaluation

org.apache.spark.mllib.feature

org.apache.spark.mllib.fpm

org.apache.spark.mllib.linalg

org.apache.spark.mllib.linalg.distributed

org.apache.spark.mllib.optimization

org.apache.spark.mllib.pmml

org.apache.spark.mllib.random

org.apache.spark.mllib.rdd

org.apache.spark.mllib.recommendation

org.apache.spark.mllib.regression

org.apache.spark.mllib.stat

org.apache.spark.mllib.stat.distribution

org.apache.spark.mllib.stat.test

org.apache.spark.mllib.tree

org.apache.spark.mllib.tree.configuration

org.apache.spark.mllib.tree.impurity

org.apache.spark.mllib.tree.loss

org.apache.spark.mllib.tree.model

org.apache.spark.mllib.util

org.apache.spark.partial

org.apache.spark.rdd

org.apache.spark.scheduler

org.apache.spark.scheduler.cluster

org.apache.spark.security

org.apache.spark.serializer

org.apache.spark.sql

org.apache.spark.sql.api.java

org.apache.spark.sql.catalog

org.apache.spark.sql.expressions

org.apache.spark.sql.expressions.javalang

org.apache.spark.sql.expressions.scalalang

org.apache.spark.sql.hive

org.apache.spark.sql.hive.execution

org.apache.spark.sql.hive.orc

org.apache.spark.sql.jdbc

org.apache.spark.sql.sources

org.apache.spark.sql.streaming

org.apache.spark.sql.types

org.apache.spark.sql.util

org.apache.spark.status.api.v1

org.apache.spark.status.api.v1.streaming

org.apache.spark.storage

org.apache.spark.streaming

org.apache.spark.streaming.api.java

org.apache.spark.streaming.dstream

org.apache.spark.streaming.flume

org.apache.spark.streaming.kafka

org.apache.spark.streaming.kinesis

org.apache.spark.streaming.receiver

org.apache.spark.streaming.scheduler

org.apache.spark.streaming.scheduler.rate

org.apache.spark.streaming.util

org.apache.spark.ui.env

org.apache.spark.ui.exec

org.apache.spark.ui.jobs

org.apache.spark.ui.storage

org.apache.spark.util

org.apache.spark.util.random

org.apache.spark.util.sketch
