1.首先将集群的这 3 个配置文件 hive-site.xml、core-site.xml、hdfs-site.xml 放到工程的资源文件目录里(必须,否则报错)
2.代码方面。下面几个测试都可以运行。
1)test03.java
import org.apache.spark.sql.SparkSession;
import java.text.ParseException;public classtest03 {public static voidmain(String[] args) throws ParseException {
SparkSession spark=SparkSession
.builder()
.appName("Java Spark Hive Example")
.master("local[*]")//.config("spark.sql.warehouse.dir", "/user/hive/warehouse")
.config("hadoop.home.dir", "/user/hive/warehouse")
.enableHiveSupport()
.getOrCreate();
spark.sql("SELECT * FROM mt1").show();
}
}
2)Hive03.scala
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.hive.HiveContext

/**
 * Reads Hive table "mt1" via the pre-2.0 HiveContext API,
 * registers it as a temporary table and queries the first 10 rows.
 */
object Hive03 {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("test").setMaster("local[2]")
    val sc = new SparkContext(conf)
    val sqlContext = new HiveContext(sc)
    sqlContext.table("mt1")          // a "dbName.tableName" form also works here
      .registerTempTable("person")   // register as a temporary table
    sqlContext.sql(
      """|select *
         |from person
         |limit 10
         |""".stripMargin).show()
    sc.stop()
  }
}
3) SparkHiveText.scala
import org.apache.spark.sql.hive.HiveContext
import org.apache.spark.{SparkConf, SparkContext}

/** Minimal HiveContext example: query Hive table "mt1" and print it. */
object SparkHiveText {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local").setAppName("SparkHiveText")
    val sc = new SparkContext(conf)
    val hc = new HiveContext(sc)
    hc.sql("select * from mt1").show()
    sc.stop()
  }
}
在 Windows 下运行时可能出现类似错误:(null) entry in command string: null chmod 0700
解决办法:下载 hadoop-common 的 Windows 版本(含 bin\winutils.exe),并通过 System.setProperty("hadoop.home.dir", ...) 指向该目录,见下文 test03 的示例代码。
运行结果:
4.0.0
test
test
1.0-SNAPSHOT
2.2.1
2.11
org.codehaus.jettison
jettison
1.3.4
org.apache.spark
spark-core_${scala.version}
${spark.version}
org.apache.spark
spark-streaming_${scala.version}
${spark.version}
org.apache.spark
spark-sql_${scala.version}
${spark.version}
org.apache.spark
spark-hive_${scala.version}
${spark.version}
org.apache.spark
spark-sql-kafka-0-10_2.11
2.2.1
org.apache.spark
spark-streaming-kafka-0-10_2.11
2.2.1
org.apache.spark
spark-mllib_${scala.version}
${spark.version}
org.apache.spark
spark-sql_2.11
2.2.1
org.apache.spark
spark-core_2.11
2.2.1
org.apache.spark
spark-streaming_2.11
2.2.1
provided
com.sparkjava
spark-core
2.2
org.apache.spark
spark-repl_2.11
2.2.1
org.elasticsearch
elasticsearch-spark-20_2.11
5.6.3
mysql
mysql-connector-java
5.1.41
org.apache.commons
commons-lang3
3.4
org.slf4j
slf4j-api
log4j
log4j
log4j
log4j
1.2.17
org.slf4j
slf4j-log4j12
1.7.12
org.slf4j
slf4j-api
1.7.12
com.101tec
zkclient
0.10
org.scala-tools
maven-scala-plugin
2.15.2
compile
testCompile
maven-compiler-plugin
3.6.0
1.8
1.8
org.apache.maven.plugins
maven-surefire-plugin
2.19
true
pom.xml
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------
spark 读取 hive 表的结果并写回到 hive 里
import org.apache.spark.sql.{SaveMode, SparkSession}

/**
 * Reads Hive table "mt1", selects three columns through a temporary view,
 * and writes the result back to Hive as table "test05".
 */
object Hive05 {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder()
      .master("local[*]")
      .appName("wer")
      // .config("spark.sql.warehouse.dir", "/user/hive/warehouse") // optional
      .enableHiveSupport()
      .getOrCreate()
    // "mt1" is an existing Hive table; "person" is a temporary view over it.
    // (createOrReplaceTempView returns Unit, so there is nothing to bind.)
    spark.table("mt1").createOrReplaceTempView("person")
    val tt = spark.sql("select BUS_NO,CITY_NO,INS_TIME from person")
    tt.show()
    // Create (or overwrite) Hive table test05 with the query result.
    tt.write.mode(SaveMode.Overwrite).saveAsTable("test05")
    spark.stop()
  }
}
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
遇到问题。hive on spark有很多问题。例如scala和spark兼容问题等等
import org.apache.spark.sql.SparkSession;
import java.text.ParseException;public classtest03 {public static voidmain(String[] args) throws ParseException {
System.setProperty("hadoop.home.dir", "C:\\hadoop-common-2.2.0");
SparkSession spark=SparkSession
.builder()
.appName("Java Spark Hive Example")
.master("local[*]")//.config("spark.sql.warehouse.dir", "/user/hive/warehouse")//.config("hadoop.home.dir", "/user/hive/warehouse")
.enableHiveSupport()
.getOrCreate();
spark.sql("SELECT * FROM mt1").show();
}
}
上面是我在网上下载的。下载地址https://github.com/srccodes/hadoop-common-2.2.0-bin
参考博客:https://blog.csdn.net/woshixuye/article/details/53461975
2.还有scala版本不要使用2.12.X。我使用的是maven的2.11
4.0.0
test
test
1.0-SNAPSHOT
2.2.1
2.11
org.apache.hbase
hbase-server
1.2.4
org.apache.hbase
hbase-protocol
1.2.4
org.apache.hbase
hbase-common
1.2.4
org.apache.hbase
hbase-client
1.2.4
org.codehaus.jettison
jettison
1.3.4
org.apache.spark
spark-core_${scala.version}
${spark.version}
org.apache.spark
spark-streaming_${scala.version}
${spark.version}
org.apache.spark
spark-sql_${scala.version}
${spark.version}
org.apache.spark
spark-hive_${scala.version}
${spark.version}
org.apache.spark
spark-sql-kafka-0-10_2.11
2.2.1
org.apache.spark
spark-streaming-kafka-0-10_2.11
2.2.1
org.apache.spark
spark-mllib_${scala.version}
${spark.version}
org.apache.spark
spark-sql_2.11
2.2.1
org.apache.spark
spark-core_2.11
2.2.1
org.apache.spark
spark-streaming_2.11
2.2.1
provided
com.sparkjava
spark-core
2.2
org.apache.spark
spark-repl_2.11
2.2.1
org.elasticsearch
elasticsearch-spark-20_2.11
5.6.3
mysql
mysql-connector-java
5.1.17
org.apache.commons
commons-lang3
3.4
org.slf4j
slf4j-api
log4j
log4j
log4j
log4j
1.2.17
org.slf4j
slf4j-log4j12
1.7.12
org.slf4j
slf4j-api
1.7.12
com.101tec
zkclient
0.10
org.scala-tools
maven-scala-plugin
2.15.2
compile
testCompile
maven-compiler-plugin
3.6.0
1.8
1.8
org.apache.maven.plugins
maven-surefire-plugin
2.19
true
*hive设置使用spark 的计算模型
-- 使用 Hive On Spark 非常简单:
-- 只要用 set hive.execution.engine 命令把 Hive 的执行引擎设置为 spark 即可(默认是 mr)
set hive.execution.engine=spark;
-- 这里也完全可以将 spark.master 设置为 Spark Master 的 URL 地址:
set spark.master=spark://192.168.1.107:7077;