1.准备:
分发
[/soft/hadoop/etc/hadoop/]
core-site.xml
hdfs-site.xml
[/soft/hive/conf/]
hive-site.xml
三个文件到所有worker节点的spark/conf目录下
2.启动spark集群(完全分布式-standalone)
$>/soft/spark/sbin/start-all.sh
master //201
worker //202 ~ 204
3.创建hive的数据表在默认库下。
$>hive -e "create table tt(id int,name string , age int) row format delimited fields terminated by ',' lines terminated by '\n' stored as textfile"
4.加载数据到hive表中.
$>hive -e "load data local inpath 'file:///home/centos/data.txt' into table tt"
$>hive -e "select * from tt"
5.启动spark-shell
$>spark-shell --master spark://s201:7077
6.启动thriftserver服务器
$>start-thriftserver.sh --master spark://s201:7077
java 访问:
package com.mao.scala.java;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;
/**
 * Minimal JDBC client that queries a Hive table through Spark's
 * ThriftServer (HiveServer2 wire protocol) at jdbc:hive2://s201:10000.
 *
 * <p>Runs a single aggregate query against table {@code tt} and prints
 * each result row's first column to stdout.
 */
public class ThriftServerClientJava {
    public static void main(String[] args) throws Exception {
        // Explicit driver registration is optional with JDBC 4+ drivers,
        // kept here for compatibility with older runtimes/classloaders.
        Class.forName("org.apache.hive.jdbc.HiveDriver");
        // try-with-resources guarantees ResultSet, Statement and Connection
        // are closed in reverse order even if the query throws — the
        // original code leaked conn and st, and leaked rs on failure.
        try (Connection conn = DriverManager.getConnection("jdbc:hive2://s201:10000");
             Statement st = conn.createStatement();
             ResultSet rs = st.executeQuery("select count(1) from tt where age > 12 ")) {
            while (rs.next()) {
                int count = rs.getInt(1);
                System.out.println(count);
            }
        }
    }
}