上一篇讲的是springboot如何连接hadoop3.0
地址:https://blog.csdn.net/u011165335/article/details/106303379
那个是没有开启Kerberos认证的;
先贴一下组件版本:我这里的是CDH6.1.1+HADOOP3.0+HIVE2.1.1
CDH里面只要开启了认证,那么每个组件都会有各自的一个keytab
用各自的keytab认证即可
下面是本地的win测试,keytab是从linux服务器复制下来的;
##Kerberos认证配置##
dmp.Kerberos.flag=true
dmp.keytab.hivepath=D:/keyTab/hive.keytab
dmp.keytab.hdfspath=D:/keyTab/hdfs.keytab
dmp.keytab.hbasepath=D:/keyTab/hbase.keytab
dmp.krb5.path=src/main/resources/krb5-test.conf
这里主要说明:
1.springboot如何连接Kerberos认证的hadoop3.0
配置:
hdfs.path=hdfs://xxxx:8020
代码:
package com.xxxx.tpi.dmp.config;
import org.apache.hadoop.security.UserGroupInformation;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Configuration;
import java.io.IOException;
/**
* Created by yusy02 on 2020/9/1 14:56
*/
@Configuration
public class Kerberoslogin {

    private final Logger logger = LoggerFactory.getLogger(getClass());

    /** Whether Kerberos authentication is enabled (property dmp.Kerberos.flag). */
    @Value("${dmp.Kerberos.flag}")
    private boolean kerberosFlag;

    /** Absolute path of the keytab file used for the login. */
    @Value("${dmp.keytab.path}")
    private String keytabPath;

    /**
     * Path of the krb5 configuration file describing the KDC/realm.
     * Defaults to the previously hard-coded location so existing setups keep working;
     * override with dmp.krb5.path when needed.
     */
    @Value("${dmp.krb5.path:src/main/resources/krb5.conf}")
    private String krb5Path;

    /**
     * Performs the Kerberos login for the given Hadoop configuration.
     *
     * @param conf Hadoop configuration to authenticate against; a fresh one is created when null
     * @return true when Kerberos is enabled AND the keytab login succeeded;
     *         false when Kerberos is disabled or the login failed
     */
    public boolean Kerberoslogin(org.apache.hadoop.conf.Configuration conf) {
        if (!kerberosFlag) {
            // Kerberos disabled: nothing to authenticate.
            return false;
        }
        if (conf == null) {
            conf = new org.apache.hadoop.conf.Configuration();
        }
        try {
            //System.setProperty("sun.security.krb5.debug", "true");
            // Point the JVM at the krb5.conf before any JAAS/Kerberos machinery runs.
            System.setProperty("java.security.krb5.conf", krb5Path);
            conf.set("hadoop.security.authentication", "Kerberos");
            UserGroupInformation.setConfiguration(conf);
            // TODO(review): the principal should come from configuration instead of being hard-coded.
            UserGroupInformation.loginUserFromKeytab("xxx@xxx.COM", keytabPath);
            return true;
        } catch (IOException e) {
            // Report failure honestly instead of returning the enabled-flag
            // (the original returned true here even though the login threw).
            logger.error("Kerberos login failed", e);
            return false;
        }
    }
}
package com.xxx.tpi.dmp.config;
import com.cntaiping.tpi.dmp.util.HdfsUtil;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
@Configuration
public class HdfsConfig {

    /** Default HDFS URI, e.g. hdfs://host:8020 (property hdfs.path). */
    @Value("${hdfs.path}")
    private String defaultHdfsUri;

    /** Performs the optional Kerberos login before the FileSystem is first used. */
    @Autowired
    private Kerberoslogin kerberosLogin;

    /**
     * Builds the {@link HdfsUtil} used for HDFS read/write operations.
     * NOTE(review): the method (and therefore the bean) name getHbaseService is a
     * misnomer — this bean is HDFS-related; kept unchanged so existing
     * {@code @Qualifier} references, if any, do not break.
     *
     * @return an HdfsUtil bound to the configured default HDFS URI
     */
    @Bean
    public HdfsUtil getHbaseService() {
        org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration();
        //conf.set("fs.hdfs.impl", DistributedFileSystem.class.getName());
        conf.set("fs.defaultFS", defaultHdfsUri);
        // Authenticate; a no-op returning false when dmp.Kerberos.flag is disabled.
        kerberosLogin.Kerberoslogin(conf);
        return new HdfsUtil(conf, defaultHdfsUri);
    }
}
2.springboot如何连接hive(使用durid连接池)
配置
注意连接串后面有principal认证
#####################hive###################
#hive.url=jdbc:hive2://xxxx:10000
hive.url=jdbc:hive2://xxxx.cntaiping.com:10000/default;principal=hive/whtpicdmapp02.cntaiping.com@CNTAIPING.COM
hive.driver-class-name=org.apache.hive.jdbc.HiveDriver
#hive.type=com.alibaba.druid.pool.DruidDataSource
hive.user=appuser
hive.password=appuser
# 下面为连接池的补充设置,应用到上面所有数据源中
# 初始化大小,最小,最大
hive.initialSize=1
hive.minIdle=3
hive.maxActive=20
# 配置获取连接等待超时的时间
hive.maxWait=60000
# 配置间隔多久才进行一次检测,检测需要关闭的空闲连接,单位是毫秒
hive.timeBetweenEvictionRunsMillis=60000
# 配置一个连接在池中最小生存的时间,单位是毫秒
hive.minEvictableIdleTimeMillis=30000
hive.validationQuery=select 1
hive.testWhileIdle=true
hive.testOnBorrow=false
hive.testOnReturn=false
# 打开PSCache,并且指定每个连接上PSCache的大小
hive.poolPreparedStatements=true
hive.maxPoolPreparedStatementPerConnectionSize=20
代码
package com.xxx.tpi.dmp.config;
import com.alibaba.druid.pool.DruidDataSource;
import org.mybatis.spring.annotation.MapperScan;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.jdbc.core.JdbcTemplate;
import javax.sql.DataSource;
/**
 * Druid data-source configuration for Hive. Binds all hive.* properties from
 * application.properties/yml via the setters below (do not rename them).
 * NOTE(review): @MapperScan points at sqlSessionFactoryRef = "hiveSqlSessionFactory",
 * but that bean is commented out below — uncomment it (and its imports) before
 * adding mappers under com.xxxx.tpi.dmp.mapper.hive, or startup will fail.
 */
@Configuration
@ConfigurationProperties(prefix = "hive")
@MapperScan(basePackages = {"com.xxxx.tpi.dmp.mapper.hive"}, sqlSessionFactoryRef = "hiveSqlSessionFactory")
public class HiveDruidConfig {

    /** Performs the optional Kerberos login before the pool is created. */
    @Autowired
    private Kerberoslogin kerberosLogin;

    // ---- hive.* connection and pool properties (populated through the setters) ----
    private String url;
    private String user;
    private String password;
    private String driverClassName;
    private int initialSize;
    private int minIdle;
    private int maxActive;
    private int maxWait;
    private int timeBetweenEvictionRunsMillis;
    private int minEvictableIdleTimeMillis;
    private String validationQuery;
    private boolean testWhileIdle;
    private boolean testOnBorrow;
    private boolean testOnReturn;
    private boolean poolPreparedStatements;
    private int maxPoolPreparedStatementPerConnectionSize;

    public HiveDruidConfig() {
    }

    /**
     * Creates the Druid pool for Hive. When Kerberos authentication succeeded the
     * JDBC URL carries the principal, so user/password must NOT be set; otherwise
     * the plain credentials are applied.
     *
     * @return the configured Druid data source
     */
    @Bean(name = "hiveDruidDataSource")
    public DataSource dataSource() {
        boolean kerberosAuthenticated = kerberosLogin.Kerberoslogin(null);
        DruidDataSource datasource = new DruidDataSource();
        datasource.setUrl(url);
        if (!kerberosAuthenticated) {
            datasource.setUsername(user);
            datasource.setPassword(password);
        }
        datasource.setDriverClassName(driverClassName);
        // Pool configuration, taken verbatim from the hive.* properties.
        datasource.setInitialSize(initialSize);
        datasource.setMinIdle(minIdle);
        datasource.setMaxActive(maxActive);
        datasource.setMaxWait(maxWait);
        datasource.setTimeBetweenEvictionRunsMillis(timeBetweenEvictionRunsMillis);
        datasource.setMinEvictableIdleTimeMillis(minEvictableIdleTimeMillis);
        datasource.setValidationQuery(validationQuery);
        datasource.setTestWhileIdle(testWhileIdle);
        datasource.setTestOnBorrow(testOnBorrow);
        datasource.setTestOnReturn(testOnReturn);
        datasource.setPoolPreparedStatements(poolPreparedStatements);
        datasource.setMaxPoolPreparedStatementPerConnectionSize(maxPoolPreparedStatementPerConnectionSize);
        return datasource;
    }

    /** JdbcTemplate bound to the Hive Druid pool. */
    @Bean(name = "hiveDruidTemplate")
    public JdbcTemplate hiveDruidTemplate(@Qualifier("hiveDruidDataSource") DataSource dataSource) {
        return new JdbcTemplate(dataSource);
    }

    /*
    // Hive currently has no XML-mapped SQL; uncomment when mappers are added
    // (required by the sqlSessionFactoryRef on @MapperScan above).
    @Bean(name = "hiveSqlSessionFactory")
    public SqlSessionFactory sqlSessionFactory(@Qualifier("hiveDruidDataSource") DataSource dataSource) throws Exception {
        SqlSessionFactoryBean sessionFactoryBean = new SqlSessionFactoryBean();
        sessionFactoryBean.setDataSource(dataSource);
        // Placeholder location; create the mybatis/hive directory if needed.
        sessionFactoryBean.setMapperLocations(new PathMatchingResourcePatternResolver()
                .getResources("classpath*:mybatis/hive/*.xml"));
        return sessionFactoryBean.getObject();
    }*/

    public String getUrl() {
        return url;
    }

    public void setUrl(String url) {
        this.url = url;
    }

    public String getUser() {
        return user;
    }

    public void setUser(String user) {
        this.user = user;
    }

    public String getPassword() {
        return password;
    }

    public void setPassword(String password) {
        this.password = password;
    }

    public String getDriverClassName() {
        return driverClassName;
    }

    public void setDriverClassName(String driverClassName) {
        this.driverClassName = driverClassName;
    }

    public int getInitialSize() {
        return initialSize;
    }

    public void setInitialSize(int initialSize) {
        this.initialSize = initialSize;
    }

    public int getMinIdle() {
        return minIdle;
    }

    public void setMinIdle(int minIdle) {
        this.minIdle = minIdle;
    }

    public int getMaxActive() {
        return maxActive;
    }

    public void setMaxActive(int maxActive) {
        this.maxActive = maxActive;
    }

    public int getMaxWait() {
        return maxWait;
    }

    public void setMaxWait(int maxWait) {
        this.maxWait = maxWait;
    }

    public int getTimeBetweenEvictionRunsMillis() {
        return timeBetweenEvictionRunsMillis;
    }

    public void setTimeBetweenEvictionRunsMillis(int timeBetweenEvictionRunsMillis) {
        this.timeBetweenEvictionRunsMillis = timeBetweenEvictionRunsMillis;
    }

    public int getMinEvictableIdleTimeMillis() {
        return minEvictableIdleTimeMillis;
    }

    public void setMinEvictableIdleTimeMillis(int minEvictableIdleTimeMillis) {
        this.minEvictableIdleTimeMillis = minEvictableIdleTimeMillis;
    }

    public String getValidationQuery() {
        return validationQuery;
    }

    public void setValidationQuery(String validationQuery) {
        this.validationQuery = validationQuery;
    }

    public boolean isTestWhileIdle() {
        return testWhileIdle;
    }

    public void setTestWhileIdle(boolean testWhileIdle) {
        this.testWhileIdle = testWhileIdle;
    }

    public boolean isTestOnBorrow() {
        return testOnBorrow;
    }

    public void setTestOnBorrow(boolean testOnBorrow) {
        this.testOnBorrow = testOnBorrow;
    }

    public boolean isTestOnReturn() {
        return testOnReturn;
    }

    public void setTestOnReturn(boolean testOnReturn) {
        this.testOnReturn = testOnReturn;
    }

    public boolean isPoolPreparedStatements() {
        return poolPreparedStatements;
    }

    public void setPoolPreparedStatements(boolean poolPreparedStatements) {
        this.poolPreparedStatements = poolPreparedStatements;
    }

    public int getMaxPoolPreparedStatementPerConnectionSize() {
        return maxPoolPreparedStatementPerConnectionSize;
    }

    public void setMaxPoolPreparedStatementPerConnectionSize(int maxPoolPreparedStatementPerConnectionSize) {
        this.maxPoolPreparedStatementPerConnectionSize = maxPoolPreparedStatementPerConnectionSize;
    }
}
3.主要的问题
我本地默认的搭配是跟服务器上面一致的: hive2.1.1+hadoop3.0
但是这样会出现如下问题:
此时要么升级hive版本到3.0,要么降级hadoop到2.x.x
(最终方案,降级hadoop)
1).Unrecognized Hadoop major version number: 3.0.0
2018年5月21日,Hive 3.0.0发布,支持Hadoop 3。
这个是因为我的搭配的hive2.1是不支持hadoop3.0的;
看ShimLoader源码(hive2.1.1)
再看ShimLoader的3.0源码(hive3.0.0)
依赖如下:
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-jdbc</artifactId>
<version>3.0.0</version>
</dependency>
- 版本问题解决了,下面又报错了 Required field 'client_protocol' is unset!
Caused by: java.sql.SQLException: Could not establish connection to jdbc:hive2://xxxx.cntaiping.com:10000/default;principal=hive/xxx.cntaiping.com@xxx.COM: Required field 'client_protocol' is unset! Struct:TOpenSessionReq(client_protocol:null, configuration:{set:hiveconf:hive.server2.thrift.resultset.default.fetch.size=1000, use:database=default})
这个原因是,我本地的jdbc的hive版本是3.0.0,但是服务器上面还是2.1.1,版本不匹配导致的,
也就是说,不能升级hive,那我把本地的hadoop降级试一下
注意:我本地的hadoop只是一个客户端,这个下载地址:
https://gitee.com/bochangguan/winutils/tree/master
然后把hadoop的依赖改成2.8.3;发现就可以成功连接了;
参考地址:
https://www.jianshu.com/p/e2248a86f0fa
https://blog.csdn.net/mm_bit/article/details/51958344
https://stackoverflow.com/questions/47004014/unrecognized-hadoop-major-version-number-3-0-0-beta1-at-org-apache-hadoop-hive/47007925#47007925