文章目录
前言
生产环境肯定需要开启kerberos的。这个时候坑就多了
CDH6 + IMPALA + Kerberos
本地数据上传 -> hdfs -> 导入impala
Project
Application.yml
# 导入配置
import-config:
csv-tmp-path: /home/huweihui/back-end/data/import
# hive-url: jdbc:hive2://x.x.x.x:10000/default
impala-url: jdbc:impala://xxx:21050/default;AuthMech=1;KrbRealm=EXAMPLE.COM;KrbHostFQDN=xxx;KrbServiceName=impala;
impala-user: huweihui
impala-password: Hantele@1234!
hdfs-uri: hdfs://xxxx:8020
hdfs-user: huweihui
hdfs-tmp-path: /user/huweihui/web_data
#kerberos配置
kerberos:
krb5-file-path: /etc/krb5.conf
keytab-file-path: /home/huweihui/back-end/config/huweihui-bi-be/huweihui.keytab
ker-user: huweihui@EXAMPLE.COM
Core-code
配置类
/**
 * <b><code>ImportConfig</code></b>
 * <p/>
 * Binds the {@code import-config} section of application.yml: the local CSV
 * staging directory, the Impala JDBC connection settings, and the HDFS
 * location used as an intermediate store for data imports.
 * <p/>
 * <b>Creation Time:</b> 2019/6/12 16:58.
 *
 * @author Hu-Weihui
 */
@Component
@Data
@ConfigurationProperties(prefix = "import-config")
public class ImportConfig {

    /** Local directory where uploaded CSV files are staged before the HDFS upload. */
    private String csvTmpPath;

    /** Impala JDBC URL (Cloudera driver; Kerberos via {@code AuthMech=1}). */
    private String impalaUrl;

    /** User name handed to the Impala JDBC driver. */
    private String impalaUser;

    /** Password handed to the Impala JDBC driver. */
    private String impalaPassword;

    /** User performing HDFS operations. */
    private String hdfsUser;

    /** HDFS namenode URI, e.g. {@code hdfs://host:8020}. */
    private String hdfsUri;

    /** HDFS directory files are copied into before the Impala LOAD. */
    private String hdfsTmpPath;
}
/**
 * <b><code>KerberosConfig</code></b>
 * <p/>
 * Binds the {@code kerberos} section of application.yml: everything needed
 * for a keytab-based Kerberos login.
 * <p/>
 * <b>Creation Time:</b> 2019/7/5 11:12.
 *
 * @author Hu-Weihui
 */
@Component
@Data
@ConfigurationProperties(prefix = "kerberos")
public class KerberosConfig {

    /** Kerberos principal, e.g. {@code user@EXAMPLE.COM}. */
    private String kerUser;

    /**
     * Path to the java-security krb5 configuration. The krb5.conf file can be
     * copied from {@code /etc/krb5.conf} on any node of a cluster where
     * Kerberos is already enabled, and placed on the local machine.
     */
    private String krb5FilePath;

    /**
     * Keytab file matching the principal above, fetched from the server and
     * stored locally.
     */
    private String keytabFilePath;
}
Kerberos认证工具
/**
* <b><code>KerberosUtil</code></b>
* <p/>
* Description kerberos 认证工具类
* <p/>
* <b>Creation Time:</b> 2019/7/5 11:06.
*
* @author Hu-Weihui
* @since ${PROJECT_VERSION}
*/
@Slf4j
public class KerberosUtil {
/**
* kerberos认证。
* @param configuration
* @param krb5FilePath
* @param kerUser
* @param keytabFilePath
* @return 返回kerberos登录对象,可使用此对象进一步操作
*/
public static UserGroupInformation kerberosAuth(Configuration configuration, String krb5FilePath, String kerUser, String keytabFilePath) {
// krb5.conf配置路径
System.setProperty("java.security.krb5.conf", krb5FilePath);
//开启kerberos
configuration.set("hadoop.security.authentication", "kerberos");
//鉴权
UserGroupInformation.setConfiguration(configuration);
try {
UserGroupInformation.loginUserFromKeytab(kerUser, keytabFilePath);
UserGroupInformation loginUser = UserGroupInformation.getLoginUser();
return loginUser;
} catch (IOException e) {
log.error("kerberos auth fail : {}", e);
}
return null;
}
}
数据导入
关键点:
1.configuration设置增加了远端访问的配置
2.进行Kerberos认证
3.IMPALA操作需要使用认证后的用户(loginUser,通过UserGroupInformation登录后返回)
4.kerberos配置好
/**
 * Imports an uploaded CSV file into an Impala table.
 * <p>
 * Flow: save the upload to local disk, copy it into a temporary HDFS
 * directory (with Kerberos authentication), then execute an Impala
 * {@code LOAD DATA INPATH} plus {@code REFRESH} as the authenticated user.
 *
 * @param tableName     target Impala table. NOTE(review): interpolated into SQL
 *                      without escaping — must come from a trusted source, never
 *                      raw end-user input
 * @param updateMethod  append vs. override, see {@link UpdateMethod}
 * @param multipartFile the uploaded CSV file
 */
@Override
public void importImpalaData(String tableName, String updateMethod, MultipartFile multipartFile) {
    // 1. Save the upload to a local temp file.
    File localFile = saveToLocal(multipartFile);
    String localFilePath = localFile.getPath();
    String hdfsDstPath = importConfig.getHdfsTmpPath() + "/" + localFile.getName();

    // 2. Upload the local file to HDFS.
    Path srcPath = new Path(localFilePath);
    Path dstPath = new Path(hdfsDstPath);
    Path hdfsPath = new Path(importConfig.getHdfsTmpPath());
    try {
        // Remote access requires explicit client configuration.
        Configuration configuration = new Configuration();
        configuration.set("fs.defaultFS", importConfig.getHdfsUri());
        configuration.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
        configuration.set("dfs.client.block.write.replace-datanode-on-failure.policy", "NEVER");
        // FIX: the original key had a leading space (" dfs.namenode...") which
        // makes it a different, silently-ignored property. The principal now
        // comes from configuration instead of being hard-coded.
        configuration.set("dfs.namenode.kerberos.principal", kerberosConfig.getKerUser());
        configuration.set("dfs.namenode.kerberos.principal.pattern", "*@EXAMPLE.COM");

        // Kerberos settings read from application.yml.
        String krb5FilePath = kerberosConfig.getKrb5FilePath();
        String kerUser = kerberosConfig.getKerUser();
        String keytabFilePath = kerberosConfig.getKeytabFilePath();

        // Kerberos auth; KerberosUtil returns null when the login fails, so
        // guard against the NPE the original code would hit at doAs().
        UserGroupInformation loginUser = KerberosUtil.kerberosAuth(configuration, krb5FilePath, kerUser, keytabFilePath);
        if (loginUser == null) {
            throw new DataManagementException("导入数据失败");
        }

        FileSystem fileSystem = FileSystem.get(configuration);
        if (!fileSystem.exists(hdfsPath)) {
            fileSystem.mkdirs(hdfsPath);
        }
        fileSystem.copyFromLocalFile(srcPath, dstPath);

        // 3. Impala: LOAD the HDFS file, executed as the Kerberos login user.
        loginUser.doAs((PrivilegedAction<Void>) () -> {
            String url = importConfig.getImpalaUrl();
            String user = importConfig.getImpalaUser();
            String password = importConfig.getImpalaPassword();
            try {
                // Register the Cloudera driver BEFORE requesting a connection
                // (the original called Class.forName after getConnection).
                Class.forName("com.cloudera.impala.jdbc41.Driver");
                try (Connection connection = DriverManager.getConnection(url, user, password);
                     Statement statement = connection.createStatement()) {
                    // FIX: the OVERWRITE variant was missing the space after the
                    // closing quote ("'OVERWRITE"), producing invalid SQL.
                    String loadSql = "LOAD DATA INPATH '" + hdfsDstPath + "' INTO TABLE " + tableName;
                    if (UpdateMethod.OVERRIDE.getCode().equals(updateMethod)) {
                        loadSql = "LOAD DATA INPATH '" + hdfsDstPath + "' OVERWRITE INTO TABLE " + tableName;
                    }
                    statement.execute(loadSql);
                    // Make the newly loaded data visible to Impala.
                    statement.execute(String.format("REFRESH %s", tableName));
                }
            } catch (ClassNotFoundException e) {
                log.error("load impala driver class fail :", e);
                throw new DataManagementException("导入数据失败");
            } catch (SQLException e) {
                log.error("can not to load hdfs data into impala :", e);
                throw new DataManagementException("导入数据失败");
            }
            return null;
        });
    } catch (IOException e) {
        log.error("con not get FileSystem :", e);
        // FIX: message was garbled ("上传到数据失败").
        throw new DataManagementException("上传数据失败");
    }
}
爬坑日志
configuration配置
Configuration configuration = new Configuration();
configuration.set("fs.defaultFS", importConfig.getHdfsUri());
configuration.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
configuration.set("dfs.client.block.write.replace-datanode-on-failure.policy", "NEVER");
configuration.set("dfs.namenode.kerberos.principal", "huweihui@EXAMPLE.COM");
configuration.set("dfs.namenode.kerberos.principal.pattern", "*@EXAMPLE.COM");
IMPALA的URL
impala-url: jdbc:impala://xxx:21050/default;AuthMech=1;KrbRealm=EXAMPLE.COM;KrbHostFQDN=xxx;KrbServiceName=impala;
IMPALA执行操作需要使用LoginUser
loginUser.doAs((PrivilegedAction<Void>) () -> {
//....todo
});
确认好了你的生产环境Kerberos没问题!!
- 确保你的用户是能认证kerberos并且访问HDFS&IMPALA
- 奉劝生产环境别用IMPALA和HDFS这两个默认用户
- 新增一个能使用双方的用户。举例huweihui,把该用户加到和hdfs和impala同一个supergroup里面。(不懂的找专业安装CDH的人)
- 安装完kerberos要给对应用户生成 keytab文件并且copy到项目
- 上面第3点需要产生一个huweihui@EXAMPLE.COM的账户。root登录Kerberos服务器,执行kadmin.local -> listprincs(查看用户列表必须有一个 huweihui@EXAMPLE.COM)
- 网上坑货太多,建议直接看CDH impala-jdbc.dirver包的使用文档。附上链接:https://www.cloudera.com/documentation/other/connectors/impala-jdbc/latest.html
Installation Guide (在线PDF)->CONFIGURING AUTHENTICATION -> USING KERBEROS (11页) - 推荐一个Kerberos常用命令的BLOG :https://www.jianshu.com/p/69e6a2e7c648
- 怎么安装Kerberos和配置账号请自己查询一下,但是上述都是要注意的关键点
Author
作者:HuHui
转载:欢迎一起讨论web和大数据问题,转载请注明作者和原文链接,感谢