DataX HDFS: how to overwrite data as a user with different permissions (without Kerberos)
1. First, list all the files under the target directory
hdfsWriter.jar ---- HdfsWriter.java:
// list all the files under this path
// path: /user/hive/warehouse/
// fileName: file name prefix
Path[] existFilePaths = hdfsHelper.hdfsDirListForDelete(path, fileName);
hdfsWriter.jar ---- HdfsHelper.java:
/**
 * List the files under dir whose names start with fileName.
 * @param dir      directory path, e.g. /user/hive/warehouse/wx_test
 * @param fileName file name prefix, e.g. wx_test
 * @return the paths of the matching files
 */
public Path[] hdfsDirListForDelete(String dir, String fileName) {
    Path path = new Path(dir);
    Path[] files = null;
    // String filterFileName = fileName + "__*";
    String filterFileName = fileName + "*";
    try {
        PathFilter pathFilter = new GlobFilter(filterFileName);
        // list only the entries under this path that match the glob
        FileStatus[] status = fileSystem.listStatus(path, pathFilter);
        files = new Path[status.length];
        for (int i = 0; i < status.length; i++) {
            files[i] = status[i].getPath();
        }
    } catch (IOException e) {
        String message = String.format("Network IO error while listing files under directory [%s] whose names start with [%s]; please check your network!",
                dir, fileName);
        LOG.error(message);
        throw DataXException.asDataXException(HdfsWriterErrorCode.CONNECT_HDFS_IO_ERROR, e);
    }
    return files;
}
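A note on the filter pattern: DataX's HdfsWriter typically names its output files fileName__<suffix>, so the stricter "fileName__*" glob (the commented-out line above) only matches files the writer itself produced, while the looser "fileName*" also sweeps up files such as wx_test_backup. A small standalone sketch of the difference (the file names here are made up):

import org.apache.hadoop.fs.GlobFilter;
import org.apache.hadoop.fs.Path;

public class GlobFilterDemo {
    public static void main(String[] args) throws Exception {
        GlobFilter strict = new GlobFilter("wx_test__*");
        GlobFilter broad = new GlobFilter("wx_test*");
        Path written = new Path("/user/hive/warehouse/wx_test/wx_test__a1b2c3");
        Path backup = new Path("/user/hive/warehouse/wx_test/wx_test_backup");
        System.out.println(strict.accept(written)); // true: written by HdfsWriter
        System.out.println(strict.accept(backup));  // false: not a writer file
        System.out.println(broad.accept(written));  // true
        System.out.println(broad.accept(backup));   // true: caught by the loose glob
    }
}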
2. Delete all the files found under the directory
hdfsWriter.jar ---- HdfsWriter.java:
hdfsHelper.deleteFiles(existFilePaths);
hdfsWriter.jar ---- HdfsHelper.java:
public void deleteFiles(Path[] paths) {
    for (int i = 0; i < paths.length; i++) {
        LOG.info(String.format("delete file [%s].", paths[i].toString()));
        try {
            // recursive = true, so a matching directory would be removed with its contents
            fileSystem.delete(paths[i], true);
        } catch (IOException e) {
            String message = String.format("IO error while deleting file [%s]; please check your network!",
                    paths[i].toString());
            LOG.error(message);
            throw DataXException.asDataXException(HdfsWriterErrorCode.CONNECT_HDFS_IO_ERROR, e);
        }
    }
}
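Taken together, steps 1 and 2 form the overwrite prologue: list everything the previous run left behind, then delete it before writing the new files. A minimal sketch, assuming an initialized HdfsHelper and the path/fileName values from the job configuration:

// Sketch of the overwrite prologue; the method name is illustrative.
public void overwritePrologue(HdfsHelper hdfsHelper, String path, String fileName) {
    Path[] existFilePaths = hdfsHelper.hdfsDirListForDelete(path, fileName);
    if (existFilePaths.length > 0) {
        // remove the previous run's output before writing the new files
        hdfsHelper.deleteFiles(existFilePaths);
    }
}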
3. Get the HDFS user (define hdfs_user yourself in the job JSON)
hdfsWriter.jar ---- HdfsHelper.java:
getFileSystem first checks whether Kerberos authentication is enabled. If Hadoop Kerberos authentication is on, the user is derived automatically from the Kerberos principal and the FileSystem is acquired for that user. Without Kerberos, we have to read hdfsUser from the job configuration, acquire the FileSystem as that user, and write the data as that user.
public void getFileSystem(String defaultFS, Configuration taskConfig) {
    hadoopConf = new org.apache.hadoop.conf.Configuration();
    Configuration hadoopSiteParams = taskConfig.getConfiguration(Key.HADOOP_CONFIG);
    JSONObject hadoopSiteParamsAsJsonObject = JSON.parseObject(taskConfig.getString(Key.HADOOP_CONFIG));
    if (null != hadoopSiteParams) {
        Set<String> paramKeys = hadoopSiteParams.getKeys();
        for (String each : paramKeys) {
            hadoopConf.set(each, hadoopSiteParamsAsJsonObject.getString(each));
        }
    }
    hadoopConf.set(HDFS_DEFAULTFS_KEY, defaultFS);
    // read the HDFS user from the job configuration
    String hdfsUser = taskConfig.getString(HDFS_USER);
    LOG.info(String.format("hdfsUser: [%s]", hdfsUser));
    LOG.info(String.format("HDFS_DEFAULTFS_KEY: [%s]", HDFS_DEFAULTFS_KEY));
    // whether Kerberos authentication is enabled
    this.haveKerberos = taskConfig.getBool(Key.HAVE_KERBEROS, false);
    if (haveKerberos) {
        this.kerberosKeytabFilePath = taskConfig.getString(Key.KERBEROS_KEYTAB_FILE_PATH);
        this.kerberosPrincipal = taskConfig.getString(Key.KERBEROS_PRINCIPAL);
        hadoopConf.set(HADOOP_SECURITY_AUTHENTICATION_KEY, "kerberos");
    }
    this.kerberosAuthentication(this.kerberosPrincipal, this.kerberosKeytabFilePath);
    conf = new JobConf(hadoopConf);
    try {
        if (StringUtils.isNotBlank(hdfsUser) && !haveKerberos) {
            // no Kerberos: acquire the FileSystem as the configured user
            System.setProperty("HADOOP_USER_NAME", hdfsUser);
            fileSystem = FileSystem.get(new URI(hadoopConf.get(HDFS_DEFAULTFS_KEY)), conf, hdfsUser);
        } else {
            fileSystem = FileSystem.get(conf);
        }
    } catch (IOException e) {
        String message = String.format("Network IO error while getting the FileSystem; please check your network! HDFS address: [%s]",
                defaultFS);
        LOG.error(message);
        throw DataXException.asDataXException(HdfsWriterErrorCode.CONNECT_HDFS_IO_ERROR, e);
    } catch (Exception e) {
        String message = String.format("Failed to get the FileSystem; please check that the HDFS address is correct: [%s]",
                defaultFS);
        LOG.error(message);
        throw DataXException.asDataXException(HdfsWriterErrorCode.CONNECT_HDFS_IO_ERROR, e);
    }
    if (null == fileSystem || null == conf) {
        String message = String.format("Failed to get the FileSystem; please check that the HDFS address is correct: [%s]",
                defaultFS);
        LOG.error(message);
        throw DataXException.asDataXException(HdfsWriterErrorCode.CONNECT_HDFS_IO_ERROR, message);
    }
}
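For reference, here is a sketch of the writer section of a job JSON with this change. path, fileName, writeMode, and haveKerberos are standard HdfsWriter parameters; the exact key for the user ("hdfsUser" below) depends on how the HDFS_USER constant is defined in your build, and the values are placeholders:

"writer": {
    "name": "hdfswriter",
    "parameter": {
        "defaultFS": "hdfs://namenode:8020",
        "fileType": "text",
        "path": "/user/hive/warehouse/wx_test",
        "fileName": "wx_test",
        "writeMode": "truncate",
        "fieldDelimiter": "\t",
        "haveKerberos": false,
        "hdfsUser": "hive"
    }
}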
Back in getFileSystem, the key piece of logic is this check:
if (StringUtils.isNotBlank(hdfsUser) && !haveKerberos) {
    System.setProperty("HADOOP_USER_NAME", hdfsUser);
    // acquire the FileSystem as hdfsUser
    fileSystem = FileSystem.get(new URI(hadoopConf.get(HDFS_DEFAULTFS_KEY)), conf, hdfsUser);
} else {
    fileSystem = FileSystem.get(conf);
}
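Why this works without Kerberos: under Hadoop's simple authentication the NameNode trusts the client-supplied user name, so FileSystem.get(URI, Configuration, String) is enough to act as that user, and permission checks on subsequent writes and deletes run against that user. A self-contained sketch (the namenode address, user name, and path are placeholder assumptions):

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HdfsUserDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // hypothetical namenode address and user; replace with your own
        URI uri = new URI("hdfs://namenode:8020");
        String user = "admin";
        // with simple (non-Kerberos) authentication, this call acts as "admin"
        FileSystem fs = FileSystem.get(uri, conf, user);
        // any writes/deletes below are checked against admin's permissions
        System.out.println(fs.exists(new Path("/user/hive/warehouse")));
        fs.close();
    }
}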