import java.net.URI;
import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.security.UserGroupInformation;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.cloud.context.config.annotation.RefreshScope;
import org.springframework.stereotype.Component;

import cn.hutool.core.map.MapUtil;
import cn.hutool.core.util.StrUtil;
import lombok.extern.slf4j.Slf4j;

/**
 * Utility bean for obtaining HDFS {@link FileSystem} instances,
 * with optional Kerberos authentication.
 *
 * @since 2022/11/28 15:56
 */
@Slf4j
@Component
@RefreshScope
public class HdfsFileUtil {
@Value("${dmm.hdfs.uri:hdfs://127.0.0.1:8020}")
private String hdfsUri;
@Value("${dmm.hdfs.user:}")
private String hdfsUser;
@Value("#{${dmm.hdfs.extra-properties:{}}}")
private Map<String, String> extraProperties;
@Value("${dmm.kerberos.tempo_hadoop_kerberos_enabled:false}")
private boolean hdfsKerberos;
@Value("${dmm.kerberos.tempo_hadoop_kerberos_keytab_file_path:${CLOUD_HOME}/file/cloud_mon/DEFAULT_CONFIG/hdfs.keytab}")
public String hdfs_keytab;
@Value("${dmm.kerberos.tempo_kerberos_krb5_file_path:${CLOUD_HOME}/file/cloud_mon/DEFAULT_CONFIG/krb5.conf}")
public String krb5_conf;
@Value("${dmm.kerberos.tempo_hadoop_kerberos_principal:hdfs/nn1@HADOOP.COM}")
public String principal;
private static final int MAX_LOGGING_TIME = 3600000;
private static final Map<String, Long> USER_POOL = new HashMap<>();
private static String lastLoginUser = null;
    /**
     * Get a FileSystem for the configured URI.
     * Kerberos is disabled by default.
     *
     * @return FileSystem
     */
    public FileSystem getFileSystem() {
        return getFileSystem(hdfsUri);
    }
    /**
     * Get a FileSystem for the given URI.
     * Kerberos is disabled by default.
     *
     * @param hdfsUri e.g. hdfs://191.168.5.120:8020
     * @return FileSystem
     */
    public FileSystem getFileSystem(String hdfsUri) {
        return getFileSystem(hdfsUri, hdfsKerberos);
    }
    /**
     * Get a FileSystem for the given URI.
     *
     * @param hdfsUri        e.g. hdfs://191.168.5.120:8020
     * @param kerberosEnable whether Kerberos is enabled
     * @return FileSystem, or null if the connection failed
     */
    public FileSystem getFileSystem(String hdfsUri, boolean kerberosEnable) {
        FileSystem fileSystem = null;
        try {
            if (StrUtil.isNotEmpty(hdfsUser)) {
                // Connect as the configured user; otherwise fall back to the process user.
                fileSystem = FileSystem.get(URI.create(hdfsUri), getConfiguration(kerberosEnable), hdfsUser);
            } else {
                fileSystem = FileSystem.get(URI.create(hdfsUri), getConfiguration(kerberosEnable));
            }
        } catch (Exception exception) {
            log.error("HdfsFileUtil::getFileSystem failed: {}", exception.getMessage(), exception);
        }
        return fileSystem;
    }
    public Configuration getConfiguration(boolean kerberosEnable) {
        Configuration configuration = newConfiguration();
        if (kerberosEnable) {
            configuration.set("hdfs.keytab.file", hdfs_keytab);
            configuration.set("dfs.datanode.kerberos.principal", principal);
            configuration.set("dfs.namenode.kerberos.principal", principal);
            loginInternal(configuration);
        }
        return configuration;
    }
    public Configuration newConfiguration() {
        Configuration configuration = new Configuration();
        configuration.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
        // Connect to datanodes by hostname instead of the IP the namenode reports;
        // this requires the client to be able to resolve those hostnames (see below).
        configuration.set("dfs.client.use.datanode.hostname", "true");
        if (MapUtil.isNotEmpty(extraProperties)) {
            for (Map.Entry<String, String> extraPropertiesEntry : extraProperties.entrySet()) {
                configuration.set(extraPropertiesEntry.getKey(), extraPropertiesEntry.getValue());
            }
        }
        return configuration;
    }
    public synchronized void loginInternal(Configuration conf) {
        try {
            long now = System.currentTimeMillis();
            long lastLogin = -1;
            if (USER_POOL.containsKey(principal)) {
                lastLogin = USER_POOL.get(principal);
            }
            // Re-login only if the TTL has expired or the principal has changed.
            if (now - lastLogin > MAX_LOGGING_TIME || !principal.equals(lastLoginUser)) {
                System.setProperty("java.security.krb5.conf", krb5_conf);
                conf.set("hadoop.security.authentication", "kerberos");
                UserGroupInformation.setConfiguration(conf);
                UserGroupInformation.loginUserFromKeytab(principal, hdfs_keytab);
                USER_POOL.put(principal, now);
                lastLoginUser = principal;
            }
        } catch (Exception exception) {
            log.error("HdfsFileUtil::loginInternal failed: {}", exception.getMessage(), exception);
        }
    }
}
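A minimal usage sketch of the bean (the injected field name, the path, and the download method below are illustrative assumptions, not part of the original class):

import java.io.IOException;
import java.io.OutputStream;

import org.apache.commons.io.IOUtils;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.Path;
import org.springframework.beans.factory.annotation.Autowired;

@Autowired
private HdfsFileUtil hdfsFileUtil;

public void download(String hdfsPath, OutputStream out) throws IOException {
    // FileSystem.get() caches instances per URI/user by default, so the
    // FileSystem itself is not closed here; only the per-call stream is.
    FileSystem fs = hdfsFileUtil.getFileSystem();
    try (FSDataInputStream in = fs.open(new Path(hdfsPath))) {
        IOUtils.copy(in, out);
    }
}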
When connecting to HDFS without Kerberos authentication, the configuration is as follows:
# HDFS URI
#dmm.hdfs.uri=hdfs://191.168.5.120:8020
dmm.hdfs.uri=hdfs://191.168.7.179:9000
# whether Kerberos is enabled
dmm.kerberos.tempo_hadoop_kerberos_enabled=false
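If additional Hadoop client settings are needed, the extraProperties map bound at dmm.hdfs.extra-properties can carry them as a SpEL map literal. A hypothetical example (the keys are standard HDFS client properties, but this exact line is not from the original configuration):

# optional: extra Hadoop client properties, parsed as a SpEL map
#dmm.hdfs.extra-properties={'dfs.replication':'1','dfs.client.socket-timeout':'60000'}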
When operating on the file stream, the following error was thrown:
Caused by: java.nio.channels.UnresolvedAddressException: null
at sun.nio.ch.Net.checkAddress(Net.java:101)
at sun.nio.ch.SocketChannelImpl.connect(SocketChannelImpl.java:622)
at org.apache.hadoop.net.SocketIOWithTimeout.connect(SocketIOWithTimeout.java:192)
at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:531)
at org.apache.hadoop.hdfs.DFSClient.newConnectedPeer(DFSClient.java:3450)
at org.apache.hadoop.hdfs.BlockReaderFactory.nextTcpPeer(BlockReaderFactory.java:777)
at org.apache.hadoop.hdfs.BlockReaderFactory.getRemoteBlockReaderFromTcp(BlockReaderFactory.java:694)
at org.apache.hadoop.hdfs.BlockReaderFactory.build(BlockReaderFactory.java:355)
at org.apache.hadoop.hdfs.DFSInputStream.blockSeekTo(DFSInputStream.java:648)
at org.apache.hadoop.hdfs.DFSInputStream.readWithStrategy(DFSInputStream.java:874)
at org.apache.hadoop.hdfs.DFSInputStream.read(DFSInputStream.java:926)
at java.io.DataInputStream.read(DataInputStream.java:100)
at org.apache.commons.io.IOUtils.copyLarge(IOUtils.java:1309)
at org.apache.commons.io.IOUtils.copy(IOUtils.java:978)
at org.apache.commons.io.IOUtils.copyLarge(IOUtils.java:1282)
at org.apache.commons.io.IOUtils.copy(IOUtils.java:953)
at org.jodconverter.core.job.SourceDocumentSpecsFromInputStream.getFile(SourceDocumentSpecsFromInputStream.java:76)
at org.jodconverter.local.task.LocalConversionTask.execute(LocalConversionTask.java:108)
at org.jodconverter.local.office.OfficeProcessManagerPoolEntry.doExecute(OfficeProcessManagerPoolEntry.java:187)
at org.jodconverter.core.office.AbstractOfficeManagerPoolEntry.lambda$execute$0(AbstractOfficeManagerPoolEntry.java:83)
at java.util.concurrent.FutureTask.run$$$capture(FutureTask.java:266)
at java.util.concurrent.FutureTask.run(FutureTask.java)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
... 1 common frames omitted
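For context, the failing operation boils down to a stream copy out of HDFS, roughly of this shape (a reconstruction from the stack trace; the path, temp file, and variable names are assumptions):

// The datanode TCP connection is opened lazily on the first read, which is
// why UnresolvedAddressException surfaces inside IOUtils.copy rather than at
// fs.open(): fs.open() only fetches block locations from the namenode, and
// with dfs.client.use.datanode.hostname=true those locations come back as
// hostnames that the local machine must be able to resolve.
FileSystem fs = hdfsFileUtil.getFileSystem();
try (FSDataInputStream in = fs.open(new Path("/docs/source.docx"));
     OutputStream out = new FileOutputStream(tempFile)) {
    IOUtils.copy(in, out);
}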
However, the local hosts file had already been configured with:
191.168.7.179 node7179
We then logged in to the HDFS server to investigate, and found that its own IP was mapped to several hostnames in its hosts file, with meritcloud being the name in effect for the running services. That is the hostname the cluster reports in block locations, and since the local machine could not resolve it, the datanode connection failed. The fix was therefore to complete the local hosts file:
191.168.7.179 node7179
191.168.7.179 meritcloud
Problem solved.
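To confirm the fix, one can check that both names now resolve on the client machine, for example with a throwaway check like this (a sketch; any resolver check or a simple ping would do):

import java.net.InetAddress;

public class HostsCheck {
    public static void main(String[] args) throws Exception {
        // Before the hosts entry was added, getByName("meritcloud") threw
        // UnknownHostException, and the HDFS client failed with
        // UnresolvedAddressException when dialing the datanode.
        System.out.println(InetAddress.getByName("node7179").getHostAddress());
        System.out.println(InetAddress.getByName("meritcloud").getHostAddress());
        // Expected after the fix: 191.168.7.179 printed twice.
    }
}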