Hudi Integration with MinIO
一、Components Used
- Hadoop-aliyun:3.2.1.jar
- minio:7.0.2.jar
二、Writing Hudi Data to OSS
1. Introduction
The low cost of cloud object storage has led many companies to adopt it as their primary storage layer, so for a data lake solution like Hudi, object storage support is essential. AWS EMR already ships with Hudi built in, which means Hudi can be used seamlessly on S3. Users in China, however, are more likely to use Alibaba Cloud OSS as their cloud storage, so can Hudi build a data lake on OSS? The Hudi community has merged the PR adding OSS support into the main branch, so you can either build from master or wait for the next release; with a little configuration, data can then be written directly to OSS.
2. Configuration
2.1 pom dependencies
The main additional pom dependencies are as follows:
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-aliyun</artifactId>
<version>3.2.1</version>
</dependency>
<dependency>
<groupId>com.aliyun.oss</groupId>
<artifactId>aliyun-sdk-oss</artifactId>
<version>3.8.1</version>
</dependency>
2.2 core-site.xml configuration
To access OSS, core-site.xml needs to be modified; the key settings are:
<property>
<name>fs.defaultFS</name>
<value>oss://bucketname/</value>
</property>
<property>
<name>fs.oss.endpoint</name>
<value>oss-endpoint-address</value>
<description>Aliyun OSS endpoint to connect to.</description>
</property>
<property>
<name>fs.oss.accessKeyId</name>
<value>oss_key</value>
<description>Aliyun access key ID</description>
</property>
<property>
<name>fs.oss.accessKeySecret</name>
<value>oss-secret</value>
<description>Aliyun access key secret</description>
</property>
<property>
<name>fs.oss.impl</name>
<value>org.apache.hadoop.fs.aliyun.oss.AliyunOSSFileSystem</value>
</property>
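Alternatively, the same OSS settings can be supplied programmatically rather than through core-site.xml. The sketch below is an assumption based on Spark's "spark.hadoop." prefix forwarding (endpoint, keys and bucket are placeholders, not values from the original post):
import org.apache.spark.sql.SparkSession;

public class OssConfSketch {
    public static SparkSession buildSession() {
        // "spark.hadoop.*" options are copied into the Hadoop Configuration used by Spark.
        return SparkSession.builder()
                .appName("Hudi on OSS")
                .master("local[2]")
                .config("spark.hadoop.fs.oss.endpoint", "oss-endpoint-address")
                .config("spark.hadoop.fs.oss.accessKeyId", "oss_key")
                .config("spark.hadoop.fs.oss.accessKeySecret", "oss-secret")
                .config("spark.hadoop.fs.oss.impl",
                        "org.apache.hadoop.fs.aliyun.oss.AliyunOSSFileSystem")
                .getOrCreate();
    }
}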
3. Example source code
The example source code is as follows:
import org.apache.hudi.QuickstartUtils.*;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import java.io.IOException;
import java.util.List;
import static org.apache.hudi.QuickstartUtils.convertToStringList;
import static org.apache.hudi.QuickstartUtils.getQuickstartWriteConfigs;
import static org.apache.hudi.config.HoodieWriteConfig.TABLE_NAME;
import static org.apache.spark.sql.SaveMode.Overwrite;
public class OssHudiDemo {
public static void main(String[] args) throws IOException {
SparkSession spark = SparkSession.builder().appName("Hoodie Datasource test")
.master("local[2]")
.config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
.config("spark.io.compression.codec", "snappy")
.config("spark.sql.hive.convertMetastoreParquet", "false")
.getOrCreate();
JavaSparkContext jsc = new JavaSparkContext(spark.sparkContext());
String tableName = "hudi_trips_cow";
String basePath = "/tmp/hudi_trips_cow";
DataGenerator dataGen = new DataGenerator();
List<String> inserts = convertToStringList(dataGen.generateInserts(10));
Dataset<Row> df = spark.read().json(jsc.parallelize(inserts, 2));
df.write().format("org.apache.hudi").
options(getQuickstartWriteConfigs()).
option(TABLE_NAME, tableName).
mode(Overwrite).
save(basePath);
Dataset<Row> roViewDF = spark.read().format("org.apache.hudi").load(basePath + "/*/*/*");
roViewDF.registerTempTable("hudi_ro_table");
spark.sql("select * from hudi_ro_table").show(false);
spark.stop();
}
}
The demo first writes to OSS (the bucket will then contain the newly committed Hudi files, visible in the OSS console) and afterwards reads the data back through Spark to query it.
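Note that because fs.defaultFS points at the OSS bucket, the relative basePath above resolves into that bucket. A fully qualified URI can be used instead (a small sketch; the bucket name is a placeholder):
// With fs.defaultFS = oss://bucketname/, "/tmp/hudi_trips_cow" resolves to
// oss://bucketname/tmp/hudi_trips_cow. The explicit form works as well:
String basePath = "oss://bucketname/tmp/hudi_trips_cow";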
三、Writing Data to MinIO
1. pom.xml dependencies
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.12</version>
<scope>test</scope>
</dependency>
<!--MINIO-->
<dependency>
<groupId>io.minio</groupId>
<artifactId>minio</artifactId>
<version>8.3.4</version>
</dependency>
<dependency>
<groupId>com.squareup.okhttp3</groupId>
<artifactId>okhttp</artifactId>
<version>4.9.3</version>
</dependency>
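The upload and download snippets in the next two sections come from a JUnit test class whose fields and setup are not shown in the original. The skeleton below is an assumption; the endpoint, keys and bucket are placeholders for a local MinIO deployment:
import io.minio.DownloadObjectArgs;
import io.minio.MinioClient;
import io.minio.PutObjectArgs;
import io.minio.StatObjectArgs;
import io.minio.StatObjectResponse;

public class MinioDemoTest {
    // Placeholder connection settings for a local MinIO instance; adjust to your deployment.
    private final String endpoint = "http://127.0.0.1:9000";
    private final String accessKey = "minioadmin";
    private final String secretKey = "minioadmin";
    private final String bucket = "test-bucket";
    private MinioClient minioClient;

    // The init(), upload() and download() methods from the next two sections belong here;
    // init() should run before each test (e.g. annotated with @Before) so minioClient is ready.
}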
2. Uploading data to MinIO
public void init(){
minioClient = MinioClient.builder().endpoint(endpoint).credentials(accessKey, secretKey).build();
}
@Test
public void upload(){
File file = new File("/Users/shenyunsese/Desktop/pic3.png");
String objectName="test/pic3.png";
try {
FileInputStream fileInputStream=new FileInputStream(file);
minioClient.putObject(PutObjectArgs.builder().bucket(bucket)
.object(objectName)
.contentType("image/png")
.stream(fileInputStream, fileInputStream.available(), -1).build());
}catch (Exception e){
e.printStackTrace();
}
System.out.println("finished");
}
3. Downloading data from MinIO
@Test
public void download(){
String objectName="test/pic3.png";
String fileName="/Users/shenyunsese/Desktop/download2.png";
try {
StatObjectResponse response = minioClient.statObject(
StatObjectArgs.builder().bucket(bucket).object(objectName).build()
);
if (response != null) {
minioClient.downloadObject(DownloadObjectArgs.builder()
.bucket(bucket)
.object(objectName)
.filename(fileName)
.build());
}
}catch (Exception e){
e.printStackTrace();
}
System.out.println("finished");
}
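To verify what the upload test wrote, the same client can make sure the bucket exists and list the objects under the test/ prefix. This is a minimal sketch using MinIO's listing API; the class and method names here are illustrative, not from the original post:
import io.minio.BucketExistsArgs;
import io.minio.ListObjectsArgs;
import io.minio.MakeBucketArgs;
import io.minio.MinioClient;
import io.minio.Result;
import io.minio.messages.Item;

public class MinioListSketch {
    static void listUploaded(MinioClient minioClient, String bucket) throws Exception {
        // Create the bucket on first use so putObject() does not fail.
        if (!minioClient.bucketExists(BucketExistsArgs.builder().bucket(bucket).build())) {
            minioClient.makeBucket(MakeBucketArgs.builder().bucket(bucket).build());
        }
        // List everything under the "test/" prefix written by the upload test.
        Iterable<Result<Item>> results = minioClient.listObjects(
                ListObjectsArgs.builder().bucket(bucket).prefix("test/").build());
        for (Result<Item> result : results) {
            Item item = result.get();
            System.out.println(item.objectName() + " " + item.size());
        }
    }
}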
四、Source Code Analysis
4.1 Hadoop-aliyun:3.2.1.jar
1. AliyunCredentialsProvider
package org.apache.hadoop.fs.aliyun.oss;
import com.aliyun.oss.common.auth.Credentials;
import com.aliyun.oss.common.auth.CredentialsProvider;
import com.aliyun.oss.common.auth.DefaultCredentials;
import com.aliyun.oss.common.auth.InvalidCredentialsException;
import java.io.IOException;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
// Aliyun credential validation: builds OSS credentials from the Hadoop configuration
public class AliyunCredentialsProvider implements CredentialsProvider {
private Credentials credentials = null;
public AliyunCredentialsProvider(Configuration conf) throws IOException {
String accessKeyId;
String accessKeySecret;
try {
// Read accessKeyId from the configuration (core-site.xml)
accessKeyId = AliyunOSSUtils.getValueWithKey(conf, "fs.oss.accessKeyId");
// Read accessKeySecret from the configuration (core-site.xml)
accessKeySecret = AliyunOSSUtils.getValueWithKey(conf, "fs.oss.accessKeySecret");
} catch (IOException var7) {
throw new InvalidCredentialsException(var7);
}
String securityToken;
try {
// Read the optional securityToken from the configuration
securityToken = AliyunOSSUtils.getValueWithKey(conf, "fs.oss.securityToken");
} catch (IOException var6) {
securityToken = null;
}
if (!StringUtils.isEmpty(accessKeyId) && !StringUtils.isEmpty(accessKeySecret)) {
if (StringUtils.isNotEmpty(securityToken)) {
// Build default credentials with an STS security token
this.credentials = new DefaultCredentials(accessKeyId, accessKeySecret, securityToken);
} else {
this.credentials = new DefaultCredentials(accessKeyId, accessKeySecret);
}
} else {
throw new InvalidCredentialsException("AccessKeyId and AccessKeySecret should not be null or empty.");
}
}
public void setCredentials(Credentials creds) {
if (creds == null) {
throw new InvalidCredentialsException("Credentials should not be null.");
} else {
this.credentials = creds;
}
}
public Credentials getCredentials() {
if (this.credentials == null) {
throw new InvalidCredentialsException("Invalid credentials");
} else {
return this.credentials;
}
}
}
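A minimal sketch showing that the provider is driven purely by configuration values, reading fs.oss.accessKeyId/accessKeySecret and failing fast if either is missing. The keys below are placeholders:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.aliyun.oss.AliyunCredentialsProvider;
import com.aliyun.oss.common.auth.Credentials;

public class CredentialsProviderSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.oss.accessKeyId", "oss_key");        // placeholder
        conf.set("fs.oss.accessKeySecret", "oss-secret"); // placeholder

        // Throws InvalidCredentialsException if either key is empty.
        AliyunCredentialsProvider provider = new AliyunCredentialsProvider(conf);
        Credentials credentials = provider.getCredentials();
        System.out.println(credentials.getAccessKeyId());
    }
}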
2. AliyunOSSBlockOutputStream
package org.apache.hadoop.fs.aliyun.oss;
import com.aliyun.oss.model.PartETag;
import com.google.common.util.concurrent.Futures;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.ListeningExecutorService;
import com.google.common.util.concurrent.MoreExecutors;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import org.apache.hadoop.conf.Configuration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
// Aliyun OSS block output stream: buffers writes into local block files and uploads them to OSS
public class AliyunOSSBlockOutputStream extends OutputStream {
private static final Logger LOG = LoggerFactory.getLogger(AliyunOSSBlockOutputStream.class);
private AliyunOSSFileSystemStore store;
private Configuration conf;
private boolean closed;
private String key;
// Current local block file
private File blockFile;
private Map<Integer, File> blockFiles = new HashMap();
private long blockSize;
private int blockId = 0;
private long blockWritten = 0L;
private String uploadId = null;
private final List<ListenableFuture<PartETag>> partETagsFutures;
private final ListeningExecutorService executorService;
// Stream writing into the current block file
private OutputStream blockStream;
private final byte[] singleByte = new byte[1];
public AliyunOSSBlockOutputStream(Configuration conf, AliyunOSSFileSystemStore store, String key, Long blockSize, ExecutorService executorService) throws IOException {
this.store = store;
this.conf = conf;
this.key = key;
this.blockSize = blockSize;
this.blockFile = this.newBlockFile();
this.blockStream = new BufferedOutputStream(new FileOutputStream(this.blockFile));
this.partETagsFutures = new ArrayList(2);
this.executorService = MoreExecutors.listeningDecorator(executorService);
}
private File newBlockFile() throws IOException {
// Create a local temporary block file via the OSS utility class
return AliyunOSSUtils.createTmpFileForWrite(String.format("oss-block-%04d-", this.blockId), this.blockSize, this.conf);
}
public synchronized void flush() throws IOException {
this.blockStream.flush();
}
public synchronized void close() throws IOException {
if (!this.closed) {
this.blockStream.flush();
this.blockStream.close();
// If the current block file has not been registered yet, register it under the next block id
if (!this.blockFiles.values().contains(this.blockFile)) {
++this.blockId;
this.blockFiles.put(this.blockId, this.blockFile);
}
try {
if (this.blockFiles.size() == 1) {
// Only one block file: upload it directly as a single object
this.store.uploadObject(this.key, this.blockFile);
} else {
if (this.blockWritten > 0L) {
// Guava's ListenableFuture tracks completion of the asynchronous part upload, keeping the concurrency logic simple
ListenableFuture<PartETag> partETagFuture = this.executorService.submit(() -> {
// Upload the last block as one part
PartETag partETag = this.store.uploadPart(this.blockFile, this.key, this.uploadId, this.blockId);
return partETag;
});
// Track the pending part-upload future
this.partETagsFutures.add(partETagFuture);
}
// Wait for all parts to finish uploading
List<PartETag> partETags = this.waitForAllPartUploads();
if (null == partETags) {
throw new IOException("Failed to multipart upload to oss, abort it.");
}
// Complete the multipart upload
this.store.completeMultipartUpload(this.key, this.uploadId, new ArrayList(partETags));
}
} finally {
// Remove the temporary block files
this.removeTemporaryFiles();
this.closed = true;
}
}
}
// Write a single byte (reusable)
public synchronized void write(int b) throws IOException {
this.singleByte[0] = (byte)b;
this.write(this.singleByte, 0, 1);
}
// Write a byte range; roll over to a new part once the block size is reached (reusable)
public synchronized void write(byte[] b, int off, int len) throws IOException {
if (this.closed) {
throw new IOException("Stream closed.");
} else {
this.blockStream.write(b, off, len);
this.blockWritten += (long)len;
if (this.blockWritten >= this.blockSize) {
this.uploadCurrentPart();
this.blockWritten = 0L;
}
}
}
// Remove all temporary block files (reusable)
private void removeTemporaryFiles() {
Iterator var1 = this.blockFiles.values().iterator();
while(var1.hasNext()) {
File file = (File)var1.next();
if (file != null && file.exists() && !file.delete()) {
LOG.warn("Failed to delete temporary file {}", file);
}
}
}
// Remove block files whose part uploads have already completed (reusable)
private void removePartFiles() throws IOException {
Iterator var1 = this.partETagsFutures.iterator();
while(var1.hasNext()) {
ListenableFuture<PartETag> partETagFuture = (ListenableFuture)var1.next();
if (partETagFuture.isDone()) {
try {
File blockFile = (File)this.blockFiles.get(((PartETag)partETagFuture.get()).getPartNumber());
if (blockFile != null && blockFile.exists() && !blockFile.delete()) {
LOG.warn("Failed to delete temporary file {}", blockFile);
}
} catch (ExecutionException | InterruptedException var4) {
throw new IOException(var4);
}
}
}
}
// Upload the current block as a part and start a new block (reusable)
private void uploadCurrentPart() throws IOException {
this.blockStream.flush();
this.blockStream.close();
if (this.blockId == 0) {
this.uploadId = this.store.getUploadId(this.key);
}
++this.blockId;
this.blockFiles.put(this.blockId, this.blockFile);
File currentFile = this.blockFile;
int currentBlockId = this.blockId;
ListenableFuture<PartETag> partETagFuture = this.executorService.submit(() -> {
PartETag partETag = this.store.uploadPart(currentFile, this.key, this.uploadId, currentBlockId);
return partETag;
});
this.partETagsFutures.add(partETagFuture);
this.removePartFiles();
this.blockFile = this.newBlockFile();
this.blockStream = new BufferedOutputStream(new FileOutputStream(this.blockFile));
}
// Wait for all part uploads to complete; abort the multipart upload on failure (reusable)
private List<PartETag> waitForAllPartUploads() throws IOException {
LOG.debug("Waiting for {} uploads to complete", this.partETagsFutures.size());
try {
return (List)Futures.allAsList(this.partETagsFutures).get();
} catch (InterruptedException var4) {
LOG.warn("Interrupted partUpload", var4);
Thread.currentThread().interrupt();
return null;
} catch (ExecutionException var5) {
LOG.debug("While waiting for upload completion", var5);
LOG.debug("Cancelling futures");
Iterator var2 = this.partETagsFutures.iterator();
while(var2.hasNext()) {
ListenableFuture<PartETag> future = (ListenableFuture)var2.next();
future.cancel(true);
}
this.store.abortMultipartUpload(this.key, this.uploadId);
throw new IOException("Multi-part upload with id '" + this.uploadId + "' to " + this.key, var5);
}
}
}
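To summarize the flow above: write() buffers bytes into a local block file; once blockWritten reaches blockSize, uploadCurrentPart() submits the block as an asynchronous multipart part; close() either uploads a single object (one block) or waits for all parts and completes the multipart upload. The stream is normally obtained indirectly through FileSystem.create(). A minimal sketch, assuming the fs.oss.* settings are already configured and using a placeholder bucket:
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class BlockOutputStreamSketch {
    public static void main(String[] args) throws Exception {
        // Picks up core-site.xml with the fs.oss.* endpoint and credentials.
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create("oss://bucketname/"), conf);

        // create() returns an FSDataOutputStream wrapping AliyunOSSBlockOutputStream;
        // small writes stay in one block file, large writes roll over into multipart parts.
        try (FSDataOutputStream out = fs.create(new Path("oss://bucketname/tmp/demo.txt"), true)) {
            out.write("hello oss".getBytes("UTF-8"));
        }
        fs.close();
    }
}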
3. AliyunOSSCopyFileContext
package org.apache.hadoop.fs.aliyun.oss;
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.ReentrantLock;
// Copy-file context: lock/condition used to wait until all copy tasks have finished
public class AliyunOSSCopyFileContext {
private final ReentrantLock lock = new ReentrantLock();
private Condition readyCondition;
private boolean copyFailure;
private int copiesFinish;
public AliyunOSSCopyFileContext() {
this.readyCondition = this.lock.newCondition();
this.copyFailure = false;
this.copiesFinish = 0;
}
public void lock() {
this.lock.lock();
}
public void unlock() {
this.lock.unlock();
}
public void awaitAllFinish(int copiesToFinish) throws InterruptedException {
while(this.copiesFinish != copiesToFinish) {
this.readyCondition.await();
}
}
public void signalAll() {
this.readyCondition.signalAll();
}
public boolean isCopyFailure() {
return this.copyFailure;
}
public void setCopyFailure(boolean copyFailure) {
this.copyFailure = copyFailure;
}
public void incCopiesFinish() {
++this.copiesFinish;
}
}
4. AliyunOSSCopyFileTask
package org.apache.hadoop.fs.aliyun.oss;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
// Copy-file task: copies one object and reports the result through the shared context
public class AliyunOSSCopyFileTask implements Runnable {
public static final Logger LOG = LoggerFactory.getLogger(AliyunOSSCopyFileTask.class);
private AliyunOSSFileSystemStore store;
private String srcKey;
private long srcLen;
private String dstKey;
private AliyunOSSCopyFileContext copyFileContext;
public AliyunOSSCopyFileTask(AliyunOSSFileSystemStore store, String srcKey, long srcLen, String dstKey, AliyunOSSCopyFileContext copyFileContext) {
this.store = store;
this.srcKey = srcKey;
this.srcLen = srcLen;
this.dstKey = dstKey;
this.copyFileContext = copyFileContext;
}
public void run() {
boolean fail = false;
try {
fail = !this.store.copyFile(this.srcKey, this.srcLen, this.dstKey);
} catch (Exception var6) {
LOG.warn("Exception thrown when copy from " + this.srcKey + " to " + this.dstKey + ", exception: " + var6);
fail = true;
} finally {
this.copyFileContext.lock();
if (fail) {
this.copyFileContext.setCopyFailure(fail);
}
this.copyFileContext.incCopiesFinish();
this.copyFileContext.signalAll();
this.copyFileContext.unlock();
}
}
}
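The context and task above form a simple fan-out/fan-in: copyDirectory() in AliyunOSSFileSystem (shown later) submits one AliyunOSSCopyFileTask per object and then blocks in awaitAllFinish() until every task has called incCopiesFinish() and signalAll(). A stripped-down sketch of the same lock/condition pattern; the pool size and task count are illustrative:
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import org.apache.hadoop.fs.aliyun.oss.AliyunOSSCopyFileContext;

public class CopyContextSketch {
    public static void main(String[] args) throws InterruptedException {
        AliyunOSSCopyFileContext ctx = new AliyunOSSCopyFileContext();
        ExecutorService pool = Executors.newFixedThreadPool(4);
        int tasks = 8; // illustrative number of copies

        for (int i = 0; i < tasks; i++) {
            pool.execute(() -> {
                // ... perform one copy here (store.copyFile in the real task) ...
                ctx.lock();
                try {
                    ctx.incCopiesFinish(); // one more copy done
                    ctx.signalAll();       // wake up the waiting thread
                } finally {
                    ctx.unlock();
                }
            });
        }

        ctx.lock();
        try {
            ctx.awaitAllFinish(tasks); // blocks until all tasks have reported completion
        } finally {
            ctx.unlock();
        }
        pool.shutdown();
    }
}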
5. AliyunOSSFileReaderTask
package org.apache.hadoop.fs.aliyun.oss;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import org.apache.hadoop.fs.aliyun.oss.ReadBuffer.STATUS;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.retry.RetryPolicies;
import org.apache.hadoop.io.retry.RetryPolicy;
import org.apache.hadoop.io.retry.RetryPolicy.RetryAction.RetryDecision;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
// File reader task: fetches one byte range into a ReadBuffer, with retries
public class AliyunOSSFileReaderTask implements Runnable {
public static final Logger LOG = LoggerFactory.getLogger(AliyunOSSFileReaderTask.class);
private String key;
private AliyunOSSFileSystemStore store;
private ReadBuffer readBuffer;
private static final int MAX_RETRIES = 3;
private RetryPolicy retryPolicy;
public AliyunOSSFileReaderTask(String key, AliyunOSSFileSystemStore store, ReadBuffer readBuffer) {
this.key = key;
this.store = store;
this.readBuffer = readBuffer;
RetryPolicy defaultPolicy = RetryPolicies.retryUpToMaximumCountWithFixedSleep(3, 3L, TimeUnit.SECONDS);
Map<Class<? extends Exception>, RetryPolicy> policies = new HashMap();
policies.put(IOException.class, defaultPolicy);
policies.put(IndexOutOfBoundsException.class, RetryPolicies.TRY_ONCE_THEN_FAIL);
policies.put(NullPointerException.class, RetryPolicies.TRY_ONCE_THEN_FAIL);
this.retryPolicy = RetryPolicies.retryByException(defaultPolicy, policies);
}
public void run() {
int retries = 0;
this.readBuffer.lock();
try {
while(true) {
try {
InputStream in = this.store.retrieve(this.key, this.readBuffer.getByteStart(), this.readBuffer.getByteEnd());
Throwable var29 = null;
try {
IOUtils.readFully(in, this.readBuffer.getBuffer(), 0, this.readBuffer.getBuffer().length);
this.readBuffer.setStatus(STATUS.SUCCESS);
break;
} catch (Throwable var22) {
var29 = var22;
throw var22;
} finally {
if (in != null) {
if (var29 != null) {
try {
in.close();
} catch (Throwable var23) {
var29.addSuppressed(var23);
}
} else {
in.close();
}
}
}
} catch (Exception var26) {
Exception e = var26;
LOG.warn("Exception thrown when retrieve key: " + this.key + ", exception: " + var26);
try {
RetryPolicy.RetryAction rc = this.retryPolicy.shouldRetry(e, retries++, 0, true);
if (rc.action != RetryDecision.RETRY) {
break;
}
Thread.sleep(rc.delayMillis);
} catch (Exception var25) {
LOG.warn("Exception thrown when call shouldRetry, exception " + var25);
break;
}
}
}
if (this.readBuffer.getStatus() != STATUS.SUCCESS) {
this.readBuffer.setStatus(STATUS.ERROR);
}
this.readBuffer.signalAll();
} finally {
this.readBuffer.unlock();
}
}
}
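The reader task retries transient IOExceptions up to three times with a fixed three-second sleep, using Hadoop's RetryPolicy, while never retrying programming errors such as NullPointerException. A small sketch of the same retry-by-exception pattern in isolation; the body of the try block is a placeholder for the actual read:
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import org.apache.hadoop.io.retry.RetryPolicies;
import org.apache.hadoop.io.retry.RetryPolicy;

public class RetryPolicySketch {
    public static void main(String[] args) throws Exception {
        RetryPolicy fixedSleep =
                RetryPolicies.retryUpToMaximumCountWithFixedSleep(3, 3L, TimeUnit.SECONDS);
        Map<Class<? extends Exception>, RetryPolicy> policies = new HashMap<>();
        policies.put(IOException.class, fixedSleep);                                 // retry I/O errors
        policies.put(NullPointerException.class, RetryPolicies.TRY_ONCE_THEN_FAIL);  // never retry bugs
        RetryPolicy policy = RetryPolicies.retryByException(fixedSleep, policies);

        int retries = 0;
        while (true) {
            try {
                // ... attempt the read (store.retrieve + IOUtils.readFully in the real task) ...
                break;
            } catch (Exception e) {
                RetryPolicy.RetryAction action = policy.shouldRetry(e, retries++, 0, true);
                if (action.action != RetryPolicy.RetryAction.RetryDecision.RETRY) {
                    throw e;
                }
                Thread.sleep(action.delayMillis);
            }
        }
    }
}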
6. AliyunOSSFileSystem
package org.apache.hadoop.fs.aliyun.oss;
import com.aliyun.oss.model.OSSObjectSummary;
import com.aliyun.oss.model.ObjectListing;
import com.aliyun.oss.model.ObjectMetadata;
import com.google.common.util.concurrent.ListeningExecutorService;
import com.google.common.util.concurrent.MoreExecutors;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.EnumSet;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.CreateFlag;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileAlreadyExistsException;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.fs.PathIOException;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.BlockingThreadPoolExecutorService;
import org.apache.hadoop.util.Progressable;
import org.apache.hadoop.util.SemaphoredDelegatingExecutor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class AliyunOSSFileSystem extends FileSystem {
private static final Logger LOG = LoggerFactory.getLogger(AliyunOSSFileSystem.class);
private URI uri;
private String bucket;
private String username;
private Path workingDir;
private int blockOutputActiveBlocks;
private AliyunOSSFileSystemStore store;
private int maxKeys;
private int maxReadAheadPartNumber;
private int maxConcurrentCopyTasksPerDir;
private ListeningExecutorService boundedThreadPool;
private ListeningExecutorService boundedCopyThreadPool;
private static final PathFilter DEFAULT_FILTER = new PathFilter() {
public boolean accept(Path file) {
return true;
}
};
public AliyunOSSFileSystem() {
}
public FSDataOutputStream append(Path path, int bufferSize, Progressable progress) throws IOException {
throw new IOException("Append is not supported!");
}
public void close() throws IOException {
try {
this.store.close();
this.boundedThreadPool.shutdown();
this.boundedCopyThreadPool.shutdown();
} finally {
super.close();
}
}
public FSDataOutputStream create(Path path, FsPermission permission, boolean overwrite, int bufferSize, short replication, long blockSize, Progressable progress) throws IOException {
String key = this.pathToKey(path);
FileStatus status = null;
try {
status = this.getFileStatus(path);
if (status.isDirectory()) {
throw new FileAlreadyExistsException(path + " is a directory");
}
if (!overwrite) {
throw new FileAlreadyExistsException(path + " already exists");
}
LOG.debug("Overwriting file {}", path);
} catch (FileNotFoundException var13) {
}
long uploadPartSize = AliyunOSSUtils.getMultipartSizeProperty(this.getConf(), "fs.oss.multipart.upload.size", 104857600L);
return new FSDataOutputStream(new AliyunOSSBlockOutputStream(this.getConf(), this.store, key, uploadPartSize, new SemaphoredDelegatingExecutor(this.boundedThreadPool, this.blockOutputActiveBlocks, true)), this.statistics);
}
public FSDataOutputStream createNonRecursive(Path path, FsPermission permission, EnumSet<CreateFlag> flags, int bufferSize, short replication, long blockSize, Progressable progress) throws IOException {
Path parent = path.getParent();
if (parent != null && !this.getFileStatus(parent).isDirectory()) {
throw new FileAlreadyExistsException("Not a directory: " + parent);
} else {
return this.create(path, permission, flags.contains(CreateFlag.OVERWRITE), bufferSize, replication, blockSize, progress);
}
}
public boolean delete(Path path, boolean recursive) throws IOException {
try {
return this.innerDelete(this.getFileStatus(path), recursive);
} catch (FileNotFoundException var4) {
LOG.debug("Couldn't delete {} - does not exist", path);
return false;
}
}
private boolean innerDelete(FileStatus status, boolean recursive) throws IOException {
Path f = status.getPath();
String p = f.toUri().getPath();
FileStatus[] statuses;
// Deleting the root directory is rejected unless it is empty
if (p.equals("/")) {
statuses = this.listStatus(status.getPath());
boolean isEmptyDir = statuses.length <= 0;
return this.rejectRootDirectoryDelete(isEmptyDir, recursive);
} else {
String key = this.pathToKey(f);
if (status.isDirectory()) {
// Non-recursive delete of a directory
if (!recursive) {
// List the directory contents
statuses = this.listStatus(status.getPath());
// A non-empty directory cannot be removed non-recursively
if (statuses.length > 0) {
throw new IOException("Cannot remove directory " + f + ": It is not empty!");
}
// Normalize the key with a trailing slash
key = AliyunOSSUtils.maybeAddTrailingSlash(key);
this.store.deleteObject(key);
} else {
this.store.deleteDirs(key);
}
} else {
this.store.deleteObject(key);
}
this.createFakeDirectoryIfNecessary(f);
return true;
}
}
private boolean rejectRootDirectoryDelete(boolean isEmptyDir, boolean recursive) throws IOException {
LOG.info("oss delete the {} root directory of {}", this.bucket, recursive);
if (isEmptyDir) {
return true;
} else if (recursive) {
return false;
} else {
throw new PathIOException(this.bucket, "Cannot delete root path");
}
}
// Create a fake directory (empty marker object) if necessary
private void createFakeDirectoryIfNecessary(Path f) throws IOException {
String key = this.pathToKey(f);
if (StringUtils.isNotEmpty(key) && !this.exists(f)) {
LOG.debug("Creating new fake directory at {}", f);
this.mkdir(this.pathToKey(f.getParent()));
}
}
// Get the file status
public FileStatus getFileStatus(Path path) throws IOException {
// Qualify the path against the filesystem URI and working directory
Path qualifiedPath = path.makeQualified(this.uri, this.workingDir);
String key = this.pathToKey(qualifiedPath);
if (key.length() == 0) {
return new OSSFileStatus(0L, true, 1, 0L, 0L, qualifiedPath, this.username);
} else {
// Fetch the object metadata
ObjectMetadata meta = this.store.getObjectMetadata(key);
if (meta == null && !key.endsWith("/")) {
key = key + "/";
meta = this.store.getObjectMetadata(key);
}
// If there is no metadata, probe the listing to decide whether the key is a directory
if (meta == null) {
ObjectListing listing = this.store.listObjects(key, 1, (String)null, false);
if (!CollectionUtils.isNotEmpty(listing.getObjectSummaries()) && !CollectionUtils.isNotEmpty(listing.getCommonPrefixes())) {
throw new FileNotFoundException(path + ": No such file or directory!");
} else {
return new OSSFileStatus(0L, true, 1, 0L, 0L, qualifiedPath, this.username);
}
} else {
return AliyunOSSUtils.objectRepresentsDirectory(key, meta.getContentLength()) ? new OSSFileStatus(0L, true, 1, 0L, meta.getLastModified().getTime(), qualifiedPath, this.username) : new OSSFileStatus(meta.getContentLength(), false, 1, this.getDefaultBlockSize(path), meta.getLastModified().getTime(), qualifiedPath, this.username);
}
}
}
public String getScheme() {
return "oss";
}
public URI getUri() {
return this.uri;
}
public int getDefaultPort() {
return -1;
}
public Path getWorkingDirectory() {
return this.workingDir;
}
/** @deprecated */
@Deprecated
public long getDefaultBlockSize() {
return this.getConf().getLong("fs.oss.block.size", 67108864L);
}
public String getCanonicalServiceName() {
return null;
}
public void initialize(URI name, Configuration conf) throws IOException {
super.initialize(name, conf);
this.bucket = name.getHost();
this.uri = URI.create(name.getScheme() + "://" + name.getAuthority());
this.username = UserGroupInformation.getCurrentUser().getShortUserName();
this.workingDir = (new Path("/user", this.username)).makeQualified(this.uri, (Path)null);
long keepAliveTime = AliyunOSSUtils.longOption(conf, "fs.oss.threads.keepalivetime", 60L, 0L);
this.blockOutputActiveBlocks = AliyunOSSUtils.intOption(conf, "fs.oss.upload.active.blocks", 4, 1);
this.store = new AliyunOSSFileSystemStore();
this.store.initialize(name, conf, this.username, this.statistics);
this.maxKeys = conf.getInt("fs.oss.paging.maximum", 1000);
int threadNum = AliyunOSSUtils.intPositiveOption(conf, "fs.oss.multipart.download.threads", 10);
int totalTasks = AliyunOSSUtils.intPositiveOption(conf, "fs.oss.max.total.tasks", 128);
this.maxReadAheadPartNumber = AliyunOSSUtils.intPositiveOption(conf, "fs.oss.multipart.download.ahead.part.max.number", 4);
this.boundedThreadPool = BlockingThreadPoolExecutorService.newInstance(threadNum, totalTasks, keepAliveTime, TimeUnit.SECONDS, "oss-transfer-shared");
this.maxConcurrentCopyTasksPerDir = AliyunOSSUtils.intPositiveOption(conf, "fs.oss.max.copy.tasks.per.dir", 5);
int maxCopyThreads = AliyunOSSUtils.intPositiveOption(conf, "fs.oss.max.copy.threads", 25);
int maxCopyTasks = AliyunOSSUtils.intPositiveOption(conf, "fs.oss.max.copy.tasks", 10485760);
this.boundedCopyThreadPool = BlockingThreadPoolExecutorService.newInstance(maxCopyThreads, maxCopyTasks, 60L, TimeUnit.SECONDS, "oss-copy-unbounded");
this.setConf(conf);
}
private String pathToKey(Path path) {
if (!path.isAbsolute()) {
path = new Path(this.workingDir, path);
}
return path.toUri().getPath().substring(1);
}
private Path keyToPath(String key) {
return new Path("/" + key);
}
public FileStatus[] listStatus(Path path) throws IOException {
String key = this.pathToKey(path);
if (LOG.isDebugEnabled()) {
LOG.debug("List status for path: " + path);
}
List<FileStatus> result = new ArrayList();
FileStatus fileStatus = this.getFileStatus(path);
if (fileStatus.isDirectory()) {
if (LOG.isDebugEnabled()) {
LOG.debug("listStatus: doing listObjects for directory " + key);
}
ObjectListing objects = this.store.listObjects(key, this.maxKeys, (String)null, false);
while(true) {
Iterator var6 = objects.getObjectSummaries().iterator();
while(var6.hasNext()) {
OSSObjectSummary objectSummary = (OSSObjectSummary)var6.next();
String objKey = objectSummary.getKey();
if (objKey.equals(key + "/")) {
if (LOG.isDebugEnabled()) {
LOG.debug("Ignoring: " + objKey);
}
} else {
Path keyPath = this.keyToPath(objectSummary.getKey()).makeQualified(this.uri, this.workingDir);
if (LOG.isDebugEnabled()) {
LOG.debug("Adding: fi: " + keyPath);
}
result.add(new OSSFileStatus(objectSummary.getSize(), false, 1, this.getDefaultBlockSize(keyPath), objectSummary.getLastModified().getTime(), keyPath, this.username));
}
}
var6 = objects.getCommonPrefixes().iterator();
while(var6.hasNext()) {
String prefix = (String)var6.next();
if (prefix.equals(key + "/")) {
if (LOG.isDebugEnabled()) {
LOG.debug("Ignoring: " + prefix);
}
} else {
Path keyPath = this.keyToPath(prefix).makeQualified(this.uri, this.workingDir);
if (LOG.isDebugEnabled()) {
LOG.debug("Adding: rd: " + keyPath);
}
result.add(this.getFileStatus(keyPath));
}
}
if (!objects.isTruncated()) {
break;
}
if (LOG.isDebugEnabled()) {
LOG.debug("listStatus: list truncated - getting next batch");
}
String nextMarker = objects.getNextMarker();
objects = this.store.listObjects(key, this.maxKeys, nextMarker, false);
}
} else {
if (LOG.isDebugEnabled()) {
LOG.debug("Adding: rd (not a dir): " + path);
}
result.add(fileStatus);
}
return (FileStatus[])result.toArray(new FileStatus[result.size()]);
}
public RemoteIterator<LocatedFileStatus> listFiles(final Path f, boolean recursive) throws IOException {
Path qualifiedPath = f.makeQualified(this.uri, this.workingDir);
final FileStatus status = this.getFileStatus(qualifiedPath);
PathFilter filter = new PathFilter() {
public boolean accept(Path path) {
return status.isFile() || !path.equals(f);
}
};
// Acceptor that keeps only file entries
FileStatusAcceptor acceptor = new FileStatusAcceptor.AcceptFilesOnly(qualifiedPath);
return this.innerList(f, status, filter, acceptor, recursive);
}
// List located file statuses under a path
public RemoteIterator<LocatedFileStatus> listLocatedStatus(Path f) throws IOException {
return this.listLocatedStatus(f, DEFAULT_FILTER);
}
public RemoteIterator<LocatedFileStatus> listLocatedStatus(Path f, PathFilter filter) throws IOException {
// Qualify the path
Path qualifiedPath = f.makeQualified(this.uri, this.workingDir);
FileStatus status = this.getFileStatus(qualifiedPath);
FileStatusAcceptor acceptor = new FileStatusAcceptor.AcceptAllButSelf(qualifiedPath);
return this.innerList(f, status, filter, acceptor, false);
}
// Build a located-status iterator for either a single file or a directory listing
private RemoteIterator<LocatedFileStatus> innerList(Path f, FileStatus status, PathFilter filter, FileStatusAcceptor acceptor, boolean recursive) throws IOException {
Path qualifiedPath = f.makeQualified(this.uri, this.workingDir);
String key = this.pathToKey(qualifiedPath);
if (status.isFile()) {
LOG.debug("{} is a File", qualifiedPath);
BlockLocation[] locations = this.getFileBlockLocations(status, 0L, status.getLen());
return this.store.singleStatusRemoteIterator(filter.accept(f) ? status : null, locations);
} else {
return this.store.createLocatedFileStatusIterator(key, this.maxKeys, this, filter, acceptor, recursive ? null : "/");
}
}
// Create a directory marker object (reusable)
private boolean mkdir(String key) throws IOException {
String dirName = key;
if (StringUtils.isNotEmpty(key)) {
if (!key.endsWith("/")) {
dirName = key + "/";
}
this.store.storeEmptyFile(dirName);
}
return true;
}
// Create directories, failing if the path already exists as a file (reusable)
public boolean mkdirs(Path path, FsPermission permission) throws IOException {
try {
FileStatus fileStatus = this.getFileStatus(path);
if (fileStatus.isDirectory()) {
return true;
} else {
throw new FileAlreadyExistsException("Path is a file: " + path);
}
} catch (FileNotFoundException var5) {
this.validatePath(path);
String key = this.pathToKey(path);
return this.mkdir(key);
}
}
// Validate that no ancestor of the path is a file (reusable)
private void validatePath(Path path) throws IOException {
Path fPart = path.getParent();
while(true) {
try {
FileStatus fileStatus = this.getFileStatus(fPart);
if (!fileStatus.isDirectory()) {
throw new FileAlreadyExistsException(String.format("Can't make directory for path '%s', it is a file.", fPart));
}
break;
} catch (FileNotFoundException var4) {
fPart = fPart.getParent();
if (fPart == null) {
break;
}
}
}
}
// Open a file for reading
public FSDataInputStream open(Path path, int bufferSize) throws IOException {
FileStatus fileStatus = this.getFileStatus(path);
if (fileStatus.isDirectory()) {
throw new FileNotFoundException("Can't open " + path + " because it is a directory");
} else {
return new FSDataInputStream(new AliyunOSSInputStream(this.getConf(), new SemaphoredDelegatingExecutor(this.boundedThreadPool, this.maxReadAheadPartNumber, true), this.maxReadAheadPartNumber, this.store, this.pathToKey(path), fileStatus.getLen(), this.statistics));
}
}
// Rename a path (reusable)
public boolean rename(Path srcPath, Path dstPath) throws IOException {
if (srcPath.isRoot()) {
if (LOG.isDebugEnabled()) {
LOG.debug("Cannot rename the root of a filesystem");
}
return false;
} else {
Path parent;
for(parent = dstPath.getParent(); parent != null && !srcPath.equals(parent); parent = parent.getParent()) {
}
if (parent != null) {
return false;
} else {
FileStatus srcStatus = this.getFileStatus(srcPath);
FileStatus dstStatus;
try {
dstStatus = this.getFileStatus(dstPath);
} catch (FileNotFoundException var9) {
dstStatus = null;
}
if (dstStatus == null) {
dstStatus = this.getFileStatus(dstPath.getParent());
if (!dstStatus.isDirectory()) {
throw new IOException(String.format("Failed to rename %s to %s, %s is a file", srcPath, dstPath, dstPath.getParent()));
}
} else {
if (srcStatus.getPath().equals(dstStatus.getPath())) {
return !srcStatus.isDirectory();
}
if (!dstStatus.isDirectory()) {
throw new FileAlreadyExistsException(String.format("Failed to rename %s to %s, file already exists!", srcPath, dstPath));
}
dstPath = new Path(dstPath, srcPath.getName());
FileStatus[] statuses;
try {
statuses = this.listStatus(dstPath);
} catch (FileNotFoundException var8) {
statuses = null;
}
if (statuses != null && statuses.length > 0) {
throw new FileAlreadyExistsException(String.format("Failed to rename %s to %s, file already exists or not empty!", srcPath, dstPath));
}
}
boolean succeed;
if (srcStatus.isDirectory()) {
succeed = this.copyDirectory(srcPath, dstPath);
} else {
succeed = this.copyFile(srcPath, srcStatus.getLen(), dstPath);
}
return srcPath.equals(dstPath) || succeed && this.delete(srcPath, true);
}
}
}
// Copy a single file (reusable)
private boolean copyFile(Path srcPath, long srcLen, Path dstPath) {
String srcKey = this.pathToKey(srcPath);
String dstKey = this.pathToKey(dstPath);
return this.store.copyFile(srcKey, srcLen, dstKey);
}
// Copy a directory (reusable)
private boolean copyDirectory(Path srcPath, Path dstPath) throws IOException {
// Normalize the source key with a trailing slash
String srcKey = AliyunOSSUtils.maybeAddTrailingSlash(this.pathToKey(srcPath));
// Normalize the destination key with a trailing slash
String dstKey = AliyunOSSUtils.maybeAddTrailingSlash(this.pathToKey(dstPath));
if (dstKey.startsWith(srcKey)) {
if (LOG.isDebugEnabled()) {
LOG.debug("Cannot rename a directory to a subdirectory of self");
}
return false;
} else {
this.store.storeEmptyFile(dstKey);
AliyunOSSCopyFileContext copyFileContext = new AliyunOSSCopyFileContext();
ExecutorService executorService = MoreExecutors.listeningDecorator(new SemaphoredDelegatingExecutor(this.boundedCopyThreadPool, this.maxConcurrentCopyTasksPerDir, true));
ObjectListing objects = this.store.listObjects(srcKey, this.maxKeys, (String)null, true);
int copiesToFinish = 0;
while(true) {
Iterator var9 = objects.getObjectSummaries().iterator();
while(var9.hasNext()) {
OSSObjectSummary objectSummary = (OSSObjectSummary)var9.next();
String newKey = dstKey.concat(objectSummary.getKey().substring(srcKey.length()));
executorService.execute(new AliyunOSSCopyFileTask(this.store, objectSummary.getKey(), objectSummary.getSize(), newKey, copyFileContext));
++copiesToFinish;
if (copyFileContext.isCopyFailure()) {
break;
}
}
if (!objects.isTruncated()) {
copyFileContext.lock();
try {
copyFileContext.awaitAllFinish(copiesToFinish);
} catch (InterruptedException var15) {
LOG.warn("interrupted when wait copies to finish");
} finally {
copyFileContext.unlock();
}
return !copyFileContext.isCopyFailure();
}
String nextMarker = objects.getNextMarker();
objects = this.store.listObjects(srcKey, this.maxKeys, nextMarker, true);
}
}
}
public void setWorkingDirectory(Path dir) {
this.workingDir = dir;
}
public AliyunOSSFileSystemStore getStore() {
return this.store;
}
}
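With the fs.oss.* settings in place, this class is exercised through the standard Hadoop FileSystem API. A minimal sketch; the bucket name is a placeholder:
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class OssFileSystemSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration(); // picks up core-site.xml with the fs.oss.* settings
        FileSystem fs = FileSystem.get(URI.create("oss://bucketname/"), conf);

        Path dir = new Path("/tmp/hudi_trips_cow");
        fs.mkdirs(dir);                       // mkdirs() stores an empty "key/" marker object
        for (FileStatus status : fs.listStatus(dir)) {
            System.out.println(status.getPath() + " " + status.getLen());
        }
        fs.delete(dir, true);                 // recursive delete walks the object listing
        fs.close();
    }
}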
7. AliyunOSSFileSystemStore
package org.apache.hadoop.fs.aliyun.oss;
import com.aliyun.oss.ClientConfiguration;
import com.aliyun.oss.ClientException;
import com.aliyun.oss.OSSClient;
import com.aliyun.oss.OSSException;
import com.aliyun.oss.common.auth.CredentialsProvider;
import com.aliyun.oss.common.comm.Protocol;
import com.aliyun.oss.model.AbortMultipartUploadRequest;
import com.aliyun.oss.model.CannedAccessControlList;
import com.aliyun.oss.model.CompleteMultipartUploadRequest;
import com.aliyun.oss.model.CompleteMultipartUploadResult;
import com.aliyun.oss.model.CopyObjectResult;
import com.aliyun.oss.model.DeleteObjectsRequest;
import com.aliyun.oss.model.DeleteObjectsResult;
import com.aliyun.oss.model.GenericRequest;
import com.aliyun.oss.model.GetObjectRequest;
import com.aliyun.oss.model.InitiateMultipartUploadRequest;
import com.aliyun.oss.model.InitiateMultipartUploadResult;
import com.aliyun.oss.model.ListObjectsRequest;
import com.aliyun.oss.model.OSSObjectSummary;
import com.aliyun.oss.model.ObjectListing;
import com.aliyun.oss.model.ObjectMetadata;
import com.aliyun.oss.model.PartETag;
import com.aliyun.oss.model.PutObjectResult;
import com.aliyun.oss.model.UploadPartCopyRequest;
import com.aliyun.oss.model.UploadPartCopyResult;
import com.aliyun.oss.model.UploadPartRequest;
import com.aliyun.oss.model.UploadPartResult;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.net.URI;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.ListIterator;
import java.util.NoSuchElementException;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.util.VersionInfo;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
// Aliyun OSS filesystem store: wraps the OSSClient for the raw object operations
public class AliyunOSSFileSystemStore {
public static final Logger LOG = LoggerFactory.getLogger(AliyunOSSFileSystemStore.class);
private String username;
private FileSystem.Statistics statistics;
private OSSClient ossClient;
private String bucketName;
private long uploadPartSize;
private int maxKeys;
private String serverSideEncryptionAlgorithm;
public AliyunOSSFileSystemStore() {
}
public void initialize(URI uri, Configuration conf, String user, FileSystem.Statistics stat) throws IOException {
this.username = user;
this.statistics = stat;
ClientConfiguration clientConf = new ClientConfiguration();
clientConf.setMaxConnections(conf.getInt("fs.oss.connection.maximum", 32));
boolean secureConnections = conf.getBoolean("fs.oss.connection.secure.enabled", true);
clientConf.setProtocol(secureConnections ? Protocol.HTTPS : Protocol.HTTP);
clientConf.setMaxErrorRetry(conf.getInt("fs.oss.attempts.maximum", 10));
clientConf.setConnectionTimeout(conf.getInt("fs.oss.connection.establish.timeout", 50000));
clientConf.setSocketTimeout(conf.getInt("fs.oss.connection.timeout", 200000));
clientConf.setUserAgent(conf.get("fs.oss.user.agent.prefix", Constants.USER_AGENT_PREFIX_DEFAULT) + ", Hadoop/" + VersionInfo.getVersion());
String proxyHost = conf.getTrimmed("fs.oss.proxy.host", "");
int proxyPort = conf.getInt("fs.oss.proxy.port", -1);
String proxyUsername;
String cannedACLName;
if (StringUtils.isNotEmpty(proxyHost)) {
clientConf.setProxyHost(proxyHost);
if (proxyPort >= 0) {
clientConf.setProxyPort(proxyPort);
} else if (secureConnections) {
LOG.warn("Proxy host set without port. Using HTTPS default 443");
clientConf.setProxyPort(443);
} else {
LOG.warn("Proxy host set without port. Using HTTP default 80");
clientConf.setProxyPort(80);
}
proxyUsername = conf.getTrimmed("fs.oss.proxy.username");
String proxyPassword = conf.getTrimmed("fs.oss.proxy.password");
if (proxyUsername == null != (proxyPassword == null)) {
cannedACLName = "Proxy error: fs.oss.proxy.username or fs.oss.proxy.password set without the other.";
LOG.error(cannedACLName);
throw new IllegalArgumentException(cannedACLName);
}
clientConf.setProxyUsername(proxyUsername);
clientConf.setProxyPassword(proxyPassword);
clientConf.setProxyDomain(conf.getTrimmed("fs.oss.proxy.domain"));
clientConf.setProxyWorkstation(conf.getTrimmed("fs.oss.proxy.workstation"));
} else if (proxyPort >= 0) {
proxyUsername = "Proxy error: fs.oss.proxy.port set without fs.oss.proxy.host";
LOG.error(proxyUsername);
throw new IllegalArgumentException(proxyUsername);
}
proxyUsername = conf.getTrimmed("fs.oss.endpoint", "");
if (StringUtils.isEmpty(proxyUsername)) {
throw new IllegalArgumentException("Aliyun OSS endpoint should not be null or empty. Please set proper endpoint with 'fs.oss.endpoint'.");
} else {
CredentialsProvider provider = AliyunOSSUtils.getCredentialsProvider(uri, conf);
this.ossClient = new OSSClient(proxyUsername, provider, clientConf);
this.uploadPartSize = AliyunOSSUtils.getMultipartSizeProperty(conf, "fs.oss.multipart.upload.size", 104857600L);
this.serverSideEncryptionAlgorithm = conf.get("fs.oss.server-side-encryption-algorithm", "");
this.bucketName = uri.getHost();
cannedACLName = conf.get("fs.oss.acl.default", "");
if (StringUtils.isNotEmpty(cannedACLName)) {
CannedAccessControlList cannedACL = CannedAccessControlList.valueOf(cannedACLName);
this.ossClient.setBucketAcl(this.bucketName, cannedACL);
this.statistics.incrementWriteOps(1);
}
this.maxKeys = conf.getInt("fs.oss.paging.maximum", 1000);
}
}
public void deleteObject(String key) {
this.ossClient.deleteObject(this.bucketName, key);
this.statistics.incrementWriteOps(1);
}
public void deleteObjects(List<String> keysToDelete) throws IOException {
if (CollectionUtils.isEmpty(keysToDelete)) {
LOG.warn("Keys to delete is empty.");
} else {
int retry = 10;
int tries = 0;
List<String> deleteFailed = keysToDelete;
while(CollectionUtils.isNotEmpty(deleteFailed)) {
DeleteObjectsRequest deleteRequest = new DeleteObjectsRequest(this.bucketName);
deleteRequest.setKeys(deleteFailed);
deleteRequest.setQuiet(true);
DeleteObjectsResult result = this.ossClient.deleteObjects(deleteRequest);
this.statistics.incrementWriteOps(1);
deleteFailed = result.getDeletedObjects();
++tries;
if (tries == retry) {
break;
}
}
if (tries == retry && CollectionUtils.isNotEmpty(deleteFailed)) {
throw new IOException("Failed to delete Aliyun OSS objects for " + tries + " times.");
}
}
}
public void deleteDirs(String key) throws IOException {
key = AliyunOSSUtils.maybeAddTrailingSlash(key);
ListObjectsRequest listRequest = new ListObjectsRequest(this.bucketName);
listRequest.setPrefix(key);
listRequest.setDelimiter((String)null);
listRequest.setMaxKeys(this.maxKeys);
while(true) {
ObjectListing objects = this.ossClient.listObjects(listRequest);
this.statistics.incrementReadOps(1);
List<String> keysToDelete = new ArrayList();
Iterator var5 = objects.getObjectSummaries().iterator();
while(var5.hasNext()) {
OSSObjectSummary objectSummary = (OSSObjectSummary)var5.next();
keysToDelete.add(objectSummary.getKey());
}
this.deleteObjects(keysToDelete);
if (!objects.isTruncated()) {
return;
}
listRequest.setMarker(objects.getNextMarker());
}
}
public ObjectMetadata getObjectMetadata(String key) {
try {
GenericRequest request = new GenericRequest(this.bucketName, key);
request.setLogEnabled(false);
ObjectMetadata objectMeta = this.ossClient.getObjectMetadata(request);
this.statistics.incrementReadOps(1);
return objectMeta;
} catch (OSSException var4) {
LOG.debug("Exception thrown when get object meta: " + key + ", exception: " + var4);
return null;
}
}
public void storeEmptyFile(String key) throws IOException {
ObjectMetadata dirMeta = new ObjectMetadata();
byte[] buffer = new byte[0];
ByteArrayInputStream in = new ByteArrayInputStream(buffer);
dirMeta.setContentLength(0L);
try {
this.ossClient.putObject(this.bucketName, key, in, dirMeta);
this.statistics.incrementWriteOps(1);
} finally {
in.close();
}
}
public boolean copyFile(String srcKey, long srcLen, String dstKey) {
try {
return this.singleCopy(srcKey, dstKey);
} catch (Exception var6) {
LOG.debug("Exception thrown when copy file: " + srcKey + ", exception: " + var6 + ", use multipartCopy instead");
return this.multipartCopy(srcKey, srcLen, dstKey);
}
}
private boolean singleCopy(String srcKey, String dstKey) {
CopyObjectResult copyResult = this.ossClient.copyObject(this.bucketName, srcKey, this.bucketName, dstKey);
this.statistics.incrementWriteOps(1);
LOG.debug(copyResult.getETag());
return true;
}
private boolean multipartCopy(String srcKey, long contentLength, String dstKey) {
long realPartSize = AliyunOSSUtils.calculatePartSize(contentLength, this.uploadPartSize);
int partNum = (int)(contentLength / realPartSize);
if (contentLength % realPartSize != 0L) {
++partNum;
}
InitiateMultipartUploadRequest initiateMultipartUploadRequest = new InitiateMultipartUploadRequest(this.bucketName, dstKey);
ObjectMetadata meta = new ObjectMetadata();
if (StringUtils.isNotEmpty(this.serverSideEncryptionAlgorithm)) {
meta.setServerSideEncryption(this.serverSideEncryptionAlgorithm);
}
initiateMultipartUploadRequest.setObjectMetadata(meta);
InitiateMultipartUploadResult initiateMultipartUploadResult = this.ossClient.initiateMultipartUpload(initiateMultipartUploadRequest);
String uploadId = initiateMultipartUploadResult.getUploadId();
List<PartETag> partETags = new ArrayList();
try {
for(int i = 0; i < partNum; ++i) {
long skipBytes = realPartSize * (long)i;
long size = realPartSize < contentLength - skipBytes ? realPartSize : contentLength - skipBytes;
UploadPartCopyRequest partCopyRequest = new UploadPartCopyRequest();
partCopyRequest.setSourceBucketName(this.bucketName);
partCopyRequest.setSourceKey(srcKey);
partCopyRequest.setBucketName(this.bucketName);
partCopyRequest.setKey(dstKey);
partCopyRequest.setUploadId(uploadId);
partCopyRequest.setPartSize(size);
partCopyRequest.setBeginIndex(skipBytes);
partCopyRequest.setPartNumber(i + 1);
UploadPartCopyResult partCopyResult = this.ossClient.uploadPartCopy(partCopyRequest);
this.statistics.incrementWriteOps(1);
this.statistics.incrementBytesWritten(size);
partETags.add(partCopyResult.getPartETag());
}
CompleteMultipartUploadRequest completeMultipartUploadRequest = new CompleteMultipartUploadRequest(this.bucketName, dstKey, uploadId, partETags);
CompleteMultipartUploadResult completeMultipartUploadResult = this.ossClient.completeMultipartUpload(completeMultipartUploadRequest);
LOG.debug(completeMultipartUploadResult.getETag());
return true;
} catch (ClientException | OSSException var20) {
AbortMultipartUploadRequest abortMultipartUploadRequest = new AbortMultipartUploadRequest(this.bucketName, dstKey, uploadId);
this.ossClient.abortMultipartUpload(abortMultipartUploadRequest);
return false;
}
}
public void uploadObject(String key, File file) throws IOException {
File object = file.getAbsoluteFile();
FileInputStream fis = new FileInputStream(object);
ObjectMetadata meta = new ObjectMetadata();
meta.setContentLength(object.length());
if (StringUtils.isNotEmpty(this.serverSideEncryptionAlgorithm)) {
meta.setServerSideEncryption(this.serverSideEncryptionAlgorithm);
}
try {
PutObjectResult result = this.ossClient.putObject(this.bucketName, key, fis, meta);
LOG.debug(result.getETag());
this.statistics.incrementWriteOps(1);
} finally {
fis.close();
}
}
public ObjectListing listObjects(String prefix, int maxListingLength, String marker, boolean recursive) {
String delimiter = recursive ? null : "/";
prefix = AliyunOSSUtils.maybeAddTrailingSlash(prefix);
ListObjectsRequest listRequest = new ListObjectsRequest(this.bucketName);
listRequest.setPrefix(prefix);
listRequest.setDelimiter(delimiter);
listRequest.setMaxKeys(maxListingLength);
listRequest.setMarker(marker);
ObjectListing listing = this.ossClient.listObjects(listRequest);
this.statistics.incrementReadOps(1);
return listing;
}
public InputStream retrieve(String key, long byteStart, long byteEnd) {
try {
GetObjectRequest request = new GetObjectRequest(this.bucketName, key);
request.setRange(byteStart, byteEnd);
InputStream in = this.ossClient.getObject(request).getObjectContent();
this.statistics.incrementReadOps(1);
return in;
} catch (ClientException | OSSException var8) {
LOG.error("Exception thrown when store retrieves key: " + key + ", exception: " + var8);
return null;
}
}
public void close() {
if (this.ossClient != null) {
this.ossClient.shutdown();
this.ossClient = null;
}
}
public void purge(String prefix) throws IOException {
try {
ObjectListing objects = this.listObjects(prefix, this.maxKeys, (String)null, true);
Iterator var4 = objects.getObjectSummaries().iterator();
while(var4.hasNext()) {
OSSObjectSummary object = (OSSObjectSummary)var4.next();
String key = object.getKey();
this.ossClient.deleteObject(this.bucketName, key);
this.statistics.incrementWriteOps(1);
}
var4 = objects.getCommonPrefixes().iterator();
while(var4.hasNext()) {
String dir = (String)var4.next();
this.deleteDirs(dir);
}
} catch (ClientException | OSSException var6) {
LOG.error("Failed to purge " + prefix);
}
}
public RemoteIterator<LocatedFileStatus> singleStatusRemoteIterator(final FileStatus fileStatus, final BlockLocation[] locations) {
return new RemoteIterator<LocatedFileStatus>() {
private boolean hasNext = true;
public boolean hasNext() throws IOException {
return fileStatus != null && this.hasNext;
}
public LocatedFileStatus next() throws IOException {
if (this.hasNext()) {
LocatedFileStatus s = new LocatedFileStatus(fileStatus, fileStatus.isFile() ? locations : null);
this.hasNext = false;
return s;
} else {
throw new NoSuchElementException();
}
}
};
}
public RemoteIterator<LocatedFileStatus> createLocatedFileStatusIterator(final String prefix, final int maxListingLength, final FileSystem fs, final PathFilter filter, final FileStatusAcceptor acceptor, final String delimiter) {
return new RemoteIterator<LocatedFileStatus>() {
private String nextMarker = null;
private boolean firstListing = true;
private boolean meetEnd = false;
private ListIterator<FileStatus> batchIterator;
public boolean hasNext() throws IOException {
if (this.firstListing) {
this.requestNextBatch();
this.firstListing = false;
}
return this.batchIterator.hasNext() || this.requestNextBatch();
}
public LocatedFileStatus next() throws IOException {
if (this.hasNext()) {
FileStatus status = (FileStatus)this.batchIterator.next();
BlockLocation[] locations = fs.getFileBlockLocations(status, 0L, status.getLen());
return new LocatedFileStatus(status, status.isFile() ? locations : null);
} else {
throw new NoSuchElementException();
}
}
private boolean requestNextBatch() {
if (this.meetEnd) {
return false;
} else {
ListObjectsRequest listRequest = new ListObjectsRequest(AliyunOSSFileSystemStore.this.bucketName);
listRequest.setPrefix(AliyunOSSUtils.maybeAddTrailingSlash(prefix));
listRequest.setMaxKeys(maxListingLength);
listRequest.setMarker(this.nextMarker);
listRequest.setDelimiter(delimiter);
ObjectListing listing = AliyunOSSFileSystemStore.this.ossClient.listObjects(listRequest);
List<FileStatus> stats = new ArrayList(listing.getObjectSummaries().size() + listing.getCommonPrefixes().size());
Iterator var4 = listing.getObjectSummaries().iterator();
while(var4.hasNext()) {
OSSObjectSummary summary = (OSSObjectSummary)var4.next();
String key = summary.getKey();
Path path = fs.makeQualified(new Path("/" + key));
if (filter.accept(path) && acceptor.accept(path, summary)) {
FileStatus statusx = new OSSFileStatus(summary.getSize(), key.endsWith("/"), 1, fs.getDefaultBlockSize(path), summary.getLastModified().getTime(), path, AliyunOSSFileSystemStore.this.username);
stats.add(statusx);
}
}
var4 = listing.getCommonPrefixes().iterator();
while(var4.hasNext()) {
String commonPrefix = (String)var4.next();
Path pathx = fs.makeQualified(new Path("/" + commonPrefix));
if (filter.accept(pathx) && acceptor.accept(pathx, commonPrefix)) {
FileStatus status = new OSSFileStatus(0L, true, 1, 0L, 0L, pathx, AliyunOSSFileSystemStore.this.username);
stats.add(status);
}
}
this.batchIterator = stats.listIterator();
if (listing.isTruncated()) {
this.nextMarker = listing.getNextMarker();
} else {
this.meetEnd = true;
}
AliyunOSSFileSystemStore.this.statistics.incrementReadOps(1);
return this.batchIterator.hasNext();
}
}
};
}
public PartETag uploadPart(File file, String key, String uploadId, int idx) throws IOException {
InputStream instream = null;
Exception caught = null;
for(int tries = 3; tries > 0; --tries) {
try {
instream = new FileInputStream(file);
UploadPartRequest uploadRequest = new UploadPartRequest();
uploadRequest.setBucketName(this.bucketName);
uploadRequest.setKey(key);
uploadRequest.setUploadId(uploadId);
uploadRequest.setInputStream(instream);
uploadRequest.setPartSize(file.length());
uploadRequest.setPartNumber(idx);
UploadPartResult uploadResult = this.ossClient.uploadPart(uploadRequest);
this.statistics.incrementWriteOps(1);
PartETag var10 = uploadResult.getPartETag();
return var10;
} catch (Exception var14) {
LOG.debug("Failed to upload " + file.getPath() + ", try again.", var14);
caught = var14;
} finally {
if (instream != null) {
instream.close();
instream = null;
}
}
}
assert caught != null;
throw new IOException("Failed to upload " + file.getPath() + " for 3 times.", caught);
}
public String getUploadId(String key) {
InitiateMultipartUploadRequest initiateMultipartUploadRequest = new InitiateMultipartUploadRequest(this.bucketName, key);
InitiateMultipartUploadResult initiateMultipartUploadResult = this.ossClient.initiateMultipartUpload(initiateMultipartUploadRequest);
return initiateMultipartUploadResult.getUploadId();
}
public CompleteMultipartUploadResult completeMultipartUpload(String key, String uploadId, List<PartETag> partETags) {
Collections.sort(partETags, new PartNumberAscendComparator());
CompleteMultipartUploadRequest completeMultipartUploadRequest = new CompleteMultipartUploadRequest(this.bucketName, key, uploadId, partETags);
return this.ossClient.completeMultipartUpload(completeMultipartUploadRequest);
}
public void abortMultipartUpload(String key, String uploadId) {
AbortMultipartUploadRequest request = new AbortMultipartUploadRequest(this.bucketName, key, uploadId);
this.ossClient.abortMultipartUpload(request);
}
private static class PartNumberAscendComparator implements Comparator<PartETag>, Serializable {
private PartNumberAscendComparator() {
}
public int compare(PartETag o1, PartETag o2) {
return o1.getPartNumber() > o2.getPartNumber() ? 1 : -1;
}
}
}
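The store exposes the raw multipart primitives used by AliyunOSSBlockOutputStream: getUploadId(), uploadPart(), completeMultipartUpload() and abortMultipartUpload(). A sketch of driving them directly, assuming a store that has already been initialize()d; the key and part files are placeholders:
import java.io.File;
import java.util.ArrayList;
import java.util.List;
import com.aliyun.oss.model.PartETag;
import org.apache.hadoop.fs.aliyun.oss.AliyunOSSFileSystemStore;

public class MultipartUploadSketch {
    static void upload(AliyunOSSFileSystemStore store, File part1, File part2) throws Exception {
        String key = "tmp/demo-multipart.bin";
        String uploadId = store.getUploadId(key);                 // start the multipart upload
        List<PartETag> parts = new ArrayList<>();
        try {
            parts.add(store.uploadPart(part1, key, uploadId, 1)); // part numbers start at 1
            parts.add(store.uploadPart(part2, key, uploadId, 2));
            store.completeMultipartUpload(key, uploadId, parts);  // stitch the parts together
        } catch (Exception e) {
            store.abortMultipartUpload(key, uploadId);            // leave no dangling upload
            throw e;
        }
    }
}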
8. AliyunOSSInputStream
package org.apache.hadoop.fs.aliyun.oss;
import com.google.common.util.concurrent.MoreExecutors;
import java.io.EOFException;
import java.io.IOException;
import java.util.ArrayDeque;
import java.util.Queue;
import java.util.concurrent.ExecutorService;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.aliyun.oss.ReadBuffer.STATUS;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class AliyunOSSInputStream extends FSInputStream {
public static final Logger LOG = LoggerFactory.getLogger(AliyunOSSInputStream.class);
private final long downloadPartSize;
private AliyunOSSFileSystemStore store;
private final String key;
private FileSystem.Statistics statistics;
private boolean closed;
private long contentLength;
private long position;
private long partRemaining;
private byte[] buffer;
private int maxReadAheadPartNumber;
private long expectNextPos;
private long lastByteStart;
private ExecutorService readAheadExecutorService;
private Queue<ReadBuffer> readBufferQueue = new ArrayDeque();
public AliyunOSSInputStream(Configuration conf, ExecutorService readAheadExecutorService, int maxReadAheadPartNumber, AliyunOSSFileSystemStore store, String key, Long contentLength, FileSystem.Statistics statistics) throws IOException {
this.readAheadExecutorService = MoreExecutors.listeningDecorator(readAheadExecutorService);
this.store = store;
this.key = key;
this.statistics = statistics;
this.contentLength = contentLength;
this.downloadPartSize = conf.getLong("fs.oss.multipart.download.size", 524288L);
this.maxReadAheadPartNumber = maxReadAheadPartNumber;
this.expectNextPos = 0L;
this.lastByteStart = -1L;
this.reopen(0L);
this.closed = false;
}
// Reopen the stream at the given position, scheduling read-ahead buffers
private synchronized void reopen(long pos) throws IOException {
if (pos < 0L) {
throw new EOFException("Cannot seek at negative position:" + pos);
} else if (pos > this.contentLength) {
throw new EOFException("Cannot seek after EOF, contentLength:" + this.contentLength + " position:" + pos);
} else {
long partSize;
if (pos + this.downloadPartSize > this.contentLength) {
partSize = this.contentLength - pos;
} else {
partSize = this.downloadPartSize;
}
if (this.buffer != null) {
if (LOG.isDebugEnabled()) {
LOG.debug("Aborting old stream to open at pos " + pos);
}
this.buffer = null;
}
boolean isRandomIO = true;
if (pos == this.expectNextPos) {
isRandomIO = false;
} else {
while(this.readBufferQueue.size() != 0 && ((ReadBuffer)this.readBufferQueue.element()).getByteStart() != pos) {
this.readBufferQueue.poll();
}
}
this.expectNextPos = pos + partSize;
int currentSize = this.readBufferQueue.size();
if (currentSize == 0) {
this.lastByteStart = pos - partSize;
} else {
ReadBuffer[] readBuffers = (ReadBuffer[])this.readBufferQueue.toArray(new ReadBuffer[currentSize]);
this.lastByteStart = readBuffers[currentSize - 1].getByteStart();
}
int maxLen = this.maxReadAheadPartNumber - currentSize;
for(int i = 0; i < maxLen && i < (currentSize + 1) * 2 && this.lastByteStart + partSize * (long)(i + 1) <= this.contentLength; ++i) {
long byteStart = this.lastByteStart + partSize * (long)(i + 1);
long byteEnd = byteStart + partSize - 1L;
if (byteEnd >= this.contentLength) {
byteEnd = this.contentLength - 1L;
}
ReadBuffer readBuffer = new ReadBuffer(byteStart, byteEnd);
if (readBuffer.getBuffer().length == 0) {
readBuffer.setStatus(STATUS.SUCCESS);
} else {
this.readAheadExecutorService.execute(new AliyunOSSFileReaderTask(this.key, this.store, readBuffer));
}
this.readBufferQueue.add(readBuffer);
if (isRandomIO) {
break;
}
}
ReadBuffer readBuffer = (ReadBuffer)this.readBufferQueue.poll();
readBuffer.lock();
try {
readBuffer.await(STATUS.INIT);
if (readBuffer.getStatus() == STATUS.ERROR) {
this.buffer = null;
} else {
this.buffer = readBuffer.getBuffer();
}
} catch (InterruptedException var17) {
LOG.warn("interrupted when wait a read buffer");
} finally {
readBuffer.unlock();
}
if (this.buffer == null) {
throw new IOException("Null IO stream");
} else {
this.position = pos;
this.partRemaining = partSize;
}
}
}
// Read a single byte from the stream
public synchronized int read() throws IOException {
this.checkNotClosed();
if (this.partRemaining <= 0L && this.position < this.contentLength) {
this.reopen(this.position);
}
int byteRead = -1;
if (this.partRemaining != 0L) {
byteRead = this.buffer[this.buffer.length - (int)this.partRemaining] & 255;
}
if (byteRead >= 0) {
++this.position;
--this.partRemaining;
}
if (this.statistics != null && byteRead >= 0) {
this.statistics.incrementBytesRead((long)byteRead);
}
return byteRead;
}
private void checkNotClosed() throws IOException {
if (this.closed) {
throw new IOException("Stream is closed!");
}
}
public synchronized int read(byte[] buf, int off, int len) throws IOException {
this.checkNotClosed();
if (buf == null) {
throw new NullPointerException();
} else if (off >= 0 && len >= 0 && len <= buf.length - off) {
if (len == 0) {
return 0;
} else {
int bytesRead = 0;
while(this.position < this.contentLength && bytesRead < len) {
if (this.partRemaining == 0L) {
this.reopen(this.position);
}
int bytes = 0;
for(int i = this.buffer.length - (int)this.partRemaining; i < this.buffer.length; ++i) {
buf[off + bytesRead] = this.buffer[i];
++bytes;
++bytesRead;
if (off + bytesRead >= len) {
break;
}
}
if (bytes > 0) {
this.position += (long)bytes;
this.partRemaining -= (long)bytes;
} else if (this.partRemaining != 0L) {
throw new IOException("Failed to read from stream. Remaining:" + this.partRemaining);
}
}
if (this.statistics != null && bytesRead > 0) {
this.statistics.incrementBytesRead((long)bytesRead);
}
return bytesRead == 0 && len > 0 ? -1 : bytesRead;
}
} else {
throw new IndexOutOfBoundsException();
}
}
public synchronized void close() throws IOException {
if (!this.closed) {
this.closed = true;
this.buffer = null;
}
}
public synchronized int available() throws IOException {
this.checkNotClosed();
long remaining = this.contentLength - this.position;
return remaining > 2147483647L ? Integer.MAX_VALUE : (int)remaining;
}
public synchronized void seek(long pos) throws IOException {
this.checkNotClosed();
if (this.position != pos) {
if (pos > this.position && pos < this.position + this.partRemaining) {
long len = pos - this.position;
this.position = pos;
this.partRemaining -= len;
} else {
this.reopen(pos);
}
}
}
public synchronized long getPos() throws IOException {
this.checkNotClosed();
return this.position;
}
public boolean seekToNewSource(long targetPos) throws IOException {
this.checkNotClosed();
return false;
}
public long getExpectNextPos() {
return this.expectNextPos;
}
}
9、AliyunOSSUtils
package org.apache.hadoop.fs.aliyun.oss;
import com.aliyun.oss.common.auth.CredentialsProvider;
import com.google.common.base.Preconditions;
import java.io.File;
import java.io.IOException;
import java.net.URI;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.LocalDirAllocator;
import org.apache.hadoop.security.ProviderUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public final class AliyunOSSUtils {
private static final Logger LOG = LoggerFactory.getLogger(AliyunOSSUtils.class);
private static LocalDirAllocator directoryAllocator;
private AliyunOSSUtils() {
}
public static int intPositiveOption(Configuration conf, String key, int defVal) {
int v = conf.getInt(key, defVal);
if (v <= 0) {
LOG.warn(key + " is configured to " + v + ", will use default value: " + defVal);
v = defVal;
}
return v;
}
// Look up a configuration value (possibly stored as a credential) by key
public static String getValueWithKey(Configuration conf, String key) throws IOException {
try {
char[] pass = conf.getPassword(key);
return pass != null ? (new String(pass)).trim() : "";
} catch (IOException var3) {
throw new IOException("Cannot find password option " + key, var3);
}
}
// Calculate the multipart part size
public static long calculatePartSize(long contentLength, long minPartSize) {
long tmpPartSize = contentLength / 10000L + 1L;
return Math.max(minPartSize, tmpPartSize);
}
// Obtain the credentials provider
public static CredentialsProvider getCredentialsProvider(URI uri, Configuration conf) throws IOException {
String className = conf.getTrimmed("fs.oss.credentials.provider");
Object credentials;
if (StringUtils.isEmpty(className)) {
Configuration newConf = ProviderUtils.excludeIncompatibleCredentialProviders(conf, AliyunOSSFileSystem.class);
credentials = new AliyunCredentialsProvider(newConf);
} else {
try {
LOG.debug("Credential provider class is:" + className);
Class<?> credClass = Class.forName(className);
try {
credentials = (CredentialsProvider)credClass.getDeclaredConstructor(URI.class, Configuration.class).newInstance(uri, conf);
} catch (SecurityException | NoSuchMethodException var6) {
credentials = (CredentialsProvider)credClass.getDeclaredConstructor().newInstance();
}
} catch (ClassNotFoundException var7) {
throw new IOException(className + " not found.", var7);
} catch (SecurityException | NoSuchMethodException var8) {
throw new IOException(String.format("%s constructor exception. A class specified in %s must provide an accessible constructor accepting URI and Configuration, or an accessible default constructor.", className, "fs.oss.credentials.provider"), var8);
} catch (IllegalArgumentException | ReflectiveOperationException var9) {
throw new IOException(className + " instantiation exception.", var9);
}
}
return (CredentialsProvider)credentials;
}
public static String maybeAddTrailingSlash(String key) {
return StringUtils.isNotEmpty(key) && !key.endsWith("/") ? key + '/' : key;
}
public static boolean objectRepresentsDirectory(String name, long size) {
return StringUtils.isNotEmpty(name) && name.endsWith("/") && size == 0L;
}
public static File createTmpFileForWrite(String path, long size, Configuration conf) throws IOException {
if (conf.get("fs.oss.buffer.dir") == null) {
conf.set("fs.oss.buffer.dir", conf.get("hadoop.tmp.dir") + "/oss");
}
if (directoryAllocator == null) {
directoryAllocator = new LocalDirAllocator("fs.oss.buffer.dir");
}
return directoryAllocator.createTmpFileForWrite(path, size, conf);
}
static int intOption(Configuration conf, String key, int defVal, int min) {
int v = conf.getInt(key, defVal);
Preconditions.checkArgument(v >= min, String.format("Value of %s: %d is below the minimum value %d", key, v, min));
LOG.debug("Value of {} is {}", key, v);
return v;
}
static long longOption(Configuration conf, String key, long defVal, long min) {
long v = conf.getLong(key, defVal);
Preconditions.checkArgument(v >= min, String.format("Value of %s: %d is below the minimum value %d", key, v, min));
LOG.debug("Value of {} is {}", key, v);
return v;
}
public static long getMultipartSizeProperty(Configuration conf, String property, long defVal) {
long partSize = conf.getLong(property, defVal);
if (partSize < 102400L) {
LOG.warn("{} must be at least 100 KB; configured value is {}", property, partSize);
partSize = 102400L;
} else if (partSize > 2147483647L) {
LOG.warn("oss: {} capped to ~2.14GB(maximum allowed size with current output mechanism)", "fs.oss.multipart.upload.size");
partSize = 2147483647L;
}
return partSize;
}
}
10、Constants
package org.apache.hadoop.fs.aliyun.oss;
import com.aliyun.oss.common.utils.VersionInfoUtils;
public final class Constants {
public static final String USER_AGENT_PREFIX = "fs.oss.user.agent.prefix";
public static final String USER_AGENT_PREFIX_DEFAULT = VersionInfoUtils.getDefaultUserAgent();
public static final String CREDENTIALS_PROVIDER_KEY = "fs.oss.credentials.provider";
public static final int OSS_DEFAULT_PORT = -1;
public static final String ACCESS_KEY_ID = "fs.oss.accessKeyId";
public static final String ACCESS_KEY_SECRET = "fs.oss.accessKeySecret";
public static final String SECURITY_TOKEN = "fs.oss.securityToken";
public static final String MAXIMUM_CONNECTIONS_KEY = "fs.oss.connection.maximum";
public static final int MAXIMUM_CONNECTIONS_DEFAULT = 32;
public static final String SECURE_CONNECTIONS_KEY = "fs.oss.connection.secure.enabled";
public static final boolean SECURE_CONNECTIONS_DEFAULT = true;
public static final String ENDPOINT_KEY = "fs.oss.endpoint";
public static final String PROXY_HOST_KEY = "fs.oss.proxy.host";
public static final String PROXY_PORT_KEY = "fs.oss.proxy.port";
public static final String PROXY_USERNAME_KEY = "fs.oss.proxy.username";
public static final String PROXY_PASSWORD_KEY = "fs.oss.proxy.password";
public static final String PROXY_DOMAIN_KEY = "fs.oss.proxy.domain";
public static final String PROXY_WORKSTATION_KEY = "fs.oss.proxy.workstation";
public static final String MAX_ERROR_RETRIES_KEY = "fs.oss.attempts.maximum";
public static final int MAX_ERROR_RETRIES_DEFAULT = 10;
public static final String ESTABLISH_TIMEOUT_KEY = "fs.oss.connection.establish.timeout";
public static final int ESTABLISH_TIMEOUT_DEFAULT = 50000;
public static final String SOCKET_TIMEOUT_KEY = "fs.oss.connection.timeout";
public static final int SOCKET_TIMEOUT_DEFAULT = 200000;
public static final String MAX_PAGING_KEYS_KEY = "fs.oss.paging.maximum";
public static final int MAX_PAGING_KEYS_DEFAULT = 1000;
public static final String MULTIPART_UPLOAD_PART_SIZE_KEY = "fs.oss.multipart.upload.size";
public static final long MULTIPART_UPLOAD_PART_SIZE_DEFAULT = 104857600L;
public static final int MULTIPART_MIN_SIZE = 102400;
public static final int MULTIPART_UPLOAD_PART_NUM_LIMIT = 10000;
public static final String MIN_MULTIPART_UPLOAD_THRESHOLD_KEY = "fs.oss.multipart.upload.threshold";
public static final long MIN_MULTIPART_UPLOAD_THRESHOLD_DEFAULT = 20971520L;
public static final String MULTIPART_DOWNLOAD_SIZE_KEY = "fs.oss.multipart.download.size";
public static final long MULTIPART_DOWNLOAD_SIZE_DEFAULT = 524288L;
public static final String MULTIPART_DOWNLOAD_THREAD_NUMBER_KEY = "fs.oss.multipart.download.threads";
public static final int MULTIPART_DOWNLOAD_THREAD_NUMBER_DEFAULT = 10;
public static final String MAX_TOTAL_TASKS_KEY = "fs.oss.max.total.tasks";
public static final int MAX_TOTAL_TASKS_DEFAULT = 128;
public static final String MULTIPART_DOWNLOAD_AHEAD_PART_MAX_NUM_KEY = "fs.oss.multipart.download.ahead.part.max.number";
public static final int MULTIPART_DOWNLOAD_AHEAD_PART_MAX_NUM_DEFAULT = 4;
public static final String MAX_COPY_TASKS_KEY = "fs.oss.max.copy.tasks";
public static final int MAX_COPY_TASKS_DEFAULT = 10485760;
public static final String MAX_COPY_THREADS_NUM_KEY = "fs.oss.max.copy.threads";
public static final int MAX_COPY_THREADS_DEFAULT = 25;
public static final String MAX_CONCURRENT_COPY_TASKS_PER_DIR_KEY = "fs.oss.max.copy.tasks.per.dir";
public static final int MAX_CONCURRENT_COPY_TASKS_PER_DIR_DEFAULT = 5;
public static final String BUFFER_DIR_KEY = "fs.oss.buffer.dir";
public static final String CANNED_ACL_KEY = "fs.oss.acl.default";
public static final String CANNED_ACL_DEFAULT = "";
public static final String SERVER_SIDE_ENCRYPTION_ALGORITHM_KEY = "fs.oss.server-side-encryption-algorithm";
public static final String FS_OSS_BLOCK_SIZE_KEY = "fs.oss.block.size";
public static final int FS_OSS_BLOCK_SIZE_DEFAULT = 67108864;
public static final String FS_OSS = "oss";
public static final String KEEPALIVE_TIME_KEY = "fs.oss.threads.keepalivetime";
public static final int KEEPALIVE_TIME_DEFAULT = 60;
public static final String UPLOAD_ACTIVE_BLOCKS_KEY = "fs.oss.upload.active.blocks";
public static final int UPLOAD_ACTIVE_BLOCKS_DEFAULT = 4;
private Constants() {
}
}
11、FileStatusAcceptor
package org.apache.hadoop.fs.aliyun.oss;
import com.aliyun.oss.model.OSSObjectSummary;
import org.apache.hadoop.fs.Path;
// Acceptor that decides which listed objects become file statuses
public interface FileStatusAcceptor {
boolean accept(Path var1, OSSObjectSummary var2);
boolean accept(Path var1, String var2);
public static class AcceptAllButSelf implements FileStatusAcceptor {
private final Path qualifiedPath;
public AcceptAllButSelf(Path qualifiedPath) {
this.qualifiedPath = qualifiedPath;
}
public boolean accept(Path keyPath, OSSObjectSummary summary) {
return !keyPath.equals(this.qualifiedPath);
}
public boolean accept(Path keyPath, String prefix) {
return !keyPath.equals(this.qualifiedPath);
}
}
public static class AcceptFilesOnly implements FileStatusAcceptor {
private final Path qualifiedPath;
public AcceptFilesOnly(Path qualifiedPath) {
this.qualifiedPath = qualifiedPath;
}
public boolean accept(Path keyPath, OSSObjectSummary summary) {
return !keyPath.equals(this.qualifiedPath) && !AliyunOSSUtils.objectRepresentsDirectory(summary.getKey(), summary.getSize());
}
public boolean accept(Path keyPath, String prefix) {
return false;
}
}
}
12、OSS
package org.apache.hadoop.fs.aliyun.oss;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.DelegateToFileSystem;
// AbstractFileSystem wrapper that delegates to the OSS FileSystem implementation
public class OSS extends DelegateToFileSystem {
public OSS(URI theUri, Configuration conf) throws IOException, URISyntaxException {
super(theUri, new AliyunOSSFileSystem(), conf, "oss", false);
}
public int getUriDefaultPort() {
return -1;
}
}
13、OSSFileStatus
package org.apache.hadoop.fs.aliyun.oss;
import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.classification.InterfaceStability.Evolving;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
@Private
@Evolving
// FileStatus implementation for OSS objects
public class OSSFileStatus extends FileStatus {
public OSSFileStatus(long length, boolean isdir, int blockReplication, long blocksize, long modTime, Path path, String user) {
super(length, isdir, blockReplication, blocksize, modTime, path);
this.setOwner(user);
this.setGroup(user);
}
}
14、ReadBuffer
package org.apache.hadoop.fs.aliyun.oss;
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.ReentrantLock;
// Read-ahead buffer holding one downloaded byte range
public class ReadBuffer {
private final ReentrantLock lock = new ReentrantLock();
private Condition readyCondition;
private byte[] buffer;
private STATUS status;
private long byteStart;
private long byteEnd;
public ReadBuffer(long byteStart, long byteEnd) {
this.readyCondition = this.lock.newCondition();
this.buffer = new byte[(int)(byteEnd - byteStart) + 1];
this.status = ReadBuffer.STATUS.INIT;
this.byteStart = byteStart;
this.byteEnd = byteEnd;
}
public void lock() {
this.lock.lock();
}
public void unlock() {
this.lock.unlock();
}
public void await(STATUS waitStatus) throws InterruptedException {
while(this.status == waitStatus) {
this.readyCondition.await();
}
}
public void signalAll() {
this.readyCondition.signalAll();
}
public byte[] getBuffer() {
return this.buffer;
}
public STATUS getStatus() {
return this.status;
}
public void setStatus(STATUS status) {
this.status = status;
}
public long getByteStart() {
return this.byteStart;
}
public long getByteEnd() {
return this.byteEnd;
}
static enum STATUS {
INIT,
SUCCESS,
ERROR;
private STATUS() {
}
}
}
4.2 Comparing and adapting the corresponding classes against minio:7.0.2.jar
1、AliyunOSSUtils——>MinIoUtils
- Modification points
- private static final Logger LOG = LoggerFactory.getLogger(AliyunOSSUtils.class);——>change the class name in the logger
- com.aliyun.oss.common.auth.CredentialsProvider ——>replace with the corresponding MinIO StaticProvider
- AliyunCredentialsProvider——>replace and adapt as MinIoCredentialsProvider
- Apart from these renames the class body stays the same as the AliyunOSSUtils source listed in section 9 above, so it is not repeated here.
2、AliyunCredentialsProvider——>MinIoCredentialsProvider
The first change is to the provider's constructor.
- The constructor reads the credentials from the XML configuration, so this shared foundation of all the other methods is adapted first.
public MinIoCredentialsProvider(Configuration conf) throws IOException {
String accessKeyId;
String accessKeySecret;
try {
// Read accessKeyId from the XML configuration
accessKeyId = AliyunOSSUtils.getValueWithKey(conf, "fs.oss.accessKeyId");
// Read accessKeySecret from the XML configuration
accessKeySecret = AliyunOSSUtils.getValueWithKey(conf, "fs.oss.accessKeySecret");
} catch (IOException var7) {
throw new InvalidCredentialsException(var7);
}
String securityToken;
try {
// Read securityToken from the XML configuration
securityToken = AliyunOSSUtils.getValueWithKey(conf, "fs.oss.securityToken");
} catch (IOException var6) {
securityToken = null;
}
if (!StringUtils.isEmpty(accessKeyId) && !StringUtils.isEmpty(accessKeySecret)) {
if (StringUtils.isNotEmpty(securityToken)) {
// Build the default credentials object
this.credentials = new DefaultCredentials(accessKeyId, accessKeySecret, securityToken);
} else {
this.credentials = new DefaultCredentials(accessKeyId, accessKeySecret);
}
} else {
throw new InvalidCredentialsException("AccessKeyId and AccessKeySecret should not be null or empty.");
}
}
3、AliyunOSSFileSystemStore——>MinIOOSSFileSystemStore
Modification points:
- OSSClient——>MinioClient
- MinioClient takes far fewer construction parameters, so it does not need everything the Aliyun client is given.
- Concretely, the OSSClient calls in the following methods are replaced with their MinioClient equivalents:
- deleteObject
- deleteObjects
- deleteDirs
- getObjectMetadata
- storeEmptyFile
- copyFile
- multipartCopy
- uploadObject
- listObjects
- retrieve
- purge
- uploadPart
- getUploadId
- completeMultipartUpload
- abortMultipartUpload
- In each of these methods the OSSClient call becomes a MinioClient call. Because the MinIO upload APIs are built around ObjectArgs.Builder objects and differ substantially from the Aliyun OSS SDK, the file handling inside every method has to be reworked rather than simply renamed.
import com.google.common.collect.Iterators;
import io.minio.*;
import io.minio.errors.*;
import io.minio.messages.Item;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.*;
import java.net.URI;
import java.security.InvalidKeyException;
import java.security.NoSuchAlgorithmException;
import java.util.*;
public class MinIOOSSFileSystemStore {
public static final Logger LOG = LoggerFactory.getLogger(MinIOOSSFileSystemStore.class);
private String username;
private FileSystem.Statistics statistics;
private MinioClient ossClient;
private String bucketName;
private long uploadPartSize;
private int maxKeys;
private String serverSideEncryptionAlgorithm;
public MinIOOSSFileSystemStore() {
}
public void initialize(URI uri, Configuration conf, String user, FileSystem.Statistics stat) throws IOException {
this.username = user;
this.statistics = stat;
boolean secureConnections = conf.getBoolean("fs.oss.connection.secure.enabled", true);
String proxyHost = conf.getTrimmed("fs.oss.proxy.host", "");
int proxyPort = conf.getInt("fs.oss.proxy.port", -1);
String proxyUsername;
String cannedACLName;
if (StringUtils.isNotEmpty(proxyHost)) {
proxyUsername = conf.getTrimmed("fs.oss.proxy.username");
String proxyPassword = conf.getTrimmed("fs.oss.proxy.password");
if (proxyUsername == null != (proxyPassword == null)) {
cannedACLName = "Proxy error: fs.oss.proxy.username or fs.oss.proxy.password set without the other.";
LOG.error(cannedACLName);
throw new IllegalArgumentException(cannedACLName);
}
} else if (proxyPort >= 0) {
proxyUsername = "Proxy error: fs.oss.proxy.port set without fs.oss.proxy.host";
LOG.error(proxyUsername);
throw new IllegalArgumentException(proxyUsername);
}
proxyUsername = conf.getTrimmed("fs.oss.endpoint", "");
if (StringUtils.isEmpty(proxyUsername)) {
throw new IllegalArgumentException("MinIO org.apache.hadoop.fs.minio.oss.OSS endpoint should not be null or empty. Please set proper endpoint with 'fs.oss.endpoint'.");
} else {
MinioClient.Builder builder = MinioClient.builder();
builder.endpoint(conf.get("fs.oss.endpoint"));
builder.credentials(conf.get("fs.oss.accessKeyId"), conf.get("fs.oss.accessKeySecret"));
// Init the builder
this.ossClient = builder.build();
this.uploadPartSize = MinIOOSSUtils.getMultipartSizeProperty(conf, "fs.oss.multipart.upload.size", 104857600L);
this.serverSideEncryptionAlgorithm = conf.get("fs.oss.server-side-encryption-algorithm", "");
this.bucketName = uri.getHost();
this.maxKeys = conf.getInt("fs.oss.paging.maximum", 1000);
}
}
public void deleteObject(String filename) throws IOException {
RemoveObjectArgs.Builder builder = RemoveObjectArgs.builder();
builder.bucket(bucketName);
builder.object(filename);
RemoveObjectArgs removeObjectArgs = builder.build();
try {
this.ossClient.removeObject(removeObjectArgs);
} catch (Exception e) {
throw new IOException("Failed to delete MinIO org.apache.hadoop.fs.minio.oss.OSS object ");
}
this.statistics.incrementWriteOps(1);
}
public void deleteObjects(List<String> keysToDelete) throws IOException {
if (CollectionUtils.isEmpty(keysToDelete)) {
LOG.warn("Keys to delete is empty.");
} else {
List<String> deleteFailed = keysToDelete;
for (String fileName : deleteFailed) {
this.deleteObject(fileName);
this.statistics.incrementWriteOps(1);
}
}
}
public void deleteDirs(String key) throws IOException {
key = MinIOOSSUtils.maybeAddTrailingSlash(key);
ListObjectsArgs.Builder builder = ListObjectsArgs.builder();
builder.bucket(bucketName);
builder.prefix(key);
builder.delimiter((String) null);
builder.maxKeys(this.maxKeys);
ListObjectsArgs buildArgs = builder.build();
while (true) {
Iterable<Result<Item>> results = this.ossClient.listObjects(buildArgs);
this.statistics.incrementReadOps(1);
List<String> keysToDelete = new ArrayList();
Iterator var5 = results.iterator();
while (var5.hasNext()) {
Result<Item> objectSummary = (Result<Item>) var5.next();
Item item = null;
try {
item = objectSummary.get();
} catch (Exception e) {
throw new IOException(e.getMessage());
}
keysToDelete.add(item.objectName());
}
this.deleteObjects(keysToDelete);
if (Iterators.size(var5) < maxKeys) {
break;
}
}
}
public void storeEmptyFile(String fileName) throws IOException {
byte[] buffer = new byte[0];
ByteArrayInputStream in = new ByteArrayInputStream(buffer);
PutObjectArgs.Builder putObjArgs = PutObjectArgs.builder();
putObjArgs.bucket(this.bucketName);
putObjArgs.stream(in, 0, 0);
putObjArgs.object(fileName);
PutObjectArgs build = putObjArgs.build();
try {
this.ossClient.putObject(build);
this.statistics.incrementWriteOps(1);
} catch (Exception e) {
} finally {
in.close();
}
}
private boolean singleCopy(String srcKey, String dstKey) throws ServerException, InsufficientDataException, ErrorResponseException, IOException, NoSuchAlgorithmException, InvalidKeyException, InvalidResponseException, XmlParserException, InternalException {
CopyObjectArgs.Builder copyObjectArgs = CopyObjectArgs.builder();
// Copy the source object onto the destination key.
CopySource copySource = CopySource.builder().bucket(this.bucketName).object(srcKey).build();
CopyObjectArgs copyArg = copyObjectArgs.bucket(this.bucketName).object(dstKey).source(copySource).build();
ObjectWriteResponse objectWriteResponse = this.ossClient.copyObject(copyArg);
this.statistics.incrementWriteOps(1);
LOG.debug(objectWriteResponse.etag());
return true;
}
public boolean copyFile(String srcKey, long srcLen, String dstKey) {
try {
return this.singleCopy(srcKey, dstKey);
} catch (Exception var6) {
LOG.debug("Exception thrown when copy file: " + srcKey + ", exception: " + var6 + ", use multipartCopy instead");
return false;
}
}
public void uploadObject(String fileName, File file) throws IOException {
File object = file.getAbsoluteFile();
FileInputStream fis = new FileInputStream(object);
PutObjectArgs.Builder putArgs = PutObjectArgs.builder();
putArgs.stream(fis, object.length(), -1);
putArgs.object(fileName);
putArgs.bucket(this.bucketName);
PutObjectArgs putArg = putArgs.build();
try {
ObjectWriteResponse objectWriteResponse = this.ossClient.putObject(putArg);
LOG.debug(objectWriteResponse.etag());
this.statistics.incrementWriteOps(1);
} catch (Exception e) {
LOG.error(e.getMessage());
} finally {
fis.close();
}
}
public Iterable<Result<Item>> listObjects(String prefix, int maxListingLength, String marker, boolean recursive) {
String delimiter = recursive ? null : "/";
prefix = MinIOOSSUtils.maybeAddTrailingSlash(prefix);
ListObjectsArgs.Builder listRequest = ListObjectsArgs.builder();
// bucket name
listRequest.bucket(this.bucketName);
// key prefix
listRequest.prefix(prefix);
// delimiter
listRequest.delimiter(delimiter);
// maximum number of keys per listing
listRequest.maxKeys(maxListingLength);
// marker for the key set
listRequest.marker(marker);
ListObjectsArgs listObjectsArgs = listRequest.build();
Iterable<Result<Item>> results = this.ossClient.listObjects(listObjectsArgs);
this.statistics.incrementReadOps(1);
return results;
}
public InputStream retrieve(String key, long byteStart) {
try {
GetObjectArgs.Builder request = GetObjectArgs.builder();
request.bucket(this.bucketName);
request.object(key);
request.offset(byteStart);
GetObjectArgs build = request.build();
GetObjectResponse getObject = null;
try {
getObject = this.ossClient.getObject(build);
} catch (Exception e) {
}
this.statistics.incrementReadOps(1);
return getObject;
} catch (Exception var8) {
LOG.error("Exception thrown when store retrieves key: " + key + ", exception: " + var8);
return null;
}
}
public void close() {
if (this.ossClient != null) {
this.ossClient = null;
}
}
public void purge(String prefix) throws IOException, ServerException, InsufficientDataException, ErrorResponseException, NoSuchAlgorithmException, InvalidKeyException, InvalidResponseException, XmlParserException, InternalException {
try {
Iterable<Result<Item>> results = this.listObjects(prefix, this.maxKeys, (String) null, true);
Iterator var4 = results.iterator();
while (var4.hasNext()) {
Result<Item> next = (Result<Item>) var4.next();
String key = next.get().objectName();
this.deleteObject(key);
this.statistics.incrementWriteOps(1);
}
this.deleteDirs(prefix);
} catch (Exception var6) {
LOG.error("Failed to purge " + prefix);
}
}
public RemoteIterator<LocatedFileStatus> singleStatusRemoteIterator(final FileStatus fileStatus, final BlockLocation[] locations) {
return new RemoteIterator<LocatedFileStatus>() {
private boolean hasNext = true;
public boolean hasNext() throws IOException {
return fileStatus != null && this.hasNext;
}
public LocatedFileStatus next() throws IOException {
if (this.hasNext()) {
LocatedFileStatus s = new LocatedFileStatus(fileStatus, fileStatus.isFile() ? locations : null);
this.hasNext = false;
return s;
} else {
throw new NoSuchElementException();
}
}
};
}
public RemoteIterator<LocatedFileStatus> createLocatedFileStatusIterator(final String prefix, final int maxListingLength, final FileSystem fs, final PathFilter filter, final FileStatusAcceptor acceptor, final String delimiter) {
return new RemoteIterator<LocatedFileStatus>() {
private String nextMarker = null;
private boolean firstListing = true;
private boolean meetEnd = false;
private ListIterator<FileStatus> batchIterator;
public boolean hasNext() throws IOException {
if (this.firstListing) {
this.requestNextBatch();
this.firstListing = false;
}
return this.batchIterator.hasNext() || this.requestNextBatch();
}
public LocatedFileStatus next() throws IOException {
if (this.hasNext()) {
FileStatus status = (FileStatus) this.batchIterator.next();
BlockLocation[] locations = fs.getFileBlockLocations(status, 0L, status.getLen());
return new LocatedFileStatus(status, status.isFile() ? locations : null);
} else {
throw new NoSuchElementException();
}
}
private boolean requestNextBatch() {
if (this.meetEnd) {
return false;
} else {
ListObjectsArgs.Builder listRequest = ListObjectsArgs.builder();
listRequest.bucket(bucketName);
listRequest.prefix(MinIOOSSUtils.maybeAddTrailingSlash(prefix));
listRequest.maxKeys(maxListingLength);
listRequest.marker(this.nextMarker);
listRequest.delimiter(delimiter);
ListObjectsArgs build = listRequest.build();
Iterable<Result<Item>> results = MinIOOSSFileSystemStore.this.ossClient.listObjects(build);
List<FileStatus> stats = new ArrayList(Iterators.size(results.iterator()));
String tempNextMaker = null;
while (results.iterator().hasNext()) {
Result<Item> summary = results.iterator().next();
Item item = null;
try {
item = summary.get();
} catch (Exception e) {
LOG.error(e.getMessage());
}
Path path = fs.makeQualified(new Path("/" + item.objectName()));
if (filter.accept(path) && acceptor.accept(path, item.objectName(), item.size())) {
FileStatus statusx = new OSSFileStatus(item.size(), item.objectName().endsWith("/"), 1, fs.getDefaultBlockSize(path), item.lastModified().toInstant().toEpochMilli(), path, MinIOOSSFileSystemStore.this.username);
stats.add(statusx);
}
tempNextMaker = item.objectName();
}
Path pathx = fs.makeQualified(new Path("/" + prefix));
if (filter.accept(pathx) && acceptor.accept(pathx, prefix)) {
FileStatus status = new OSSFileStatus(0L, true, 1, 0L, 0L, pathx, MinIOOSSFileSystemStore.this.username);
stats.add(status);
}
this.nextMarker = tempNextMaker;
this.batchIterator = stats.listIterator();
MinIOOSSFileSystemStore.this.statistics.incrementReadOps(1);
return this.batchIterator.hasNext();
}
}
};
}
public StatObjectResponse getObjectMetadata(String key) {
try {
StatObjectResponse statObjectResponse = this.ossClient.statObject(StatObjectArgs.builder().bucket(bucketName).object(key).build());
return statObjectResponse;
} catch (Exception e) {
LOG.error(e.getMessage());
}
return null;
}
public ObjectWriteResponse uploadObjectPart(String key, File blockFile, long blockSize) throws IOException {
File object = blockFile.getAbsoluteFile();
FileInputStream fis = new FileInputStream(object);
PutObjectArgs.Builder putArgs = PutObjectArgs.builder();
putArgs.bucket(this.bucketName);
putArgs.stream(fis, object.length(), blockSize);
putArgs.object(key);
PutObjectArgs putArg = putArgs.build();
try {
ObjectWriteResponse objectWriteResponse = this.ossClient.putObject(putArg);
LOG.debug(objectWriteResponse.etag());
this.statistics.incrementWriteOps(1);
return objectWriteResponse;
} catch (Exception e) {
LOG.error(e.getMessage());
return null;
} finally {
fis.close();
}
}
}
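The rewritten store still reads the original fs.oss.* keys through conf.get(...), so pointing the file system at MinIO is purely a configuration change. Below is a minimal sketch; the endpoint, the credentials and the fs.oss.impl class name are placeholders and assumptions for such a setup, not values taken from a working deployment.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import java.net.URI;
public class MinioFsConfigDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // The same keys the store's initialize() reads, now pointed at a MinIO endpoint (placeholder values).
        conf.set("fs.oss.endpoint", "http://127.0.0.1:9000");
        conf.set("fs.oss.accessKeyId", "minio-access-key");
        conf.set("fs.oss.accessKeySecret", "minio-secret-key");
        // Assumed fully qualified name of the rewritten FileSystem implementation.
        conf.set("fs.oss.impl", "org.apache.hadoop.fs.minio.oss.MinIOOSSFileSystem");
        try (FileSystem fs = FileSystem.get(URI.create("oss://mybucket/"), conf)) {
            for (FileStatus status : fs.listStatus(new Path("/"))) {
                System.out.println(status.getPath() + " len=" + status.getLen());
            }
        }
    }
}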
4、AliyunOSSFileSystem——>MinIOOSSFileSystem
- The most important methods are the following:
- public FileStatus[] listStatus(Path path) throws IOException
- Returns the array of file statuses under a path.
- One notable difference between MinIO and Aliyun OSS: Aliyun OSS returns directory objects and file objects in two separate fields, whereas MinIO returns both in a single item collection. The returned items therefore mix directories and files, and the listing has to be split with Item.isDir() before the file statuses are built; a distilled sketch of that split follows this list.
- public FSDataInputStream open(Path path, int bufferSize) throws IOException
- Every object that is read is opened through this method to obtain its stream.
- public FileStatus getFileStatus(Path path) throws IOException
- Aliyun OSS first fetches the object metadata, trying the key as a file and then as a directory. If both lookups fail it assumes the metadata object may be lost and falls back to listObjects to check whether the directory still contains items, using the current key as the marker to decide whether anything follows; if the listing is empty the file does not exist. MinIO speaks the S3 protocol, so the listing result can be interpreted in the same way.
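A minimal sketch of that directory/file split, distilled from the listStatus implementation below. It assumes it lives inside MinIOOSSFileSystem, so store, uri, workingDir, username, maxKeys, keyToPath and getFileStatus refer to the members of the full class; treat it as an illustration rather than a drop-in method.
// Sketch only: split one MinIO listing batch into plain objects and directory entries.
private List<FileStatus> splitListingBatch(String key) throws IOException {
    List<FileStatus> result = new ArrayList<>();
    List<Item> dirItems = new ArrayList<>();
    for (Result<Item> r : this.store.listObjects(key, this.maxKeys, null, false)) {
        Item item;
        try {
            item = r.get();
        } catch (Exception e) {
            throw new IOException(e);
        }
        Path keyPath = this.keyToPath(item.objectName()).makeQualified(this.uri, this.workingDir);
        if (!item.isDir()) {
            // Plain object: build the status directly from the listing entry.
            result.add(new OSSFileStatus(item.size(), false, 1, this.getDefaultBlockSize(keyPath),
                    item.lastModified().toInstant().toEpochMilli(), keyPath, this.username));
        } else {
            // Directory entry: resolve it through getFileStatus afterwards.
            dirItems.add(item);
        }
    }
    for (Item dir : dirItems) {
        result.add(this.getFileStatus(this.keyToPath(dir.objectName()).makeQualified(this.uri, this.workingDir)));
    }
    return result;
}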
package org.apache.hadoop.fs.minio.oss;
import com.google.common.collect.Iterators;
import com.google.common.util.concurrent.ListeningExecutorService;
import com.google.common.util.concurrent.MoreExecutors;
import io.minio.Result;
import io.minio.StatObjectResponse;
import io.minio.messages.Item;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.BlockingThreadPoolExecutorService;
import org.apache.hadoop.util.Progressable;
import org.apache.hadoop.util.SemaphoredDelegatingExecutor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.EnumSet;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;
public class MinIOOSSFileSystem extends FileSystem {
private static final Logger LOG = LoggerFactory.getLogger(MinIOOSSFileSystem.class);
private URI uri;
private String bucket;
private String username;
private Path workingDir;
private int blockOutputActiveBlocks;
private MinIOOSSFileSystemStore store;
private int maxKeys;
private int maxReadAheadPartNumber;
private int maxConcurrentCopyTasksPerDir;
private ListeningExecutorService boundedThreadPool;
private ListeningExecutorService boundedCopyThreadPool;
private static final PathFilter DEFAULT_FILTER = new PathFilter() {
public boolean accept(Path file) {
return true;
}
};
public MinIOOSSFileSystem() {
}
public FSDataOutputStream append(Path path, int bufferSize, Progressable progress) throws IOException {
throw new IOException("Append is not supported!");
}
public void close() throws IOException {
try {
this.store.close();
this.boundedThreadPool.shutdown();
this.boundedCopyThreadPool.shutdown();
} finally {
super.close();
}
}
public FSDataOutputStream create(Path path, FsPermission permission, boolean overwrite, int bufferSize, short replication, long blockSize, Progressable progress) throws IOException {
String key = this.pathToKey(path);
FileStatus status = null;
try {
status = this.getFileStatus(path);
if (status.isDirectory()) {
throw new FileAlreadyExistsException(path + " is a directory");
}
if (!overwrite) {
throw new FileAlreadyExistsException(path + " already exists");
}
LOG.debug("Overwriting file {}", path);
} catch (FileNotFoundException var13) {
}
long uploadPartSize = MinIOOSSUtils.getMultipartSizeProperty(this.getConf(), "fs.oss.multipart.upload.size", 104857600L);
return new FSDataOutputStream(new MinIOOSSBlockOutputStream(this.getConf(), this.store, key, uploadPartSize, new SemaphoredDelegatingExecutor(this.boundedThreadPool, this.blockOutputActiveBlocks, true)), this.statistics);
}
public FSDataOutputStream createNonRecursive(Path path, FsPermission permission, EnumSet<CreateFlag> flags, int bufferSize, short replication, long blockSize, Progressable progress) throws IOException {
Path parent = path.getParent();
if (parent != null && !this.getFileStatus(parent).isDirectory()) {
throw new FileAlreadyExistsException("Not a directory: " + parent);
} else {
return this.create(path, permission, flags.contains(CreateFlag.OVERWRITE), bufferSize, replication, blockSize, progress);
}
}
public boolean delete(Path path, boolean recursive) throws IOException {
try {
return this.innerDelete(this.getFileStatus(path), recursive);
} catch (FileNotFoundException var4) {
LOG.debug("Couldn't delete {} - does not exist", path);
return false;
}
}
private boolean innerDelete(FileStatus status, boolean recursive) throws IOException {
Path f = status.getPath();
String p = f.toUri().getPath();
FileStatus[] statuses;
if (p.equals("/")) {
statuses = this.listStatus(status.getPath());
boolean isEmptyDir = statuses.length <= 0;
return this.rejectRootDirectoryDelete(isEmptyDir, recursive);
} else {
String key = this.pathToKey(f);
if (status.isDirectory()) {
if (!recursive) {
statuses = this.listStatus(status.getPath());
if (statuses.length > 0) {
throw new IOException("Cannot remove directory " + f + ": It is not empty!");
}
key = MinIOOSSUtils.maybeAddTrailingSlash(key);
this.store.deleteObject(key);
} else {
this.store.deleteDirs(key);
}
} else {
this.store.deleteObject(key);
}
this.createFakeDirectoryIfNecessary(f);
return true;
}
}
private boolean rejectRootDirectoryDelete(boolean isEmptyDir, boolean recursive) throws IOException {
LOG.info("oss delete the {} root directory of {}", this.bucket, recursive);
if (isEmptyDir) {
return true;
} else if (recursive) {
return false;
} else {
throw new PathIOException(this.bucket, "Cannot delete root path");
}
}
private void createFakeDirectoryIfNecessary(Path f) throws IOException {
String key = this.pathToKey(f);
if (StringUtils.isNotEmpty(key) && !this.exists(f)) {
LOG.debug("Creating new fake directory at {}", f);
this.mkdir(this.pathToKey(f.getParent()));
}
}
public FileStatus getFileStatus(Path path) throws IOException {
Path qualifiedPath = path.makeQualified(this.uri, this.workingDir);
String key = this.pathToKey(qualifiedPath);
if (key.length() == 0) {
return new OSSFileStatus(0L, true, 1, 0L, 0L, qualifiedPath, this.username);
} else {
StatObjectResponse meta = this.store.getObjectMetadata(key);
if (meta == null && !key.endsWith("/")) {
key = key + "/";
meta = this.store.getObjectMetadata(key);
}
// Like the Aliyun implementation, fall back to a listing in case the object metadata has been lost entirely.
if (meta == null) {
Iterable<Result<Item>> results = this.store.listObjects(key, 1, (String) null, false);
if (Iterators.size(results.iterator()) == 0) {
throw new FileNotFoundException(path + ": No such file or directory!");
} else {
return new OSSFileStatus(0L, true, 1, 0L, 0L, qualifiedPath, this.username);
}
} else {
return MinIOOSSUtils.objectRepresentsDirectory(key, 0) ? new OSSFileStatus(0L, true, 1, 0L, 0L, qualifiedPath, this.username) : new OSSFileStatus(meta.size(), false, 1, this.getDefaultBlockSize(path), meta.lastModified().toInstant().toEpochMilli(), qualifiedPath, this.username);
}
}
}
public String getScheme() {
return "oss";
}
public URI getUri() {
return this.uri;
}
public int getDefaultPort() {
return -1;
}
public Path getWorkingDirectory() {
return this.workingDir;
}
/**
* @deprecated
*/
@Deprecated
public long getDefaultBlockSize() {
return this.getConf().getLong("fs.oss.block.size", 67108864L);
}
public String getCanonicalServiceName() {
return null;
}
public void initialize(URI name, Configuration conf) throws IOException {
super.initialize(name, conf);
this.bucket = name.getHost();
this.uri = URI.create(name.getScheme() + "://" + name.getAuthority());
this.username = UserGroupInformation.getCurrentUser().getShortUserName();
this.workingDir = (new Path("/user", this.username)).makeQualified(this.uri, (Path) null);
long keepAliveTime = MinIOOSSUtils.longOption(conf, "fs.oss.threads.keepalivetime", 60L, 0L);
this.blockOutputActiveBlocks = MinIOOSSUtils.intOption(conf, "fs.oss.upload.active.blocks", 4, 1);
this.store = new MinIOOSSFileSystemStore();
this.store.initialize(name, conf, this.username, this.statistics);
this.maxKeys = conf.getInt("fs.oss.paging.maximum", 1000);
int threadNum = MinIOOSSUtils.intPositiveOption(conf, "fs.oss.multipart.download.threads", 10);
int totalTasks = MinIOOSSUtils.intPositiveOption(conf, "fs.oss.max.total.tasks", 128);
this.maxReadAheadPartNumber = MinIOOSSUtils.intPositiveOption(conf, "fs.oss.multipart.download.ahead.part.max.number", 4);
this.boundedThreadPool = BlockingThreadPoolExecutorService.newInstance(threadNum, totalTasks, keepAliveTime, TimeUnit.SECONDS, "oss-transfer-shared");
this.maxConcurrentCopyTasksPerDir = MinIOOSSUtils.intPositiveOption(conf, "fs.oss.max.copy.tasks.per.dir", 5);
int maxCopyThreads = MinIOOSSUtils.intPositiveOption(conf, "fs.oss.max.copy.threads", 25);
int maxCopyTasks = MinIOOSSUtils.intPositiveOption(conf, "fs.oss.max.copy.tasks", 10485760);
this.boundedCopyThreadPool = BlockingThreadPoolExecutorService.newInstance(maxCopyThreads, maxCopyTasks, 60L, TimeUnit.SECONDS, "oss-copy-unbounded");
this.setConf(conf);
}
private String pathToKey(Path path) {
if (!path.isAbsolute()) {
path = new Path(this.workingDir, path);
}
return path.toUri().getPath().substring(1);
}
private Path keyToPath(String key) {
return new Path("/" + key);
}
public FileStatus[] listStatus(Path path) throws IOException {
String key = this.pathToKey(path);
if (LOG.isDebugEnabled()) {
LOG.debug("List status for path: " + path);
}
List<FileStatus> result = new ArrayList();
FileStatus fileStatus = this.getFileStatus(path);
if (fileStatus.isDirectory()) {
if (LOG.isDebugEnabled()) {
LOG.debug("listStatus: doing listObjects for directory " + key);
}
Iterable<Result<Item>> results = this.store.listObjects(key, this.maxKeys, (String) null, false);
while (true) {
Iterator<Result<Item>> var6 = results.iterator();
String tempNextMaker = null;
List<Item> dirItemList = new ArrayList<>();
while (var6.hasNext()) {
Result<Item> objectSummary = var6.next();
String objKey = null;
Item item = null;
try {
item = objectSummary.get();
} catch (Exception e) {
throw new IOException(e);
}
objKey = item.objectName();
if (objKey.equals(key + "/")) {
if (LOG.isDebugEnabled()) {
LOG.debug("Ignoring: " + objKey);
}
} else {
Path keyPath = this.keyToPath(objKey).makeQualified(this.uri, this.workingDir);
if (LOG.isDebugEnabled()) {
LOG.debug("Adding: fi: " + keyPath);
}
if (!item.isDir()) {
result.add(new OSSFileStatus(item.size(), item.isDir(), 1, this.getDefaultBlockSize(keyPath), item.isDir() ? System.currentTimeMillis() : item.lastModified().toInstant().toEpochMilli(), keyPath, this.username));
tempNextMaker = item.objectName();
}else{
dirItemList.add(item);
}
}
}
for (Item item : dirItemList) {
String prefix = item.objectName();
if (prefix.equals(key + "/")) {
if (LOG.isDebugEnabled()) {
LOG.debug("Ignoring: " + prefix);
}
} else {
Path keyPath = this.keyToPath(prefix).makeQualified(this.uri, this.workingDir);
if (LOG.isDebugEnabled()) {
LOG.debug("Adding: rd: " + keyPath);
}
result.add(this.getFileStatus(keyPath));
}
}
if (LOG.isDebugEnabled()) {
LOG.debug("listStatus: list truncated - getting next batch");
}
if (Iterators.size(var6) < maxKeys) {
break;
}
String nextMarker = tempNextMaker;
results = this.store.listObjects(key, this.maxKeys, nextMarker, false);
}
} else {
if (LOG.isDebugEnabled()) {
LOG.debug("Adding: rd (not a dir): " + path);
}
result.add(fileStatus);
}
return (FileStatus[]) result.toArray(new FileStatus[result.size()]);
}
public RemoteIterator<LocatedFileStatus> listFiles(final Path f, boolean recursive) throws IOException {
Path qualifiedPath = f.makeQualified(this.uri, this.workingDir);
final FileStatus status = this.getFileStatus(qualifiedPath);
PathFilter filter = new PathFilter() {
public boolean accept(Path path) {
return status.isFile() || !path.equals(f);
}
};
FileStatusAcceptor acceptor = new FileStatusAcceptor.AcceptFilesOnly(qualifiedPath);
return this.innerList(f, status, filter, acceptor, recursive);
}
public RemoteIterator<LocatedFileStatus> listLocatedStatus(Path f) throws IOException {
return this.listLocatedStatus(f, DEFAULT_FILTER);
}
public RemoteIterator<LocatedFileStatus> listLocatedStatus(Path f, PathFilter filter) throws IOException {
Path qualifiedPath = f.makeQualified(this.uri, this.workingDir);
FileStatus status = this.getFileStatus(qualifiedPath);
FileStatusAcceptor acceptor = new FileStatusAcceptor.AcceptAllButSelf(qualifiedPath);
return this.innerList(f, status, filter, acceptor, false);
}
private RemoteIterator<LocatedFileStatus> innerList(Path f, FileStatus status, PathFilter filter, FileStatusAcceptor acceptor, boolean recursive) throws IOException {
Path qualifiedPath = f.makeQualified(this.uri, this.workingDir);
String key = this.pathToKey(qualifiedPath);
if (status.isFile()) {
LOG.debug("{} is a File", qualifiedPath);
BlockLocation[] locations = this.getFileBlockLocations(status, 0L, status.getLen());
return this.store.singleStatusRemoteIterator(filter.accept(f) ? status : null, locations);
} else {
return this.store.createLocatedFileStatusIterator(key, this.maxKeys, this, filter, acceptor, recursive ? null : "/");
}
}
private boolean mkdir(String key) throws IOException {
String dirName = key;
if (StringUtils.isNotEmpty(key)) {
if (!key.endsWith("/")) {
dirName = key + "/";
}
this.store.storeEmptyFile(dirName);
}
return true;
}
public boolean mkdirs(Path path, FsPermission permission) throws IOException {
try {
FileStatus fileStatus = this.getFileStatus(path);
if (fileStatus.isDirectory()) {
return true;
} else {
throw new FileAlreadyExistsException("Path is a file: " + path);
}
} catch (FileNotFoundException var5) {
this.validatePath(path);
String key = this.pathToKey(path);
return this.mkdir(key);
}
}
private void validatePath(Path path) throws IOException {
Path fPart = path.getParent();
while (true) {
try {
FileStatus fileStatus = this.getFileStatus(fPart);
if (!fileStatus.isDirectory()) {
throw new FileAlreadyExistsException(String.format("Can't make directory for path '%s', it is a file.", fPart));
}
break;
} catch (FileNotFoundException var4) {
fPart = fPart.getParent();
if (fPart == null) {
break;
}
}
}
}
public FSDataInputStream open(Path path, int bufferSize) throws IOException {
FileStatus fileStatus = this.getFileStatus(path);
if (fileStatus.isDirectory()) {
throw new FileNotFoundException("Can't open " + path + " because it is a directory");
} else {
return new FSDataInputStream(new MinIOOSSInputStream(this.getConf(), new SemaphoredDelegatingExecutor(this.boundedThreadPool, this.maxReadAheadPartNumber, true), this.maxReadAheadPartNumber, this.store, this.pathToKey(path), fileStatus.getLen(), this.statistics));
}
}
public boolean rename(Path srcPath, Path dstPath) throws IOException {
if (srcPath.isRoot()) {
if (LOG.isDebugEnabled()) {
LOG.debug("Cannot rename the root of a filesystem");
}
return false;
} else {
Path parent;
for (parent = dstPath.getParent(); parent != null && !srcPath.equals(parent); parent = parent.getParent()) {
}
if (parent != null) {
return false;
} else {
FileStatus srcStatus = this.getFileStatus(srcPath);
FileStatus dstStatus;
try {
dstStatus = this.getFileStatus(dstPath);
} catch (FileNotFoundException var9) {
dstStatus = null;
}
if (dstStatus == null) {
dstStatus = this.getFileStatus(dstPath.getParent());
if (!dstStatus.isDirectory()) {
throw new IOException(String.format("Failed to rename %s to %s, %s is a file", srcPath, dstPath, dstPath.getParent()));
}
} else {
if (srcStatus.getPath().equals(dstStatus.getPath())) {
return !srcStatus.isDirectory();
}
if (!dstStatus.isDirectory()) {
throw new FileAlreadyExistsException(String.format("Failed to rename %s to %s, file already exists!", srcPath, dstPath));
}
dstPath = new Path(dstPath, srcPath.getName());
FileStatus[] statuses;
try {
statuses = this.listStatus(dstPath);
} catch (FileNotFoundException var8) {
statuses = null;
}
if (statuses != null && statuses.length > 0) {
throw new FileAlreadyExistsException(String.format("Failed to rename %s to %s, file already exists or not empty!", srcPath, dstPath));
}
}
boolean succeed;
if (srcStatus.isDirectory()) {
succeed = this.copyDirectory(srcPath, dstPath);
} else {
succeed = this.copyFile(srcPath, srcStatus.getLen(), dstPath);
}
return srcPath.equals(dstPath) || succeed && this.delete(srcPath, true);
}
}
}
private boolean copyFile(Path srcPath, long srcLen, Path dstPath) {
String srcKey = this.pathToKey(srcPath);
String dstKey = this.pathToKey(dstPath);
return this.store.copyFile(srcKey, srcLen, dstKey);
}
private boolean copyDirectory(Path srcPath, Path dstPath) throws IOException {
String srcKey = MinIOOSSUtils.maybeAddTrailingSlash(this.pathToKey(srcPath));
String dstKey = MinIOOSSUtils.maybeAddTrailingSlash(this.pathToKey(dstPath));
if (dstKey.startsWith(srcKey)) {
if (LOG.isDebugEnabled()) {
LOG.debug("Cannot rename a directory to a subdirectory of self");
}
return false;
} else {
this.store.storeEmptyFile(dstKey);
MinIOOSSCopyFileContext copyFileContext = new MinIOOSSCopyFileContext();
ExecutorService executorService = MoreExecutors.listeningDecorator(new SemaphoredDelegatingExecutor(this.boundedCopyThreadPool, this.maxConcurrentCopyTasksPerDir, true));
Iterable<Result<Item>> objects = this.store.listObjects(srcKey, this.maxKeys, (String) null, true);
int copiesToFinish = 0;
while (true) {
Iterator var9 = objects.iterator();
String tempNextMaker = null;
while (var9.hasNext()) {
Result<Item> objectSummary = (Result<Item>) var9.next();
Item item = null;
try {
item = objectSummary.get();
} catch (Exception e) {
LOG.error(e.getMessage());
}
String newKey = dstKey.concat(item.objectName().substring(srcKey.length()));
executorService.execute(new MinIOOSSCopyFileTask(this.store, item.objectName(), Iterators.size(var9), newKey, copyFileContext));
++copiesToFinish;
if (copyFileContext.isCopyFailure()) {
break;
}
tempNextMaker = item.objectName();
}
if (Iterators.size(var9) < maxKeys) {
copyFileContext.lock();
try {
copyFileContext.awaitAllFinish(copiesToFinish);
} catch (InterruptedException var15) {
LOG.warn("interrupted when wait copies to finish");
} finally {
copyFileContext.unlock();
}
return !copyFileContext.isCopyFailure();
}
String nextMarker = tempNextMaker;
objects = this.store.listObjects(srcKey, this.maxKeys, nextMarker, true);
}
}
}
public void setWorkingDirectory(Path dir) {
this.workingDir = dir;
}
public MinIOOSSFileSystemStore getStore() {
return this.store;
}
}
5、AliyunOSSBlockOutputStream——>MinIOOSSBlockOutputStream
- this.store.uploadObjectPart(this.key, this.blockFile, this.blockSize); is the core of the multipart upload here. MinIO already implements multipart upload internally, so the MinIO upload call is used directly. The Aliyun OSS implementation instead splits a large file into roughly 5 MB parts, uploads them one by one and merges them into the final object stored in OSS.
package org.apache.hadoop.fs.minio.oss;
import com.google.common.util.concurrent.ListeningExecutorService;
import com.google.common.util.concurrent.MoreExecutors;
import org.apache.hadoop.conf.Configuration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.*;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.concurrent.ExecutorService;
public class MinIOOSSBlockOutputStream extends OutputStream {
private static final Logger LOG = LoggerFactory.getLogger(MinIOOSSBlockOutputStream.class);
private MinIOOSSFileSystemStore store;
private Configuration conf;
private boolean closed;
private String key;
private File blockFile;
private Map<Integer, File> blockFiles = new HashMap();
private long blockSize;
private int blockId = 0;
private long blockWritten = 0L;
private String uploadId = null;
private final ListeningExecutorService executorService;
private OutputStream blockStream;
private final byte[] singleByte = new byte[1];
public MinIOOSSBlockOutputStream(Configuration conf, MinIOOSSFileSystemStore store, String key, Long blockSize, ExecutorService executorService) throws IOException {
this.store = store;
this.conf = conf;
this.key = key;
this.blockSize = blockSize;
this.blockFile = this.newBlockFile();
this.blockStream = new BufferedOutputStream(new FileOutputStream(this.blockFile));
this.executorService = MoreExecutors.listeningDecorator(executorService);
}
private File newBlockFile() throws IOException {
return MinIOOSSUtils.createTmpFileForWrite(String.format("oss-block-%04d-", this.blockId), this.blockSize, this.conf);
}
public synchronized void flush() throws IOException {
this.blockStream.flush();
}
public synchronized void close() throws IOException {
if (!this.closed) {
this.blockStream.flush();
this.blockStream.close();
if (!this.blockFiles.values().contains(this.blockFile)) {
++this.blockId;
this.blockFiles.put(this.blockId, this.blockFile);
}
try {
if (this.blockFiles.size() == 1) {
this.store.uploadObject(this.key, this.blockFile);
} else {
this.store.uploadObjectPart(this.key,this.blockFile,this.blockSize);
}
} finally {
this.removeTemporaryFiles();
this.closed = true;
}
}
}
public synchronized void write(int b) throws IOException {
this.singleByte[0] = (byte)b;
this.write(this.singleByte, 0, 1);
}
public synchronized void write(byte[] b, int off, int len) throws IOException {
if (this.closed) {
throw new IOException("Stream closed.");
} else {
this.blockStream.write(b, off, len);
this.blockWritten += (long)len;
if (this.blockWritten >= this.blockSize) {
this.uploadCurrentPart();
this.blockWritten = 0L;
}
}
}
private void removeTemporaryFiles() {
Iterator var1 = this.blockFiles.values().iterator();
while(var1.hasNext()) {
File file = (File)var1.next();
if (file != null && file.exists() && !file.delete()) {
LOG.warn("Failed to delete temporary file {}", file);
}
}
}
private void uploadCurrentPart() throws IOException {
this.blockStream.flush();
this.blockStream.close();
++this.blockId;
this.blockFiles.put(this.blockId, this.blockFile);
File currentFile = this.blockFile;
this.store.uploadObjectPart(this.key, currentFile, blockSize);
this.blockFile = this.newBlockFile();
this.blockStream = new BufferedOutputStream(new FileOutputStream(this.blockFile));
}
}
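For reference, here is a minimal sketch of what the store-side upload calls used above (uploadObject / uploadObjectPart) could look like when backed directly by the MinIO client. This is an illustration under assumptions, not the actual MinIOOSSFileSystemStore source: the minioClient and bucketName fields, the class name, and the exception handling are hypothetical, and MinIO's putObject performs the part splitting internally once a part size is supplied.
import io.minio.MinioClient;
import io.minio.PutObjectArgs;
import io.minio.UploadObjectArgs;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;

public class MinioUploadSketch {
    private MinioClient minioClient;   // built as in the init() shown earlier
    private String bucketName;         // assumption: bucket parsed from fs.defaultFS

    // Single-shot upload for an object that fits in one block.
    public void uploadObject(String key, File file) throws IOException {
        try {
            minioClient.uploadObject(UploadObjectArgs.builder()
                    .bucket(bucketName)
                    .object(key)
                    .filename(file.getAbsolutePath())
                    .build());
        } catch (Exception e) {
            throw new IOException("Upload failed for key " + key, e);
        }
    }

    // Multipart-style upload: MinIO splits the stream into parts of partSize internally.
    public void uploadObjectPart(String key, File partFile, long partSize) throws IOException {
        try (InputStream in = new FileInputStream(partFile)) {
            minioClient.putObject(PutObjectArgs.builder()
                    .bucket(bucketName)
                    .object(key)
                    .stream(in, partFile.length(), partSize)
                    .build());
        } catch (Exception e) {
            throw new IOException("Upload part failed for key " + key, e);
        }
    }
}
The real store may accumulate and compose parts differently; the point is only that MinIO's SDK already handles the multipart mechanics, so no manual 5 MB merge step like the one in the AliyunOSS implementation is needed.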
6、AliyunOSSFileReaderTask——>MinIOOSSFileReaderTask
- One place where MinIO differs from Aliyun OSS: MinIO's retrieve method reads the stream by offset and length, whereas the Aliyun implementation works directly with readBuffer.getByteStart() and readBuffer.getByteEnd(). Here getByteEnd() must have 1 added to it so that the full (inclusive) range is read back. (A hedged sketch of a MinIO-backed retrieve follows the class listing below.)
package org.apache.hadoop.fs.minio.oss;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.retry.RetryPolicies;
import org.apache.hadoop.io.retry.RetryPolicy;
import org.apache.hadoop.io.retry.RetryPolicy.RetryAction.RetryDecision;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.TimeUnit;
public class MinIOOSSFileReaderTask implements Runnable {
public static final Logger LOG = LoggerFactory.getLogger(MinIOOSSFileReaderTask.class);
private String key;
private MinIOOSSFileSystemStore store;
private ReadBuffer readBuffer;
private static final int MAX_RETRIES = 3;
private RetryPolicy retryPolicy;
public MinIOOSSFileReaderTask(String key, MinIOOSSFileSystemStore store, ReadBuffer readBuffer) {
this.key = key;
this.store = store;
this.readBuffer = readBuffer;
RetryPolicy defaultPolicy = RetryPolicies.retryUpToMaximumCountWithFixedSleep(MAX_RETRIES, 3L, TimeUnit.SECONDS);
Map<Class<? extends Exception>, RetryPolicy> policies = new HashMap<>();
policies.put(IOException.class, defaultPolicy);
policies.put(IndexOutOfBoundsException.class, RetryPolicies.TRY_ONCE_THEN_FAIL);
policies.put(NullPointerException.class, RetryPolicies.TRY_ONCE_THEN_FAIL);
this.retryPolicy = RetryPolicies.retryByException(defaultPolicy, policies);
}
public void run() {
int retries = 0;
this.readBuffer.lock();
try {
while (true) {
try (InputStream in = this.store.retrieve(this.key, this.readBuffer.getByteStart(), this.readBuffer.getByteEnd() + 1)) {
int start = 0;
int ret;
for (int toRead = this.readBuffer.getBuffer().length; toRead > 0; start += ret) {
ret = in.read(this.readBuffer.getBuffer(), start, toRead);
if (ret < 0) {
throw new IOException("Premature EOF from inputStream");
}
toRead -= ret;
}
this.readBuffer.setStatus(ReadBuffer.STATUS.SUCCESS);
break;
} catch (Exception e) {
LOG.warn("Exception thrown when retrieve key: " + this.key + ", exception: " + e);
try {
RetryPolicy.RetryAction rc = this.retryPolicy.shouldRetry(e, retries++, 0, true);
if (rc.action != RetryDecision.RETRY) {
break;
}
Thread.sleep(rc.delayMillis);
} catch (Exception ex) {
LOG.warn("Exception thrown when call shouldRetry, exception " + ex);
break;
}
}
}
if (this.readBuffer.getStatus() != ReadBuffer.STATUS.SUCCESS) {
this.readBuffer.setStatus(ReadBuffer.STATUS.ERROR);
}
this.readBuffer.signalAll();
} finally {
this.readBuffer.unlock();
}
}
}
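To make the offset/length difference concrete, here is a minimal sketch of a MinIO-backed retrieve. Again this is an assumption for illustration rather than the actual store code: the minioClient and bucketName fields and the class name are hypothetical. The reader task passes getByteEnd() + 1 as an exclusive end, so the ranged read length becomes byteEndExclusive - byteStart, which covers the inclusive range [byteStart, byteEnd].
import io.minio.GetObjectArgs;
import io.minio.MinioClient;
import java.io.IOException;
import java.io.InputStream;

public class MinioRetrieveSketch {
    private MinioClient minioClient;   // built as in the init() shown earlier
    private String bucketName;         // assumption: bucket parsed from fs.defaultFS

    // Ranged read: byteEndExclusive is getByteEnd() + 1, so the length covers
    // the inclusive range [byteStart, byteEnd].
    public InputStream retrieve(String key, long byteStart, long byteEndExclusive) throws IOException {
        try {
            return minioClient.getObject(GetObjectArgs.builder()
                    .bucket(bucketName)
                    .object(key)
                    .offset(byteStart)
                    .length(byteEndExclusive - byteStart)
                    .build());
        } catch (Exception e) {
            throw new IOException("Retrieve failed for key " + key, e);
        }
    }
}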
五、Summary
- Stress testing with Spark in local mode:
  - Writing 100,000 rows in one batch causes no problems.
  - Writing 500,000 rows in one batch drives batch_num too high and throws a "Cannot seek after EOF" error.
    - Even if that error is suppressed, the write still fails: the metadata has already fixed the expected file size, so the total Content-Length read back does not match the actual file size.
- The two factors with the biggest impact on performance (a rough configuration sketch follows this list):
  - whether Spark runs in cluster mode rather than local mode
  - how much memory is available to Spark
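As a rough illustration of those two knobs in code, the sketch below sets the run mode and executor memory explicitly. The values and class name are hypothetical, not tuned recommendations; in practice the master and memory sizes are usually passed to spark-submit rather than hard-coded.
import org.apache.spark.SparkConf;
import org.apache.spark.sql.SparkSession;

public class ClusterModeSketch {
    public static void main(String[] args) {
        // Illustrative only: run on a cluster instead of local[*] and give executors more memory.
        SparkConf conf = new SparkConf()
                .setAppName("Hudi MinIO write")
                .setMaster("yarn")                      // assumes a YARN cluster is available
                .set("spark.executor.memory", "8g")     // hypothetical size, adjust to the workload
                .set("spark.executor.instances", "4");
        SparkSession spark = SparkSession.builder().config(conf).getOrCreate();
        // ... the Hudi write itself is the same as in the earlier OssHudiDemo ...
        spark.stop();
    }
}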
六、Other
- Aliyun OSS parameter documentation
- https://help.aliyun.com/document_detail/84841.html
- MinIO API documentation
- https://docs.min.io/docs/java-client-api-reference.html#listObjects
- MinIO Chinese documentation
- http://docs.minio.org.cn/docs/master/java-client-api-reference#putObject
- MinIO metadata introduction
- https://blog.51cto.com/u_14625168/2511642
- Spark with S3
- https://blog.csdn.net/helloword4217/article/details/99691961
- Apache Hudi with Aliyun OSS
- https://hudi.apache.org/cn/docs/next/oss_hoodie
- Using Spark with Hudi
- https://blog.csdn.net/weixin_39636364/article/details/120358892
- Hadoop EOF Exception
- https://cwiki.apache.org/confluence/display/HADOOP2/EOFException