Hudi 集成 Minio 源码修改指南

Hudi 集成 Minio

一、基于的组件

  • Hadoop-aliyun:3.2.1.jar
  • minio:7.0.2.jar(注意:下文 pom.xml 示例引入的是 minio 8.3.4,示例代码中使用的 StatObjectResponse 等 API 需要 8.x 版本)

二、Hudi写入OSS代码

1. 引入

云上对象存储价格低廉,不少公司已将其作为主要的存储方案;Hudi作为数据湖解决方案,支持对象存储也必不可少。此前AWS EMR已经内置集成Hudi,这意味着可以在S3上无缝使用Hudi。国内用户可能更多使用阿里云OSS作为云上存储方案,如果想基于OSS构建数据湖,Hudi是否支持呢?Hudi社区主分支已经合并了支持OSS的PR,现在只需基于master分支自行构建,或者等待下一个版本发布后直接使用,经过简单的配置即可将数据写入OSS。

2. 配置

2.1 pom依赖

需要额外添加的主要pom依赖如下

<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-aliyun</artifactId>
    <version>3.2.1</version>
</dependency>
<dependency>
    <groupId>com.aliyun.oss</groupId>
    <artifactId>aliyun-sdk-oss</artifactId>
    <version>3.8.1</version>
</dependency>

2.2 core-site.xml配置

若需访问OSS,需要修改core-site.xml,关键配置如下

<property>
        <name>fs.defaultFS</name>
        <value>oss://bucketname/</value>
    </property>

    <property>
      <name>fs.oss.endpoint</name>
      <value>oss-endpoint-address</value>
      <description>Aliyun OSS endpoint to connect to.</description>
    </property>

    <property>
      <name>fs.oss.accessKeyId</name>
      <value>oss_key</value>
      <description>Aliyun access key ID</description>
    </property>

    <property>
      <name>fs.oss.accessKeySecret</name>
      <value>oss-secret</value>
      <description>Aliyun access key secret</description>
    </property>

    <property>
      <name>fs.oss.impl</name>
      <value>org.apache.hadoop.fs.aliyun.oss.AliyunOSSFileSystem</value>
    </property>

3. 源码

示例源码如下

import org.apache.hudi.QuickstartUtils.*;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

import java.io.IOException;
import java.util.List;

import static org.apache.hudi.QuickstartUtils.convertToStringList;
import static org.apache.hudi.QuickstartUtils.getQuickstartWriteConfigs;
import static org.apache.hudi.config.HoodieWriteConfig.TABLE_NAME;
import static org.apache.spark.sql.SaveMode.Overwrite;

/**
 * Minimal Hudi quick-start job: generates sample trip records, writes them as a Hudi
 * table and reads them back through Spark SQL.
 *
 * <p>The base path has no scheme, so it is resolved against {@code fs.defaultFS}; with the
 * core-site.xml shown in this guide that is the OSS bucket.
 */
public class OssHudiDemo {
    /**
     * Runs the write-then-read round trip on a local two-thread Spark master.
     *
     * @param args unused
     * @throws IOException declared for compatibility with the data generator helpers
     */
    public static void main(String[] args) throws IOException {
        SparkSession spark = SparkSession.builder().appName("Hoodie Datasource test")
                .master("local[2]")
                .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
                .config("spark.io.compression.codec", "snappy")
                .config("spark.sql.hive.convertMetastoreParquet", "false")
                .getOrCreate();
        // Stop the session even when the write or the query fails.
        try {
            JavaSparkContext jsc = new JavaSparkContext(spark.sparkContext());

            String tableName = "hudi_trips_cow";
            String basePath = "/tmp/hudi_trips_cow";
            DataGenerator dataGen = new DataGenerator();

            // Generate 10 sample insert records and load them as a DataFrame.
            List<String> inserts = convertToStringList(dataGen.generateInserts(10));
            Dataset<Row> df = spark.read().json(jsc.parallelize(inserts, 2));
            df.write().format("org.apache.hudi").
                    options(getQuickstartWriteConfigs()).
                    option(TABLE_NAME, tableName).
                    mode(Overwrite).
                    save(basePath);

            // Read the table back; the glob matches the partition directory levels.
            Dataset<Row> roViewDF = spark.read().format("org.apache.hudi").load(basePath + "/*/*/*");
            // createOrReplaceTempView replaces registerTempTable, deprecated since Spark 2.0.
            roViewDF.createOrReplaceTempView("hudi_ro_table");
            spark.sql("select *  from  hudi_ro_table").show(false);
        } finally {
            spark.stop();
        }
    }
}

上述代码先将数据写入OSS(下图可以看到OSS的Bucket中已经成功写入了数据),然后再通过Spark查询写入的结果。

[图片缺失:OSS Bucket 中已成功写入数据的截图]

三、数据写入Minio代码

1、pom.xml的依赖

	<dependency>
           <groupId>junit</groupId>
           <artifactId>junit</artifactId>
           <version>4.12</version>
           <scope>test</scope>
    </dependency>


    <!--MINIO-->
    <dependency>
        <groupId>io.minio</groupId>
        <artifactId>minio</artifactId>
        <version>8.3.4</version>
    </dependency>

    <dependency>
        <groupId>com.squareup.okhttp3</groupId>
        <artifactId>okhttp</artifactId>
        <version>4.9.3</version>
    </dependency>

2、minio上传数据

// Builds the shared MinioClient from the endpoint / accessKey / secretKey fields, which
// are declared outside this snippet.
// NOTE(review): no JUnit lifecycle annotation is visible on this method. With the
// JUnit 4.12 dependency declared above it presumably needs @Before (or an explicit call)
// so that minioClient is initialized before the tests run; confirm against the full class.
public void init(){
        minioClient = MinioClient.builder().endpoint(endpoint).credentials(accessKey, secretKey).build();
    }

	@Test
    /**
     * Uploads a local PNG file to the bucket under the object name {@code test/pic3.png}.
     *
     * <p>Failures are printed rather than rethrown, so "finished" is printed either way.
     */
    public void upload(){
        File file = new File("/Users/shenyunsese/Desktop/pic3.png");
        String objectName="test/pic3.png";
        // try-with-resources closes the stream on success and on failure alike.
        try (FileInputStream fileInputStream = new FileInputStream(file)) {
            minioClient.putObject(PutObjectArgs.builder().bucket(bucket)
                    .object(objectName)
                    .contentType("image/png")
                    // file.length() is the exact size; InputStream.available() is only an
                    // estimate of the bytes readable without blocking.
                    .stream(fileInputStream, file.length(), -1).build());
        } catch (Exception e) {
            e.printStackTrace();
        }
        System.out.println("finished");
    }

3、minio下载数据

@Test
    /**
     * Downloads the object {@code test/pic3.png} from the bucket to a local file.
     *
     * <p>The object is stat-ed first and downloaded only when a stat response comes back.
     * Any exception is printed and otherwise ignored; "finished" is always printed.
     */
    public void download(){
        final String objectKey = "test/pic3.png";
        final String targetFile = "/Users/shenyunsese/Desktop/download2.png";
        try {
            StatObjectArgs statArgs = StatObjectArgs.builder().bucket(bucket).object(objectKey).build();
            StatObjectResponse stat = minioClient.statObject(statArgs);
            if (stat != null) {
                DownloadObjectArgs downloadArgs = DownloadObjectArgs.builder()
                        .bucket(bucket)
                        .object(objectKey)
                        .filename(targetFile)
                        .build();
                minioClient.downloadObject(downloadArgs);
            }
        } catch (Exception ex) {
            ex.printStackTrace();
        }
        System.out.println("finished");
    }

四、源码分析

4.1 Hadoop-aliyun:3.2.1.jar
1、AliyunCredentialsProvider
package org.apache.hadoop.fs.aliyun.oss;

import com.aliyun.oss.common.auth.Credentials;
import com.aliyun.oss.common.auth.CredentialsProvider;
import com.aliyun.oss.common.auth.DefaultCredentials;
import com.aliyun.oss.common.auth.InvalidCredentialsException;
import java.io.IOException;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;

## 阿里云权限校验
public class AliyunCredentialsProvider implements CredentialsProvider {
    private Credentials credentials = null;

    public AliyunCredentialsProvider(Configuration conf) throws IOException {
        String accessKeyId;
        String accessKeySecret;
        try {
        	## 获取XML文件accessKeyId
            accessKeyId = AliyunOSSUtils.getValueWithKey(conf, "fs.oss.accessKeyId");
            ## 获取XML文件accessKeySecret
            accessKeySecret = AliyunOSSUtils.getValueWithKey(conf, "fs.oss.accessKeySecret");
        } catch (IOException var7) {
            throw new InvalidCredentialsException(var7);
        }
        
        String securityToken;
        try {
        	## 获取XML文件securityToken
            securityToken = AliyunOSSUtils.getValueWithKey(conf, "fs.oss.securityToken");
        } catch (IOException var6) {
            securityToken = null;
        }

        if (!StringUtils.isEmpty(accessKeyId) && !StringUtils.isEmpty(accessKeySecret)) {
            if (StringUtils.isNotEmpty(securityToken)) {
            	## 生成默认权限校验对象
                this.credentials = new DefaultCredentials(accessKeyId, accessKeySecret, securityToken);
            } else {
                this.credentials = new DefaultCredentials(accessKeyId, accessKeySecret);
            }

        } else {
            throw new InvalidCredentialsException("AccessKeyId and AccessKeySecret should not be null or empty.");
        }
    }

    public void setCredentials(Credentials creds) {
        if (creds == null) {
            throw new InvalidCredentialsException("Credentials should not be null.");
        } else {
            this.credentials = creds;
        }
    }

    public Credentials getCredentials() {
        if (this.credentials == null) {
            throw new InvalidCredentialsException("Invalid credentials");
        } else {
            return this.credentials;
        }
    }
}
2、AliyunOSSBlockOutputStream
package org.apache.hadoop.fs.aliyun.oss;

import com.aliyun.oss.model.PartETag;
import com.google.common.util.concurrent.Futures;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.ListeningExecutorService;
import com.google.common.util.concurrent.MoreExecutors;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import org.apache.hadoop.conf.Configuration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

## 阿里云OSS块输出流
public class AliyunOSSBlockOutputStream extends OutputStream {
    private static final Logger LOG = LoggerFactory.getLogger(AliyunOSSBlockOutputStream.class);
    private AliyunOSSFileSystemStore store;
    private Configuration conf;
    private boolean closed;
    private String key;
    ## 文件块
    private File blockFile;
    private Map<Integer, File> blockFiles = new HashMap();
    private long blockSize;
    private int blockId = 0;
    private long blockWritten = 0L;
    private String uploadId = null;
    private final List<ListenableFuture<PartETag>> partETagsFutures;
    private final ListeningExecutorService executorService;
    ## 输出流
    private OutputStream blockStream;
    private final byte[] singleByte = new byte[1];

    public AliyunOSSBlockOutputStream(Configuration conf, AliyunOSSFileSystemStore store, String key, Long blockSize, ExecutorService executorService) throws IOException {
        this.store = store;
        this.conf = conf;
        this.key = key;
        this.blockSize = blockSize;
        this.blockFile = this.newBlockFile();
        this.blockStream = new BufferedOutputStream(new FileOutputStream(this.blockFile));
        this.partETagsFutures = new ArrayList(2);
        this.executorService = MoreExecutors.listeningDecorator(executorService);
    }

    private File newBlockFile() throws IOException {
    	## 创建阿里云临时文件写入——>阿里云OSS工具类
        return AliyunOSSUtils.createTmpFileForWrite(String.format("oss-block-%04d-", this.blockId), this.blockSize, this.conf);
    }

    public synchronized void flush() throws IOException {
        this.blockStream.flush();
    }

    public synchronized void close() throws IOException {
        if (!this.closed) {
            this.blockStream.flush();
            this.blockStream.close();
            ## 如果块文件Map不包含该块文件,将块文件写入Map最后
            if (!this.blockFiles.values().contains(this.blockFile)) {
                ++this.blockId;
                this.blockFiles.put(this.blockId, this.blockFile);
            }

            try {
                if (this.blockFiles.size() == 1) {
                	# 如果只存在一个文件直接上传对象
                    this.store.uploadObject(this.key, this.blockFile);
                } else {
                    if (this.blockWritten > 0L) {
                    
                    ## 使用ListenableFuture Guava帮我们检测Future是否完成了,如果完成就自动调用回调函数,这样可以减少并发程序的复杂度。
                        ListenableFuture<PartETag> partETagFuture = this.executorService.submit(() -> {
                        	
                        	## 上传对象
                            PartETag partETag = this.store.uploadPart(this.blockFile, this.key, this.uploadId, this.blockId);
                            return partETag;
                        });
                        
                        ## 将已完成的Future加入集合
                        this.partETagsFutures.add(partETagFuture);
                    }
					
				   ## 等待所有部分都上传完
                    List<PartETag> partETags = this.waitForAllPartUploads();
                    
                    
                    if (null == partETags) {
                        throw new IOException("Failed to multipart upload to oss, abort it.");
                    }

				  ## 完成文件上传
                    this.store.completeMultipartUpload(this.key, this.uploadId, new ArrayList(partETags));
                }
            } finally {
            	
            	## 移除临时文件
                this.removeTemporaryFiles();
                this.closed = true;
            }

        }
    }

   ## 写入(可复用)
    public synchronized void write(int b) throws IOException {
        this.singleByte[0] = (byte)b;
        this.write(this.singleByte, 0, 1);
    }

   ## 写入(可复用)
    public synchronized void write(byte[] b, int off, int len) throws IOException {
        if (this.closed) {
            throw new IOException("Stream closed.");
        } else {
            this.blockStream.write(b, off, len);
            this.blockWritten += (long)len;
            if (this.blockWritten >= this.blockSize) {
                this.uploadCurrentPart();
                this.blockWritten = 0L;
            }

        }
    }

   ## 移除临时文件(可复用)
    private void removeTemporaryFiles() {
        Iterator var1 = this.blockFiles.values().iterator();

        while(var1.hasNext()) {
            File file = (File)var1.next();
            if (file != null && file.exists() && !file.delete()) {
                LOG.warn("Failed to delete temporary file {}", file);
            }
        }

    }

   ## 移除块存储部分文件(可复用)
    private void removePartFiles() throws IOException {
        Iterator var1 = this.partETagsFutures.iterator();

        while(var1.hasNext()) {
            ListenableFuture<PartETag> partETagFuture = (ListenableFuture)var1.next();
            if (partETagFuture.isDone()) {
                try {
                    File blockFile = (File)this.blockFiles.get(((PartETag)partETagFuture.get()).getPartNumber());
                    if (blockFile != null && blockFile.exists() && !blockFile.delete()) {
                        LOG.warn("Failed to delete temporary file {}", blockFile);
                    }
                } catch (ExecutionException | InterruptedException var4) {
                    throw new IOException(var4);
                }
            }
        }

    }

	## 上传当前部分块文件(可复用)
    private void uploadCurrentPart() throws IOException {
        this.blockStream.flush();
        this.blockStream.close();
        if (this.blockId == 0) {
            this.uploadId = this.store.getUploadId(this.key);
        }

        ++this.blockId;
        this.blockFiles.put(this.blockId, this.blockFile);
        File currentFile = this.blockFile;
        int currentBlockId = this.blockId;
        ListenableFuture<PartETag> partETagFuture = this.executorService.submit(() -> {
            PartETag partETag = this.store.uploadPart(currentFile, this.key, this.uploadId, currentBlockId);
            return partETag;
        });
        this.partETagsFutures.add(partETagFuture);
        this.removePartFiles();
        this.blockFile = this.newBlockFile();
        this.blockStream = new BufferedOutputStream(new FileOutputStream(this.blockFile));
    }

    ## 等待所有块文件上传完毕(可复用)
    private List<PartETag> waitForAllPartUploads() throws IOException {
        LOG.debug("Waiting for {} uploads to complete", this.partETagsFutures.size());

        try {
            return (List)Futures.allAsList(this.partETagsFutures).get();
        } catch (InterruptedException var4) {
            LOG.warn("Interrupted partUpload", var4);
            Thread.currentThread().interrupt();
            return null;
        } catch (ExecutionException var5) {
            LOG.debug("While waiting for upload completion", var5);
            LOG.debug("Cancelling futures");
            Iterator var2 = this.partETagsFutures.iterator();

            while(var2.hasNext()) {
                ListenableFuture<PartETag> future = (ListenableFuture)var2.next();
                future.cancel(true);
            }

            this.store.abortMultipartUpload(this.key, this.uploadId);
            throw new IOException("Multi-part upload with id '" + this.uploadId + "' to " + this.key, var5);
        }
    }
}
3、AliyunOSSCopyFileContext
package org.apache.hadoop.fs.aliyun.oss;

import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.ReentrantLock;

/**
 * Shared state used to coordinate a batch of concurrent OSS copy tasks.
 *
 * <p>It holds a lock, a condition tied to that lock, a failure flag and a counter of
 * finished copies. The state-changing and waiting methods do not take the lock themselves;
 * callers wrap them in {@link #lock()} / {@link #unlock()}.
 */
public class AliyunOSSCopyFileContext {
    private final ReentrantLock lock = new ReentrantLock();
    private final Condition readyCondition;
    private boolean copyFailure;
    private int copiesFinish;

    /** Creates a context with no failure recorded and zero finished copies. */
    public AliyunOSSCopyFileContext() {
        this.readyCondition = this.lock.newCondition();
        this.copyFailure = false;
        this.copiesFinish = 0;
    }

    /** Acquires the context lock. */
    public void lock() {
        this.lock.lock();
    }

    /** Releases the context lock. */
    public void unlock() {
        this.lock.unlock();
    }

    /**
     * Waits until the number of finished copies equals {@code copiesToFinish}.
     * The caller must hold the context lock.
     *
     * @param copiesToFinish number of copies to wait for
     * @throws InterruptedException if the waiting thread is interrupted
     */
    public void awaitAllFinish(int copiesToFinish) throws InterruptedException {
        // Re-check in a loop: a wake-up does not guarantee the count has been reached.
        while (this.copiesFinish != copiesToFinish) {
            this.readyCondition.await();
        }
    }

    /** Wakes all threads waiting on the condition. The caller must hold the context lock. */
    public void signalAll() {
        this.readyCondition.signalAll();
    }

    /**
     * Reports whether a copy failure has been recorded.
     *
     * @return the current failure flag
     */
    public boolean isCopyFailure() {
        return this.copyFailure;
    }

    /**
     * Sets the failure flag.
     *
     * @param copyFailure new value of the flag
     */
    public void setCopyFailure(boolean copyFailure) {
        this.copyFailure = copyFailure;
    }

    /** Increments the finished-copies counter by one. */
    public void incCopiesFinish() {
        ++this.copiesFinish;
    }
}
4、AliyunOSSCopyFileTask
package org.apache.hadoop.fs.aliyun.oss;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

## 阿里云复制文件任务
public class AliyunOSSCopyFileTask implements Runnable {
    public static final Logger LOG = LoggerFactory.getLogger(AliyunOSSCopyFileTask.class);
    private AliyunOSSFileSystemStore store;
    private String srcKey;
    private long srcLen;
    private String dstKey;
    private AliyunOSSCopyFileContext copyFileContext;

    public AliyunOSSCopyFileTask(AliyunOSSFileSystemStore store, String srcKey, long srcLen, String dstKey, AliyunOSSCopyFileContext copyFileContext) {
        this.store = store;
        this.srcKey = srcKey;
        this.srcLen = srcLen;
        this.dstKey = dstKey;
        this.copyFileContext = copyFileContext;
    }

    public void run() {
        boolean fail = false;

        try {
            fail = !this.store.copyFile(this.srcKey, this.srcLen, this.dstKey);
        } catch (Exception var6) {
            LOG.warn("Exception thrown when copy from " + this.srcKey + " to " + this.dstKey + ", exception: " + var6);
            fail = true;
        } finally {
            this.copyFileContext.lock();
            if (fail) {
                this.copyFileContext.setCopyFailure(fail);
            }

            this.copyFileContext.incCopiesFinish();
            this.copyFileContext.signalAll();
            this.copyFileContext.unlock();
        }

    }
}
5、AliyunOSSFileReaderTask
package org.apache.hadoop.fs.aliyun.oss;

import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import org.apache.hadoop.fs.aliyun.oss.ReadBuffer.STATUS;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.retry.RetryPolicies;
import org.apache.hadoop.io.retry.RetryPolicy;
import org.apache.hadoop.io.retry.RetryPolicy.RetryAction.RetryDecision;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

## 阿里云文件读取任务
public class AliyunOSSFileReaderTask implements Runnable {
    public static final Logger LOG = LoggerFactory.getLogger(AliyunOSSFileReaderTask.class);
    private String key;
    private AliyunOSSFileSystemStore store;
    private ReadBuffer readBuffer;
    private static final int MAX_RETRIES = 3;
    private RetryPolicy retryPolicy;

    public AliyunOSSFileReaderTask(String key, AliyunOSSFileSystemStore store, ReadBuffer readBuffer) {
        this.key = key;
        this.store = store;
        this.readBuffer = readBuffer;
        RetryPolicy defaultPolicy = RetryPolicies.retryUpToMaximumCountWithFixedSleep(3, 3L, TimeUnit.SECONDS);
        Map<Class<? extends Exception>, RetryPolicy> policies = new HashMap();
        policies.put(IOException.class, defaultPolicy);
        policies.put(IndexOutOfBoundsException.class, RetryPolicies.TRY_ONCE_THEN_FAIL);
        policies.put(NullPointerException.class, RetryPolicies.TRY_ONCE_THEN_FAIL);
        this.retryPolicy = RetryPolicies.retryByException(defaultPolicy, policies);
    }

    public void run() {
        int retries = 0;
        this.readBuffer.lock();

        try {
            while(true) {
                try {
                    InputStream in = this.store.retrieve(this.key, this.readBuffer.getByteStart(), this.readBuffer.getByteEnd());
                    Throwable var29 = null;

                    try {
                        IOUtils.readFully(in, this.readBuffer.getBuffer(), 0, this.readBuffer.getBuffer().length);
                        this.readBuffer.setStatus(STATUS.SUCCESS);
                        break;
                    } catch (Throwable var22) {
                        var29 = var22;
                        throw var22;
                    } finally {
                        if (in != null) {
                            if (var29 != null) {
                                try {
                                    in.close();
                                } catch (Throwable var23) {
                                    var29.addSuppressed(var23);
                                }
                            } else {
                                in.close();
                            }
                        }

                    }
                } catch (Exception var26) {
                    Exception e = var26;
                    LOG.warn("Exception thrown when retrieve key: " + this.key + ", exception: " + var26);

                    try {
                        RetryPolicy.RetryAction rc = this.retryPolicy.shouldRetry(e, retries++, 0, true);
                        if (rc.action != RetryDecision.RETRY) {
                            break;
                        }

                        Thread.sleep(rc.delayMillis);
                    } catch (Exception var25) {
                        LOG.warn("Exception thrown when call shouldRetry, exception " + var25);
                        break;
                    }
                }
            }

            if (this.readBuffer.getStatus() != STATUS.SUCCESS) {
                this.readBuffer.setStatus(STATUS.ERROR);
            }

            this.readBuffer.signalAll();
        } finally {
            this.readBuffer.unlock();
        }

    }
}
6、AliyunOSSFileSystem
package org.apache.hadoop.fs.aliyun.oss;

import com.aliyun.oss.model.OSSObjectSummary;
import com.aliyun.oss.model.ObjectListing;
import com.aliyun.oss.model.ObjectMetadata;
import com.google.common.util.concurrent.ListeningExecutorService;
import com.google.common.util.concurrent.MoreExecutors;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.EnumSet;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.CreateFlag;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileAlreadyExistsException;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.fs.PathIOException;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.BlockingThreadPoolExecutorService;
import org.apache.hadoop.util.Progressable;
import org.apache.hadoop.util.SemaphoredDelegatingExecutor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class AliyunOSSFileSystem extends FileSystem {
    private static final Logger LOG = LoggerFactory.getLogger(AliyunOSSFileSystem.class);
    // Filesystem URI in the form scheme://authority; set in initialize().
    private URI uri;
    // Host part of the URI passed to initialize().
    private String bucket;
    // Short name of the current user; set in initialize().
    private String username;
    // Base for resolving relative paths; initialize() sets it to /user/<username>.
    private Path workingDir;
    // Read from "fs.oss.upload.active.blocks"; passed to the executor wrapper in create().
    private int blockOutputActiveBlocks;
    // Store object that performs the actual OSS operations.
    private AliyunOSSFileSystemStore store;
    // Read from "fs.oss.paging.maximum"; passed to listObjects when listing a directory.
    private int maxKeys;
    // Read from "fs.oss.multipart.download.ahead.part.max.number".
    private int maxReadAheadPartNumber;
    // Read from "fs.oss.max.copy.tasks.per.dir".
    private int maxConcurrentCopyTasksPerDir;
    // Thread pool created in initialize() with the name prefix "oss-transfer-shared".
    private ListeningExecutorService boundedThreadPool;
    // Thread pool created in initialize() with the name prefix "oss-copy-unbounded".
    private ListeningExecutorService boundedCopyThreadPool;
    // Path filter that accepts every path.
    private static final PathFilter DEFAULT_FILTER = new PathFilter() {
        public boolean accept(Path file) {
            return true;
        }
    };

    /** Creates an unconfigured instance; the fields are populated by initialize(). */
    public AliyunOSSFileSystem() {
    }

    /**
     * Append is not supported by this filesystem.
     *
     * @throws IOException always
     */
    public FSDataOutputStream append(Path path, int bufferSize, Progressable progress) throws IOException {
        throw new IOException("Append is not supported!");
    }

    /**
     * Closes the store and shuts down both thread pools, then always delegates to
     * {@code super.close()}.
     *
     * @throws IOException if closing the store or the superclass fails
     */
    public void close() throws IOException {
        try {
            store.close();
            boundedThreadPool.shutdown();
            boundedCopyThreadPool.shutdown();
        } finally {
            // Runs even when one of the calls above throws.
            super.close();
        }
    }

    /**
     * Opens an output stream for writing the object at {@code path}.
     *
     * @param path file to create
     * @param overwrite whether an existing file may be replaced
     * @return a stream backed by {@code AliyunOSSBlockOutputStream}
     * @throws FileAlreadyExistsException if {@code path} is a directory, or is an existing
     *         file and {@code overwrite} is false
     * @throws IOException on other failures
     */
    public FSDataOutputStream create(Path path, FsPermission permission, boolean overwrite, int bufferSize, short replication, long blockSize, Progressable progress) throws IOException {
        String key = pathToKey(path);

        try {
            FileStatus existing = getFileStatus(path);
            if (existing.isDirectory()) {
                throw new FileAlreadyExistsException(path + " is a directory");
            }
            if (!overwrite) {
                throw new FileAlreadyExistsException(path + " already exists");
            }
            LOG.debug("Overwriting file {}", path);
        } catch (FileNotFoundException ignored) {
            // Nothing exists at the path yet, so there is nothing to check.
        }

        // Part size for multipart uploads; the default 104857600 bytes is 100 MiB.
        long uploadPartSize = AliyunOSSUtils.getMultipartSizeProperty(getConf(), "fs.oss.multipart.upload.size", 104857600L);
        SemaphoredDelegatingExecutor uploadExecutor =
                new SemaphoredDelegatingExecutor(boundedThreadPool, blockOutputActiveBlocks, true);
        AliyunOSSBlockOutputStream blockOut =
                new AliyunOSSBlockOutputStream(getConf(), store, key, uploadPartSize, uploadExecutor);
        return new FSDataOutputStream(blockOut, statistics);
    }

    /**
     * Creates a file after checking that the parent of {@code path}, if any, is a directory.
     *
     * @throws FileAlreadyExistsException if the parent exists but is not a directory
     * @throws IOException if the parent status lookup or the create fails
     */
    public FSDataOutputStream createNonRecursive(Path path, FsPermission permission, EnumSet<CreateFlag> flags, int bufferSize, short replication, long blockSize, Progressable progress) throws IOException {
        Path parentDir = path.getParent();
        if (parentDir != null && !getFileStatus(parentDir).isDirectory()) {
            throw new FileAlreadyExistsException("Not a directory: " + parentDir);
        }
        boolean overwrite = flags.contains(CreateFlag.OVERWRITE);
        return create(path, permission, overwrite, bufferSize, replication, blockSize, progress);
    }

    /**
     * Deletes the file or directory at {@code path}.
     *
     * @return the result of the inner delete, or false when the path does not exist
     * @throws IOException on failures other than a missing path
     */
    public boolean delete(Path path, boolean recursive) throws IOException {
        try {
            FileStatus target = getFileStatus(path);
            return innerDelete(target, recursive);
        } catch (FileNotFoundException notFound) {
            LOG.debug("Couldn't delete {} - does not exist", path);
            return false;
        }
    }

    private boolean innerDelete(FileStatus status, boolean recursive) throws IOException {
        Path f = status.getPath();
        String p = f.toUri().getPath();
        FileStatus[] statuses;
        
        ## 如果根目录拒绝文件删除
        if (p.equals("/")) {
            statuses = this.listStatus(status.getPath());
            boolean isEmptyDir = statuses.length <= 0;
            return this.rejectRootDirectoryDelete(isEmptyDir, recursive);
        } else {
            String key = this.pathToKey(f);
            if (status.isDirectory()) {
                ## 如果非递归文件夹
                if (!recursive) {
                    ## 列出文件列表
                    statuses = this.listStatus(status.getPath());
                    ## 非空不可移除文件夹
                    if (statuses.length > 0) {
                        throw new IOException("Cannot remove directory " + f + ": It is not empty!");
                    }
				  
                    ## 对文件路径进行格式化
                    key = AliyunOSSUtils.maybeAddTrailingSlash(key);
                    this.store.deleteObject(key);
                } else {
                    this.store.deleteDirs(key);
                }
            } else {
                this.store.deleteObject(key);
            }

            this.createFakeDirectoryIfNecessary(f);
            return true;
        }
    }

    /**
     * Decides the outcome of a delete request on the root directory, which is never
     * actually deleted.
     *
     * @param isEmptyDir whether the root directory has no entries
     * @param recursive whether the delete was requested recursively
     * @return true for an empty root, false for a non-empty root with {@code recursive}
     * @throws PathIOException for a non-empty root without {@code recursive}
     */
    private boolean rejectRootDirectoryDelete(boolean isEmptyDir, boolean recursive) throws IOException {
        LOG.info("oss delete the {} root directory of {}", bucket, recursive);
        if (isEmptyDir) {
            return true;
        }
        if (recursive) {
            return false;
        }
        throw new PathIOException(bucket, "Cannot delete root path");
    }

    ## 创建假文件夹
    private void createFakeDirectoryIfNecessary(Path f) throws IOException {
        String key = this.pathToKey(f);
        if (StringUtils.isNotEmpty(key) && !this.exists(f)) {
            LOG.debug("Creating new fake directory at {}", f);
            this.mkdir(this.pathToKey(f.getParent()));
        }

    }

    ## 获取文件状态
    public FileStatus getFileStatus(Path path) throws IOException {
        ## 文件路径格式校验
        Path qualifiedPath = path.makeQualified(this.uri, this.workingDir);
        String key = this.pathToKey(qualifiedPath);
        if (key.length() == 0) {
            return new OSSFileStatus(0L, true, 1, 0L, 0L, qualifiedPath, this.username);
        } else {
            ## 获取文件元对象
            ObjectMetadata meta = this.store.getObjectMetadata(key);
            if (meta == null && !key.endsWith("/")) {
                key = key + "/";
                meta = this.store.getObjectMetadata(key);
            }

            ## 判断元数据是否为空
            if (meta == null) {
                ObjectListing listing = this.store.listObjects(key, 1, (String)null, false);
                if (!CollectionUtils.isNotEmpty(listing.getObjectSummaries()) && !CollectionUtils.isNotEmpty(listing.getCommonPrefixes())) {
                    throw new FileNotFoundException(path + ": No such file or directory!");
                } else {
                    return new OSSFileStatus(0L, true, 1, 0L, 0L, qualifiedPath, this.username);
                }
            } else {
                return AliyunOSSUtils.objectRepresentsDirectory(key, meta.getContentLength()) ? new OSSFileStatus(0L, true, 1, 0L, meta.getLastModified().getTime(), qualifiedPath, this.username) : new OSSFileStatus(meta.getContentLength(), false, 1, this.getDefaultBlockSize(path), meta.getLastModified().getTime(), qualifiedPath, this.username);
            }
        }
    }

    /** Returns the URI scheme of this filesystem, {@code "oss"}. */
    public String getScheme() {
        return "oss";
    }

    /** Returns the filesystem URI that initialize() built from the scheme and authority. */
    public URI getUri() {
        return this.uri;
    }

    /** Returns -1; presumably this means "no default port" (confirm against FileSystem). */
    public int getDefaultPort() {
        return -1;
    }

    /** Returns the working directory used to resolve relative paths. */
    public Path getWorkingDirectory() {
        return this.workingDir;
    }

    /**
     * Returns the block size configured under {@code fs.oss.block.size}, defaulting to
     * 67108864 bytes (64 MiB).
     *
     * @deprecated marked deprecated in the original source; the replacement is not visible here
     */
    @Deprecated
    public long getDefaultBlockSize() {
        return this.getConf().getLong("fs.oss.block.size", 67108864L);
    }

    /** Always returns null. */
    public String getCanonicalServiceName() {
        return null;
    }

    /**
     * Initializes the filesystem for the bucket named by {@code name}: derives the URI,
     * user and working directory, initializes the store and creates the two thread pools.
     *
     * @param name filesystem URI; its host is used as the bucket name
     * @param conf configuration supplying the {@code fs.oss.*} settings read below
     * @throws IOException if the superclass, user lookup or store initialization fails
     */
    public void initialize(URI name, Configuration conf) throws IOException {
        super.initialize(name, conf);
        this.bucket = name.getHost();
        // Keep only scheme and authority; any path component of the given URI is dropped.
        this.uri = URI.create(name.getScheme() + "://" + name.getAuthority());
        this.username = UserGroupInformation.getCurrentUser().getShortUserName();
        // Working directory is /user/<username>, qualified against the filesystem URI.
        this.workingDir = (new Path("/user", this.username)).makeQualified(this.uri, (Path)null);
        // NOTE(review): the last argument of longOption/intOption looks like a minimum
        // allowed value - confirm against AliyunOSSUtils.
        long keepAliveTime = AliyunOSSUtils.longOption(conf, "fs.oss.threads.keepalivetime", 60L, 0L);
        this.blockOutputActiveBlocks = AliyunOSSUtils.intOption(conf, "fs.oss.upload.active.blocks", 4, 1);
        this.store = new AliyunOSSFileSystemStore();
        this.store.initialize(name, conf, this.username, this.statistics);
        this.maxKeys = conf.getInt("fs.oss.paging.maximum", 1000);
        int threadNum = AliyunOSSUtils.intPositiveOption(conf, "fs.oss.multipart.download.threads", 10);
        int totalTasks = AliyunOSSUtils.intPositiveOption(conf, "fs.oss.max.total.tasks", 128);
        this.maxReadAheadPartNumber = AliyunOSSUtils.intPositiveOption(conf, "fs.oss.multipart.download.ahead.part.max.number", 4);
        // Pool handed to the upload executor wrapper in create().
        this.boundedThreadPool = BlockingThreadPoolExecutorService.newInstance(threadNum, totalTasks, keepAliveTime, TimeUnit.SECONDS, "oss-transfer-shared");
        this.maxConcurrentCopyTasksPerDir = AliyunOSSUtils.intPositiveOption(conf, "fs.oss.max.copy.tasks.per.dir", 5);
        int maxCopyThreads = AliyunOSSUtils.intPositiveOption(conf, "fs.oss.max.copy.threads", 25);
        int maxCopyTasks = AliyunOSSUtils.intPositiveOption(conf, "fs.oss.max.copy.tasks", 10485760);
        // Separate pool for copy tasks; it uses a fixed 60 second keep-alive rather than keepAliveTime.
        this.boundedCopyThreadPool = BlockingThreadPoolExecutorService.newInstance(maxCopyThreads, maxCopyTasks, 60L, TimeUnit.SECONDS, "oss-copy-unbounded");
        this.setConf(conf);
    }

    /**
     * Converts a path into an object key: relative paths are resolved against
     * the working directory, then the leading character of the URI path
     * (the {@code /}) is dropped.
     *
     * @param path path to convert
     * @return the URI path of the absolute path without its first character
     */
    private String pathToKey(Path path) {
        Path absolute = path.isAbsolute() ? path : new Path(workingDir, path);
        String uriPath = absolute.toUri().getPath();
        return uriPath.substring(1);
    }

    /**
     * Converts an object key into a path by prefixing it with {@code /}.
     *
     * @param key object key
     * @return the path {@code "/" + key}
     */
    private Path keyToPath(String key) {
        String absolute = "/" + key;
        return new Path(absolute);
    }

    /**
     * Lists the statuses of the entries directly under {@code path}.
     *
     * <p>For a non-directory the result contains only the status of the path
     * itself. For a directory, objects are listed page by page (non-recursive);
     * object summaries become file statuses, common prefixes are resolved via
     * {@code getFileStatus}, and the entry whose key equals {@code key + "/"}
     * (the directory itself) is ignored.
     *
     * @param path path to list
     * @return statuses of the listed entries
     * @throws IOException propagated from {@code getFileStatus}
     */
    public FileStatus[] listStatus(Path path) throws IOException {
        final String key = pathToKey(path);
        if (LOG.isDebugEnabled()) {
            LOG.debug("List status for path: " + path);
        }

        final List<FileStatus> entries = new ArrayList<FileStatus>();
        final FileStatus self = getFileStatus(path);

        // Not a directory: the listing is just the path itself.
        if (!self.isDirectory()) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("Adding: rd (not a dir): " + path);
            }
            entries.add(self);
            return (FileStatus[]) entries.toArray(new FileStatus[entries.size()]);
        }

        if (LOG.isDebugEnabled()) {
            LOG.debug("listStatus: doing listObjects for directory " + key);
        }

        final String selfKey = key + "/";
        ObjectListing page = store.listObjects(key, maxKeys, (String) null, false);
        for (;;) {
            // Plain objects in this page.
            for (OSSObjectSummary summary : page.getObjectSummaries()) {
                String objKey = summary.getKey();
                if (objKey.equals(selfKey)) {
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Ignoring: " + objKey);
                    }
                    continue;
                }

                Path keyPath = keyToPath(objKey).makeQualified(uri, workingDir);
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Adding: fi: " + keyPath);
                }
                entries.add(new OSSFileStatus(summary.getSize(), false, 1, getDefaultBlockSize(keyPath),
                        summary.getLastModified().getTime(), keyPath, username));
            }

            // Common prefixes (sub-directories) in this page.
            for (String prefix : page.getCommonPrefixes()) {
                if (prefix.equals(selfKey)) {
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Ignoring: " + prefix);
                    }
                    continue;
                }

                Path keyPath = keyToPath(prefix).makeQualified(uri, workingDir);
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Adding: rd: " + keyPath);
                }
                entries.add(getFileStatus(keyPath));
            }

            if (!page.isTruncated()) {
                break;
            }

            if (LOG.isDebugEnabled()) {
                LOG.debug("listStatus: list truncated - getting next batch");
            }
            page = store.listObjects(key, maxKeys, page.getNextMarker(), false);
        }

        return (FileStatus[]) entries.toArray(new FileStatus[entries.size()]);
    }

    public RemoteIterator<LocatedFileStatus> listFiles(final Path f, boolean recursive) throws IOException {
        Path qualifiedPath = f.makeQualified(this.uri, this.workingDir);
        final FileStatus status = this.getFileStatus(qualifiedPath);
        PathFilter filter = new PathFilter() {
            public boolean accept(Path path) {
                return status.isFile() || !path.equals(f);
            }
        };
        ## 文件块路径对象接收器
        FileStatusAcceptor acceptor = new FileStatusAcceptor.AcceptFilesOnly(qualifiedPath);
        return this.innerList(f, status, filter, acceptor, recursive);
    }

    ## 获取列表中的块路径对象
    /**
     * Lists located statuses under {@code f} using {@code DEFAULT_FILTER}.
     *
     * @param f path to list
     * @return iterator from the two-argument overload
     * @throws IOException propagated from the two-argument overload
     */
    public RemoteIterator<LocatedFileStatus> listLocatedStatus(Path f) throws IOException {
        final PathFilter defaultFilter = DEFAULT_FILTER;
        return listLocatedStatus(f, defaultFilter);
    }

    public RemoteIterator<LocatedFileStatus> listLocatedStatus(Path f, PathFilter filter) throws IOException {
        ## 文件路径校验
        Path qualifiedPath = f.makeQualified(this.uri, this.workingDir);
        FileStatus status = this.getFileStatus(qualifiedPath);
        FileStatusAcceptor acceptor = new FileStatusAcceptor.AcceptAllButSelf(qualifiedPath);
        return this.innerList(f, status, filter, acceptor, false);
    }

    ## 获取列表文件中的文件块路径对象
    /**
     * Shared implementation behind the listing methods.
     *
     * <p>For a file, returns a single-element iterator (empty when
     * {@code filter} rejects {@code f}). Otherwise returns the store's
     * located-status iterator over the key of the qualified path, using a
     * {@code "/"} delimiter unless {@code recursive} is set.
     *
     * @param f         path being listed
     * @param status    status already resolved for {@code f}
     * @param filter    path filter applied to candidates
     * @param acceptor  acceptor forwarded to the store iterator
     * @param recursive whether to list without a delimiter
     * @return iterator over located file statuses
     * @throws IOException propagated from {@code getFileBlockLocations}
     */
    private RemoteIterator<LocatedFileStatus> innerList(Path f, FileStatus status, PathFilter filter, FileStatusAcceptor acceptor, boolean recursive) throws IOException {
        final Path qualified = f.makeQualified(uri, workingDir);
        final String key = pathToKey(qualified);

        if (!status.isFile()) {
            final String delimiter = recursive ? null : "/";
            return store.createLocatedFileStatusIterator(key, maxKeys, this, filter, acceptor, delimiter);
        }

        LOG.debug("{} is a File", qualified);
        BlockLocation[] locations = getFileBlockLocations(status, 0L, status.getLen());
        FileStatus accepted = filter.accept(f) ? status : null;
        return store.singleStatusRemoteIterator(accepted, locations);
    }

    ## 可复用
    /**
     * Creates a directory marker for {@code key} by storing an empty object
     * whose key ends with {@code /}. Nothing is stored for an empty key.
     *
     * @param key object key of the directory
     * @return always {@code true}
     * @throws IOException propagated from the store
     */
    private boolean mkdir(String key) throws IOException {
        if (StringUtils.isNotEmpty(key)) {
            String dirKey = key.endsWith("/") ? key : key + "/";
            store.storeEmptyFile(dirKey);
        }
        return true;
    }

    ## 可复用
    /**
     * Creates the directory {@code path} if it does not exist.
     *
     * @param path       directory to create
     * @param permission not used by this implementation
     * @return {@code true} when the directory already exists or was created
     * @throws FileAlreadyExistsException if {@code path} exists and is a file
     * @throws IOException propagated from status lookup, validation or the store
     */
    public boolean mkdirs(Path path, FsPermission permission) throws IOException {
        FileStatus existing;
        try {
            existing = getFileStatus(path);
        } catch (FileNotFoundException notFound) {
            // Path is absent: check the ancestors, then write the directory marker.
            validatePath(path);
            return mkdir(pathToKey(path));
        }

        if (!existing.isDirectory()) {
            throw new FileAlreadyExistsException("Path is a file: " + path);
        }
        return true;
    }

    ## 可复用
    /**
     * Walks up from the parent of {@code path} until an existing ancestor is
     * found (or the ancestors are exhausted) and fails if that ancestor is a file.
     *
     * @param path path whose ancestors are checked
     * @throws FileAlreadyExistsException if the nearest existing ancestor is not a directory
     * @throws IOException propagated from {@code getFileStatus}
     */
    private void validatePath(Path path) throws IOException {
        Path ancestor = path.getParent();
        do {
            try {
                FileStatus ancestorStatus = getFileStatus(ancestor);
                if (ancestorStatus.isDirectory()) {
                    // Nearest existing ancestor is a directory: nothing more to check.
                    return;
                }
                throw new FileAlreadyExistsException(String.format("Can't make directory for path '%s', it is a file.", ancestor));
            } catch (FileNotFoundException missing) {
                // This ancestor does not exist; move one level up.
                ancestor = ancestor.getParent();
            }
        } while (ancestor != null);
    }

    ## 打开文件对象
    /**
     * Opens {@code path} for reading.
     *
     * @param path       file to open
     * @param bufferSize not used by this implementation
     * @return an input stream backed by {@code AliyunOSSInputStream}
     * @throws FileNotFoundException if {@code path} is a directory
     * @throws IOException propagated from {@code getFileStatus}
     */
    public FSDataInputStream open(Path path, int bufferSize) throws IOException {
        final FileStatus target = getFileStatus(path);
        if (target.isDirectory()) {
            throw new FileNotFoundException("Can't open " + path + " because it is a directory");
        }

        SemaphoredDelegatingExecutor readAheadExecutor =
                new SemaphoredDelegatingExecutor(boundedThreadPool, maxReadAheadPartNumber, true);
        AliyunOSSInputStream ossStream = new AliyunOSSInputStream(getConf(), readAheadExecutor,
                maxReadAheadPartNumber, store, pathToKey(path), target.getLen(), statistics);
        return new FSDataInputStream(ossStream);
    }

    ## 重命名文件路径(可复用)
    /**
     * Renames {@code srcPath} to {@code dstPath} by copying the data and then
     * deleting the source.
     *
     * @param srcPath source file or directory
     * @param dstPath destination; when it is an existing directory the source
     *                is placed inside it under its own name
     * @return {@code false} when the source is the root or an ancestor of the
     *         destination, or when copy/delete fails; otherwise the result of
     *         the copy-then-delete step
     * @throws FileAlreadyExistsException if the destination is an existing file,
     *         or the resolved target inside an existing directory already has entries
     * @throws IOException if the destination's parent is a file, or propagated
     *         from status lookups, copy or delete
     */
    public boolean rename(Path srcPath, Path dstPath) throws IOException {
        if (srcPath.isRoot()) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("Cannot rename the root of a filesystem");
            }

            return false;
        } else {
            // Walk up the destination's ancestors; the loop stops early (parent != null)
            // when srcPath is one of them.
            Path parent;
            for(parent = dstPath.getParent(); parent != null && !srcPath.equals(parent); parent = parent.getParent()) {
            }

            if (parent != null) {
                // Source is an ancestor of the destination: refuse.
                return false;
            } else {
                FileStatus srcStatus = this.getFileStatus(srcPath);

                // A missing destination is represented by dstStatus == null.
                FileStatus dstStatus;
                try {
                    dstStatus = this.getFileStatus(dstPath);
                } catch (FileNotFoundException var9) {
                    dstStatus = null;
                }

                if (dstStatus == null) {
                    // Destination absent: its parent must be a directory.
                    dstStatus = this.getFileStatus(dstPath.getParent());
                    if (!dstStatus.isDirectory()) {
                        throw new IOException(String.format("Failed to rename %s to %s, %s is a file", srcPath, dstPath, dstPath.getParent()));
                    }
                } else {
                    // Renaming onto itself: true for a file, false for a directory.
                    if (srcStatus.getPath().equals(dstStatus.getPath())) {
                        return !srcStatus.isDirectory();
                    }

                    if (!dstStatus.isDirectory()) {
                        throw new FileAlreadyExistsException(String.format("Failed to rename %s to %s, file already exists!", srcPath, dstPath));
                    }

                    // Destination is an existing directory: move the source into it.
                    dstPath = new Path(dstPath, srcPath.getName());

                    FileStatus[] statuses;
                    try {
                        statuses = this.listStatus(dstPath);
                    } catch (FileNotFoundException var8) {
                        statuses = null;
                    }

                    // The resolved target must not already have any listed entries.
                    if (statuses != null && statuses.length > 0) {
                        throw new FileAlreadyExistsException(String.format("Failed to rename %s to %s, file already exists or not empty!", srcPath, dstPath));
                    }
                }

                boolean succeed;
                if (srcStatus.isDirectory()) {
                    succeed = this.copyDirectory(srcPath, dstPath);
                } else {
                    succeed = this.copyFile(srcPath, srcStatus.getLen(), dstPath);
                }

                // The source is deleted only after a successful copy.
                return srcPath.equals(dstPath) || succeed && this.delete(srcPath, true);
            }
        }
    }

    ## 复制文件(可复用)
    /**
     * Copies a single file by delegating to the store with the keys of both paths.
     *
     * @param srcPath source file path
     * @param srcLen  length of the source file in bytes
     * @param dstPath destination file path
     * @return the result reported by the store's {@code copyFile}
     */
    private boolean copyFile(Path srcPath, long srcLen, Path dstPath) {
        return store.copyFile(pathToKey(srcPath), srcLen, pathToKey(dstPath));
    }

    ## 复制文件夹(可复用)
    private boolean copyDirectory(Path srcPath, Path dstPath) throws IOException {
        ## 文件路径校验加上格式符
        String srcKey = AliyunOSSUtils.maybeAddTrailingSlash(this.pathToKey(srcPath));
        ## 文件路径校验加上格式符
        String dstKey = AliyunOSSUtils.maybeAddTrailingSlash(this.pathToKey(dstPath));
        if (dstKey.startsWith(srcKey)) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("Cannot rename a directory to a subdirectory of self");
            }

            return false;
        } else {
            this.store.storeEmptyFile(dstKey);
            AliyunOSSCopyFileContext copyFileContext = new AliyunOSSCopyFileContext();
            ExecutorService executorService = MoreExecutors.listeningDecorator(new SemaphoredDelegatingExecutor(this.boundedCopyThreadPool, this.maxConcurrentCopyTasksPerDir, true));
            ObjectListing objects = this.store.listObjects(srcKey, this.maxKeys, (String)null, true);
            int copiesToFinish = 0;

            while(true) {
                Iterator var9 = objects.getObjectSummaries().iterator();

                while(var9.hasNext()) {
                    OSSObjectSummary objectSummary = (OSSObjectSummary)var9.next();
                    String newKey = dstKey.concat(objectSummary.getKey().substring(srcKey.length()));
                    executorService.execute(new AliyunOSSCopyFileTask(this.store, objectSummary.getKey(), objectSummary.getSize(), newKey, copyFileContext));
                    ++copiesToFinish;
                    if (copyFileContext.isCopyFailure()) {
                        break;
                    }
                }

                if (!objects.isTruncated()) {
                    copyFileContext.lock();

                    try {
                        copyFileContext.awaitAllFinish(copiesToFinish);
                    } catch (InterruptedException var15) {
                        LOG.warn("interrupted when wait copies to finish");
                    } finally {
                        copyFileContext.unlock();
                    }

                    return !copyFileContext.isCopyFailure();
                }

                String nextMarker = objects.getNextMarker();
                objects = this.store.listObjects(srcKey, this.maxKeys, nextMarker, true);
            }
        }
    }

    /**
     * Replaces the working directory with {@code dir} as given (no qualification is applied here).
     *
     * @param dir new working directory
     */
    public void setWorkingDirectory(Path dir) {
        workingDir = dir;
    }

    /**
     * Returns the backing store held in the {@code store} field.
     *
     * @return the store instance
     */
    public AliyunOSSFileSystemStore getStore() {
        return store;
    }
}
7、AliyunOSSFileSystemStore
package org.apache.hadoop.fs.aliyun.oss;

import com.aliyun.oss.ClientConfiguration;
import com.aliyun.oss.ClientException;
import com.aliyun.oss.OSSClient;
import com.aliyun.oss.OSSException;
import com.aliyun.oss.common.auth.CredentialsProvider;
import com.aliyun.oss.common.comm.Protocol;
import com.aliyun.oss.model.AbortMultipartUploadRequest;
import com.aliyun.oss.model.CannedAccessControlList;
import com.aliyun.oss.model.CompleteMultipartUploadRequest;
import com.aliyun.oss.model.CompleteMultipartUploadResult;
import com.aliyun.oss.model.CopyObjectResult;
import com.aliyun.oss.model.DeleteObjectsRequest;
import com.aliyun.oss.model.DeleteObjectsResult;
import com.aliyun.oss.model.GenericRequest;
import com.aliyun.oss.model.GetObjectRequest;
import com.aliyun.oss.model.InitiateMultipartUploadRequest;
import com.aliyun.oss.model.InitiateMultipartUploadResult;
import com.aliyun.oss.model.ListObjectsRequest;
import com.aliyun.oss.model.OSSObjectSummary;
import com.aliyun.oss.model.ObjectListing;
import com.aliyun.oss.model.ObjectMetadata;
import com.aliyun.oss.model.PartETag;
import com.aliyun.oss.model.PutObjectResult;
import com.aliyun.oss.model.UploadPartCopyRequest;
import com.aliyun.oss.model.UploadPartCopyResult;
import com.aliyun.oss.model.UploadPartRequest;
import com.aliyun.oss.model.UploadPartResult;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.net.URI;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.ListIterator;
import java.util.NoSuchElementException;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.util.VersionInfo;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

## 阿里云OSS文件系统存储
public class AliyunOSSFileSystemStore {
    public static final Logger LOG = LoggerFactory.getLogger(AliyunOSSFileSystemStore.class);
    // User name passed to initialize(); used as owner of the file statuses this store builds.
    private String username;
    // Read/write operation counters passed to initialize().
    private FileSystem.Statistics statistics;
    // OSS SDK client; created in initialize(), shut down and nulled in close().
    private OSSClient ossClient;
    // Bucket name, taken from the host part of the URI passed to initialize().
    private String bucketName;
    // Part size derived from "fs.oss.multipart.upload.size"; used by multipartCopy().
    private long uploadPartSize;
    // Page size for listings, read from "fs.oss.paging.maximum" (default 1000).
    private int maxKeys;
    // Value of "fs.oss.server-side-encryption-algorithm"; empty string when unset.
    private String serverSideEncryptionAlgorithm;

    /** Creates an uninitialized store; no fields are set here. */
    public AliyunOSSFileSystemStore() {
        super();
    }

    /**
     * Builds the OSS client from the {@code fs.oss.*} configuration and
     * records the bucket, part size, encryption algorithm and page size.
     *
     * @param uri  file system URI; its host is used as the bucket name
     * @param conf configuration supplying connection, proxy, endpoint and ACL options
     * @param user user name recorded for file statuses
     * @param stat statistics object updated by store operations
     * @throws IllegalArgumentException if proxy user/password are set without each
     *         other, a proxy port is set without a host, or the endpoint is empty
     * @throws IOException declared by the signature (e.g. from credential lookup)
     */
    public void initialize(URI uri, Configuration conf, String user, FileSystem.Statistics stat) throws IOException {
        username = user;
        statistics = stat;

        // Connection settings.
        ClientConfiguration clientConf = new ClientConfiguration();
        clientConf.setMaxConnections(conf.getInt("fs.oss.connection.maximum", 32));
        final boolean useHttps = conf.getBoolean("fs.oss.connection.secure.enabled", true);
        if (useHttps) {
            clientConf.setProtocol(Protocol.HTTPS);
        } else {
            clientConf.setProtocol(Protocol.HTTP);
        }
        clientConf.setMaxErrorRetry(conf.getInt("fs.oss.attempts.maximum", 10));
        clientConf.setConnectionTimeout(conf.getInt("fs.oss.connection.establish.timeout", 50000));
        clientConf.setSocketTimeout(conf.getInt("fs.oss.connection.timeout", 200000));
        clientConf.setUserAgent(conf.get("fs.oss.user.agent.prefix", Constants.USER_AGENT_PREFIX_DEFAULT) + ", Hadoop/" + VersionInfo.getVersion());

        // Proxy settings.
        final String proxyHost = conf.getTrimmed("fs.oss.proxy.host", "");
        final int proxyPort = conf.getInt("fs.oss.proxy.port", -1);
        if (StringUtils.isNotEmpty(proxyHost)) {
            clientConf.setProxyHost(proxyHost);
            if (proxyPort >= 0) {
                clientConf.setProxyPort(proxyPort);
            } else if (useHttps) {
                LOG.warn("Proxy host set without port. Using HTTPS default 443");
                clientConf.setProxyPort(443);
            } else {
                LOG.warn("Proxy host set without port. Using HTTP default 80");
                clientConf.setProxyPort(80);
            }

            final String proxyUser = conf.getTrimmed("fs.oss.proxy.username");
            final String proxyPass = conf.getTrimmed("fs.oss.proxy.password");
            // User name and password must be supplied together or not at all.
            if ((proxyUser == null) != (proxyPass == null)) {
                String message = "Proxy error: fs.oss.proxy.username or fs.oss.proxy.password set without the other.";
                LOG.error(message);
                throw new IllegalArgumentException(message);
            }

            clientConf.setProxyUsername(proxyUser);
            clientConf.setProxyPassword(proxyPass);
            clientConf.setProxyDomain(conf.getTrimmed("fs.oss.proxy.domain"));
            clientConf.setProxyWorkstation(conf.getTrimmed("fs.oss.proxy.workstation"));
        } else if (proxyPort >= 0) {
            String message = "Proxy error: fs.oss.proxy.port set without fs.oss.proxy.host";
            LOG.error(message);
            throw new IllegalArgumentException(message);
        }

        // Endpoint is mandatory.
        final String endpoint = conf.getTrimmed("fs.oss.endpoint", "");
        if (StringUtils.isEmpty(endpoint)) {
            throw new IllegalArgumentException("Aliyun OSS endpoint should not be null or empty. Please set proper endpoint with 'fs.oss.endpoint'.");
        }

        CredentialsProvider provider = AliyunOSSUtils.getCredentialsProvider(uri, conf);
        ossClient = new OSSClient(endpoint, provider, clientConf);
        uploadPartSize = AliyunOSSUtils.getMultipartSizeProperty(conf, "fs.oss.multipart.upload.size", 104857600L);
        serverSideEncryptionAlgorithm = conf.get("fs.oss.server-side-encryption-algorithm", "");
        bucketName = uri.getHost();

        // Optional default bucket ACL.
        final String aclName = conf.get("fs.oss.acl.default", "");
        if (StringUtils.isNotEmpty(aclName)) {
            CannedAccessControlList cannedACL = CannedAccessControlList.valueOf(aclName);
            ossClient.setBucketAcl(bucketName, cannedACL);
            statistics.incrementWriteOps(1);
        }

        maxKeys = conf.getInt("fs.oss.paging.maximum", 1000);
    }

    /**
     * Deletes a single object from the bucket and counts one write operation.
     *
     * @param key key of the object to delete
     */
    public void deleteObject(String key) {
        ossClient.deleteObject(bucketName, key);
        statistics.incrementWriteOps(1);
    }

    /**
     * Deletes the given keys with batch requests in quiet mode, re-submitting
     * the list returned by {@code getDeletedObjects()} until it is empty or
     * 10 attempts have been made.
     *
     * @param keysToDelete keys to delete; an empty or null list only logs a warning
     * @throws IOException if keys remain after the last attempt
     */
    public void deleteObjects(List<String> keysToDelete) throws IOException {
        if (CollectionUtils.isEmpty(keysToDelete)) {
            LOG.warn("Keys to delete is empty.");
            return;
        }

        final int maxAttempts = 10;
        int attempts = 0;
        List<String> pending = keysToDelete;
        while (attempts < maxAttempts && CollectionUtils.isNotEmpty(pending)) {
            DeleteObjectsRequest batch = new DeleteObjectsRequest(bucketName);
            batch.setKeys(pending);
            batch.setQuiet(true);
            DeleteObjectsResult outcome = ossClient.deleteObjects(batch);
            statistics.incrementWriteOps(1);
            // The returned list is re-submitted on the next attempt.
            pending = outcome.getDeletedObjects();
            attempts++;
        }

        if (attempts == maxAttempts && CollectionUtils.isNotEmpty(pending)) {
            throw new IOException("Failed to delete Aliyun OSS objects for " + attempts + " times.");
        }
    }

    /**
     * Deletes every object whose key starts with the directory key, one
     * listing page at a time (no delimiter, page size {@code maxKeys}).
     *
     * @param key directory key; a trailing slash is added via {@code maybeAddTrailingSlash}
     * @throws IOException propagated from {@code deleteObjects}
     */
    public void deleteDirs(String key) throws IOException {
        final String dirKey = AliyunOSSUtils.maybeAddTrailingSlash(key);
        ListObjectsRequest listRequest = new ListObjectsRequest(bucketName);
        listRequest.setPrefix(dirKey);
        listRequest.setDelimiter((String) null);
        listRequest.setMaxKeys(maxKeys);

        boolean morePages = true;
        while (morePages) {
            ObjectListing page = ossClient.listObjects(listRequest);
            statistics.incrementReadOps(1);

            List<String> pageKeys = new ArrayList<String>();
            for (OSSObjectSummary summary : page.getObjectSummaries()) {
                pageKeys.add(summary.getKey());
            }
            deleteObjects(pageKeys);

            morePages = page.isTruncated();
            if (morePages) {
                listRequest.setMarker(page.getNextMarker());
            }
        }
    }

    /**
     * Fetches the metadata of an object and counts one read operation.
     *
     * @param key object key
     * @return the object's metadata, or {@code null} when the client throws an {@code OSSException}
     */
    public ObjectMetadata getObjectMetadata(String key) {
        ObjectMetadata metadata;
        try {
            GenericRequest metaRequest = new GenericRequest(bucketName, key);
            metaRequest.setLogEnabled(false);
            metadata = ossClient.getObjectMetadata(metaRequest);
            statistics.incrementReadOps(1);
        } catch (OSSException e) {
            LOG.debug("Exception thrown when get object meta: " + key + ", exception: " + e);
            metadata = null;
        }
        return metadata;
    }

    /**
     * Stores a zero-length object under {@code key} and counts one write operation.
     *
     * @param key key of the empty object
     * @throws IOException declared by the signature (stream close)
     */
    public void storeEmptyFile(String key) throws IOException {
        ObjectMetadata emptyMeta = new ObjectMetadata();
        emptyMeta.setContentLength(0L);
        ByteArrayInputStream emptyBody = new ByteArrayInputStream(new byte[0]);

        try {
            ossClient.putObject(bucketName, key, emptyBody, emptyMeta);
            statistics.incrementWriteOps(1);
        } finally {
            emptyBody.close();
        }
    }

    /**
     * Copies an object within the bucket: tries a single copy first and falls
     * back to a multipart copy when the single copy throws.
     *
     * @param srcKey source object key
     * @param srcLen source object length in bytes (used by the multipart fallback)
     * @param dstKey destination object key
     * @return {@code true} on success; otherwise the result of the multipart copy
     */
    public boolean copyFile(String srcKey, long srcLen, String dstKey) {
        boolean copied;
        try {
            copied = singleCopy(srcKey, dstKey);
        } catch (Exception singleCopyError) {
            LOG.debug("Exception thrown when copy file: " + srcKey + ", exception: " + singleCopyError + ", use multipartCopy instead");
            copied = multipartCopy(srcKey, srcLen, dstKey);
        }
        return copied;
    }

    /**
     * Copies an object within the bucket with one copy request, counts one
     * write operation and logs the resulting ETag at debug level.
     *
     * @param srcKey source object key
     * @param dstKey destination object key
     * @return always {@code true} when no exception is thrown
     */
    private boolean singleCopy(String srcKey, String dstKey) {
        CopyObjectResult outcome = ossClient.copyObject(bucketName, srcKey, bucketName, dstKey);
        statistics.incrementWriteOps(1);
        String etag = outcome.getETag();
        LOG.debug(etag);
        return true;
    }

    /**
     * Copies an object within the bucket part by part using a multipart upload.
     *
     * <p>The part size comes from {@code AliyunOSSUtils.calculatePartSize}; the
     * last part carries the remaining bytes. On a client or service exception
     * the upload is aborted.
     *
     * @param srcKey        source object key
     * @param contentLength length of the source object in bytes
     * @param dstKey        destination object key
     * @return {@code true} when the upload completes, {@code false} when it was aborted
     */
    private boolean multipartCopy(String srcKey, long contentLength, String dstKey) {
        final long partSize = AliyunOSSUtils.calculatePartSize(contentLength, uploadPartSize);
        final int wholeParts = (int) (contentLength / partSize);
        // One extra part for any remainder.
        final int partCount = (contentLength % partSize != 0L) ? wholeParts + 1 : wholeParts;

        InitiateMultipartUploadRequest initRequest = new InitiateMultipartUploadRequest(bucketName, dstKey);
        ObjectMetadata meta = new ObjectMetadata();
        if (StringUtils.isNotEmpty(serverSideEncryptionAlgorithm)) {
            meta.setServerSideEncryption(serverSideEncryptionAlgorithm);
        }
        initRequest.setObjectMetadata(meta);

        InitiateMultipartUploadResult initResult = ossClient.initiateMultipartUpload(initRequest);
        final String uploadId = initResult.getUploadId();
        List<PartETag> partETags = new ArrayList<PartETag>();

        try {
            for (int partNumber = 1; partNumber <= partCount; partNumber++) {
                long offset = partSize * (long) (partNumber - 1);
                long size = Math.min(partSize, contentLength - offset);

                UploadPartCopyRequest partRequest = new UploadPartCopyRequest();
                partRequest.setSourceBucketName(bucketName);
                partRequest.setSourceKey(srcKey);
                partRequest.setBucketName(bucketName);
                partRequest.setKey(dstKey);
                partRequest.setUploadId(uploadId);
                partRequest.setPartSize(size);
                partRequest.setBeginIndex(offset);
                partRequest.setPartNumber(partNumber);

                UploadPartCopyResult partResult = ossClient.uploadPartCopy(partRequest);
                statistics.incrementWriteOps(1);
                statistics.incrementBytesWritten(size);
                partETags.add(partResult.getPartETag());
            }

            CompleteMultipartUploadRequest completeRequest =
                    new CompleteMultipartUploadRequest(bucketName, dstKey, uploadId, partETags);
            CompleteMultipartUploadResult completeResult = ossClient.completeMultipartUpload(completeRequest);
            LOG.debug(completeResult.getETag());
            return true;
        } catch (ClientException | OSSException copyError) {
            // Abort the pending upload on failure.
            AbortMultipartUploadRequest abortRequest = new AbortMultipartUploadRequest(bucketName, dstKey, uploadId);
            ossClient.abortMultipartUpload(abortRequest);
            return false;
        }
    }

    /**
     * Uploads a local file as the object {@code key} and counts one write operation.
     *
     * <p>The metadata is prepared before the file is opened and the stream is
     * managed by try-with-resources, so it is closed on every path and a
     * failure in {@code close()} cannot mask an exception from the upload.
     *
     * @param key  destination object key
     * @param file local file to upload; resolved to its absolute form
     * @throws IOException if the file cannot be opened or the stream cannot be closed
     */
    public void uploadObject(String key, File file) throws IOException {
        File object = file.getAbsoluteFile();
        ObjectMetadata meta = new ObjectMetadata();
        meta.setContentLength(object.length());
        if (StringUtils.isNotEmpty(this.serverSideEncryptionAlgorithm)) {
            meta.setServerSideEncryption(this.serverSideEncryptionAlgorithm);
        }

        try (FileInputStream fis = new FileInputStream(object)) {
            PutObjectResult result = this.ossClient.putObject(this.bucketName, key, fis, meta);
            LOG.debug(result.getETag());
            this.statistics.incrementWriteOps(1);
        }
    }

    /**
     * Lists one page of objects under a prefix and counts one read operation.
     *
     * @param prefix           key prefix; a trailing slash is added via {@code maybeAddTrailingSlash}
     * @param maxListingLength maximum number of keys in the page
     * @param marker           marker to continue from, or {@code null} for the first page
     * @param recursive        when {@code false}, {@code "/"} is used as the delimiter; when {@code true}, none
     * @return the listing returned by the client
     */
    public ObjectListing listObjects(String prefix, int maxListingLength, String marker, boolean recursive) {
        final String dirPrefix = AliyunOSSUtils.maybeAddTrailingSlash(prefix);
        final String delimiter;
        if (recursive) {
            delimiter = null;
        } else {
            delimiter = "/";
        }

        ListObjectsRequest listRequest = new ListObjectsRequest(bucketName);
        listRequest.setPrefix(dirPrefix);
        listRequest.setDelimiter(delimiter);
        listRequest.setMaxKeys(maxListingLength);
        listRequest.setMarker(marker);

        ObjectListing page = ossClient.listObjects(listRequest);
        statistics.incrementReadOps(1);
        return page;
    }

    /**
     * Opens a ranged read on an object and counts one read operation.
     *
     * @param key       object key
     * @param byteStart range start passed to {@code setRange}
     * @param byteEnd   range end passed to {@code setRange}
     * @return the object's content stream, or {@code null} when the client throws
     */
    public InputStream retrieve(String key, long byteStart, long byteEnd) {
        InputStream content;
        try {
            GetObjectRequest rangeRequest = new GetObjectRequest(bucketName, key);
            rangeRequest.setRange(byteStart, byteEnd);
            content = ossClient.getObject(rangeRequest).getObjectContent();
            statistics.incrementReadOps(1);
        } catch (ClientException | OSSException readError) {
            LOG.error("Exception thrown when store retrieves key: " + key + ", exception: " + readError);
            content = null;
        }
        return content;
    }

    /** Shuts down the OSS client if one exists and clears the reference. */
    public void close() {
        if (ossClient == null) {
            return;
        }
        ossClient.shutdown();
        ossClient = null;
    }

    /**
     * Deletes everything under {@code prefix}.
     *
     * <p>The listing is followed page by page via the listing marker, so
     * prefixes holding more than {@code maxKeys} objects are fully purged.
     * Client and service exceptions are logged together with their cause and
     * not rethrown (best-effort, as before).
     *
     * @param prefix key prefix to purge
     * @throws IOException propagated from {@code deleteDirs}
     */
    public void purge(String prefix) throws IOException {
        try {
            String marker = null;
            ObjectListing objects;
            do {
                objects = this.listObjects(prefix, this.maxKeys, marker, true);

                for (OSSObjectSummary object : objects.getObjectSummaries()) {
                    this.ossClient.deleteObject(this.bucketName, object.getKey());
                    this.statistics.incrementWriteOps(1);
                }

                for (String dir : objects.getCommonPrefixes()) {
                    this.deleteDirs(dir);
                }

                // Continue after the last key of this page while more remain.
                marker = objects.getNextMarker();
            } while (objects.isTruncated());
        } catch (ClientException | OSSException e) {
            LOG.error("Failed to purge " + prefix, e);
        }
    }

    /**
     * Returns an iterator that yields at most one element built from
     * {@code fileStatus}; it is empty when {@code fileStatus} is {@code null}.
     *
     * @param fileStatus status to expose, or {@code null} for an empty iterator
     * @param locations  block locations attached when the status is a file
     * @return a one-shot iterator over the located status
     */
    public RemoteIterator<LocatedFileStatus> singleStatusRemoteIterator(final FileStatus fileStatus, final BlockLocation[] locations) {
        return new RemoteIterator<LocatedFileStatus>() {
            // Flipped once the single element has been handed out.
            private boolean consumed = false;

            public boolean hasNext() throws IOException {
                return fileStatus != null && !consumed;
            }

            public LocatedFileStatus next() throws IOException {
                if (!hasNext()) {
                    throw new NoSuchElementException();
                }
                BlockLocation[] blocks = fileStatus.isFile() ? locations : null;
                LocatedFileStatus located = new LocatedFileStatus(fileStatus, blocks);
                consumed = true;
                return located;
            }
        };
    }

    /**
     * Creates an iterator that lists the objects and common prefixes under {@code prefix} page by page.
     *
     * <p>Each page is fetched with one {@code listObjects} call limited to {@code maxListingLength} keys,
     * and every entry must pass both {@code filter} and {@code acceptor} to be returned.
     *
     * <p>A page whose entries are all rejected no longer ends the iteration: {@code hasNext()} keeps
     * requesting pages until it finds an accepted entry or the listing is no longer truncated.
     *
     * @param prefix key prefix to list; a trailing slash is appended when missing
     * @param maxListingLength maximum number of keys requested per page
     * @param fs file system used to qualify paths and to look up block size and block locations
     * @param filter path filter applied to every candidate
     * @param acceptor acceptor applied to every object summary and common prefix
     * @param delimiter delimiter passed through to the listing request
     * @return a remote iterator over the accepted entries
     */
    public RemoteIterator<LocatedFileStatus> createLocatedFileStatusIterator(final String prefix, final int maxListingLength, final FileSystem fs, final PathFilter filter, final FileStatusAcceptor acceptor, final String delimiter) {
        return new RemoteIterator<LocatedFileStatus>() {
            private String nextMarker = null;
            private boolean firstListing = true;
            private boolean meetEnd = false;
            private ListIterator<FileStatus> batchIterator;

            public boolean hasNext() throws IOException {
                if (this.firstListing) {
                    this.requestNextBatch();
                    this.firstListing = false;
                }

                // A page can be filtered down to nothing while the listing is still truncated,
                // so keep paging until an entry shows up or the listing is exhausted.
                while (!this.batchIterator.hasNext()) {
                    if (this.meetEnd) {
                        return false;
                    }

                    this.requestNextBatch();
                }

                return true;
            }

            public LocatedFileStatus next() throws IOException {
                if (!this.hasNext()) {
                    throw new NoSuchElementException();
                }

                FileStatus status = this.batchIterator.next();
                BlockLocation[] locations = fs.getFileBlockLocations(status, 0L, status.getLen());
                return new LocatedFileStatus(status, status.isFile() ? locations : null);
            }

            /**
             * Fetches the next page and replaces the current batch.
             *
             * @return true when the freshly fetched page contains at least one accepted entry;
             *         false when the listing had already ended or the page was empty after filtering
             */
            private boolean requestNextBatch() {
                if (this.meetEnd) {
                    return false;
                }

                ListObjectsRequest listRequest = new ListObjectsRequest(AliyunOSSFileSystemStore.this.bucketName);
                listRequest.setPrefix(AliyunOSSUtils.maybeAddTrailingSlash(prefix));
                listRequest.setMaxKeys(maxListingLength);
                listRequest.setMarker(this.nextMarker);
                listRequest.setDelimiter(delimiter);
                ObjectListing listing = AliyunOSSFileSystemStore.this.ossClient.listObjects(listRequest);
                List<FileStatus> stats = new ArrayList<>(listing.getObjectSummaries().size() + listing.getCommonPrefixes().size());

                for (OSSObjectSummary summary : listing.getObjectSummaries()) {
                    String key = summary.getKey();
                    Path path = fs.makeQualified(new Path("/" + key));
                    if (filter.accept(path) && acceptor.accept(path, summary)) {
                        // Keys ending with "/" are reported as directories.
                        stats.add(new OSSFileStatus(summary.getSize(), key.endsWith("/"), 1, fs.getDefaultBlockSize(path), summary.getLastModified().getTime(), path, AliyunOSSFileSystemStore.this.username));
                    }
                }

                for (String commonPrefix : listing.getCommonPrefixes()) {
                    Path prefixPath = fs.makeQualified(new Path("/" + commonPrefix));
                    if (filter.accept(prefixPath) && acceptor.accept(prefixPath, commonPrefix)) {
                        // Common prefixes are always reported as zero-length directories.
                        stats.add(new OSSFileStatus(0L, true, 1, 0L, 0L, prefixPath, AliyunOSSFileSystemStore.this.username));
                    }
                }

                this.batchIterator = stats.listIterator();
                if (listing.isTruncated()) {
                    this.nextMarker = listing.getNextMarker();
                } else {
                    this.meetEnd = true;
                }

                AliyunOSSFileSystemStore.this.statistics.incrementReadOps(1);
                return this.batchIterator.hasNext();
            }
        };
    }

    /**
     * Uploads one part of a multipart upload from a local file, making up to three attempts.
     *
     * @param file local file holding the part's bytes
     * @param key destination object key
     * @param uploadId id of the multipart upload this part belongs to
     * @param idx part number
     * @return the ETag of the uploaded part
     * @throws IOException if all three attempts fail (the last failure is the cause),
     *         or if closing the file stream fails
     */
    public PartETag uploadPart(File file, String key, String uploadId, int idx) throws IOException {
        Exception lastFailure = null;
        int attemptsLeft = 3;

        while (attemptsLeft > 0) {
            InputStream partStream = null;

            try {
                partStream = new FileInputStream(file);
                UploadPartRequest request = new UploadPartRequest();
                request.setBucketName(this.bucketName);
                request.setKey(key);
                request.setUploadId(uploadId);
                request.setInputStream(partStream);
                request.setPartSize(file.length());
                request.setPartNumber(idx);
                UploadPartResult result = this.ossClient.uploadPart(request);
                this.statistics.incrementWriteOps(1);
                return result.getPartETag();
            } catch (Exception e) {
                LOG.debug("Failed to upload " + file.getPath() + ", try again.", e);
                lastFailure = e;
            } finally {
                // A fresh stream is opened for every attempt, so always release the current one.
                if (partStream != null) {
                    partStream.close();
                }
            }

            --attemptsLeft;
        }

        assert lastFailure != null;

        throw new IOException("Failed to upload " + file.getPath() + " for 3 times.", lastFailure);
    }

    /**
     * Initiates a multipart upload for {@code key} in this store's bucket.
     *
     * @param key destination object key
     * @return the upload id returned by the initiate call
     */
    public String getUploadId(String key) {
        InitiateMultipartUploadRequest request = new InitiateMultipartUploadRequest(this.bucketName, key);
        return this.ossClient.initiateMultipartUpload(request).getUploadId();
    }

    /**
     * Completes a multipart upload after sorting the parts by ascending part number.
     *
     * <p>The supplied list is sorted in place.
     *
     * @param key destination object key
     * @param uploadId id of the multipart upload to complete
     * @param partETags ETags of the uploaded parts
     * @return the result returned by the complete call
     */
    public CompleteMultipartUploadResult completeMultipartUpload(String key, String uploadId, List<PartETag> partETags) {
        partETags.sort(new PartNumberAscendComparator());
        return this.ossClient.completeMultipartUpload(
                new CompleteMultipartUploadRequest(this.bucketName, key, uploadId, partETags));
    }

    /**
     * Aborts the multipart upload identified by {@code uploadId} for {@code key}.
     *
     * @param key object key of the upload
     * @param uploadId id of the multipart upload to abort
     */
    public void abortMultipartUpload(String key, String uploadId) {
        this.ossClient.abortMultipartUpload(new AbortMultipartUploadRequest(this.bucketName, key, uploadId));
    }

    /**
     * Orders {@link PartETag}s by ascending part number.
     *
     * <p>Equal part numbers compare as 0, as the {@link Comparator} contract requires.
     */
    private static class PartNumberAscendComparator implements Comparator<PartETag>, Serializable {
        private PartNumberAscendComparator() {
        }

        @Override
        public int compare(PartETag o1, PartETag o2) {
            return Integer.compare(o1.getPartNumber(), o2.getPartNumber());
        }
    }
}
8、AliyunOSSInputStream
package org.apache.hadoop.fs.aliyun.oss;

import com.google.common.util.concurrent.MoreExecutors;
import java.io.EOFException;
import java.io.IOException;
import java.util.ArrayDeque;
import java.util.Queue;
import java.util.concurrent.ExecutorService;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.aliyun.oss.ReadBuffer.STATUS;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class AliyunOSSInputStream extends FSInputStream {
    public static final Logger LOG = LoggerFactory.getLogger(AliyunOSSInputStream.class);
    private final long downloadPartSize;
    private AliyunOSSFileSystemStore store;
    private final String key;
    private FileSystem.Statistics statistics;
    private boolean closed;
    private long contentLength;
    private long position;
    private long partRemaining;
    private byte[] buffer;
    private int maxReadAheadPartNumber;
    private long expectNextPos;
    private long lastByteStart;
    private ExecutorService readAheadExecutorService;
    private Queue<ReadBuffer> readBufferQueue = new ArrayDeque();

    /**
     * Creates a stream over the object {@code key} and immediately opens it at offset 0.
     *
     * @param conf configuration supplying {@code fs.oss.multipart.download.size} (default 524288)
     * @param readAheadExecutorService executor that runs the read-ahead download tasks
     * @param maxReadAheadPartNumber upper bound on queued read-ahead buffers
     * @param store store used by the download tasks
     * @param key object key to read
     * @param contentLength total length of the object
     * @param statistics file system statistics to update; may be null
     * @throws IOException if the initial open at offset 0 fails
     */
    public AliyunOSSInputStream(Configuration conf, ExecutorService readAheadExecutorService, int maxReadAheadPartNumber, AliyunOSSFileSystemStore store, String key, Long contentLength, FileSystem.Statistics statistics) throws IOException {
        this.readAheadExecutorService = MoreExecutors.listeningDecorator(readAheadExecutorService);

        // What is being read, and through which store.
        this.store = store;
        this.key = key;
        this.statistics = statistics;
        this.contentLength = contentLength;

        // Read-ahead tuning.
        this.downloadPartSize = conf.getLong("fs.oss.multipart.download.size", 524288L);
        this.maxReadAheadPartNumber = maxReadAheadPartNumber;

        // Nothing has been scheduled yet.
        this.expectNextPos = 0L;
        this.lastByteStart = -1L;

        this.reopen(0L);
        this.closed = false;
    }

    // 重新打开输入流
    /**
     * Positions the stream at {@code pos}: schedules read-ahead downloads, then waits for the buffer
     * that starts the new part and makes it the current buffer.
     *
     * <p>If the wait is interrupted, the thread's interrupt flag is restored before this method
     * fails with an {@link IOException}.
     *
     * @param pos absolute offset to open at; must be within {@code [0, contentLength]}
     * @throws EOFException if {@code pos} is negative or beyond {@code contentLength}
     * @throws IOException if no buffer could be obtained (download error or interrupted wait)
     */
    private synchronized void reopen(long pos) throws IOException {
        if (pos < 0L) {
            throw new EOFException("Cannot seek at negative position:" + pos);
        }

        if (pos > this.contentLength) {
            throw new EOFException("Cannot seek after EOF, contentLength:" + this.contentLength + " position:" + pos);
        }

        // The last part of the object may be shorter than the configured part size.
        long partSize;
        if (pos + this.downloadPartSize > this.contentLength) {
            partSize = this.contentLength - pos;
        } else {
            partSize = this.downloadPartSize;
        }

        if (this.buffer != null) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("Aborting old stream to open at pos " + pos);
            }

            this.buffer = null;
        }

        boolean isRandomIO = true;
        if (pos == this.expectNextPos) {
            isRandomIO = false;
        } else {
            // Drop queued buffers until one starting exactly at pos is at the head (or the queue is empty).
            while (this.readBufferQueue.size() != 0 && this.readBufferQueue.element().getByteStart() != pos) {
                this.readBufferQueue.poll();
            }
        }

        this.expectNextPos = pos + partSize;
        int currentSize = this.readBufferQueue.size();
        if (currentSize == 0) {
            // Chosen so that the first buffer scheduled below starts exactly at pos.
            this.lastByteStart = pos - partSize;
        } else {
            ReadBuffer[] readBuffers = this.readBufferQueue.toArray(new ReadBuffer[currentSize]);
            this.lastByteStart = readBuffers[currentSize - 1].getByteStart();
        }

        int maxLen = this.maxReadAheadPartNumber - currentSize;

        for (int i = 0; i < maxLen && i < (currentSize + 1) * 2 && this.lastByteStart + partSize * (long)(i + 1) <= this.contentLength; ++i) {
            long byteStart = this.lastByteStart + partSize * (long)(i + 1);
            long byteEnd = byteStart + partSize - 1L;
            if (byteEnd >= this.contentLength) {
                byteEnd = this.contentLength - 1L;
            }

            ReadBuffer ahead = new ReadBuffer(byteStart, byteEnd);
            if (ahead.getBuffer().length == 0) {
                // Nothing to download for an empty range.
                ahead.setStatus(STATUS.SUCCESS);
            } else {
                this.readAheadExecutorService.execute(new AliyunOSSFileReaderTask(this.key, this.store, ahead));
            }

            this.readBufferQueue.add(ahead);
            if (isRandomIO) {
                // Random access: schedule only a single buffer.
                break;
            }
        }

        ReadBuffer current = this.readBufferQueue.poll();
        current.lock();

        try {
            // Block until the buffer leaves the INIT state.
            current.await(STATUS.INIT);
            if (current.getStatus() == STATUS.ERROR) {
                this.buffer = null;
            } else {
                this.buffer = current.getBuffer();
            }
        } catch (InterruptedException e) {
            LOG.warn("interrupted when wait a read buffer");
            // Preserve the interrupt for callers; this.buffer is still null, so the check below fails the call.
            Thread.currentThread().interrupt();
        } finally {
            current.unlock();
        }

        if (this.buffer == null) {
            throw new IOException("Null IO stream");
        }

        this.position = pos;
        this.partRemaining = partSize;
    }

    // 输入流读取
    /**
     * Reads a single byte, reopening the stream at the current position when the current part is used up.
     *
     * <p>The bytes-read statistic is incremented by 1 for every byte returned.
     *
     * @return the byte as a value in {@code [0, 255]}, or -1 when no bytes remain in the current part
     * @throws IOException if the stream is closed or reopening fails
     */
    public synchronized int read() throws IOException {
        this.checkNotClosed();
        if (this.partRemaining <= 0L && this.position < this.contentLength) {
            this.reopen(this.position);
        }

        if (this.partRemaining == 0L) {
            return -1;
        }

        // The unread bytes are the last partRemaining bytes of the buffer.
        int byteRead = this.buffer[this.buffer.length - (int)this.partRemaining] & 255;
        ++this.position;
        --this.partRemaining;

        if (this.statistics != null) {
            // Count one byte, not the value of the byte that was read.
            this.statistics.incrementBytesRead(1L);
        }

        return byteRead;
    }

    /**
     * Fails if this stream has been closed.
     *
     * @throws IOException if {@code closed} is set
     */
    private void checkNotClosed() throws IOException {
        if (!closed) {
            return;
        }

        throw new IOException("Stream is closed!");
    }

    /**
     * Reads up to {@code len} bytes into {@code buf} starting at {@code off}, reopening the stream
     * whenever the current part is exhausted.
     *
     * <p>Bytes are copied one buffered part at a time. The per-part copy is bounded by the bytes still
     * requested ({@code len - bytesRead}), independent of {@code off}.
     *
     * @param buf destination array
     * @param off start offset in {@code buf}
     * @param len maximum number of bytes to read
     * @return the number of bytes read, 0 when {@code len} is 0, or -1 when nothing could be read
     * @throws NullPointerException if {@code buf} is null
     * @throws IndexOutOfBoundsException if {@code off}/{@code len} do not describe a range inside {@code buf}
     * @throws IOException if the stream is closed, reopening fails, or the part bookkeeping is inconsistent
     */
    public synchronized int read(byte[] buf, int off, int len) throws IOException {
        this.checkNotClosed();
        if (buf == null) {
            throw new NullPointerException();
        }

        if (off < 0 || len < 0 || len > buf.length - off) {
            throw new IndexOutOfBoundsException();
        }

        if (len == 0) {
            return 0;
        }

        int bytesRead = 0;

        while (this.position < this.contentLength && bytesRead < len) {
            if (this.partRemaining == 0L) {
                this.reopen(this.position);
            }

            // The unread bytes are the last partRemaining bytes of the buffer.
            int start = this.buffer.length - (int)this.partRemaining;
            int toCopy = Math.min(this.buffer.length - start, len - bytesRead);

            if (toCopy > 0) {
                System.arraycopy(this.buffer, start, buf, off + bytesRead, toCopy);
                bytesRead += toCopy;
                this.position += (long)toCopy;
                this.partRemaining -= (long)toCopy;
            } else if (this.partRemaining != 0L) {
                throw new IOException("Failed to read from stream. Remaining:" + this.partRemaining);
            }
        }

        if (this.statistics != null && bytesRead > 0) {
            this.statistics.incrementBytesRead((long)bytesRead);
        }

        return bytesRead == 0 ? -1 : bytesRead;
    }

    /**
     * Marks the stream closed and releases the current buffer. Calling it again has no effect.
     */
    public synchronized void close() throws IOException {
        if (closed) {
            return;
        }

        closed = true;
        buffer = null;
    }

    /**
     * Returns the number of bytes between the current position and the end of the object,
     * capped at {@link Integer#MAX_VALUE}.
     *
     * @throws IOException if the stream is closed
     */
    public synchronized int available() throws IOException {
        checkNotClosed();
        final long bytesLeft = contentLength - position;
        return (int) Math.min(bytesLeft, (long) Integer.MAX_VALUE);
    }

    /**
     * Moves the read position to {@code pos}.
     *
     * <p>A forward seek that stays inside the current part only adjusts the bookkeeping;
     * any other move reopens the stream at {@code pos}.
     *
     * @param pos absolute target offset
     * @throws IOException if the stream is closed or reopening fails
     */
    public synchronized void seek(long pos) throws IOException {
        checkNotClosed();
        if (pos == position) {
            return;
        }

        final boolean insideCurrentPart = pos > position && pos < position + partRemaining;
        if (!insideCurrentPart) {
            reopen(pos);
            return;
        }

        final long skipped = pos - position;
        position = pos;
        partRemaining -= skipped;
    }

    /**
     * Returns the current read position.
     *
     * @throws IOException if the stream is closed
     */
    public synchronized long getPos() throws IOException {
        checkNotClosed();
        return position;
    }

    /**
     * Always reports that no alternative source was found.
     *
     * @param targetPos ignored
     * @return always false
     * @throws IOException if the stream is closed
     */
    public boolean seekToNewSource(long targetPos) throws IOException {
        checkNotClosed();
        return false;
    }

    /** Returns the offset at which the next sequential reopen is expected. */
    public long getExpectNextPos() {
        return expectNextPos;
    }
}
9、AliyunOSSUtils
package org.apache.hadoop.fs.aliyun.oss;

import com.aliyun.oss.common.auth.CredentialsProvider;
import com.google.common.base.Preconditions;
import java.io.File;
import java.io.IOException;
import java.net.URI;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.LocalDirAllocator;
import org.apache.hadoop.security.ProviderUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public final class AliyunOSSUtils {
    private static final Logger LOG = LoggerFactory.getLogger(AliyunOSSUtils.class);
    private static LocalDirAllocator directoryAllocator;

    /** Private constructor: this class only exposes static helpers. */
    private AliyunOSSUtils() {
    }

    /**
     * Reads an int option and falls back to {@code defVal}, with a warning, when the configured value is not positive.
     *
     * @param conf configuration to read from
     * @param key option name
     * @param defVal default value, also used as the fallback
     * @return the configured value if it is greater than 0, otherwise {@code defVal}
     */
    public static int intPositiveOption(Configuration conf, String key, int defVal) {
        final int configured = conf.getInt(key, defVal);
        if (configured > 0) {
            return configured;
        }

        LOG.warn(key + " is configured to " + configured + ", will use default value: " + defVal);
        return defVal;
    }

    // 通过 key 获取值
    /**
     * Looks up {@code key} through {@code Configuration.getPassword} and returns it trimmed.
     *
     * @param conf configuration to read from
     * @param key option name
     * @return the trimmed value, or an empty string when the lookup returns null
     * @throws IOException if the lookup fails; the original exception is kept as the cause
     */
    public static String getValueWithKey(Configuration conf, String key) throws IOException {
        final char[] secret;
        try {
            secret = conf.getPassword(key);
        } catch (IOException e) {
            throw new IOException("Cannot find password option " + key, e);
        }

        if (secret == null) {
            return "";
        }

        return new String(secret).trim();
    }

    // 计算分片(part)大小
    /**
     * Picks a part size for an upload of {@code contentLength} bytes.
     *
     * @param contentLength total size of the content
     * @param minPartSize smallest part size the caller accepts
     * @return the larger of {@code minPartSize} and {@code contentLength / 10000 + 1}
     */
    public static long calculatePartSize(long contentLength, long minPartSize) {
        final long sizeForTenThousandParts = contentLength / 10000L + 1L;
        return sizeForTenThousandParts > minPartSize ? sizeForTenThousandParts : minPartSize;
    }

    // 获取凭证提供器(CredentialsProvider)
    /**
     * Builds the credentials provider selected by {@code fs.oss.credentials.provider}.
     *
     * <p>When the option is empty, an {@code AliyunCredentialsProvider} is created from a configuration
     * filtered through {@code ProviderUtils.excludeIncompatibleCredentialProviders}. Otherwise the named
     * class is loaded and instantiated reflectively, trying a {@code (URI, Configuration)} constructor
     * first and then a no-argument constructor.
     *
     * @param uri file system URI handed to the provider's two-argument constructor
     * @param conf configuration to read the option from
     * @return the credentials provider
     * @throws IOException if the class cannot be found, has no usable constructor, or cannot be instantiated
     */
    public static CredentialsProvider getCredentialsProvider(URI uri, Configuration conf) throws IOException {
        final String className = conf.getTrimmed("fs.oss.credentials.provider");
        if (StringUtils.isEmpty(className)) {
            Configuration filteredConf = ProviderUtils.excludeIncompatibleCredentialProviders(conf, AliyunOSSFileSystem.class);
            return new AliyunCredentialsProvider(filteredConf);
        }

        try {
            LOG.debug("Credential provider class is:" + className);
            final Class<?> providerClass = Class.forName(className);

            try {
                return (CredentialsProvider)providerClass.getDeclaredConstructor(URI.class, Configuration.class).newInstance(uri, conf);
            } catch (SecurityException | NoSuchMethodException noTwoArgConstructor) {
                // Fall back to the no-argument constructor.
                return (CredentialsProvider)providerClass.getDeclaredConstructor().newInstance();
            }
        } catch (ClassNotFoundException e) {
            throw new IOException(className + " not found.", e);
        } catch (SecurityException | NoSuchMethodException e) {
            throw new IOException(String.format("%s constructor exception.  A class specified in %s must provide an accessible constructor accepting URI and Configuration, or an accessible default constructor.", className, "fs.oss.credentials.provider"), e);
        } catch (IllegalArgumentException | ReflectiveOperationException e) {
            throw new IOException(className + " instantiation exception.", e);
        }
    }

    /**
     * Appends {@code '/'} to a non-empty key that does not already end with one.
     *
     * @param key object key; may be null or empty
     * @return the key with a trailing slash, or the key unchanged when it is empty or already ends with {@code "/"}
     */
    public static String maybeAddTrailingSlash(String key) {
        if (StringUtils.isEmpty(key) || key.endsWith("/")) {
            return key;
        }

        return key + '/';
    }

    /**
     * Tells whether an object looks like a directory marker.
     *
     * @param name object key
     * @param size object size
     * @return true only for a non-empty name that ends with {@code "/"} and has size 0
     */
    public static boolean objectRepresentsDirectory(String name, long size) {
        if (size != 0L) {
            return false;
        }

        return StringUtils.isNotEmpty(name) && name.endsWith("/");
    }

    /**
     * Creates a temporary file through a {@code LocalDirAllocator} bound to {@code fs.oss.buffer.dir}.
     *
     * <p>When {@code fs.oss.buffer.dir} is unset, it is first set in {@code conf} to
     * {@code hadoop.tmp.dir + "/oss"}. The allocator is created on first use and kept in a static field.
     *
     * @param path path argument forwarded to the allocator
     * @param size size argument forwarded to the allocator
     * @param conf configuration to read from; may be modified as described above
     * @return the file returned by the allocator
     * @throws IOException if the allocator fails
     */
    public static File createTmpFileForWrite(String path, long size, Configuration conf) throws IOException {
        final boolean bufferDirMissing = conf.get("fs.oss.buffer.dir") == null;
        if (bufferDirMissing) {
            String fallbackDir = conf.get("hadoop.tmp.dir") + "/oss";
            conf.set("fs.oss.buffer.dir", fallbackDir);
        }

        if (directoryAllocator == null) {
            directoryAllocator = new LocalDirAllocator("fs.oss.buffer.dir");
        }

        File tmpFile = directoryAllocator.createTmpFileForWrite(path, size, conf);
        return tmpFile;
    }

    /**
     * Reads an int option and rejects values below {@code min}.
     *
     * @param conf configuration to read from
     * @param key option name
     * @param defVal default value
     * @param min smallest accepted value
     * @return the configured value
     * @throws IllegalArgumentException if the value is below {@code min} (raised by {@code Preconditions.checkArgument})
     */
    static int intOption(Configuration conf, String key, int defVal, int min) {
        final int value = conf.getInt(key, defVal);
        final String belowMinimum = String.format("Value of %s: %d is below the minimum value %d", key, value, min);
        Preconditions.checkArgument(value >= min, belowMinimum);
        LOG.debug("Value of {} is {}", key, value);
        return value;
    }

    /**
     * Reads a long option and rejects values below {@code min}.
     *
     * @param conf configuration to read from
     * @param key option name
     * @param defVal default value
     * @param min smallest accepted value
     * @return the configured value
     * @throws IllegalArgumentException if the value is below {@code min} (raised by {@code Preconditions.checkArgument})
     */
    static long longOption(Configuration conf, String key, long defVal, long min) {
        final long value = conf.getLong(key, defVal);
        final String belowMinimum = String.format("Value of %s: %d is below the minimum value %d", key, value, min);
        Preconditions.checkArgument(value >= min, belowMinimum);
        LOG.debug("Value of {} is {}", key, value);
        return value;
    }

    /**
     * Reads a multipart size option and clamps it to {@code [102400, 2147483647]}.
     *
     * <p>Both warnings name the option that was actually read ({@code property}).
     *
     * @param conf configuration to read from
     * @param property option name
     * @param defVal default value
     * @return the configured size, raised to 102400 or capped at 2147483647 when out of range
     */
    public static long getMultipartSizeProperty(Configuration conf, String property, long defVal) {
        long partSize = conf.getLong(property, defVal);
        if (partSize < 102400L) {
            LOG.warn("{} must be at least 100 KB; configured value is {}", property, partSize);
            partSize = 102400L;
        } else if (partSize > 2147483647L) {
            // Report the option passed in, not a fixed key, so the warning matches what the caller read.
            LOG.warn("oss: {} capped to ~2.14GB(maximum allowed size with current output mechanism)", property);
            partSize = 2147483647L;
        }

        return partSize;
    }
}
10、Constants
package org.apache.hadoop.fs.aliyun.oss;

import com.aliyun.oss.common.utils.VersionInfoUtils;

/**
 * Names of the {@code fs.oss.*} configuration options and their default values.
 *
 * <p>The class is final with a private constructor and holds only constants.
 */
public final class Constants {
    // User agent; the default is taken from the OSS SDK's VersionInfoUtils.
    public static final String USER_AGENT_PREFIX = "fs.oss.user.agent.prefix";
    public static final String USER_AGENT_PREFIX_DEFAULT = VersionInfoUtils.getDefaultUserAgent();
    // Credentials.
    public static final String CREDENTIALS_PROVIDER_KEY = "fs.oss.credentials.provider";
    public static final int OSS_DEFAULT_PORT = -1;
    public static final String ACCESS_KEY_ID = "fs.oss.accessKeyId";
    public static final String ACCESS_KEY_SECRET = "fs.oss.accessKeySecret";
    public static final String SECURITY_TOKEN = "fs.oss.securityToken";
    // Connection, endpoint and proxy.
    public static final String MAXIMUM_CONNECTIONS_KEY = "fs.oss.connection.maximum";
    public static final int MAXIMUM_CONNECTIONS_DEFAULT = 32;
    public static final String SECURE_CONNECTIONS_KEY = "fs.oss.connection.secure.enabled";
    public static final boolean SECURE_CONNECTIONS_DEFAULT = true;
    public static final String ENDPOINT_KEY = "fs.oss.endpoint";
    public static final String PROXY_HOST_KEY = "fs.oss.proxy.host";
    public static final String PROXY_PORT_KEY = "fs.oss.proxy.port";
    public static final String PROXY_USERNAME_KEY = "fs.oss.proxy.username";
    public static final String PROXY_PASSWORD_KEY = "fs.oss.proxy.password";
    public static final String PROXY_DOMAIN_KEY = "fs.oss.proxy.domain";
    public static final String PROXY_WORKSTATION_KEY = "fs.oss.proxy.workstation";
    // Retries and timeouts. NOTE(review): the time unit is not visible here; presumably milliseconds, confirm.
    public static final String MAX_ERROR_RETRIES_KEY = "fs.oss.attempts.maximum";
    public static final int MAX_ERROR_RETRIES_DEFAULT = 10;
    public static final String ESTABLISH_TIMEOUT_KEY = "fs.oss.connection.establish.timeout";
    public static final int ESTABLISH_TIMEOUT_DEFAULT = 50000;
    public static final String SOCKET_TIMEOUT_KEY = "fs.oss.connection.timeout";
    public static final int SOCKET_TIMEOUT_DEFAULT = 200000;
    // Listing page size.
    public static final String MAX_PAGING_KEYS_KEY = "fs.oss.paging.maximum";
    public static final int MAX_PAGING_KEYS_DEFAULT = 1000;
    // Multipart upload. 104857600 = 100 * 1024 * 1024; 102400 = 100 * 1024; 20971520 = 20 * 1024 * 1024.
    public static final String MULTIPART_UPLOAD_PART_SIZE_KEY = "fs.oss.multipart.upload.size";
    public static final long MULTIPART_UPLOAD_PART_SIZE_DEFAULT = 104857600L;
    public static final int MULTIPART_MIN_SIZE = 102400;
    public static final int MULTIPART_UPLOAD_PART_NUM_LIMIT = 10000;
    public static final String MIN_MULTIPART_UPLOAD_THRESHOLD_KEY = "fs.oss.multipart.upload.threshold";
    public static final long MIN_MULTIPART_UPLOAD_THRESHOLD_DEFAULT = 20971520L;
    // Multipart download and read-ahead. 524288 = 512 * 1024.
    public static final String MULTIPART_DOWNLOAD_SIZE_KEY = "fs.oss.multipart.download.size";
    public static final long MULTIPART_DOWNLOAD_SIZE_DEFAULT = 524288L;
    public static final String MULTIPART_DOWNLOAD_THREAD_NUMBER_KEY = "fs.oss.multipart.download.threads";
    public static final int MULTIPART_DOWNLOAD_THREAD_NUMBER_DEFAULT = 10;
    public static final String MAX_TOTAL_TASKS_KEY = "fs.oss.max.total.tasks";
    public static final int MAX_TOTAL_TASKS_DEFAULT = 128;
    public static final String MULTIPART_DOWNLOAD_AHEAD_PART_MAX_NUM_KEY = "fs.oss.multipart.download.ahead.part.max.number";
    public static final int MULTIPART_DOWNLOAD_AHEAD_PART_MAX_NUM_DEFAULT = 4;
    // Copy tasks. 10485760 = 10 * 1024 * 1024.
    public static final String MAX_COPY_TASKS_KEY = "fs.oss.max.copy.tasks";
    public static final int MAX_COPY_TASKS_DEFAULT = 10485760;
    public static final String MAX_COPY_THREADS_NUM_KEY = "fs.oss.max.copy.threads";
    public static final int MAX_COPY_THREADS_DEFAULT = 25;
    public static final String MAX_CONCURRENT_COPY_TASKS_PER_DIR_KEY = "fs.oss.max.copy.tasks.per.dir";
    public static final int MAX_CONCURRENT_COPY_TASKS_PER_DIR_DEFAULT = 5;
    // Local buffering, ACL and encryption.
    public static final String BUFFER_DIR_KEY = "fs.oss.buffer.dir";
    public static final String CANNED_ACL_KEY = "fs.oss.acl.default";
    public static final String CANNED_ACL_DEFAULT = "";
    public static final String SERVER_SIDE_ENCRYPTION_ALGORITHM_KEY = "fs.oss.server-side-encryption-algorithm";
    // Block size (67108864 = 64 * 1024 * 1024) and URI scheme.
    public static final String FS_OSS_BLOCK_SIZE_KEY = "fs.oss.block.size";
    public static final int FS_OSS_BLOCK_SIZE_DEFAULT = 67108864;
    public static final String FS_OSS = "oss";
    // Thread keep-alive and concurrently active upload blocks.
    public static final String KEEPALIVE_TIME_KEY = "fs.oss.threads.keepalivetime";
    public static final int KEEPALIVE_TIME_DEFAULT = 60;
    public static final String UPLOAD_ACTIVE_BLOCKS_KEY = "fs.oss.upload.active.blocks";
    public static final int UPLOAD_ACTIVE_BLOCKS_DEFAULT = 4;

    /** Not instantiable. */
    private Constants() {
    }
}
11、FileStatusAcceptor
package org.apache.hadoop.fs.aliyun.oss;

import com.aliyun.oss.model.OSSObjectSummary;
import org.apache.hadoop.fs.Path;

// 文件状态接收器
/**
 * Decides which listing entries (object summaries and common prefixes) are turned into file statuses.
 */
public interface FileStatusAcceptor {
    /** Returns true when the object described by the summary should be included. */
    boolean accept(Path var1, OSSObjectSummary var2);

    /** Returns true when the common prefix should be included. */
    boolean accept(Path var1, String var2);

    /** Accepts every entry except the one whose path equals the path given at construction. */
    public static class AcceptAllButSelf implements FileStatusAcceptor {
        private final Path qualifiedPath;

        public AcceptAllButSelf(Path qualifiedPath) {
            this.qualifiedPath = qualifiedPath;
        }

        public boolean accept(Path keyPath, OSSObjectSummary summary) {
            return isNotSelf(keyPath);
        }

        public boolean accept(Path keyPath, String prefix) {
            return isNotSelf(keyPath);
        }

        private boolean isNotSelf(Path keyPath) {
            if (keyPath.equals(qualifiedPath)) {
                return false;
            }

            return true;
        }
    }

    /**
     * Accepts only objects that are neither the path given at construction nor a directory marker;
     * common prefixes are never accepted.
     */
    public static class AcceptFilesOnly implements FileStatusAcceptor {
        private final Path qualifiedPath;

        public AcceptFilesOnly(Path qualifiedPath) {
            this.qualifiedPath = qualifiedPath;
        }

        public boolean accept(Path keyPath, OSSObjectSummary summary) {
            if (keyPath.equals(qualifiedPath)) {
                return false;
            }

            boolean isDirectoryMarker = AliyunOSSUtils.objectRepresentsDirectory(summary.getKey(), summary.getSize());
            return !isDirectoryMarker;
        }

        public boolean accept(Path keyPath, String prefix) {
            return false;
        }
    }
}
12、OSS
package org.apache.hadoop.fs.aliyun.oss;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.DelegateToFileSystem;

// OSS 的 AbstractFileSystem 实现(通过 DelegateToFileSystem 委托给 AliyunOSSFileSystem)
public class OSS extends DelegateToFileSystem {
    public OSS(URI theUri, Configuration conf) throws IOException, URISyntaxException {
        super(theUri, new AliyunOSSFileSystem(), conf, "oss", false);
    }

    public int getUriDefaultPort() {
        return -1;
    }
}
13、OSSFileStatus
package org.apache.hadoop.fs.aliyun.oss;

import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.classification.InterfaceStability.Evolving;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;

@Private
@Evolving
// OSS 文件状态
/**
 * {@code FileStatus} whose owner and group are both set to the supplied user.
 */
public class OSSFileStatus extends FileStatus {
    /**
     * @param length passed to the {@code FileStatus} constructor
     * @param isdir passed to the {@code FileStatus} constructor
     * @param blockReplication passed to the {@code FileStatus} constructor
     * @param blocksize passed to the {@code FileStatus} constructor
     * @param modTime passed to the {@code FileStatus} constructor
     * @param path passed to the {@code FileStatus} constructor
     * @param user value used for both owner and group
     */
    public OSSFileStatus(long length, boolean isdir, int blockReplication, long blocksize, long modTime, Path path, String user) {
        super(length, isdir, blockReplication, blocksize, modTime, path);
        setOwner(user);
        setGroup(user);
    }
}
14、ReadBuffer
package org.apache.hadoop.fs.aliyun.oss;

import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.ReentrantLock;

// 读取字节缓存块
/**
 * A byte buffer for the inclusive range {@code [byteStart, byteEnd]} together with a status
 * and a lock/condition pair for waiting on status changes.
 *
 * <p>{@link #await(STATUS)} and {@link #signalAll()} use a condition created from the internal
 * lock, so callers must hold the lock (via {@link #lock()}) around them.
 */
public class ReadBuffer {
    private final ReentrantLock lock = new ReentrantLock();
    private final Condition readyCondition = lock.newCondition();
    private final byte[] buffer;
    private final long byteStart;
    private final long byteEnd;
    private STATUS status;

    /**
     * Allocates a buffer of {@code byteEnd - byteStart + 1} bytes in the {@code INIT} state.
     */
    public ReadBuffer(long byteStart, long byteEnd) {
        final int length = (int)(byteEnd - byteStart) + 1;
        this.buffer = new byte[length];
        this.status = STATUS.INIT;
        this.byteStart = byteStart;
        this.byteEnd = byteEnd;
    }

    public void lock() {
        lock.lock();
    }

    public void unlock() {
        lock.unlock();
    }

    /** Waits on the condition for as long as the status equals {@code waitStatus}. */
    public void await(STATUS waitStatus) throws InterruptedException {
        for (;;) {
            if (status != waitStatus) {
                return;
            }

            readyCondition.await();
        }
    }

    public void signalAll() {
        readyCondition.signalAll();
    }

    public byte[] getBuffer() {
        return buffer;
    }

    public STATUS getStatus() {
        return status;
    }

    public void setStatus(STATUS status) {
        this.status = status;
    }

    public long getByteStart() {
        return byteStart;
    }

    public long getByteEnd() {
        return byteEnd;
    }

    /** States a buffer can be in. */
    enum STATUS {
        INIT,
        SUCCESS,
        ERROR
    }
}
4.2 minio:7.0.2.jar 中对应的类进行对比改造
1、AliyunOSSUtils——>MinIoUtils
  • 改造点
    • private static final Logger LOG = LoggerFactory.getLogger(AliyunOSSUtils.class); ——> 将其中的类名 AliyunOSSUtils 修改为 MinIoUtils
    • com.aliyun.oss.common.auth.CredentialsProvider ——> 替换为 MinIo 中对应的 StaticProvider
    • AliyunCredentialsProvider ——> 替换为 MinIoCredentialsProvider,并做相应修改
package org.apache.hadoop.fs.aliyun.oss;

import com.aliyun.oss.common.auth.CredentialsProvider;
import com.google.common.base.Preconditions;
import java.io.File;
import java.io.IOException;
import java.net.URI;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.LocalDirAllocator;
import org.apache.hadoop.security.ProviderUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public final class AliyunOSSUtils {
    private static final Logger LOG = LoggerFactory.getLogger(AliyunOSSUtils.class);
    private static LocalDirAllocator directoryAllocator;

    private AliyunOSSUtils() {
    }
    
    public static int intPositiveOption(Configuration conf, String key, int defVal) {
        int v = conf.getInt(key, defVal);
        if (v <= 0) {
            LOG.warn(key + " is configured to " + v + ", will use default value: " + defVal);
            v = defVal;
        }
    
        return v;
    }
    
    ## 通过key获取值
    public static String getValueWithKey(Configuration conf, String key) throws IOException {
        try {
            char[] pass = conf.getPassword(key);
            return pass != null ? (new String(pass)).trim() : "";
        } catch (IOException var3) {
            throw new IOException("Cannot find password option " + key, var3);
        }
    }
    
    ## 计算分区块大小
    public static long calculatePartSize(long contentLength, long minPartSize) {
        long tmpPartSize = contentLength / 10000L + 1L;
        return Math.max(minPartSize, tmpPartSize);
    }
    
    ## 获取鉴权提供器
    public static CredentialsProvider getCredentialsProvider(URI uri, Configuration conf) throws IOException {
        String className = conf.getTrimmed("fs.oss.credentials.provider");
        Object credentials;
        if (StringUtils.isEmpty(className)) {
            Configuration newConf = ProviderUtils.excludeIncompatibleCredentialProviders(conf, AliyunOSSFileSystem.class);
            credentials = new AliyunCredentialsProvider(newConf);
        } else {
            try {
                LOG.debug("Credential provider class is:" + className);
                Class<?> credClass = Class.forName(className);
    
                try {
                    credentials = (CredentialsProvider)credClass.getDeclaredConstructor(URI.class, Configuration.class).newInstance(uri, conf);
                } catch (SecurityException | NoSuchMethodException var6) {
                    credentials = (CredentialsProvider)credClass.getDeclaredConstructor().newInstance();
                }
            } catch (ClassNotFoundException var7) {
                throw new IOException(className + " not found.", var7);
            } catch (SecurityException | NoSuchMethodException var8) {
                throw new IOException(String.format("%s constructor exception.  A class specified in %s must provide an accessible constructor accepting URI and Configuration, or an accessible default constructor.", className, "fs.oss.credentials.provider"), var8);
            } catch (IllegalArgumentException | ReflectiveOperationException var9) {
                throw new IOException(className + " instantiation exception.", var9);
            }
        }
    
        return (CredentialsProvider)credentials;
    }
    
    public static String maybeAddTrailingSlash(String key) {
        return StringUtils.isNotEmpty(key) && !key.endsWith("/") ? key + '/' : key;
    }
    
    public static boolean objectRepresentsDirectory(String name, long size) {
        return StringUtils.isNotEmpty(name) && name.endsWith("/") && size == 0L;
    }
    
    public static File createTmpFileForWrite(String path, long size, Configuration conf) throws IOException {
        if (conf.get("fs.oss.buffer.dir") == null) {
            conf.set("fs.oss.buffer.dir", conf.get("hadoop.tmp.dir") + "/oss");
        }
    
        if (directoryAllocator == null) {
            directoryAllocator = new LocalDirAllocator("fs.oss.buffer.dir");
        }
    
        return directoryAllocator.createTmpFileForWrite(path, size, conf);
    }
    
    static int intOption(Configuration conf, String key, int defVal, int min) {
        int v = conf.getInt(key, defVal);
        Preconditions.checkArgument(v >= min, String.format("Value of %s: %d is below the minimum value %d", key, v, min));
        LOG.debug("Value of {} is {}", key, v);
        return v;
    }
    
    static long longOption(Configuration conf, String key, long defVal, long min) {
        long v = conf.getLong(key, defVal);
        Preconditions.checkArgument(v >= min, String.format("Value of %s: %d is below the minimum value %d", key, v, min));
        LOG.debug("Value of {} is {}", key, v);
        return v;
    }
    
    public static long getMultipartSizeProperty(Configuration conf, String property, long defVal) {
        long partSize = conf.getLong(property, defVal);
        if (partSize < 102400L) {
            LOG.warn("{} must be at least 100 KB; configured value is {}", property, partSize);
            partSize = 102400L;
        } else if (partSize > 2147483647L) {
            LOG.warn("oss: {} capped to ~2.14GB(maximum allowed size with current output mechanism)", "fs.oss.multipart.upload.size");
            partSize = 2147483647L;
        }
    
        return partSize;
    }

}
2、AliyunCredentialsProvider——>MinIoCredentialsProvider

首先是对于Provider中的第一个方法的改造。

  • 构造方法负责读取xml配置文件中的配置项,基于该原因,先改造所有方法的基类。

    /**
     * Builds the credentials from the XML configuration.
     *
     * @param conf configuration holding {@code fs.oss.accessKeyId},
     *             {@code fs.oss.accessKeySecret} and, optionally,
     *             {@code fs.oss.securityToken}
     * @throws IOException declared by the signature
     * @throws InvalidCredentialsException if the key id or secret cannot be
     *                                     read, or either one is empty
     */
    public MinIoCredentialsProvider(Configuration conf) throws IOException {
        String accessKeyId;
        String accessKeySecret;
        try {
            // Read accessKeyId from the XML configuration.
            accessKeyId = AliyunOSSUtils.getValueWithKey(conf, "fs.oss.accessKeyId");
            // Read accessKeySecret from the XML configuration.
            accessKeySecret = AliyunOSSUtils.getValueWithKey(conf, "fs.oss.accessKeySecret");
        } catch (IOException e) {
            throw new InvalidCredentialsException(e);
        }

        String securityToken;
        try {
            // Read securityToken from the XML configuration; a failed lookup means "no token".
            securityToken = AliyunOSSUtils.getValueWithKey(conf, "fs.oss.securityToken");
        } catch (IOException ignored) {
            securityToken = null;
        }

        if (StringUtils.isEmpty(accessKeyId) || StringUtils.isEmpty(accessKeySecret)) {
            throw new InvalidCredentialsException("AccessKeyId and AccessKeySecret should not be null or empty.");
        }

        // Build the default credentials object, with the token only when one is configured.
        if (StringUtils.isNotEmpty(securityToken)) {
            this.credentials = new DefaultCredentials(accessKeyId, accessKeySecret, securityToken);
        } else {
            this.credentials = new DefaultCredentials(accessKeyId, accessKeySecret);
        }
    }
    
3、AliyunOSSFileSystemStore——>MinIOOSSFileSystemStore

改造点:

  • OSSClient——>MinioClient

    • MinIOClient中包含的参数较少,可以不用像Aliyun中传递太多参数
    • 具体的改造是将以下
      • deleteObject
      • deleteObjects
      • deleteDirs
      • getObjectMetadata
      • storeEmptyFile
      • copyFile
      • multipartCopy
      • uploadObject
      • listObjects
      • retrieve
      • purge
      • uploadPart
      • getUploadId
      • completeMultipartUpload
      • abortMultipartUpload
    • 方法中的OSSClient替换为MinioClient。因为MinioClient中的方法大多是通过对应的XxxArgs.Builder对象来构造请求参数,与AliyunOSS的调用方式差别较大,所以需要重构每个方法中的文件处理方式。
    import com.google.common.collect.Iterators;
    import io.minio.*;
    import io.minio.errors.*;
    import io.minio.messages.Item;
    import org.apache.commons.collections.CollectionUtils;
    import org.apache.commons.lang3.StringUtils;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.*;
    import org.slf4j.Logger;
    import org.slf4j.LoggerFactory;
    
    import java.io.*;
    import java.net.URI;
    import java.security.InvalidKeyException;
    import java.security.NoSuchAlgorithmException;
    import java.util.*;
    
    public class MinIOOSSFileSystemStore {
        public static final Logger LOG = LoggerFactory.getLogger(MinIOOSSFileSystemStore.class);
        private String username;
        private FileSystem.Statistics statistics;
        private MinioClient ossClient;
        private String bucketName;
        private long uploadPartSize;
        private int maxKeys;
        private String serverSideEncryptionAlgorithm;
    
        /** Creates an unconfigured store; the client and settings are assigned in {@code initialize}. */
        public MinIOOSSFileSystemStore() {
        }
    
        /**
         * Configures the store: validates the proxy options, builds the
         * {@link MinioClient} from {@code fs.oss.endpoint} and the access
         * key pair, and reads the upload/paging settings.
         *
         * @param uri  file system URI; its host is used as the bucket name
         * @param conf configuration to read from
         * @param user user name recorded on file statuses created by this store
         * @param stat statistics object updated by the store operations
         * @throws IOException declared by the signature
         * @throws IllegalArgumentException if the proxy options are inconsistent
         *                                  or the endpoint is empty
         */
        public void initialize(URI uri, Configuration conf, String user, FileSystem.Statistics stat) throws IOException {
            this.username = user;
            this.statistics = stat;

            // NOTE(review): the proxy options are only validated; nothing below applies them to the client.
            String proxyHost = conf.getTrimmed("fs.oss.proxy.host", "");
            int proxyPort = conf.getInt("fs.oss.proxy.port", -1);
            if (StringUtils.isNotEmpty(proxyHost)) {
                String proxyUsername = conf.getTrimmed("fs.oss.proxy.username");
                String proxyPassword = conf.getTrimmed("fs.oss.proxy.password");
                if ((proxyUsername == null) != (proxyPassword == null)) {
                    String message = "Proxy error: fs.oss.proxy.username or fs.oss.proxy.password set without the other.";
                    LOG.error(message);
                    throw new IllegalArgumentException(message);
                }
            } else if (proxyPort >= 0) {
                String message = "Proxy error: fs.oss.proxy.port set without fs.oss.proxy.host";
                LOG.error(message);
                throw new IllegalArgumentException(message);
            }

            String endpoint = conf.getTrimmed("fs.oss.endpoint", "");
            if (StringUtils.isEmpty(endpoint)) {
                throw new IllegalArgumentException("MinIO org.apache.hadoop.fs.minio.oss.OSS endpoint should not be null or empty. Please set proper endpoint with 'fs.oss.endpoint'.");
            }

            // Hand the builder the same trimmed endpoint that was just validated.
            this.ossClient = MinioClient.builder()
                    .endpoint(endpoint)
                    .credentials(conf.get("fs.oss.accessKeyId"), conf.get("fs.oss.accessKeySecret"))
                    .build();
            this.uploadPartSize = MinIOOSSUtils.getMultipartSizeProperty(conf, "fs.oss.multipart.upload.size", 104857600L);
            this.serverSideEncryptionAlgorithm = conf.get("fs.oss.server-side-encryption-algorithm", "");
            this.bucketName = uri.getHost();
            this.maxKeys = conf.getInt("fs.oss.paging.maximum", 1000);
        }
    
    
        /**
         * Deletes a single object from the bucket and counts one write operation.
         *
         * @param filename key of the object to delete
         * @throws IOException if the delete request fails; the original failure is kept as the cause
         */
        public void deleteObject(String filename) throws IOException {
            RemoveObjectArgs removeObjectArgs = RemoveObjectArgs.builder()
                    .bucket(bucketName)
                    .object(filename)
                    .build();
            try {
                this.ossClient.removeObject(removeObjectArgs);
            } catch (Exception e) {
                // Keep the key and the cause so the failure can be diagnosed.
                throw new IOException("Failed to delete MinIO org.apache.hadoop.fs.minio.oss.OSS object " + filename, e);
            }
            this.statistics.incrementWriteOps(1);
        }
    
    
        /**
         * Deletes every key in the list, one request per key.
         *
         * @param keysToDelete keys to delete; a null or empty list only logs a warning
         * @throws IOException if any single delete fails (later keys are then not attempted)
         */
        public void deleteObjects(List<String> keysToDelete) throws IOException {
            if (CollectionUtils.isEmpty(keysToDelete)) {
                LOG.warn("Keys to delete is empty.");
                return;
            }

            for (String fileName : keysToDelete) {
                // deleteObject already records the write operation; counting here as well would double it.
                this.deleteObject(fileName);
            }
        }
    
        /**
         * Deletes every object whose key starts with the given directory prefix.
         *
         * @param key directory key; a trailing slash is added when missing
         * @throws IOException if the listing or any delete fails
         */
        public void deleteDirs(String key) throws IOException {
            String prefix = MinIOOSSUtils.maybeAddTrailingSlash(key);
            ListObjectsArgs listArgs = ListObjectsArgs.builder()
                    .bucket(bucketName)
                    .prefix(prefix)
                    // Everything below the prefix must go, not just its direct children.
                    .recursive(true)
                    .maxKeys(this.maxKeys)
                    .build();

            // One pass is enough: the iterable is walked to its end here, so there is
            // nothing left to fetch in a second round.
            Iterable<Result<Item>> results = this.ossClient.listObjects(listArgs);
            this.statistics.incrementReadOps(1);

            List<String> keysToDelete = new ArrayList<>();
            for (Result<Item> result : results) {
                try {
                    keysToDelete.add(result.get().objectName());
                } catch (Exception e) {
                    throw new IOException("Failed to list objects under " + prefix, e);
                }
            }

            this.deleteObjects(keysToDelete);
        }
    
        /**
         * Stores a zero-byte object under the given key and counts one write operation.
         *
         * @param fileName key of the empty object to create
         * @throws IOException if the object cannot be stored
         */
        public void storeEmptyFile(String fileName) throws IOException {
            try (ByteArrayInputStream in = new ByteArrayInputStream(new byte[0])) {
                PutObjectArgs putArgs = PutObjectArgs.builder()
                        .bucket(this.bucketName)
                        .object(fileName)
                        .stream(in, 0, 0)
                        .build();
                this.ossClient.putObject(putArgs);
                this.statistics.incrementWriteOps(1);
            } catch (IOException e) {
                throw e;
            } catch (Exception e) {
                // A silently missing marker object would surface later as a confusing
                // "not found", so report the failure to the caller.
                throw new IOException("Failed to store empty file " + fileName, e);
            }
        }
    
    
        /**
         * Copies {@code srcKey} to {@code dstKey} inside the bucket with a single
         * copy request and counts one write operation.
         *
         * @param srcKey key of the existing object to read from
         * @param dstKey key of the object to create
         * @return always {@code true}; failures are reported as exceptions
         */
        private boolean singleCopy(String srcKey, String dstKey) throws ServerException, InsufficientDataException, ErrorResponseException, IOException, NoSuchAlgorithmException, InvalidKeyException, InvalidResponseException, XmlParserException, InternalException {
            // CopySource names the object being read; the CopyObjectArgs bucket/object
            // name the object being written.
            CopySource copySource = CopySource.builder()
                    .bucket(this.bucketName)
                    .object(srcKey)
                    .build();
            CopyObjectArgs copyArg = CopyObjectArgs.builder()
                    .bucket(this.bucketName)
                    .object(dstKey)
                    .source(copySource)
                    .build();
            ObjectWriteResponse objectWriteResponse = this.ossClient.copyObject(copyArg);
            this.statistics.incrementWriteOps(1);
            LOG.debug(objectWriteResponse.etag());
            return true;
        }
    
        /**
         * Copies one object to another key using a single copy request.
         *
         * @param srcKey key of the object to copy
         * @param srcLen length of the source object; not used by this implementation
         * @param dstKey key to copy to
         * @return {@code true} if the copy succeeded, {@code false} if it failed
         */
        public boolean copyFile(String srcKey, long srcLen, String dstKey) {
            try {
                return this.singleCopy(srcKey, dstKey);
            } catch (Exception e) {
                // No fallback is attempted here; pass the exception itself so the stack trace is logged.
                LOG.debug("Exception thrown when copy file: {} to {}", srcKey, dstKey, e);
                return false;
            }
        }
    
    
        /**
         * Uploads a local file as one object and counts one write operation.
         *
         * @param fileName key of the object to create
         * @param file     local file whose content is uploaded
         * @throws IOException if the file cannot be read or the upload fails
         */
        public void uploadObject(String fileName, File file) throws IOException {
            File object = file.getAbsoluteFile();
            // try-with-resources also closes the stream when building the request fails.
            try (FileInputStream fis = new FileInputStream(object)) {
                PutObjectArgs putArg = PutObjectArgs.builder()
                        .bucket(this.bucketName)
                        .object(fileName)
                        .stream(fis, object.length(), -1)
                        .build();
                ObjectWriteResponse objectWriteResponse = this.ossClient.putObject(putArg);
                LOG.debug(objectWriteResponse.etag());
                this.statistics.incrementWriteOps(1);
            } catch (IOException e) {
                throw e;
            } catch (Exception e) {
                // Only logging here would let the caller believe the data was written.
                throw new IOException("Failed to upload " + object + " to " + fileName, e);
            }
        }
    
        /**
         * Lists objects under a prefix and counts one read operation.
         *
         * @param prefix           key prefix; a trailing slash is added when missing
         * @param maxListingLength maximum number of keys requested per listing call
         * @param marker           key to start listing after, or null to start from the beginning
         * @param recursive        true to list all descendants, false to stop at the next {@code "/"}
         * @return the listing returned by the MinIO client
         */
        public Iterable<Result<Item>> listObjects(String prefix, int maxListingLength, String marker, boolean recursive) {
            String delimiter = recursive ? null : "/";
            ListObjectsArgs listObjectsArgs = ListObjectsArgs.builder()
                    // bucket name
                    .bucket(this.bucketName)
                    // key prefix
                    .prefix(MinIOOSSUtils.maybeAddTrailingSlash(prefix))
                    // delimiter
                    .delimiter(delimiter)
                    // Forward the flag explicitly instead of relying on a null delimiter alone.
                    .recursive(recursive)
                    // maximum number of keys per listing call
                    .maxKeys(maxListingLength)
                    // listing start marker
                    .marker(marker)
                    .build();

            Iterable<Result<Item>> results = this.ossClient.listObjects(listObjectsArgs);
            this.statistics.incrementReadOps(1);
            return results;
        }
    
        /**
         * Opens a stream on an object, starting at the given byte offset.
         *
         * @param key       key of the object to read
         * @param byteStart offset of the first byte to return
         * @return the object stream, or {@code null} if the object could not be opened
         */
        public InputStream retrieve(String key, long byteStart) {
            try {
                GetObjectArgs getArgs = GetObjectArgs.builder()
                        .bucket(this.bucketName)
                        .object(key)
                        .offset(byteStart)
                        .build();
                GetObjectResponse getObject = this.ossClient.getObject(getArgs);
                // Only a request that actually succeeded is counted.
                this.statistics.incrementReadOps(1);
                return getObject;
            } catch (Exception e) {
                LOG.error("Exception thrown when store retrieves key: " + key + ", exception: " + e);
                return null;
            }
        }
    
        /** Drops the reference to the MinIO client; nothing else is released here. */
        public void close() {
            // Assigning null to an already-null field changes nothing, so no check is needed.
            this.ossClient = null;
        }
    
        /**
         * Best-effort removal of everything under a prefix: deletes each listed
         * object, then calls {@code deleteDirs} on the prefix. Failures are logged
         * and not propagated.
         *
         * @param prefix key prefix to purge
         */
        public void purge(String prefix) throws IOException, ServerException, InsufficientDataException, ErrorResponseException, NoSuchAlgorithmException, InvalidKeyException, InvalidResponseException, XmlParserException, InternalException {
            try {
                Iterable<Result<Item>> results = this.listObjects(prefix, this.maxKeys, (String) null, true);
                for (Result<Item> result : results) {
                    // deleteObject records the write operation itself.
                    this.deleteObject(result.get().objectName());
                }

                this.deleteDirs(prefix);
            } catch (Exception e) {
                // Include the cause; without it the log only says that something failed.
                LOG.error("Failed to purge " + prefix, e);
            }
        }
    
        /**
         * Wraps one file status in an iterator that yields it at most once.
         *
         * @param fileStatus status to expose; when null the iterator is empty
         * @param locations  block locations attached when the status is a file
         * @return an iterator over zero or one located status
         */
        public RemoteIterator<LocatedFileStatus> singleStatusRemoteIterator(final FileStatus fileStatus, final BlockLocation[] locations) {
            return new RemoteIterator<LocatedFileStatus>() {
                private boolean consumed = false;

                public boolean hasNext() throws IOException {
                    return fileStatus != null && !this.consumed;
                }

                public LocatedFileStatus next() throws IOException {
                    if (!this.hasNext()) {
                        throw new NoSuchElementException();
                    }

                    // Directories carry no block locations.
                    BlockLocation[] blocks = fileStatus.isFile() ? locations : null;
                    LocatedFileStatus located = new LocatedFileStatus(fileStatus, blocks);
                    this.consumed = true;
                    return located;
                }
            };
        }
    
        /**
         * Creates an iterator over the located statuses of the objects under a prefix.
         *
         * <p>The listing is fetched on the first {@code hasNext()} call. Each listed
         * object that passes {@code filter} and {@code acceptor} becomes one status;
         * the prefix itself is then added as a directory status when it passes both.
         *
         * @param prefix           key prefix to list; a trailing slash is added when missing
         * @param maxListingLength maximum number of keys requested per listing call
         * @param fs               file system used to qualify paths and look up block info
         * @param filter           path filter applied to every candidate
         * @param acceptor         acceptor applied to every candidate
         * @param delimiter        listing delimiter, or null
         * @return the iterator
         */
        public RemoteIterator<LocatedFileStatus> createLocatedFileStatusIterator(final String prefix, final int maxListingLength, final FileSystem fs, final PathFilter filter, final FileStatusAcceptor acceptor, final String delimiter) {
            return new RemoteIterator<LocatedFileStatus>() {
                private boolean firstListing = true;
                private boolean meetEnd = false;
                private ListIterator<FileStatus> batchIterator;

                public boolean hasNext() throws IOException {
                    if (this.firstListing) {
                        this.requestNextBatch();
                        this.firstListing = false;
                    }

                    return this.batchIterator.hasNext() || this.requestNextBatch();
                }

                public LocatedFileStatus next() throws IOException {
                    if (this.hasNext()) {
                        FileStatus status = this.batchIterator.next();
                        BlockLocation[] locations = fs.getFileBlockLocations(status, 0L, status.getLen());
                        return new LocatedFileStatus(status, status.isFile() ? locations : null);
                    } else {
                        throw new NoSuchElementException();
                    }
                }

                /** Loads the listing into {@code batchIterator}; returns whether it produced any status. */
                private boolean requestNextBatch() throws IOException {
                    if (this.meetEnd) {
                        return false;
                    }

                    ListObjectsArgs listArgs = ListObjectsArgs.builder()
                            .bucket(bucketName)
                            .prefix(MinIOOSSUtils.maybeAddTrailingSlash(prefix))
                            .maxKeys(maxListingLength)
                            .delimiter(delimiter)
                            .build();
                    Iterable<Result<Item>> results = MinIOOSSFileSystemStore.this.ossClient.listObjects(listArgs);

                    List<FileStatus> stats = new ArrayList<>();
                    // Walk ONE iterator: every call to results.iterator() starts the
                    // listing over, so asking for a new one per step never advances.
                    for (Result<Item> result : results) {
                        Item item;
                        try {
                            item = result.get();
                        } catch (Exception e) {
                            throw new IOException("Failed to list objects under " + prefix, e);
                        }
                        String objectName = item.objectName();
                        Path path = fs.makeQualified(new Path("/" + objectName));
                        if (filter.accept(path) && acceptor.accept(path, objectName, item.size())) {
                            // NOTE(review): lastModified() may be unset for directory entries - confirm.
                            FileStatus status = new OSSFileStatus(item.size(), objectName.endsWith("/"), 1, fs.getDefaultBlockSize(path), item.lastModified().toInstant().toEpochMilli(), path, MinIOOSSFileSystemStore.this.username);
                            stats.add(status);
                        }
                    }

                    Path prefixPath = fs.makeQualified(new Path("/" + prefix));
                    if (filter.accept(prefixPath) && acceptor.accept(prefixPath, prefix)) {
                        FileStatus status = new OSSFileStatus(0L, true, 1, 0L, 0L, prefixPath, MinIOOSSFileSystemStore.this.username);
                        stats.add(status);
                    }

                    // The loop above drained the listing, so there is no further batch to request.
                    this.meetEnd = true;
                    this.batchIterator = stats.listIterator();
                    MinIOOSSFileSystemStore.this.statistics.incrementReadOps(1);
                    return this.batchIterator.hasNext();
                }
            };
        }
    
    
        /**
         * Fetches the metadata of an object.
         *
         * @param key key of the object
         * @return the stat response, or {@code null} when the request fails for any reason
         */
        public StatObjectResponse getObjectMetadata(String key) {
            StatObjectResponse metadata = null;
            try {
                StatObjectArgs.Builder statBuilder = StatObjectArgs.builder();
                statBuilder.bucket(bucketName);
                statBuilder.object(key);
                metadata = this.ossClient.statObject(statBuilder.build());
            } catch (Exception e) {
                // Any failure is reported as "no metadata".
                LOG.error(e.getMessage());
            }
            return metadata;
        }
    
        /**
         * Uploads a local block file under the given key and counts one write operation.
         *
         * @param key       key of the object to write
         * @param blockFile local file holding the block content
         * @param blockSize part size handed to the MinIO client
         * @return the write response of the upload
         * @throws IOException if the file cannot be read or the upload fails
         */
        public ObjectWriteResponse uploadObjectPart(String key, File blockFile, long blockSize) throws IOException {
            File object = blockFile.getAbsoluteFile();
            try (FileInputStream fis = new FileInputStream(object)) {
                PutObjectArgs putArg = PutObjectArgs.builder()
                        // The request cannot be built without a bucket.
                        .bucket(this.bucketName)
                        .object(key)
                        .stream(fis, object.length(), blockSize)
                        .build();
                ObjectWriteResponse objectWriteResponse = this.ossClient.putObject(putArg);
                LOG.debug(objectWriteResponse.etag());
                this.statistics.incrementWriteOps(1);
                // Returned from the try block: a return inside finally would replace this
                // value and discard any pending exception.
                return objectWriteResponse;
            } catch (IOException e) {
                throw e;
            } catch (Exception e) {
                throw new IOException("Failed to upload part " + object + " to " + key, e);
            }
        }
    }
    
    
4、AliyunOSSFileSystem——>MinIOOSSFileSystem
  • 里面最主要的有以下几个方法:
    • public FileStatus[] listStatus(Path path) throws IOException
      • 获取文件状态数组
      • Minio和AliyunOSS有一个显著的区别点,AliyunOSS会将文件夹对象和文件对象分成两个字段来进行存储,而Minio只会放在一个对象内,Minio返回的item集合中包含了文件夹也包含了文件,需要根据Item中的isDir将目录划分出后再进行文件状态值的获取。
    • public FSDataInputStream open(Path path, int bufferSize) throws IOException
      • 所有获取的对象文件都需要通过open方法来获取对象流。
    • public FileStatus getFileStatus(Path path) throws IOException
      • AliyunOSS首先获取每个对象的元数据,先是按文件获取,再按文件夹获取,两次获取均失败后,考虑元对象丢失的情况,再根据listObjects方法来查询该文件夹下是否包含Item,将当前文件作为marker来判断后续是否还有文件,如果是最后一个文件即返回当前文件,如果返回的对象为空则文件不存在。——Minio基于S3协议,所以与OSS返回的结果值一样。
package org.apache.hadoop.fs.minio.oss;

import com.google.common.collect.Iterators;
import com.google.common.util.concurrent.ListeningExecutorService;
import com.google.common.util.concurrent.MoreExecutors;
import io.minio.Result;
import io.minio.StatObjectResponse;
import io.minio.messages.Item;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.BlockingThreadPoolExecutorService;
import org.apache.hadoop.util.Progressable;
import org.apache.hadoop.util.SemaphoredDelegatingExecutor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.EnumSet;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;

/**
 * Hadoop {@link FileSystem} backed by a MinIO bucket, ported from the Aliyun
 * OSS file system. All object-store access goes through
 * {@link MinIOOSSFileSystemStore}; this class maps Hadoop paths to object keys
 * and emulates directories with zero-length objects whose key ends in "/".
 */
public class MinIOOSSFileSystem extends FileSystem {
    private static final Logger LOG = LoggerFactory.getLogger(MinIOOSSFileSystem.class);
    private URI uri;
    private String bucket;
    private String username;
    private Path workingDir;
    private int blockOutputActiveBlocks;
    private MinIOOSSFileSystemStore store;
    private int maxKeys;
    private int maxReadAheadPartNumber;
    private int maxConcurrentCopyTasksPerDir;
    private ListeningExecutorService boundedThreadPool;
    private ListeningExecutorService boundedCopyThreadPool;
    /** Filter that accepts every path; used by {@link #listLocatedStatus(Path)}. */
    private static final PathFilter DEFAULT_FILTER = new PathFilter() {
        public boolean accept(Path file) {
            return true;
        }
    };

    public MinIOOSSFileSystem() {
    }

    /**
     * Append is not supported by this file system.
     *
     * @throws IOException always
     */
    public FSDataOutputStream append(Path path, int bufferSize, Progressable progress) throws IOException {
        throw new IOException("Append is not supported!");
    }

    /**
     * Closes the store and shuts down both thread pools, then closes the base
     * file system. Each step runs even if an earlier one throws, and members
     * that were never initialized are skipped.
     */
    public void close() throws IOException {
        try {
            try {
                if (this.store != null) {
                    this.store.close();
                }
            } finally {
                // Shut the pools down even when closing the store fails, so
                // their worker threads are not leaked.
                if (this.boundedThreadPool != null) {
                    this.boundedThreadPool.shutdown();
                }
                if (this.boundedCopyThreadPool != null) {
                    this.boundedCopyThreadPool.shutdown();
                }
            }
        } finally {
            super.close();
        }

    }

    /**
     * Opens an output stream that writes to the object for {@code path}.
     *
     * @throws FileAlreadyExistsException if {@code path} is a directory, or is
     *                                    an existing file and {@code overwrite} is false
     */
    public FSDataOutputStream create(Path path, FsPermission permission, boolean overwrite, int bufferSize, short replication, long blockSize, Progressable progress) throws IOException {
        String key = this.pathToKey(path);
        FileStatus status = null;

        try {
            status = this.getFileStatus(path);
            if (status.isDirectory()) {
                throw new FileAlreadyExistsException(path + " is a directory");
            }

            if (!overwrite) {
                throw new FileAlreadyExistsException(path + " already exists");
            }

            LOG.debug("Overwriting file {}", path);
        } catch (FileNotFoundException ignored) {
            // Expected: the target does not exist yet, so there is nothing to overwrite.
        }

        long uploadPartSize = MinIOOSSUtils.getMultipartSizeProperty(this.getConf(), "fs.oss.multipart.upload.size", 104857600L);
        return new FSDataOutputStream(new MinIOOSSBlockOutputStream(this.getConf(), this.store, key, uploadPartSize, new SemaphoredDelegatingExecutor(this.boundedThreadPool, this.blockOutputActiveBlocks, true)), this.statistics);
    }

    /**
     * Creates a file without creating missing parents: the parent, when there
     * is one, must already exist and be a directory.
     */
    public FSDataOutputStream createNonRecursive(Path path, FsPermission permission, EnumSet<CreateFlag> flags, int bufferSize, short replication, long blockSize, Progressable progress) throws IOException {
        Path parent = path.getParent();
        if (parent != null && !this.getFileStatus(parent).isDirectory()) {
            throw new FileAlreadyExistsException("Not a directory: " + parent);
        } else {
            return this.create(path, permission, flags.contains(CreateFlag.OVERWRITE), bufferSize, replication, blockSize, progress);
        }
    }

    /**
     * Deletes a file or directory.
     *
     * @return false when {@code path} does not exist, otherwise the result of
     *         the delete
     */
    public boolean delete(Path path, boolean recursive) throws IOException {
        try {
            return this.innerDelete(this.getFileStatus(path), recursive);
        } catch (FileNotFoundException var4) {
            LOG.debug("Couldn't delete {} - does not exist", path);
            return false;
        }
    }

    /**
     * Deletes the object(s) behind {@code status}. The root is never removed;
     * a non-empty directory is only removed when {@code recursive} is true.
     */
    private boolean innerDelete(FileStatus status, boolean recursive) throws IOException {
        Path f = status.getPath();
        String p = f.toUri().getPath();
        FileStatus[] statuses;
        if (p.equals("/")) {
            statuses = this.listStatus(status.getPath());
            boolean isEmptyDir = statuses.length <= 0;
            return this.rejectRootDirectoryDelete(isEmptyDir, recursive);
        } else {
            String key = this.pathToKey(f);
            if (status.isDirectory()) {
                if (!recursive) {
                    statuses = this.listStatus(status.getPath());
                    if (statuses.length > 0) {
                        throw new IOException("Cannot remove directory " + f + ": It is not empty!");
                    }

                    key = MinIOOSSUtils.maybeAddTrailingSlash(key);
                    this.store.deleteObject(key);
                } else {
                    this.store.deleteDirs(key);
                }
            } else {
                this.store.deleteObject(key);
            }

            this.createFakeDirectoryIfNecessary(f);
            return true;
        }
    }

    /**
     * Decides the outcome of a delete on the bucket root: true for an empty
     * root, false for a recursive delete of a non-empty root, and an exception
     * for a non-recursive delete of a non-empty root.
     */
    private boolean rejectRootDirectoryDelete(boolean isEmptyDir, boolean recursive) throws IOException {
        LOG.info("oss delete the {} root directory of {}", this.bucket, recursive);
        if (isEmptyDir) {
            return true;
        } else if (recursive) {
            return false;
        } else {
            throw new PathIOException(this.bucket, "Cannot delete root path");
        }
    }

    /**
     * After {@code f} has been deleted, writes a directory marker for its
     * parent when {@code f} no longer exists and its key is non-empty.
     */
    private void createFakeDirectoryIfNecessary(Path f) throws IOException {
        String key = this.pathToKey(f);
        if (StringUtils.isNotEmpty(key) && !this.exists(f)) {
            LOG.debug("Creating new fake directory at {}", f);
            this.mkdir(this.pathToKey(f.getParent()));
        }

    }

    /**
     * Resolves the status of {@code path}. The key is looked up as given, then
     * with a trailing "/", and finally by listing the prefix so that a
     * directory without a marker object is still found.
     *
     * @throws FileNotFoundException if no object or prefix matches
     */
    public FileStatus getFileStatus(Path path) throws IOException {
        Path qualifiedPath = path.makeQualified(this.uri, this.workingDir);
        String key = this.pathToKey(qualifiedPath);
        if (key.length() == 0) {
            // The empty key is the bucket root, which is always a directory.
            return new OSSFileStatus(0L, true, 1, 0L, 0L, qualifiedPath, this.username);
        } else {
            StatObjectResponse meta = this.store.getObjectMetadata(key);
            if (meta == null && !key.endsWith("/")) {
                key = key + "/";
                meta = this.store.getObjectMetadata(key);
            }

            // Inherited from the Aliyun implementation: cover the case where
            // the directory marker object is missing but children exist.
            if (meta == null) {
                Iterable<Result<Item>> results = this.store.listObjects(key, 1, (String) null, false);
                // Only emptiness matters; hasNext() avoids walking the whole listing.
                if (!results.iterator().hasNext()) {
                    throw new FileNotFoundException(path + ": No such file or directory!");
                } else {
                    return new OSSFileStatus(0L, true, 1, 0L, 0L, qualifiedPath, this.username);
                }
            } else {
                return MinIOOSSUtils.objectRepresentsDirectory(key, 0) ? new OSSFileStatus(0L, true, 1, 0L, 0L, qualifiedPath, this.username) : new OSSFileStatus(meta.size(), false, 1, this.getDefaultBlockSize(path), meta.lastModified().toInstant().toEpochMilli(), qualifiedPath, this.username);
            }
        }
    }

    public String getScheme() {
        return "oss";
    }

    public URI getUri() {
        return this.uri;
    }

    public int getDefaultPort() {
        return -1;
    }

    public Path getWorkingDirectory() {
        return this.workingDir;
    }

    /**
     * Returns the block size configured under {@code fs.oss.block.size}
     * (default 64 MiB).
     *
     * @deprecated
     */
    @Deprecated
    public long getDefaultBlockSize() {
        return this.getConf().getLong("fs.oss.block.size", 67108864L);
    }

    public String getCanonicalServiceName() {
        return null;
    }

    /**
     * Initializes the file system for the bucket named by the host part of
     * {@code name}: creates the store, reads the {@code fs.oss.*} tuning
     * options, and builds the transfer and copy thread pools.
     */
    public void initialize(URI name, Configuration conf) throws IOException {
        super.initialize(name, conf);
        this.bucket = name.getHost();
        this.uri = URI.create(name.getScheme() + "://" + name.getAuthority());
        this.username = UserGroupInformation.getCurrentUser().getShortUserName();
        this.workingDir = (new Path("/user", this.username)).makeQualified(this.uri, (Path) null);
        long keepAliveTime = MinIOOSSUtils.longOption(conf, "fs.oss.threads.keepalivetime", 60L, 0L);
        this.blockOutputActiveBlocks = MinIOOSSUtils.intOption(conf, "fs.oss.upload.active.blocks", 4, 1);
        this.store = new MinIOOSSFileSystemStore();
        this.store.initialize(name, conf, this.username, this.statistics);
        this.maxKeys = conf.getInt("fs.oss.paging.maximum", 1000);
        int threadNum = MinIOOSSUtils.intPositiveOption(conf, "fs.oss.multipart.download.threads", 10);
        int totalTasks = MinIOOSSUtils.intPositiveOption(conf, "fs.oss.max.total.tasks", 128);
        this.maxReadAheadPartNumber = MinIOOSSUtils.intPositiveOption(conf, "fs.oss.multipart.download.ahead.part.max.number", 4);
        this.boundedThreadPool = BlockingThreadPoolExecutorService.newInstance(threadNum, totalTasks, keepAliveTime, TimeUnit.SECONDS, "oss-transfer-shared");
        this.maxConcurrentCopyTasksPerDir = MinIOOSSUtils.intPositiveOption(conf, "fs.oss.max.copy.tasks.per.dir", 5);
        int maxCopyThreads = MinIOOSSUtils.intPositiveOption(conf, "fs.oss.max.copy.threads", 25);
        int maxCopyTasks = MinIOOSSUtils.intPositiveOption(conf, "fs.oss.max.copy.tasks", 10485760);
        this.boundedCopyThreadPool = BlockingThreadPoolExecutorService.newInstance(maxCopyThreads, maxCopyTasks, 60L, TimeUnit.SECONDS, "oss-copy-unbounded");
        this.setConf(conf);
    }

    /**
     * Converts a path to an object key: relative paths are resolved against
     * the working directory and the leading "/" is dropped.
     */
    private String pathToKey(Path path) {
        if (!path.isAbsolute()) {
            path = new Path(this.workingDir, path);
        }

        return path.toUri().getPath().substring(1);
    }

    /** Converts an object key back to an absolute (unqualified) path. */
    private Path keyToPath(String key) {
        return new Path("/" + key);
    }

    /**
     * Lists the direct children of {@code path}, or {@code path} itself when
     * it is a file. Files appear first in listing order, followed by the
     * sub-directories.
     *
     * <p>The listing is consumed in a single pass. The previous paging loop
     * tested {@code Iterators.size(...)} on an already exhausted iterator, so
     * it always stopped after the first pass when {@code maxKeys > 0} and
     * never stopped otherwise.
     * NOTE(review): this relies on the iterable returned by the store walking
     * every result page by itself (the MinIO client documents its listing as
     * lazily evaluated) - confirm against MinIOOSSFileSystemStore.listObjects.
     */
    public FileStatus[] listStatus(Path path) throws IOException {
        String key = this.pathToKey(path);
        if (LOG.isDebugEnabled()) {
            LOG.debug("List status for path: " + path);
        }

        List<FileStatus> result = new ArrayList<>();
        FileStatus fileStatus = this.getFileStatus(path);
        if (fileStatus.isDirectory()) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("listStatus: doing listObjects for directory " + key);
            }

            Iterable<Result<Item>> results = this.store.listObjects(key, this.maxKeys, (String) null, false);
            // MinIO returns files and directories in one item stream; the
            // directories are set aside and resolved after the files.
            List<Item> dirItemList = new ArrayList<>();
            for (Result<Item> objectSummary : results) {
                Item item;
                try {
                    item = objectSummary.get();
                } catch (Exception e) {
                    throw new IOException(e);
                }

                String objKey = item.objectName();
                if (objKey.equals(key + "/")) {
                    // The directory's own marker object is not a child.
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Ignoring: " + objKey);
                    }
                } else {
                    Path keyPath = this.keyToPath(objKey).makeQualified(this.uri, this.workingDir);
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Adding: fi: " + keyPath);
                    }
                    if (!item.isDir()) {
                        result.add(new OSSFileStatus(item.size(), false, 1, this.getDefaultBlockSize(keyPath), item.lastModified().toInstant().toEpochMilli(), keyPath, this.username));
                    } else {
                        dirItemList.add(item);
                    }
                }
            }

            for (Item item : dirItemList) {
                Path keyPath = this.keyToPath(item.objectName()).makeQualified(this.uri, this.workingDir);
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Adding: rd: " + keyPath);
                }
                result.add(this.getFileStatus(keyPath));
            }
        } else {
            if (LOG.isDebugEnabled()) {
                LOG.debug("Adding: rd (not a dir): " + path);
            }

            result.add(fileStatus);
        }

        return result.toArray(new FileStatus[result.size()]);
    }

    /**
     * Lists the files under {@code f}, descending into sub-directories when
     * {@code recursive} is true. Only files are returned.
     */
    public RemoteIterator<LocatedFileStatus> listFiles(final Path f, boolean recursive) throws IOException {
        Path qualifiedPath = f.makeQualified(this.uri, this.workingDir);
        final FileStatus status = this.getFileStatus(qualifiedPath);
        PathFilter filter = new PathFilter() {
            public boolean accept(Path path) {
                return status.isFile() || !path.equals(f);
            }
        };
        FileStatusAcceptor acceptor = new FileStatusAcceptor.AcceptFilesOnly(qualifiedPath);
        return this.innerList(f, status, filter, acceptor, recursive);
    }

    public RemoteIterator<LocatedFileStatus> listLocatedStatus(Path f) throws IOException {
        return this.listLocatedStatus(f, DEFAULT_FILTER);
    }

    /** Lists the direct children of {@code f} that pass {@code filter}. */
    public RemoteIterator<LocatedFileStatus> listLocatedStatus(Path f, PathFilter filter) throws IOException {
        Path qualifiedPath = f.makeQualified(this.uri, this.workingDir);
        FileStatus status = this.getFileStatus(qualifiedPath);
        FileStatusAcceptor acceptor = new FileStatusAcceptor.AcceptAllButSelf(qualifiedPath);
        return this.innerList(f, status, filter, acceptor, false);
    }

    /**
     * Shared implementation of the located-status listings: a file yields a
     * single-element iterator, a directory is delegated to the store (with
     * "/" as delimiter unless {@code recursive}).
     */
    private RemoteIterator<LocatedFileStatus> innerList(Path f, FileStatus status, PathFilter filter, FileStatusAcceptor acceptor, boolean recursive) throws IOException {
        Path qualifiedPath = f.makeQualified(this.uri, this.workingDir);
        String key = this.pathToKey(qualifiedPath);
        if (status.isFile()) {
            LOG.debug("{} is a File", qualifiedPath);
            BlockLocation[] locations = this.getFileBlockLocations(status, 0L, status.getLen());
            return this.store.singleStatusRemoteIterator(filter.accept(f) ? status : null, locations);
        } else {
            return this.store.createLocatedFileStatusIterator(key, this.maxKeys, this, filter, acceptor, recursive ? null : "/");
        }
    }

    /**
     * Stores an empty directory-marker object for {@code key} (a trailing "/"
     * is appended if missing). An empty key is a no-op.
     *
     * @return always true
     */
    private boolean mkdir(String key) throws IOException {
        String dirName = key;
        if (StringUtils.isNotEmpty(key)) {
            if (!key.endsWith("/")) {
                dirName = key + "/";
            }

            this.store.storeEmptyFile(dirName);
        }

        return true;
    }

    /**
     * Creates the directory {@code path}.
     *
     * @throws FileAlreadyExistsException if {@code path} or one of its
     *                                    ancestors is an existing file
     */
    public boolean mkdirs(Path path, FsPermission permission) throws IOException {
        try {
            FileStatus fileStatus = this.getFileStatus(path);
            if (fileStatus.isDirectory()) {
                return true;
            } else {
                throw new FileAlreadyExistsException("Path is a file: " + path);
            }
        } catch (FileNotFoundException var5) {
            this.validatePath(path);
            String key = this.pathToKey(path);
            return this.mkdir(key);
        }
    }

    /**
     * Walks up from the parent of {@code path} to the nearest existing
     * ancestor and fails if that ancestor is a file.
     */
    private void validatePath(Path path) throws IOException {
        Path fPart = path.getParent();

        while (true) {
            try {
                FileStatus fileStatus = this.getFileStatus(fPart);
                if (!fileStatus.isDirectory()) {
                    throw new FileAlreadyExistsException(String.format("Can't make directory for path '%s', it is a file.", fPart));
                }
                break;
            } catch (FileNotFoundException var4) {
                fPart = fPart.getParent();
                if (fPart == null) {
                    break;
                }
            }
        }

    }

    /**
     * Opens {@code path} for reading.
     *
     * @throws FileNotFoundException if {@code path} is missing or is a directory
     */
    public FSDataInputStream open(Path path, int bufferSize) throws IOException {
        FileStatus fileStatus = this.getFileStatus(path);
        if (fileStatus.isDirectory()) {
            throw new FileNotFoundException("Can't open " + path + " because it is a directory");
        } else {
            return new FSDataInputStream(new MinIOOSSInputStream(this.getConf(), new SemaphoredDelegatingExecutor(this.boundedThreadPool, this.maxReadAheadPartNumber, true), this.maxReadAheadPartNumber, this.store, this.pathToKey(path), fileStatus.getLen(), this.statistics));
        }
    }

    /**
     * Renames {@code srcPath} to {@code dstPath} by copying the object(s) and
     * then deleting the source. Renaming the root, or a path into its own
     * subtree, returns false. When {@code dstPath} is an existing directory
     * the source is moved inside it.
     */
    public boolean rename(Path srcPath, Path dstPath) throws IOException {
        if (srcPath.isRoot()) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("Cannot rename the root of a filesystem");
            }

            return false;
        } else {
            // Walk up from the destination; finding the source on the way
            // means the destination lies inside the source.
            Path parent;
            for (parent = dstPath.getParent(); parent != null && !srcPath.equals(parent); parent = parent.getParent()) {
            }

            if (parent != null) {
                return false;
            } else {
                FileStatus srcStatus = this.getFileStatus(srcPath);

                FileStatus dstStatus;
                try {
                    dstStatus = this.getFileStatus(dstPath);
                } catch (FileNotFoundException var9) {
                    dstStatus = null;
                }

                if (dstStatus == null) {
                    dstStatus = this.getFileStatus(dstPath.getParent());
                    if (!dstStatus.isDirectory()) {
                        throw new IOException(String.format("Failed to rename %s to %s, %s is a file", srcPath, dstPath, dstPath.getParent()));
                    }
                } else {
                    if (srcStatus.getPath().equals(dstStatus.getPath())) {
                        return !srcStatus.isDirectory();
                    }

                    if (!dstStatus.isDirectory()) {
                        throw new FileAlreadyExistsException(String.format("Failed to rename %s to %s, file already exists!", srcPath, dstPath));
                    }

                    dstPath = new Path(dstPath, srcPath.getName());

                    FileStatus[] statuses;
                    try {
                        statuses = this.listStatus(dstPath);
                    } catch (FileNotFoundException var8) {
                        statuses = null;
                    }

                    if (statuses != null && statuses.length > 0) {
                        throw new FileAlreadyExistsException(String.format("Failed to rename %s to %s, file already exists or not empty!", srcPath, dstPath));
                    }
                }

                boolean succeed;
                if (srcStatus.isDirectory()) {
                    succeed = this.copyDirectory(srcPath, dstPath);
                } else {
                    succeed = this.copyFile(srcPath, srcStatus.getLen(), dstPath);
                }

                return srcPath.equals(dstPath) || succeed && this.delete(srcPath, true);
            }
        }
    }

    /** Copies a single object from the key of {@code srcPath} to that of {@code dstPath}. */
    private boolean copyFile(Path srcPath, long srcLen, Path dstPath) {
        String srcKey = this.pathToKey(srcPath);
        String dstKey = this.pathToKey(dstPath);
        return this.store.copyFile(srcKey, srcLen, dstKey);
    }

    /**
     * Copies every object under {@code srcPath} to the matching key under
     * {@code dstPath}, submitting one copy task per object and waiting for
     * all of them.
     *
     * @return true only if every object was listed, every copy succeeded and
     *         the wait was not interrupted; {@link #rename} deletes the source
     *         only in that case
     * @throws IOException if an entry of the source listing cannot be read
     */
    private boolean copyDirectory(Path srcPath, Path dstPath) throws IOException {
        String srcKey = MinIOOSSUtils.maybeAddTrailingSlash(this.pathToKey(srcPath));
        String dstKey = MinIOOSSUtils.maybeAddTrailingSlash(this.pathToKey(dstPath));
        if (dstKey.startsWith(srcKey)) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("Cannot rename a directory to a subdirectory of self");
            }

            return false;
        } else {
            this.store.storeEmptyFile(dstKey);
            MinIOOSSCopyFileContext copyFileContext = new MinIOOSSCopyFileContext();
            ExecutorService executorService = MoreExecutors.listeningDecorator(new SemaphoredDelegatingExecutor(this.boundedCopyThreadPool, this.maxConcurrentCopyTasksPerDir, true));
            int copiesToFinish = 0;
            String nextMarker = null;
            boolean moreToList = true;

            while (moreToList && !copyFileContext.isCopyFailure()) {
                Iterable<Result<Item>> objects = this.store.listObjects(srcKey, this.maxKeys, nextMarker, true);
                int listed = 0;
                for (Result<Item> objectSummary : objects) {
                    Item item;
                    try {
                        item = objectSummary.get();
                    } catch (Exception e) {
                        // A partial copy must never be reported as success,
                        // because the caller deletes the source afterwards.
                        throw new IOException("Failed to list objects under " + srcKey, e);
                    }

                    ++listed;
                    nextMarker = item.objectName();
                    String newKey = dstKey.concat(item.objectName().substring(srcKey.length()));
                    // Pass the object's own length. Counting the iterator here
                    // would drain it and skip every remaining object.
                    executorService.execute(new MinIOOSSCopyFileTask(this.store, item.objectName(), item.size(), newKey, copyFileContext));
                    ++copiesToFinish;
                    if (copyFileContext.isCopyFailure()) {
                        break;
                    }
                }

                // A full batch may mean the listing was cut off at maxKeys, so
                // list again after the last key seen. If the store already
                // walked every page, the follow-up listing is simply empty.
                moreToList = this.maxKeys > 0 && listed >= this.maxKeys;
            }

            boolean interrupted = false;
            copyFileContext.lock();

            try {
                copyFileContext.awaitAllFinish(copiesToFinish);
            } catch (InterruptedException var15) {
                LOG.warn("interrupted when wait copies to finish");
                // Keep the interrupt visible to callers and report failure:
                // some copies may still be in flight.
                Thread.currentThread().interrupt();
                interrupted = true;
            } finally {
                copyFileContext.unlock();
            }

            return !interrupted && !copyFileContext.isCopyFailure();
        }
    }

    public void setWorkingDirectory(Path dir) {
        this.workingDir = dir;
    }

    public MinIOOSSFileSystemStore getStore() {
        return this.store;
    }


}
5、AliyunOSSBlockOutputStream——>MinIOOSSBlockOutputStream
  • this.store.uploadObjectPart(this.key,this.blockFile,this.blockSize);为主要实现分片上传方法,因为Minio已经实现了分片上传,所以就直接使用Minio中的分片上传方法。AliyunOSS中的上传方法为将大文件按照每个文件5MB大小统一上传后进行合并,生成文件存储在OSS中。
package org.apache.hadoop.fs.minio.oss;

import com.google.common.util.concurrent.ListeningExecutorService;
import com.google.common.util.concurrent.MoreExecutors;
import org.apache.hadoop.conf.Configuration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.*;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.concurrent.ExecutorService;

/**
 * Output stream that buffers written bytes in local temporary block files and
 * hands them to {@link MinIOOSSFileSystemStore} for upload. A block is
 * uploaded as soon as at least {@code blockSize} bytes have been written to
 * it; the remaining data is uploaded on {@link #close()}.
 */
public class MinIOOSSBlockOutputStream extends OutputStream {
    private static final Logger LOG = LoggerFactory.getLogger(MinIOOSSBlockOutputStream.class);
    private MinIOOSSFileSystemStore store;
    private Configuration conf;
    private boolean closed;
    private String key;
    /** Temporary file currently being written to. */
    private File blockFile;
    /** Every block file handed to the store so far, by 1-based block id. */
    private Map<Integer, File> blockFiles = new HashMap<>();
    private long blockSize;
    private int blockId = 0;
    /** Bytes written to the current block file. */
    private long blockWritten = 0L;
    private String uploadId = null;
    private final ListeningExecutorService executorService;
    private OutputStream blockStream;
    private final byte[] singleByte = new byte[1];

    /**
     * @param conf            configuration used to locate the temporary directory
     * @param store           store that performs the uploads
     * @param key             object key to write
     * @param blockSize       number of bytes after which a block is uploaded
     * @param executorService executor wrapped for upload tasks
     * @throws IOException if the first temporary block file cannot be created
     */
    public MinIOOSSBlockOutputStream(Configuration conf, MinIOOSSFileSystemStore store, String key, Long blockSize, ExecutorService executorService) throws IOException {
        this.store = store;
        this.conf = conf;
        this.key = key;
        this.blockSize = blockSize;
        this.blockFile = this.newBlockFile();
        this.blockStream = new BufferedOutputStream(new FileOutputStream(this.blockFile));
        this.executorService = MoreExecutors.listeningDecorator(executorService);
    }

    /** Creates a new temporary file named after the current block id. */
    private File newBlockFile() throws IOException {
        return MinIOOSSUtils.createTmpFileForWrite(String.format("oss-block-%04d-", this.blockId), this.blockSize, this.conf);
    }

    public synchronized void flush() throws IOException {
        this.blockStream.flush();
    }

    /**
     * Uploads the data that is still buffered and releases all temporary
     * files. When only one block was ever produced it is uploaded as a whole
     * object; otherwise the last block is uploaded as a further part. The
     * stream is marked closed and the temporary files are removed even when
     * flushing or uploading fails. Calling this again is a no-op.
     */
    public synchronized void close() throws IOException {
        if (this.closed) {
            return;
        }

        try {
            this.finishBlockStream();
            if (!this.blockFiles.containsValue(this.blockFile)) {
                ++this.blockId;
                this.blockFiles.put(this.blockId, this.blockFile);
            }

            if (this.blockFiles.size() == 1) {
                this.store.uploadObject(this.key, this.blockFile);
            } else {
                this.store.uploadObjectPart(this.key, this.blockFile, this.blockSize);
            }
        } finally {
            this.removeTemporaryFiles();
            this.closed = true;
        }
    }

    public synchronized void write(int b) throws IOException {
        this.singleByte[0] = (byte) b;
        this.write(this.singleByte, 0, 1);
    }

    /**
     * Writes to the current block file and uploads the block once at least
     * {@code blockSize} bytes have been written to it.
     *
     * @throws IOException if the stream is closed or the write/upload fails
     */
    public synchronized void write(byte[] b, int off, int len) throws IOException {
        if (this.closed) {
            throw new IOException("Stream closed.");
        }

        this.blockStream.write(b, off, len);
        this.blockWritten += (long) len;
        if (this.blockWritten >= this.blockSize) {
            this.uploadCurrentPart();
            this.blockWritten = 0L;
        }
    }

    /**
     * Deletes every registered block file, plus the current block file when
     * it has not been registered yet (e.g. flushing it failed during close).
     */
    private void removeTemporaryFiles() {
        for (File file : this.blockFiles.values()) {
            this.deleteTemporaryFile(file);
        }

        if (!this.blockFiles.containsValue(this.blockFile)) {
            this.deleteTemporaryFile(this.blockFile);
        }
    }

    /** Deletes one temporary file if it exists, logging a warning on failure. */
    private void deleteTemporaryFile(File file) {
        if (file != null && file.exists() && !file.delete()) {
            LOG.warn("Failed to delete temporary file {}", file);
        }
    }

    /** Flushes the current block stream and always closes it, even if the flush fails. */
    private void finishBlockStream() throws IOException {
        try {
            this.blockStream.flush();
        } finally {
            this.blockStream.close();
        }
    }

    /**
     * Finishes the current block file, uploads it as a part and starts a new
     * block file.
     */
    private void uploadCurrentPart() throws IOException {
        this.finishBlockStream();

        ++this.blockId;
        this.blockFiles.put(this.blockId, this.blockFile);
        File currentFile = this.blockFile;
        this.store.uploadObjectPart(this.key, currentFile, blockSize);
        this.blockFile = this.newBlockFile();
        this.blockStream = new BufferedOutputStream(new FileOutputStream(this.blockFile));
    }


}
6、AliyunOSSFileReaderTask——>MinIOOSSFileReaderTask
  • MinIO和阿里云的OSS有一个不一样的地方,MinIO的retrieve方法是根据offset和length来读取stream流,这个和阿里云中直接使用ReadBuffer.getByteStart和ReadBuffer.getByteEnd不太一样。getByteEnd需要在末尾再+1,才能让读取数据完整。
package org.apache.hadoop.fs.minio.oss;

import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.retry.RetryPolicies;
import org.apache.hadoop.io.retry.RetryPolicy;
import org.apache.hadoop.io.retry.RetryPolicy.RetryAction.RetryDecision;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.TimeUnit;

/**
 * Task that fills one {@link ReadBuffer} with a byte range of an object,
 * retrying failed reads according to a retry policy, and then signals the
 * threads waiting on the buffer.
 */
public class MinIOOSSFileReaderTask implements Runnable {
    public static final Logger LOG = LoggerFactory.getLogger(MinIOOSSFileReaderTask.class);
    private final String key;
    private final MinIOOSSFileSystemStore store;
    private final ReadBuffer readBuffer;

    private static final int MAX_RETRIES = 3;
    private final RetryPolicy retryPolicy;

    /**
     * @param key        object key to read from
     * @param store      store used to retrieve the byte range
     * @param readBuffer buffer to fill; it also supplies the byte range
     */
    public MinIOOSSFileReaderTask(String key, MinIOOSSFileSystemStore store, ReadBuffer readBuffer) {
        this.key = key;
        this.store = store;
        this.readBuffer = readBuffer;
        // I/O errors are retried MAX_RETRIES times, 3 seconds apart.
        // Programming errors are not retried.
        RetryPolicy defaultPolicy = RetryPolicies.retryUpToMaximumCountWithFixedSleep(MAX_RETRIES, 3L, TimeUnit.SECONDS);
        Map<Class<? extends Exception>, RetryPolicy> policies = new HashMap<>();
        policies.put(IOException.class, defaultPolicy);
        policies.put(IndexOutOfBoundsException.class, RetryPolicies.TRY_ONCE_THEN_FAIL);
        policies.put(NullPointerException.class, RetryPolicies.TRY_ONCE_THEN_FAIL);
        this.retryPolicy = RetryPolicies.retryByException(defaultPolicy, policies);
    }

    /**
     * Reads the buffer's byte range while holding the buffer lock. On success
     * the buffer status becomes SUCCESS; once the retry policy gives up, or
     * the thread is interrupted while waiting to retry, it becomes ERROR. In
     * both cases all waiters are signalled before the lock is released.
     */
    public void run() {
        int retries = 0;
        this.readBuffer.lock();

        try {
            while (true) {
                // NOTE(review): the third argument is getByteEnd() + 1; how
                // retrieve interprets it is defined in the store - confirm there.
                try (InputStream in = this.store.retrieve(this.key, this.readBuffer.getByteStart(), this.readBuffer.getByteEnd() + 1)) {
                    int start = 0;
                    int ret;
                    // Loop until the whole buffer is filled: a single read()
                    // may return fewer bytes than requested.
                    for (int toRead = this.readBuffer.getBuffer().length; toRead > 0; start += ret) {
                        ret = in.read(this.readBuffer.getBuffer(), start, toRead);
                        if (ret < 0) {
                            throw new IOException("Premature EOF from inputStream");
                        }
                        toRead -= ret;
                    }
                    this.readBuffer.setStatus(ReadBuffer.STATUS.SUCCESS);
                    break;
                } catch (Exception e) {
                    LOG.warn("Exception thrown when retrieve key: " + this.key + ", exception: " + e);

                    try {
                        RetryPolicy.RetryAction rc = this.retryPolicy.shouldRetry(e, retries++, 0, true);
                        if (rc.action != RetryDecision.RETRY) {
                            break;
                        }

                        Thread.sleep(rc.delayMillis);
                    } catch (InterruptedException interrupted) {
                        // Restore the flag so the owning executor can observe
                        // the interruption, then give up on this read.
                        Thread.currentThread().interrupt();
                        LOG.warn("Exception thrown when call shouldRetry, exception " + interrupted);
                        break;
                    } catch (Exception retryFailure) {
                        LOG.warn("Exception thrown when call shouldRetry, exception " + retryFailure);
                        break;
                    }
                }
            }

            if (this.readBuffer.getStatus() != ReadBuffer.STATUS.SUCCESS) {
                this.readBuffer.setStatus(ReadBuffer.STATUS.ERROR);
            }

            this.readBuffer.signalAll();
        } finally {
            this.readBuffer.unlock();
        }

    }
}

五、总结

  • Spark使用local模式压力测试:

    • 一次写入10w行数据无压力

    • 一次写入50w行数据会导致batch_num太高,爆出Cannot seek after EOF的错误

      • 该错误要是屏蔽报错也会因元数据已经将原本文件大小规定完毕,读取的总的Content_length和实际文件大小预期不符,导致文件写入失败。
  • 影响性能最主要的因素有两个:

    • Spark使用的运行模式是否是集群模式
    • Spark的可以使用的内存大小

六、其他

  • 阿里云OSS参数文档
    • https://help.aliyun.com/document_detail/84841.html
  • Minio接口文档
    • https://docs.min.io/docs/java-client-api-reference.html#listObjects
    • 中文文档
    • http://docs.minio.org.cn/docs/master/java-client-api-reference#putObject
  • Minio元数据介绍
    • https://blog.51cto.com/u_14625168/2511642
  • Spark与S3交互
    • https://blog.csdn.net/helloword4217/article/details/99691961
  • Apache Hudi 与 阿里云OSS交互
    • https://hudi.apache.org/cn/docs/next/oss_hoodie
  • Spark操作Hudi
    • https://blog.csdn.net/weixin_39636364/article/details/120358892
  • Hadoop EOF Exception
    • https://cwiki.apache.org/confluence/display/HADOOP2/EOFException
  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 2
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值