ceph对象存储文件多线程跨集群同步实现

一、概要

现有多个集群,每个集群分别部署了ceph对象存储系统,需要将文件从集群A的ceph同步到集群B的ceph。

二、技术名词解释

  • ceph 分布式文件系统,本文讲的是其中的对象存储
  • cephA 表示A集群中部署的ceph
  • redis 用来缓存分片数量信息,当最后一片同步完成后将分片合并成完整文件

三、实现思路

后端分片读取ceph的文件,根据文件大小来判断是否进行分片多线程同步。

例如,将文件从cephA同步到cephB,若文件大于5MB,则进行分片多线程同步处理,小于5MB则直接走上传文件接口上传到cephB。

四、关键代码

4.1 同步发起端

  1. 线程池工具类
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import lombok.extern.slf4j.Slf4j;

import java.util.concurrent.*;
@Slf4j
public class ThreadFactoryUtil {
    // 基于数组的阻塞队列
    private static BlockingQueue<Runnable> blockingQueue = new LinkedBlockingQueue<>(100);
    private static volatile ThreadPoolExecutor pool;

    private ThreadFactoryUtil() {
        ThreadFactory factory = new ThreadFactoryBuilder().setNameFormat("upload-pool-%d").build();
        int i = Runtime.getRuntime().availableProcessors();
        log.info("系统核数:{}", i);
        // 核心线程数
        int corePoolSize = i * 2;
        // 最大线程数
        int maximumPoolSize = i * 2 * 10;
        pool = new ThreadPoolExecutor(corePoolSize, maximumPoolSize, 3L,
                TimeUnit.SECONDS, blockingQueue, factory,
                // 自定义策略
                (r, executor) -> {
                    try {
                        if (!executor.isShutdown()) {
                            while (executor.getQueue().remainingCapacity() == 0) {
                                log.info("队列满啦,而且线程数量达到最大数量啦!");
                                executor.getQueue().put(r);
                            }
                        }
                    } catch (Exception e) {
                        log.info("塞入队列异常啦!发邮件报警给运维");
                    }
                    // log.info("塞进队列成功!");
                });
        pool.allowCoreThreadTimeOut(true);
    }

    public static ThreadPoolExecutor getThreadFactoryUtilInstance() {
        if (null == pool) {
            synchronized (ThreadFactoryUtil.class) {
                if (pool == null) {
                    new ThreadFactoryUtil();
                    return pool;
                }
            }
        }
        return pool;
    }
}
  2. 计算分片与多线程同步处理方法
private void splitChunkSyncThreads(SyncInfo syncInfo, String key, ThreadPoolExecutor exec) {
        String fileUploadId = get32Uuid();
        String cephPath = syncInfo.getCephPath();
        Integer syncId = syncInfo.getSyncId();
        long length = syncInfo.getFilesize();
        Integer fileType = syncInfo.getFileType();
        try {
            //获取对象指定范围的流片段
            long packageLength = chunk;
            long pages = length / chunk;
            long leftLength = length % chunk;
            long pos = 0;
            long end = chunk - 1;
            long partNumber = 0;
            long partTotalNums = pages;
            if (leftLength > 0) {
                partTotalNums = pages + 1;
                pages = pages + 1;
            }
            String postUrl = "*****";
            Integer status;
            String msg = "";
            List<DataDealDTO> list = new ArrayList<>();
            for (long i = 0; i < pages; i++) {
                pos = i * packageLength;
                end = (i + 1) * packageLength - 1;
                partNumber = i + 1;
                if (end > length - 1) {
                    end = length - 1;
                    packageLength = end - pos + 1;
                }
                DataDealDTO dataDealDTO = new DataDealDTO();
                dataDealDTO.setPos(pos);
                dataDealDTO.setEnd(end);
                dataDealDTO.setPartNumber(partNumber);
                dataDealDTO.setPartTotalNums(partTotalNums);
                dataDealDTO.setPackageLength(packageLength);
                dataDealDTO.setBatchNo(fileUploadId);
                list.add(dataDealDTO);
                try {
                    if ((list.size() == threadNum && partTotalNums > threadNum) || i + 1 == partTotalNums) {
                        Future<Boolean> future = null;
                        for (DataDealDTO dealDTO : list) {
                            Callable<Boolean> task = () -> {
                                DataDealResultDTO dataDealResultDTO = dealSyncTask(postUrl, dealDTO, syncInfo);
                                if (WebConstants.SYNC_COMPLETE == dataDealResultDTO.getCode()) {
                                            dataDealResultDTO.getUrl().toString(), length, fileType);
                                }
                                if (!dataDealResultDTO.getUploadSuccess()) {
                                    throw new Exception(dataDealResultDTO.getMsg());
                                }
                                return true;
                            };
                            // 提交任务并保存结果
                            future = exec.submit(task);
                        }
                        // 等待任务执行完毕,获取执行结果
                        future.get();
                        list.clear();
                    }
                } catch (Exception e) {
                    this.abortMultUpload(syncInfo.getSyncClusterDestInfo(), syncInfo.getTargetObjectKey(), fileUploadId);
                    break;
                }
            }
        } catch (Exception e) {
            //发生异常时,在同步接收端删除ceph中已经上传的部分分片
            this.abortMultUpload(syncInfo.getSyncClusterDestInfo(), syncInfo.getTargetObjectKey(), fileUploadId);
        }
    }
  3. ceph分片获取流重试方法
public InputStream getChunkWithRetry(SyncInfo syncInfo, long pos, long end) throws Exception {
    // Fetches the byte range [pos, end] of the source object, retrying up to
    // 3 times before propagating the last failure.
    // NOTE(review): 'cephBaseInfo' and 'fileName' are not visible in this
    // fragment — presumably fields of the enclosing service; confirm they are
    // populated from 'syncInfo' upstream.
    final int maxRetries = 3;
    int retryCount = 0;
    while (retryCount < maxRetries) {
        try {
            final AmazonS3 amazonS3 = RgwClient.getS3Client(cephBaseInfo.getAccessKey(), cephBaseInfo.getSecretKey(),
                    cephBaseInfo.getEndPoint());
            // BUG FIX: the original passed an undefined variable 'start' to
            // withRange(); the method parameter is 'pos'.
            GetObjectRequest rangeObjectRequest =
                    new GetObjectRequest(cephBaseInfo.getBucketName(), fileName).withRange(pos, end);
            S3Object objectPortion = amazonS3.getObject(rangeObjectRequest);
            InputStream inputStream = objectPortion.getObjectContent();
            if (inputStream != null) {
                return inputStream;
            }
            throw new Exception("inputStream is null");
        } catch (Exception e) {
            retryCount++;
            if (retryCount == maxRetries) {
                // Exhausted all attempts: surface the last failure to the caller.
                throw e;
            }
        }
    }
    // Unreachable: the loop either returns a stream or rethrows above.
    return null;
}
  4. 请求工具类
    发起端与接收端使用短连接,防止断连问题
/**
 * OkHttp helper shared by the sync sender and receiver. Requests are sent with
 * "Connection: close" (short-lived connections) to avoid stale-connection
 * drops during long multi-chunk transfers.
 */
public class OkHttpUtil {

    private static final Logger logger = LoggerFactory.getLogger(OkHttpUtil.class);

    // Renamed from the original typo FROM_DATA (private constant, no external impact).
    private static final MediaType FORM_DATA = MediaType.parse("multipart/form-data");

    /** Read timeout in seconds, sized for large chunk transfers. */
    private final static int READ_TIMEOUT = 300;

    /** Connect timeout in seconds. */
    private final static int CONNECT_TIMEOUT = 300;

    /** Write timeout in seconds. */
    private final static int WRITE_TIMEOUT = 300;

    /** Lazily-created singleton client; volatile for double-checked locking. */
    private static volatile OkHttpClient okHttpClient;

    private OkHttpUtil() {
        OkHttpClient.Builder clientBuilder = new OkHttpClient.Builder();
        clientBuilder.readTimeout(READ_TIMEOUT, TimeUnit.SECONDS);
        clientBuilder.connectTimeout(CONNECT_TIMEOUT, TimeUnit.SECONDS);
        clientBuilder.writeTimeout(WRITE_TIMEOUT, TimeUnit.SECONDS);
        // Raise the pool's max idle connections above the default of 5.
        // NOTE(review): requests also send "Connection: close", which largely
        // defeats connection pooling — confirm both settings are intended.
        clientBuilder.connectionPool(new ConnectionPool(32, 5, TimeUnit.MINUTES));
        // Retry transparently on connection failures...
        clientBuilder.retryOnConnectionFailure(true);
        // ...and additionally retry whole requests up to 3 times.
        clientBuilder.addInterceptor(new RetryInterceptor(3));
        okHttpClient = clientBuilder.build();
    }

    /**
     * Returns the shared client, creating it on first use (double-checked
     * locking; the field is assigned inside the private constructor).
     */
    public static OkHttpClient getInstance() {
        if (null == okHttpClient) {
            synchronized (OkHttpUtil.class) {
                if (okHttpClient == null) {
                    new OkHttpUtil();
                }
            }
        }
        return okHttpClient;
    }

    /**
     * Executes the request synchronously and returns the response body as a
     * string; returns "" on any failure (errors are logged, not thrown).
     *
     * @param request the fully-built request
     * @return the response body, or "" when the call failed or had no body
     */
    private static String execNewCall(Request request) {
        Response response = null;
        try {
            OkHttpClient client = getInstance();
            response = client.newCall(request).execute();
            ResponseBody body = response.body();
            if (response.isSuccessful()) {
                return body == null ? "" : body.string();
            }
            logger.error("error,statusCode={},body={}", response.code(), body == null ? "" : body.string());
        } catch (IllegalStateException e) {
            // BUG FIX: the original also called e.printStackTrace(), which
            // bypasses the logging framework and duplicates the trace on stderr.
            logger.error("okhttp3 put connectTime out error >> ex = {}", ExceptionUtils.getStackTrace(e));
        } catch (Exception e) {
            logger.error("okhttp3 put error >> ex = {}", ExceptionUtils.getStackTrace(e));
        } finally {
            // Always release the response to return the connection to the pool.
            if (response != null) {
                response.close();
            }
        }
        return "";
    }

    /**
     * POSTs a multipart/form-data request carrying text parameters plus one
     * file part built from the given stream.
     * NOTE(review): the whole stream is buffered into memory first — fine for
     * chunk-sized payloads (~5MB), but do not pass unbounded streams.
     *
     * @param url         target endpoint
     * @param params      extra form fields (may be null or empty)
     * @param inputStream file content; fully read into a byte array
     * @param fileName    file name reported in the "file" part
     * @return response body, or "" on failure
     */
    public static String postJsonParamsInputStream(String url, Map<String, String> params, InputStream inputStream, String fileName) throws IOException {
        RequestBody requestBody = RequestBody.create(FileUtil.inputStreamToByteArray(inputStream), MediaType.parse("application/octet-stream"));
        MultipartBody.Builder bodyBuilder = new MultipartBody.Builder().setType(FORM_DATA);
        if (params != null && !params.isEmpty()) {
            for (Map.Entry<String, String> entry : params.entrySet()) {
                bodyBuilder.addFormDataPart(entry.getKey(), entry.getValue());
            }
        }
        bodyBuilder.addFormDataPart("file", fileName, requestBody);
        Request request = new Request.Builder()
                .url(url)
                .post(bodyBuilder.build())
                // Short connection + no compression, to sidestep proxy/keep-alive issues.
                .addHeader("Connection", "close")
                .addHeader("Accept-Encoding", "identity")
                .build();
        return execNewCall(request);
    }
}

4.2 同步接收端

  1. 接收发起端的流分片,并最终做合并处理
public ResponseEntity<Result> uploadChunkInputeStreamToCeph(SyncChunkClusterInternalReq req, ThreadPoolExecutor exec) {
    // Receives one chunk from the sender, uploads it as a multipart part, and
    // when redis shows every part of the batch has arrived, completes the
    // multipart upload (merging the parts into the final object).
    // NOTE(review): 'endPoint', 'uploadMap', 'redisUtil' and 'url' are fields
    // declared outside this fragment — confirm against the enclosing class.
    String bucketName = req.getTargetBucketName();
    String s3Path = req.getObjectKey();
    // Build an S3 client against the target cluster's rgw endpoint.
    CephBaseInfo cephBaseInfo = new CephBaseInfo();
    cephBaseInfo.setAccessKey(req.getTargetAccessKey());
    cephBaseInfo.setEndPoint(endPoint);
    cephBaseInfo.setSecretKey(req.getTargetSecretKey());
    AmazonS3 s3 = new AmazonS3ServiceBuilder().amazonS3(cephBaseInfo);
    try {
        String batchNo = req.getBatchNo();
        // BUG FIX: the original called initiateMultipartUpload for EVERY chunk
        // request and discarded the new uploadId whenever the batch was already
        // registered, leaving one orphan (never-aborted) multipart upload per
        // chunk on ceph. Initiate only for the first chunk of a batch.
        String uploadId = uploadMap.computeIfAbsent(batchNo, k -> {
            InitiateMultipartUploadRequest initRequest = new InitiateMultipartUploadRequest(bucketName, s3Path);
            return s3.initiateMultipartUpload(initRequest).getUploadId();
        });
        Callable<Boolean> task = () -> {
            log.info("开始上传:{}", uploadId);
            InputStream inputStream = req.getFile().getInputStream();
            UploadPartRequest uploadRequest = new UploadPartRequest()
                    .withBucketName(bucketName)
                    .withKey(s3Path)
                    .withUploadId(uploadId)
                    .withPartNumber(req.getPartNumber())
                    .withInputStream(inputStream)
                    .withPartSize(req.getPartSize());
            // Upload the part; a non-blank ETag signals success.
            try {
                UploadPartResult uploadPartResult = s3.uploadPart(uploadRequest);
                if (uploadPartResult != null && org.apache.commons.lang3.StringUtils.isNotBlank(uploadPartResult.getETag())) {
                    return true;
                }
            } catch (Exception e) {
                log.error("upload file to ceph error", e);
                throw new Exception(e.getMessage());
            }
            return false;
        };
        // Run the upload on the pool and wait for its result.
        Future<Boolean> future = exec.submit(task);
        Boolean execResult = future.get();
        if (execResult) {
            String returnMessage = "上传文件名为" + s3Path + "的分片序号:" + req.getPartNumber() + "上传成功";
            String key = DATA_SYNC_BATCH_PREFIX + req.getBatchNo();
            // Record this part number in the batch's redis set; the set size
            // tells us how many distinct parts have arrived.
            redisUtil.sSet(key, req.getPartNumber());
            long uploadPartNum = redisUtil.sGetSetSize(key);
            if (uploadPartNum != req.getPartTotalNums()) {
                // Not the last part yet — acknowledge this chunk only.
                DataDealResultDTO dataDealResultDTO = new DataDealResultDTO();
                dataDealResultDTO.setCode(WebConstants.SYNC_CHUNK_SUCCESS);
                dataDealResultDTO.setMsg(returnMessage);
                dataDealResultDTO.setUploadSuccess(Boolean.TRUE);
                return ResponseEntity.ok(Result.success(dataDealResultDTO));
            }
            try {
                // All parts arrived: complete the multipart upload (merge).
                String uploadIdc = uploadMap.get(req.getBatchNo());
                List<PartETag> partETags = this.getAllUploadedParts(s3Path, bucketName, uploadIdc, s3);
                log.info("complete multipart upload ==================");
                CompleteMultipartUploadRequest compRequest = new CompleteMultipartUploadRequest(bucketName, s3Path, uploadIdc, partETags);
                s3.completeMultipartUpload(compRequest);
                uploadMap.remove(req.getBatchNo());
                // Drop the redis bookkeeping for this batch.
                redisUtil.del(key);
                DataDealResultDTO dataDealResultDTO = new DataDealResultDTO();
                dataDealResultDTO.setCode(WebConstants.SYNC_COMPLETE);
                dataDealResultDTO.setMsg("文件合并完成");
                // NOTE(review): 'url' is not defined in this fragment —
                // presumably the merged object's access URL; confirm upstream.
                dataDealResultDTO.setUrl(url);
                dataDealResultDTO.setUploadSuccess(Boolean.TRUE);
                return ResponseEntity.ok(Result.success(dataDealResultDTO));
            } catch (Exception e) {
                // BUG FIX: the original used equals() here (contains() in the
                // outer catch) and would NPE on a null exception message.
                String emsg = e.getMessage() == null ? "" : e.getMessage();
                ReturnCode returnCode = emsg.contains("QuotaExceeded") ? ReturnCode.BUCKET_SPACE_NOT_ENOUGH
                        : emsg.contains("Forbidden") ? ReturnCode.CEPH_LOGIN_AUTH_ERROR
                        : emsg.contains("NoSuchBucket") ? ReturnCode.BUCKET_NAME_NOT_EXIST : ReturnCode.UPLOAD_FAILED;
                return ResponseEntity.ok(Result.failure(returnCode.getCode(), returnCode.getError()));
            }
        }
    } catch (Exception e) {
        // Abort the multipart upload so partial parts do not leak on ceph.
        s3.abortMultipartUpload(new AbortMultipartUploadRequest(bucketName, s3Path, uploadMap.get(req.getBatchNo())));
        log.error("Failed to upload", e);
        String emsg = e.getMessage() == null ? "" : e.getMessage();
        ReturnCode returnCode = emsg.contains("QuotaExceeded") ? ReturnCode.BUCKET_SPACE_NOT_ENOUGH
                : emsg.contains("Forbidden") ? ReturnCode.CEPH_LOGIN_AUTH_ERROR
                : emsg.contains("NoSuchBucket") ? ReturnCode.BUCKET_NAME_NOT_EXIST : ReturnCode.UPLOAD_FAILED;
        return ResponseEntity.ok(Result.failure(returnCode.getCode(), returnCode.getError()));
    }
    return ResponseEntity.ok(Result.failure(ReturnCode.UPLOAD_FAILED.getCode(), ReturnCode.UPLOAD_FAILED.getError()));
}
  2. 发生异常时删除接收端已上传的分片
 /**
  * Aborts the in-progress multipart upload for a batch on the receiving side
  * and clears the batch's chunk bookkeeping from redis, so partially-uploaded
  * parts do not leak storage. Failures are logged, not propagated (best-effort).
  */
 public void abortMultUpload(AbortMultUploadReq abortMultUploadReq) {
        try {
            CephBaseInfo cephBaseInfo = new CephBaseInfo(endPoint, abortMultUploadReq.getBucketName(), abortMultUploadReq.getAccessKey(), abortMultUploadReq.getSecretKey());
            AmazonS3 s3 = new AmazonS3ServiceBuilder().amazonS3(cephBaseInfo);
            s3.abortMultipartUpload(new AbortMultipartUploadRequest(abortMultUploadReq.getBucketName(), abortMultUploadReq.getPath(),
                    uploadMap.get(abortMultUploadReq.getBatchNo())));
            // Drop the cached chunk-count set for this batch.
            String key = DATA_SYNC_BATCH_PREFIX + abortMultUploadReq.getBatchNo();
            redisUtil.del(key);
        } catch (Exception e) {
            // BUG FIX: the original logged with a "{}" placeholder and passed the
            // exception as the formatting argument, which prints only toString()
            // and loses the stack trace. Passing the Throwable as the last
            // argument with no placeholder makes SLF4J record the full trace.
            log.error("中止分片上传失败", e);
        }
    }
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值