java_threadpool&resttemplate多线程发送文件
文章目录
背景
遍历某个目录下所有的文件,调用上游服务提供的发送文件接口进行文件上传,上传的时候还需要计算文件的md5码作为请求头参数,如果上传成功就删除源文件;失败则下次继续发送。
分析
1.考虑到目录下的文件数量很多,遍历文件的方法使用NIO的Files.walk
2.考虑发送效率使用多线程threadpool和发送连接池化
3.为了避免重复发送,使用juc的countdownlatch和本地对象锁保证数据一致性
实现
1.目录文件md5计算和文件名获取
package com.gw.utils;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.codec.digest.DigestUtils;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
/**
* 操作文件工具类
*/
@Slf4j
public class MyFileUtils {
/**
* 获取目录下指定数量的文件路径
* @param localPath 文件路径
* @param fileMaxCount 每次获取的最大数量
* @return List<String> 目录下的所有文件名
*/
public static List<String> getFilePath(String localPath, int fileMaxCount) {
List<String> filePathList = new ArrayList<>();
if (new File(localPath).exists()) {
try (Stream<Path> walk = Files.walk(Paths.get(localPath))) {
filePathList = walk.filter(Files::isRegularFile).limit(fileMaxCount)
.map(path -> path.getParent() + File.separator + path.getFileName().toString())
.collect(Collectors.toList());
} catch (IOException e) {
log.error("获取文件路径错误", e);
}
}
return filePathList;
}
/**
* 计算md5
* @param filePath 文件路径
* @return String md5值
*/
public static String getFileMd5(String filePath) {
String md5;
try (FileInputStream fileInputStream = new FileInputStream(filePath)) {
md5 = DigestUtils.md5Hex(fileInputStream);
} catch (Exception e) {
return null;
}
return md5;
}
}
package com.gw.filesend.task.md5;
import com.google.common.collect.Lists;
import com.gw.utils.MyFileUtils;
import com.pojo.constant.RuntimeConstants;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;
import java.io.File;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Scheduled task that scans the configured upload directories, computes the MD5 of each file
 * and publishes the result into {@code RuntimeConstants.FILE_MD5_MAP}.
 */
@Slf4j
@Component
public class FileMd5Task {

    @Value("${system.uploadFilePath}")
    private String uploadFilePath;

    @Value("${system.fileMaxCount}")
    private int fileMaxCount;

    /**
     * Runs only while {@code RuntimeConstants.fileMd5Sign} is {@code true}; after publishing the
     * freshly computed map it sets the flag to {@code false}.
     */
    @Scheduled(cron = "${scheduling.FileMd5.cron}")
    public void calculate() {
        if (!RuntimeConstants.fileMd5Sign) {
            return;
        }
        // Do the expensive directory walk and hashing outside the lock.
        List<String> filePathList = getFilePathList();
        Map<String, String> fileMd5Map = getFileMd5Map(filePathList);
        // SendFileTask copies and clears the shared map while holding this monitor; writing
        // under the same monitor is what makes that lock effective.
        synchronized (RuntimeConstants.FILE_MD5_MAP) {
            RuntimeConstants.FILE_MD5_MAP.putAll(fileMd5Map);
            RuntimeConstants.fileMd5Sign = false;
        }
    }

    /**
     * Maps each file path to its MD5. Files whose digest could not be computed
     * ({@code MyFileUtils.getFileMd5} returned {@code null}) are left out, so no null value
     * reaches the shared map; they stay on disk and are picked up by a later run.
     */
    private Map<String, String> getFileMd5Map(List<String> filePathList) {
        Map<String, String> map = new HashMap<>(filePathList.size());
        for (String filePath : filePathList) {
            String md5 = MyFileUtils.getFileMd5(filePath);
            if (md5 == null) {
                log.warn("文件{}的md5计算失败,本次跳过", filePath);
                continue;
            }
            map.put(filePath, md5);
        }
        return map;
    }

    /**
     * Lists files from every directory named in {@code RuntimeConstants.dirList}, resolved below
     * {@code uploadFilePath}. The {@code fileMaxCount} limit is applied to each directory separately.
     */
    private List<String> getFilePathList() {
        List<String> filePathList = Lists.newArrayList();
        RuntimeConstants.dirList.forEach(dirName -> {
            String dirPath = uploadFilePath.concat(File.separator).concat(dirName);
            filePathList.addAll(MyFileUtils.getFilePath(dirPath, fileMaxCount));
        });
        return filePathList;
    }
}
2.RestTemplate封装和HttpClient连接池集成
package com.gw.utils.config;
import com.gw.utils.restClient.RestTemplateUtils;
import org.apache.http.client.HttpClient;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.config.Registry;
import org.apache.http.config.RegistryBuilder;
import org.apache.http.conn.socket.ConnectionSocketFactory;
import org.apache.http.conn.socket.PlainConnectionSocketFactory;
import org.apache.http.conn.ssl.SSLConnectionSocketFactory;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.http.client.ClientHttpRequestFactory;
import org.springframework.http.client.HttpComponentsClientHttpRequestFactory;
import org.springframework.web.client.RestTemplate;
/**
* commons配置类
*/
@Configuration
public class CommonsConfiguration {
@Bean
public RestTemplateUtils getRestTemplateUtils(RestTemplate restTemplate) {
return new RestTemplateUtils(restTemplate);
}
@Bean
public RestTemplate getRestTemplate(ClientHttpRequestFactory httpRequestFactory) {
return new RestTemplate(httpRequestFactory);
}
@Bean
public ClientHttpRequestFactory httpRequestFactory(HttpClient httpClient) {
HttpComponentsClientHttpRequestFactory factory = new HttpComponentsClientHttpRequestFactory(httpClient);
factory.setBufferRequestBody(false);
return factory;
}
@Bean
public HttpClient getHttpClient() {
Registry<ConnectionSocketFactory> registry = RegistryBuilder.<ConnectionSocketFactory>create()
.register("http", PlainConnectionSocketFactory.getSocketFactory())
.register("https", SSLConnectionSocketFactory.getSocketFactory())
.build();
PoolingHttpClientConnectionManager connectionManager = new PoolingHttpClientConnectionManager(registry);
connectionManager.setMaxTotal(300);
connectionManager.setDefaultMaxPerRoute(300);
RequestConfig requestConfig = RequestConfig.custom()
.setSocketTimeout(1000 * 180)
.setConnectTimeout(1000 * 30)
.setConnectionRequestTimeout(3000)
.build();
return HttpClientBuilder.create().setDefaultRequestConfig(requestConfig).setConnectionManager(connectionManager).build();
}
}
package com.gw.utils.restClient;
import org.springframework.core.io.FileSystemResource;
import org.springframework.http.*;
import org.springframework.util.LinkedMultiValueMap;
import org.springframework.util.MultiValueMap;
import org.springframework.web.client.RestTemplate;
import java.io.File;
/**
 * Thin wrapper around {@link RestTemplate} for plain string requests and multipart file uploads.
 */
public class RestTemplateUtils {

    private final RestTemplate restTemplate;

    public RestTemplateUtils(RestTemplate restTemplate) {
        this.restTemplate = restTemplate;
    }

    /**
     * Sends a request with a string body using the given HTTP method (POST, GET, DELETE, PUT, ...).
     *
     * @param url         request URL
     * @param data        request body, may be {@code null}
     * @param httpHeaders request headers
     * @param methodType  HTTP method to use
     * @return the response with its body read as a string
     */
    public ResponseEntity<String> requestByHttpType(String url, String data, HttpHeaders httpHeaders, HttpMethod methodType) {
        HttpEntity<String> entity = new HttpEntity<>(data, httpHeaders);
        return restTemplate.exchange(url, methodType, entity, String.class);
    }

    /**
     * Uploads a file as a multipart form whose single part is named {@code file}.
     *
     * @param url         request URL
     * @param filePath    path of the file to upload
     * @param httpHeaders extra request headers; may be {@code null} and is never modified
     * @return the response with its body read as a string
     */
    public ResponseEntity<String> uploadFile(String url, String filePath, HttpHeaders httpHeaders) {
        // Work on a copy so the caller's header object is not mutated as a side effect.
        HttpHeaders requestHeaders = new HttpHeaders();
        if (httpHeaders != null) {
            requestHeaders.putAll(httpHeaders);
        }
        requestHeaders.set("Content-Type", MediaType.MULTIPART_FORM_DATA_VALUE.concat(";charset=UTF-8"));

        FileSystemResource resource = new FileSystemResource(new File(filePath));
        MultiValueMap<String, Object> form = new LinkedMultiValueMap<>();
        form.add("file", resource);

        HttpEntity<MultiValueMap<String, Object>> files = new HttpEntity<>(form, requestHeaders);
        return restTemplate.postForEntity(url, files, String.class);
    }
}
3.多线程文件发送
package com.gw.filesend.config;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import java.util.concurrent.*;
/**
 * Configuration of the thread pool used to send files.
 */
@Configuration
public class ThreadPoolConfig {

    private static final int CORE_POOL_SIZE = 8;
    private static final int MAX_POOL_SIZE = 16;
    private static final long KEEP_ALIVE_SECONDS = 20L;
    private static final int QUEUE_CAPACITY = 1000;

    /**
     * Creates the thread pool used to send files.
     *
     * <p>The work queue is bounded, so a large batch can saturate the pool. With
     * {@code CallerRunsPolicy} the submitting thread then runs the task itself, which slows
     * submission down instead of throwing {@code RejectedExecutionException} (the effect of the
     * previous {@code AbortPolicy}) and aborting the rest of the batch.
     *
     * @return ExecutorService the file-sending thread pool
     */
    @Bean
    public ExecutorService getThreadPool() {
        ThreadFactory factory = new ThreadFactoryBuilder().setNameFormat("sendFile-pool-%d").build();
        return new ThreadPoolExecutor(CORE_POOL_SIZE, MAX_POOL_SIZE, KEEP_ALIVE_SECONDS, TimeUnit.SECONDS,
                new LinkedBlockingDeque<>(QUEUE_CAPACITY), factory, new ThreadPoolExecutor.CallerRunsPolicy());
    }
}
package com.gw.filesend.task.file;
import com.google.common.collect.Maps;
import com.gw.filesend.service.SendFileService;
import com.pojo.constant.RuntimeConstants;
import lombok.extern.slf4j.Slf4j;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;
import javax.annotation.Resource;
import java.time.Duration;
import java.time.Instant;
import java.util.Map;
import java.util.concurrent.ExecutorService;
/**
 * Scheduled task that sends every file currently listed in
 * {@code RuntimeConstants.FILE_MD5_MAP} and then sets {@code RuntimeConstants.fileMd5Sign}
 * back to {@code true}.
 */
@Slf4j
@Component
public class SendFileTask {

    @Resource
    private ExecutorService executorService;

    @Resource
    private SendFileService sendFileService;

    /**
     * Sends a snapshot of the shared path-to-MD5 map, then clears the map and resets the flag.
     * The reset happens in a {@code finally} block: if it were skipped after a failure, the
     * map would keep its stale entries and the flag would stay {@code false}.
     */
    @Scheduled(cron = "${scheduling.SendFile.cron}")
    public void job() {
        try {
            // Hold the map's monitor for the whole round to prevent concurrent modification.
            synchronized (RuntimeConstants.FILE_MD5_MAP) {
                try {
                    // Shallow copy; keys and values are immutable strings, so that is sufficient.
                    Map<String, String> currentFileMd5Map = Maps.newHashMap(RuntimeConstants.FILE_MD5_MAP);
                    int fileCount = currentFileMd5Map.size();
                    Instant startTime = Instant.now();
                    log.info("{}个文件开始发送,文件md5Map为{}", fileCount, currentFileMd5Map);
                    sendFileService.send(currentFileMd5Map, executorService);
                    Instant endTime = Instant.now();
                    log.info("{}个文件发送结束,耗时{}秒", fileCount, Duration.between(startTime, endTime).toMillis() / 1000);
                } finally {
                    // Always hand control back to the MD5 task, even when sending failed.
                    RuntimeConstants.FILE_MD5_MAP.clear();
                    RuntimeConstants.fileMd5Sign = true;
                }
            }
        } catch (Exception e) {
            log.error("发送文件失败", e);
        }
    }
}
package com.gw.filesend.service;
import com.gw.utils.restClient.RestTemplateUtils;
import com.pojo.constant.UrlConstants;
import lombok.extern.slf4j.Slf4j;
import org.springframework.http.HttpHeaders;
import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Service;
import javax.annotation.Resource;
import java.io.File;
import java.util.Map;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.RejectedExecutionException;
@Slf4j
@Service
public class SendFileService extends CommonService {
@Resource
private RestTemplateUtils restTemplateUtils;
public void send(Map<String, String> currentFileMd5Map, ExecutorService executorService) {
CountDownLatch countDownLatch = new CountDownLatch(currentFileMd5Map.size());
currentFileMd5Map.forEach((filePath, fileMd5) ->
executorService.execute(() -> {
try {
HttpHeaders httpHeaders = new HttpHeaders();
httpHeaders.set("md5sum", fileMd5);
ResponseEntity<String> responseEntity = restTemplateUtils.uploadFile(getUrlPath(UrlConstants.file_upload), filePath, httpHeaders);
// 发送成功,删除文件
if (responseEntity.getStatusCodeValue() == 200) {
boolean result = new File(filePath).delete();
log.info("源文件{}发送成功,删除源文件{}", filePath, result ? "成功" : "失败");
} else {
log.info("源文件{}发送失败", filePath);
}
} catch (Exception e) {
log.error("异常线程记录", e);
} finally {
countDownLatch.countDown();
}
})
);
try {
countDownLatch.await();
} catch (InterruptedException e) {
log.error("数据文件发送等待响应超时", e);
}
}
}
4.依赖管理
<?xml version="1.0" encoding="UTF-8"?>
<!-- Parent POM: declares the modules and centralizes dependency versions. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>org.example</groupId>
    <artifactId>data-transfer</artifactId>
    <packaging>pom</packaging>
    <version>1.0-SNAPSHOT</version>
    <modules>
        <module>gw-commons</module>
        <module>data-pojo</module>
        <module>file-send</module>
    </modules>
    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <springboot.version>2.5.2</springboot.version>
        <fastjson.version>1.2.83</fastjson.version>
        <lombok.version>1.18.8</lombok.version>
        <java.version>1.8</java.version>
        <maven.compiler.source>1.8</maven.compiler.source>
        <maven.compiler.target>1.8</maven.compiler.target>
        <commons-lang3.version>3.8.1</commons-lang3.version>
        <commons-io.version>2.5</commons-io.version>
        <guava.version>20.0</guava.version>
        <commons-codec.version>1.15</commons-codec.version>
        <httpcomponents.version>4.5.7</httpcomponents.version>
        <!-- Misspelled legacy name, kept as an alias in case a child module still references it. -->
        <httpcomponents.verion>${httpcomponents.version}</httpcomponents.verion>
    </properties>
    <dependencyManagement>
        <dependencies>
            <!-- Spring Boot BOM; the explicit entries below pin versions for this project. -->
            <dependency>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-dependencies</artifactId>
                <version>${springboot.version}</version>
                <scope>import</scope>
                <type>pom</type>
            </dependency>
            <dependency>
                <groupId>org.projectlombok</groupId>
                <artifactId>lombok</artifactId>
                <version>${lombok.version}</version>
            </dependency>
            <dependency>
                <groupId>commons-io</groupId>
                <artifactId>commons-io</artifactId>
                <version>${commons-io.version}</version>
            </dependency>
            <dependency>
                <groupId>com.alibaba</groupId>
                <artifactId>fastjson</artifactId>
                <version>${fastjson.version}</version>
            </dependency>
            <dependency>
                <groupId>com.google.guava</groupId>
                <artifactId>guava</artifactId>
                <version>${guava.version}</version>
            </dependency>
            <dependency>
                <groupId>org.apache.commons</groupId>
                <artifactId>commons-lang3</artifactId>
                <version>${commons-lang3.version}</version>
            </dependency>
            <dependency>
                <groupId>commons-codec</groupId>
                <artifactId>commons-codec</artifactId>
                <version>${commons-codec.version}</version>
            </dependency>
            <dependency>
                <groupId>org.apache.httpcomponents</groupId>
                <artifactId>httpclient</artifactId>
                <version>${httpcomponents.version}</version>
            </dependency>
        </dependencies>
    </dependencyManagement>
</project>
总结
通过使用多线程和HTTP连接池,发送效率提升明显:串行发送时,1000个文件每个约需10s,总计约10000s;改用多线程和连接池之后,全部发送只需约60s。(测试场景的线程数开到200,系统cpu占用不到2%,io也没负载,线程池的参数调优待研究)