需求描述:从SFTP读取csv文件写入本地数据库中
1. SFTP服务器配置
- application.yml
sftp:
host: 127.0.0.1
passWord: 123456
port: 22
userName: jack
2. 代码实现
- 依赖
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-csv</artifactId>
</dependency>
- SftpClient.java
package com.gigrt.framework.client;
import cn.hutool.core.collection.CollUtil;
import com.gigrt.common.exception.FileSyncException;
import com.gigrt.framework.config.SftpProperties;
import com.jcraft.jsch.*;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import javax.validation.constraints.NotNull;
import java.io.*;
import java.util.*;
@Slf4j
@Component
public class SftpClient {
private static final String SLASH = "/";
private static final String DOT = ".";
private static final String DOUBLE_DOT = "..";
private ChannelSftp sftp = null;
private Session sshSession = null;
@Autowired
SftpProperties sftpProperties;
/**
* 连接sftp服务器
*/
private void connect() {
log.info("ftp连接开始host=" + sftpProperties.getHost() + "port" + sftpProperties.getPort() + "username=" + sftpProperties.getUserName());
JSch jsch = new JSch();
try {
jsch.getSession(sftpProperties.getUserName(), sftpProperties.getHost(), sftpProperties.getPort());
sshSession = jsch.getSession(sftpProperties.getUserName(), sftpProperties.getHost(), sftpProperties.getPort());
log.info("ftp---Session created.");
sshSession.setPassword(sftpProperties.getPassWord());
Properties properties = new Properties();
properties.put("StrictHostKeyChecking", "no");
sshSession.setConfig(properties);
sshSession.connect();
log.info("ftp---Session connected.");
Channel channel = sshSession.openChannel("sftp");
channel.connect();
log.info("Opening Channel.");
sftp = (ChannelSftp) channel;
log.info("ftp---Connected to " + sftpProperties.getHost());
} catch (JSchException e) {
log.error("sftp connect异常", e);
throw new FileSyncException("connect异常");
}
}
private void disconnect() {
if (this.sftp != null && this.sftp.isConnected()) {
this.sftp.disconnect();
this.sftp = null;
log.info("sftp is closed already");
}
if (this.sshSession != null && this.sshSession.isConnected()) {
this.sshSession.disconnect();
this.sshSession = null;
log.info("sshSession is closed already");
}
}
/**
* 下载单个文件
*/
public byte[] downloadFile(String filePath) {
log.info("sftp下载文件 filePath{} 开始", filePath);
connect();
try (
InputStream inputStream = sftp.get(filePath);
ByteArrayOutputStream swapStream = new ByteArrayOutputStream();
) {
//进入FTP服务器文件目录
log.info("DownloadFile:{},success from sftp", filePath);
log.info("ftp下载文件结束");
byte[] buff = new byte[1024];
int rc = 0;
while ((rc = inputStream.read(buff, 0, 1024)) > 0) {
swapStream.write(buff, 0, rc);
}
// in_b为转换之后的结果
byte[] in_b = swapStream.toByteArray();
return in_b;
} catch (SftpException | IOException e) {
log.error("sftp下载文件失败");
throw new FileSyncException("ftp下载文件失败");
} finally {
disconnect();
}
}
public void uploadFile(byte[] data, String directory, String remoteFileName) {
log.info("sftp 上传文件{} 开始", remoteFileName);
connect();
try (InputStream is = new ByteArrayInputStream(data)) {
this.createDirs(directory, sftp);
sftp.put(is, remoteFileName);
} catch (SftpException | IOException e) {
log.error("sftp upload error", e);
throw new FileSyncException("sftp upload error");
} finally {
disconnect();
}
}
/**
* 列出文件
*
* @param directory 目录
* @return 文件列表
*/
public List<String> listFiles(String directory, String matchRule) {
log.info("sftp listFiles 目录是:{} 开始,matchRule", directory);
connect();
try {
Vector files = sftp.ls(directory);
if (CollUtil.isNotEmpty(files)) {
List<String> list = new ArrayList<>(files.size());
for (Object object : files) {
ChannelSftp.LsEntry file = (ChannelSftp.LsEntry) object;
log.info("fileName:{}", file.getFilename());
//文件名需包含后缀名以及排除 .和 ..
boolean match = file.getFilename().contains(matchRule);
if (!DOT.equals(file.getFilename()) && !DOUBLE_DOT.equals(file.getFilename())
&& file.getFilename().contains(DOT) && match) {
String newFilePath = directory.endsWith(SLASH) ? directory + file.getFilename() : directory + SLASH + file.getFilename();
list.add(newFilePath);
}
}
return list;
}
} catch (SftpException e) {
log.error("sftp list files error", e);
throw new FileSyncException("sftp list files error");
} finally {
disconnect();
}
return Collections.emptyList();
}
/**
* 列出文件
*
* @param directory 目录
* @return 文件列表
*/
public List<String> listFiles(String directory) {
log.info("sftp listFiles 目录是:{} 开始,matchRule", directory);
connect();
try {
Vector files = sftp.ls(directory);
if (CollUtil.isNotEmpty(files)) {
List<String> list = new ArrayList<>(files.size());
for (Object object : files) {
ChannelSftp.LsEntry file = (ChannelSftp.LsEntry) object;
log.info("fileName:{}", file.getFilename());
// 排除 .和 ..
if (!DOT.equals(file.getFilename()) && !DOUBLE_DOT.equals(file.getFilename())
&& file.getFilename().contains(DOT)) {
String newFilePath = directory.endsWith(SLASH) ? directory + file.getFilename() : directory + SLASH + file.getFilename();
list.add(newFilePath);
}
}
return list;
}
} catch (SftpException e) {
log.error("sftp list files error", e);
throw new FileSyncException("sftp list files error");
} finally {
disconnect();
}
return Collections.emptyList();
}
/**
* 创建SFTP目录,如果目录不存在就创建
*
* @param dirPath 源目录
* @param sftp sftp
*/
private void createDirs(@NotNull String dirPath, ChannelSftp sftp) {
String[] dirs = Arrays.stream(dirPath.split("/")).filter(StringUtils::isNotBlank).toArray(String[]::new);
for (String dir : dirs) {
try {
sftp.cd(dir);
log.info("Change directory {}", dir);
} catch (Exception e) {
try {
sftp.mkdir(dir);
log.info("Create directory {}", dir);
} catch (SftpException e1) {
log.error("Create directory failure, directory:{}", dir, e1);
throw new FileSyncException("Create directory failure");
}
try {
sftp.cd(dir);
log.info("Change directory {}", dir);
} catch (SftpException e1) {
log.error("Change directory failure, directory:{}", dir, e1);
throw new FileSyncException("Create directory failure");
}
}
}
}
}
- CsvHelper.java
package com.gigrt.framework.helpers;
import com.gigrt.common.exception.FileSyncException;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import java.io.*;
import java.util.*;
import java.util.stream.Collectors;
@Slf4j
public class CsvHelper {
private CsvHelper() {}
public static CsvHelper getHelper() {
return new CsvHelper();
}
/**
* 读取csv文件,并转换成List<Map<String, String>>
*
* @param data 数据文件
* @param limit 限制行数 -1 表示不限制
* @param headerLine headerLine
* @return 转换map 对象
*/
public List<Map<String, Object>> read(byte[] data, Integer limit, Integer headerLine) {
// 读取文件参数设置,根据实际情况设置
CSVFormat format =
CSVFormat.Builder.create()
.setDelimiter(',')
.setRecordSeparator("\r\n")
.setIgnoreEmptyLines(true)
.setAllowDuplicateHeaderNames(true)
.build();
try (CSVParser parser = format.parse(new BufferedReader(new InputStreamReader(new ByteArrayInputStream(data))))) {
return extractDataWithHeader(parser, limit, headerLine);
} catch (Exception e) {
log.error("csv读取错误!", e);
throw new FileSyncException("csv读取错误!");
}
}
/**
* 这里处理了一种常见情况,正常字段中有换行符,就是一条数据被分成两条,这里的处理是反手拼接起来
*/
private List<Map<String, Object>> extractDataWithHeader(CSVParser parser, int limit, int headerLine) {
final List<Map<String, Object>> result = new LinkedList<>();
List<String> headers = new ArrayList<>();
for (CSVRecord row : parser) {
final long rowNum = row.getRecordNumber();
if (rowNum == headerLine) {
headers = row.stream().collect(Collectors.toList());
continue;
}
result.add(setRowRecord(headers, row));
// 限制行数
if (limit >= 0 && rowNum > limit) {
return result;
}
}
return result;
}
private Map<String, Object> setRowRecord(List<String> headers, CSVRecord row) {
Map<String, Object> rowRecord = new HashMap<>(headers.size());
for (int i = 0; i < headers.size(); i++) {
if (i < row.size()) {
rowRecord.put(headers.get(i), row.get(i));
}
}
return rowRecord;
}
}
- Test.java
说明:fileStrategy只是对SftpClient的封装, 这里忽略
@Test
public void test04() throws IOException {
List<String> strings = fileStrategy.listFiles("/ETL/FILE/");
System.out.println(strings);
strings.forEach(file -> {
try {
byte[] bytes = fileStrategy.downloadFile(file);
System.out.println(bytes);
List<Map<String, Object>> result = CsvHelper.getHelper().read(bytes, -1, 1);
System.out.println(result);
} catch (IOException e) {
throw new RuntimeException(e);
}
});
}
3. 实现逻辑
1. 首先从ftp读取到文件:sftp.get(filePath)
2. 将读取的文件转换为byte[], 供系统需要时使用.
3. 封装工具类CsvHelper, 用于转换byte[]为可读类型, 例如:List<Map<String, Object>>