背景
- java定时任务。
- 定时ftp到多个目标目录,扫描获取文本文件。
- 解析整理文本数据,存入到同一张数据表中。
- 删除相关旧数据
- 其中各个文本文件之间有如下特点:
- 任一文本文件中一行即一条数据,通过分隔符分列解析入库
- 各个文本的编码 / 分隔符 / 列顺序 / 列数量 不统一
- 相同前缀文件名的文本 分隔符/列顺序/列数量 相同
要点
- 解耦 提取 封装
- ftp获取文件业务、文件文件数据对应表列业务、数据入库业务
示例
ftp获取文本文件
String localFileAbsPath=commMap.get("ftp_downPath")+ File.separator+file_prefixes[k]+".csv";
String fromFileName=commMap.get("ftp_file_path")+File.separator+file_prefixes[k]+
DateFormatUtils.format(DateUtils.addDays(new Date(), -1), "yyyyMMdd")+"0000.csv";
if(!getFileFromSftp(commMap,fromFileName)){
log.error("ftp方式获取文件失败--"+fromFileName);
continue;
}
private boolean getFileFromSftp(Map<String, String> commMap, String fromFileName){
SFTPUtil2 sftp2Util = new SFTPUtil2(commMap.get("ftp_ip"), Integer.parseInt(commMap.get("ftp_port")),
20000, commMap.get("ftp_user"), commMap.get("ftp_passwd"));
boolean downSucc=false;
if(sftp2Util.login()){
downSucc=sftp2Util.downloadFile2(fromFileName, commMap.get("ftp_downPath"));
String msg=(downSucc? "已获取到ftp文件:":"down file error:")+fromFileName;
log.info(msg);
sftp2Util.logout();
}else{
log.info("ftp_login_error");
}
return downSucc;
}
文本数据整理步骤1
String[] keys={"station_name|ne_name","","city","county","lac","update_time"};
List<Map<String, Object>> mapList = FileUtils.readTxtFileNotNull(localFileAbsPath, keys, "\\|");
public static List<Map<String,Object>> readTxtFileNotNull(String filePath, String[] keys, String regex) throws IOException {
List<Map<String,Object>> resultList= new ArrayList<>();
File file = new File(filePath);
if(file.isFile() && file.exists()){
System.out.println("开始读取文件:"+filePath);
String lineTxt = null;
try (
InputStreamReader read = new InputStreamReader(new FileInputStream(file),getFileCharsetName(filePath));
){
BufferedReader bufferedReader = new BufferedReader(read);
while((lineTxt = bufferedReader.readLine()) != null){
String[] split = lineTxt.split(regex);
Map<String,Object> map = new HashMap<>();
for(int i=0;i<keys.length;i++){
if(!"".equals(keys[i])){
String[] keyss = keys[i].split("\\|");
for (String s : keyss) {
map.put(s,split[i]);
}
}
}
resultList.add(map);
}
} catch (Exception e) {
System.out.println("读取文件内容出错"+lineTxt);
e.printStackTrace();
throw e;
}
}else{
System.out.println("找不到指定的文件"+filePath);
}
return resultList;
}
public static String getFileCharsetName(String fileName) throws IOException {
try(
InputStream inputStream = new FileInputStream(fileName);
){
byte[] head = new byte[3];
inputStream.read(head);
String charsetName = "GBK";
if (head[0] == -1 && head[1] == -2 )
charsetName = "UTF-16";
else if (head[0] == -2 && head[1] == -1 )
charsetName = "Unicode";
else if(head[0]==-27 && head[1]==-101 && head[2] ==-98)
charsetName = "UTF-8";
else if(head[0]==-17 && head[1]==-69 && head[2] ==-65)
charsetName = "UTF-8";
return charsetName;
}
}
文本数据整理步骤2
List<BaseStation> domainList=mapList2DomainList(mapList,domain,net_workg,creatTime);
private List<BaseStation> mapList2DomainList(List<Map<String, Object>> mapList, String domain, String netWork,Date creatTime ) {
LinkedList<BaseStation> domainList = new LinkedList<>();
for(int i=0;i<mapList.size();i++){
Map<String, Object> domainMap = mapList.get(i);
BaseStation baseStationDomain = new BaseStation();
baseStationDomain.setStationName(obj2Str(domainMap.get("station_name")));
baseStationDomain.setNeName(obj2Str(domainMap.get("ne_name")));
。。。
private String obj2Str(Object str){
return str==null|| StringUtils.isEmpty((String)str) ? "":(String)str;
}
数据批量入库
List<BaseStation> domainList=mapList2DomainList(mapList,domain,net_workg,creatTime);
for (int i=0,j=0,betw=1000; i < domainList.size(); i+=betw) {
j=i+betw;
if(j<domainList.size()) {
baseStationMapper.insertBatch(domainList.subList(i, j));
}else{
baseStationMapper.insertBatch(domainList.subList(i, domainList.size()));
break;
}
}
baseStationMapper.deleteDomainOldData(domain,net_workg,creatTime);
log.info(net_workg+"更新数据库数据完成--"+mapList.size());
<insert id="insertBatch" >
INSERT INTO base_station_test
(
`station_name`, `ne_name`, `city`, `county`,`lac`,`update_time`
) VALUES
<foreach collection ="list" item="info" separator="," >
(
#{info.stationName},#{info.neName},#{info.city},#{info.county},#{info.lac},#{info.updateTime}
)
</foreach>
</insert>
<delete id="deleteDomainOldData">
<if test="lac != null and updateTime != null">
delete from base_station_test where lac=#{lac} update_time != #{updateTime}
</if>
</delete>