java读取txt文件并统计重复行

环境:ide+maven3+mysql5.7+mybatis3+spring4+springmvc

控制层:

/**
 * REST endpoints for managing a brand's own audience ("crowd") packages.
 */
@CrossOrigin
@RestController
@RequestMapping("/mip/crowdmanager")
public class OwnCrowdController extends BaseController {

    private static final Logger LOGGER = LoggerFactory.getLogger(OwnCrowdController.class);

    /** Only uploads whose original file name ends with this suffix are accepted. */
    private static final String TXT_SUFFIX = ".txt";

    /** Maximum accepted upload size in bytes (30 MB). */
    private static final long MAX_FILE_SIZE = 30L * 1024 * 1024;

    @Autowired
    private OwnCrowdService ownCrowdService;

    /**
     * Creates a new own-audience package from an uploaded txt file.
     *
     * <p>The request is rejected with a {@code FieldInvalidException} when the audience name is
     * already used under the brand, when the file is missing or empty, when its name does not end
     * with {@code .txt}, or when it is larger than 30 MB. Any exception raised by the service call
     * is logged and reported to the client as a generic failure response.
     *
     * @param brandCode     brand code the audience belongs to
     * @param audienceName  audience package name; digits, letters, Chinese characters, '-' and '_'
     * @param equipmentType device type of the uploaded ids: 1 phone number; 2 phone number MD5;
     *                      3 member code; 4 raw IMEI; 5 raw IDFA; 6 IMEI MD5; 7 IDFA MD5
     * @param file          uploaded txt file
     * @param request       current HTTP request, passed to {@code assertBrandCode}
     * @return HTTP 200 whose body carries either the success result or the failure message
     */
    @PostMapping("/createOwnCrowd")
    public ResponseEntity<BaseResponse> createOwnCrowd(
            @Validated
            @NotNull(message = "%custom%品牌编码不可为空")
            @RequestParam(value = "brandCode")
                    String brandCode,
            @Validated
            @NotNull(message = "%custom%人群包名称不可为空")
            @Pattern(regexp = "[\u4e00-\u9fa5_a-zA-Z0-9-_]+?", message = "%custom%人群包名称只能包含数字,字母,中文,特殊字符仅允许:-_")
            @RequestParam(value = "audienceName")
                    String audienceName,
            @Validated
            @NotNull(message = "%custom%设备类型不可为空")
            @RequestParam(value = "equipmentType")
                    Integer equipmentType,
            @Validated
            @NotNull(message = "%custom%上传文件不可为空")
            @RequestParam(value = "file", required = true)
                    MultipartFile file, HttpServletRequest request) {
        // NOTE(review): assertBrandCode is not defined in this class; presumably inherited from
        // BaseController - confirm what it validates.
        assertBrandCode(brandCode, request);

        // The audience name must be unique within a brand. The null check avoids an unboxing
        // NullPointerException if the service returns no count.
        Integer count = ownCrowdService.checkAudienceName(brandCode, audienceName);
        if (count != null && count > 0) {
            throw new FieldInvalidException("createOwnCrowd", "同一品牌下人群包名称不允许重复!");
        }

        // Reject a missing or empty upload.
        if (file == null || file.getSize() == 0) {
            LOGGER.info("upload file is empty");
            throw new FieldInvalidException("uploadOwnCrowd", "上传文件不得为空!");
        }

        // Reject non-txt uploads. endsWith is used instead of substring(lastIndexOf('.')) so that a
        // file name without any dot is rejected instead of throwing StringIndexOutOfBoundsException.
        // A blank file name skips this check, as before.
        String originalName = file.getOriginalFilename();
        if (!StringUtils.isBlank(originalName) && !originalName.endsWith(TXT_SUFFIX)) {
            LOGGER.info("upload file is not txt");
            throw new FieldInvalidException("uploadOwnCrowd", "上传的文件必须是txt文件!");
        }

        // Reject oversized uploads.
        if (file.getSize() > MAX_FILE_SIZE) {
            LOGGER.info("upload file size max 30M");
            throw new FieldInvalidException("uploadOwnCrowd", "上传文件不得大于30M!");
        }

        try {
            FileResultVo fileResultVo = ownCrowdService.createOwnCrowd(brandCode, audienceName, equipmentType, file);
            Object data = fileResultVo.getCheckResult();
            if (Objects.equals(SNConstant.CODE_SUCCESS, fileResultVo.getCheckCode())) {
                return ResponseEntity.ok(BaseResponse.success().setResult(data));
            }
            return ResponseEntity.ok(new BaseResponse(fileResultVo.getCheckMessage()).setResult(data));
        } catch (Exception e) {
            // Log at error level and pass the throwable as the last argument so the stack trace is kept.
            LOGGER.error("新建自有人群异常", e);
            return ResponseEntity.ok(new BaseResponse("新建自有人群失败").setResult(null));
        }
    }
}

业务层:

/**
 * Service that validates an uploaded audience file, stores it on the FTP server and records the
 * audience in the database.
 */
@Service
public class OwnCrowdServiceImpl implements OwnCrowdService {

    private static final Logger LOGGER = LoggerFactory.getLogger(OwnCrowdServiceImpl.class);

    @Autowired
    private DalClient dalClient;

    /**
     * Creates a new own-audience package.
     *
     * <p>Steps: validate the file content with {@code FileUtil.checkTxtFile}, upload the file to
     * a dated directory on the FTP server under a random file name, then insert the audience
     * record and return its generated primary key.
     *
     * @param brandCode     brand code the audience belongs to
     * @param audienceName  audience package name
     * @param equipmentType device type of the ids contained in the file
     * @param file          uploaded txt file
     * @return a result whose code is {@code SNConstant.CODE_SUCCESS} with the new primary key as
     *         result, or {@code SNConstant.CODE_ERROR} with a failure message
     */
    public FileResultVo createOwnCrowd(String brandCode, String audienceName, Integer equipmentType,
            MultipartFile file) {
        FileResultVo resultVo = new FileResultVo();
        try {
            // Validate the file content. Anything other than an explicit success is treated as a
            // failure, so a result with a missing code can never reach the upload/insert steps
            // (where getCheckResult() would be null).
            FileResultVo checkVo = FileUtil.checkTxtFile(file.getInputStream(), equipmentType);
            if (!Objects.equals(SNConstant.CODE_SUCCESS, checkVo.getCheckCode())) {
                checkVo.setCheckCode(SNConstant.CODE_ERROR);
                return checkVo;
            }

            // Target location: <root>/<yyyyMMdd>/<uuid without dashes>.txt
            String dir =
                    SNConstant.FILE_DIRECTORY + SNConstant.FILE_SEPARATE + DateUtils.format(new Date(), "yyyyMMdd");
            String filePath =
                    dir + SNConstant.FILE_SEPARATE + UUID.randomUUID().toString().replace("-", "") + ".txt";

            // Use one FtpUtils instance for both steps instead of creating a new one per call.
            FtpUtils ftpUtils = new FtpUtils();
            ftpUtils.createDir(dir);
            // A fresh stream is requested because the validation step consumed the first one.
            boolean uploaded = ftpUtils.uploadFtpFile(filePath, file.getInputStream());
            if (!uploaded) {
                resultVo.setCheckCode(SNConstant.CODE_ERROR);
                resultVo.setCheckMessage("上传文件失败");
                resultVo.setCheckResult(null);
                return resultVo;
            }

            // On success the check result holds the number of distinct lines in the file.
            Long audienceAmount = Long.valueOf(checkVo.getCheckResult().toString());
            Map<String, Object> paramMap = new HashMap<>();
            paramMap.put("audienceName", audienceName);
            paramMap.put("equipmentType", equipmentType);
            paramMap.put("brandCode", brandCode);
            paramMap.put("audienceAmount", audienceAmount);
            paramMap.put("filePath", filePath);

            // Insert the record; the call returns the generated primary key.
            Number number = dalClient.execute4PrimaryKey("own.createOwnCrowd", paramMap);
            String audienceCode = number == null ? null : number.toString();
            resultVo.setCheckCode(SNConstant.CODE_SUCCESS);
            resultVo.setCheckMessage("插入成功");
            resultVo.setCheckResult(audienceCode);
        } catch (IOException e) {
            LOGGER.error("获取上传文件失败", e);
            resultVo.setCheckCode(SNConstant.CODE_ERROR);
            resultVo.setCheckMessage("获取上传文件失败");
            resultVo.setCheckResult(null);
        }
        return resultVo;
    }
}

文件工具类:

package com.suning.snmip.utils.fileutils;

import com.suning.snmip.intf.enums.EquipmentTypeEnum;
import com.suning.snmip.intf.vo.FileResultVo;
import com.suning.snmip.utils.SNConstant;
import org.apache.commons.io.LineIterator;
import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.util.CollectionUtils;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;

/**
 * Copyright (C), 2002-2019, 苏宁易购电子商务有限公司
 * Description: //模块目的、功能描述
 * Author: 17020045
 * Date: 2019/6/5 星期三 16:54
 */
public class FileUtil {

    private static final Logger LOGGER = LoggerFactory.getLogger(FileUtil.class);

    /** Charset name returned by {@link #getCode(InputStream)} when a UTF-8 BOM is present. */
    private static final String UTF_8 = "UTF-8";

    /** Length in bytes of the UTF-8 byte order mark (EF BB BF). */
    private static final int BOM_LENGTH = 3;

    /** Phone number: 11 digits starting with 1. */
    private static final Pattern PHONE_PATTERN = Pattern.compile("^(1[0-9]{10})$");

    /** MD5 digest: 32 lower-case letters or digits. */
    private static final Pattern MD5_PATTERN = Pattern.compile("^([a-z0-9]{32})$");

    /** Member code: 8 to 10 digits. */
    private static final Pattern MEMBER_CODE_PATTERN = Pattern.compile("^([0-9]{8,10})$");

    /** Raw IMEI: 14 or 15 upper-case letters or digits. */
    private static final Pattern IMEI_PATTERN = Pattern.compile("^([A-Z0-9]{14,15})$");

    /** Raw IDFA: 8-4-4-4-12 groups of upper-case letters or digits. */
    private static final Pattern IDFA_PATTERN =
            Pattern.compile("^([0-9A-Z]{8}-)([0-9A-Z]{4}-)([0-9A-Z]{4}-)([0-9A-Z]{4}-)([0-9A-Z]{12})$");

    private FileUtil() {
        // Utility class; not instantiable.
    }

    /**
     * Validates the content of an uploaded txt file and counts its distinct non-blank lines.
     *
     * <p>The file must start with a UTF-8 byte order mark. Every non-blank line must match the
     * pattern of the given device type; validation stops at the first line that does not. The
     * stream is always closed before this method returns.
     *
     * @param inputStream   content of the uploaded file; closed by this method
     * @param equipmentType device type that selects the line pattern
     * @return on success, code {@code SNConstant.CODE_SUCCESS} with the number of distinct lines
     *         as result; otherwise code {@code SNConstant.CODE_ERROR} with a message, and the
     *         offending line (single-element list) as result when a line failed validation
     */
    public static FileResultVo checkTxtFile(InputStream inputStream, Integer equipmentType) {
        Pattern pattern = getRegex(equipmentType);
        Set<String> distLines = new HashSet<>();
        List<String> failContent = new ArrayList<>();

        // try-with-resources on the raw stream guarantees it is closed on every path, including
        // the early return taken when the encoding check fails.
        try (InputStream in = inputStream) {
            String code = getCode(in);
            if (StringUtils.isBlank(code)) {
                return buildResult(SNConstant.CODE_ERROR, "上传文件编码格式必须是UTF-8", null);
            }

            // The BOM has already been consumed by getCode, so decoding starts at the first character.
            LineIterator iterator = new LineIterator(new BufferedReader(new InputStreamReader(in, code)));
            while (iterator.hasNext()) {
                String line = iterator.nextLine();
                if (StringUtils.isBlank(line)) {
                    continue;
                }
                failContent = checkLines(line, pattern);
                if (!failContent.isEmpty()) {
                    break;
                }
                distLines.add(line);
            }
        } catch (IOException | IllegalStateException e) {
            // LineIterator reports read failures as IllegalStateException, so both are handled here.
            LOGGER.error("获取文件总行数发生错误,IOException", e);
            return buildResult(SNConstant.CODE_ERROR, "获取文件总行数发生错误", null);
        }

        if (!failContent.isEmpty()) {
            return buildResult(SNConstant.CODE_ERROR, "上传文件内容格式不正确", failContent);
        }
        if (distLines.isEmpty()) {
            return buildResult(SNConstant.CODE_ERROR, "上传文件内容不得为空", null);
        }
        return buildResult(SNConstant.CODE_SUCCESS, "非重复量级", distLines.size());
    }

    /**
     * Builds a result object from its three parts.
     */
    private static FileResultVo buildResult(String code, String message, Object result) {
        FileResultVo vo = new FileResultVo();
        vo.setCheckCode(code);
        vo.setCheckMessage(message);
        vo.setCheckResult(result);
        return vo;
    }

    /**
     * Selects the line pattern for a device type.
     *
     * @param equipmentType device type of the uploaded file
     * @return the pattern each line must match, or {@code null} when the type has no pattern
     */
    private static Pattern getRegex(Integer equipmentType) {
        // NOTE(review): the switch throws NullPointerException if getType returns null for an
        // unknown value - confirm the contract of EquipmentTypeEnum.getType.
        EquipmentTypeEnum typeEnum = EquipmentTypeEnum.getType(equipmentType);
        switch (typeEnum) {
            case TYPE_TEL_ORG:
                return PHONE_PATTERN;
            case TYPE_TEL_MD5:
            case TYPE_IMEI_MD5:
            case TYPE_IDFA_MD5:
                return MD5_PATTERN;
            case TYPE_MEM_ORG:
                return MEMBER_CODE_PATTERN;
            case TYPE_IMEI_ORG:
                return IMEI_PATTERN;
            case TYPE_IDFA_ORG:
                return IDFA_PATTERN;
            default:
                return null;
        }
    }

    /**
     * Validates a single line against the pattern.
     *
     * @param readerStr one line read from the file
     * @param pattern   pattern the line must match; {@code null} disables the check
     * @return a list containing the line when it does not match, otherwise an empty list
     */
    private static List<String> checkLines(String readerStr, Pattern pattern) {
        List<String> failContent = new ArrayList<>();
        if (pattern != null && !pattern.matcher(readerStr).matches()) {
            failContent.add(readerStr);
        }
        return failContent;
    }

    /**
     * Detects a UTF-8 byte order mark at the start of the stream.
     *
     * <p>Up to three bytes are consumed from the stream regardless of the outcome.
     *
     * @param inputStream stream positioned at the start of the file
     * @return {@code "UTF-8"} when the stream starts with EF BB BF, otherwise an empty string
     *         (also when reading fails)
     */
    public static String getCode(InputStream inputStream) {
        byte[] head = new byte[BOM_LENGTH];
        try {
            // A single read() may return fewer bytes than requested, so loop until the buffer is
            // full or the stream ends.
            int total = 0;
            while (total < head.length) {
                int n = inputStream.read(head, total, head.length - total);
                if (n < 0) {
                    break;
                }
                total += n;
            }
            if (total == head.length
                    && head[0] == (byte) 0xEF && head[1] == (byte) 0xBB && head[2] == (byte) 0xBF) {
                return UTF_8;
            }
        } catch (IOException e) {
            LOGGER.error("FileUtil-->getCode exception", e);
        }
        return "";
    }

}

FTp工具类:

package com.suning.snmip.utils.fileutils;

import org.apache.commons.net.ftp.FTPClient;
import org.apache.commons.net.ftp.FTPReply;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.io.InputStream;


/**
 * Small FTP helper for creating a directory and uploading a file.
 *
 * <p>Each public operation opens a connection when none is open and releases it when the
 * operation finishes, so no FTP session is left open.
 */
public class FtpUtils {
    private static final Logger LOGGER = LoggerFactory.getLogger(FtpUtils.class);
    // NOTE(review): host and credentials are hard-coded in source; consider moving them to
    // external configuration.
    // FTP host
    private static final String HOSTNAME = "xxxx.xxx.com";
    // FTP port
    private static final Integer PORT = 21;
    // FTP user name
    private static final String USERNAME = "xx/xxx/xxxxxxx";
    // FTP password
    private static final String FTP_PD = "123456";
    private FTPClient ftpClient = null;

    public FtpUtils() {
        super();
    }

    /**
     * Connects to the FTP server and logs in.
     *
     * @return {@code true} when the login was answered with a positive completion reply;
     *         {@code false} otherwise, in which case any opened connection is closed again
     */
    private synchronized boolean initFtp() {
        FTPClient client = new FTPClient();
        try {
            client.setControlEncoding("utf-8");
            client.connect(HOSTNAME, PORT);
            client.login(USERNAME, FTP_PD);
            if (!FTPReply.isPositiveCompletion(client.getReplyCode())) {
                // Login refused: close the socket instead of just dropping the reference.
                disconnectQuietly(client);
                ftpClient = null;
                return false;
            }
        } catch (Exception e) {
            LOGGER.error("初始化ftp发生错误:", e);
            disconnectQuietly(client);
            ftpClient = null;
            return false;
        }
        ftpClient = client;
        return true;
    }

    /**
     * Ensures a logged-in client is available, connecting when necessary.
     *
     * @return {@code true} when a client is ready for use
     */
    private synchronized boolean checkFtpClient() {
        // The client is per-instance state, so the instance monitor is sufficient; no class-wide
        // lock is held while the blocking connect runs.
        if (ftpClient == null && !initFtp()) {
            LOGGER.error("ftp 连接错误");
            return false;
        }
        return true;
    }

    /**
     * Releases the current connection, if any.
     */
    private synchronized void releaseFtpClient() {
        FTPClient client = ftpClient;
        ftpClient = null;
        disconnectQuietly(client);
    }

    /**
     * Logs out and disconnects the given client, logging (not propagating) any failure.
     */
    private static void disconnectQuietly(FTPClient client) {
        if (client == null || !client.isConnected()) {
            return;
        }
        try {
            client.logout();
        } catch (IOException e) {
            LOGGER.warn("ftp退出登录发生异常:", e);
        } finally {
            try {
                client.disconnect();
            } catch (IOException e) {
                LOGGER.warn("ftp断开连接发生异常:", e);
            }
        }
    }

    /**
     * Creates the upload directory when it does not exist yet.
     *
     * @param dir directory path on the FTP server
     */
    public void createDir(String dir) {
        try {
            // changeWorkingDirectory doubles as an existence check.
            if (checkFtpClient() && !ftpClient.changeWorkingDirectory(dir) && !ftpClient.makeDirectory(dir)) {
                LOGGER.error("ftp创建目录失败:{}", dir);
            }
        } catch (IOException e) {
            LOGGER.error("ftp检查目录发生异常:", e);
        } finally {
            releaseFtpClient();
        }
    }

    /**
     * Uploads a file.
     *
     * <p>The given stream is always closed by this method, whether or not the upload succeeds.
     *
     * @param filePath    target path on the FTP server
     * @param inputStream content to upload
     * @return {@code true} only when the server confirmed that the file was stored
     */
    public boolean uploadFtpFile(String filePath, InputStream inputStream) {
        try {
            if (!checkFtpClient()) {
                return false;
            }
            // storeFile reports failure through its return value, not through an exception.
            boolean stored = ftpClient.storeFile(filePath, inputStream);
            if (!stored) {
                LOGGER.error("上传ftp文件失败,reply:{}", ftpClient.getReplyString());
            }
            return stored;
        } catch (IOException e) {
            LOGGER.error("上传ftp文件,IOException:", e);
            return false;
        } finally {
            if (inputStream != null) {
                try {
                    inputStream.close();
                } catch (IOException e) {
                    LOGGER.warn("关闭上传文件流发生异常:", e);
                }
            }
            releaseFtpClient();
        }
    }


}

常量类:

// Result code reported when a check or operation fails.
    public static final String CODE_ERROR = "0";
    // Result code reported when a check or operation succeeds.
    public static final String CODE_SUCCESS = "1";

    // Root directory under which uploaded files are stored.
    public static final String FILE_DIRECTORY = "/snmip";
    // Separator placed between path segments when building upload paths.
    public static final String FILE_SEPARATE = "/";

文件VO实体类:

package com.suning.snmip.intf.vo;

import java.io.Serializable;

/**
 * Outcome of a file check or file-based operation: a result code, a message and an optional payload.
 */
@Data
public class FileResultVo implements Serializable {

    private static final long serialVersionUID = 5019589871773838047L;

    /**
     * Result code: 0 = failure, 1 = success.
     */
    private String checkCode;

    /**
     * Message describing the outcome.
     */
    private String checkMessage;

    /**
     * Result payload; its type depends on the operation that produced it.
     */
    private Object checkResult;

}

postman测试txt文件上传:

总结:

 1. txt默认的选项是ANSI,就是系统的默认编码,一般是GBK。(GBK包括所有的汉字,包括简体和繁体,而gb2312则只包括简体汉字)
 2. txt文本文档有四种编码选项:ANSI、Unicode、Unicode big endian、UTF-8
 ANSI: 无格式定义 
 Unicode: 前两个字节为FFFE,即文档以0xFFFE开头(UTF-16小端)
 Unicode big endian: 前两个字节为FEFF,即文档以0xFEFF开头(UTF-16大端)
 UTF-8: 前三个字节为EFBBBF,即带BOM的UTF-8文档以0xEFBBBF开头

 java编码与txt编码对应
 java    txt
Unicode    Unicode big endian
utf-8     utf-8
utf-16    Unicode
gb2312    ANSI

//Java获取txt文本文件编码
public static String getCode(String path) throws Exception {
        //[-76, -85, -71]  ANSI  
        //[-2, -1, 79] unicode big endian  
        //[-1, -2, 32]  unicode  
        //[-17, -69, -65] UTF-8 
        InputStream inputStream = new FileInputStream(path);
        byte[] head = new byte[3];
        inputStream.read(head);
        String code = "gb2312";  //或GBK
        if (head[0] == -1 && head[1] == -2 )
            code = "UTF-16";
        else if (head[0] == -2 && head[1] == -1 )
            code = "Unicode";
        else if(head[0]==-17 && head[1]==-69 && head[2] ==-65)
            code = "UTF-8";
        inputStream.close();
        System.out.println(code);
        return code;
}

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值