java读文件查找汉字_遍历文件,查找文件下的汉字,并将汉字生成csv文件

package com.shine.eiuop.utils;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.UncheckedIOException;
import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import java.util.regex.MatchResult;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.itextpdf.text.pdf.PdfStructTreeController.returnType;
import com.shine.framework.commutil.typewrap.EDto;

/**

* title: 清除注释

*

* @author

* @时间

*/

public class FileCopyChineseUtils {

/** 根目录 */

public static String rootDir = "C:\\Users\\14423\\Desktop\\亚强\\msp中文翻译\\msp2\\msp\\WebRoot";

public static void main(String args[]) throws Exception {

dofind(rootDir);

}

public static void dofind(String rootDir) throws Exception {

String alltmSr = deepDir(rootDir);

System.out.println(alltmSr);

String[] stringArrStrings = alltmSr.toString().split("\\r\\n");

String file_path = "D:\\SHINE_ROOT\\mspChinese.csv";

String file_name = "mspChinese.csv";

writeDataToCsvFile1(file_path,file_name,stringArrStrings);

}

public static String deepDir(String rootDir) throws Exception {

String string = "";

File folder = new File(rootDir);

StringBuilder alltmSr = new StringBuilder();

if (folder.isDirectory()) {

String[] files = folder.list();

for (int i = 0; i < files.length; i++) {

File file = new File(folder, files[i]);

if (file.isDirectory() && file.isHidden() == false) {

alltmSr.append(deepDir(file.getPath()));

} else if (file.isFile()) {

alltmSr.append(writeComment(file.getPath()));

}

}

} else if (folder.isFile()) {

alltmSr.append(writeComment(folder.getPath()));

}

return alltmSr.toString();

}

/**

* @param currentDir

* 当前目录

* @param currentFileName

* 当前文件名

* @throws FileNotFoundException

* @throws UnsupportedEncodingException

*/

/**

* @param filePathAndName

* @throws FileNotFoundException

* @throws UnsupportedEncodingException

*/

public static String writeComment(String filePathAndName)

throws FileNotFoundException, UnsupportedEncodingException {

StringBuffer buffer = new StringBuffer();

String line = null; // 用来保存每行读取的内容

InputStream is = new FileInputStream(filePathAndName);

BufferedReader reader = new BufferedReader(new InputStreamReader(is,"UTF-8"));

try {

line = reader.readLine();

} catch (IOException e) {

// TODO Auto-generated catch block

e.printStackTrace();

} // 读取第一行

while (line != null) { // 如果 line 为空说明读完了

buffer.append(line); // 将读到的内容添加到 buffer 中

buffer.append("\r\n"); // 添加换行符

try {

line = reader.readLine();

} catch (IOException e) {

e.printStackTrace();

} // 读取下一行

}

buffer.append("\r\n"); // 添加换行符

String filecontent = buffer.toString();

String regex = "[\u4e00-\u9fa5]";

Pattern pattern = Pattern.compile(regex);

Matcher matcher = pattern.matcher(filecontent);

StringBuilder tmSr = new StringBuilder();

int tmp = -1;

while (matcher.find()) {

MatchResult result = matcher.toMatchResult();

int start = result.start();

int end = result.end();

if(tmp == start || tmp == -1) {

// 判断连续

tmSr.append(filecontent.substring(start, end));

}else {

// 不连续

tmSr.append("\r\n");

tmSr.append(filecontent.substring(start, end));

}

tmp = end;

}

tmSr.append("\r\n"); // 添加换行符

return tmSr.toString();

}

/**

*

* @Description 写csv文件,

* @param filePath

* @param fields

* @param dtos void

* @param

* @throws @author

* @date 2019年11月18日 上午9:45:31

* @see

*/

public static void writeDataToCsvFile1(String filePath, String fileName,String[] datas) throws Exception {

File csvFile = null;

BufferedWriter csvFileOutputStream = null;

FileOutputStream fos = null;

String uuidFilePath = "D:\\SHINE_ROOT\\mspChinese.csv";

try {

FileUtils.createNewFile(filePath);

FileUtils.createNewFile(uuidFilePath);

csvFile = new File(filePath);

try {

// 如果文件不存在,则创建新的文件

if (!csvFile.exists()) {

csvFile.createNewFile();

}

} catch (Exception e) {

e.printStackTrace();

}

// 写入bom头

byte[] uft8bom = { (byte) 0xef, (byte) 0xbb, (byte) 0xbf };

fos = new FileOutputStream(csvFile);

//fos.write(uft8bom);

// UTF-8使正确读取分隔符","

// 如果生产文件乱码,windows下用gbk,linux用UTF-8

//csvFileOutputStream = new BufferedWriter(new OutputStreamWriter(fos, "UTF-8"), 1024);

//csvFileOutputStream.newLine();

for (String dto : datas) {

if ("".equals(dto)!=true) {

fos.write((dto+"\r\n").getBytes());

}

}

fos.flush();

fos.close();

} catch (Exception e) {

e.printStackTrace();

}

}

}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值