因为需要读取大文件的数据,而程序内存太小,无法一次性读入,所以先将文件拆分成多个小文件,再分别读取。
package cn.jado.ctt_check.test;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;
import org.apache.log4j.Logger;
import cn.jado.ctt_check.util.IOUtil;
/**
 * Splits a large line-oriented text file into several smaller chunk files so
 * that each chunk can later be processed with a limited amount of memory.
 */
public class SpiltFile {
    private static final Logger logger = Logger.getLogger(SpiltFile.class);

    /** Number of input lines collected before a chunk file is written. */
    private static final int LINES_PER_FILE = 100000;

    /** Input file used by {@link #main(String[])} when no argument is supplied. */
    private static final String DEFAULT_INPUT = "F:/usr/local/统计/utf-8_20170221_wechat_article.DATA";

    /**
     * Command-line entry point.
     *
     * @param args optional; {@code args[0]} is the path of the file to split.
     *             When absent, the built-in default path is used.
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT;
        readFileByLineToList(path, "utf-8");
    }

    /**
     * Reads {@code fileName} line by line and writes it out as chunk files of
     * {@link #LINES_PER_FILE} lines each.
     *
     * <p>Chunk files are created in a {@code wechat_msg/} directory below
     * {@code IOUtil.PathParser(fileName)[0]} (presumably the directory of the
     * input file -- confirm against IOUtil). Each chunk is named
     * {@code <n>_wechat_article.DATA}, where {@code n} is the number of input
     * lines read so far when the chunk is written. Lines left over after the
     * last full chunk are written to one final, shorter chunk file.
     *
     * <p>I/O and encoding failures are logged, not thrown.
     *
     * @author jado
     * @param fileName    path of the file to split
     * @param charsetName charset used to decode the input; when {@code null},
     *                    the {@code file.encoding} system property is used
     * @return the lines of the last, partial chunk (empty when the line count
     *         is an exact multiple of the chunk size); after a failure, the
     *         lines that were buffered but not yet cleared
     */
    public static List<String> readFileByLineToList(String fileName, String charsetName) {
        List<String> lineList = new ArrayList<String>();
        String path = IOUtil.PathParser(fileName)[0] + "/wechat_msg/";
        IOUtil.mkDirs(path);
        if (charsetName == null) {
            charsetName = System.getProperty("file.encoding");
        }

        // The three layers are tracked separately so the raw stream is still
        // closed if a wrapper constructor fails (e.g. unsupported charset).
        FileInputStream fis = null;
        InputStreamReader isReader = null;
        BufferedReader br = null;
        try {
            fis = new FileInputStream(fileName);
            isReader = new InputStreamReader(fis, charsetName);
            br = new BufferedReader(isReader);

            String tempString;
            // long: the input may exceed Integer.MAX_VALUE lines.
            long lineCount = 0;
            while ((tempString = br.readLine()) != null) {
                lineCount++;
                lineList.add(tempString);
                if (lineCount % LINES_PER_FILE == 0) {
                    writeChunk(path + lineCount + "_wechat_article.DATA", lineList);
                    lineList.clear();
                }
            }

            // Flush the trailing partial chunk; without this the last
            // (lineCount % LINES_PER_FILE) lines would never reach disk.
            // The list is intentionally not cleared so the return value stays
            // what earlier callers received.
            if (!lineList.isEmpty()) {
                writeChunk(path + lineCount + "_wechat_article.DATA", lineList);
            }
        } catch (UnsupportedEncodingException e1) {
            logger.error("解析文件编码异常", e1);
        } catch (FileNotFoundException e2) {
            logger.error("文件没有找到异常", e2);
        } catch (IOException e3) {
            logger.error("Io操作异常", e3);
        } finally {
            // Closing an already-closed stream has no effect, so closing every
            // layer that was created is safe and covers partial construction.
            closeQuietly(br);
            closeQuietly(isReader);
            closeQuietly(fis);
        }
        return lineList;
    }

    /**
     * Appends every line in {@code lines}, each followed by {@code "\n"}, to
     * {@code targetFile} using UTF-8.
     *
     * NOTE(review): this issues one IOUtil.writeFile call per line; if that
     * helper reopens the file on every call, a batched or streaming writer
     * would be considerably faster -- confirm against IOUtil before changing.
     */
    private static void writeChunk(String targetFile, List<String> lines) throws IOException {
        for (String line : lines) {
            IOUtil.writeFile(targetFile, line + "\n", "utf-8", true);
        }
    }

    /** Closes {@code resource} if it is non-null, logging any failure instead of throwing. */
    private static void closeQuietly(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            logger.error("文件关闭异常", e);
        }
    }
}