package cn.jado.ctt_check.test;
import java.io.BufferedReader;
import java.io.Closeable;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;

import org.apache.log4j.Logger;

import cn.jado.ctt_check.util.IOUtil;
/**
 * Splits a line-oriented text file into several smaller chunk files.
 *
 * <p>Chunk files are written into a {@code wechat_msg/} sub-directory of the
 * location reported by {@code IOUtil.PathParser(fileName)[0]} (presumably the
 * directory containing the input file; confirm against {@code IOUtil}).
 */
public class SpiltFile {

    private static final Logger logger = Logger.getLogger(SpiltFile.class);

    /**
     * Number of input lines stored in each chunk file.
     *
     * <p>NOTE(review): the original chunk-size expression was unreadable; the
     * accompanying comment said "one million records per file", so that value
     * is used here. Confirm it matches the intended chunk size.
     */
    private static final int LINES_PER_FILE = 1000000;

    /**
     * Command-line entry point.
     *
     * @param args optional; {@code args[0]} is the file to split. When absent,
     *             the built-in default path is used.
     */
    public static void main(String[] args) {
        String path = "F:/usr/local/统计/utf-8_20170221_wechat_article.DATA";
        if (args != null && args.length > 0) {
            path = args[0];
        }
        readFileByLineToList(path, "utf-8");
    }

    /**
     * Reads {@code fileName} line by line and writes every
     * {@link #LINES_PER_FILE} lines to a separate chunk file named
     * {@code <lineCount>_wechat_article.DATA}, where {@code lineCount} is the
     * number of lines read so far when the chunk is written. A final, shorter
     * chunk is written for any lines left over at end of input.
     *
     * <p>I/O failures are logged and not rethrown; processing stops at the
     * first failure.
     *
     * @param fileName    path of the file to split
     * @param charsetName charset used to decode the input; when {@code null}
     *                    the {@code file.encoding} system property is used
     * @return the lines still buffered when reading stopped, i.e. the lines of
     *         the last partial chunk (empty when the line count is an exact
     *         multiple of the chunk size)
     */
    public static List<String> readFileByLineToList(String fileName, String charsetName) {
        List<String> lineList = new ArrayList<String>();
        String path = IOUtil.PathParser(fileName)[0] + "/wechat_msg/";
        IOUtil.mkDirs(path);
        if (charsetName == null) {
            charsetName = System.getProperty("file.encoding");
        }

        // The raw stream is tracked separately so it can still be closed when
        // the reader cannot be built (e.g. unsupported charset name).
        FileInputStream fileIn = null;
        BufferedReader br = null;
        try {
            fileIn = new FileInputStream(fileName);
            br = new BufferedReader(new InputStreamReader(fileIn, charsetName));

            String line;
            int lineCount = 0;
            while ((line = br.readLine()) != null) {
                lineCount += 1;
                lineList.add(line);
                // A full chunk has been buffered: write it out and start over.
                if (lineCount % LINES_PER_FILE == 0) {
                    writeChunk(path, lineCount, lineList);
                    lineList.clear();
                }
            }

            // Write the trailing partial chunk so no input lines are lost.
            // The buffer is intentionally not cleared: it is the return value.
            if (!lineList.isEmpty()) {
                writeChunk(path, lineCount, lineList);
            }
        } catch (UnsupportedEncodingException e1) {
            logger.error("解析文件编码异常", e1);
        } catch (FileNotFoundException e2) {
            logger.error("文件没有找到异常", e2);
        } catch (IOException e3) {
            logger.error("Io操作异常", e3);
        } finally {
            // Closing the outermost reader also closes the wrapped stream.
            if (br != null) {
                closeQuietly(br);
            } else {
                closeQuietly(fileIn);
            }
        }
        return lineList;
    }

    /** Appends each buffered line, newline-terminated, to the chunk file for {@code lineCount}. */
    private static void writeChunk(String dir, int lineCount, List<String> lines) throws IOException {
        String target = dir + lineCount + "_wechat_article.DATA";
        for (String line : lines) {
            IOUtil.writeFile(target, line + "\n", "utf-8", true);
        }
    }

    /** Closes {@code resource} if it is non-null, logging (not rethrowing) any failure. */
    private static void closeQuietly(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            logger.error("文件关闭异常", e);
        }
    }
}