在帮别人解决问题的时候,需要把一个大文件按照某个关键行进行分割:每遇到一次关键行,就开始一个新文件。我实现的时候使用了两种方法来解决。
第一种方法就是循环读取每一行:在没有遇到关键行时,把读到的行依次累加起来;直到遇到关键行的时候,再把累加的数据写入新文件
//速度一般
/**
 * Splits a text file into several XML files, line by line.
 *
 * <p>A line that is exactly {@code <?xml version="1.0" encoding="UTF-8"?>} marks the start
 * of a new part. The lines between two such marker lines are written to
 * {@code d:\split\sample<i>.xml} (i = 0, 1, 2, ...), each prefixed with the marker line.
 *
 * <p>Behaviour worth knowing:
 * <ul>
 *   <li>Lines that precede the first marker are dropped if a marker is found later; if the
 *       file contains no marker at all, they end up in {@code sample0.xml}.</li>
 *   <li>One final part is always written after the last line has been read, even when the
 *       input is empty (the part then contains only the marker line).</li>
 *   <li>Every line, including the marker line, is terminated with a single {@code '\r'}.</li>
 *   <li>Input is decoded and output is encoded as UTF-8, matching the declaration that is
 *       written at the top of each part.</li>
 *   <li>I/O errors are printed to standard error and end the split; nothing is thrown.</li>
 * </ul>
 *
 * @param filename path of the file to split; if it does not exist a message is printed
 *                 and nothing else happens
 */
public static void function1(String filename)
{
    File file = new File(filename);
    if (!file.exists())
    {
        System.out.println("文件不存在!");
        return;
    }

    // The XML declaration is both the split marker and the first line of every part.
    String head = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
    // Read buffer of 20 Mi chars. The previous value had an extra "* 10" factor, which
    // contradicted its own "20MB" comment and allocated ten times as much up front.
    int bufferSize = 20 * 1024 * 1024;

    try (BufferedReader reader = new BufferedReader(
            new InputStreamReader(new FileInputStream(file),
                    java.nio.charset.StandardCharsets.UTF_8),
            bufferSize))
    {
        // StringBuilder keeps accumulation linear; repeated String += was quadratic.
        StringBuilder segment = new StringBuilder();
        boolean markerSeen = false;
        int index = 0;
        String line;
        while ((line = reader.readLine()) != null)
        {
            if (!line.equals(head))
            {
                segment.append(line).append('\r');
                continue;
            }
            // A marker closes the previous part. On the very first marker there is no
            // previous part yet, so whatever was collected before it is discarded.
            if (markerSeen)
            {
                writeSampleFile(index, head, segment);
                index++;
            }
            markerSeen = true;
            segment.setLength(0);
        }
        // Flush whatever followed the last marker (or the whole input if none was found).
        writeSampleFile(index, head, segment);
    }
    catch (IOException e)
    {
        e.printStackTrace();
    }
}

/**
 * Writes one part produced by {@link #function1(String)} to
 * {@code d:\split\sample<index>.xml}, creating the parent directory if necessary.
 *
 * @param index zero-based number of the part, used in the file name and the progress message
 * @param head  marker line written first, followed by {@code '\r'}
 * @param body  accumulated lines of the part
 * @throws IOException if the file cannot be created or written
 */
private static void writeSampleFile(int index, String head, CharSequence body) throws IOException
{
    File target = new File("d:\\split\\sample" + index + ".xml");
    File parent = target.getParentFile();
    if (parent != null && !parent.exists())
    {
        // Best effort: if this fails, opening the file below reports the real error.
        parent.mkdirs();
    }
    try (java.io.Writer output = new java.io.OutputStreamWriter(
            new java.io.FileOutputStream(target), java.nio.charset.StandardCharsets.UTF_8))
    {
        output.append(head).append("\r");
        output.append(body);
    }
    System.out.println("生成文件:" + index);
}
第二种方法是使用 Java 提供的 Scanner,把关键行作为分隔符来进行文件的分割
/**
 * Splits a text file into several XML files using a {@link Scanner} whose delimiter is
 * the XML declaration {@code <?xml version="1.0" encoding="UTF-8"?>}.
 *
 * <p>Every token the scanner yields (the text between two declarations) is written to
 * {@code d:\split_result\<input file name>\<i>.xml} (i = 0, 1, 2, ...), prefixed with the
 * declaration and a single {@code '\r'}. The output directory is created if it is missing.
 * Input is decoded and output is encoded as UTF-8, matching the declaration written at
 * the top of each part.
 *
 * <p>Errors are printed to standard error and never thrown. A part that cannot be written
 * is skipped (its number is still consumed) and splitting continues with the next part.
 *
 * @param filename path of the file to split; if it does not exist a message is printed
 *                 and nothing else happens
 */
public static void function2(String filename)
{
    File file = new File(filename);
    if (!file.exists())
    {
        System.out.println("文件不存在!");
        return;
    }

    String head = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
    // Read buffer of 20 Mi chars for the underlying reader.
    int bufferSize = 20 * 1024 * 1024;
    String outputPrefix = "d:\\split_result\\" + file.getName() + "\\";

    // Create the output directory once, up front, instead of re-checking for every part.
    File outputDir = new File("d:\\split_result\\" + file.getName());
    if (!outputDir.exists())
    {
        // Best effort: if this fails, opening the first part reports the real error.
        outputDir.mkdirs();
    }

    try (Scanner sc = new Scanner(new BufferedReader(
            new InputStreamReader(new FileInputStream(file),
                    java.nio.charset.StandardCharsets.UTF_8),
            bufferSize)))
    {
        // Quote the declaration so that it is matched literally ('?' and '.' are regex
        // metacharacters).
        sc.useDelimiter(java.util.regex.Pattern.quote(head));
        int i = 0;
        while (sc.hasNext())
        {
            // Consume the token BEFORE touching the file system: if opening the output
            // file fails, the scanner must still advance, otherwise hasNext() stays true
            // and the loop never terminates.
            String part = sc.next();
            try
            {
                try (java.io.Writer output = new java.io.OutputStreamWriter(
                        new java.io.FileOutputStream(outputPrefix + i + ".xml"),
                        java.nio.charset.StandardCharsets.UTF_8))
                {
                    output.append(head).append("\r");
                    output.append(part);
                }
                System.out.println("生成文件:" + i);
            }
            catch (IOException e)
            {
                e.printStackTrace();
            }
            i++;
        }
        // Scanner swallows read errors and simply stops; surface them explicitly.
        IOException readError = sc.ioException();
        if (readError != null)
        {
            readError.printStackTrace();
        }
    }
    catch (FileNotFoundException e)
    {
        e.printStackTrace();
    }
}
两种方法的速度不一样,第二种速度更快