在这个例子中，使用 sr 变量作为缓存变量，同时通过判断 sr 的长度来获取缓存大小，防止占用内存过大导致卡死。
package com.taoniwu;
import java.util.regex.*;
import java.io.*;
/**
 * Scans a text file line by line for substrings matching {@code http://www.*./},
 * drops the ones that contain an excluded keyword, and writes the remaining matches
 * as a numbered list to an output file.
 *
 * <p>Matches are accumulated in an in-memory buffer named {@code sr}; the buffer is
 * written out and cleared whenever it grows past {@link #FLUSH_THRESHOLD} characters,
 * so memory use stays bounded regardless of the input size.
 */
public class TestRead {

    /** Input file used when no path is given on the command line. */
    private static final String DEFAULT_INPUT = "D://web.txt";

    /** Output file used when no path is given on the command line. */
    private static final String DEFAULT_OUTPUT = "d://web.html";

    /**
     * Pattern used to pick candidate URLs out of each line; compiled once instead of
     * once per input line.
     *
     * <p>NOTE(review): the quantifier is greedy and the dots are unescaped, so a match
     * runs up to the last '/' on the line and may span more than one URL. Kept
     * unchanged because the intended matching rules cannot be confirmed from here.
     */
    private static final Pattern URL_PATTERN = Pattern.compile("http://www.*./");

    /** A match containing any of these substrings is not written to the output. */
    private static final String[] EXCLUDED_KEYWORDS = {"baidu", "tarena", "aowin"};

    /** Buffer length (in characters, roughly 30K) above which the buffer is written out. */
    private static final int FLUSH_THRESHOLD = 30720;

    /**
     * Text appended after every entry: a line break followed by a literal backslash and 'n'.
     *
     * <p>NOTE(review): in the original source this literal was split across two physical
     * lines, which does not compile. The line break is reproduced here as an escaped
     * newline; it may originally have been markup (for example a {@code <br>} tag) that
     * was lost before this file was captured, and the doubled backslash may have been
     * meant as a plain newline escape. Confirm the intended output format.
     */
    private static final String ENTRY_SUFFIX = "\n\\n";

    /**
     * Program entry point.
     *
     * @param args optional; {@code args[0]} overrides the input path and {@code args[1]}
     *             overrides the output path. With no arguments the built-in default
     *             paths are used.
     */
    public static void main(String[] args) {
        String inputPath = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT;
        String outputPath = (args != null && args.length > 1) ? args[1] : DEFAULT_OUTPUT;

        // try-with-resources closes both streams on every path, including failures.
        // The reader is opened first, so a missing input file leaves the output untouched.
        // Opening the writer without append mode truncates any previous output, which
        // replaces the former exists/create/delete sequence.
        try (BufferedReader input = new BufferedReader(new FileReader(inputPath));
             BufferedWriter bw = new BufferedWriter(new FileWriter(outputPath))) {
            writeMatches(input, bw);
        } catch (Exception ex) {
            System.out.println(ex + "错误");
        }
        // Printed unconditionally, i.e. also after a reported error.
        System.out.println("完成!");
    }

    /**
     * Reads every line from {@code input} and writes the numbered, filtered matches to
     * {@code bw}.
     *
     * @param input source of the text to scan
     * @param bw    destination for the numbered list
     * @throws IOException if reading or writing fails
     */
    private static void writeMatches(BufferedReader input, BufferedWriter bw) throws IOException {
        // StringBuilder avoids re-copying the whole buffer on every appended entry.
        StringBuilder sr = new StringBuilder();
        int sum = 0; // running entry number, starting at 0
        String text;
        while ((text = input.readLine()) != null) {
            Matcher m = URL_PATTERN.matcher(text);
            while (m.find()) {
                String url = m.group();
                if (!isExcluded(url)) {
                    sr.append(sum).append("、").append(url).append(ENTRY_SUFFIX);
                    sum++;
                }
                // Write the buffer out once it exceeds the threshold, then clear it.
                if (sr.length() > FLUSH_THRESHOLD) {
                    bw.write(sr.toString());
                    sr.setLength(0);
                }
            }
        }
        // Write whatever is left, followed by a trailing literal backslash-n.
        sr.append("\\n");
        bw.write(sr.toString());
    }

    /** Returns true when {@code url} contains one of the excluded keywords. */
    private static boolean isExcluded(String url) {
        for (String keyword : EXCLUDED_KEYWORDS) {
            if (url.indexOf(keyword) != -1) {
                return true;
            }
        }
        return false;
    }
}