展开全部
java处理html指定标签最好用正则表达式。例如要去除html中所有的h1标签和类容就可以用下面的演示代码e5a48de588b662616964757a686964616f31333337393561:package konw.reg;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class RemoveTag
{
public static void main(String[] args)
{
FileReader fr;
String content =null;
String regex = ".*[Hh]1>";
try
{
fr = new FileReader("tag.html");
BufferedReader br = new BufferedReader(fr);
String str = null;
StringBuffer sb = new StringBuffer();
while((str = br.readLine()) != null)
{
sb.append(str+"\n");
}
content = sb.toString();
br.close();
} catch (FileNotFoundException e)
{
e.printStackTrace();
} catch (IOException e)
{
e.printStackTrace();
}
Pattern pattern = Pattern.compile(regex);
Matcher matcher = pattern.matcher(content);
StringBuffer sb1 = new StringBuffer();
while(matcher.find())
{
sb1.append(matcher.replaceAll("")+"\n");
}
try
{
FileWriter fw = new FileWriter("tag.html");
BufferedWriter bw = new BufferedWriter(fw);
fw.write(sb1.toString());
bw.close();
} catch (IOException e)
{
e.printStackTrace();
}
}
}