即去掉XML中所有标签(“<”与“>”及其之间包含的内容),只保留标签之外的文本。
import java.util.regex.Pattern;

import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.io.SAXReader;
/**
 * Measures how many characters of an XML document remain once every
 * markup tag ({@code <...>} together with whatever it encloses between the
 * angle brackets) has been removed.
 *
 * <p>The parsed document and its serialized form are kept in static fields,
 * so they are shared by all instances and overwritten by each successful
 * construction. This class is therefore not safe for concurrent use.
 */
public class congTools {
    /** Document parsed by the most recent successful constructor call. */
    static Document doc;

    /** XML serialization of the root element of {@link #doc}. */
    static String content;

    /**
     * Matches a single tag: the shortest run from {@code <} to the next
     * {@code >}. DOTALL lets the match cross line breaks so that a tag
     * spanning several lines is still recognised.
     */
    private static final Pattern TAG = Pattern.compile("<.*?>", Pattern.DOTALL);

    /**
     * Parses the given XML file and stores the document and the XML text of
     * its root element in the static fields.
     *
     * @param filename location of the XML file, passed to {@code SAXReader.read(String)}
     * @throws IllegalStateException if the file cannot be read or parsed; the
     *         previously stored document and content are left untouched
     */
    public congTools(String filename) {
        SAXReader reader = new SAXReader();
        Document parsed;
        try {
            parsed = reader.read(filename);
        } catch (DocumentException e) {
            // Fail loudly instead of continuing with a null or stale document.
            throw new IllegalStateException("Failed to parse XML file: " + filename, e);
        }
        // Publish only after a successful parse so a failure never leaves
        // doc and content referring to different files.
        doc = parsed;
        content = parsed.getRootElement().asXML();
    }

    /**
     * Returns the length of {@code content} after all tags have been removed.
     *
     * @param content XML text to measure
     * @return number of characters left outside of {@code <...>} tags
     * @throws NullPointerException if {@code content} is {@code null}
     */
    public int getLength(String content) {
        // One regex pass replaces the former split-and-concatenate loop.
        return TAG.matcher(content).replaceAll("").length();
    }

    /**
     * Prints the tag-free length of an XML file.
     *
     * @param args optional: {@code args[0]} is the XML file to read; when
     *        absent the bundled sample path is used
     */
    public static void main(String[] args) {
        String filename = args.length > 0 ? args[0] : "src/00000000000052735781.xml";
        congTools cl = new congTools(filename);
        int len = cl.getLength(content);
        System.out.println(len);
    }
}
split("<.*?>")表示:以“<”开头、以“>”结尾的最短字符串(即每一个XML标签)作为分隔符来切分文本。