1. 将 HTML 转换为标准 HTML,用到的技术:JTidy。
/**
 * Converts an HTML file into standard HTML using JTidy.
 *
 * <p>The source file is read and the result is written with UTF-8 configured
 * as both the input and the output encoding. Both streams are always closed,
 * even when opening the target file or parsing fails. I/O failures are not
 * propagated to the caller; their stack trace is printed instead.
 *
 * @param sourceFilename path of the HTML file to convert
 * @param targetFilename path of the file that receives the converted HTML
 */
public static void htmlCovertTohtml(String sourceFilename,
        String targetFilename) {
    Tidy tidy = new Tidy();
    tidy.setInputEncoding("UTF-8");
    tidy.setOutputEncoding("UTF-8");
    // Maximum number of characters per line; 0 disables automatic wrapping.
    tidy.setWraplen(0);
    // Keep whitespace characters inside attribute values.
    tidy.setLiteralAttribs(true);

    // try-with-resources closes the streams in reverse order (out, then in)
    // on every exit path, so nothing leaks if a later step throws.
    try (FileInputStream in = new FileInputStream(sourceFilename);
            FileOutputStream out = new FileOutputStream(targetFilename)) {
        // Parse the source file and write the tidied markup to the target.
        tidy.parse(in, out);
    } catch (IOException e) {
        // FileNotFoundException is a subclass of IOException, so a missing
        // source or unwritable target lands here as well.
        e.printStackTrace();
    }
}
POM.xml
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <!-- Coordinates of this project -->
  <groupId>jacksonPrograme</groupId>
  <artifactId>jacksonPrograme</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <packaging>jar</packaging>

  <dependencies>
    <!-- JTidy: provides the Tidy class used for the HTML conversion -->
    <dependency>
      <groupId>net.sf.jtidy</groupId>
      <artifactId>jtidy</artifactId>
      <version>r938</version>
    </dependency>
  </dependencies>
</project>