这里获取的是 HTML 文件 body 中的所有标签及其内容。
package com.lmt.service.file;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import org.springframework.stereotype.Component;
import com.lmt.config.UrlConstants;
@Component
public class ParseFile {
/**
* 解析html文件
* @param file
* @return
*/
public String readHtml(File file){
String body = "";
try {
FileInputStream iStream = new FileInputStream(file);
Reader reader = new InputStreamReader(iStream);
BufferedReader htmlReader = new BufferedReader(reader);
String line;
boolean found = false;
while (!found && (line = htmlReader.readLine()) != nul