// Fetch all files from the server (从服务器中获取所有的文件)
package web;
import java.io.File;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import cn.edu.hfut.dmic.webcollector.net.HttpRequester;
import cn.edu.hfut.dmic.webcollector.net.HttpRequesterImpl;
import cn.edu.hfut.dmic.webcollector.net.HttpResponse;
import cn.edu.hfut.dmic.webcollector.util.FileUtils;
/**
 * Downloads every file reachable from an HTML index page to the local file system.
 *
 * Starting from a root URL, each {@code td a} link on the page is inspected: a link whose
 * path ends in {@code /} is treated as a sub-directory (created locally, then crawled
 * recursively); any other link is fetched and written to disk.
 *
 * NOTE(review): this assumes the server returns a table-based directory index whose hrefs
 * are absolute paths beginning with the root directory name - confirm against the server.
 */
public class GrapDoc {

    /**
     * Number of leading href characters dropped to obtain the local path.
     * NOTE(review): presumably the length of "/res", the root path of the start URL - confirm.
     */
    private static final int HREF_PREFIX_LENGTH = 4;

    /**
     * Entry point: crawls the hard-coded root URL.
     *
     * @param args unused
     * @throws Exception if a page cannot be fetched or decoded
     */
    public static void main(String[] args) throws Exception {
        String url = "http://xxx.xx.xx.80/res/";
        checkContent(url);
    }

    /**
     * Fetches the page at {@code url} and mirrors every {@code td a} link found on it.
     *
     * Links are skipped when their href is too short to strip the prefix from, or when
     * their resolved URL does not lie strictly below {@code url}. The second rule keeps
     * parent-directory and self links from causing endless recursion and keeps the crawl
     * (and the files written) inside the starting directory.
     *
     * @param url absolute URL of the index page to crawl
     * @throws Exception if a page or file cannot be fetched or decoded; failures while
     *                   writing an individual file are reported and do not stop the crawl
     */
    private static void checkContent(String url) throws Exception {
        Document doc = Jsoup.parse(new String(fetch(url), "utf-8"), url);
        for (Element link : doc.select("td a")) {
            String href = link.attr("href");
            String absoluteUrl = link.attr("abs:href");

            // substring() below would throw on hrefs shorter than the prefix, and an
            // empty remainder is not a usable local path.
            if (href.length() <= HREF_PREFIX_LENGTH) {
                continue;
            }
            // Only follow links that lie strictly under the page being crawled.
            if (!isBelow(url, absoluteUrl)) {
                continue;
            }

            String path = href.substring(HREF_PREFIX_LENGTH);
            if (path.endsWith("/")) {
                File dir = new File(path);
                if (!dir.exists()) {
                    dir.mkdirs();
                }
                checkContent(absoluteUrl);
            } else {
                System.out.println(absoluteUrl);
                byte[] content = fetch(absoluteUrl);
                try {
                    FileUtils.writeFileWithParent(path, content);
                } catch (Exception ex) {
                    // Best effort: report the failing file and carry on with the rest.
                    System.err.println("Failed to write " + path + " (from " + absoluteUrl + ")");
                    ex.printStackTrace();
                }
            }
        }
    }

    /** Returns the raw response body for {@code url}, using a fresh requester per call. */
    private static byte[] fetch(String url) throws Exception {
        HttpRequester httpRequester = new HttpRequesterImpl();
        HttpResponse response = httpRequester.getResponse(url);
        return response.getContent();
    }

    /** Tells whether {@code candidate} is a URL strictly underneath {@code base}. */
    private static boolean isBelow(String base, String candidate) {
        return candidate.length() > base.length() && candidate.startsWith(base);
    }
}