这里需要用到3个jar包:
commons-logging.jar:点击下载
commons-codec.jar:点击下载
commons-httpclient.jar:点击下载
将下载好的这3个jar包添加到项目的构建路径(Build Path)中。
由于我是深大学生,就爬取深大内部网了。
import java.io.FileWriter;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.UsernamePasswordCredentials;
import org.apache.commons.httpclient.auth.AuthScope;
import org.apache.commons.httpclient.methods.GetMethod;
/**
 * Minimal crawler example built on Apache Commons HttpClient 3.x.
 *
 * <p>Downloads a single hard-coded page through an authenticating HTTP proxy
 * and stores the response body in a local file.
 */
public class Crawl {
    /**
     * Fetches the hard-coded URL through the proxy at {@code proxy.szu.edu.cn:8080}
     * and writes the response body to {@code a.txt} in the working directory.
     *
     * <p>The body is written as the raw bytes received from the server, so the
     * saved file keeps the page's own encoding instead of being round-tripped
     * through the platform default charset.
     *
     * <p>If the server answers with anything other than HTTP 200, {@code "error"}
     * is printed and no file is written. Any exception raised while fetching or
     * writing is caught and its stack trace printed; nothing is propagated to
     * the caller.
     */
    public static void crawl() {
        HttpClient httpClient = new HttpClient();
        httpClient.getHostConfiguration().setProxy("proxy.szu.edu.cn", 8080);
        // Send the proxy credentials with the first request instead of waiting
        // for an authentication challenge.
        httpClient.getParams().setAuthenticationPreemptive(true);
        // NOTE(review): the two string literals look like placeholders for the
        // real proxy account and password; replace them before running.
        httpClient.getState().setProxyCredentials(AuthScope.ANY, new UsernamePasswordCredentials("账号", "密码"));
        String url = "http://192.168.2.229/newkc/djbprint.aspx?xqh=20151&ykch=MC99000201";
        GetMethod getMethod = new GetMethod(url);
        try {
            int status = httpClient.executeMethod(getMethod);
            if (status != HttpStatus.SC_OK) {
                System.out.println("error");
                // Do not overwrite a.txt with an error page.
                return;
            }
            byte[] responseBody = getMethod.getResponseBody();
            if (responseBody == null) {
                // No body available; nothing to save.
                return;
            }
            // Write the bytes untouched: decoding and re-encoding with the
            // platform charset would corrupt pages in a different encoding.
            java.io.FileOutputStream out = new java.io.FileOutputStream("a.txt");
            try {
                out.write(responseBody);
            } finally {
                // Close the file even when the write fails.
                out.close();
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Always hand the connection back to the connection manager,
            // whether the request succeeded, failed, or returned early.
            getMethod.releaseConnection();
        }
    }
}