自己写了一段简单的网页抓取(网络爬虫)代码。由于公司上网需要经过代理服务器,所以代码中包含了代理认证的过程:
package com.myweb.test;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.net.Authenticator;
import java.net.InetSocketAddress;
import java.net.Proxy;
import java.net.URL;
import java.net.URLConnection;
/**
 * Fetches one web page through an authenticated HTTP proxy and saves its text
 * to a local file.
 *
 * <p>The proxy address, the target URL, the proxy credentials and the output
 * path are all hard-coded in {@link #main(String[])}.
 */
public class TestUrl {

    /**
     * Opens the target URL through the proxy, then copies the response body to
     * {@code d:/doc/inetFile.txt}.
     *
     * @param args command-line arguments; not used
     * @throws IOException if the connection cannot be opened, or if reading the
     *         response or writing the file fails
     */
    public static void main(String[] args) throws IOException {
        InetSocketAddress address = new InetSocketAddress("10.200.254.166", 8002);
        Proxy proxy = new Proxy(Proxy.Type.HTTP, address);
        URL url = new URL("http://www.hao123.com");

        // Install the JVM-wide default authenticator so the networking code can
        // obtain credentials when the proxy asks for authentication.
        Authenticator.setDefault(new MyAuthenticator("***************", "****************"));

        URLConnection conn = url.openConnection(proxy);

        // NOTE(review): both the reader and the writer use the default charset
        // because none is specified; confirm this matches the page's encoding.
        InputStreamReader isr = new InputStreamReader(conn.getInputStream());
        try {
            File file = new File("d:/doc/inetFile.txt");
            OutputStreamWriter osw = new OutputStreamWriter(new FileOutputStream(file));
            try {
                char[] cs = new char[100];
                int count;
                // read() may fill only part of the buffer, so write exactly the
                // number of chars it returned instead of the whole array.
                while ((count = isr.read(cs)) != -1) {
                    osw.write(cs, 0, count);
                }
            } finally {
                // Closing also flushes any buffered output, even when the copy failed.
                osw.close();
            }
        } finally {
            isr.close();
        }
    }
}
package com.myweb.test;
import java.net.Authenticator;
import java.net.PasswordAuthentication;
/**
 * {@link Authenticator} that answers every request with one fixed user name
 * and password supplied at construction time.
 */
public class MyAuthenticator extends Authenticator {

    /** User name returned with every authentication answer. */
    private final String userName;

    /** Password returned with every authentication answer. */
    private final String secret;

    /**
     * Stores the credentials to hand out later.
     *
     * @param name     the user name
     * @param password the password
     */
    public MyAuthenticator(String name, String password) {
        userName = name;
        secret = password;
    }

    /**
     * Builds a new {@link PasswordAuthentication} from the stored credentials.
     *
     * @return a fresh object holding the user name and a char-array copy of the password
     */
    @Override
    protected PasswordAuthentication getPasswordAuthentication() {
        char[] passwordChars = secret.toCharArray();
        return new PasswordAuthentication(userName, passwordChars);
    }
}