抓取网页的时候出现下面这个问题,该怎么解决?
/**
 * Downloads the response body served at the given URL to a file on the local disk.
 *
 * <p>The output file name is {@code DirectoryPath} concatenated with the
 * URL-encoded (UTF-8) form of {@code urlStr}; the directory is created first
 * when it does not exist. If the request itself fails, a message is printed to
 * standard output and the method returns without touching the disk. If reading
 * the body fails part-way, the stack trace is printed and whatever was written
 * so far is left in the file.
 *
 * @param urlStr the address of the page to fetch
 * @throws FileNotFoundException if the output file cannot be opened for writing
 * @throws UnsupportedEncodingException if the "utf-8" encoding is unavailable
 *         when building the file name
 */
public static void downloadPageContent(String urlStr)
        throws FileNotFoundException, UnsupportedEncodingException
{
    HttpClient httpclient = new DefaultHttpClient();
    try
    {
        HttpResponse response;
        try
        {
            response = httpclient.execute(new HttpGet(urlStr));
        } catch (ClientProtocolException e)
        {
            // No response is available after a protocol error, so stop here
            // instead of dereferencing a null response further down.
            System.out.println("出错啦。这里");
            return;
        } catch (IOException e)
        {
            System.out.println("222");
            if (e instanceof org.apache.http.conn.HttpHostConnectException)
            {
                System.out.println("拒绝链接,应该换IP尝试代理");
            }
            if (e instanceof java.net.UnknownHostException)
            {
                System.out.println("找不到主机错误发生,处理异常中");
            }
            // Any I/O failure while executing the request means nothing can be saved.
            return;
        }

        System.out.println(response.getStatusLine().getStatusCode());
        HttpEntity entity = response.getEntity();

        // Create the target directory if it does not exist yet.
        File directory = new File(DirectoryPath);
        if (!directory.exists())
        {
            directory.mkdirs();
        }

        // NOTE(review): plain concatenation presumably relies on DirectoryPath
        // ending with a path separator - confirm where it is declared.
        FileOutputStream os = new FileOutputStream(DirectoryPath
                + URLEncoder.encode(urlStr, "utf-8"));
        try
        {
            // A response without a body leaves an empty file, as before.
            if (entity != null)
            {
                copyEntityToStream(entity, os);
            }
        } finally
        {
            try
            {
                os.close();
            } catch (IOException e)
            {
                e.printStackTrace();
            }
        }
    } finally
    {
        // Release the connections held by this single-use client.
        httpclient.getConnectionManager().shutdown();
    }
}

/**
 * Copies the content of {@code entity} into {@code os} and closes the entity's
 * content stream afterwards. Failures are reported via stack trace; the caller
 * remains responsible for closing {@code os}.
 */
private static void copyEntityToStream(HttpEntity entity, FileOutputStream os)
{
    InputStream instream;
    try
    {
        instream = entity.getContent();
    } catch (IllegalStateException e)
    {
        System.out.println("出错啦。!");
        e.printStackTrace();
        return;
    } catch (IOException e)
    {
        e.printStackTrace();
        return;
    }
    if (instream == null)
    {
        return;
    }

    try
    {
        byte[] buffer = new byte[2048];
        int bytesRead;
        while ((bytesRead = instream.read(buffer)) != -1)
        {
            // Write only the bytes actually read; a read may fill just part of
            // the buffer, and the remainder holds data from an earlier pass.
            os.write(buffer, 0, bytesRead);
        }
    } catch (IOException e)
    {
        e.printStackTrace();
    } finally
    {
        try
        {
            instream.close();
        } catch (IOException e)
        {
            e.printStackTrace();
        }
    }
}
public static void main(String[] args) throws Exception
{
long startTime = System.currentTimeMillis();
System.out.println(startTime);
downloadPageContent(“http://www.pooioo.com”);
long endTime = System.currentTimeMillis();
System.out.println(endTime);
System.out.println(endTime-startTime);
}
连接重置(Connection reset)。我该怎么办呢?