爬虫访问中,如何解决网站限制IP的问题?
开发了一个爬虫,部署在自己的服务器上,请求某网站的查询功能,然后抓取查询结果,结果才访问了一会儿,就被提示封IP了。有什么好的解决方法吗?
1. 某宝买代理,简单,但是太贵;
2. 通过ADSL断线重拨,动态获取新的IP。
Java代码:
- package com.sesame.network;
- import java.io.BufferedReader;
- import java.io.InputStreamReader;
/**
 * Helpers for dialing and hanging up a Windows dial-up (ADSL) connection via the
 * {@code rasdial} command, so that reconnecting can be used to request a new IP address.
 *
 * <p>All commands are run through {@code cmd /c}, so this class only works on Windows.
 * The success/failure checks match Chinese text in the command output, so they presumably
 * require a Chinese-locale Windows installation (NOTE(review): confirm for other locales).
 */
public class ConnectNetWork {

    /**
     * Runs a command through {@code cmd /c} and returns everything it printed.
     *
     * <p>Standard error is merged into standard output so that error text is captured too and
     * the child process cannot stall on an unread stderr pipe. The output is decoded with the
     * platform default charset.
     *
     * @param strCmd the command line to run after {@code cmd /c}
     * @return the command output, each line terminated by {@code "\n"}
     * @throws Exception if the process cannot be started, its output cannot be read, or the
     *         calling thread is interrupted while waiting for the process to exit
     */
    public static String executeCmd(String strCmd) throws Exception {
        // Tokenize on whitespace, which is how Runtime.exec(String) built the argument list.
        String[] argv = ("cmd /c " + strCmd).split("\\s+");
        ProcessBuilder builder = new ProcessBuilder(argv);
        builder.redirectErrorStream(true);
        Process p = builder.start();

        StringBuilder sbCmd = new StringBuilder();
        // try-with-resources closes the reader (and the underlying pipe) even on failure.
        try (BufferedReader br = new BufferedReader(new InputStreamReader(p.getInputStream()))) {
            String line;
            while ((line = br.readLine()) != null) {
                sbCmd.append(line).append('\n');
            }
        }
        // Reap the child so it does not linger after its output has been consumed.
        p.waitFor();
        return sbCmd.toString();
    }

    /**
     * Dials the ADSL connection.
     *
     * <p>Note that the password is passed on the {@code rasdial} command line.
     *
     * @param adslTitle name of the dial-up connection entry
     * @param adslName account user name
     * @param adslPass account password
     * @return {@code true} if the command output contains the "connected" marker,
     *         {@code false} otherwise (the raw output is then printed to stderr)
     * @throws Exception if the command cannot be executed
     */
    public static boolean connAdsl(String adslTitle, String adslName, String adslPass) throws Exception {
        System.out.println("正在建立连接.");
        String adslCmd = "rasdial " + adslTitle + " " + adslName + " " + adslPass;
        String tempCmd = executeCmd(adslCmd);
        // Success is detected by the marker appearing anywhere in the output, including index 0.
        if (tempCmd.contains("已连接")) {
            System.out.println("已成功建立连接.");
            return true;
        } else {
            System.err.println(tempCmd);
            System.err.println("建立连接失败");
            return false;
        }
    }

    /**
     * Hangs up the ADSL connection.
     *
     * @param adslTitle name of the dial-up connection entry
     * @return {@code false} if the command output reports that no such connection is active,
     *         {@code true} otherwise
     * @throws Exception if the command cannot be executed
     */
    public static boolean cutAdsl(String adslTitle) throws Exception {
        String cutAdsl = "rasdial " + adslTitle + " /disconnect";
        String result = executeCmd(cutAdsl);
        if (result.contains("没有连接")) {
            System.err.println(adslTitle + "连接不存在!");
            return false;
        } else {
            System.out.println("连接已断开");
            return true;
        }
    }

    /**
     * Demo: connect, disconnect, then connect again to be assigned a new IP address.
     * The account name and password below are masked placeholders.
     */
    public static void main(String[] args) throws Exception {
        connAdsl("宽带", "hzhz**********", "******");
        Thread.sleep(1000);
        cutAdsl("宽带");
        Thread.sleep(1000);
        // Reconnect so that a new IP address is assigned.
        connAdsl("宽带", "hzhz**********", "******");
    }
}