1. 伪装user agent
Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0)
Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.2)
Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)
设置代码如下(假设使用 Java + HttpClient 4.1.2):
// Build the GET request first, then replace its User-Agent header with the
// browser string the crawler should present to the server.
HttpGet request = new HttpGet("URL");
request.setHeader("User-Agent", "user agent内容");
2. 登录(log in)
DefaultHttpClient httpclient = new DefaultHttpClient(); HttpPost postMethod = new HttpPost("http://passport.cnblogs.com/login.aspx"); //注意用post //登陆博客园所需要的参数 List nvps = new ArrayList(); nvps.add(new BasicNameValuePair("tbUserName", "风炎")); nvps.add(new BasicNameValuePair("tbPassword", "zero")); nvps.add(new BasicNameValuePair("btnLogin", "登 录")); nvps.add(new BasicNameValuePair("__EVENTTARGET", "")); nvps.add(new BasicNameValuePair("__EVENTARGUMENT", "")); nvps.add(new BasicNameValuePair("__VIEWSTATE", "/wEPDwULLTE1MzYzODg2NzZkGAEFHl9fQ29udHJvbHNSZXF1a XJlUG9zdEJhY2tLZXlfXxYBB QtjaGtSZW1lbWJlcm1QYDyKK I9af4b67Mzq2xFaL9Bt")); nvps.add(new BasicNameValuePair("__EVENTVALIDATION", "/wEWBQLWwpqPDQLyj/OQAgK3jsrkBALR55GJDgKC3I eGDE1m7t2mGlasoP1Hd9hLaF oI2G05")); nvps.add(new BasicNameValuePair("ReturnUrl", "http://www.cnblogs.com/")); nvps.add(new BasicNameValuePair("txtReturnUrl", "http://www.cnblogs.com/")); postMethod.setEntity(new UrlEncodedFormEntity(nvps, HTTP.UTF_8)); HttpResponse response = httpclient.execute(postMethod);
3. 使用代理
DefaultHttpClient httpclient = new DefaultHttpClient(); //此代理不保证你看到的时候还存活 HttpHost proxy = new HttpHost("u120-227.static.grapesc.cz", 8080); httpclient.getParams().setParameter(ConnRoutePNames.DEFAULT_PROXY,proxy); //如果代理要认证,则加上以下语句 // httpclient.getCredentialsProvider().setCredentials(new AuthScope("proxy adress", proxy port), // new UsernamePasswordCredentials("username", "password")); //记得将网址拆成以下形式 HttpHost targetHost = new HttpHost("www.cnblogs.com"); //网站名前面不要加http:// HttpGet httpget = new HttpGet("/FengYan/"); HttpResponse response = httpclient.execute(targetHost, httpget);
4. 降低访问频率
5. 总结