源代码下载地址:http://www.zuidaima.com/share/1574292163382272.htm
通过指定关键词和站点名,可以自动抓取百度的搜索结果,从而得知自己的网站在某个搜索词下的排名,功能类似于 http://www.aizhan.com/siteall/zuidaima.com/ 。
01 | import java.io.BufferedReader; |
02 | import java.io.InputStream; |
03 | import java.io.InputStreamReader; |
04 | import java.net.HttpURLConnection; |
05 | import java.net.URL; |
06 | import java.net.URLEncoder; |
07 |
/**
 * Looks for the first Baidu result page whose HTML mentions a given site name
 * for a given search keyword.
 *
 * <p>Result pages are requested one after another; each page's body is scanned
 * for the site name as a plain substring.
 */
public class AnyBaiduKeywordRank {

    /** Charset used to decode response bodies and to URL-encode the keyword. */
    private static final String CHARSET = "UTF-8";

    /** Connect timeout and read timeout of every request, in milliseconds. */
    private static final int TIMEOUT_MILLIS = 10000;

    /** Amount by which the {@code pn} query parameter grows from one page to the next. */
    private static final int RESULTS_PER_PAGE = 10;

    /**
     * Upper bound on the number of pages fetched by {@link #main}. The value is an
     * arbitrary safety cap so that a site which never shows up cannot keep the
     * program requesting pages forever.
     */
    private static final int MAX_PAGES = 50;

    /** Search URL; the placeholders are the URL-encoded keyword and the {@code pn} value. */
    private static final String SEARCH_URL_TEMPLATE =
            "http://www.baidu.com/s?wd=%s&pn=%s&ie=utf-8&usm=1&rsv_page=1";

    /**
     * Performs an HTTP GET and returns the response body decoded as UTF-8.
     *
     * <p>The body is returned as received, including its line terminators, so that
     * text on adjacent lines is never fused together.
     *
     * <p>This method never throws. On any failure (malformed or {@code null} URL,
     * non-HTTP protocol, connection or read error) a one-line message is written
     * to {@code System.err} and whatever was read up to that point is returned,
     * which is the empty string when nothing was read.
     *
     * @param url absolute HTTP URL to fetch
     * @return the response body, or the part read before a failure; never {@code null}
     */
    public static String request(String url) {
        StringBuilder body = new StringBuilder();
        HttpURLConnection conn = null;
        try {
            URL serverUrl = new URL(url);
            conn = (HttpURLConnection) serverUrl.openConnection();
            conn.setRequestMethod("GET");
            conn.setConnectTimeout(TIMEOUT_MILLIS);
            conn.setReadTimeout(TIMEOUT_MILLIS);
            conn.connect();
            // try-with-resources closes the stream and the reader even when reading fails.
            try (InputStream in = conn.getInputStream();
                    BufferedReader reader =
                            new BufferedReader(new InputStreamReader(in, CHARSET))) {
                char[] buffer = new char[4096];
                int count;
                while ((count = reader.read(buffer)) != -1) {
                    body.append(buffer, 0, count);
                }
            }
        } catch (Exception e) {
            // Deliberately broad: callers rely on this method never throwing.
            System.err.println("Request failed for " + url + ": " + e);
        } finally {
            if (conn != null) {
                conn.disconnect();
            }
        }
        return body.toString();
    }

    /**
     * Fetches result pages for a keyword until one of them contains the site name.
     *
     * <p>The search stops when the site name is found, when a request yields no
     * content, or after {@code MAX_PAGES} pages.
     *
     * @param args optional: {@code args[0]} is the search keyword (default {@code "分享"}),
     *             {@code args[1]} is the site name to look for (default {@code "javaniu"})
     * @throws Exception if the keyword cannot be URL-encoded
     */
    public static void main(String[] args) throws Exception {
        String keyword = (args != null && args.length > 0) ? args[0] : "分享";
        String site = (args != null && args.length > 1) ? args[1] : "javaniu";
        if (keyword.isEmpty() || site.isEmpty()) {
            // An empty site name would match every page.
            System.err.println("Usage: AnyBaiduKeywordRank [keyword] [site]");
            return;
        }

        String encodedKeyword = URLEncoder.encode(keyword, "utf-8");
        for (int page = 1; page <= MAX_PAGES; page++) {
            int pn = (page - 1) * RESULTS_PER_PAGE;
            String pageUrl = String.format(SEARCH_URL_TEMPLATE, encodedKeyword, pn);
            System.out.println("Request url " + pageUrl);

            String html = request(pageUrl);
            if (html.isEmpty()) {
                // request() already reported the cause; later pages would not give a reliable rank.
                System.err.println("No content received for page " + page + ", stopping");
                return;
            }
            if (html.contains(site)) {
                System.out.println("Find keyword");
                System.out.println("Site " + site + " first appears on result page " + page);
                return;
            }
        }
        System.out.println("Site " + site + " not found in the first " + MAX_PAGES + " pages");
    }
}
原理很简单,大家可以发挥想象力,去抓取任何想要的数据。这不正是 code 的魅力么?