补上中国天气网城市编码的抓取代码(很一般的抓取实现,嘿嘿)。

import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;

/**
 * Crawls the city-code tables published by weather.com.cn.
 *
 * <p>The crawl is three levels deep: the top-level list, one list per
 * top-level code, and one station list per (top-level code + second-level
 * code). Each payload is a flat JSON object of the form
 * {@code {"10101":"<name>","10102":"<name>",...}}; it is handled with plain
 * string operations rather than a JSON parser.
 *
 * @author wu
 */
public class GetWeatherCode {

	/** Common prefix of every data URL requested by this class. */
	private static final String DATA_ROOT = "http://www.weather.com.cn/data/city3jdata/";

	/** Top-level list (found by inspecting the site's requests with Firebug). */
	private static final String PROVINCE_LIST_URL = DATA_ROOT + "china.html";

	/** Prefix of the second-level list URL; the top-level code is appended. */
	private static final String CITY_URL_PREFIX = DATA_ROOT + "provshi/";

	/** Prefix of the station list URL; top-level code + second-level code is appended. */
	private static final String STATION_URL_PREFIX = DATA_ROOT + "station/";

	/** Extension shared by all data URLs. */
	private static final String URL_SUFFIX = ".html";

	/** Browser User-Agent sent with every request. */
	private static final String USER_AGENT = "Mozilla/5.0 (Windows; U; Windows NT 6.1; zh-CN; rv:1.9.2.2) Gecko/20100316 Firefox/3.6.2 (.NET CLR 3.5.30729)";

	/** Connect and read timeout, in milliseconds. */
	private static final int TIMEOUT_MILLIS = 60000;

	/**
	 * Fetches the body of an HTTP resource with a single GET request.
	 *
	 * <p>The URL is echoed to standard output before the request is made.
	 * Lines of the response are joined with {@code "\r\n"} (each line,
	 * including the last, is followed by that terminator).
	 *
	 * @param strUrl
	 *            absolute URL, e.g. http://www.baidu.com
	 * @return the response body, or {@code null} when the URL is malformed or
	 *         not HTTP, the status is not 200, or any I/O error occurs
	 */
	public String getContent(String strUrl) {
		HttpURLConnection httpConn = null;
		try {
			URL url = new URL(strUrl);
			System.out.println(strUrl);
			// Exactly one connection: the same object carries the headers,
			// yields the status code and supplies the body.
			httpConn = (HttpURLConnection) url.openConnection();
			// Per-connection setting; the static setter would change the
			// default for every HttpURLConnection in the JVM.
			httpConn.setInstanceFollowRedirects(true);
			httpConn.setRequestMethod("GET");
			httpConn.setRequestProperty("User-Agent", USER_AGENT);
			httpConn.setConnectTimeout(TIMEOUT_MILLIS);
			// Without a read timeout a stalled server would block forever.
			httpConn.setReadTimeout(TIMEOUT_MILLIS);

			int httpResult = httpConn.getResponseCode();
			if (httpResult != HttpURLConnection.HTTP_OK) {
				System.out.println("fail连接失败!!!");
				return null;
			}

			InputStream ins = httpConn.getInputStream();
			// NOTE(review): UTF-8 is assumed for these endpoints - confirm.
			// An explicit charset keeps decoding independent of the platform default.
			BufferedReader br = new BufferedReader(new InputStreamReader(ins, "UTF-8"));
			try {
				StringBuilder sb = new StringBuilder();
				String line;
				while ((line = br.readLine()) != null) {
					sb.append(line).append("\r\n");
				}
				return sb.toString();
			} finally {
				br.close();
			}
		} catch (Exception e) {
			// Deliberately broad: besides I/O failures this also covers a
			// non-HTTP URL (ClassCastException); callers only test for null.
			e.printStackTrace();
			return null;
		} finally {
			if (httpConn != null) {
				httpConn.disconnect();
			}
		}
	}

	/**
	 * Removes the braces and double quotes of a flat JSON object.
	 *
	 * @param str
	 *            non-null text
	 * @return {@code str} without any '{', '}' or '"' characters
	 */
	private static String stripJsonPunctuation(String str) {
		return str.replace("{", "").replace("}", "").replace("\"", "");
	}

	/**
	 * Tells whether a fetched body should be treated as a data payload.
	 *
	 * @param content
	 *            body returned by {@link #getContent(String)}, may be null
	 * @return {@code true} when the body is non-null and contains no '&lt;'
	 *         (presumably this filters out HTML error pages - confirm)
	 */
	private static boolean looksLikeData(String content) {
		return content != null && !content.contains("<");
	}

	/**
	 * Prints a title line followed by every entry on its own line.
	 *
	 * @param title
	 *            heading printed first
	 * @param entries
	 *            already formatted entries
	 */
	private static void printSection(String title, Iterable<String> entries) {
		System.out.println(title);
		for (String entry : entries) {
			System.out.println(entry);
		}
	}

	/**
	 * Turns a flat JSON payload into plain text: braces and quotes are
	 * removed, every comma becomes a line break and every colon becomes four
	 * spaces.
	 *
	 * @param str
	 *            text to format, may be null
	 * @return the formatted text, or {@code null} when {@code str} is null
	 */
	private String rplaceCode(String str){
		if (str == null) {
			return null;
		}
		return stripJsonPunctuation(str).replace(",", "\n").replace(":", "    ");
	}

	/**
	 * Extracts the code (the key) from one {@code "code":"name"} entry.
	 *
	 * @param str
	 *            a single entry, possibly still carrying braces or quotes;
	 *            may be null
	 * @return the trimmed text before the first colon, or {@code null} when
	 *         {@code str} is null, has no colon, or has an empty key
	 */
	private String outCode(String str){
		if (str == null) {
			return null;
		}
		String stripped = stripJsonPunctuation(str);
		int colon = stripped.indexOf(':');
		if (colon < 0) {
			// e.g. an empty "{}" payload: there is no code to extract.
			return null;
		}
		String code = stripped.substring(0, colon).trim();
		return code.length() == 0 ? null : code;
	}

	/**
	 * First implementation, kept for reference; its output was judged not
	 * good enough.
	 *
	 * <p>Instead of reading the codes from the payload keys, it probes the
	 * numeric codes 10101..10134 and, for each, the consecutive station codes
	 * {@code code*100 .. code*100 + n}, where {@code n} is the number of
	 * entries in the second-level list.
	 *
	 * @deprecated use {@link #main(String[])}, which takes the codes from the
	 *             payload keys.
	 */
	@Deprecated
	public void oneImplements(){
		ArrayList<String> arry1 = new ArrayList<String>(); // entries with source URL
		ArrayList<String> arry2 = new ArrayList<String>(); // entries without source URL

		for (int i = 10101; i <= 10134; i++) {
			String rs = getContent(CITY_URL_PREFIX + i + URL_SUFFIX);
			if (!looksLikeData(rs)) {
				continue;
			}
			System.out.println("市级:\n" + "  " + rs);
			String cityBlock = rplaceCode("市级:\n" + rs);
			arry1.add(cityBlock);
			arry2.add(cityBlock);

			String[] shij = rs.split(",");
			int ct = shij.length;
			if (ct > 0) {
				int start = i * 100;
				int end = start + ct;
				for (int j = start; j <= end; j++) {
					String stationUrl = STATION_URL_PREFIX + j + URL_SUFFIX;
					String rs2 = getContent(stationUrl);
					if (looksLikeData(rs2)) {
						// Offsets 0 and 1 both map to the first entry; the
						// last offset maps to the last entry.
						int idx = Math.max(j - start - 1, 0);
						String jieguo = "  市级【" + shij[idx] + "】以下的:\n" + rs2;
						String formatted = rplaceCode(jieguo);
						// The URL is appended after formatting so that its
						// "://" is not rewritten by rplaceCode.
						arry1.add(formatted + "\n " + stationUrl);
						arry2.add(formatted);
						System.out.println(jieguo);
						System.out.println("");
					}
				}
			}
		}
		printSection("带地址的:", arry1);
		printSection("不带地址的:", arry2);
	}

	/**
	 * Crawls all three levels starting from the top-level list and prints the
	 * result twice: first with the station URL attached to each station
	 * block, then without it.
	 *
	 * @param args
	 *            unused
	 */
	public static void main(String args[]) {
		GetWeatherCode ou = new GetWeatherCode();
		ArrayList<String> arry1 = new ArrayList<String>(); // entries with source URL
		ArrayList<String> arry2 = new ArrayList<String>(); // entries without source URL

		String src = ou.getContent(PROVINCE_LIST_URL);
		if (looksLikeData(src)) {
			String[] sheng = src.split(","); // one entry per top-level code
			for (String sh : sheng) {
				String outCode = ou.outCode(sh);
				if (outCode == null) {
					continue; // entry without a code; nothing to request
				}
				String rs = ou.getContent(CITY_URL_PREFIX + outCode + URL_SUFFIX);
				if (!looksLikeData(rs)) {
					continue;
				}
				String cityBlock = ou.rplaceCode("市级:" + sh + "\n" + rs);
				arry1.add(cityBlock);
				arry2.add(cityBlock);

				String[] shij = rs.split(","); // one entry per second-level code
				for (String sj : shij) {
					String shjiOutCode = ou.outCode(sj);
					if (shjiOutCode == null) {
						continue;
					}
					String stationUrl = STATION_URL_PREFIX + outCode + shjiOutCode + URL_SUFFIX;
					String rs2 = ou.getContent(stationUrl);
					if (looksLikeData(rs2)) {
						String jieguo = ou.rplaceCode("  市级【" + sj + "】以下的:\n" + rs2);
						// The URL is appended after formatting so that its
						// "://" is not rewritten by rplaceCode.
						arry1.add(jieguo + stationUrl);
						arry2.add(jieguo);
					}
				}
			}
		}
		printSection("带地址的:", arry1);
		printSection("不带地址的:", arry2);
	}
}

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值