去年在 Uptech 的时候写过一个开源的 XMPP Robot,当时有一个搜索天气信息的功能,我用了 HtmlParser 来解析网页。说实话 HtmlParser 的确不错,只是我没什么时间琢磨它,使用起来还不习惯,所以现在换成正则表达式来解析网页,其实也是想尝试一下。现在解析天气预报信息的方式已从 HtmlParser 转移到了 Java 正则表达式,这是刚实现的一段代码,贴出来共享 ...
/**
* Copyright (C) 2006 the original author or authors.
*
* This software is published under the terms of the GNU Public License (GPL),
* a copy of which is included in this distribution.
*/
package com.boar.modules;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.boar.container.BasicModule;
/**
* @author <a href="zhuaming@gmail.com">Ben </a>
*/
public class WeatherModule extends BasicModule{
private static ScheduledExecutorService executor = null;
private static Map cache = new ConcurrentHashMap();
private static final String urlLink =
"http://weather.tq121.com.cn/mapanel/index.php?city=";
private static final String citys[] = {
"北京", "哈尔滨", "长春", "沈阳", "大连",
"天津", "呼和浩特","乌鲁木齐", "西宁", "银川",
"兰州", "西安", "拉萨", "成都","重庆", "贵阳",
"昆明", "太原", "石家庄", "济南", "青岛", "郑州",
"合肥", "南京", "徐州", "连云港", "上海", "武汉",
"长沙", "南昌", "杭州", "福州", "厦门", "台北",
"南宁", "桂林", "海口", "三亚", "广州", "香港", "澳门"
};
public WeatherModule() {
super(" Weather Module");
}
public void start(){
executor = new ScheduledThreadPoolExecutor(1);
executor.scheduleWithFixedDelay(new WeatherMonitor(), 0, 60 * 60, TimeUnit.SECONDS);
}
public void stop() {
if (executor != null) {
executor.shutdown();
}
if (cache != null){
cache.clear();
}
}
public String search(String city){
if (cache.containsKey(city.trim())){
return cache.get(city.trim()).toString();
}
return " Not support .";
}
private class WeatherMonitor implements Runnable {
public void run() {
cache.clear();
parse();
}
private String getWeather(String pattern, String match){
Pattern sp = Pattern.compile(pattern);
Matcher matcher = sp.matcher(match);
while(matcher.find()){
return matcher.group(1);
}
return "";
}
private void parse() {
for(int i=0;i<= citys.length-1;i++){
StringBuffer pageBuffer = new StringBuffer();
try {
URL url = new URL(urlLink + citys[i]);
URLConnection ret = url.openConnection();
String input ;
BufferedReader br = new BufferedReader(new InputStreamReader(ret.getInputStream()));
while((input = br.readLine()) != null) {
pageBuffer.append(input);
}
}catch(Exception e){
System.out.println(e.getMessage());
}
StringBuffer weatherBuffer = new StringBuffer();
weatherBuffer.append(getWeather("<td width=/"163/" align=/"center/" valign=/"top/"><span class=/"big-cn/">(.*?)</span>",pageBuffer.toString()));
weatherBuffer.append(getWeather("<td width=/"160/" align=/"center/" valign=/"top/" class=/"weather/">(.*?)</td>",pageBuffer.toString()));
weatherBuffer.append(getWeather("<td width=/"153/" valign=/"top/"><span class=/"big-cn/">(.*?)</span>",pageBuffer.toString()));
weatherBuffer.append(getWeather("class=/"weatheren/">(.*?)</td>",pageBuffer.toString()));
cache.put(citys[i], weatherBuffer.toString());
}
}
}
}