Java 正则表达式解析 Html

去年在 Uptech 的时候写过一个开源的 XMPP Robot,当时有一个搜索天气信息的功能,我用了 HtmlParser 来解析网页。说实话 HtmlParser 的确不错,只是我没什么时间琢磨它,使用起来还不习惯,所以现在换成正则表达式来解析网页,其实也是想尝试一下。现在解析天气预报信息的方式已从 HtmlParser 转移到了 Java 正则表达式,这是刚实现的一段代码,贴出来共享 ...

/**
 * Copyright (C) 2006 the original author or authors.
 *
 * This software is published under the terms of the GNU Public License (GPL),
 * a copy of which is included in this distribution.
 */

package com.boar.modules;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.boar.container.BasicModule;

/**
 * @author <a href="zhuaming@gmail.com">Ben </a>
 */
public class WeatherModule extends BasicModule{

 private static ScheduledExecutorService executor = null;
 private static Map cache = new ConcurrentHashMap();

 private static final String urlLink =
  "http://weather.tq121.com.cn/mapanel/index.php?city=";
 
 private static final String citys[] = {
  "北京", "哈尔滨", "长春", "沈阳", "大连",
  "天津", "呼和浩特","乌鲁木齐", "西宁", "银川",
  "兰州", "西安", "拉萨", "成都","重庆", "贵阳",
  "昆明", "太原", "石家庄", "济南", "青岛", "郑州",
  "合肥", "南京", "徐州", "连云港", "上海", "武汉",
  "长沙", "南昌", "杭州", "福州", "厦门", "台北",
  "南宁", "桂林", "海口", "三亚", "广州", "香港", "澳门"  
 };
  
 public WeatherModule() {
  super(" Weather Module");
 }
 
 public void start(){    
     executor = new ScheduledThreadPoolExecutor(1);
     executor.scheduleWithFixedDelay(new WeatherMonitor(), 0, 60 * 60, TimeUnit.SECONDS);
 }
 
 public void stop() {
     if (executor != null) {
   executor.shutdown();
  }
     if (cache != null){
      cache.clear();
     }
    }
 
 public String search(String city){

  if (cache.containsKey(city.trim())){
   return cache.get(city.trim()).toString();
  }  
  return " Not support ."; 
 }
 

 private class WeatherMonitor implements Runnable {
  
  public void run()  {
   cache.clear();
   parse();
  }
  
  private String getWeather(String pattern, String match){
         Pattern sp = Pattern.compile(pattern);
      Matcher matcher = sp.matcher(match);
         while(matcher.find()){
          return matcher.group(1);
         }       
         return "";
  }
  
  private void parse() {
   for(int i=0;i<= citys.length-1;i++){
    StringBuffer pageBuffer = new StringBuffer();   
    try {
     URL url = new URL(urlLink + citys[i]);
     URLConnection ret = url.openConnection();
     String input ;
     
     BufferedReader br = new BufferedReader(new InputStreamReader(ret.getInputStream()));    
     while((input = br.readLine()) != null) {
      pageBuffer.append(input);
     }
    }catch(Exception e){
     System.out.println(e.getMessage());
    }
         
    StringBuffer weatherBuffer = new StringBuffer();

          weatherBuffer.append(getWeather("<td width=/"163/" align=/"center/" valign=/"top/"><span class=/"big-cn/">(.*?)</span>",pageBuffer.toString()));  
          weatherBuffer.append(getWeather("<td width=/"160/" align=/"center/" valign=/"top/" class=/"weather/">(.*?)</td>",pageBuffer.toString()));         
          weatherBuffer.append(getWeather("<td width=/"153/" valign=/"top/"><span class=/"big-cn/">(.*?)</span>",pageBuffer.toString()));         
          weatherBuffer.append(getWeather("class=/"weatheren/">(.*?)</td>",pageBuffer.toString()));
          
    cache.put(citys[i], weatherBuffer.toString());
   }
  }
 }
}

 
评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值