java 正则表达式html,Java 正则表达式解析 Html

去年在 Uptech 的时候写过一个开源的 XMPP Robot,当时有一个搜索天气信息的功能,我用了 HtmlParser 来解析网页。说实话 HtmlParser 的确不错,只是我没什么时间琢磨它,使用起来还不习惯,所以现在换成正则表达式来解析网页,其实也是想尝试一下。现在解析天气预报信息的方式已从 HtmlParser 转移到了 Java 正则表达式,这是刚实现的一段代码,贴出来共享 ...

/**

* Copyright (C) 2006 the original author or authors.

*

* This software is published under the terms of the GNU Public License (GPL),

* a copy of which is included in this distribution.

*/

package com.boar.modules;

import java.io.BufferedReader;

import java.io.InputStreamReader;

import java.net.URL;

import java.net.URLConnection;

import java.util.Map;

import java.util.concurrent.ConcurrentHashMap;

import java.util.concurrent.ScheduledExecutorService;

import java.util.concurrent.ScheduledThreadPoolExecutor;

import java.util.concurrent.TimeUnit;

import java.util.regex.Matcher;

import java.util.regex.Pattern;

import com.boar.container.BasicModule;

/**

* @author Ben

*/

public class WeatherModule extends BasicModule{

private static ScheduledExecutorService executor = null;

private static Map cache = new ConcurrentHashMap();

private static final String urlLink =

"http://weather.tq121.com.cn/mapanel/index.php?city=";

private static final String citys[] = {

"北京", "哈尔滨", "长春", "沈阳", "大连",

"天津", "呼和浩特","乌鲁木齐", "西宁", "银川",

"兰州", "西安", "拉萨", "成都","重庆", "贵阳",

"昆明", "太原", "石家庄", "济南", "青岛", "郑州",

"合肥", "南京", "徐州", "连云港", "上海", "武汉",

"长沙", "南昌", "杭州", "福州", "厦门", "台北",

"南宁", "桂林", "海口", "三亚", "广州", "香港", "澳门"

};

public WeatherModule() {

super(" Weather Module");

}

/**
 * Starts the periodic weather refresh.
 *
 * A single-threaded scheduler runs a WeatherMonitor immediately and then again
 * 60 * 60 seconds after each run completes. If a scheduler from an earlier call is
 * still referenced it is shut down first, so calling start() repeatedly does not
 * leave extra worker threads (and duplicate monitors) behind.
 */
public void start() {
    if (executor != null) {
        // shutdown() on an already terminated executor is a no-op, so this is safe after stop().
        executor.shutdown();
    }
    executor = new ScheduledThreadPoolExecutor(1);
    executor.scheduleWithFixedDelay(new WeatherMonitor(), 0, 60 * 60, TimeUnit.SECONDS);
}

/**
 * Stops the module: shuts down the refresh scheduler if one exists,
 * then empties the weather cache if it is present.
 */
public void stop() {
    final ScheduledExecutorService scheduler = executor;
    if (scheduler != null) {
        scheduler.shutdown();
    }

    if (cache == null) {
        return;
    }
    cache.clear();
}

/**
 * Looks up the cached weather text for a city.
 *
 * @param city city name; surrounding whitespace is ignored. May be {@code null}.
 * @return the cached text for the city, or {@code " Not support ."} when the city is
 *         {@code null} or has no cache entry
 */
public String search(String city) {
    final String notSupported = " Not support .";
    if (city == null) {
        return notSupported;
    }
    // Single lookup instead of containsKey() + get(): the background monitor may
    // modify the cache between the two calls, which would make get() return null.
    Object cached = cache.get(city.trim());
    return cached != null ? cached.toString() : notSupported;
}

private class WeatherMonitor implements Runnable {

public void run()  {

cache.clear();

parse();

}

/**
 * Extracts the first capture group of the first match of a regular expression.
 *
 * @param pattern regular expression; must contain at least one capturing group
 * @param match   text to search
 * @return the text captured by group 1 of the first match, or an empty string when
 *         there is no match or group 1 did not take part in the match
 */
private String getWeather(String pattern, String match) {
    Matcher matcher = Pattern.compile(pattern).matcher(match);
    if (!matcher.find()) {
        return "";
    }
    // group(1) is null when the group is optional and did not participate;
    // callers append the result to a buffer, where null would become the text "null".
    String captured = matcher.group(1);
    return captured != null ? captured : "";
}

/**
 * Downloads the weather page of every city in citys, extracts four fragments from
 * each page with getWeather(), and stores their concatenation in cache under the
 * city name.
 *
 * A failed download is only reported on standard output; the city is still stored,
 * using whatever page text was read before the failure.
 */
private void parse() {

// One request per configured city.
for(int i=0;i<= citys.length-1;i++){

// Collects the page text; readLine() strips line terminators, so the page ends up as one long line.
StringBuffer pageBuffer = new StringBuffer();

try {

// NOTE(review): the city name is appended without URL encoding — confirm the server accepts raw non-ASCII query values.
URL url = new URL(urlLink + citys[i]);

URLConnection ret = url.openConnection();

String input ;

// NOTE(review): no charset is given, so the platform default decodes the page; the reader is never closed in this method.
BufferedReader br = new BufferedReader(new InputStreamReader(ret.getInputStream()));

while((input = br.readLine()) != null) {

pageBuffer.append(input);

}

}catch(Exception e){

// Best effort: report the failure and continue with the text read so far.
System.out.println(e.getMessage());

}

StringBuffer weatherBuffer = new StringBuffer();

// Four extractions are appended in order and the result is cached under the city name.
// NOTE(review): the pattern literals below look damaged — a string literal spans several lines,
// `/"` stands where `\"` would be expected, and each pattern ends right after the lazy group.
// Presumably HTML tags inside the patterns were stripped when this listing was published;
// restore them from the original source before compiling.
weatherBuffer.append(getWeather("

(.*?)",pageBuffer.toString()));            weatherBuffer.append(getWeather("(.*?)",pageBuffer.toString()));                   weatherBuffer.append(getWeather(" (.*?)",pageBuffer.toString()));                   weatherBuffer.append(getWeather("class=/"weatheren/">(.*?)",pageBuffer.toString()));              cache.put(citys[i], weatherBuffer.toString());   }  } }}
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值