用Java语言读取网页内容,并利用正则表达式解析其中包含的天气信息。原网址为 http://www.weather.com.cn/index/zxqxgg/wlstyb.shtml ,先查看该网页的HTML源代码,找出天气信息所在的位置,然后就可以动手了。
为演示方便,单独写了一个JSP页面来测试;注意编码等细节问题(页面本身和读取远程网页时都使用UTF-8)。
<%@ page language="java" contentType="text/html; charset=utf-8"
pageEncoding="utf-8"%>
<%@ page import="java.net.URL" %>
<%@ page import="java.util.regex.Matcher" %>
<%@ page import="java.util.regex.Pattern" %>
<%@ page import="java.io.IOException" %>
<%@ page import="java.io.BufferedReader" %>
<%@ page import="java.io.InputStreamReader" %>
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<title>未来三天全国天气预报</title>
</head>
<body>
<%
// Fetches the remote forecast page, extracts the <DL id=mainContent> fragment
// from it with a regular expression, and writes that fragment into this page.
//
// The pattern is applied to one line at a time, so the fragment is only found
// when the opening and closing DL tags sit on the same line of the remote page.
//
// NOTE(review): the extracted fragment is third-party HTML and is written to
// the response unescaped; that is how the markup is meant to render here, but
// it means this page trusts whatever the remote site returns.
String weather = null;
BufferedReader br = null;
try {
    URL a = new URL("http://www.weather.com.cn/index/zxqxgg/wlstyb.shtml");

    // Use an explicit connection so timeouts can be set; without them a stalled
    // remote server would block this request thread indefinitely.
    // (Fully qualified name so no additional page import is required.)
    java.net.URLConnection conn = a.openConnection();
    conn.setConnectTimeout(5000);
    conn.setReadTimeout(10000);

    br = new BufferedReader(new InputStreamReader(conn.getInputStream(), "utf-8"));
    Pattern p = Pattern.compile("<DL id=mainContent>.*?</DL>");
    String li;
    while ((li = br.readLine()) != null) {
        Matcher m = p.matcher(li);
        if (m.find()) {
            // The reader has already decoded the bytes as UTF-8. Re-encoding
            // and decoding through the platform default charset would corrupt
            // non-ASCII text on hosts whose default is not UTF-8, so the
            // matched text is used directly.
            // Drop every span running from an <a tag to the following <br/>.
            weather = m.group().replaceAll("<a.*?<br\\s*/>", "");
            break;
        }
    }
} catch (IOException e) {
    // Record the failure in the servlet container log rather than on stderr.
    application.log("Failed to fetch weather forecast", e);
} finally {
    // Close the reader on every path, including read failures.
    if (br != null) {
        try {
            br.close();
        } catch (IOException ignored) {
            // Nothing useful can be done if closing fails.
        }
    }
}
if (weather != null) {
    out.println(weather);
} else {
    // Avoid printing the literal text "null" when nothing could be extracted.
    out.println("暂时无法获取天气信息");
}
%>
</body>
</html>