代码写得比较凌乱,正则表达式用得也不够好,不过功能已经实现,现将代码贴出如下:
package com.zzk.cn;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Console scraper that downloads a weather forecast page, locates the
 * {@code weather_day_brief} block and prints the forecast entries found at
 * positions 7, 8 and 9 of that block, both raw and broken into fields.
 *
 * <p>For every selected entry the program prints, one value per line: the
 * weather segment, the temperature segment, the wind segment, then the daytime
 * weather, night weather, low temperature, high temperature, wind direction
 * and wind power.
 *
 * <p>All non-ASCII literals are written as Unicode escapes so the class
 * compiles the same way regardless of the source encoding passed to the
 * compiler.
 */
public class TestSpider {

    /** Page fetched when no URL is supplied on the command line. */
    private static final String DEFAULT_PAGE_URL = "http://www.haotq.com/d_anqing.html";

    /** Index (within the forecast block) of the first entry to report. */
    private static final int FIRST_DAY = 7;

    /** Index (within the forecast block) of the last entry to report. */
    private static final int LAST_DAY = 9;

    /** Connect and read timeout, so a stalled server cannot hang the program. */
    private static final int TIMEOUT_MILLIS = 15000;

    /** Matches the forecast block up to its first closing div tag. */
    private static final Pattern BRIEF_DIV =
            Pattern.compile("div\\sid=.weather_day_brief.*?</div>");

    /** Entry delimiter; also accepts the self-closing and upper-case spellings. */
    private static final Pattern LINE_BREAK =
            Pattern.compile("<br\\s*/?>", Pattern.CASE_INSENSITIVE);

    /** Start of the script element that follows the last entry inside the block. */
    private static final String SCRIPT_OPEN = "<script";

    /** Closing tag that always terminates a {@link #BRIEF_DIV} match. */
    private static final String DIV_CLOSE = "</div>";

    /** Extracts the charset parameter from a Content-Type header value. */
    private static final Pattern CHARSET =
            Pattern.compile("charset\\s*=\\s*[\"']?([\\w.:-]+)", Pattern.CASE_INSENSITIVE);

    /**
     * Field separator inside one entry: any run of ASCII whitespace,
     * no-break space, ideographic space or the literal nbsp entity.
     */
    private static final Pattern SEPARATOR =
            Pattern.compile("(?:&nbsp;|[\\s\u00A0\u3000])+");

    /** ASCII or full-width comma. */
    private static final Pattern COMMA = Pattern.compile("[,\uFF0C]");

    /** ASCII or full-width tilde separating the low and high temperature. */
    private static final Pattern TILDE = Pattern.compile("[~\uFF5E]");

    /** Keyword that prefixes the daytime weather text. */
    private static final String DAYTIME = "\u767D\u5929";

    /** Keyword that prefixes the night weather text. */
    private static final String NIGHT = "\u591C\u95F4";

    /** Keyword that prefixes the temperature range. */
    private static final String TEMPERATURE = "\u6C14\u6E29";

    /**
     * Fetches the forecast page and prints the selected entries.
     *
     * @param args optional; {@code args[0]} overrides the default page URL
     */
    public static void main(String[] args) {
        String address = (args != null && args.length > 0) ? args[0] : DEFAULT_PAGE_URL;

        String html;
        try {
            html = fetchPage(address);
        } catch (MalformedURLException e) {
            e.printStackTrace();
            return;
        } catch (IOException e) {
            e.printStackTrace();
            return;
        }

        String[] entries = extractEntries(html);
        if (entries.length <= LAST_DAY) {
            // Fail with a readable message instead of an index-out-of-bounds crash.
            System.err.println("Forecast block not found or too short: need entry index "
                    + LAST_DAY + " but the block yielded " + entries.length + " piece(s)");
            return;
        }

        // Raw entries first, then the per-day breakdown.
        for (int day = FIRST_DAY; day <= LAST_DAY; day++) {
            System.out.println(entries[day]);
        }
        for (int day = FIRST_DAY; day <= LAST_DAY; day++) {
            try {
                String[] fields = parseDay(entries[day]);
                for (int i = 0; i < fields.length; i++) {
                    System.out.println(fields[i]);
                }
            } catch (IllegalArgumentException e) {
                // One malformed entry should not hide the others.
                System.err.println(e.getMessage());
            }
        }
    }

    /**
     * Downloads a page and returns its lines concatenated without separators
     * (the block regex relies on the text containing no line terminators).
     *
     * @param address URL of the page
     * @return the page text
     * @throws IOException if the URL is malformed or the transfer fails
     */
    private static String fetchPage(String address) throws IOException {
        URLConnection conn = new URL(address).openConnection();
        conn.setConnectTimeout(TIMEOUT_MILLIS);
        conn.setReadTimeout(TIMEOUT_MILLIS);

        InputStream in = conn.getInputStream();
        // Plain try/finally keeps the file compilable at the language level it
        // was written for; closing the underlying stream releases everything.
        try {
            BufferedReader reader =
                    new BufferedReader(openReader(in, charsetOf(conn.getContentType())));
            StringBuilder page = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                page.append(line);
            }
            return page.toString();
        } finally {
            try {
                in.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Wraps the stream in a reader using the declared charset, falling back to
     * the platform default when none is declared or the name is unsupported.
     */
    private static InputStreamReader openReader(InputStream in, String charset) {
        if (charset != null) {
            try {
                return new InputStreamReader(in, charset);
            } catch (UnsupportedEncodingException e) {
                // Unknown charset name: decode with the platform default instead.
            }
        }
        return new InputStreamReader(in);
    }

    /** Returns the charset named in a Content-Type value, or null if absent. */
    private static String charsetOf(String contentType) {
        if (contentType == null) {
            return null;
        }
        Matcher m = CHARSET.matcher(contentType);
        return m.find() ? m.group(1) : null;
    }

    /**
     * Splits the forecast block of a page into its entries.
     *
     * <p>The last {@code weather_day_brief} block is used. The text from its
     * first line break up to the last script opening (or up to the closing div
     * when there is no script) is split on line breaks. Because that text starts
     * with a line break, element 0 is the empty string and element {@code k} is
     * the k-th entry. Not private so it can be exercised without network access.
     *
     * @param html page text; may be null
     * @return the pieces, or an empty array when the block or a line break is missing
     */
    static String[] extractEntries(String html) {
        if (html == null) {
            return new String[0];
        }
        String block = null;
        Matcher divs = BRIEF_DIV.matcher(html);
        while (divs.find()) {
            block = divs.group();
        }
        if (block == null) {
            return new String[0];
        }

        Matcher firstBreak = LINE_BREAK.matcher(block);
        if (!firstBreak.find()) {
            return new String[0];
        }
        int start = firstBreak.start();
        int end = block.lastIndexOf(SCRIPT_OPEN);
        if (end < start) {
            // No trailing script: the entries run up to the closing div.
            end = block.length() - DIV_CLOSE.length();
        }
        return LINE_BREAK.split(block.substring(start, end));
    }

    /**
     * Breaks one forecast entry into its printable parts.
     *
     * <p>The entry is expected to consist of a label, a weather segment
     * (daytime and night text separated by a comma), a temperature segment
     * (low and high separated by a tilde), a wind direction and a wind power,
     * separated by whitespace. Not private so it can be exercised without
     * network access.
     *
     * @param entry one entry as returned by {@link #extractEntries(String)}
     * @return nine values in this order: weather segment, temperature segment,
     *         wind segment (direction and power joined by one space), daytime
     *         weather, night weather, low temperature, high temperature, wind
     *         direction, wind power
     * @throws IllegalArgumentException if the entry is null or lacks a field
     */
    static String[] parseDay(String entry) {
        if (entry == null) {
            throw new IllegalArgumentException("Forecast entry is null");
        }
        String[] tokens = SEPARATOR.split(entry);
        // A leading separator produces an empty first token; skip it.
        int first = (tokens.length > 0 && tokens[0].length() == 0) ? 1 : 0;
        if (tokens.length - first < 5) {
            throw new IllegalArgumentException(
                    "Unexpected forecast layout (too few fields): " + entry);
        }
        String weather = tokens[first + 1];
        String temperature = tokens[first + 2];
        String windDirection = tokens[first + 3];
        String windPower = tokens[first + 4];

        String[] dayAndNight = COMMA.split(weather);
        if (dayAndNight.length < 2) {
            throw new IllegalArgumentException(
                    "Unexpected forecast layout (weather segment): " + entry);
        }
        String[] lowAndHigh = TILDE.split(temperature);
        if (lowAndHigh.length < 2) {
            throw new IllegalArgumentException(
                    "Unexpected forecast layout (temperature segment): " + entry);
        }

        return new String[] {
            weather,
            temperature,
            windDirection + " " + windPower,
            dayAndNight[0].replace(DAYTIME, ""),
            dayAndNight[1].replace(NIGHT, ""),
            lowAndHigh[0].replace(TEMPERATURE, ""),
            COMMA.matcher(lowAndHigh[1]).replaceAll(""),
            windDirection,
            windPower
        };
    }
}
输出:
7天后安庆天气: 白天阵雨,夜间阵雨, 气温2℃~9℃, 北偏东风 二级
8天后安庆天气: 白天阵雨,夜间阵雨, 气温-1℃~7℃, 北偏东风 二级
9天后安庆天气: 白天局部多云,夜间局部多云, 气温-1℃~8℃, 北偏东风 二级<script
白天阵雨,夜间阵雨,
气温2℃~9℃,
北偏东风 二级
阵雨
阵雨
2℃
9℃
北偏东风
二级
阵雨
阵雨
-1℃
7℃
北偏东风
二级
白天局部多云,夜间局部多云,
气温-1℃~8℃,
北偏东风 二级<script
局部多云
局部多云
-1℃
8℃
北偏东风
二级