直接上代码，应该都能看懂：主要是从 HTML 页面中爬取天气数据。
(只是技术交流 不要做商业用途。。。 不要犯法!!!)
Record 对象是 JFinal 框架里的类；如果项目里没有引入 JFinal 的包，自己创建一个对象，或者用 Map 集合来塞值就行。
/** URL of the China Weather forecast page that is scraped. */
private static final String WEATHER_PAGE_URL = "http://www.weather.com.cn/weather/101170101.shtml";

/** Text that identifies the line carrying today's date; parsing starts at that line. */
private static final String TODAY_MARKER = "(今天)";

/** Regex used to strip everything that looks like an HTML tag from a line. */
private static final String TAG_REGEX = "(?s)<.*?/?.*?>";

/** Opening fragment of the span whose {@code title} attribute carries the wind direction. */
private static final String WIND_SPAN_PREFIX = "<span title=\"";

/** Closing fragment of the wind span once the compass letters have been removed from its class. */
private static final String WIND_SPAN_SUFFIX = "\" class=\"\"></span>";

/** Maximum number of lines scanned while looking for {@link #TODAY_MARKER}. */
private static final int MAX_HEADER_LINES = 1000;

/** Number of consecutive daily forecasts read from the page. */
private static final int FORECAST_DAYS = 7;

/** Connect/read timeout so a stalled server cannot block the caller forever. */
private static final int TIMEOUT_MILLIS = 10000;

/**
 * Downloads the forecast page and extracts one {@code Record} per day.
 *
 * <p>The parser is purely line-oriented: it locates the line containing "(今天)" and then
 * consumes a fixed number of lines per day, so it depends on the page keeping its current
 * line layout. Each record gets the keys {@code date}, {@code Info}, {@code temp},
 * {@code direct} and {@code power}.
 *
 * <p>Changes compared with the previous version: the reader and connection are always
 * released, a truncated or restructured page produces a descriptive
 * {@code java.io.IOException} instead of a {@code NullPointerException}, and the
 * "first转second" wind-direction value is now actually produced (the old comparison
 * compared a value with itself and could never be true).
 *
 * @return the parsed daily forecasts, in page order
 * @throws Exception if the page cannot be fetched, the today marker is not found, or the
 *         page ends before all expected lines were read
 */
public static List<Record> getWeather() throws Exception {
    HttpURLConnection conn = (HttpURLConnection) new URL(WEATHER_PAGE_URL).openConnection();
    conn.setConnectTimeout(TIMEOUT_MILLIS);
    conn.setReadTimeout(TIMEOUT_MILLIS);
    // Closing the reader also closes the underlying connection stream.
    try (BufferedReader br = new BufferedReader(
            new InputStreamReader(conn.getInputStream(), "UTF-8"))) {
        String todayLine = findTodayLine(br);
        List<Record> data = new ArrayList<Record>();
        for (int day = 0; day < FORECAST_DAYS; day++) {
            Record rec = new Record();

            // Date: the first day reuses the marker line, later days read a fresh line.
            String dateLine = (day == 0) ? todayLine : readRequiredLine(br);
            rec.set("date", dateLine.replaceAll(TAG_REGEX, ""));
            skipLines(br, 2);

            // Weather description; anything up to and including a leftover '>' is dropped.
            String info = readRequiredLine(br).replaceAll(TAG_REGEX, "");
            rec.set("Info", info.substring(info.indexOf(">") + 1));
            skipLines(br, 1);

            // Temperature text.
            rec.set("temp", readRequiredLine(br).replaceAll(TAG_REGEX, ""));
            // Three lines precede the wind span; the old code parsed the third one but
            // immediately discarded the result, so it is simply skipped here.
            skipLines(br, 3);

            // Wind direction, taken from the span's title attribute.
            String direct = extractWindDirection(readRequiredLine(br));
            // The following line used to be skipped unconditionally. If it is a second wind
            // span with a different direction, record "first转second".
            // NOTE(review): assumes the second span directly follows the first - confirm
            // against the live markup.
            String followingLine = br.readLine();
            if (followingLine != null && followingLine.contains(WIND_SPAN_PREFIX)) {
                String second = extractWindDirection(followingLine).trim();
                if (!second.equals(direct.trim())) {
                    direct = direct + "转" + second;
                }
            }
            rec.set("direct", direct);
            skipLines(br, 1);

            // Wind power; only the <i> wrapper is removed because the value itself may
            // start with '<', which the tag regex would swallow.
            String power = readRequiredLine(br).replace("<i>", "").replace("</i>", "");
            rec.set("power", power);
            skipLines(br, 4);

            data.add(rec);
        }
        return data;
    } finally {
        conn.disconnect();
    }
}

/** Scans up to {@link #MAX_HEADER_LINES} lines and returns the one containing the today marker. */
private static String findTodayLine(BufferedReader br) throws java.io.IOException {
    for (int i = 0; i < MAX_HEADER_LINES; i++) {
        String line = br.readLine();
        if (line == null) {
            break;
        }
        if (line.contains(TODAY_MARKER)) {
            return line;
        }
    }
    throw new java.io.IOException(
            "Marker " + TODAY_MARKER + " not found in the first " + MAX_HEADER_LINES
                    + " lines of " + WEATHER_PAGE_URL);
}

/** Reads one line and fails with a descriptive error if the page ended early. */
private static String readRequiredLine(BufferedReader br) throws java.io.IOException {
    String line = br.readLine();
    if (line == null) {
        throw new java.io.IOException("Unexpected end of page while parsing " + WEATHER_PAGE_URL);
    }
    return line;
}

/** Reads and discards {@code count} lines; reaching the end of the page here is tolerated. */
private static void skipLines(BufferedReader br, int count) throws java.io.IOException {
    for (int i = 0; i < count; i++) {
        br.readLine();
    }
}

/**
 * Reduces a wind span line to the text of its {@code title} attribute.
 *
 * <p>The letters E/W/N/S are removed first, presumably so that a compass code in the class
 * attribute becomes empty and the fixed suffix {@code " class=""></span>} matches.
 */
private static String extractWindDirection(String line) {
    String withoutCompassLetters =
            line.replace("E", "").replace("W", "").replace("N", "").replace("S", "");
    return withoutCompassLetters.replace(WIND_SPAN_PREFIX, "").replace(WIND_SPAN_SUFFIX, "");
}
获取的结果