1. 在解析 HTML 之前,还需导入 jsoup-1.10.2.jar(下面的代码同时使用了 log4j,也需一并导入)。
2. 解析 HTML,代码如下:
package com.od.cn;
import java.io.BufferedWriter;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Downloads a weather page with jsoup, extracts one {@code Weather} record per
 * collapsible section, and stores the records in a local text file as a JSON
 * document.
 */
public class JsoupParserHtml {
    private static final Logger LOGGER = Logger.getLogger(JsoupParserHtml.class);

    /** Page scraped by {@link #main(String[])}. */
    private static final String WEATHER_URL =
            "http://www.zhpmsc.org.cn/WeChat/monitorController/zoneSk?winzoom=1#";

    /** File written by {@link #saveFile(List)}. */
    private static final String OUTPUT_FILE = "d:\\weather.txt";

    /** Charset of the output file; fixed so the result does not depend on the platform default. */
    private static final String OUTPUT_CHARSET = "UTF-8";

    // Positions of the value cells inside a section's <td> list. Only the
    // odd-indexed cells are read; presumably the even-indexed cells hold the
    // labels -- confirm against the page markup.
    private static final int TD_AIR_TEMPERATURE = 1;
    private static final int TD_RAIN_FALL = 3;
    private static final int TD_RELATIVE_WET = 5;
    private static final int TD_WIND_POWER = 7;
    private static final int TD_WIND_DIRECTION = 9;
    private static final int TD_DATE = 11;

    /** Smallest number of cells a section must have for every index above to be valid. */
    private static final int MIN_TD_COUNT = TD_DATE + 1;

    /**
     * Fetches {@code url} and converts every element of class {@code part_se}
     * whose {@code data-role} attribute is {@code collapsible} into a
     * {@code Weather} record.
     *
     * <p>A section with fewer than {@value #MIN_TD_COUNT} cells is skipped with
     * a warning instead of aborting the whole run.
     *
     * @param url address of the page to download
     * @return the extracted records; empty (never {@code null}) when the page
     *         cannot be fetched or contains no matching section
     */
    private List<Weather> parserHtmlByHttp(String url) {
        List<Weather> weathers = new ArrayList<Weather>();
        try {
            Document document = Jsoup.connect(url).get();
            Elements sections = document.getElementsByClass("part_se");
            for (Element section : sections) {
                if (!"collapsible".equals(section.attr("data-role"))) {
                    continue;
                }
                String area = section.select("h1").text();
                Elements cells = section.select("td");
                if (cells.size() < MIN_TD_COUNT) {
                    // Indexing a short list would throw IndexOutOfBoundsException
                    // and discard every record collected so far.
                    LOGGER.warn("单元格数量不足,已跳过:" + area + "(" + cells.size() + ")");
                    continue;
                }
                Weather weather = new Weather();
                weather.setArea(area);
                weather.setAirTemperature(cells.get(TD_AIR_TEMPERATURE).text());
                weather.setRainFall(cells.get(TD_RAIN_FALL).text());
                weather.setRelativeWet(cells.get(TD_RELATIVE_WET).text());
                weather.setWindPower(cells.get(TD_WIND_POWER).text());
                weather.setWindDirection(cells.get(TD_WIND_DIRECTION).text());
                weather.setDate(cells.get(TD_DATE).text());
                weathers.add(weather);
            }
            // Logged inside the try block so it is not reported after a failed download.
            LOGGER.info("成功获取网页数据");
        } catch (IOException e) {
            // Pass the exception itself so the stack trace reaches the log.
            LOGGER.error("解析网页异常:" + e.getMessage(), e);
        }
        return weathers;
    }

    /**
     * Writes the records to {@value #OUTPUT_FILE} as a UTF-8 encoded JSON
     * document of the form {@code {"date":"yyyy-MM-dd","data":[{...},...]}}.
     * I/O failures are logged, not thrown.
     *
     * @param weathers records to store; nothing is written when {@code null}
     */
    private void saveFile(List<Weather> weathers) {
        if (weathers == null) {
            return;
        }
        String json = toJson(weathers);
        BufferedWriter writer = null;
        try {
            writer = new BufferedWriter(
                    new OutputStreamWriter(new FileOutputStream(OUTPUT_FILE), OUTPUT_CHARSET));
            writer.write(json);
            // Flush before reporting success so a buffered write error is not
            // deferred until close().
            writer.flush();
            LOGGER.info("已保存文件");
        } catch (IOException e) {
            LOGGER.error("保存文件异常:" + e.getMessage(), e);
        } finally {
            if (writer != null) {
                try {
                    writer.close();
                } catch (IOException e) {
                    LOGGER.error("关闭流异常:" + e.getMessage(), e);
                }
            }
        }
    }

    /** Builds the JSON document: today's date plus one object per record. */
    private static String toJson(List<Weather> weathers) {
        StringBuilder json = new StringBuilder();
        json.append('{');
        appendJsonField(json, "date", new SimpleDateFormat("yyyy-MM-dd").format(new Date()));
        json.append(",\"data\":[");
        boolean first = true;
        for (Weather weather : weathers) {
            if (!first) {
                json.append(',');
            }
            first = false;
            json.append('{');
            appendJsonField(json, "area", weather.getArea());
            json.append(',');
            appendJsonField(json, "airTemperature", weather.getAirTemperature());
            json.append(',');
            appendJsonField(json, "rainFall", weather.getRainFall());
            json.append(',');
            appendJsonField(json, "relativeWet", weather.getRelativeWet());
            json.append(',');
            appendJsonField(json, "windPower", weather.getWindPower());
            json.append(',');
            appendJsonField(json, "windDirection", weather.getWindDirection());
            json.append(',');
            appendJsonField(json, "dateTime", weather.getDate());
            json.append('}');
        }
        json.append("]}");
        return json.toString();
    }

    /** Appends {@code "name":"value"}, or {@code "name":null} when the value is {@code null}. */
    private static void appendJsonField(StringBuilder out, String name, Object value) {
        appendJsonString(out, name);
        out.append(':');
        if (value == null) {
            out.append("null");
        } else {
            appendJsonString(out, value.toString());
        }
    }

    /** Appends {@code text} as a quoted JSON string, escaping quotes, backslashes and control characters. */
    private static void appendJsonString(StringBuilder out, String text) {
        out.append('"');
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            switch (c) {
                case '"':
                    out.append("\\\"");
                    break;
                case '\\':
                    out.append("\\\\");
                    break;
                case '\n':
                    out.append("\\n");
                    break;
                case '\r':
                    out.append("\\r");
                    break;
                case '\t':
                    out.append("\\t");
                    break;
                case '\b':
                    out.append("\\b");
                    break;
                case '\f':
                    out.append("\\f");
                    break;
                default:
                    if (c < 0x20) {
                        // Remaining control characters have no short escape.
                        out.append(String.format("\\u%04x", (int) c));
                    } else {
                        out.append(c);
                    }
                    break;
            }
        }
        out.append('"');
    }

    /**
     * Configures log4j from {@code WebRoot/conf/log4j.properties}, scrapes the
     * weather page and saves the result.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        PropertyConfigurator.configure("WebRoot/conf/log4j.properties");
        LOGGER.info("启动程序");
        JsoupParserHtml parser = new JsoupParserHtml();
        List<Weather> weathers = parser.parserHtmlByHttp(WEATHER_URL);
        parser.saveFile(weathers);
        LOGGER.info("程序结束");
    }
}