1、选择一个天气网页
static String url = "http://www.weather.com.cn/weather/101291401.shtml";// alternative page: http://www.weather.com.cn/weather15d/101291401.shtml
2、设计存储天气信息的数据库
字段 | 含义 |
date | 日期 |
status | 状态 |
maxTem | 最高气温 |
minTem | 最低气温 |
updateTime | 更新时间 |
title | 风向(取自页面元素的 title 属性) |
windLevel | 风向及风力等级 |
city | 城市 |
del_flag | 删除标识(0:有效,1:已删除) |
3、代码编写
import com.hlframe.modules.frontendautoconfig.entity.WeatherEnter;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;
import javax.annotation.PostConstruct;
import java.net.URL;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.ListIterator;
import java.util.Set;
/**
 * Scrapes a weather.com.cn forecast page with Jsoup and stores the entry
 * labelled "明天" (tomorrow) in the {@code weather} table, then flags rows
 * from earlier days as deleted.
 *
 * <p>All members are static so the job can be started by Spring (via the
 * annotations on {@link #parseWeather()}) or directly from {@link #main}.
 */
@Component
public class HtmlParseUtil {

    /**
     * Forecast page to parse. An alternative page is
     * http://www.weather.com.cn/weather15d/101291401.shtml.
     */
    static String url = "http://www.weather.com.cn/weather/101291401.shtml";

    /**
     * Running key for parsed entries. It is never reset, so it keeps growing
     * across invocations; it is only used as a map key and in the log line.
     */
    static int i = 0;

    /**
     * Downloads the forecast page, parses up to six {@code <li>} entries
     * (the first one is skipped), inserts every entry whose date text
     * contains "明天", and finally calls {@link #delete()}.
     *
     * <p>Runs once after bean construction and then on the cron schedule
     * {@code 0 0 5 * * ?}.
     *
     * @throws IllegalStateException if the page has no element with id {@code 7d}
     * @throws Exception if the page cannot be downloaded or parsed
     */
    @PostConstruct
    @Scheduled(cron = "0 0 5 * * ?")
    public static void parseWeather() throws Exception {
        Document document = Jsoup.parse(new URL(url), 30000);
        Element element = document.getElementById("7d");
        if (element == null) {
            // getElementById returns null when the id is absent; fail with a clear message instead of an NPE.
            throw new IllegalStateException("No element with id \"7d\" found at " + url);
        }

        // Collect all <li> tags inside the forecast container.
        Elements elements = element.getElementsByTag("li");
        HashMap<Integer, WeatherEnter> hash = new HashMap<>();

        // Start at index 1 to skip the first entry; clamp so an empty list does not throw.
        ListIterator<Element> listIter = elements.listIterator(Math.min(1, elements.size()));
        int j = 1;
        while (listIter.hasNext() && j < 7) {
            Element e1 = listIter.next();
            String date = e1.getElementsByTag("h1").text();
            String status = e1.getElementsByClass("wea").eq(0).text();
            String tem = e1.getElementsByClass("tem").eq(0).text();
            String windLevel = e1.getElementsByClass("win").eq(0).text();
            // Wind direction is carried in the title attribute.
            String title = e1.getElementsByClass("NNW").eq(0).attr("title");

            // Temperature text is expected as "max/min"; tolerate a missing second part.
            String[] tems = tem.split("/");
            String maxTem = tems[0];
            String minTem = tems.length > 1 ? tems[1] : "";

            WeatherEnter weather = new WeatherEnter();
            i++;
            weather.setCity("丽江");
            weather.setDate(date);
            weather.setStatus(status);
            weather.setMaxTem(maxTem);
            weather.setMinTem(minTem);
            weather.setTitle(title);
            weather.setWindLevel(windLevel);
            hash.put(i, weather);
            j++;
        }

        Set<Integer> keys = hash.keySet();
        for (Integer key : keys) {
            WeatherEnter value = hash.get(key);
            // Only the entry whose date text contains "明天" is persisted.
            if (!value.getDate().contains("明天")) {
                continue;
            }
            saveWeather(value);
            System.out.println(key + "," + value.toString());
        }
        delete();
    }

    /**
     * Inserts one forecast row. Database failures are reported on stdout and
     * swallowed so the caller can carry on.
     *
     * @param value the parsed forecast entry to store
     */
    private static void saveWeather(WeatherEnter value) {
        String sql = "insert into weather(`date`,`status`,`maxTem`,`minTem`,`updateTime`,`title`,`windLevel`,`city`,`del_flag`) values(?,?,?,?,?,?,?,?,?)";
        // try-with-resources closes the statement and the connection even when the insert fails.
        try (Connection conn = Datautils.getConnection();
             PreparedStatement ps = conn.prepareStatement(sql)) {
            ps.setString(1, value.getDate());
            ps.setString(2, value.getStatus());
            ps.setString(3, value.getMaxTem());
            ps.setString(4, value.getMinTem());
            ps.setDate(5, new java.sql.Date(new Date().getTime()));
            ps.setString(6, value.getTitle());
            // The windLevel column stores wind direction followed by the wind level text.
            ps.setString(7, value.getTitle() + " " + value.getWindLevel());
            ps.setString(8, value.getCity());
            ps.setString(9, "0");
            ps.executeUpdate();
        } catch (SQLException e) {
            System.out.println("数据库访问失败");
            e.printStackTrace();
        }
    }

    /**
     * Flags every active row ({@code del_flag='0'}) whose {@code updateTime}
     * differs from today's date as deleted ({@code del_flag='1'}).
     *
     * @return the number of rows updated, or 0 if the database access failed
     */
    public static int delete() {
        SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd");
        String nowDate = df.format(new Date());
        String sql = "update weather set del_flag='1' where del_flag='0' and updateTime!=?";
        // try-with-resources closes the statement and the connection even when the update fails.
        try (Connection conn = Datautils.getConnection();
             PreparedStatement ps = conn.prepareStatement(sql)) {
            ps.setString(1, nowDate);
            return ps.executeUpdate();
        } catch (SQLException e) {
            System.out.println("数据库访问失败");
            e.printStackTrace();
        }
        return 0;
    }

    /**
     * Runs one scrape-and-store cycle from the command line.
     *
     * @param args unused
     * @throws Exception if the page cannot be downloaded or parsed
     */
    public static void main(String[] args) throws Exception {
        parseWeather();
    }
}