Java 天气爬虫

1、选择一个天气网页

    static String url = "http://www.weather.com.cn/weather/101291401.shtml";//http://www.weather.com.cn/weather15d/101291401.shtml

2、设计存储天气信息的数据库

字段 | 含义
date | 日期
status | 天气状态
maxTem | 最高气温
minTem | 最低气温
updateTime | 更新时间
title | 风向(取自页面元素的 title 属性)
windLevel | 风力等级(入库时保存为"风向 风力等级")
city | 城市
del_flag | 删除标识(0:有效,1:已删除)

3、代码编写

import com.hlframe.modules.frontendautoconfig.entity.WeatherEnter;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;

import javax.annotation.PostConstruct;
import java.net.URL;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.ListIterator;
import java.util.Set;

/**
 * Scrapes a weather.com.cn forecast page and stores the entry labelled
 * "明天" (tomorrow) in the {@code weather} table, then soft-deletes rows that
 * were not written today.
 *
 * <p>All members are static; the class is registered as a Spring component only
 * so that the annotations on {@link #parseWeather()} are picked up.
 */
@Component
public class HtmlParseUtil {
    // Page to parse. Alternative 15-day page:
    // http://www.weather.com.cn/weather15d/101291401.shtml
    static String url = "http://www.weather.com.cn/weather/101291401.shtml";
    // Running counter used as the map key / log prefix of each parsed entry.
    // It is never reset, so the keys keep growing across invocations.
    static int i = 0;

    /**
     * Downloads the forecast page, parses the list items inside the element with
     * id {@code 7d}, inserts every parsed entry whose date text contains "明天",
     * and finally calls {@link #delete()}.
     *
     * <p>Database failures are reported on stdout and do not abort the run.
     *
     * <p>NOTE(review): {@code @PostConstruct} and {@code @Scheduled} are placed on
     * a static method here; confirm that the Spring version in use honours them on
     * static methods. The method is kept static because {@link #main(String[])}
     * calls it statically.
     *
     * @throws Exception if the page cannot be fetched
     * @throws IllegalStateException if the page has no element with id {@code 7d}
     */
    @PostConstruct
    @Scheduled(cron = "0 0 5 * * ?")
    public static void parseWeather() throws Exception {
        Document document = Jsoup.parse(new URL(url), 30000);
        Element element = document.getElementById("7d");
        if (element == null) {
            // Fail with a descriptive message instead of a bare NullPointerException.
            throw new IllegalStateException(
                    "Forecast container with id \"7d\" not found at " + url);
        }

        HashMap<Integer, WeatherEnter> hash = collectForecasts(element.getElementsByTag("li"));
        Set<Integer> keys = hash.keySet();
        for (Integer key : keys) {
            WeatherEnter value = hash.get(key);
            // Only the entry whose date text contains "明天" is persisted.
            if (!value.getDate().contains("明天")) {
                continue;
            }
            insertWeather(value);
            System.out.println(key + "," + value.toString());
        }
        delete();
    }

    /**
     * Converts the forecast list items into {@code WeatherEnter} objects keyed by
     * the running counter {@link #i}. The first list item is skipped and at most
     * six items are read.
     */
    private static HashMap<Integer, WeatherEnter> collectForecasts(Elements elements) {
        HashMap<Integer, WeatherEnter> hash = new HashMap<>();
        if (elements.isEmpty()) {
            // listIterator(1) would throw IndexOutOfBoundsException on an empty list.
            return hash;
        }
        ListIterator<Element> listIter = elements.listIterator(1);
        int j = 1;
        while (listIter.hasNext() && j < 7) {
            Element e1 = listIter.next();
            String date = e1.getElementsByTag("h1").text();
            String status = e1.getElementsByClass("wea").eq(0).text();
            String tem = e1.getElementsByClass("tem").eq(0).text();
            String windLevel = e1.getElementsByClass("win").eq(0).text();
            // The title attribute of the "NNW" element holds the wind direction.
            String title = e1.getElementsByClass("NNW").eq(0).attr("title");

            // The temperature text is expected as "max/min". When a part is missing,
            // store an empty string instead of failing with
            // ArrayIndexOutOfBoundsException.
            String[] tems = tem.split("/");
            String maxTem = tems.length > 0 ? tems[0] : "";
            String minTem = tems.length > 1 ? tems[1] : "";

            WeatherEnter weather = new WeatherEnter();
            i++;
            weather.setCity("丽江");
            weather.setDate(date);
            weather.setStatus(status);
            weather.setMaxTem(maxTem);
            weather.setMinTem(minTem);
            weather.setTitle(title);
            weather.setWindLevel(windLevel);
            hash.put(i, weather);
            j++;
        }
        return hash;
    }

    /**
     * Inserts one forecast row with {@code del_flag = "0"} and today's date as
     * {@code updateTime}. The connection and statement are closed on every path;
     * a {@link SQLException} is reported on stdout and swallowed.
     */
    private static void insertWeather(WeatherEnter value) {
        String sql = "insert into weather(`date`,`status`,`maxTem`,`minTem`,`updateTime`,`title`,`windLevel`,`city`,`del_flag`) values(?,?,?,?,?,?,?,?,?)";
        try (Connection conn = Datautils.getConnection();
             PreparedStatement ps = conn.prepareStatement(sql)) {
            ps.setString(1, value.getDate());
            ps.setString(2, value.getStatus());
            ps.setString(3, value.getMaxTem());
            ps.setString(4, value.getMinTem());
            ps.setDate(5, new java.sql.Date(new Date().getTime()));
            ps.setString(6, value.getTitle());
            // The windLevel column receives "<title> <windLevel>", as in the original.
            // NOTE(review): confirm the wind direction is meant to be stored in both columns.
            ps.setString(7, value.getTitle() + " " + value.getWindLevel());
            ps.setString(8, value.getCity());
            ps.setString(9, "0");
            ps.executeUpdate();
        } catch (SQLException e) {
            System.out.println("数据库访问失败");
            e.printStackTrace();
        }
    }

    /**
     * Soft-deletes stale rows: sets {@code del_flag} to '1' on every row whose
     * {@code del_flag} is '0' and whose {@code updateTime} differs from today's
     * date formatted as {@code yyyy-MM-dd}.
     *
     * @return the number of rows updated, or 0 if the database access failed
     */
    public static int delete() {
        SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd");
        String nowDate = df.format(new Date());
        String sql = "update weather set del_flag='1' where del_flag='0' and updateTime!=?";
        try (Connection conn = Datautils.getConnection();
             PreparedStatement ps = conn.prepareStatement(sql)) {
            // Bound as a string so the comparison matches the original literal form.
            ps.setString(1, nowDate);
            return ps.executeUpdate();
        } catch (SQLException e) {
            System.out.println("数据库访问失败");
            e.printStackTrace();
        }
        return 0;
    }

    /** Runs one scrape-and-store cycle from the command line. */
    public static void main(String[] args) throws Exception {
        parseWeather();
    }
}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

柚几哥哥

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值