网络爬虫并存入MySql数据库

这次爬了天气网上北京某月的历史数据,最下面附了表的设计以及最后数据库中的结果

package net;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.sql.Connection;
import java.util.Date;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

class Point{
	private Date date;
	private int zuigao;
	private int zuidi;
	private String tianqi;
	private String fengxiang;
	private String fengli;
	
	public Date getDate() {
		return date;
	}

	public void setDate(Date date) {
		this.date = date;
	}

	public int getZuigao() {
		return zuigao;
	}

	public void setZuigao(int zuigao) {
		this.zuigao = zuigao;
	}

	public int getZuidi() {
		return zuidi;
	}

	public void setZuidi(int zuidi) {
		this.zuidi = zuidi;
	}

	public String getTianqi() {
		return tianqi;
	}

	public void setTianqi(String tianqi) {
		this.tianqi = tianqi;
	}

	public String getFengxiang() {
		return fengxiang;
	}

	public void setFengxiang(String fengxiang) {
		this.fengxiang = fengxiang;
	}

	public String getFengli() {
		return fengli;
	}

	public void setFengli(String fengli) {
		this.fengli = fengli;
	}

	public Point() {
		super();
	}

	public Point(String date, int zuigao, int zuidi, String tianqi, String fengxiang, String fengli) {
		super();
		DateFormat df = new SimpleDateFormat("yyyy-MM-dd");
		try {
			this.date = df.parse(date);
		} catch (ParseException e) {
			e.printStackTrace();
		}
		this.zuigao = zuigao;
		this.zuidi = zuidi;
		this.tianqi = tianqi;
		this.fengxiang = fengxiang;
		this.fengli = fengli;
	}
}

public class SpiderTest {
	
	public static String getContent(String urlStr, String s) {
		StringBuilder sb = new StringBuilder();
		URL url;
		try {
			url = new URL(urlStr);
			BufferedReader bf = new BufferedReader(new InputStreamReader(url.openStream(), s));
			String temp = "";
			while((temp=bf.readLine())!=null) {
				sb.append(temp);
			}
		} catch (MalformedURLException e) {
			e.printStackTrace();
		} catch (UnsupportedEncodingException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		}
		return sb.toString();
		
	}
	
	public static List<Point> getStr(String des, String regex){
		Pattern p = Pattern.compile(regex);
		Matcher m = p.matcher(des);
		List<Point> list = new ArrayList<Point>();
		while(m.find()) {
			Point pp = new Point(m.group(1),Integer.parseInt(m.group(2)),Integer.parseInt(m.group(3)),m.group(4),m.group(5),m.group(6));
			list.add(pp);
		}
		return list;
	}
	
	public static void mySql(String sql,List<Point> list) {
		String jdbc = "com.mysql.jdbc.Driver";
		String user = "root";
		String password = "123456";
		String url = "jdbc:mysql://localhost/t_user";
		Connection con = null;
		PreparedStatement ps = null;
		try {
			Class.forName(jdbc);
			con = DriverManager.getConnection(url, user, password);
			
			ps = con.prepareStatement(sql);
			for(int i = 0; i<list.size();i++) {
				java.sql.Date sqlDate=new java.sql.Date(list.get(i).getDate().getTime());
				ps.setDate(1, sqlDate);
				ps.setInt(2,list.get(i).getZuigao());
				ps.setInt(3,list.get(i).getZuidi());
				ps.setString(4,list.get(i).getTianqi());
				ps.setString(5,list.get(i).getFengxiang());
				ps.setString(6,list.get(i).getFengli());
				
				ps.executeUpdate();
			}
			
		} catch (ClassNotFoundException e) {
			e.printStackTrace();
		} catch (SQLException e) {
			e.printStackTrace();
		}finally {
			try {
				ps.close();
			}catch (Exception e) {
				e.printStackTrace();
			}
			try {
				con.close();
			}catch (Exception e) {
				e.printStackTrace();
			}
		}
		
	}
	public static String prt(Point p) {
		DateFormat df = new SimpleDateFormat("yyyy-MM-dd");
		String date = df.format(p.getDate());
		return "时间: "+date+" 最高: "+p.getZuigao()+" 最低: "+p.getZuidi()+" 天气: "+p.getTianqi()+" 风向: "+p.getFengxiang()+" 风力: "+p.getFengli();
	}
	
	public static void main(String[] args) {
		String des = getContent("https://lishi.tianqi.com/beijing/201809.html", "gbk");
		System.out.println(des);
		List<Point> list = getStr(des, "href.*?>([\\d-]+)</a>[\\s\\S]+?(\\d+)[\\s\\S\\w]+?(\\d+)[\\s\\S\\w]+?([\\u4e00-\\u9fa5]+)[\\s\\S\\w]+?([\\u4e00-\\u9fa5]+)[\\s\\S\\w]+?([\\d\\u4e00-\\u9fa5]+)</li>");
		mySql("insert into tianqi(date,zuigao,zuidi,tianqi,fengxiang,fengli)values(?,?,?,?,?,?)",list);
		for(Point temp: list) {
			System.out.println(prt(temp));
		}
	}
}



在这里插入图片描述
在这里插入图片描述

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值