这次爬了天气网上北京某月的历史数据,最下面附了表的设计以及最后数据库中的结果
package net;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.sql.Connection;
import java.util.Date;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
class Point{
private Date date;
private int zuigao;
private int zuidi;
private String tianqi;
private String fengxiang;
private String fengli;
public Date getDate() {
return date;
}
public void setDate(Date date) {
this.date = date;
}
public int getZuigao() {
return zuigao;
}
public void setZuigao(int zuigao) {
this.zuigao = zuigao;
}
public int getZuidi() {
return zuidi;
}
public void setZuidi(int zuidi) {
this.zuidi = zuidi;
}
public String getTianqi() {
return tianqi;
}
public void setTianqi(String tianqi) {
this.tianqi = tianqi;
}
public String getFengxiang() {
return fengxiang;
}
public void setFengxiang(String fengxiang) {
this.fengxiang = fengxiang;
}
public String getFengli() {
return fengli;
}
public void setFengli(String fengli) {
this.fengli = fengli;
}
public Point() {
super();
}
public Point(String date, int zuigao, int zuidi, String tianqi, String fengxiang, String fengli) {
super();
DateFormat df = new SimpleDateFormat("yyyy-MM-dd");
try {
this.date = df.parse(date);
} catch (ParseException e) {
e.printStackTrace();
}
this.zuigao = zuigao;
this.zuidi = zuidi;
this.tianqi = tianqi;
this.fengxiang = fengxiang;
this.fengli = fengli;
}
}
public class SpiderTest {
public static String getContent(String urlStr, String s) {
StringBuilder sb = new StringBuilder();
URL url;
try {
url = new URL(urlStr);
BufferedReader bf = new BufferedReader(new InputStreamReader(url.openStream(), s));
String temp = "";
while((temp=bf.readLine())!=null) {
sb.append(temp);
}
} catch (MalformedURLException e) {
e.printStackTrace();
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
return sb.toString();
}
public static List<Point> getStr(String des, String regex){
Pattern p = Pattern.compile(regex);
Matcher m = p.matcher(des);
List<Point> list = new ArrayList<Point>();
while(m.find()) {
Point pp = new Point(m.group(1),Integer.parseInt(m.group(2)),Integer.parseInt(m.group(3)),m.group(4),m.group(5),m.group(6));
list.add(pp);
}
return list;
}
public static void mySql(String sql,List<Point> list) {
String jdbc = "com.mysql.jdbc.Driver";
String user = "root";
String password = "123456";
String url = "jdbc:mysql://localhost/t_user";
Connection con = null;
PreparedStatement ps = null;
try {
Class.forName(jdbc);
con = DriverManager.getConnection(url, user, password);
ps = con.prepareStatement(sql);
for(int i = 0; i<list.size();i++) {
java.sql.Date sqlDate=new java.sql.Date(list.get(i).getDate().getTime());
ps.setDate(1, sqlDate);
ps.setInt(2,list.get(i).getZuigao());
ps.setInt(3,list.get(i).getZuidi());
ps.setString(4,list.get(i).getTianqi());
ps.setString(5,list.get(i).getFengxiang());
ps.setString(6,list.get(i).getFengli());
ps.executeUpdate();
}
} catch (ClassNotFoundException e) {
e.printStackTrace();
} catch (SQLException e) {
e.printStackTrace();
}finally {
try {
ps.close();
}catch (Exception e) {
e.printStackTrace();
}
try {
con.close();
}catch (Exception e) {
e.printStackTrace();
}
}
}
public static String prt(Point p) {
DateFormat df = new SimpleDateFormat("yyyy-MM-dd");
String date = df.format(p.getDate());
return "时间: "+date+" 最高: "+p.getZuigao()+" 最低: "+p.getZuidi()+" 天气: "+p.getTianqi()+" 风向: "+p.getFengxiang()+" 风力: "+p.getFengli();
}
public static void main(String[] args) {
String des = getContent("https://lishi.tianqi.com/beijing/201809.html", "gbk");
System.out.println(des);
List<Point> list = getStr(des, "href.*?>([\\d-]+)</a>[\\s\\S]+?(\\d+)[\\s\\S\\w]+?(\\d+)[\\s\\S\\w]+?([\\u4e00-\\u9fa5]+)[\\s\\S\\w]+?([\\u4e00-\\u9fa5]+)[\\s\\S\\w]+?([\\d\\u4e00-\\u9fa5]+)</li>");
mySql("insert into tianqi(date,zuigao,zuidi,tianqi,fengxiang,fengli)values(?,?,?,?,?,?)",list);
for(Point temp: list) {
System.out.println(prt(temp));
}
}
}