Java爬取12306

写写自己在比赛项目开发中学到的爬虫,从12306开始。

要做一个爬虫,一定要会看网页的源代码,并学会使用浏览器抓包。一般先抓包,看返回内容里有没有自己想要的数据:如果接口直接返回json数据就方便很多,只需根据url规则拼接链接,再用json库解析返回的数据,不需要使用jsoup解析页面。12306的余票查询接口返回的就是json数据。

可以复制链接地址出来:

https://kyfw.12306.cn/otn/leftTicket/query?leftTicketDTO.train_date=2017-12-06&leftTicketDTO.from_station=SHH&leftTicketDTO.to_station=CSQ&purpose_codes=ADULT

注:12306的这个url会发生改变,所以爬虫代码里的这个url需要更新。

看到这个url就很明了,我们需要传入三个参数:leftTicketDTO.train_date(出发日期)、leftTicketDTO.from_station(出发站编码)、leftTicketDTO.to_station(到达站编码)。多看几条就知道purpose_codes这个参数的值总是ADULT。

现在就是需要获得各个站点的编码:https://kyfw.12306.cn/otn/resources/js/framework/station_name.js?station_version=1.9030

我使用的是sql server数据库,数据库名称:TPS,用户:sa,密码:123456

数据库配置文件:

<?xml version="1.0" encoding="UTF-8"?>
<!-- Database connection settings read by DBUtil.getConn(). Every child of
     the root is visited in order and each one overwrites the previously
     read values, so when several connection entries exist the last wins. -->
<connections>
	<connection>
		<!-- JDBC driver class name; DBUtil passes it to Class.forName. -->
		<classname>com.microsoft.sqlserver.jdbc.SQLServerDriver</classname>
		<!-- JDBC URL passed to DriverManager.getConnection. -->
		<url>jdbc:sqlserver://localhost:1433;databaseName=TPS</url>
		<!-- Login name and password passed to DriverManager.getConnection.
		     NOTE(review): sample credentials in plain text; replace them
		     outside of a local demo. -->
		<user>sa</user>
		<password>123456</password>
	</connection>
</connections>

数据库连接类:

package com.util;

import java.io.File;
import java.sql.Connection;
import java.sql.DriverManager;
import java.util.Iterator;

import org.dom4j.Document;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;

/**
 * JDBC connection helper configured from the classpath resource
 * {@code datebaseconfig.xml}.
 *
 * <p>The most recently opened connection is kept in the static field
 * {@link #conn}, so {@link #CloseConn()} always acts on the connection
 * returned by the latest {@link #getConn()} call.
 */
public class DBUtil {
	/** Connection returned by the most recent successful {@link #getConn()} call. */
	public static Connection conn;

	/**
	 * Opens a new database connection using the driver class, URL, user and
	 * password found in {@code datebaseconfig.xml}.
	 *
	 * @return the new connection, or {@code null} if the configuration could
	 *         not be read or the connection could not be opened (the cause is
	 *         printed to standard error)
	 */
	public static Connection getConn() {
		String className = null;
		String url = null;
		String user = null;
		String password = null;
		try {
			// Read the resource through its URL instead of new File(url.getFile()):
			// the File form breaks when the classpath location contains
			// URL-encoded characters (e.g. spaces) or lives inside a jar.
			java.net.URL configUrl = DBUtil.class.getClassLoader()
					.getResource("datebaseconfig.xml");
			if (configUrl == null) {
				throw new java.io.FileNotFoundException(
						"datebaseconfig.xml not found on the classpath");
			}
			SAXReader reader = new SAXReader();
			Document doc = reader.read(configUrl);
			Element root = doc.getRootElement();
			Iterator<Element> it = root.elementIterator();
			// Each child element overwrites the previous values, so the last
			// entry in the file is the one that is used.
			while (it.hasNext()) {
				Element connection = it.next();
				className = connection.elementText("classname");
				url = connection.elementText("url");
				user = connection.elementText("user");
				password = connection.elementText("password");
			}
			Class.forName(className);
			conn = DriverManager.getConnection(url, user, password);
			return conn;
		} catch (Exception e) {
			e.printStackTrace();
			return null;
		}
	}

	/**
	 * Closes the connection held in {@link #conn}. Does nothing when no
	 * connection has been opened yet; close failures are printed, not thrown.
	 */
	public static void CloseConn() {
		if (conn == null) {
			return;
		}
		try {
			conn.close();
		} catch (Exception e) {
			e.printStackTrace();
		}
	}

}
忽略SSL证书校验的网络连接类:

package com.util;

import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;

import javax.net.ssl.HostnameVerifier;
import javax.net.ssl.HttpsURLConnection;
import javax.net.ssl.SSLSession;

/**
 * Minimal HTTP(S) GET helper.
 *
 * <p><b>Security warning:</b> every call to {@link #get(String, String)}
 * installs a trust-all {@code SSLSocketFactory} and an accept-all
 * {@code HostnameVerifier} as the defaults of {@link HttpsURLConnection}.
 * Those defaults are static, so certificate and host name checks are switched
 * off for all later HTTPS connections that rely on them, not only for the
 * request made here. Do not reuse this class where the peer must be
 * authenticated.
 */
public class GetNetUtil {

	/** Installs a default SSL socket factory that accepts any certificate chain. */
	private static void trustAllHttpsCertificates() throws Exception {
		javax.net.ssl.TrustManager[] trustAllCerts = new javax.net.ssl.TrustManager[1];
		javax.net.ssl.TrustManager tm = new miTM();
		trustAllCerts[0] = tm;
		javax.net.ssl.SSLContext sc = javax.net.ssl.SSLContext.getInstance("SSL");
		sc.init(null, trustAllCerts, null);
		javax.net.ssl.HttpsURLConnection.setDefaultSSLSocketFactory(sc.getSocketFactory());
	}

	/** Trust manager whose checks never fail, i.e. every certificate is accepted. */
	static class miTM implements javax.net.ssl.TrustManager,javax.net.ssl.X509TrustManager {
		@Override
		public java.security.cert.X509Certificate[] getAcceptedIssuers() {
			// The X509TrustManager contract requires a non-null (possibly empty) array.
			return new java.security.cert.X509Certificate[0];
		}

		public boolean isServerTrusted(
				java.security.cert.X509Certificate[] certs) {
			return true;
		}

		public boolean isClientTrusted(
				java.security.cert.X509Certificate[] certs) {
			return true;
		}

		@Override
		public void checkServerTrusted(
				java.security.cert.X509Certificate[] certs, String authType)
				throws java.security.cert.CertificateException {
			// Intentionally empty: returning normally means "trusted".
		}

		@Override
		public void checkClientTrusted(
				java.security.cert.X509Certificate[] certs, String authType)
				throws java.security.cert.CertificateException {
			// Intentionally empty: returning normally means "trusted".
		}
	}

	/**
	 * Sends a GET request and returns the response body as text.
	 *
	 * @param urlAll
	 *            full request URL
	 * @param charset
	 *            name of the charset used to decode the response
	 * @return the response body with every line terminated by {@code "\r\n"},
	 *         or {@code null} if the request failed for any reason (the cause
	 *         is printed to standard error)
	 */
	public static String get(String urlAll, String charset) {
		BufferedReader reader = null;
		HttpURLConnection connection = null;
		String result = null;
		StringBuilder sbf = new StringBuilder();
		// Browser-like User-Agent sent with the request.
		String userAgent = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.66 Safari/537.36";
		try {
			trustAllHttpsCertificates();
			HostnameVerifier hv = new HostnameVerifier() {
				@Override
				public boolean verify(String urlHostName, SSLSession session) {
					System.out.println("Warning: URL Host: " + urlHostName
							+ " vs. " + session.getPeerHost());
					return true;
				}
			};
			HttpsURLConnection.setDefaultHostnameVerifier(hv);
			URL url = new URL(urlAll);
			connection = (HttpURLConnection) url.openConnection();
			connection.setRequestMethod("GET");
			connection.setReadTimeout(30000);
			connection.setConnectTimeout(30000);
			connection.setRequestProperty("User-agent", userAgent);
			connection.connect();
			InputStream is = connection.getInputStream();
			reader = new BufferedReader(new InputStreamReader(is, charset));
			String strRead = null;
			while ((strRead = reader.readLine()) != null) {
				sbf.append(strRead);
				sbf.append("\r\n");
			}
			result = sbf.toString();
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			// Release the stream and the connection on the failure path too.
			if (reader != null) {
				try {
					reader.close();
				} catch (java.io.IOException ignored) {
					// Nothing useful can be done if closing fails.
				}
			}
			if (connection != null) {
				connection.disconnect();
			}
		}
		return result;
	}

}
解析12306返回数据:

package com.util;

import java.util.ArrayList;
import java.util.List;

import net.sf.json.JSONArray;
import net.sf.json.JSONObject;

import com.dao.StationDAO;

/**
 * Queries the 12306 left-ticket endpoint and splits the returned records
 * into their {@code |}-separated fields.
 */
public class Json12306Util {
	/**
	 * Looks up the station codes for both cities, requests the left-ticket
	 * JSON for the given date and parses the {@code data.result} array.
	 * The URL, the raw response and every parsed field are also printed to
	 * standard output.
	 *
	 * @param startCity departure station name, resolved through {@code StationDAO}
	 * @param arrCity   arrival station name, resolved through {@code StationDAO}
	 * @param date      travel date as placed in the {@code train_date} query
	 *                  parameter (the sample URL uses {@code yyyy-MM-dd})
	 * @return one {@code String[]} of fields per record; empty when a station
	 *         code is unknown, the request fails, the status is not 200 or an
	 *         error occurs while parsing
	 */
	public List json12306(String startCity, String arrCity,
			String date){
		List<String[]> list = new ArrayList<String[]>();
		try {
			StationDAO stationDAO = new StationDAO();
			String startScode = stationDAO.findScodeBySname(startCity);
			String arrScode = stationDAO.findScodeBySname(arrCity);
			if (startScode == null || arrScode == null) {
				// Without both codes the URL would contain the text "null".
				System.out.println("未找到站点编码:" + startCity + " / " + arrCity);
				return list;
			}
			String charset = "UTF-8";
			String urlname = "https://kyfw.12306.cn/otn/leftTicket/query?leftTicketDTO.train_date="
					+ date
					+ "&leftTicketDTO.from_station="
					+ startScode
					+ "&leftTicketDTO.to_station="
					+ arrScode
					+ "&purpose_codes=ADULT";
			System.out.println(urlname);
			String jsonResult = GetNetUtil.get(urlname, charset);
			System.out.println(jsonResult);
			if (jsonResult == null) {
				// GetNetUtil.get returns null when the request failed.
				return list;
			}
			JSONObject obj = JSONObject.fromObject(jsonResult);
			if (!obj.containsKey("httpstatus")) {
				return list;
			}
			String status = obj.getString("httpstatus");
			System.out.println("连接状况码:" + status);
			if (!"200".equals(status)) {
				return list;
			}
			JSONObject data = obj.getJSONObject("data");
			System.out.println(data);
			// Walk the array with the JSON library instead of cutting the text
			// at "[", "]" and ",": that failed on an empty result and would
			// split any record that itself contains a comma.
			JSONArray result = data.getJSONArray("result");
			System.out.println(result);
			for (int i = 0; i < result.size(); i++) {
				String record = result.getString(i);
				System.out.println(record);
				String[] fields = record.split("\\|");
				for (int j = 0; j < fields.length; j++) {
					System.out.println(fields[j]);
				}
				list.add(fields);
			}
		} catch (Exception e) {
			e.printStackTrace();
		}
		return list;
	}

}

站点数据库操作类,需要在数据库中建立站点表(station),将12306站点(sname)与编码(scode)数据保存:

package com.dao;

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;

import com.util.DBUtil;

/** Data access for the {@code station} table (columns {@code sname}, {@code scode}). */
public class StationDAO {

	/**
	 * Finds the station code for a station name.
	 *
	 * @param sName station name matched against {@code station.sname}
	 * @return the {@code scode} of the last matching row, or {@code null} when
	 *         no row matches, no connection is available or the query fails
	 *         (failures are printed to standard error)
	 */
	public String findScodeBySname(String sName) {
		String s = null;
		String sql = "select a.scode from station a where a.sname=?";
		Connection conn = null;
		PreparedStatement ps = null;
		ResultSet rs = null;
		try {
			conn = DBUtil.getConn();
			if (conn == null) {
				// DBUtil.getConn() has already printed the cause.
				return null;
			}
			ps = conn.prepareStatement(sql);
			ps.setString(1, sName);
			rs = ps.executeQuery();
			while (rs.next()) {
				s = rs.getString("scode");
			}
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			// Close in reverse order of creation, also when the query failed.
			try {
				if (rs != null) {
					rs.close();
				}
			} catch (Exception e) {
				e.printStackTrace();
			}
			try {
				if (ps != null) {
					ps.close();
				}
			} catch (Exception e) {
				e.printStackTrace();
			}
			try {
				if (conn != null) {
					conn.close();
				}
			} catch (Exception e) {
				e.printStackTrace();
			}
		}
		return s;
	}

}
测试类:

import java.util.Scanner;

import com.util.Json12306Util;


/** Console entry point: asks for both cities and the date, then runs the query. */
public class Test {

	/** Prints {@code message} and returns the next token read from {@code in}. */
	private static String ask(Scanner in, String message) {
		System.out.println(message);
		return in.next();
	}

	/**
	 * Reads departure city, arrival city and travel date from standard input
	 * and passes them to {@code Json12306Util.json12306}.
	 */
	public static void main(String[] args) {
		Json12306Util crawler = new Json12306Util();
		Scanner in = new Scanner(System.in);
		String from = ask(in, "请输入出发城市,例如:北京");
		String to = ask(in, "请输入到达城市,例如:天津");
		String day = ask(in, "请输入出发日期:例如:2017-12-10");
		crawler.json12306(from, to, day);
	}

}
运行结果:


程序可能有考虑不全面,或者有bug,欢迎大家指正。

代码已经上传,大家可以下载:

http://download.csdn.net/download/qq_34075012/10117428

  • 0
    点赞
  • 9
    收藏
    觉得还不错? 一键收藏
  • 5
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 5
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值