java网页数据抓取

对于加密的网站我还没去研究,不知道能不能抓取,现在只是对一些没有加密的网站进行网页数据抓取。刚开始写的时候以为很多网站都能抓取,后来发现很多网站都加密了。本来以为可以通过网页数据检测工具监测出这些地址的数据变化,但是只能监测到一些通过js显示的数据,依然不能抓取到加密的网站。唉,这个问题以后再说吧。

我抓取的网站是手机号查询和身份证查询的网站。查询身份证的网站的原始地址是 http://qq.ip138.com/idsearch/index.asp ,但当你输入自己的身份证号并查询时,地址就会变成 http://qq.ip138.com/idsearch/index.asp?action=idcard&userid=你输入的身份证号&B1=%B2%E9+%D1%AF 。根据这个变化,就可以抓取到输入特定身份证号后的网页源代码,再通过解析读取到需要的数据。下面是代码:

import java.net.* ;
import java.io.* ;
import java.util.regex.* ;
/**
 * Command-line demo that prints the fields scraped for one hard-coded mobile
 * number and one hard-coded ID-card number.
 */
public class Capture{
	/**
	 * Runs the mobile lookups followed by the ID-card lookups and prints each
	 * result on its own line.
	 *
	 * @param args ignored
	 * @throws Exception whatever the underlying lookups throw
	 */
	public static void main(String[] args)throws Exception{
		final String mobile = "15023141745" ;
		System.out.println("*************************手机号查询************************") ;
		GrabMobile mobileLookup = new GrabMobile() ;
		String location = mobileLookup.grabMobileLocation(mobile) ;
		System.out.println("我的位置是:" + location) ;
		String cardType = mobileLookup.grabMobileType(mobile) ;
		System.out.println("手机卡类型是:" + cardType) ;
		String postCode = mobileLookup.grabMobilePost(mobile) ;
		System.out.println("我的邮编是:" + postCode) ;

		final String idCard = "362203199208243575" ;
		System.out.println("*************************身份证查询************************") ;
		GrabIdentity identityLookup = new GrabIdentity() ;
		String sex = identityLookup.grabIdentitySex(idCard) ;
		System.out.println("我的性别是:" + sex) ;
		String birth = identityLookup.grabIdentityBirth(idCard) ;
		System.out.println("我的生日是:" + birth) ;
		String home = identityLookup.grabIdentityHome(idCard) ;
		System.out.println("我的家乡是:" + home) ;
	}
}
/**
 * Scrapes the ip138.com mobile-number lookup page and cuts individual fields
 * out of the returned HTML.
 *
 * <p>Each public lookup issues one HTTP GET, decodes the body as GBK, joins the
 * lines without separators, then locates two marker strings and applies fixed
 * character offsets to them. The offsets are tied to the page markup the
 * author observed and must be revisited if that markup changes.</p>
 */
class GrabMobile{
	/** Lookup URL; the mobile number is appended as the value of {@code mobile}. */
	private static final String LOOKUP_URL = "http://www.ip138.com:8080/search.asp?action=mobile&mobile=" ;
	/** Charset used to decode the response body. */
	private static final String PAGE_CHARSET = "GBK" ;
	/** Matches one run of characters in the range U+4E00..U+9FA5; compiled once. */
	private static final Pattern CHINESE_RUN = Pattern.compile("([\u4e00-\u9fa5]+)") ;

	/**
	 * Fetches the lookup page for {@code m} and returns the Chinese characters
	 * found in the slice between the "卡号归属地" and "卡 类 型" markers.
	 *
	 * @param m mobile number appended to the lookup URL
	 * @return concatenated Chinese character runs from the slice
	 * @throws Exception if the page cannot be fetched or read
	 * @throws StringIndexOutOfBoundsException if the markers are missing or the
	 *         offsets do not fit the returned page
	 */
	public String grabMobileLocation(String m)throws Exception{
		String page = fetchPage(LOOKUP_URL + m) ;
		return drawChMob(slice(page, "卡号归属地", 42, "卡 类 型", -33)) ;
	}

	/**
	 * Fetches the lookup page for {@code m} and returns the Chinese characters
	 * found in the slice between the "卡 类 型" marker and the "区 号" table
	 * cell, minus the first extracted character.
	 *
	 * @param m mobile number appended to the lookup URL
	 * @return the extracted characters without the leading one
	 * @throws Exception if the page cannot be fetched or read
	 * @throws StringIndexOutOfBoundsException if the markers are missing, the
	 *         offsets do not fit, or the slice holds no Chinese characters
	 */
	public String grabMobileType(String m)throws Exception{
		String page = fetchPage(LOOKUP_URL + m) ;
		String chinese = drawChMob(slice(page, "卡 类 型", 12, "<TD align=\"center\">区 号</TD>", 0)) ;
		if (chinese.isEmpty()) {
			throw new StringIndexOutOfBoundsException(
					"Unexpected page layout: no Chinese text found in the card-type slice") ;
		}
		// The first extracted character is dropped, exactly as before.
		// NOTE(review): presumably residue of the label text - confirm against the live page.
		return chinese.substring(1) ;
	}

	/**
	 * Fetches the lookup page for {@code m} and returns the raw slice between
	 * the "邮 编" and "更详细的.." markers (no character filtering is applied).
	 *
	 * @param m mobile number appended to the lookup URL
	 * @return the raw page text between the two offset markers
	 * @throws Exception if the page cannot be fetched or read
	 * @throws StringIndexOutOfBoundsException if the markers are missing or the
	 *         offsets do not fit the returned page
	 */
	public String grabMobilePost(String m)throws Exception{
		String page = fetchPage(LOOKUP_URL + m) ;
		return slice(page, "邮 编", 40, "更详细的..", -55) ;
	}

	/**
	 * Keeps only the characters in the range U+4E00..U+9FA5 of {@code str},
	 * concatenating every matching run in order of appearance.
	 *
	 * @param str text to filter
	 * @return the concatenated runs, or an empty string when there are none
	 */
	public String drawChMob(String str){
		StringBuilder kept = new StringBuilder() ;
		Matcher matcher = CHINESE_RUN.matcher(str) ;
		while (matcher.find()) {
			kept.append(matcher.group()) ;
		}
		return kept.toString() ;
	}

	/**
	 * Downloads {@code strUrl} and returns the body decoded as GBK with the
	 * lines joined without separators. The reader is closed and the connection
	 * disconnected on every path, including failures.
	 */
	private static String fetchPage(String strUrl) throws IOException {
		HttpURLConnection httpUrlCon = (HttpURLConnection) new URL(strUrl).openConnection() ;
		try (BufferedReader bufRead = new BufferedReader(
				new InputStreamReader(httpUrlCon.getInputStream(), PAGE_CHARSET))) {
			StringBuilder page = new StringBuilder() ;
			String line ;
			while ((line = bufRead.readLine()) != null) {
				page.append(line) ;
			}
			return page.toString() ;
		} finally {
			httpUrlCon.disconnect() ;
		}
	}

	/**
	 * Returns {@code page.substring(indexOf(startMarker) + startOffset,
	 * indexOf(endMarker) + endOffset)}, failing with a descriptive message
	 * instead of slicing from a bogus index when a marker is absent.
	 */
	private static String slice(String page, String startMarker, int startOffset,
			String endMarker, int endOffset) {
		int start = page.indexOf(startMarker) ;
		// Searched from the beginning of the page, not from the start marker.
		int end = page.indexOf(endMarker) ;
		if (start < 0 || end < 0) {
			throw new StringIndexOutOfBoundsException(
					"Unexpected page layout: marker not found (start=" + start + ", end=" + end + ")") ;
		}
		int from = start + startOffset ;
		int to = end + endOffset ;
		if (from > to || to > page.length()) {
			throw new StringIndexOutOfBoundsException(
					"Unexpected page layout: slice " + from + ".." + to
					+ " does not fit a page of length " + page.length()) ;
		}
		return page.substring(from, to) ;
	}
}
/**
 * Scrapes the ip138.com ID-card lookup page and cuts individual fields out of
 * the returned HTML.
 *
 * <p>Each public lookup issues one HTTP GET, decodes the body as GBK, joins the
 * lines without separators, then locates two marker strings and applies a fixed
 * character offset to the first one. The offsets are tied to the page markup
 * the author observed and must be revisited if that markup changes.</p>
 */
class GrabIdentity{
	/** Lookup URL up to and including the {@code userid=} parameter name. */
	private static final String LOOKUP_URL_PREFIX = "http://qq.ip138.com/idsearch/index.asp?action=idcard&userid=" ;
	/** Query-string tail appended after the ID number. */
	private static final String LOOKUP_URL_SUFFIX = "&B1=%B2%E9+%D1%AF" ;
	/** Charset used to decode the response body. */
	private static final String PAGE_CHARSET = "GBK" ;
	/** Matches one run of characters in the range U+4E00..U+9FA5; compiled once. */
	private static final Pattern CHINESE_RUN = Pattern.compile("([\u4e00-\u9fa5]+)") ;

	/**
	 * Fetches the lookup page for {@code userid} and returns the first run of
	 * Chinese characters in the slice between the " 别" and "出生日期" markers.
	 *
	 * @param userid ID-card number inserted into the lookup URL
	 * @return the first Chinese run of the slice, or the raw slice when it
	 *         contains none
	 * @throws Exception if the page cannot be fetched or read
	 * @throws StringIndexOutOfBoundsException if the markers are missing or the
	 *         offsets do not fit the returned page
	 */
	public String grabIdentitySex(String userid)throws Exception{
		String page = fetchPage(LOOKUP_URL_PREFIX + userid + LOOKUP_URL_SUFFIX) ;
		return drawCh(slice(page, " 别", 7, "出生日期", 0)) ;
	}

	/**
	 * Fetches the lookup page for {@code userid} and returns the raw text
	 * between the "出生日期" table cell and the following row boundary marker.
	 *
	 * @param userid ID-card number inserted into the lookup URL
	 * @return the raw page text between the two markers
	 * @throws Exception if the page cannot be fetched or read
	 * @throws StringIndexOutOfBoundsException if the markers are missing or the
	 *         offsets do not fit the returned page
	 */
	public String grabIdentityBirth(String userid)throws Exception{
		String page = fetchPage(LOOKUP_URL_PREFIX + userid + LOOKUP_URL_SUFFIX) ;
		return slice(page, "出生日期:</td><td class=\"tdc2\">", 27, "</td><tr><tr><td class=", 0) ;
	}

	/**
	 * Fetches the lookup page for {@code userid} and returns the raw text
	 * between the "证 地" table cell and the row that starts with "部分或".
	 *
	 * @param userid ID-card number inserted into the lookup URL
	 * @return the raw page text between the two markers
	 * @throws Exception if the page cannot be fetched or read
	 * @throws StringIndexOutOfBoundsException if the markers are missing or the
	 *         offsets do not fit the returned page
	 */
	public String grabIdentityHome(String userid)throws Exception{
		String page = fetchPage(LOOKUP_URL_PREFIX + userid + LOOKUP_URL_SUFFIX) ;
		return slice(page, "证 地:</td><td class=\"tdc2\">", 31,
				"<br/></td></tr><tr><td class=\"tdc3\" valign=\"top\" align=\"right\">部分或", 0) ;
	}

	/**
	 * Returns the first run of characters in the range U+4E00..U+9FA5 found in
	 * {@code str}; when there is no such run the input is returned unchanged.
	 *
	 * @param str text to search
	 * @return the first matching run, or {@code str} itself
	 */
	public String drawCh(String str){
		Matcher matcher = CHINESE_RUN.matcher(str) ;
		return matcher.find() ? matcher.group() : str ;
	}

	/**
	 * Downloads {@code strUrl} and returns the body decoded as GBK with the
	 * lines joined without separators. The reader is closed and the connection
	 * disconnected on every path, including failures.
	 */
	private static String fetchPage(String strUrl) throws IOException {
		HttpURLConnection httpUrlCon = (HttpURLConnection) new URL(strUrl).openConnection() ;
		try (BufferedReader bufRead = new BufferedReader(
				new InputStreamReader(httpUrlCon.getInputStream(), PAGE_CHARSET))) {
			StringBuilder page = new StringBuilder() ;
			String line ;
			while ((line = bufRead.readLine()) != null) {
				page.append(line) ;
			}
			return page.toString() ;
		} finally {
			httpUrlCon.disconnect() ;
		}
	}

	/**
	 * Returns {@code page.substring(indexOf(startMarker) + startOffset,
	 * indexOf(endMarker) + endOffset)}, failing with a descriptive message
	 * instead of slicing from a bogus index when a marker is absent.
	 */
	private static String slice(String page, String startMarker, int startOffset,
			String endMarker, int endOffset) {
		int start = page.indexOf(startMarker) ;
		// Searched from the beginning of the page, not from the start marker.
		int end = page.indexOf(endMarker) ;
		if (start < 0 || end < 0) {
			throw new StringIndexOutOfBoundsException(
					"Unexpected page layout: marker not found (start=" + start + ", end=" + end + ")") ;
		}
		int from = start + startOffset ;
		int to = end + endOffset ;
		if (from > to || to > page.length()) {
			throw new StringIndexOutOfBoundsException(
					"Unexpected page layout: slice " + from + ".." + to
					+ " does not fit a page of length " + page.length()) ;
		}
		return page.substring(from, to) ;
	}
}

待会儿上传改装成的Android小程序,可以进行手机号查询和身份证查询。


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值