Java 抓取数据的代码:Java 抓取 HTML 网页数据内容 Demo

package com.Action;

import java.util.HashMap;

import java.util.Iterator;

import java.util.List;

import java.util.Map;

import java.util.Map.Entry;

import com.Dao.zrdpDao;

import com.Dao.Impl.zrdpDaoImpl;

import com.Model.Piliang;

import com.Model.Qzpiliang;

/**
 * Action that scrapes shop listings from a remote site and persists them through
 * {@link zrdpDao}.
 *
 * <p>Three workflows are exposed, each returning a fixed result string:
 * <ul>
 *   <li>{@link #getshopinfoFromweb()} - scrape and store "transfer" listings;</li>
 *   <li>{@link #getshopinfoFromwebQZ()} - scrape and store "wanted-to-rent" listings;</li>
 *   <li>{@link #dpPaichong()} - de-duplicate stored shops.</li>
 * </ul>
 *
 * <p>Inputs ({@code pageurl}, {@code urlindex}, {@code cityflag}, {@code zrDao}) are plain bean
 * properties that must be set through the setters before a workflow is invoked.
 * NOTE(review): the String results and bean accessors look like a Struts-style action - confirm
 * against the framework configuration.
 */
public class zhuaqu {

    /** Page links collected for the transfer workflow (iterated by {@link #getshopinfoFromweb()}). */
    private Map pageLink = new HashMap();

    /** Page links collected for the wanted-to-rent workflow (iterated by {@link #getshopinfoFromwebQZ()}). */
    private Map pageLinkQZ = new HashMap();

    /** DAO that performs the actual scraping and database work. */
    private zrdpDao zrDao;

    /**
     * Result of the most recent DAO store/de-duplication call. It is overwritten for every item,
     * so after a workflow it only reflects the last item that completed without an exception.
     */
    private boolean flag;

    /** Most recently scraped transfer record. */
    private Piliang zrdp;

    /** Most recently scraped wanted-to-rent record. */
    private Qzpiliang qzzrdp;

    /** Page parameter: index of the listing page to scrape. */
    private int urlindex;

    /** Address from which page links are scraped. */
    private String pageurl;

    /** Shop collection used for de-duplication (only exposed through accessors in this class). */
    private List dplist;

    /** Shop contact information (only exposed through accessors in this class). */
    private String tel;

    /** Ids of duplicated shops, as returned by the DAO. */
    private List dparrs;

    private String hrefs;

    /** City marker passed to the DAO and stamped onto every scraped record. */
    private int cityflag;

    /**
     * Scrapes transfer listings and stores them.
     *
     * <p>First asks the DAO for the page links of the configured page, then scrapes and stores
     * each linked page ({@code <link value> + ".htm"}). A failure on one link is printed and does
     * not stop the remaining links from being processed.
     *
     * @return always {@code "getshopinfosuc"}
     */
    @SuppressWarnings({"rawtypes", "unchecked"})
    public String getshopinfoFromweb() {
        // Collect the page links.
        try {
            Map links = zrDao.getUsePageLink(pageurl, urlindex, cityflag);
            // Treat a null result as "no links"; storing null would make the loop below
            // fail with an uncaught NullPointerException.
            pageLink = (links != null) ? links : new HashMap();
        } catch (Exception e) {
            // Best effort: keep whatever links were already present.
            e.printStackTrace();
        }

        // Store every linked record. pageLink may still be null if a caller injected null.
        if (pageLink != null) {
            for (Object item : pageLink.entrySet()) {
                Map.Entry link = (Map.Entry) item;
                try {
                    zrdp = zrDao.getShopInfoFromWeb(link.getValue() + ".htm"); // scrape one transfer record
                    zrdp.setCityFlag(cityflag);
                    flag = zrDao.intoDatabase(zrdp); // store that record
                } catch (Exception e) {
                    // One bad page must not abort the whole batch.
                    e.printStackTrace();
                }
            }
        }
        return "getshopinfosuc";
    }

    /**
     * Scrapes wanted-to-rent listings and stores them.
     *
     * <p>Mirrors {@link #getshopinfoFromweb()} but uses the {@code QZ} DAO operations and the
     * {@code pageLinkQZ} map. A failure on one link is printed and does not stop the remaining
     * links from being processed.
     *
     * @return always {@code "getqzshopinfosuc"}
     */
    @SuppressWarnings({"rawtypes", "unchecked"})
    public String getshopinfoFromwebQZ() {
        // Collect the page links.
        try {
            Map links = zrDao.getUsePageLinkQZ(pageurl, urlindex, cityflag);
            // Treat a null result as "no links" (see getshopinfoFromweb).
            pageLinkQZ = (links != null) ? links : new HashMap();
        } catch (Exception e) {
            // Best effort: keep whatever links were already present.
            e.printStackTrace();
        }

        // Store every linked record. pageLinkQZ may still be null if a caller injected null.
        if (pageLinkQZ != null) {
            for (Object item : pageLinkQZ.entrySet()) {
                Map.Entry link = (Map.Entry) item;
                try {
                    qzzrdp = zrDao.getShopInfoFromWebQZ(link.getValue() + ".htm"); // scrape one wanted-to-rent record
                    qzzrdp.setCityFlag(cityflag);
                    flag = zrDao.intoDatabaseQZ(qzzrdp); // store that record
                } catch (Exception e) {
                    // One bad page must not abort the whole batch.
                    e.printStackTrace();
                }
            }
        }
        return "getqzshopinfosuc";
    }

    /**
     * De-duplicates shops for the configured city.
     *
     * <p>Fetches the duplicated shop ids from the DAO and runs the DAO de-duplication for each of
     * them. As in the scraping workflows, a failure on one id is printed and the remaining ids are
     * still processed. An exception thrown while fetching the id list propagates to the caller.
     *
     * @return always {@code "paichongsuc"}
     */
    public String dpPaichong() {
        dparrs = zrDao.getSameDpId(cityflag);
        if (dparrs == null) {
            // Nothing to de-duplicate.
            return "paichongsuc";
        }
        for (int i = 0; i < dparrs.size(); i++) {
            try {
                flag = zrDao.paichongdp(dparrs.get(i).toString());
            } catch (Exception e) {
                // Isolate the failure to this id so later ids are still handled.
                e.printStackTrace();
            }
        }
        return "paichongsuc";
    }

    /**
     * Single-record scraping test: scrapes one transfer record from a hard-coded local file path
     * and keeps it in {@code zrdp}. Failures are printed.
     *
     * @return always {@code "ddd"}
     */
    public String dddd() {
        try {
            zrdp = zrDao.getShopInfoFromWeb("C://Documents and Settings//Administrator//桌面//lx.mht");
        } catch (Exception e) {
            e.printStackTrace();
        }
        return "ddd";
    }

    /**
     * Manual smoke test: fetches the page links for a hard-coded address (page index 1, city
     * flag 1) directly from {@link zrdpDaoImpl} and prints the resulting map.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        zrdpDaoImpl dd = new zrdpDaoImpl();
        Map mm = dd.getUsePageLink("http://192.168.1.47/plpwmanagers/", 1, 1);
        System.out.println(mm);
    }

    // ---- Bean accessors -------------------------------------------------------------------

    public Map getPageLink() {
        return pageLink;
    }

    public void setPageLink(Map pageLink) {
        this.pageLink = pageLink;
    }

    public zrdpDao getZrDao() {
        return zrDao;
    }

    public void setZrDao(zrdpDao zrDao) {
        this.zrDao = zrDao;
    }

    public Piliang getZrdp() {
        return zrdp;
    }

    public void setZrdp(Piliang zrdp) {
        this.zrdp = zrdp;
    }

    public boolean isFlag() {
        return flag;
    }

    public void setFlag(boolean flag) {
        this.flag = flag;
    }

    public int getUrlindex() {
        return urlindex;
    }

    public void setUrlindex(int urlindex) {
        this.urlindex = urlindex;
    }

    public String getHrefs() {
        return hrefs;
    }

    public String getTel() {
        return tel;
    }

    public void setTel(String tel) {
        this.tel = tel;
    }

    public String getPageurl() {
        return pageurl;
    }

    public void setPageurl(String pageurl) {
        this.pageurl = pageurl;
    }

    public List getDplist() {
        return dplist;
    }

    public void setDplist(List dplist) {
        this.dplist = dplist;
    }

    public List getDparrs() {
        return dparrs;
    }

    public void setDparrs(List dparrs) {
        this.dparrs = dparrs;
    }

    public void setHrefs(String hrefs) {
        this.hrefs = hrefs;
    }

    public Map getPageLinkQZ() {
        return pageLinkQZ;
    }

    public void setPageLinkQZ(Map pageLinkQZ) {
        this.pageLinkQZ = pageLinkQZ;
    }

    public Qzpiliang getQzzrdp() {
        return qzzrdp;
    }

    public void setQzzrdp(Qzpiliang qzzrdp) {
        this.qzzrdp = qzzrdp;
    }

    public int getCityflag() {
        return cityflag;
    }

    public void setCityflag(int cityflag) {
        this.cityflag = cityflag;
    }

}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值