使用线程池和 CountDownLatch 计数器进行抓取的简单示例 1

package tangjiu;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import util.StringUtil;
import lookchem.checkuser.ProxyUtilLookchem;
/**
 * Small crawler example: downloads a listing page, extracts the detail-page
 * links from it, and fetches the detail pages on a fixed thread pool. A
 * {@link CountDownLatch} makes the calling thread wait until every worker
 * task of the current listing page has finished.
 */
public class TangJiu {
	/** Number of threads in the shared pool; also the minimum number of tasks per listing page. */
	private static final int POOL_SIZE = 5;
	/** Number of consecutive links handled by one task. */
	private static final int LINKS_PER_TASK = 6;

	/** Shared fixed-size pool, reused instead of creating a new thread per task. */
	static ExecutorService threadPool=Executors.newFixedThreadPool(POOL_SIZE);

	/**
	 * Entry point: crawls the configured listing pages, then shuts the pool
	 * down so its non-daemon worker threads do not keep the JVM alive.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		try {
			new TangJiu().forMaxPage();
		} finally {
			// Already-submitted tasks still run to completion; only new submissions are refused.
			threadPool.shutdown();
		}
	}

	/**
	 * Iterates over the listing page numbers (currently only page 681) and
	 * crawls each listing page in turn.
	 */
	public void forMaxPage(){
		for(int i=681; i<=681; i++){
			getLinkList("http://*****/index--0-0-0-0-"+i+".html");
		}
	}

	/**
	 * Downloads one listing page, extracts the detail links found between
	 * {@code <td><a href="} and the next {@code "}, and fetches them in chunks of
	 * {@value #LINKS_PER_TASK} on the shared pool. Blocks until all tasks
	 * submitted for this page have finished.
	 *
	 * <p>At least {@value #POOL_SIZE} tasks are always submitted (some may have
	 * nothing to do); more are added when the page carries more links than
	 * that many tasks can cover, so no link is skipped.
	 *
	 * @param url address of the listing page
	 */
	public void getLinkList(String url){
		System.out.println("***********"+url);
		String content = ProxyUtil.getProxyStr(url, "","");
		final List<String> list = StringUtil.getalltagslist(content, "<td><a href=\"", "\"");
		final int listsize = list.size();
		// Ceiling division so a trailing partial chunk still gets its own task.
		final int taskCount = Math.max(POOL_SIZE, (listsize + LINKS_PER_TASK - 1) / LINKS_PER_TASK);
		final CountDownLatch countdown = new CountDownLatch(taskCount);
		for(int num=0; num<taskCount; num++){
			System.out.println("----------------共有的线程个数:"+countdown.getCount());
			final int temp = num;
			threadPool.execute(
				new Runnable(){
					@Override
					public void run() {
						try {
							System.out.println((temp*LINKS_PER_TASK)+"@@"+((temp+1)*LINKS_PER_TASK));
							for(int i=temp*LINKS_PER_TASK; i<(temp+1)*LINKS_PER_TASK&&i<listsize; i++){
								System.out.println(Thread.currentThread().getName()+"#"+i);
								getContent("http://jxs.tangjiu.com/"+list.get(i));
							}
						} finally {
							// Always release the latch; otherwise a failing task would
							// leave the caller blocked in await() forever.
							countdown.countDown();
						}
						System.out.println(Thread.currentThread().getName()+"##线程结束------------------------");
						System.out.println("----------------剩余的线程个数:"+countdown.getCount());
					}
				}
			);
		}
		try {
			// Wait here until every task above has counted down.
			countdown.await();
			System.out.println("##结束等待------------------------");
		} catch (InterruptedException e) {
			// Restore the interrupt status so callers can still observe the interruption.
			Thread.currentThread().interrupt();
			e.printStackTrace();
		}
	}

	/**
	 * Downloads one detail page, extracts the company and contact fields from
	 * it, prints the non-empty ones, and passes all of them to
	 * {@code TangJiuDao.save}.
	 *
	 * <p>The fields are located by the Chinese label that precedes them in the
	 * page markup: company name, sales channels, main products, represented
	 * brands, agency level, main markets, standing purchase requests, contact
	 * person, address, postal code, telephone, fax, mobile, e-mail and website.
	 *
	 * @param url address of the detail page
	 */
	public void getContent(String url){
		System.out.println("---------"+url);
		String content = ProxyUtil.getProxyStr(url, "","");

		// NOTE(review): the meaning of the trailing 0/1 argument of getFirststr is
		// not visible from this file - confirm against StringUtil.
		String company = StringUtil.getFirststr(content, "<h2 class=\"h22\">企业名称", "</h2>", 0).trim();
		String channels = StringUtil.getFirststr(content, "经营渠道</th>", "</tr>", 1);
		String merchandise = StringUtil.getFirststr(content, "主营商品</th>", "</tr>", 1);
		String brand = StringUtil.getFirststr(content, "代理品牌</th>", "</tr>", 1);
		String rank = StringUtil.getFirststr(content, "代理级别</th>", "</tr>", 1);
		String market = StringUtil.getFirststr(content, "主要市场</th>", "</tr>", 1);
		String buyinfo = StringUtil.getFirststr(content, "常年求购</th>", "</tr>", 1);

		String connecter = StringUtil.getFirststr(content, "联系人</th>", "</tr>", 1);
		String address = StringUtil.getFirststr(content, "联系地址</th>", "</tr>", 1);
		String code = StringUtil.getFirststr(content, "邮政编码</th>", "</tr>", 1);
		String tel = StringUtil.getFirststr(content, "联系电话</th>", "</tr>", 1);
		String fax = StringUtil.getFirststr(content, "传真</th>", "</tr>", 1);
		String phone = StringUtil.getFirststr(content, "手机</th>", "</tr>", 1);
		String email = StringUtil.getFirststr(content, "E-MAIL</th>", "</tr>", 1);
		String website = StringUtil.getFirststr(content, "网址</th>", "</tr>", 1);

		String[] ss= {company,channels,merchandise,brand,rank,market,buyinfo,connecter,address,code,tel,fax,phone,email,website};
		for(String s : ss){
			if(s.length() > 0) {
				System.out.println(s);
			}
		}
		TangJiuDao.save(ss);
	}
}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值