package tangjiu;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import util.StringUtil;
import lookchem.checkuser.ProxyUtilLookchem;
/**
 * Crawls listing pages, follows the detail links found on each page with a fixed
 * pool of worker threads, and hands the extracted fields to {@code TangJiuDao.save}.
 *
 * <p>Each listing page is split into {@link #WORKER_COUNT} contiguous chunks; a
 * {@link CountDownLatch} makes the caller wait until every chunk has been processed
 * before the next page is fetched.
 */
public class TangJiu {

    /** Size of the shared thread pool and number of chunks each listing page is split into. */
    private static final int WORKER_COUNT = 5;

    /** First listing page index to crawl (inclusive). */
    private static final int FIRST_PAGE = 681;

    /** Last listing page index to crawl (inclusive). */
    private static final int LAST_PAGE = 681;

    /** Shared pool reused for every listing page instead of creating new threads each time. */
    static ExecutorService threadPool = Executors.newFixedThreadPool(WORKER_COUNT);

    /**
     * Entry point: crawls the configured page range, then shuts the pool down.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            new TangJiu().forMaxPage();
        } finally {
            // The pool threads are non-daemon; without shutdown() the JVM would never exit.
            threadPool.shutdown();
        }
    }

    /** Crawls every listing page from {@link #FIRST_PAGE} to {@link #LAST_PAGE}, one at a time. */
    public void forMaxPage() {
        for (int i = FIRST_PAGE; i <= LAST_PAGE; i++) {
            getLinkList("http://*****/index--0-0-0-0-" + i + ".html");
        }
    }

    /**
     * Fetches one listing page, extracts its detail links and processes them in
     * parallel, blocking until all workers are done.
     *
     * <p>The links are divided into {@link #WORKER_COUNT} contiguous chunks whose size is
     * the link count divided by the worker count, rounded up, so every link is visited
     * regardless of how many the page contains.
     *
     * @param url address of the listing page
     */
    public void getLinkList(String url) {
        System.out.println("***********" + url);
        String content = ProxyUtil.getProxyStr(url, "", "");
        final List<String> list = StringUtil.getalltagslist(content, "<td><a href=\"", "\"");
        // Treat a missing list like an empty one so the workers simply have nothing to do.
        final int listsize = (list == null) ? 0 : list.size();
        // Ceiling division: a page with more links than expected is still fully covered.
        final int chunk = (listsize + WORKER_COUNT - 1) / WORKER_COUNT;
        final CountDownLatch countdown = new CountDownLatch(WORKER_COUNT); // one count per worker task

        for (int num = 0; num < WORKER_COUNT; num++) {
            System.out.println("----------------共有的线程个数:" + countdown.getCount());
            final int from = num * chunk;
            final int to = from + chunk;
            threadPool.execute(new Runnable() {
                @Override
                public void run() {
                    try {
                        System.out.println(from + "@@" + to);
                        for (int i = from; i < to && i < listsize; i++) {
                            System.out.println(Thread.currentThread().getName() + "#" + i);
                            try {
                                getContent("http://jxs.tangjiu.com/" + list.get(i));
                            } catch (RuntimeException e) {
                                // One failing detail page must not abort the rest of the chunk.
                                e.printStackTrace();
                            }
                        }
                    } finally {
                        // Always release the latch, otherwise await() below would block forever
                        // when a worker dies.
                        countdown.countDown();
                        System.out.println(Thread.currentThread().getName() + "##线程结束------------------------");
                        System.out.println("----------------剩余的线程个数:" + countdown.getCount());
                    }
                }
            });
        }

        try {
            // Wait for every worker of this page before returning to the caller.
            countdown.await();
            System.out.println("##结束等待------------------------");
        } catch (InterruptedException e) {
            e.printStackTrace();
            // Restore the interrupt flag so callers can observe the interruption.
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Fetches one detail page, extracts the company fields from its HTML, prints the
     * non-empty ones and passes all of them to {@code TangJiuDao.save}.
     *
     * <p>NOTE(review): the last argument of {@code StringUtil.getFirststr} (0 for the
     * company name, 1 for the table rows) is an opaque mode flag here; confirm its
     * meaning against the helper. The helper is assumed to never return {@code null}.
     *
     * @param url address of the detail page
     */
    public void getContent(String url) {
        System.out.println("---------" + url);
        String content = ProxyUtil.getProxyStr(url, "", "");
        String company = StringUtil.getFirststr(content, "<h2 class=\"h22\">企业名称", "</h2>", 0).trim();
        String channels = StringUtil.getFirststr(content, "经营渠道</th>", "</tr>", 1);
        String merchandise = StringUtil.getFirststr(content, "主营商品</th>", "</tr>", 1); // main products
        String brand = StringUtil.getFirststr(content, "代理品牌</th>", "</tr>", 1);
        String rank = StringUtil.getFirststr(content, "代理级别</th>", "</tr>", 1);
        String market = StringUtil.getFirststr(content, "主要市场</th>", "</tr>", 1);
        String buyinfo = StringUtil.getFirststr(content, "常年求购</th>", "</tr>", 1);
        String connecter = StringUtil.getFirststr(content, "联系人</th>", "</tr>", 1);
        String address = StringUtil.getFirststr(content, "联系地址</th>", "</tr>", 1);
        String code = StringUtil.getFirststr(content, "邮政编码</th>", "</tr>", 1);
        String tel = StringUtil.getFirststr(content, "联系电话</th>", "</tr>", 1);
        String fax = StringUtil.getFirststr(content, "传真</th>", "</tr>", 1);
        String phone = StringUtil.getFirststr(content, "手机</th>", "</tr>", 1);
        String email = StringUtil.getFirststr(content, "E-MAIL</th>", "</tr>", 1);
        String website = StringUtil.getFirststr(content, "网址</th>", "</tr>", 1);
        String[] ss = {company, channels, merchandise, brand, rank, market, buyinfo, connecter,
                address, code, tel, fax, phone, email, website};
        for (String s : ss) {
            if (s.length() > 0) {
                System.out.println(s);
            }
        }
        TangJiuDao.save(ss);
    }
}
使用线程池和CountDownLatch计数器来进行抓取的简单示例1
最新推荐文章于 2024-08-22 16:57:46 发布