1、使用正则表达式爬取网上资源
// Search-URL template for 51job. KEYWORD and WORKYEARS are substituted below;
// PAGES is not replaced in this snippet (presumably filled in per result page
// elsewhere; confirm against the caller).
// The query string must contain "&degreefrom=99": an HTML-entity decode of "&deg"
// had turned it into "°reefrom=99", which dropped the degreefrom parameter.
String url = "https://search.51job.com/list/030000,000000,0000,00,9,99,KEYWORD,2,PAGES.html?lang=c&stype=1&postchannel=0000&workyear=WORKYEARS&cotype=99&degreefrom=99&jobterm=99&companysize=99&lonlat=0%2C0&radius=-1&ord_field=0&confirmdate=9&fromType=6&dibiaoid=0&address=&line=&specialarea=00&from=&welfare=";
url = url.replace("KEYWORD", keyword);
url = url.replace("WORKYEARS", workyear);
使用的是51job的搜索(search)网址,
由关键词、工作年限、页数等参数组成。
2、进度条实时显示进度情况
// Build the progress string: elapsed seconds, percent, done count, total count.
long currentTime = System.currentTimeMillis();
// Whole seconds between upload.getStartTime() and now, plus one.
long time = (currentTime - upload.getStartTime()) / 1000 + 1;
// Ratio of getUploadSize() to getTotalSize() scaled to 0-100 and truncated by the int cast.
int percent = (int)((double)upload.getUploadSize() / (double)upload.getTotalSize() * 100);
// Despite the names, no unit conversion happens here: these are the raw
// getUploadSize()/getTotalSize() values cast to int.
int mb = (int)upload.getUploadSize();
int totalMb = (int)upload.getTotalSize();
// Fields are joined with "-" in the order time, percent, mb, totalMb.
result = time+"-"+percent+"-"+mb+"-"+totalMb;
实时返回已用时间、百分比、已爬取数量和总数量,各字段之间以“-”分隔。
// Polling state shared by upload() and req().
var over = false; // true once polling should stop
var inter;        // id returned by setInterval for the active polling timer

/**
 * Resets the progress UI and starts calling req() once per second.
 *
 * Safe to call more than once: any timer started by an earlier call is
 * cancelled first, so at most one polling interval is ever active.
 */
function upload(){
    // Without this, a second call would overwrite `inter` and leave the old
    // interval firing req() with no handle left to cancel it.
    // clearInterval ignores undefined or already-cleared ids.
    clearInterval(inter);
    over = false;
    $("#progress").css("width","0%");
    $("#state").html("正在上传... 总大小:0MB,已上传:0MB,0%,已用时:0秒,剩余时间:0秒,速度:0KB/S");
    inter = setInterval(req,1000);
}
/**
 * Requests the current progress once and refreshes the progress UI.
 *
 * The response is expected to be text with at least four "-"-separated
 * fields, used as: [0] elapsed seconds, [1] percent, [2] crawled count,
 * [3] total count. Anything else is ignored and polling continues.
 * When the crawled count equals the total, `over` is set and the polling
 * timer `inter` is cleared.
 */
function req(){
    // Already finished: make sure the timer is stopped and skip the request.
    if(over){
        clearInterval(inter);
        return;
    }
    // "${basePath}" is kept verbatim; presumably a server-side template
    // replaces it before the script reaches the browser (confirm).
    var url = "${basePath}/admin/collect/collectJobs23";
    $.get(url,function(data){
        var state = typeof data === "string" ? data.split("-") : [];
        if(state.length < 4){
            // Malformed response (e.g. an error page). Previously the missing
            // fields compared equal (undefined == undefined), which wrongly
            // marked the job finished and rendered "undefined" in the UI.
            return;
        }
        $("#state").html("正在爬取数据... 总大小:"+state[3]+",已爬取:"+state[2]+",完成"+state[1]+"%,已用时:"+state[0]+"秒");
        $("#progress").animate({width:state[1]+"%"},500);
        // NOTE(review): a "0" total with "0" crawled also counts as finished;
        // confirm the server never reports 0/0 before the total is known.
        if(state[3] === state[2]){
            over = true;
            // Stop right away instead of waiting for the next tick.
            clearInterval(inter);
        }
    });
}
前端则用setInterval定时请求进度,并据此刷新进度条。
如需获取文本源码学习,可加QQ490647751,回复“再度出击,百分比进度条显示爬取职位信息”