java小作业(二十)

1.完善爬虫代码

package com.sj.bd;

import java.io.File;
import java.io.IOException;
import java.util.List;
import jxl.Workbook;
import jxl.write.Label;
import jxl.write.WritableSheet;
import jxl.write.WritableWorkbook;
import jxl.write.WriteException;
import jxl.write.biff.RowsExceededException;

/**
 * Writes crawled {@link Job} records into an Excel (.xls) workbook through the JXL API.
 */
public class ExcelSolve {

	/** Captions of the header row (row 0), in column order 0..7. */
	private static final String[] HEADERS = {
		"城市/区域", "公司名称", "日期", "岗位信息", "薪资", "公司性质", "学历/工作年限", "公司人数"
	};

	/**
	 * Creates the workbook file at {@code path} with a single sheet named
	 * {@code "中华英才网-" + keyWord}, writes a header row followed by one row per job,
	 * and closes the workbook.
	 *
	 * <p>The workbook is closed in a {@code finally} block, so the underlying file
	 * handle is released even when adding a cell, writing, or the pause fails.
	 * Missing parent directories of {@code path} are created first.
	 *
	 * @param list    jobs to export; one sheet row per element, in list order
	 * @param path    destination file of the workbook
	 * @param keyWord search keyword, used only as part of the sheet name
	 * @throws IOException           if the output directory or file cannot be created or written
	 * @throws RowsExceededException if JXL rejects a cell because the sheet's row limit is exceeded
	 * @throws WriteException        if JXL fails to add a cell or to close the workbook
	 * @throws InterruptedException  if the thread is interrupted during the pause before closing
	 */
	public void writeExcel(List<Job> list,String path,String keyWord) throws IOException, RowsExceededException, WriteException, InterruptedException{
		File file = new File(path);

		// Workbook.createWorkbook fails when the target folder does not exist, so create it up front.
		File parent = file.getParentFile();
		if (parent != null && !parent.isDirectory() && !parent.mkdirs() && !parent.isDirectory()) {
			throw new IOException("Cannot create output directory: " + parent);
		}

		WritableWorkbook workbook = Workbook.createWorkbook(file);
		try {
			WritableSheet sheet = workbook.createSheet("中华英才网-"+keyWord, 0);

			// Header row. Label arguments are (column, row, text).
			for (int col = 0; col < HEADERS.length; col++) {
				sheet.addCell(new Label(col, 0, HEADERS[col]));
			}

			// Data rows start directly below the header.
			int row = 1;
			for (Job job : list) {
				sheet.addCell(new Label(0, row, job.getCityArea()));
				sheet.addCell(new Label(1, row, job.getCompany()));
				sheet.addCell(new Label(2, row, job.getDate()));
				sheet.addCell(new Label(3, row, job.getJobName()));
				sheet.addCell(new Label(4, row, job.getMoney()));
				sheet.addCell(new Label(5, row, job.getNacture()));
				sheet.addCell(new Label(6, row, job.getRecord()));
				sheet.addCell(new Label(7, row, job.getNum()));
				row++;
			}

			workbook.write();
			// NOTE(review): pause kept from the original ("buffering"); it is probably
			// unnecessary because write() has already returned -- confirm before removing.
			Thread.sleep(2000);
		} finally {
			// Always release the file handle, including on failure or interruption.
			workbook.close();
		}
	}

}
package com.sj.bd;
/**
 * Plain data holder for one job posting. Every property is a free-form string
 * and is {@code null} until its setter (or the public field) is assigned.
 */
public class Job {

	/** Position title / job information. */
	public String jobName;
	/** Publication date of the posting. */
	public String date;
	/** Company name. */
	public String company;
	/** City / district. */
	public String cityArea;
	/** Education / years of experience. */
	public String record;
	/** Company type (the identifier's spelling is kept as-is for callers). */
	public String nacture;
	/** Company head count. */
	public String num;
	/** Salary. */
	public String money;

	/** @return the position title */
	public String getJobName() {
		return this.jobName;
	}

	/** @param jobName the position title */
	public void setJobName(String jobName) {
		this.jobName = jobName;
	}

	/** @return the publication date */
	public String getDate() {
		return this.date;
	}

	/** @param date the publication date */
	public void setDate(String date) {
		this.date = date;
	}

	/** @return the company name */
	public String getCompany() {
		return this.company;
	}

	/** @param company the company name */
	public void setCompany(String company) {
		this.company = company;
	}

	/** @return the city / district */
	public String getCityArea() {
		return this.cityArea;
	}

	/** @param cityArea the city / district */
	public void setCityArea(String cityArea) {
		this.cityArea = cityArea;
	}

	/** @return the education / experience requirement */
	public String getRecord() {
		return this.record;
	}

	/** @param record the education / experience requirement */
	public void setRecord(String record) {
		this.record = record;
	}

	/** @return the company type */
	public String getNacture() {
		return this.nacture;
	}

	/** @param nacture the company type */
	public void setNacture(String nacture) {
		this.nacture = nacture;
	}

	/** @return the company head count */
	public String getNum() {
		return this.num;
	}

	/** @param num the company head count */
	public void setNum(String num) {
		this.num = num;
	}

	/** @return the salary */
	public String getMoney() {
		return this.money;
	}

	/** @param money the salary */
	public void setMoney(String money) {
		this.money = money;
	}

	/**
	 * Renders all eight properties as {@code Job [name=value, ...]};
	 * unset properties appear as the text {@code null}.
	 */
	@Override
	public String toString() {
		StringBuilder text = new StringBuilder();
		text.append("Job [cityArea=").append(cityArea);
		text.append(", company=").append(company);
		text.append(", date=").append(date);
		text.append(", jobName=").append(jobName);
		text.append(", money=").append(money);
		text.append(", nacture=").append(nacture);
		text.append(", num=").append(num);
		text.append(", record=").append(record);
		text.append("]");
		return text.toString();
	}

}
package com.sj.bd;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Crawler helper: downloads a search-result page with jsoup and extracts
 * {@link Job} records from it.
 */
public class JsoupHtml {
	/**
	 * Holds every job parsed by this instance. {@link #getDataInfo(Document)} appends
	 * to it and returns this same list, so results accumulate across calls.
	 */
	List<Job> list=new ArrayList<Job>();

	/**
	 * Fetches one search-result page.
	 *
	 * @param keyWord search keyword, inserted into the URL as-is
	 * @param city    city parameter, inserted into the URL as-is
	 * @param page    page number to request
	 * @return the parsed HTML document
	 * @throws IOException          if the HTTP request fails
	 * @throws InterruptedException if the thread is interrupted during the pause after the request
	 */
	public Document getDocument(String keyWord, String city ,int page)
	throws IOException, InterruptedException {
		// NOTE(review): keyWord and city are concatenated without URL-encoding;
		// callers appear to pass pre-encoded values -- confirm.
		String url = "http://www.chinahr.com/sou/?city="+city+"&keyword="+keyWord+"&page="+page;
		System.out.println("当前url地址为:"+url);
		Document content = Jsoup.connect(url).get();
		// Pause after each request (presumably to throttle the crawl -- confirm).
		Thread.sleep(10000);
		return content;
	}

	/**
	 * Extracts one {@link Job} per element whose {@code class} attribute equals
	 * {@code "jobList"} and appends it to this instance's list.
	 *
	 * <p>A listing whose markup does not have the expected shape (see
	 * {@link #fillJob(Job, Element)}) is skipped with a console message instead of
	 * aborting the whole run with an index-out-of-bounds exception.
	 *
	 * @param document the page returned by {@link #getDocument(String, String, int)}
	 * @return this instance's accumulated job list (the live list, not a copy)
	 * @throws InterruptedException if the thread is interrupted during the pause
	 */
	public List<Job> getDataInfo(Document document) throws InterruptedException{
		Elements jobList = document.getElementsByAttributeValue("class", "jobList");
		// NOTE(review): pause kept from the original; the document is already in
		// memory here, so this looks like extra crawl throttling -- confirm.
		Thread.sleep(10000);
		System.out.println("获取到"+jobList.size()+"条数据");

		for (int i = 0; i < jobList.size(); i++) {
			Element everyDiv = jobList.get(i);
			Elements childrenElement = everyDiv.children();
			Job job = new Job();
			boolean wellFormed = true;
			// Each child overwrites the same Job, so the last child wins
			// (same as the original loop).
			for (int j = 0; j < childrenElement.size() && wellFormed; j++) {
				wellFormed = fillJob(job, childrenElement.get(j));
			}
			if (wellFormed) {
				list.add(job);
			} else {
				System.out.println("Skipping listing " + i + ": unexpected markup");
			}
		}
		return list;
	}

	/**
	 * Copies the fields of one listing block into {@code job}.
	 *
	 * <p>Expected shape (per the original author's notes, each block is a {@code ul}
	 * whose first two children are the two text rows): the first child's text must
	 * split on single spaces into at least 3 tokens (job name, date, company), the
	 * second into at least 8 (city/area, education/experience, salary, three tokens
	 * joined into the company type, one unused token, head count).
	 *
	 * @param job   the record to populate; left untouched when the shape does not match
	 * @param block one child element of a {@code jobList} element
	 * @return {@code true} if the block matched and {@code job} was populated
	 */
	private boolean fillJob(Job job, Element block) {
		Elements rows = block.children();
		if (rows.size() < 2) {
			return false;
		}
		String[] liArray = rows.get(0).text().split(" ");
		String[] liArray2 = rows.get(1).text().split(" ");
		if (liArray.length < 3 || liArray2.length < 8) {
			return false;
		}

		job.setJobName(liArray[0]);
		job.setDate(liArray[1]);
		job.setCompany(liArray[2]);

		job.setCityArea(liArray2[0]);
		job.setRecord(liArray2[1]);
		job.setMoney(liArray2[2]+"元");
		job.setNacture(liArray2[3]+liArray2[4]+liArray2[5]);
		// Token 6 is intentionally not read; the head count is token 7.
		job.setNum(liArray2[7]);
		return true;
	}

}
package com.sj.bd;

import java.io.IOException;
import java.util.List;
import jxl.write.WriteException;
import jxl.write.biff.RowsExceededException;
import org.jsoup.nodes.Document;

/**
 * Command-line driver: crawls result pages 1 to 10 for a fixed keyword and city
 * and writes each page's jobs to its own Excel file.
 */
public class Test {
	/**
	 * Runs the crawl page by page; any exception stops the run.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) throws IOException, RowsExceededException, WriteException, InterruptedException {
		final String keyWord = "java";   // search keyword
		final String city = "34%2C398";  // city parameter, already percent-encoded
		int page = 1;
		while (page <= 10) {
			crawlPage(keyWord, city, page);
			page++;
		}
	}

	/**
	 * Downloads one result page, parses it, and exports the parsed jobs to
	 * {@code E:/zp/job/zhyc-<keyWord><page>.xls}.
	 */
	private static void crawlPage(String keyWord, String city, int page) throws IOException, RowsExceededException, WriteException, InterruptedException {
		// A fresh crawler per page keeps its internal job list limited to this page.
		JsoupHtml crawler = new JsoupHtml();
		Document document = crawler.getDocument(keyWord, city, page);

		// Optional debug dump of the raw page to a text file (disabled):
		// FileUtils.writeStringToFile(new File("E:/zp/job/job.txt"), document.toString());

		List<Job> jobs = crawler.getDataInfo(document);
		Thread.sleep(2000); // short pause before writing

		String path = "E:/zp/job/zhyc-" + keyWord + page + ".xls";
		new ExcelSolve().writeExcel(jobs, path, keyWord);
		System.out.println("写入成功了,且存在的位置为:" + path);
		System.out.println("-------------------------");
	}

}

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值