jsoup 使用示例：把代码中的 path 换成自己的目录就可以运行

15 篇文章 0 订阅
1 篇文章 0 订阅

import cn.hutool.core.collection.CollectionUtil;
import cn.hutool.core.date.DatePattern;
import cn.hutool.core.date.DateUtil;
import cn.hutool.core.io.FileUtil;
import cn.hutool.http.HttpUtil;
import com.alibaba.excel.EasyExcelFactory;
import com.alibaba.excel.annotation.ExcelProperty;
import com.alibaba.excel.write.builder.ExcelWriterBuilder;
import com.alibaba.excel.write.builder.ExcelWriterSheetBuilder;
import java.io.File;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import lombok.Data;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Small scraper built on jsoup, Hutool and EasyExcel.
 *
 * <p>{@link #main(String[])} downloads the city index at {@code https://www.ke.com/city/},
 * requests the {@code /ershoufang/} page of every linked city, and stores one
 * {@code name-count} line per city in a snapshot file named after the current date
 * ({@code yyyyMMdd.txt}) under {@link #path}. {@link #merge()} then combines every snapshot
 * found under {@link #path} into a single Excel workbook.
 */
public class bkTest {

	/** Extension of the per-run snapshot files written by {@link #main(String[])}. */
	private static final String SNAPSHOT_SUFFIX = ".txt";

	/** Separator between the city label and the count inside a snapshot line. */
	private static final String FIELD_SEPARATOR = "-";

	/** Working directory for the snapshot files and the merged workbook; set it before running. */
	static String path = "目录";

	/**
	 * Scrapes the count shown on each city's {@code /ershoufang/} page, writes today's snapshot
	 * file and rebuilds the merged workbook.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		List<String> list = new ArrayList<>();
		String city = HttpUtil.get("https://www.ke.com/city/");
		Document cityIndex = Jsoup.parse(city);
		for (Element cityElement : cityIndex.getElementsByClass("CLICKDATA")) {
			String cityName = cityElement.text();
			String href = cityElement.select("a").attr("href");
			if (href.isEmpty()) {
				// No link to follow; requesting "https:/ershoufang/" would only fail.
				continue;
			}
			String area = HttpUtil.get("https:" + href + "/ershoufang/");
			Document cityPage = Jsoup.parse(area);
			Elements span = cityPage.getElementsByClass("total fl").select("span");
			if (CollectionUtil.isNotEmpty(span)) {
				list.add(cityName + FIELD_SEPARATOR + span.get(0).text());
			}
		}
		FileUtil.writeLines(list,
			path + "/" + DateUtil.format(new Date(), DatePattern.PURE_DATE_PATTERN) + SNAPSHOT_SUFFIX,
			Charset.defaultCharset());

		merge();
	}

	/** One row of the merged workbook: snapshot date, city label and count. */
	@Data
	static class Home {

		@ExcelProperty("日期")
		public String fileName;
		@ExcelProperty("地区")
		public String name;
		@ExcelProperty("数量")
		public Integer cnt;
	}

	/**
	 * Reads every {@code .txt} snapshot under {@link #path} and writes all rows to
	 * {@code 二手房.xlsx} in the same directory.
	 *
	 * <p>Only snapshot files are read: the workbook produced by an earlier run lives in the same
	 * directory and must not be parsed as a snapshot. Lines that do not have the
	 * {@code name-count} shape are skipped with a warning instead of aborting the merge.
	 */
	public static void merge() {
		List<Home> homes = new ArrayList<>();
		for (File file : FileUtil.loopFiles(path)) {
			String fileName = file.getName();
			if (!fileName.endsWith(SNAPSHOT_SUFFIX)) {
				continue;
			}
			String date = fileName.substring(0, fileName.length() - SNAPSHOT_SUFFIX.length());
			for (String line : FileUtil.readLines(file, Charset.defaultCharset())) {
				// Split on the LAST separator so a label that itself contains '-' stays intact.
				int separatorIndex = line.lastIndexOf(FIELD_SEPARATOR);
				if (separatorIndex <= 0 || separatorIndex == line.length() - 1) {
					if (!line.trim().isEmpty()) {
						System.err.println("Skipping malformed line in " + fileName + ": " + line);
					}
					continue;
				}
				Integer count;
				try {
					count = Integer.valueOf(line.substring(separatorIndex + 1).trim());
				} catch (NumberFormatException e) {
					System.err.println("Skipping non-numeric count in " + fileName + ": " + line);
					continue;
				}
				Home home = new Home();
				home.setFileName(date);
				home.setName(line.substring(0, separatorIndex));
				home.setCnt(count);
				homes.add(home);
			}
		}
		ExcelWriterBuilder write = EasyExcelFactory.write(path + "/二手房.xlsx", Home.class);
		ExcelWriterSheetBuilder sheet = write.sheet(0);
		sheet.doWrite(homes);
	}


}

Maven 依赖（代码还用到了 Lombok 的 @Data，需另外引入 org.projectlombok:lombok）

    <dependency>
      <groupId>org.jsoup</groupId>
      <artifactId>jsoup</artifactId>
      <version>1.14.3</version>
    </dependency>
    <dependency>
      <groupId>com.alibaba</groupId>
      <artifactId>easyexcel</artifactId>
      <version>3.1.0</version>
    </dependency>
    <dependency>
      <groupId>cn.hutool</groupId>
      <artifactId>hutool-all</artifactId>
      <version>5.8.1</version>
    </dependency>

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值