import cn.hutool.core.collection.CollectionUtil;
import cn.hutool.core.date.DatePattern;
import cn.hutool.core.date.DateUtil;
import cn.hutool.core.io.FileUtil;
import cn.hutool.http.HttpUtil;
import com.alibaba.excel.EasyExcelFactory;
import com.alibaba.excel.annotation.ExcelProperty;
import com.alibaba.excel.write.builder.ExcelWriterBuilder;
import com.alibaba.excel.write.builder.ExcelWriterSheetBuilder;
import java.io.File;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import lombok.Data;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Collects one figure per city from ke.com and merges all saved snapshots into a workbook.
 *
 * <p>{@link #main(String[])} downloads the city index, visits each city's
 * {@code /ershoufang/} page and stores one {@code <area>-<count>} line per city in a
 * text file named after the current date. {@link #merge()} then reads every snapshot
 * file in {@link #path} and writes the combined rows to {@code 二手房.xlsx}.
 *
 * <p>NOTE(review): the scraped number is presumably the total count of second-hand
 * listings for the city — confirm against the live page markup.
 */
public class bkTest {
    /** Directory that holds the daily snapshot files and the generated workbook. */
    static String path = "目录";

    /** File extension used for daily snapshot files. */
    private static final String SNAPSHOT_SUFFIX = ".txt";

    /** Separator between the area name and the count inside a snapshot line. */
    private static final String FIELD_SEPARATOR = "-";

    /**
     * Scrapes the current figure for every city, saves today's snapshot and rebuilds
     * the merged workbook.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        List<String> list = new ArrayList<>();
        String city = HttpUtil.get("https://www.ke.com/city/");
        Document cityPage = Jsoup.parse(city);
        for (Element element : cityPage.getElementsByClass("CLICKDATA")) {
            String href = element.select("a").attr("href");
            if (href.isEmpty()) {
                // No link to follow; requesting "https:/ershoufang/" would only fail.
                continue;
            }
            String area = HttpUtil.get("https:" + href + "/ershoufang/");
            Elements span = Jsoup.parse(area).getElementsByClass("total fl").select("span");
            if (CollectionUtil.isNotEmpty(span)) {
                list.add(element.text() + FIELD_SEPARATOR + span.get(0).text());
            }
        }
        FileUtil.writeLines(list,
                path + "/" + DateUtil.format(new Date(), DatePattern.PURE_DATE_PATTERN)
                        + SNAPSHOT_SUFFIX,
                Charset.defaultCharset());
        merge();
    }

    /** One workbook row: the figure recorded for an area in a given snapshot. */
    @Data
    static class Home {
        /** Snapshot file name without its extension. */
        @ExcelProperty("日期")
        public String fileName;

        /** Area name, i.e. the text before the last separator of a snapshot line. */
        @ExcelProperty("地区")
        public String name;

        /** Integer parsed from the text after the last separator of a snapshot line. */
        @ExcelProperty("数量")
        public Integer cnt;
    }

    /**
     * Reads every snapshot file under {@link #path} and writes all rows into
     * {@code 二手房.xlsx} in the same directory.
     *
     * <p>Only {@code .txt} files are read: the workbook is written into the directory
     * being scanned, so without this filter a second run would try to parse the
     * previously generated workbook as a snapshot and fail.
     */
    public static void merge() {
        List<Home> homes = new ArrayList<>();
        for (File file : FileUtil.loopFiles(path)) {
            String name = file.getName();
            if (!name.endsWith(SNAPSHOT_SUFFIX)) {
                continue;
            }
            String date = name.substring(0, name.length() - SNAPSHOT_SUFFIX.length());
            for (String line : FileUtil.readLines(file, Charset.defaultCharset())) {
                Home home = parseLine(date, line);
                if (home != null) {
                    homes.add(home);
                }
            }
        }
        EasyExcelFactory.write(path + "/二手房.xlsx", Home.class).sheet(0).doWrite(homes);
    }

    /**
     * Converts one snapshot line into a row.
     *
     * @param date snapshot file name without extension
     * @param line raw line in the form {@code <area>-<count>}
     * @return the parsed row, or {@code null} when the line is blank or malformed
     */
    private static Home parseLine(String date, String line) {
        if (line == null || line.trim().isEmpty()) {
            return null;
        }
        // Split at the LAST separator so an area name containing '-' stays intact.
        int separatorAt = line.lastIndexOf(FIELD_SEPARATOR);
        if (separatorAt <= 0 || separatorAt == line.length() - 1) {
            System.err.println("Skipping malformed line in " + date + ": " + line);
            return null;
        }
        Integer count;
        try {
            count = Integer.valueOf(line.substring(separatorAt + 1).trim());
        } catch (NumberFormatException e) {
            System.err.println("Skipping line with non-numeric count in " + date + ": " + line);
            return null;
        }
        Home home = new Home();
        home.setFileName(date);
        home.setName(line.substring(0, separatorAt));
        home.setCnt(count);
        return home;
    }
}
/*
Maven dependencies required by the class above:

<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.14.3</version>
</dependency>
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>easyexcel</artifactId>
<version>3.1.0</version>
</dependency>
<dependency>
<groupId>cn.hutool</groupId>
<artifactId>hutool-all</artifactId>
<version>5.8.1</version>
</dependency>
*/