网页地址:https://huodong.taobao.com/wow/tbhome/act/market-list
注:红框中是需要扒取的数据
效果
具体步骤
1、在网页上选中需要的文字,复制后粘贴到 Excel
2、引入 jar 依赖(使用 Apache POI 处理 Excel)
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>3.17</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.17</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml-schemas</artifactId>
<version>3.17</version>
</dependency>
3、核心代码
package com.shiyu.test;
import com.alibaba.fastjson.JSON;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import lombok.Data;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
/**
 * Reads category text from the first column of an Excel sheet and flattens it into
 * (level-1, level-2, level-3) category records.
 *
 * <p>{@link #step1()} loads the non-blank cell texts; {@link #step2(List)} classifies each
 * text as a title or a level-3 line purely by its length and emits one {@link BO} per
 * level-3 name.
 *
 * @author shiyu
 * @since 2020/12/14
 */
public class MyTest {

    /**
     * Length threshold used to classify a line: lines shorter than this are level-1 or
     * level-2 titles, every other line is a space-separated list of level-3 names.
     */
    private static final int TITLE_MAX_LENGTH = 8;

    /**
     * Runs both steps in sequence. Any exception is printed to stderr rather than
     * propagated.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            step2(step1());
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads column 0 of the first sheet of {@code resources/tb.xlsx} and returns every
     * non-blank cell text, trimmed, in row order. Each collected value is also printed to
     * stdout.
     *
     * <p>The input stream and the workbook are closed before returning, including when
     * reading fails part-way.
     *
     * @return the trimmed, non-empty cell texts; never {@code null}
     * @throws Exception if the file cannot be opened or parsed as a workbook, or if a cell
     *     cannot be read as a string
     */
    public static List<String> step1() throws Exception {
        List<String> list = new ArrayList<>();
        File file = new File("resources/tb.xlsx");
        // try-with-resources: the original leaked both the stream and the workbook.
        try (InputStream is = new FileInputStream(file);
             Workbook wb = WorkbookFactory.create(is)) {
            Sheet sheet = wb.getSheetAt(0);
            int lastRow = sheet.getLastRowNum();
            for (int i = 0; i <= lastRow; i++) {
                Row row = sheet.getRow(i);
                if (row == null) {
                    // Rows that were never written are absent from the sheet.
                    continue;
                }
                Cell cell = row.getCell(0);
                if (cell == null) {
                    continue;
                }
                String raw = cell.getStringCellValue();
                if (raw == null) {
                    continue;
                }
                String value = raw.trim();
                if (!value.isEmpty()) {
                    System.out.println(value);
                    list.add(value);
                }
            }
        }
        return list;
    }

    /**
     * Turns the flat list of lines into category records and prints them as JSON.
     *
     * <p>Classification is by length only:
     * <ul>
     *   <li>a title line (shorter than {@link #TITLE_MAX_LENGTH}) is the level-1 title when
     *       no level-1 title is active, otherwise it is the level-2 title;</li>
     *   <li>any other line is split on single spaces and each non-empty token becomes one
     *       record under the current level-1 and level-2 titles;</li>
     *   <li>when a level-3 line is followed by two consecutive title lines, the first of
     *       them starts a new level-1 group.</li>
     * </ul>
     *
     * <p>Unlike the original code, a line of exactly {@link #TITLE_MAX_LENGTH} characters is
     * treated as a level-3 line instead of being silently dropped, and empty tokens caused
     * by consecutive or leading spaces no longer produce records with an empty name.
     *
     * @param list the lines to classify, in page order; elements must not be {@code null}
     * @return one record per level-3 name, in input order
     */
    public static List<BO> step2(List<String> list) {
        List<BO> boList = new ArrayList<>();
        String title1 = ""; // current level-1 category
        String title2 = ""; // current level-2 category
        boolean existFirst = false; // true while a level-1 title is active
        int size = list.size();
        for (int i = 0; i < size; i++) {
            String line = list.get(i);
            if (isTitle(line)) {
                if (existFirst) {
                    title2 = line.trim();
                } else {
                    title1 = line.trim();
                    existFirst = true;
                }
                continue;
            }
            // Level-3 line: one record per space-separated name.
            for (String name : line.split(" ")) {
                if (name.isEmpty()) {
                    // Produced by consecutive or leading spaces; not a real category.
                    continue;
                }
                BO bo = new BO();
                bo.setA(title1);
                bo.setB(title2);
                bo.setC(name);
                boList.add(bo);
            }
            // Two titles in a row after a level-3 line mean the next line opens a new
            // level-1 group (level-1 title followed by its first level-2 title).
            if (i < size - 2 && isTitle(list.get(i + 1)) && isTitle(list.get(i + 2))) {
                existFirst = false;
            }
        }
        System.out.println(JSON.toJSONString(boList));
        return boList;
    }

    /** Returns whether {@code line} is short enough to be a level-1 or level-2 title. */
    private static boolean isTitle(String line) {
        return line.length() < TITLE_MAX_LENGTH;
    }

    /** One flattened category record; accessors are generated by Lombok's {@code @Data}. */
    @Data
    public static class BO {
        /** Level-1 category title. */
        private String a;
        /** Level-2 category title. */
        private String b;
        /** Level-3 category name. */
        private String c;
    }
}
4、将step2方法得到的list存入数据库
5、拷贝数据到excel
6、处理excel(WPS-2.6.1)
6.1 对B列分类汇总:选中B列,点击“数据”-“分类汇总”-“确定”
得到
6.2 对B列定位后选择空值进行合并(mac版本“定位”快捷键为“Command+G”,windows版本为“Ctrl+G”)
在弹出的窗口中选择“合并相同单元格”,确定后关闭“定位”窗口
得到
6.3 删除C列分类汇总数据
得到
6.4 选中B列,用格式刷将C列格式化
得到
6.5 删除B列
得到
6.6 B列分类汇总完成
6.7 A列同理,操作得到