pom.xml
<!-- Build settings: compile for Java 17 and read source files as UTF-8. -->
<properties>
<maven.compiler.source>17</maven.compiler.source>
<maven.compiler.target>17</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<!--
  NOTE(review): only jsoup is declared in this excerpt. The Java sources also import
  Lombok, fastjson2, Spring and SLF4J; presumably those are declared elsewhere in the
  full pom - verify.
-->
<dependencies>
<!-- jsoup: HTML fetching and CSS-selector based parsing used by JsoupServiceImpl. -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.16.1</version>
</dependency>
</dependencies>
ChinaBrand.java
package com.jm.bean;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import lombok.experimental.Accessors;
import java.util.List;
/**
 * Data holder for one brand scraped from a brand detail page.
 *
 * <p>Instances are filled by {@code JsoupServiceImpl}, which copies values out of the parsed
 * HTML document. Accessors, {@code equals}/{@code hashCode}/{@code toString} and both
 * constructors are generated by Lombok; because of {@code @Accessors(chain = true)} the
 * generated setters return {@code this} so calls can be chained.
 *
 * <p>The declaration order of the fields is also the parameter order of the generated
 * all-args constructor, so the fields must not be reordered casually.
 */
@Data
@Accessors(chain = true)
@NoArgsConstructor
@AllArgsConstructor
public class ChinaBrand {
/** Value of the {@code src} attribute of the brand logo image. */
private String logo;
/** Brand name, taken from the {@code alt} attribute of the logo image. */
private String name;
/** Text of the company-name paragraph on the page. */
private String company;
/** Text of the first "jiucuo" detail entry on the page. */
private String area;
/** Text of the second "jiucuo" detail entry, kept as the raw string shown on the page. */
private String date;
/** Text of the third "jiucuo" detail entry on the page. */
private String industry;
/** {@code src} values of the images found inside the page's content section. */
private List<String> images;
/** Concatenated HTML of the paragraphs found inside the page's content section. */
private String info;
}
JsoupServiceImpl.java
package com.jm.service.impl.jsoup;
import com.alibaba.fastjson2.JSON;
import com.alibaba.fastjson2.JSONObject;
import com.jm.bean.ChinaBrand;
import com.jm.service.i.jsoup.JsoupService;
import lombok.extern.slf4j.Slf4j;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.stereotype.Service;
import java.util.ArrayList;
import java.util.List;
/**
 * {@link JsoupService} implementation that downloads one brand detail page with Jsoup and
 * copies its contents into a {@link ChinaBrand}.
 *
 * <p>Each {@code set*} helper receives the parsed document plus the CSS selector to query, and
 * writes what it finds into the supplied brand. Helpers leave the brand untouched when the
 * selector matches nothing.
 */
@Slf4j
@Service
public class JsoupServiceImpl implements JsoupService {

    /** Page scraped by {@link #brand()}. */
    private static final String TARGET_URL = "https://www.chinapp.com/pinpai/3.html";

    /** Browser-like user agent sent with the request. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36";

    /** Timeout handed to Jsoup, in milliseconds (five minutes). */
    private static final int TIMEOUT_MILLIS = 300000;

    /** CSS class of the detail entries that hold area, date and industry, in that order. */
    private static final String DETAIL_CLASS = "jiucuo";

    /**
     * Manual entry point: runs a single scrape outside of the Spring container.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        JsoupService jsoupService = new JsoupServiceImpl();
        jsoupService.brand();
    }

    /**
     * Fetches the brand page, extracts its fields and prints the result as JSON to standard out.
     *
     * @return {@link Boolean#TRUE} when the page was fetched and parsed, {@link Boolean#FALSE}
     *         when any step failed
     */
    @Override
    public Boolean brand() {
        try {
            Document doc = Jsoup.connect(TARGET_URL)
                    .ignoreContentType(true)
                    .userAgent(USER_AGENT)
                    .timeout(TIMEOUT_MILLIS)
                    .header("referer", "www.chinapp.com")
                    .get();

            ChinaBrand brand = new ChinaBrand();
            this.setLogAndName(brand, doc, ".brandleft img");
            this.setCompany(brand, doc, ".company_name_center p");
            this.setAreaAndDateAndIndustry(brand, doc, ".brandCon");
            this.setImages(brand, doc, "#paracontent img");
            this.setInfo(brand, doc, "#paracontent p");

            System.out.println(JSON.toJSONString(brand));
            // Report success explicitly; previously FALSE was returned on every path.
            return Boolean.TRUE;
        } catch (Exception e) {
            // Best-effort boundary: network and parsing failures are reported, not propagated.
            // NOTE(review): the class carries @Slf4j - consider routing this through the logger.
            e.printStackTrace();
            return Boolean.FALSE;
        }
    }

    /**
     * Stores the concatenated HTML of every element matching {@code tag} as the brand info.
     *
     * @param brand brand to update
     * @param doc   parsed page
     * @param tag   CSS selector of the elements to collect
     */
    private void setInfo(ChinaBrand brand, Document doc, String tag) {
        StringBuilder html = new StringBuilder();
        for (Element paragraph : doc.select(tag)) {
            html.append(paragraph.toString());
        }
        brand.setInfo(html.toString());
    }

    /**
     * Stores the {@code src} attribute of every element matching {@code tag} as the brand images.
     * Elements without a {@code src} attribute contribute an empty string.
     *
     * @param brand brand to update
     * @param doc   parsed page
     * @param tag   CSS selector of the image elements
     */
    private void setImages(ChinaBrand brand, Document doc, String tag) {
        Elements imageElements = doc.select(tag);
        List<String> images = new ArrayList<>(imageElements.size());
        for (Element image : imageElements) {
            images.add(image.attr("src"));
        }
        brand.setImages(images);
    }

    /**
     * Reads area, date and industry from the first three detail entries of every container
     * matching {@code tag}. A value that is missing on the page is skipped instead of raising
     * an exception, so the remaining fields are still filled.
     *
     * @param brand brand to update
     * @param doc   parsed page
     * @param tag   CSS selector of the container elements
     */
    private void setAreaAndDateAndIndustry(ChinaBrand brand, Document doc, String tag) {
        for (Element container : doc.select(tag)) {
            // Query once per container instead of once per field.
            Elements details = container.getElementsByClass(DETAIL_CLASS);

            String area = firstChildText(details, 0);
            if (area != null) {
                brand.setArea(area);
            }
            String date = firstChildText(details, 1);
            if (date != null) {
                brand.setDate(date);
            }
            String industry = firstChildText(details, 2);
            if (industry != null) {
                brand.setIndustry(industry);
            }
        }
    }

    /**
     * Returns the text of the first child element of {@code details.get(index)}.
     *
     * @param details detail entries found in a container
     * @param index   position of the wanted entry
     * @return the text, or {@code null} when the entry does not exist or has no child element
     */
    private static String firstChildText(Elements details, int index) {
        if (index >= details.size()) {
            return null;
        }
        Element detail = details.get(index);
        return detail.childrenSize() > 0 ? detail.child(0).text() : null;
    }

    /**
     * Stores the text of the elements matching {@code tag} as the company name; when several
     * elements match, the last one wins.
     *
     * @param brand brand to update
     * @param doc   parsed page
     * @param tag   CSS selector of the company-name element
     */
    private void setCompany(ChinaBrand brand, Document doc, String tag) {
        for (Element paragraph : doc.select(tag)) {
            brand.setCompany(paragraph.text());
        }
    }

    /**
     * Stores the {@code src} attribute as logo and the {@code alt} attribute as name for the
     * elements matching {@code tag}; when several elements match, the last one wins.
     *
     * @param brand brand to update
     * @param doc   parsed page
     * @param tag   CSS selector of the logo image element
     */
    public void setLogAndName(ChinaBrand brand, Document doc, String tag) {
        for (Element image : doc.select(tag)) {
            brand.setLogo(image.attr("src"));
            brand.setName(image.attr("alt"));
        }
    }
}