Spring Boot + jsoup 爬取数据

pom.xml

 <!-- Build settings: compile source and target for Java 17, UTF-8 source encoding. -->
 <properties>
        <maven.compiler.source>17</maven.compiler.source>
        <maven.compiler.target>17</maven.compiler.target>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>

    <!--
      Only jsoup is declared in this fragment. The Java sources in this article also
      import Lombok, fastjson2 and Spring (org.springframework.stereotype.Service), so
      those dependencies are presumably declared elsewhere in the real pom.xml; verify
      before building.
    -->
    <dependencies>
        <dependency>
            <groupId>org.jsoup</groupId>
            <artifactId>jsoup</artifactId>
            <version>1.16.1</version>
        </dependency>
    </dependencies>

ChinaBrand.java

package com.jm.bean;

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import lombok.experimental.Accessors;

import java.util.List;

/**
 * Data holder for one brand scraped from a chinapp.com brand detail page.
 *
 * <p>All accessors and constructors are generated by Lombok. The declaration order of the
 * fields determines the parameter order of the generated all-args constructor, so do not
 * reorder them.
 */
@Data
@Accessors(chain = true)
@NoArgsConstructor
@AllArgsConstructor
public class ChinaBrand {
    /** Logo image URL, taken from the {@code src} attribute of the brand image. */
    private String logo;
    /** Brand name, taken from the {@code alt} attribute of the brand image. */
    private String name;
    /** Text of the company-name paragraph on the page. */
    private String company;
    /** Text of the first detail entry on the page (stored as the brand's area). */
    private String area;
    /** Text of the second detail entry on the page, kept as the raw page string. */
    private String date;
    /** Text of the third detail entry on the page (stored as the brand's industry). */
    private String industry;
    /** {@code src} values of every image found in the brand's content section. */
    private List<String> images;
    /** Concatenated HTML of the paragraphs in the brand's content section. */
    private String info;
}

JsoupServiceImpl.java

package com.jm.service.impl.jsoup;

import com.alibaba.fastjson2.JSON;
import com.alibaba.fastjson2.JSONObject;
import com.jm.bean.ChinaBrand;
import com.jm.service.i.jsoup.JsoupService;
import lombok.extern.slf4j.Slf4j;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.stereotype.Service;

import java.util.ArrayList;
import java.util.List;

@Slf4j
@Service
public class JsoupServiceImpl implements JsoupService {

    public static void main(String[] args) {
        JsoupService jsoupService = new JsoupServiceImpl();
        jsoupService.brand();
    }

    @Override
    public Boolean brand() {
        //分类页
        //https://www.chinapp.com/brand/184
        //品牌详情页
        //https://www.chinapp.com/pinpai/3.html


        String target = "https://www.chinapp.com/pinpai/3.html";
        try {
            Document doc = Jsoup.connect(target)
                    .ignoreContentType(true)
                    .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36")
                    .timeout(300000)
                    .header("referer", "www.chinapp.com")
                    .get();
            Elements elements = doc.select(".brandleft img");

            ChinaBrand brand = new ChinaBrand();
            //jsoup 与jquery 标签选择器 一样获取标签
            this.setLogAndName(brand, doc, ".brandleft img");
            this.setCompany(brand, doc, ".company_name_center p");
            this.setAreaAndDateAndIndustry(brand, doc, ".brandCon");
            this.setImages(brand, doc, "#paracontent img");
            this.setInfo(brand, doc, "#paracontent p");

            System.out.println(JSON.toJSONString(brand));
        } catch (Exception e) {
            e.printStackTrace();
        }
        return Boolean.FALSE;
    }

    private void setInfo(ChinaBrand brand, Document doc, String tag) {
        Elements elements = doc.select(tag);
        StringBuilder sb = new StringBuilder();
        for (Element e : elements) {
            sb.append(e.toString());
        }
        brand.setInfo(sb.toString());
    }

    private void setImages(ChinaBrand brand, Document doc, String tag) {
        List<String> images = new ArrayList<>(10);
        Elements elements = doc.select(tag);
        for (Element e : elements) {
            String image = e.attr("src");
            images.add(image);
        }
        brand.setImages(images);
    }

    private void setAreaAndDateAndIndustry(ChinaBrand brand, Document doc, String tag) {
        JSONObject json = new JSONObject();
        Elements elements = doc.select(tag);
        for (Element e : elements) {
            Element class0 = e.getElementsByClass("jiucuo").get(0);
            String area = class0.child(0).text();
            brand.setArea(area);

            Element class1 = e.getElementsByClass("jiucuo").get(1);
            String date = class1.child(0).text();
            brand.setDate(date);

            Element class2 = e.getElementsByClass("jiucuo").get(2);
            String industry = class2.child(0).text();
            brand.setIndustry(industry);
        }
    }

    private void setCompany(ChinaBrand brand, Document doc, String tag) {
        JSONObject json = new JSONObject();
        Elements elements = doc.select(tag);
        for (Element e : elements) {
            String company = e.text();
            brand.setCompany(company);
        }
    }

    public void setLogAndName(ChinaBrand brand, Document doc, String tag) {
        JSONObject json = new JSONObject();
        Elements elements = doc.select(tag);
        for (Element e : elements) {
            String log = e.attr("src");
            String name = e.attr("alt");
            brand.setLogo(log);
            brand.setName(name);
        }
    }
}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

等一场春雨

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值