准备
我们打开一个b站首页,选择番剧索引。
按F12打开开发者工具,查看b站的api;当然也可以选择解析dom元素来获取数据,两种方式之间还是有不同之处。
如果调用b站的api,使用b站的JSON数据,需要相应的entity,比较花时间。
而使用dom元素,需要去人工解析网站的class、id,各种选择器,两者之间都有一些局限性。
本人练习的是JSON格式,dom就留给你们了。反正都是重复劳动,没意思。
看下面这张图
这个地址就是B站返回的响应数据,好在没有加密。
再使用在线JSON格式化网站把它格式化
当当当!!!
所有的格式完整的出来了,现在需要我们为这些JSON创建不同的bean。
package com.hikktn.pojo;
import com.alibaba.fastjson.annotation.JSONField;
import java.io.Serializable;
/**
 * Envelope object for the JSON document returned by the bilibili index API.
 * <p>
 * The JSON keys {@code code}, {@code data} and {@code message} are bound to the
 * bean properties of the same name through fastjson's {@code @JSONField}.
 *
 * @author lisonglin
 * @version 1.0
 */
public class Response implements Serializable {

    /** Value of the {@code code} key. */
    @JSONField(name = "code")
    private int code;

    /** Payload found under the {@code data} key. */
    @JSONField(name = "data")
    private Animation data;

    /** Value of the {@code message} key. */
    @JSONField(name = "message")
    private String message;

    /** Creates an empty instance; properties are filled in through the setters. */
    public Response() {
    }

    /**
     * Creates a fully populated instance.
     *
     * @param code    value of the {@code code} key
     * @param data    payload of the {@code data} key
     * @param message value of the {@code message} key
     */
    public Response(int code, Animation data, String message) {
        this.code = code;
        this.data = data;
        this.message = message;
    }

    public int getCode() {
        return this.code;
    }

    public void setCode(int code) {
        this.code = code;
    }

    public Animation getData() {
        return this.data;
    }

    public void setData(Animation data) {
        this.data = data;
    }

    public String getMessage() {
        return this.message;
    }

    public void setMessage(String message) {
        this.message = message;
    }

    /** Renders all three properties in the form {@code Response{code=..., data=..., message='...'}}. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("Response{");
        text.append("code=").append(this.code);
        text.append(", data=").append(this.data);
        text.append(", message='").append(this.message).append('\'');
        text.append('}');
        return text.toString();
    }
}
package com.hikktn.pojo;
import com.alibaba.fastjson.annotation.JSONField;
import java.io.Serializable;
import java.util.List;
/**
 * Payload of the {@code data} key of a {@link Response}: one page of index entries
 * plus the paging counters that accompany it.
 * <p>
 * JSON keys are bound through fastjson's {@code @JSONField}; note that the JSON key
 * {@code list} is exposed as the {@code animationDetailedInfos} property.
 *
 * @author lisonglin
 * @version 1.0
 */
public class Animation implements Serializable {

    /** Value of the {@code has_next} key. */
    @JSONField(name = "has_next")
    private int hasNext;

    /** Entries found under the {@code list} key. */
    @JSONField(name = "list")
    private List<AnimationDetailedInfo> animationDetailedInfos;

    /** Value of the {@code num} key. */
    @JSONField(name = "num")
    private Integer num;

    /** Value of the {@code size} key. */
    @JSONField(name = "size")
    private Integer size;

    /** Value of the {@code total} key. */
    @JSONField(name = "total")
    private Long total;

    /** Creates an empty instance; properties are filled in through the setters. */
    public Animation() {
    }

    /**
     * Creates an instance carrying only the entry list and the {@code has_next} flag.
     *
     * @param hasNext                value of the {@code has_next} key
     * @param animationDetailedInfos entries of the {@code list} key
     */
    public Animation(int hasNext, List<AnimationDetailedInfo> animationDetailedInfos) {
        this.hasNext = hasNext;
        this.animationDetailedInfos = animationDetailedInfos;
    }

    /**
     * Creates a fully populated instance.
     *
     * @param hasNext                value of the {@code has_next} key
     * @param animationDetailedInfos entries of the {@code list} key
     * @param num                    value of the {@code num} key
     * @param size                   value of the {@code size} key
     * @param total                  value of the {@code total} key
     */
    public Animation(int hasNext, List<AnimationDetailedInfo> animationDetailedInfos, Integer num, Integer size,
                     Long total) {
        this.hasNext = hasNext;
        this.animationDetailedInfos = animationDetailedInfos;
        this.num = num;
        this.size = size;
        this.total = total;
    }

    public int getHasNext() {
        return hasNext;
    }

    public void setHasNext(int hasNext) {
        this.hasNext = hasNext;
    }

    public List<AnimationDetailedInfo> getAnimationDetailedInfos() {
        return animationDetailedInfos;
    }

    public void setAnimationDetailedInfos(List<AnimationDetailedInfo> animationDetailedInfos) {
        this.animationDetailedInfos = animationDetailedInfos;
    }

    public Integer getNum() {
        return num;
    }

    public void setNum(Integer num) {
        this.num = num;
    }

    public Integer getSize() {
        return size;
    }

    public void setSize(Integer size) {
        this.size = size;
    }

    public Long getTotal() {
        return total;
    }

    public void setTotal(Long total) {
        this.total = total;
    }

    /**
     * Renders every property. Without this override, printing a {@link Response}
     * would show only the identity hash of its {@code data} payload.
     */
    @Override
    public String toString() {
        return "Animation{" + "hasNext=" + hasNext + ", animationDetailedInfos=" + animationDetailedInfos
                + ", num=" + num + ", size=" + size + ", total=" + total + '}';
    }
}
package com.hikktn.pojo;
import com.alibaba.fastjson.annotation.JSONField;
import java.io.Serializable;
/**
 * One entry of the {@code list} array inside an {@link Animation} page.
 * <p>
 * Every property is bound to its snake_case JSON key through fastjson's
 * {@code @JSONField}.
 *
 * @author lisonglin
 * @version 1.0
 */
public class AnimationDetailedInfo implements Serializable {

    /** Value of the {@code badge} key. */
    @JSONField(name = "badge")
    private String badge;

    /** Object found under the {@code badge_info} key. */
    @JSONField(name = "badge_info")
    private BadgeInfo badgeInfo;

    /** Value of the {@code badge_type} key. */
    @JSONField(name = "badge_type")
    private int badgeType;

    /** Value of the {@code cover} key. */
    @JSONField(name = "cover")
    private String cover;

    /** Value of the {@code index_show} key. */
    @JSONField(name = "index_show")
    private String indexShow;

    /** Value of the {@code is_finish} key. */
    @JSONField(name = "is_finish")
    private int isFinish;

    /** Value of the {@code link} key. */
    @JSONField(name = "link")
    private String link;

    /** Value of the {@code media_id} key. */
    @JSONField(name = "media_id")
    private Long mediaId;

    /** Value of the {@code order} key. */
    @JSONField(name = "order")
    private String order;

    /** Value of the {@code order_type} key. */
    @JSONField(name = "order_type")
    private String orderType;

    /** Value of the {@code season_id} key. */
    @JSONField(name = "season_id")
    private Long seasonId;

    /** Value of the {@code season_type} key. */
    @JSONField(name = "season_type")
    private Integer seasonType;

    /** Value of the {@code title} key. */
    @JSONField(name = "title")
    private String title;

    /** Value of the {@code title_icon} key. */
    @JSONField(name = "title_icon")
    private String titleIcon;

    /** Creates an empty instance; properties are filled in through the setters. */
    public AnimationDetailedInfo() {
    }

    /**
     * Creates a fully populated instance; each argument is stored in the property
     * of the same name.
     */
    public AnimationDetailedInfo(String badge, BadgeInfo badgeInfo, int badgeType, String cover, String indexShow,
                                 int isFinish, String link, Long mediaId, String order, String orderType,
                                 Long seasonId, Integer seasonType, String title, String titleIcon) {
        this.badge = badge;
        this.badgeInfo = badgeInfo;
        this.badgeType = badgeType;
        this.cover = cover;
        this.indexShow = indexShow;
        this.isFinish = isFinish;
        this.link = link;
        this.mediaId = mediaId;
        this.order = order;
        this.orderType = orderType;
        this.seasonId = seasonId;
        this.seasonType = seasonType;
        this.title = title;
        this.titleIcon = titleIcon;
    }

    public String getBadge() {
        return badge;
    }

    public void setBadge(String badge) {
        this.badge = badge;
    }

    public BadgeInfo getBadgeInfo() {
        return badgeInfo;
    }

    public void setBadgeInfo(BadgeInfo badgeInfo) {
        this.badgeInfo = badgeInfo;
    }

    public int getBadgeType() {
        return badgeType;
    }

    public void setBadgeType(int badgeType) {
        this.badgeType = badgeType;
    }

    public String getCover() {
        return cover;
    }

    public void setCover(String cover) {
        this.cover = cover;
    }

    public String getIndexShow() {
        return indexShow;
    }

    public void setIndexShow(String indexShow) {
        this.indexShow = indexShow;
    }

    public int getIsFinish() {
        return isFinish;
    }

    public void setIsFinish(int isFinish) {
        this.isFinish = isFinish;
    }

    public String getLink() {
        return link;
    }

    public void setLink(String link) {
        this.link = link;
    }

    public Long getMediaId() {
        return mediaId;
    }

    public void setMediaId(Long mediaId) {
        this.mediaId = mediaId;
    }

    public String getOrder() {
        return order;
    }

    public void setOrder(String order) {
        this.order = order;
    }

    public String getOrderType() {
        return orderType;
    }

    public void setOrderType(String orderType) {
        this.orderType = orderType;
    }

    public Long getSeasonId() {
        return seasonId;
    }

    public void setSeasonId(Long seasonId) {
        this.seasonId = seasonId;
    }

    public Integer getSeasonType() {
        return seasonType;
    }

    public void setSeasonType(Integer seasonType) {
        this.seasonType = seasonType;
    }

    public String getTitle() {
        return title;
    }

    public void setTitle(String title) {
        this.title = title;
    }

    public String getTitleIcon() {
        return titleIcon;
    }

    public void setTitleIcon(String titleIcon) {
        this.titleIcon = titleIcon;
    }

    /**
     * Renders every property so that an entry is readable when it is printed as part
     * of its enclosing list instead of showing only an identity hash.
     */
    @Override
    public String toString() {
        return "AnimationDetailedInfo{" + "badge='" + badge + '\'' + ", badgeInfo=" + badgeInfo
                + ", badgeType=" + badgeType + ", cover='" + cover + '\'' + ", indexShow='" + indexShow + '\''
                + ", isFinish=" + isFinish + ", link='" + link + '\'' + ", mediaId=" + mediaId
                + ", order='" + order + '\'' + ", orderType='" + orderType + '\'' + ", seasonId=" + seasonId
                + ", seasonType=" + seasonType + ", title='" + title + '\'' + ", titleIcon='" + titleIcon + '\''
                + '}';
    }
}
package com.hikktn.pojo;
import com.alibaba.fastjson.annotation.JSONField;
import java.io.Serializable;
/**
 * Object found under the {@code badge_info} key of an index entry.
 * <p>
 * The JSON keys {@code bg_color}, {@code bg_color_night} and {@code text} are bound
 * through fastjson's {@code @JSONField}.
 *
 * @author lisonglin
 * @version 1.0
 */
public class BadgeInfo implements Serializable {

    /** Value of the {@code bg_color} key. */
    @JSONField(name = "bg_color")
    private String bgColor;

    /** Value of the {@code bg_color_night} key. */
    @JSONField(name = "bg_color_night")
    private String bgColorNight;

    /** Value of the {@code text} key. */
    @JSONField(name = "text")
    private String text;

    /** Creates an empty instance; properties are filled in through the setters. */
    public BadgeInfo() {
    }

    /**
     * Creates a fully populated instance.
     *
     * @param bgColor      value of the {@code bg_color} key
     * @param bgColorNight value of the {@code bg_color_night} key
     * @param text         value of the {@code text} key
     */
    public BadgeInfo(String bgColor, String bgColorNight, String text) {
        this.bgColor = bgColor;
        this.bgColorNight = bgColorNight;
        this.text = text;
    }

    public String getBgColor() {
        return bgColor;
    }

    public void setBgColor(String bgColor) {
        this.bgColor = bgColor;
    }

    public String getBgColorNight() {
        return bgColorNight;
    }

    public void setBgColorNight(String bgColorNight) {
        this.bgColorNight = bgColorNight;
    }

    public String getText() {
        return text;
    }

    public void setText(String text) {
        this.text = text;
    }

    /** Renders all three properties so the object is readable when printed or logged. */
    @Override
    public String toString() {
        return "BadgeInfo{" + "bgColor='" + bgColor + '\'' + ", bgColorNight='" + bgColorNight + '\''
                + ", text='" + text + '\'' + '}';
    }
}
再来分析一下api地址
即便我们不清楚其他参数代表什么意思,但是没关系,只要看到有变化的地方,我们就能确定他是干什么的,而page数其实就是我们在网页上点击第几页的意思。
如果你想要获取超多番剧信息,可以用for循环,我就不用了。
正式开始
mysql
/*
Navicat MySQL Data Transfer
Source Server : localhost
Source Server Version : 50723
Source Host : localhost:3306
Source Database : test
Target Server Type : MYSQL
Target Server Version : 50723
File Encoding : 65001
Date: 2021-05-04 18:06:55
*/
SET FOREIGN_KEY_CHECKS=0;
-- ----------------------------
-- Table structure for bilili_item
-- One row per crawled index entry.
-- utf8mb4 is used instead of utf8 (the 3-byte utf8mb3), which cannot store
-- supplementary characters such as emoji that may occur in titles.
-- The dump's AUTO_INCREMENT=41 table option is dropped so a fresh table starts at 1.
-- ----------------------------
DROP TABLE IF EXISTS `bilili_item`;
CREATE TABLE `bilili_item` (
  `id` bigint(10) NOT NULL AUTO_INCREMENT COMMENT '主键id',
  `image_url` varchar(255) DEFAULT NULL COMMENT '图片地址',
  `title` varchar(100) DEFAULT NULL COMMENT '标题',
  `page_views` varchar(50) DEFAULT NULL COMMENT '浏览量',
  `type` varchar(10) DEFAULT NULL COMMENT '类型',
  `episode` varchar(20) DEFAULT NULL COMMENT '集数',
  `url` varchar(200) DEFAULT NULL COMMENT '番剧详细地址',
  `created_time` datetime DEFAULT NULL COMMENT '创建时间',
  `updated_time` datetime DEFAULT NULL COMMENT '修改时间',
  PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
pom
<!-- Inherit plugin and dependency management from Spring Boot; dependencies below
     that declare no <version> take their version from this parent. -->
<parent>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-parent</artifactId>
<version>2.2.5.RELEASE</version>
</parent>
<dependencies>
<!-- Spring MVC -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<!-- Spring Data JPA -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-jpa</artifactId>
</dependency>
<!-- MySQL JDBC driver -->
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
</dependency>
<!-- WebMagic crawler framework (core + extension) -->
<dependency>
<groupId>us.codecraft</groupId>
<artifactId>webmagic-core</artifactId>
<version>0.7.4</version>
</dependency>
<dependency>
<groupId>us.codecraft</groupId>
<artifactId>webmagic-extension</artifactId>
<version>0.7.4</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.httpcomponents/httpclient -->
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.5.2</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.slf4j/slf4j-log4j12 -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<version>1.7.24</version>
</dependency>
<!-- NOTE(review): junit is declared without <scope>test</scope>, so it ends up on the
     runtime classpath; confirm whether that is intended. -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.10</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.jsoup/jsoup -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.13.1</version>
</dependency>
<!-- https://mvnrepository.com/artifact/commons-io/commons-io -->
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.3</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.commons/commons-lang3 -->
<!-- Prefer a recent commons-lang3 release: according to the author, an older one does not
     match webmagic-core and fails with a "JavaVersion 11 not found" style error. -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.12.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/com.alibaba/fastjson -->
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.75</version>
</dependency>
</dependencies>
log4j.properties
# Root logger: DEBUG level, routed to the appender named A1.
log4j.rootLogger=DEBUG,A1
# Loggers under the application package com.hikktn are set to DEBUG as well.
log4j.logger.com.hikktn = DEBUG
# A1 writes to the console using a pattern layout.
log4j.appender.A1=org.apache.log4j.ConsoleAppender
log4j.appender.A1.layout=org.apache.log4j.PatternLayout
# Line format: date-time [thread] [logger]-[level] message, then a newline.
log4j.appender.A1.layout.ConversionPattern=%-d{yyyy-MM-dd HH:mm:ss,SSS} [%t] [%c]-[%p] %m%n
application.properties
url最好照着我的写,否则插入mysql的中文会乱码,而且启动spring服务时会报错。
#DB Configuration:
# The pom declares mysql-connector-java without a version, so Spring Boot 2.2.5 manages it
# (Connector/J 8.x). There the legacy class name com.mysql.jdbc.Driver is deprecated and
# only logs a warning before delegating to com.mysql.cj.jdbc.Driver, so use the new name.
spring.datasource.driverClassName=com.mysql.cj.jdbc.Driver
# useUnicode/characterEncoding keep Chinese text intact; serverTimezone is required by Connector/J 8.x.
spring.datasource.url=jdbc:mysql://127.0.0.1:3306/test?serverTimezone=UTC&useUnicode=true&characterEncoding=utf8&useSSL=false
spring.datasource.username=root
# NOTE(review): demo credentials; do not commit real passwords to source control.
spring.datasource.password=123
#JPA Configuration:
spring.jpa.database=MySQL
# Print every generated SQL statement.
spring.jpa.show-sql=true
Application.java
package com.hikktn;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.scheduling.annotation.EnableScheduling;
/**
 * Spring Boot entry point of the crawler application.
 * <p>
 * {@code @EnableScheduling} switches on Spring's scheduled-task support, which
 * {@code @Scheduled} methods in the application rely on.
 *
 * @author lisonglin
 * @version 1.0
 */
// Enable scheduled tasks
@EnableScheduling
@SpringBootApplication
public class Application {

    /**
     * Boots the Spring application context.
     *
     * @param args command-line arguments, passed through to Spring Boot
     */
    public static void main(String[] args) {
        SpringApplication.run(Application.class, args);
    }
}
什么你还不会springdatajpa?
那你还是先学完这篇springdatajpa入门再说
https://blog.csdn.net/qq_41520636/article/details/115472103
本案例使用springdatajpa,因为轻便,简单,轻松。
对了,还有如何自动生成bean,过几天又忘了,还是写了操作流程的博客,方便查阅。
https://blog.csdn.net/qq_41520636/article/details/116380107
接下来,就是垃圾时间
package com.hikktn.pojo;
import com.fasterxml.jackson.annotation.JsonFormat;
import org.springframework.data.annotation.CreatedDate;
import org.springframework.data.annotation.LastModifiedDate;
import org.springframework.format.annotation.DateTimeFormat;
import javax.persistence.*;
import java.util.Date;
/**
 * JPA entity mapped to the {@code bilili_item} table; one instance represents one
 * crawled index entry.
 * <p>
 * All mapping annotations are placed on the fields so the persistence provider uses
 * field access consistently. Mixing field and getter annotations without an explicit
 * {@code @Access} is undefined in JPA, and with {@code @Id} on the getter the
 * field-level {@code @GeneratedValue} and {@code @Temporal} were not applied.
 *
 * @author lisonglin
 * @version 1.0
 */
@Entity
@Table(name = "bilili_item", schema = "test")
public class BililiItemEntity {

    /** Primary key; generated by the database identity (AUTO_INCREMENT) column. */
    @Id
    @GeneratedValue(strategy = GenerationType.IDENTITY)
    @Column(name = "id", nullable = false)
    private long id;

    /** Image URL. */
    @Basic
    @Column(name = "image_url", nullable = true, length = 255)
    private String imageUrl;

    /** Title. */
    @Basic
    @Column(name = "title", nullable = true, length = 100)
    private String title;

    /** Page views. */
    @Basic
    @Column(name = "page_views", nullable = true, length = 50)
    private String pageViews;

    /** Type. */
    @Basic
    @Column(name = "type", nullable = true, length = 10)
    private String type;

    /** Episode information. */
    @Basic
    @Column(name = "episode", nullable = true, length = 20)
    private String episode;

    /** URL of the detail page. */
    @Basic
    @Column(name = "url", nullable = true, length = 200)
    private String url;

    /**
     * Creation time. The column is a {@code datetime}, so it is mapped as TIMESTAMP;
     * DATE would drop the time-of-day part.
     * NOTE(review): {@code @CreatedDate} only has an effect when Spring Data auditing is
     * enabled, which is not visible here; the crawler task sets this value explicitly.
     */
    @CreatedDate
    @JsonFormat(shape = JsonFormat.Shape.STRING, pattern = "yyyy-MM-dd HH:mm:ss", timezone = "GMT+8")
    @DateTimeFormat(pattern = "yyyy-MM-dd HH:mm:ss")
    @Temporal(TemporalType.TIMESTAMP)
    @Basic
    @Column(name = "created_time", nullable = true, updatable = false)
    private Date createdTime;

    /** Last modification time; mapped as TIMESTAMP for the same reason as {@link #createdTime}. */
    @LastModifiedDate
    @JsonFormat(shape = JsonFormat.Shape.STRING, pattern = "yyyy-MM-dd HH:mm:ss", timezone = "GMT+8")
    @DateTimeFormat(pattern = "yyyy-MM-dd HH:mm:ss")
    @Temporal(TemporalType.TIMESTAMP)
    @Basic
    @Column(name = "updated_time", nullable = true)
    private Date updatedTime;

    public long getId() {
        return id;
    }

    public void setId(long id) {
        this.id = id;
    }

    public String getImageUrl() {
        return imageUrl;
    }

    public void setImageUrl(String imageUrl) {
        this.imageUrl = imageUrl;
    }

    public String getTitle() {
        return title;
    }

    public void setTitle(String title) {
        this.title = title;
    }

    public String getPageViews() {
        return pageViews;
    }

    public void setPageViews(String pageViews) {
        this.pageViews = pageViews;
    }

    public String getType() {
        return type;
    }

    public void setType(String type) {
        this.type = type;
    }

    public String getEpisode() {
        return episode;
    }

    public void setEpisode(String episode) {
        this.episode = episode;
    }

    public String getUrl() {
        return url;
    }

    public void setUrl(String url) {
        this.url = url;
    }

    public Date getCreatedTime() {
        return createdTime;
    }

    public void setCreatedTime(Date createdTime) {
        this.createdTime = createdTime;
    }

    public Date getUpdatedTime() {
        return updatedTime;
    }

    public void setUpdatedTime(Date updatedTime) {
        this.updatedTime = updatedTime;
    }

    /** Two entities are equal when they have the same class and every property is equal. */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        BililiItemEntity that = (BililiItemEntity) o;
        if (id != that.id) {
            return false;
        }
        if (imageUrl != null ? !imageUrl.equals(that.imageUrl) : that.imageUrl != null) {
            return false;
        }
        if (title != null ? !title.equals(that.title) : that.title != null) {
            return false;
        }
        if (pageViews != null ? !pageViews.equals(that.pageViews) : that.pageViews != null) {
            return false;
        }
        if (type != null ? !type.equals(that.type) : that.type != null) {
            return false;
        }
        if (episode != null ? !episode.equals(that.episode) : that.episode != null) {
            return false;
        }
        if (url != null ? !url.equals(that.url) : that.url != null) {
            return false;
        }
        if (createdTime != null ? !createdTime.equals(that.createdTime) : that.createdTime != null) {
            return false;
        }
        return updatedTime != null ? updatedTime.equals(that.updatedTime) : that.updatedTime == null;
    }

    /** Hash over the same properties that {@link #equals(Object)} compares. */
    @Override
    public int hashCode() {
        int result = (int) (id ^ (id >>> 32));
        result = 31 * result + (imageUrl != null ? imageUrl.hashCode() : 0);
        result = 31 * result + (title != null ? title.hashCode() : 0);
        result = 31 * result + (pageViews != null ? pageViews.hashCode() : 0);
        result = 31 * result + (type != null ? type.hashCode() : 0);
        result = 31 * result + (episode != null ? episode.hashCode() : 0);
        result = 31 * result + (url != null ? url.hashCode() : 0);
        result = 31 * result + (createdTime != null ? createdTime.hashCode() : 0);
        result = 31 * result + (updatedTime != null ? updatedTime.hashCode() : 0);
        return result;
    }

    @Override
    public String toString() {
        return "BililiItemEntity{" + "id=" + id + ", imageUrl='" + imageUrl + '\'' + ", title='" + title + '\'' + ", " + "pageViews='" + pageViews + '\'' + ", type='" + type + '\'' + ", episode='" + episode + '\'' + ", " + "url='" + url + '\'' + ", createdTime=" + createdTime + ", updatedTime=" + updatedTime + '}';
    }
}
package com.hikktn.dao;
import com.hikktn.pojo.BililiItemEntity;
import org.springframework.data.jpa.repository.JpaRepository;
/**
 * Spring Data JPA repository for {@link BililiItemEntity} rows, keyed by {@code Long}.
 * <p>
 * Declares no methods of its own; everything comes from {@link JpaRepository}.
 *
 * @author lisonglin
 * @version 1.0
 */
public interface BililiItemDao extends JpaRepository<BililiItemEntity, Long> {

}
package com.hikktn.service;
import com.hikktn.pojo.BililiItemEntity;
import java.util.List;
/**
 * Service-layer contract for querying and storing {@link BililiItemEntity} records.
 *
 * @author lisonglin
 * @version 1.0
 */
public interface BililiItemService {

    /**
     * Queries records using the given entity as the search condition.
     *
     * @param item entity carrying the condition values
     * @return the matching records
     */
    List<BililiItemEntity> findAll(BililiItemEntity item);

    /**
     * Saves the given record.
     *
     * @param item entity to store
     */
    void save(BililiItemEntity item);
}
package com.hikktn.service.impl;
import com.hikktn.dao.BililiItemDao;
import com.hikktn.pojo.BililiItemEntity;
import com.hikktn.service.BililiItemService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.domain.Example;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import java.util.List;
/**
 * {@link BililiItemService} implementation that delegates to {@link BililiItemDao}.
 *
 * @author lisonglin
 * @version 1.0
 */
@Service
public class BililiItemServiceImpl implements BililiItemService {

    /** Repository injected by Spring. */
    @Autowired
    private BililiItemDao bililiItemDao;

    /**
     * Runs a query-by-example lookup, using the given entity as the probe.
     *
     * @param bililiItemEntity probe entity handed to {@code Example.of}
     * @return the records returned by the repository for that example
     */
    @Override
    public List<BililiItemEntity> findAll(BililiItemEntity bililiItemEntity) {
        return bililiItemDao.findAll(Example.of(bililiItemEntity));
    }

    /**
     * Stores the given record through the repository inside a transaction.
     *
     * @param item entity to store
     */
    @Transactional
    @Override
    public void save(BililiItemEntity item) {
        bililiItemDao.save(item);
    }
}
正题
package com.hikktn.task;
import com.alibaba.fastjson.JSON;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.hikktn.pojo.BililiItemEntity;
import com.hikktn.pojo.Response;
import com.hikktn.service.BililiItemService;
import com.hikktn.utils.HttpUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;
import java.sql.Timestamp;
import java.util.Date;
/**
 * Scheduled crawler task: fetches the first page of the bilibili index API, converts
 * every returned entry into a {@link BililiItemEntity} and saves it.
 *
 * @author lisonglin
 * @version 1.0
 */
@Component
public class BililiItemTask {

    @Autowired
    private HttpUtils httpUtils;

    @Autowired
    private BililiItemService bililiItemService;

    /**
     * JSON API behind the index page. The query contains {@code &copyright=-1}; the
     * earlier text had it corrupted to the copyright sign by HTML-entity decoding.
     */
    private static final String API_URL = "https://api.bilibili.com/pgc/season/index/result?st=1&order=2"
            + "&season_version=-1" + "&area=-1&is_finish=-1&copyright=-1&season_status=-1&season_month=-1&year=-1"
            + "&style_id=-1&sort=0&page=1" + "&season_type=1&pagesize=20&type=1";

    /** Human-facing index page matching {@link #API_URL}; kept for reference only. */
    private static final String URL = "https://www.bilibili.com/anime/index/#season_version=-1&area=-1&is_finish=-1"
            + "&copyright=-1&season_status=-1&season_month=-1&year=-1&style_id=-1&order=3&st=1&sort=0&page=1";

    /**
     * Fetches the first index page and stores its entries.
     * <p>
     * {@code fixedDelay}: the next run starts 100 seconds after the previous run finished.
     *
     * @throws Exception if fetching or processing fails
     */
    @Scheduled(fixedDelay = 1000 * 100)
    public void process() throws Exception {
        // Fetch the first page of the bilibili index
        String html = this.httpUtils.getHtml(API_URL);
        // Parse the reply and save the extracted data to the database
        this.parseHtmlJson(html);
    }

    /**
     * Parses the API reply and saves one entity per returned entry.
     * <p>
     * The reply is JSON, so it is given to the JSON parser directly. Routing it through
     * an HTML parser first can alter the text (entity decoding, tag stripping, whitespace
     * collapsing). Nothing is saved when the reply is empty, is not marked
     * {@code "success"}, or carries no entry list.
     *
     * @param html raw body returned by the API
     */
    private void parseHtmlJson(String html) {
        if (html == null || html.trim().isEmpty()) {
            return;
        }
        Response response = JSON.parseObject(html, Response.class);
        if (response == null) {
            return;
        }
        System.out.println(response.toString());
        if (!"success".equals(response.getMessage())) {
            return;
        }
        if (response.getData() == null || response.getData().getAnimationDetailedInfos() == null) {
            return;
        }
        // Iterate over the entries actually returned; the "size" field of the reply may be
        // absent or larger than the list (for example on the last page).
        int count = response.getData().getAnimationDetailedInfos().size();
        for (int i = 0; i < count; i++) {
            if (response.getData().getAnimationDetailedInfos().get(i) == null) {
                continue;
            }
            // Title
            String title = response.getData().getAnimationDetailedInfos().get(i).getTitle();
            // Page views
            String order = response.getData().getAnimationDetailedInfos().get(i).getOrder();
            // Type
            String type = response.getData().getAnimationDetailedInfos().get(i).getBadge();
            // Episode information
            String indexShow = response.getData().getAnimationDetailedInfos().get(i).getIndexShow();
            // Image URL
            String cover = response.getData().getAnimationDetailedInfos().get(i).getCover();
            // URL of the detail page
            String link = response.getData().getAnimationDetailedInfos().get(i).getLink();

            BililiItemEntity bililiItemEntity = new BililiItemEntity();
            bililiItemEntity.setTitle(title);
            bililiItemEntity.setPageViews(order);
            bililiItemEntity.setType(type);
            bililiItemEntity.setEpisode(indexShow);
            bililiItemEntity.setImageUrl(cover);
            bililiItemEntity.setUrl(link);
            bililiItemEntity.setCreatedTime(new Date());
            bililiItemEntity.setUpdatedTime(bililiItemEntity.getCreatedTime());

            String image = this.httpUtils.getImage(cover);
            System.out.println(image);
            this.bililiItemService.save(bililiItemEntity);
        }
    }
}
运行
你启动一下spring服务就行,定时器会自动执行。
搞定!