理论原理
Maven 依赖
<!-- jsoup爬虫 -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.15.4</version>
</dependency>
代码实现
package org.example;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
/**
 * Crawls the Steam store "specials" search listing with jsoup and prints the
 * name, image URL, release date and price information of every result row.
 *
 * <p>Progress is kept in {@link #currentPage}, so a retry after a failed request
 * continues from the page that failed rather than starting over.
 */
public class Main {
    /** Search URL prefix; the 1-based page number is appended to it. */
    private static final String SEARCH_URL =
            "https://store.steampowered.com/search?specials=1&ndl=1&page=";

    /** Connect/read timeout passed to jsoup for every request, in milliseconds. */
    private static final int TIMEOUT_MILLIS = 30 * 1000;

    /** Maximum number of consecutive failures on the same page before giving up. */
    private static final int MAX_CONSECUTIVE_FAILURES = 10;

    /** Pause between retries so a failing server is not hit in a tight loop. */
    private static final long RETRY_DELAY_MILLIS = 2_000L;

    /** Page to (re)start from; points at the page being fetched when a request fails. */
    static int currentPage = 1;

    public static void main(String[] args) {
        data();
    }

    /**
     * Runs the crawl, retrying after I/O failures.
     *
     * <p>Each retry resumes at {@link #currentPage}. The retry loop stops when the
     * crawl completes, when the same page has failed
     * {@link #MAX_CONSECUTIVE_FAILURES} times in a row, or when the thread is
     * interrupted while waiting between attempts.
     */
    static void data() {
        int sum = 1;                  // running count of all failures, used in the log line
        int consecutiveFailures = 0;  // failures on the same page without any progress
        int lastFailedPage = -1;
        while (true) {
            try {
                getDataList();
                break;
            } catch (IOException e) {
                e.printStackTrace();
                System.out.println("data: request failed " + sum);
                sum++;

                // A failure on a different page means progress was made in between,
                // so the consecutive-failure budget starts over.
                if (currentPage != lastFailedPage) {
                    lastFailedPage = currentPage;
                    consecutiveFailures = 0;
                }
                consecutiveFailures++;
                if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
                    System.out.println("data: giving up at page " + currentPage
                            + " after " + consecutiveFailures + " consecutive failures");
                    return;
                }

                try {
                    Thread.sleep(RETRY_DELAY_MILLIS);
                } catch (InterruptedException interrupted) {
                    // Restore the interrupt flag and stop retrying.
                    Thread.currentThread().interrupt();
                    return;
                }
            }
        }
    }

    /**
     * Fetches every result page starting at {@link #currentPage} and prints its rows.
     *
     * <p>The total page count is read from the pagination links of the first page
     * fetched by this call.
     *
     * @throws IOException if any page request fails; {@link #currentPage} then
     *                     identifies the page that could not be fetched
     */
    static void getDataList() throws IOException {
        Document document = fetchPage(currentPage);
        int n = parseTotalPages(document, currentPage);
        System.out.println("getDataList: total page is " + n);
        getData(document);
        while (currentPage < n) {
            // Advance before fetching so that, on failure, currentPage names the
            // page that still has to be retrieved.
            currentPage++;
            document = fetchPage(currentPage);
            getData(document);
        }
    }

    /** Downloads and parses one page of the search listing. */
    private static Document fetchPage(int page) throws IOException {
        return Jsoup.connect(SEARCH_URL + page)
                .userAgent("*")
                .timeout(TIMEOUT_MILLIS)
                .get();
    }

    /**
     * Returns the largest page number found among the pagination links, or
     * {@code fallback} when no numeric link is larger (for example when the
     * listing has a single page and no pagination links at all).
     *
     * <p>Scanning all links avoids depending on the position of a particular
     * anchor, which presumably changes with the page being viewed because
     * previous/next links come and go (not verified against the live markup).
     */
    private static int parseTotalPages(Document document, int fallback) {
        int total = fallback;
        for (Element link : document.select("div.search_pagination_right a")) {
            String label = link.text();
            // Up to nine digits always fits in an int; this also skips "<" / ">" links.
            if (label.matches("\\d{1,9}")) {
                total = Math.max(total, Integer.parseInt(label));
            }
        }
        return total;
    }

    /**
     * Prints the details of every search result row found in {@code document}.
     *
     * @param document a parsed search result page
     */
    private static void getData(Document document) {
        Elements rows = document.select("a.search_result_row.ds_collapse_flag");
        for (Element row : rows) {
            String name = row.select("span.title").text();
            String img = row.select("div.col.search_capsule img").attr("src");
            String time = row.select("div.search_released.responsive_secondrow").text();
            String discount = row.select("div.discount_pct").text();
            String original = row.select("div.discount_original_price").text();
            String current = row.select("div.discount_final_price").text();
            System.out.println("----------------------------------------------------------");
            System.out.println("getData: name -> " + name);
            System.out.println("getData: img -> " + img);
            System.out.println("getData: time -> " + time);
            System.out.println("getData: discount -> " + discount);
            System.out.println("getData: original -> " + original);
            System.out.println("getData: current -> " + current);
        }
    }
}
运行结果