获取虾皮商品列表
该程序使用Jsoup库抓取Shopee（虾皮）网站上的商品信息。它首先设置包含cookie和user-agent的请求头，然后构建用于按指定关键字搜索商品的URL，接着发送HTTP请求并解析返回的响应文档。
如果返回的文档中包含错误信息，程序会输出"Error: not login"并终止执行。否则，它会将返回的JSON数据转换为Java Map对象，并从中提取商品列表。对于每个商品，它会获取商品ID、店铺ID以及基本信息（包括图片URL、名称、价格等），并将这些信息存储在一个Map中。最后，它将每个商品对应的Map添加到一个列表中，并打印该列表。
import com.google.gson.Gson;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Fetches one page of Shopee Indonesia search results for a fixed keyword and prints a
 * simplified record (URL, ids, image, name, prices) for every item returned.
 *
 * <p>The search endpoint answers with JSON, so the response body is read as raw text and
 * decoded with Gson instead of being run through jsoup's HTML parser.
 */
public class Main {

    /** Search endpoint; the five placeholders are keyword, limit, newest, order and view session id. */
    private static final String SEARCH_URL_TEMPLATE =
            "https://shopee.co.id/api/v4/search/search_items?by=relevancy&keyword=%s&limit=%s&newest=%s&order=%s&page_type=search&scene=PAGE_GLOBAL_SEARCH&version=2&view_session_id=%s";

    /** Prefix prepended to an item's image identifier to form the image URL. */
    private static final String IMAGE_BASE_URL = "https://down-id.img.susercontent.com/file/";

    /** Prefix of a product page URL; followed by {@code <shopid>/<itemid>}. */
    private static final String PRODUCT_BASE_URL = "https://shopee.co.id/product/";

    /**
     * Runs the search request and prints the extracted item list to standard output.
     *
     * <p>Prints {@code Error: not login} when the response carries an error marker and
     * {@code No items found} when it has no items. I/O failures are reported on standard error.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        Map<String, String> header = new HashMap<>();
        // NOTE(review): this is a captured browser session cookie embedded in source; it looks
        // like it should be supplied from outside the code (and will expire) — confirm.
        header.put("cookie", "REC_T_ID=2d9c51dd-52d1-11ef-b77d-72066b72d660; SPC_F=dA6J0HhS43jnOFd8yT37iTeOptrmM40D; _gcl_au=1.1.198480896.1722824385; _med=refer; _QPWSDCXHZQA=dedd3c1c-d18a-401a-d928-4bb2eef96b2c; REC7iLP4Q=84dd6912-42be-4b34-9db5-fc9da2e1c344; _fbp=fb.2.1722824385629.4146891893725022; SPC_CLIENTID=ZEE2SjBIaFM0M2puujvghsknmckjpmsq; _ga_SW6D8G0HXK=deleted; _ga=GA1.1.405897120.1722824414; __LOCALE__null=ID; csrftoken=1949xq2BdSaJ7DKayanWMaQIgsITsOsU; _sapid=b4857b4875b3cb2eb1d8b74a0fa368b530dd396039b8588a9e9642ff; SPC_SEC_SI=v1-azRkZGE4N0p1N3VacjA1Q6u9LTUH6TxEJANIs6gPK0tkIW5PtjATRS85fH1BW4E2iYXFDf5mc03tHMHDnW8HMrHWDHDg/XSDKCjN28XbBG8KbQK=; SPC_SI=9COjZgAAAABtbU1QSGF1bdRprQcAAAAAb29wampwY0cA; SPC_CDS_CHAT=8e5fe705-7e40-4a0b-86b9-fd8a3a430d75; ACCEPT_LANGUAGE=en-US,en; ACCEPT_ENCODING=gzip, deflate, br; ACCEPT_CHARSET=ISO-8859-1,utf-8; VIEWPORT_WIDTH=1920; UPGRADED_INSECURE_REQUESTS=1; DNT=1; CONNECTION=keep-alive; TE=trailers");
        header.put("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36");
        try {
            // The keyword is already percent-encoded, so it can be placed in the URL verbatim.
            String keyword = "kabel%20data%20charger%20authentic%20polos%205%20warna%20murah";
            String limit = "60";
            String newest = "0";
            String order = "desc";
            String url = String.format(SEARCH_URL_TEMPLATE, keyword, limit, newest, order, UUID.randomUUID());

            // jsoup rejects non-HTML/XML content types unless told to ignore them, and its HTML
            // parser would alter the payload; take the raw body text instead.
            String json = Jsoup.connect(url)
                    .headers(header)
                    .ignoreContentType(true)
                    .execute()
                    .body();

            // Gson returns null for an empty body.
            Map<?, ?> data = new Gson().fromJson(json, Map.class);
            if (data != null && hasError(data)) {
                System.out.println("Error: not login");
                return;
            }

            Object rawItems = data == null ? null : data.get("items");
            if (!(rawItems instanceof List) || ((List<?>) rawItems).isEmpty()) {
                System.out.println("No items found");
                return;
            }

            System.out.println(extractProducts((List<?>) rawItems));
        } catch (IOException e) {
            System.err.println("Failed to fetch search results: " + e.getMessage());
            e.printStackTrace();
        }
    }

    /**
     * Tells whether the decoded response carries an error marker in its {@code error} field.
     *
     * @param response decoded top-level JSON object
     * @return {@code true} when {@code error} is present and is not a numeric zero
     */
    private static boolean hasError(Map<?, ?> response) {
        // Assumes a missing, null or zero "error" field means success — TODO confirm against
        // real responses from the endpoint.
        Object error = response.get("error");
        if (error == null) {
            return false;
        }
        if (error instanceof Number) {
            return ((Number) error).doubleValue() != 0;
        }
        return true;
    }

    /**
     * Converts the raw decoded items into flat string records.
     *
     * <p>Entries that are not JSON objects, have no {@code item_basic} object, or lack
     * {@code itemid}/{@code shopid} are skipped because no product URL can be built for them.
     *
     * @param items elements of the response's {@code items} array as decoded by Gson
     * @return one record per usable item, in response order
     */
    private static List<Map<String, String>> extractProducts(List<?> items) {
        List<Map<String, String>> result = new ArrayList<>();
        for (Object rawItem : items) {
            if (!(rawItem instanceof Map)) {
                continue;
            }
            Map<?, ?> item = (Map<?, ?>) rawItem;
            Object rawItemId = item.get("itemid");
            Object rawShopId = item.get("shopid");
            Object rawBasic = item.get("item_basic");
            if (rawItemId == null || rawShopId == null || !(rawBasic instanceof Map)) {
                continue;
            }
            Map<?, ?> itemBasic = (Map<?, ?>) rawBasic;

            // Gson decodes untyped JSON numbers as Double, so ids must be converted, not cast.
            String itemId = toPlainString(rawItemId);
            String shopId = toPlainString(rawShopId);
            Object image = itemBasic.get("image");
            Object name = itemBasic.get("name");

            Map<String, String> dData = new HashMap<>();
            dData.put("itemurl", PRODUCT_BASE_URL + shopId + "/" + itemId);
            dData.put("itemid", itemId);
            dData.put("shopid", shopId);
            // Leave the image unset rather than emitting a URL that ends in "null".
            dData.put("image", image == null ? null : IMAGE_BASE_URL + image);
            dData.put("name", name == null ? null : String.valueOf(name));
            dData.put("price", toPlainString(itemBasic.get("price")));
            dData.put("price_max", toPlainString(itemBasic.get("price_max")));
            dData.put("price_min", toPlainString(itemBasic.get("price_min")));
            result.add(dData);
        }
        return result;
    }

    /**
     * Renders a decoded JSON value as text, printing whole-number floating-point values
     * without an exponent or a trailing {@code .0}.
     *
     * @param value decoded JSON value, possibly {@code null}
     * @return plain decimal text for whole-number doubles/floats, otherwise
     *     {@link String#valueOf(Object)} of the value (so {@code null} becomes {@code "null"})
     */
    private static String toPlainString(Object value) {
        if (value instanceof Double || value instanceof Float) {
            double d = ((Number) value).doubleValue();
            // NaN fails the equality test and infinities fail the range test, so the cast to
            // long below is always exact for the values that reach it.
            if (d == Math.rint(d) && Math.abs(d) < 9.0e18) {
                return Long.toString((long) d);
            }
        }
        return String.valueOf(value);
    }
}