import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
/**
 * Command-line crawler that starts at the JD.com category index
 * ({@code https://www.jd.com/allSort.aspx}), visits every result page of every
 * category, and prints the title and price of each product it finds.
 *
 * <p>Failures while processing a single product are reported on stderr and
 * skipped; failures while listing categories or pages propagate out of
 * {@link #main(String[])}.
 */
public class Test3 {

    /** Reused for all JSON parsing instead of building a new mapper per request. */
    private static final ObjectMapper MAPPER = new ObjectMapper();

    /**
     * Number of leading characters dropped from the description response before it
     * is parsed as JSON (the last character is dropped as well).
     * NOTE(review): presumably the length of a JSONP callback prefix - confirm
     * against the actual response of the description endpoint.
     */
    private static final int DESC_PREFIX_LENGTH = 9;

    /**
     * Entry point: crawls every category returned by {@link #getFLList()}.
     *
     * @param args unused
     * @throws Exception if the category index or any category/page listing cannot be loaded
     */
    public static void main(String[] args) throws Exception {
        // All category URLs.
        for (String categoryUrl : getFLList()) {
            // Process one category.
            handleFL(categoryUrl);
        }
    }

    /**
     * Processes one category by visiting each of its result pages.
     *
     * @param url category listing URL
     * @throws Exception if the page count or any result page cannot be loaded
     */
    private static void handleFL(String url) throws Exception {
        // URLs of every result page in this category.
        for (String pageUrl : getAllPageLink(url)) {
            // Process one page.
            handlePage(pageUrl);
        }
    }

    /**
     * Processes one result page by handling every product linked from it.
     *
     * @param url result page URL
     * @throws Exception if the page cannot be loaded
     */
    private static void handlePage(String url) throws Exception {
        // URLs of the products listed on this page.
        for (String itemUrl : getItemLinkList(url)) {
            // Process one product.
            handleItem(itemUrl);
        }
    }

    /**
     * Processes one product: fetches its title, price and description, then prints
     * the title and price to stdout.
     *
     * <p>Any failure is reported on stderr and the product is skipped, so one bad
     * product does not stop the crawl.
     *
     * @param url product page URL; the id is the text between the last {@code '/'}
     *            and the last {@code '.'}
     * @throws Exception declared for signature compatibility; failures are handled here
     */
    private static void handleItem(String url) throws Exception {
        try {
            // Validate the URL before spending any network requests on it.
            String id = extractItemId(url);
            String title = getTitle(url);
            double price = getPrice(id);
            // The description is still fetched (a failure here skips the item, as
            // before) but it is not printed.
            getDesc(id);
            System.out.println("title:" + title);
            System.out.println("price:" + price);
            System.out.println("===================================");
        } catch (Exception e) {
            // Best-effort per item: report the failure instead of hiding it.
            System.err.println("Skipping item " + url + ": " + e);
        }
    }

    /**
     * Extracts the product id from a product URL.
     *
     * @param url product page URL
     * @return the non-empty text between the last {@code '/'} and the last {@code '.'}
     * @throws IllegalArgumentException if the URL contains no such segment
     */
    private static String extractItemId(String url) {
        int start = url.lastIndexOf('/') + 1;
        int end = url.lastIndexOf('.');
        if (end <= start) {
            throw new IllegalArgumentException("Cannot extract item id from URL: " + url);
        }
        return url.substring(start, end);
    }

    /**
     * Fetches the product title.
     *
     * @param url product page URL
     * @return text of the first {@code div.sku-name} element
     * @throws Exception if the page cannot be loaded or has no {@code div.sku-name} element
     */
    private static String getTitle(String url) throws Exception {
        Element nameElement = Jsoup.connect(url).get().selectFirst("div.sku-name");
        if (nameElement == null) {
            // selectFirst yields null when nothing matches; fail with context instead of an NPE.
            throw new IllegalStateException("No div.sku-name element found at " + url);
        }
        return nameElement.text();
    }

    /**
     * Fetches the product price.
     *
     * @param id product id
     * @return the {@code "p"} field of the first entry in the price response, parsed as a double
     * @throws Exception if the request fails, the response is not the expected JSON array,
     *                   or the price is missing or not numeric
     */
    private static double getPrice(String id) throws Exception {
        String url = "https://p.3.cn/prices/mgets?skuIds=" + id;
        String body = Jsoup.connect(url).ignoreContentType(true).execute().body();
        List<Map<String, String>> entries =
                MAPPER.readValue(body, new TypeReference<List<Map<String, String>>>() {});
        if (entries == null || entries.isEmpty()) {
            throw new IllegalStateException("Price service returned no entries for sku " + id);
        }
        Map<String, String> first = entries.get(0);
        String price = first == null ? null : first.get("p");
        if (price == null) {
            throw new IllegalStateException("Price entry for sku " + id + " has no \"p\" field");
        }
        return Double.parseDouble(price);
    }

    /**
     * Fetches the product description.
     *
     * @param id product id
     * @return the {@code "content"} field of the description response, or {@code null}
     *         if the response carries no payload or has no such field
     * @throws Exception if the request fails or the payload is not a JSON object
     */
    private static String getDesc(String id) throws Exception {
        String url = "http://d.3.cn/desc/" + id;
        String body = Jsoup.connect(url).ignoreContentType(true).execute().body();
        if (body == null) {
            return null;
        }
        // The payload sits between the fixed-length prefix and the final character.
        int payloadEnd = body.length() - 1;
        if (payloadEnd <= DESC_PREFIX_LENGTH) {
            // Empty or too short to contain any payload.
            return null;
        }
        String json = body.substring(DESC_PREFIX_LENGTH, payloadEnd);
        Map<String, Object> fields =
                MAPPER.readValue(json, new TypeReference<Map<String, Object>>() {});
        Object content = fields == null ? null : fields.get("content");
        return content == null ? null : content.toString();
    }

    /**
     * Fetches all category listing URLs from the category index page.
     *
     * @return every {@code dl.clearfix dd a} link that, once prefixed with
     *         {@code "http:"}, starts with {@code http://list.jd.com}
     * @throws Exception if the index page cannot be loaded
     */
    private static List<String> getFLList() throws Exception {
        String url = "https://www.jd.com/allSort.aspx";
        Elements anchors = Jsoup.connect(url).get().select("dl.clearfix dd a");
        List<String> categoryUrls = new ArrayList<>();
        for (Element anchor : anchors) {
            // Assumes hrefs are protocol-relative ("//host/path"); anything else
            // fails the prefix check below and is dropped.
            String href = "http:" + anchor.attr("href");
            if (href.startsWith("http://list.jd.com")) {
                categoryUrls.add(href);
            }
        }
        return categoryUrls;
    }

    /**
     * Fetches the highest page number of a category.
     *
     * @param url category listing URL
     * @return the integer shown in the first {@code div.f-pager i} element
     * @throws Exception if the page cannot be loaded, the pager element is missing,
     *                   or its text is not an integer
     */
    private static int getMaxPage(String url) throws Exception {
        Element pager = Jsoup.connect(url).get().selectFirst("div.f-pager i");
        if (pager == null) {
            throw new IllegalStateException("No div.f-pager i element found at " + url);
        }
        return Integer.parseInt(pager.text());
    }

    /**
     * Builds the URL of every result page of a category.
     *
     * @param url category listing URL
     * @return one URL per page, from page 1 to the category's maximum page
     * @throws Exception if the maximum page number cannot be determined
     */
    private static List<String> getAllPageLink(String url) throws Exception {
        int maxPage = getMaxPage(url);
        // Start the query string if the URL does not have one yet.
        String separator = url.contains("?") ? "&" : "?";
        List<String> pageUrls = new ArrayList<>();
        for (int page = 1; page <= maxPage; page++) {
            pageUrls.add(url + separator + "page=" + page);
        }
        return pageUrls;
    }

    /**
     * Fetches the product links on one result page.
     *
     * @param url result page URL
     * @return every non-empty {@code li.gl-item div.p-name a} href, prefixed with {@code "http:"}
     * @throws Exception if the page cannot be loaded
     */
    private static List<String> getItemLinkList(String url) throws Exception {
        Elements anchors = Jsoup.connect(url).get().select("li.gl-item div.p-name a");
        List<String> itemUrls = new ArrayList<>();
        for (Element anchor : anchors) {
            String href = anchor.attr("href");
            if (href.isEmpty()) {
                // An anchor without an href would yield the unusable URL "http:".
                continue;
            }
            itemUrls.add("http:" + href);
        }
        return itemUrls;
    }
}