// Java 爬虫(jsoup):爬取斗图吧网站表情包
import java.util.*;
import org.jsoup.*;
import org.jsoup.nodes.*;
import java.io.*;
import org.jsoup.select.*;
/**
 * Small jsoup-based scraper that downloads the images listed on the
 * {@code m.doutub.com/img_lists/miao/<page>} pages and stores them on disk,
 * one directory per listing page.
 */
public class EmotionPackage {

    /** Listing URL prefix; the page number is appended to it. */
    private static final String LIST_URL = "https://m.doutub.com/img_lists/miao/";

    /** CSS selector matching the image tags inside the listing. */
    private static final String IMAGE_SELECTOR = "div.expression-list.clearfix a img";

    /**
     * Entry point: starts downloading from the first listing page.
     *
     * @param args unused
     * @throws Exception if a page or an image cannot be fetched or written
     */
    public static void main(String[] args) throws Exception {
        downLoad(1);
    }

    /**
     * Downloads the images of listing page {@code page} and of every following
     * page, stopping at the first page that contains no matching images.
     *
     * <p>Pages are walked with a loop rather than by recursion so that the call
     * stack does not grow with the number of pages.
     *
     * @param page number of the first listing page to download
     * @throws Exception if a page or an image cannot be fetched or written
     */
    static void downLoad(int page) throws Exception {
        for (int current = page; downloadPage(current); current++) {
            System.out.println("第" + current + "页下载完成");
        }
    }

    /**
     * Downloads all images found on one listing page.
     *
     * @param page listing page number
     * @return {@code true} if the page contained at least one image element,
     *         {@code false} if it contained none
     * @throws IOException if the page or an image cannot be fetched, or the
     *         target directory or file cannot be written
     */
    private static boolean downloadPage(int page) throws IOException {
        Document doc = Jsoup.connect(LIST_URL + page).get();
        Elements images = doc.select(IMAGE_SELECTOR);
        if (images.isEmpty()) {
            return false;
        }

        // Create the per-page directory once, not once per image.
        File dir = new File("/storage/emulated/0/0-0表情包/猫/第" + page + "页");
        if (!dir.isDirectory() && !dir.mkdirs()) {
            throw new IOException("Cannot create directory: " + dir);
        }

        int index = 0;
        for (Element image : images) {
            index++; // numbering follows the image's position on the page
            // absUrl resolves relative and protocol-relative links against the page URL.
            String imgUrl = image.absUrl("src");
            if (imgUrl.isEmpty()) {
                continue; // no usable source URL on this element
            }
            String name = image.attr("alt");
            System.out.println(name);

            byte[] bytes = Jsoup.connect(imgUrl).ignoreContentType(true).execute().bodyAsBytes();
            File target = new File(dir, index + "-" + safeFileName(name) + extensionOf(imgUrl));
            // try-with-resources guarantees the stream is flushed and closed.
            try (OutputStream out = new FileOutputStream(target)) {
                out.write(bytes);
            }
        }
        return true;
    }

    /**
     * Extracts the file extension (including the leading dot) from a URL,
     * ignoring any query string or fragment.
     *
     * @param url image URL
     * @return the extension such as {@code ".jpg"}, or an empty string when the
     *         last path segment has no plausible extension
     */
    private static String extensionOf(String url) {
        int end = url.length();
        int query = url.indexOf('?');
        if (query >= 0) {
            end = query;
        }
        int fragment = url.indexOf('#');
        if (fragment >= 0 && fragment < end) {
            end = fragment;
        }
        String path = url.substring(0, end);
        int dot = path.lastIndexOf('.');
        // The dot must belong to the last path segment and be followed by a short suffix.
        if (dot <= path.lastIndexOf('/') || path.length() - dot > 6) {
            return "";
        }
        return path.substring(dot);
    }

    /**
     * Replaces characters that are path separators, reserved on common file
     * systems, or control characters with an underscore.
     *
     * @param name raw text taken from the image's {@code alt} attribute
     * @return a string safe to use as part of a single file name
     */
    private static String safeFileName(String name) {
        return name.replaceAll("[\\\\/:*?\"<>|\\p{Cntrl}]", "_");
    }
}