和别人聊天表情包太少怎么办,一直这样
今天就用java爬点表情包
上代码
我用的是 Spring Boot 项目,引入 jar 包比较方便
pom文件
<!-- jsoup: HTML parser; the crawler code in this file calls Jsoup.parse on each fetched listing page -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.10.3</version>
</dependency>
<!-- fastjson: JSON library. NOTE(review): no code shown in this file references it; confirm it is actually needed -->
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.58</version>
</dependency>
<!-- commons-httpclient 3.x: supplies the HttpClient / GetMethod classes used by the HTTP GET helper in this file -->
<dependency>
<groupId>commons-httpclient</groupId>
<artifactId>commons-httpclient</artifactId>
<version>3.1</version>
</dependency>
主方法
/**
 * Crawls 200 listing pages of fabiaoqing.com sequentially and saves every meme image
 * found on them under {@code D:\wangyou_meme\hotmeme}.
 *
 * <p>A page that cannot be fetched or parsed, or an entry with a missing image/link,
 * is reported and skipped so that one bad page does not abort the whole crawl.
 *
 * @param args unused
 * @throws Exception declared for compatibility; per-image failures are caught and logged
 */
public static void main(String[] args) throws Exception {
    createFolder("D:\\wangyou_meme\\hotmeme");
    // Compiled once instead of per image. Strips '.' plus every character Windows
    // forbids in a file name: \ / : * ? " < > |
    Pattern illegalChars = Pattern.compile("[\\\\/:*?\"<>|.]");
    for (int i = 0; i < 200; i++) {
        String path = "https://www.fabiaoqing.com/biaoqing/lists/page/" + i + ".html";
        String html = getSerchPersion(path);
        Document parse = Jsoup.parse(html);
        // first() returns null when the page has no such element (failed fetch, layout change).
        Element imghover = parse.getElementsByClass("imghover").first();
        if (imghover == null) {
            System.out.println("页面解析失败,跳过: " + path);
            continue;
        }
        Elements tagbqppdiv = imghover.getElementsByClass("tagbqppdiv");
        for (int j = 0; j < tagbqppdiv.size(); j++) {
            Element row = tagbqppdiv.get(j);
            Element img = row.getElementsByTag("img").first();
            Element link = row.getElementsByTag("a").first();
            if (img == null || link == null) {
                continue;
            }
            String imgUrl = img.attr("data-original");
            if (imgUrl.isEmpty()) {
                continue;
            }
            // Only treat a '.' inside the last path segment as the start of the extension.
            int slash = imgUrl.lastIndexOf('/');
            int dot = imgUrl.lastIndexOf('.');
            String suffix = dot > slash ? imgUrl.substring(dot) : "";
            String filename = illegalChars.matcher(link.attr("title")).replaceAll("").trim();
            if (filename.isEmpty()) {
                // No usable title: fall back to the image's own base name so files do not collide.
                String base = imgUrl.substring(slash + 1, dot > slash ? dot : imgUrl.length());
                filename = illegalChars.matcher(base).replaceAll("").trim();
            }
            if (filename.isEmpty()) {
                continue;
            }
            try {
                download(imgUrl, "hotmeme\\" + filename + suffix);
            } catch (Exception e) {
                // Best effort: report what failed and why, then carry on with the next image.
                System.out.println("保存失败: " + imgUrl + " (" + e + ")");
            }
        }
    }
}
HTTP GET 请求方法
/**
 * Fetches {@code url} with an HTTP GET and returns the response body as text.
 *
 * <p>The body is always decoded as UTF-8 rather than with the platform default charset,
 * so the result does not depend on the machine the crawler runs on.
 *
 * @param url address to request
 * @return the response body, or an empty string when the request fails, the server
 *         answers with a status other than 200, or there is no body
 */
public static String getSerchPersion(String url) {
    HttpClient httpClient = new HttpClient();
    httpClient.getParams().setParameter(HttpMethodParams.HTTP_CONTENT_CHARSET, "UTF8");
    // Accept cookies using the same policy a browser would.
    httpClient.getParams().setCookiePolicy(CookiePolicy.BROWSER_COMPATIBILITY);
    // Present a browser User-Agent to the server.
    httpClient.getParams().setParameter(HttpMethodParams.USER_AGENT, "Mozilla/5.0 (Windows NT 6.1; rv:8.0.1) Gecko/20100101 Firefox/8.0.1");
    // Connection timeout: 5 seconds.
    httpClient.getHttpConnectionManager().getParams().setConnectionTimeout(5000);
    GetMethod getMethod = new GetMethod(url);
    // Socket read timeout for the GET: 5 seconds.
    getMethod.getParams().setParameter(HttpMethodParams.SO_TIMEOUT, 5000);
    // Use the library's default retry handler.
    getMethod.getParams().setParameter(HttpMethodParams.RETRY_HANDLER, new DefaultHttpMethodRetryHandler());
    String response = "";
    try {
        int statusCode = httpClient.executeMethod(getMethod);
        if (statusCode != 200) {
            // An error page is not the listing the caller expects; report it and return nothing.
            System.out.println("请求失败,状态码 " + statusCode + ": " + url);
            return response;
        }
        byte[] responseBody = getMethod.getResponseBody();
        // getResponseBody() yields null when the response carries no body.
        if (responseBody != null) {
            response = new String(responseBody, java.nio.charset.StandardCharsets.UTF_8);
        }
    } catch (HttpException e) {
        System.out.println("请检查输入的URL! " + url + " (" + e + ")");
    } catch (IOException e) {
        System.out.println("发生网络异常! " + url + " (" + e + ")");
    } finally {
        // Always hand the connection back, including on the early return above.
        getMethod.releaseConnection();
    }
    return response;
}
下载图片方法,表情包保存在 D:\wangyou_meme 目录下
/**
 * Downloads the resource at {@code urlString} into {@code D:\wangyou_meme\<filenames>}.
 *
 * <p>If the target file already exists the method returns without opening a connection.
 * If the transfer fails part-way, the incomplete file is removed so that a later run
 * does not mistake it for a finished download.
 *
 * @param urlString address of the image to fetch
 * @param filenames target path relative to {@code D:\wangyou_meme\}
 * @throws Exception if the URL is malformed or the transfer fails
 */
public static void download(String urlString, String filenames) throws Exception {
    URL url = new URL(urlString);
    String filename = "D:\\wangyou_meme\\" + filenames; // target path and file name
    File file = new File(filename);
    // Check before connecting so that skipping an existing file leaks no open stream.
    if (file.exists()) {
        return;
    }
    File parent = file.getParentFile();
    if (parent != null && !parent.exists()) {
        parent.mkdirs();
    }
    URLConnection con = url.openConnection();
    // Without timeouts a stalled server would block the calling thread indefinitely.
    con.setConnectTimeout(5000);
    con.setReadTimeout(15000);
    boolean complete = false;
    try (InputStream is = con.getInputStream();
         FileOutputStream os = new FileOutputStream(file)) {
        byte[] bs = new byte[8192];
        int len;
        while ((len = is.read(bs)) != -1) {
            os.write(bs, 0, len);
        }
        complete = true;
    } finally {
        // Runs after both streams are closed; drop a truncated file left by a failed transfer.
        if (!complete) {
            file.delete();
        }
    }
}
/**
 * Makes sure the directory {@code folder} exists, creating it together with any
 * missing parent directories when it is absent.
 *
 * @param folder path of the directory to ensure
 */
public static void createFolder(String folder) {
    File dir = new File(folder);
    if (dir.exists()) {
        // Already present: nothing to do.
        return;
    }
    dir.mkdirs();
}
以上就是全部代码。运行 main 方法后发现下载得好慢,那就改造一下,加入线程池
下边是改造后的main方法
/**
 * Crawls 200 listing pages of fabiaoqing.com on a pool of five worker threads and saves
 * every meme image found on them under {@code D:\wangyou_meme\hotmeme}.
 *
 * <p>Each page is one task. A page that cannot be fetched or parsed, or an entry with a
 * missing image/link, is reported and skipped instead of failing the task with an
 * exception.
 *
 * @param args unused
 * @throws Exception declared for compatibility; per-page and per-image failures are
 *         caught and logged inside the tasks
 */
public static void main(String[] args) throws Exception {
    createFolder("D:\\wangyou_meme\\hotmeme");
    // Compiled once and shared by all tasks (Pattern is immutable). Strips '.' plus every
    // character Windows forbids in a file name: \ / : * ? " < > |
    Pattern illegalChars = Pattern.compile("[\\\\/:*?\"<>|.]");
    // Keep the pool small; the original author observed requests being dropped with a larger one.
    ExecutorService exec = Executors.newFixedThreadPool(5);
    for (int i = 0; i < 200; i++) {
        int finalI = i;
        exec.execute(() -> {
            String path = "https://www.fabiaoqing.com/biaoqing/lists/page/" + finalI + ".html";
            try {
                String html = getSerchPersion(path);
                Document parse = Jsoup.parse(html);
                // first() returns null when the page has no such element (failed fetch, layout change).
                Element imghover = parse.getElementsByClass("imghover").first();
                if (imghover == null) {
                    System.out.println("页面解析失败,跳过: " + path);
                    return;
                }
                Elements tagbqppdiv = imghover.getElementsByClass("tagbqppdiv");
                for (int j = 0; j < tagbqppdiv.size(); j++) {
                    Element row = tagbqppdiv.get(j);
                    Element img = row.getElementsByTag("img").first();
                    Element link = row.getElementsByTag("a").first();
                    if (img == null || link == null) {
                        continue;
                    }
                    String imgUrl = img.attr("data-original");
                    if (imgUrl.isEmpty()) {
                        continue;
                    }
                    // Only treat a '.' inside the last path segment as the start of the extension.
                    int slash = imgUrl.lastIndexOf('/');
                    int dot = imgUrl.lastIndexOf('.');
                    String suffix = dot > slash ? imgUrl.substring(dot) : "";
                    String filename = illegalChars.matcher(link.attr("title")).replaceAll("").trim();
                    if (filename.isEmpty()) {
                        // No usable title: fall back to the image's own base name so files do not collide.
                        String base = imgUrl.substring(slash + 1, dot > slash ? dot : imgUrl.length());
                        filename = illegalChars.matcher(base).replaceAll("").trim();
                    }
                    if (filename.isEmpty()) {
                        continue;
                    }
                    try {
                        download(imgUrl, "hotmeme\\" + filename + suffix);
                    } catch (Exception e) {
                        // Best effort: report what failed and why, then carry on with the next image.
                        System.out.println("保存失败: " + imgUrl + " (" + e + ")");
                    }
                }
            } catch (RuntimeException e) {
                // Report which page broke rather than letting the worker die with a bare stack trace.
                System.out.println("页面处理失败: " + path + " (" + e + ")");
            }
        });
    }
    // Stop accepting new tasks; already-queued pages still run to completion.
    exec.shutdown();
}
速度快了几倍
嗯~真香
最后