importcom.google.common.io.Files;importcom.xxx.appstore.service.crawler.CalcMD5Service;importorg.apache.commons.lang.StringUtils;importorg.apache.commons.lang.math.RandomUtils;importorg.slf4j.Logger;importorg.slf4j.LoggerFactory;importjava.io.File;importjava.io.IOException;importjava.util.ArrayList;importjava.util.List;importjava.util.concurrent.TimeUnit;
public classCrawlerUtils {public static final String APK_DOWNLOAD_PATH = "/data/appstore/category/";private static Logger LOGGER = LoggerFactory.getLogger(CrawlerUtils.class);/*** 使用wget下载文件
*
*@paramdisplayName appName
*@paramcategory 分类
*@paramdownload_url 下载地址
*@return成功返回文件路径,失败返回null*/
public staticString downloadFileByWget(String displayName, String category, String download_url) {if (StringUtils.isBlank(displayName) || StringUtils.isBlank(category) ||StringUtils.isBlank(download_url)) {
LOGGER.info("downloadFileByWget ERROR, displayName:{}, category:{}, download_url:{}", newObject[]{displayName, category, download_url});return null;
}
String fileName= CalcMD5Service.encoder(displayName + RandomUtils.nextInt(1000));
String seed=CalcMD5Service.encoder(category);
String midPath= StringUtils.left(seed, 10);
String filePath= APK_DOWNLOAD_PATH + midPath + "/" + fileName + ".apk";
File file= newFile(filePath);try{
Files.createParentDirs(file);
}catch(IOException e) {
LOGGER.warn("IOException", e);return null;
}int retry = 2;int res = -1;int time = 1;while (retry-- > 0) {
ProcessBuilder pb= new ProcessBuilder("wget", download_url, "-t", "2", "-T", "10", "-O", filePath);
LOGGER.info("wget shell: {}", pb.command());
Process ps= null;try{
ps=pb.start();
}catch(IOException e) {
LOGGER.error("IOException", e);
}
res= doWaitFor(ps, 30 * time++);if (res != 0) {
LOGGER.warn("Wget download failed...");
}else{break;
}
}if (res != 0) {return null;
}returnfilePath;
}/***@paramps sub process
*@paramtimeout 超时时间,SECONDS
*@return正常结束返回0*/
private static int doWaitFor(Process ps, inttimeout) {int res = -1;if (ps == null) {returnres;
}
List stdoutList = new ArrayList<>();
List erroroutList = new ArrayList<>();boolean finished = false;int time = 0;
ThreadUtil stdoutUtil= newThreadUtil(ps.getInputStream(), stdoutList);
ThreadUtil erroroutUtil= newThreadUtil(ps.getErrorStream(), erroroutList);//启动线程读取缓冲区数据
stdoutUtil.start();
erroroutUtil.start();while (!finished) {
time++;if (time >=timeout) {
LOGGER.info("Process wget timeout 30s, destroyed!");
ps.destroy();break;
}try{
res=ps.exitValue();
finished= true;
}catch(IllegalThreadStateException e) {try{
TimeUnit.SECONDS.sleep(1);
}catch(InterruptedException e1) {
}
}
}returnres;
}
}