项目要求是能定时解压gz文件,并按照省份将文件合并成一个大文件。代码实现的功能:按照gz压缩包的路径格式解压文件,最后将文件合并成按照省份名称存放的大文件。项目用log4j记录日志,用Java定时器实现定时解压合并;合并完成后先将压缩包移动到已处理文件夹下,再删除解压出来的小文件,同时删除原始目录下的压缩包(这么做是因为文件是定时下载到一个目录的,每次我们去这个目录下处理文件,为了不重复解压合并已经处理过的文件,逻辑上我把处理过的文件都移动到一个已处理文件夹下,并且删除原目录下的压缩包,这样就免去了记录跟判断文件是否是新文件的开销)。下面直接上代码:
附录里面会加上工程代码,到时候各位直接下载下来即可。
1)先看项目的配置文件(mergeFilleUtil.properties),log4j内容我就不贴了,一会儿自己下载看看就行:
#原始文件
PROVINCE_DIR=E:\\test\\rootfile
#解压后文件存放的路径
UN_ZIP_PATH=E:\\test\\unZip
#合并后文件存放的路径
OUT_PATH=E:\\test\\result
#已经处理过的文件存放路径
DONE_FILE_PATH=E:\\test\\doneFile
#任务执行结束后需要清空的目录(多个目录用逗号分隔,目录本身保留)
DELETE_PATH=E:\\test\\rootfile,E:\\test\\unZip
#配置任务的时间间隔,以秒为单位(程序内部会乘以1000换算成毫秒),一天是:24*60*60=86400
TASK_PERIOD=86400
#任务开始的小时时间
TASK_BEGIN_HOUR=9
#任务开始的分钟
TASK_BEGIN_MINUTE=26
2)主程序入口:
/**
 * Program entry point: schedules the decompress-and-merge task on a {@link Timer}.
 *
 * <p>Reads three settings from the map returned by {@code LoadProperty.readProperty()}:
 * {@code TASK_PERIOD} (interval between runs, in seconds), {@code TASK_BEGIN_HOUR}
 * and {@code TASK_BEGIN_MINUTE} (time of day of the first run, today).
 *
 * <p>Note: per the {@code Timer.schedule(TimerTask, Date, long)} contract, if the
 * configured time of day has already passed when the program starts, the first run
 * happens immediately and later runs follow at the configured interval.
 *
 * @param args unused
 */
public static void main(String[] args) {
    Map proMap = LoadProperty.readProperty();
    Timer timer = new Timer();
    MergeFileUtil mergeTask = new MergeFileUtil(); // the task to execute

    // Interval between runs; the configuration value is in seconds.
    long intervalSeconds = Long.parseLong(proMap.get("TASK_PERIOD").toString().trim());

    // Hour and minute of the first run, e.g. 19:32.
    int hour = Integer.parseInt(proMap.get("TASK_BEGIN_HOUR").toString().trim());
    int minute = Integer.parseInt(proMap.get("TASK_BEGIN_MINUTE").toString().trim());

    // Start from "now" and overwrite the time-of-day fields. Seconds and
    // milliseconds are cleared so the task fires exactly on the minute instead
    // of inheriting the second at which the program happened to be launched.
    Calendar calendar = Calendar.getInstance();
    calendar.set(Calendar.HOUR_OF_DAY, hour);
    calendar.set(Calendar.MINUTE, minute);
    calendar.set(Calendar.SECOND, 0);
    calendar.set(Calendar.MILLISECOND, 0);
    Date date = calendar.getTime();

    int year = calendar.get(Calendar.YEAR);
    // Calendar.MONTH is zero-based (January == 0); add 1 for display.
    int month = calendar.get(Calendar.MONTH) + 1;
    int day = calendar.get(Calendar.DAY_OF_MONTH);
    System.out.println("运行时间:" +year+"-"+month+"-"+day+"-"+hour+"-"+minute);

    // Timer works in milliseconds, so convert the configured seconds.
    timer.schedule(mergeTask, date, intervalSeconds * 1000);
}
}
3)读取配置文件工具类:
/**
 * Loads {@code mergeFilleUtil.properties} from the classpath once, when the class
 * is initialized, and exposes the result through {@link #readProperty()}.
 *
 * <p>If the resource is missing or cannot be read, the error is logged and the
 * returned map is simply empty (or partially filled); class initialization itself
 * does not fail.
 */
public class LoadProperty {
    private static final Logger log = Logger.getLogger(LoadProperty.class);
    private static final Properties prop = new Properties();

    static {
        ClassLoader loader = LoadProperty.class.getClassLoader();
        InputStream in = loader.getResourceAsStream("mergeFilleUtil.properties");
        if (in == null) {
            // getResourceAsStream returns null when the resource is not on the
            // classpath; wrapping null in a Reader would throw a
            // NullPointerException inside the static initializer.
            log.error("加载配置文件出错,请检查!");
        } else {
            try {
                // Read as UTF-8 so that non-ASCII (e.g. Chinese) paths survive.
                prop.load(new InputStreamReader(in, "UTF-8"));
            } catch (UnsupportedEncodingException e1) {
                log.error("加载配置文件出错,请检查!", e1);
            } catch (IOException e1) {
                log.error("加载配置文件流出错,请检查!", e1);
            } finally {
                try {
                    in.close();
                } catch (IOException e) {
                    log.error("关闭配置文件流出错!", e);
                }
            }
        }
    }

    /**
     * Returns the loaded configuration.
     *
     * @return the shared {@link Properties} instance (keys and values are strings);
     *         never {@code null}
     */
    @SuppressWarnings("rawtypes")
    public static Map readProperty() {
        return prop;
    }
}
4) 多线程文件解压合并工具类
public class MergeFileUtil extends TimerTask{
private static Logger log = Logger.getLogger(MergeFileUtil.class);
@Override
public void run() {
unZipFile();
}
//解压
@SuppressWarnings("rawtypes")
public void unZipFile(){
//读取配置文件中的路径
Map proMap = LoadProperty.readProperty();
String rootdir = proMap.get("PROVINCE_DIR").toString();
String unZipPath = proMap.get("UN_ZIP_PATH").toString();
String outDir = proMap.get("OUT_PATH").toString();
File rootfile=new File(rootdir);
if(!rootfile.exists()||!rootfile.isDirectory()){
log.error("文件路径不存在,请检查欲解压的文件所在目录是否填写正确!");
return;
}else{
File[] files=rootfile.listFiles();
if(files.length==0){
log.error("警告:没有要解压的文件!请检查:"+rootfile+"下是否存在需解压的文件!");
}else{
//分配解压线程池
ExecutorService pool=Executors.newFixedThreadPool(files.length);
for (File provinceDir:files){
excuteThreads(pool,provinceDir,outDir,unZipPath);
}
pool.shutdown();
while (true) {
if (pool.isTerminated()) {
//解压结束后启动合并线程池
mergerFile();
break;
}
try {
Thread.sleep(200);
} catch (InterruptedException e) {
e.printStackTrace();
}
}
}
}
}
//解压类
private void excuteThreads(ExecutorService pool,File provinceDir,String outdir,String unZipPath){
pool.execute(
new UnZipFile(provinceDir.getAbsolutePath(),
outdir,unZipPath));
}
@SuppressWarnings("rawtypes")
private void mergerFile(){
//读取配置文件中的路径
Map proMap = LoadProperty.readProperty();
String rootdir = proMap.get("PROVINCE_DIR").toString();
String unZipPath = proMap.get("UN_ZIP_PATH").toString();
String outDir = proMap.get("OUT_PATH").toString();
String doneFilePath = proMap.get("DONE_FILE_PATH").toString();
String deletePath = proMap.get("DELETE_PATH").toString();
String []deleteArray = deletePath.split(",");
File unZipFile=new File(unZipPath);
if(!unZipFile.exists()||!unZipFile.isDirectory()){
log.error("文件路径不存在,请检查欲合并的文件所在目录是否填写正确!");
return;
}else{
File[] files=unZipFile.listFiles();
if(files.length==0){
log.error("警告:没有要合并的文件!请检查:"+unZipFile.getPath()+"下是否存在需合并的文件!");
}else{
//分配线程池
ExecutorService pool=Executors.newFixedThreadPool(files.length);
for (File provinceDir:files){
excuteMergerThreads(pool,provinceDir,outDir,unZipPath);
}
pool.shutdown();
while(true){
if(pool.isTerminated()){
//合并结束后启动移动跟删除文件程序
MoveAndDeleteFile moveAndDeleteFile = new MoveAndDeleteFile();
log.info("开始移动文件到已处理文件夹!");
moveAndDeleteFile.moveFile(new File(rootdir),new File(doneFilePath));
log.info("移动文件到已处理文件夹成功!");
try {
Thread.sleep(1000);
} catch (InterruptedException e) {
e.printStackTrace();
}
log.info("开始删除文件!");
for(int i=0;i
File file = new File(deleteArray[i]);
if(!file.exists()){
log.error("欲删除的目录不存在,请检查目录:"+file.getPath()+"是否正确存在!");
}else{
moveAndDeleteFile.deleteFileAndDir(new File(deleteArray[i]),true);
}
}
log.info("删除文件结束!");
log.info("解压合并文件程序运行结束!");
break;
}
}
}
}
}
//文件合并类
private void excuteMergerThreads(ExecutorService pool,File file,String outDir,String unZipPath){
pool.execute(new MergeFile(file.getAbsolutePath(),
outDir,unZipPath));
}
}
5)解压代码:
/**
 * Worker that walks one source directory (or file) and unpacks every
 * {@code *.gz} file it finds. Each such file is read as a gzip-compressed tar
 * archive, and its entries are written below the decompression directory while
 * reproducing the directory structure of the source tree.
 */
public class UnZipFile implements Runnable {
    private static Logger log = Logger.getLogger(UnZipFile.class);
    private String outFilePath;  // directory that will later receive the merged files
    private String mergeFileDir; // directory (or file) holding the original archives
    private String unZipFileDir; // directory that receives the decompressed files
    public static final int BUFSIZE = 1024 * 8;
    // Number of archives processed so far; shared by all worker threads, hence atomic.
    private static final java.util.concurrent.atomic.AtomicInteger count =
            new java.util.concurrent.atomic.AtomicInteger();

    public UnZipFile() {
    }

    /**
     * @param mergeFileDir absolute path of the directory (or file) to scan for archives
     * @param outFilePath  directory for merged output; created here if missing
     * @param unZipFileDir directory into which archives are decompressed
     */
    public UnZipFile(String mergeFileDir, String outFilePath, String unZipFileDir) {
        this.outFilePath = outFilePath;
        this.mergeFileDir = mergeFileDir;
        this.unZipFileDir = unZipFileDir;
    }

    /** Ensures the merge output directory exists, then decompresses the source tree. */
    public void run() {
        File f = new File(outFilePath);
        File mergerDir = new File(mergeFileDir);
        if (!f.exists()) {
            // Create the directory and carry on; returning here would skip
            // decompression entirely on the first run.
            f.mkdirs();
        }
        try {
            log.info("开始解压路径:"+mergerDir+"下文件!");
            unzipFile(mergerDir, unZipFileDir);
            log.info("解压文件:"+mergerDir+"结束!");
        } catch (IOException e) {
            log.error("解压路径:"+mergerDir+"下文件出错!", e);
        }
    }

    /** Recursively visits {@code file} and unpacks every file whose name ends with ".gz". */
    private void unzipFile(File file, String OutFileDir) throws IOException {
        if (file.isDirectory()) {
            File[] children = file.listFiles();
            if (children == null) {
                // listFiles() returns null when the directory cannot be read.
                log.error("无法读取目录:"+file.getPath());
                return;
            }
            for (File child : children) {
                unzipFile(child, OutFileDir);
            }
        } else if (file.getAbsolutePath().endsWith(".gz")) {
            int processed = count.incrementAndGet();
            doUnZipFile(file, OutFileDir);
            // Progress message every 1000 archives.
            if (processed % 1000 == 0) {
                log.info("已经解压"+processed+"个文件!");
            }
        }
    }

    /**
     * Unpacks one gzip-compressed tar archive.
     *
     * <p>The destination of each entry is
     * {@code outputDirectory/<path of the archive relative to the parent of mergeFileDir>/<entry name>}.
     * Errors are logged; they are not propagated to the caller.
     *
     * @param zipFile         the archive to unpack
     * @param outputDirectory root directory for decompressed output
     */
    public void doUnZipFile(File zipFile, String outputDirectory) {
        String separator = File.separator;

        // Work out which part of the source directory structure to reproduce:
        // everything between the last separator of mergeFileDir and the archive's
        // own file name. Empty when the archive is mergeFileDir itself.
        String zipPath = zipFile.getPath();
        int lastIndex = zipPath.lastIndexOf(separator);
        int index = mergeFileDir.lastIndexOf(separator);
        String dirStructurePath = lastIndex <= index ? "" : zipPath.substring(index + 1, lastIndex);
        String baseDir = dirStructurePath.isEmpty()
                ? outputDirectory
                : outputDirectory + separator + dirStructurePath;

        FileInputStream fis = null;
        GZIPInputStream is = null;
        ArchiveInputStream in = null;
        try {
            fis = new FileInputStream(zipFile);
            is = new GZIPInputStream(new BufferedInputStream(fis));
            in = new ArchiveStreamFactory().createArchiveInputStream("tar", is);
            String canonicalRoot = new File(outputDirectory).getCanonicalPath();
            TarArchiveEntry entry = (TarArchiveEntry) in.getNextEntry();
            while (entry != null) {
                extractEntry(in, entry, baseDir, canonicalRoot);
                entry = (TarArchiveEntry) in.getNextEntry();
            }
        } catch (FileNotFoundException e) {
            log.error("找不到要解压的文件所在路径!", e);
        } catch (IOException e) {
            log.error("解压文件:"+zipFile.getPath()+"出错!", e);
        } catch (ArchiveException e) {
            log.error("解压文件:"+zipFile.getPath()+"出错!", e);
        } finally {
            // Close outermost first; each close is attempted even if another fails.
            closeQuietly(in);
            closeQuietly(is);
            closeQuietly(fis);
        }
    }

    /**
     * Writes the archive entry the stream is currently positioned on.
     * Entries whose resolved path falls outside the output root are skipped.
     */
    private void extractEntry(ArchiveInputStream in, TarArchiveEntry entry,
            String baseDir, String canonicalRoot) throws IOException {
        String separator = File.separator;
        // Entry names use '/' regardless of platform: "dir/sub/file".
        String name = entry.getName();
        String targetPath = baseDir;
        String[] names = name.split("/");
        for (int i = 0; i < names.length; i++) {
            targetPath = targetPath + separator + names[i];
        }
        File target = new File(targetPath);

        // Guard against entry names such as "../../x" that would escape the output root.
        String canonicalTarget = target.getCanonicalPath();
        String rootPrefix = canonicalRoot.endsWith(separator) ? canonicalRoot : canonicalRoot + separator;
        if (!canonicalTarget.equals(canonicalRoot) && !canonicalTarget.startsWith(rootPrefix)) {
            log.error("跳过非法的压缩包条目:"+name);
            return;
        }

        File parent = target.getParentFile();
        if (parent != null && !parent.exists()) {
            parent.mkdirs();
        }
        if (name.endsWith("/")) {
            mkFolder(targetPath);
            return;
        }

        File file = mkFile(targetPath);
        BufferedOutputStream out = new BufferedOutputStream(new FileOutputStream(file));
        try {
            // read() returns -1 at the end of the current entry, not of the whole archive.
            byte[] buffer = new byte[BUFSIZE];
            int n;
            while ((n = in.read(buffer)) != -1) {
                out.write(buffer, 0, n);
            }
            out.flush();
        } finally {
            out.close();
        }
    }

    /** Closes a stream if it was opened, logging rather than propagating a failure. */
    private void closeQuietly(java.io.Closeable stream) {
        if (stream == null) {
            return;
        }
        try {
            stream.close();
        } catch (IOException e) {
            log.error("关闭流出错!", e);
        }
    }

    /** Creates the directory {@code fileName} if it does not exist yet. */
    private void mkFolder(String fileName) {
        File f = new File(fileName);
        if (!f.exists()) {
            f.mkdir();
        }
    }

    /** Creates the (empty) file {@code fileName} if needed and returns its handle. */
    private File mkFile(String fileName) {
        File f = new File(fileName);
        try {
            f.createNewFile();
        } catch (IOException e) {
            log.error("文件"+fileName+"创建失败", e);
        }
        return f;
    }
}
6)合并代码:
/**
 * Worker that concatenates every non-{@code .gz} file found under one
 * decompressed directory into a single file named {@code result}, located in
 * {@code <outFilePath>/<last path component of mergeFileDir>/}.
 * If {@code result} already exists, new content is appended to it.
 */
public class MergeFile implements Runnable {
    private static Logger log = Logger.getLogger(MergeFile.class);
    private String outFilePath;  // root directory for merged output
    private String mergeFileDir; // directory holding the decompressed files
    // Number of files merged so far; shared by all worker threads, hence atomic.
    private static final java.util.concurrent.atomic.AtomicInteger count =
            new java.util.concurrent.atomic.AtomicInteger();
    public static final int BUFSIZE = 1024 * 8;

    /**
     * @param mergeFileDir absolute path of the directory whose files are merged
     * @param outFilePath  root directory for merged output
     * @param unZipFileDir unused; kept so existing callers keep compiling
     */
    public MergeFile(String mergeFileDir, String outFilePath, String unZipFileDir) {
        this.outFilePath = outFilePath;
        this.mergeFileDir = mergeFileDir;
    }

    @Override
    public void run() {
        mergerUnZipFile();
    }

    /** Opens (or creates) the result file in append mode and merges the source tree into it. */
    private void mergerUnZipFile() {
        File f = new File(mergeFileDir);
        String path = f.getPath();
        // Output directory = output root + last component of the source path.
        String outPath = outFilePath + path.substring(path.lastIndexOf(File.separator));
        File outDirectory = new File(outPath);
        if (!outDirectory.exists()) {
            outDirectory.mkdirs();
        }

        File resultFile = new File(outPath + File.separator + "result");
        FileChannel outChannel;
        try {
            // Append mode creates the file when it is missing and otherwise
            // adds the new content after the existing content.
            outChannel = new FileOutputStream(resultFile, true).getChannel();
        } catch (IOException e) {
            log.error("文件:"+outPath+File.separator+"result"+"不存在,请检查!", e);
            // Without an output channel there is nothing to merge into.
            return;
        }

        try {
            log.info("开始合并路径:"+f.getPath()+"下文件");
            mergeFile(f, outChannel);
            log.info("路径:"+f.getPath()+"下文件合并完成");
        } finally {
            try {
                outChannel.close();
            } catch (IOException e) {
                log.error("关闭管道流出错,请检查文件合并类下run方法的文件管道流!", e);
            }
        }
    }

    /**
     * Recursively appends every file under {@code file} whose name does not end
     * with ".gz" to {@code outFileChannel}. Files are visited in the order
     * returned by {@link File#listFiles()}, which is not guaranteed to be sorted.
     *
     * @param file           root directory (or single file) to merge
     * @param outFileChannel channel of the merged output file
     */
    public void mergeFile(File file, FileChannel outFileChannel) {
        if (file.isDirectory()) {
            File[] children = file.listFiles();
            if (children == null) {
                // listFiles() returns null when the directory cannot be read.
                log.error("无法读取目录:"+file.getPath());
                return;
            }
            for (File child : children) {
                mergeFile(child, outFileChannel);
            }
        } else if (!file.getAbsolutePath().endsWith(".gz")) {
            mergeFiles(file, outFileChannel);
            // Progress message every 50000 files.
            int merged = count.incrementAndGet();
            if (merged % 50000 == 0) {
                log.info("已经合并文件"+merged+"个!");
            }
        }
    }

    /**
     * Appends the full content of {@code file} to {@code outChannel}.
     * I/O errors are logged and not propagated.
     *
     * @param file       file to read
     * @param outChannel channel to write to; left open
     */
    public static void mergeFiles(File file, FileChannel outChannel) {
        FileInputStream input = null;
        try {
            input = new FileInputStream(file);
            FileChannel fc = input.getChannel();
            ByteBuffer bb = ByteBuffer.allocate(BUFSIZE);
            while (fc.read(bb) != -1) {
                bb.flip();
                // write() may consume only part of the buffer; drain it fully.
                while (bb.hasRemaining()) {
                    outChannel.write(bb);
                }
                bb.clear();
            }
        } catch (IOException ioe) {
            log.error("合并文件:"+file.getPath()+"失败!", ioe);
        } finally {
            if (input != null) {
                try {
                    // Closing the stream also closes its channel.
                    input.close();
                } catch (IOException e) {
                    log.error("关闭文件:"+file.getPath()+"出错!", e);
                }
            }
        }
    }
}
7)移动跟删除文件代码:
public class MoveAndDeleteFile {
private static int count;//计数器
private Logger log = Logger.getLogger(MoveAndDeleteFile.class);
/*
* 用途:将文件从一个目录移动到另外一个目录
* rootdir:存放原文件的根目录
* rootBoo:为true时不删除根目录,只删除根目录下文件跟文件夹
*/
public void moveFile(File rootdir,File doneFilePath){
//开始移动文件
if(rootdir.isDirectory()){
for(File file:rootdir.listFiles()){
moveFile(file,doneFilePath);
}
}else{
moveTheFile(rootdir,doneFilePath);
//每五千个打印一次,用作程序提示
count++;
if(count>0&&count%5000==0){
log.info("已经移动文件"+count+"个!");
}
}
}
public void moveTheFile(File rootdir,File doneFilePath){
FileInputStream fi = null;
FileOutputStream fo = null;
FileChannel in = null;
FileChannel out = null;
try {
String currentFilePath = rootdir.getPath();
//确定好目的地解压的目录结构begin
int index = "E:\\test\\rootfile\\".lastIndexOf(File.separator);
String dirStructurePath = doneFilePath+currentFilePath.substring(index);
File dir = new File(dirStructurePath.substring(0, dirStructurePath.lastIndexOf(File.separator)));
if(!dir.exists()){
dir.mkdirs();
}
File dirFile = new File(dirStructurePath);
if(!dirFile.exists()){
dir.createNewFile();
}
fi = new FileInputStream(rootdir);
fo = new FileOutputStream(dirStructurePath);
in = fi.getChannel();//得到对应的文件通道
out = fo.getChannel();//得到对应的文件通道
in.transferTo(0, in.size(), out);//连接两个通道,并且从in通道读取,然后写入out通道
//System.out.println("移动文件:"+rootdir.getPath()+"完成!");
} catch (IOException e) {
e.printStackTrace();
} finally {
try {
fi.close();
in.close();
fo.close();
out.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
/*
* 先删除文件再删除文件夹
* file:存放文件的根目录
* rootBoo:为true时不删除根目录,只删除根目录下文件跟文件夹
*/
public void deleteFileAndDir(File file,boolean rootBoo){
String rootDir = null;
if(rootBoo){
rootDir = file.getPath();
}
File []files = file.listFiles();
for(int i=0;i
if(!files[i].isDirectory()){
files[i].delete();
//每五千个打印一次,用作程序提示
count++;
if(count>0&&count%50000==0){
log.info("已经删除文件"+count+"个!");
}
}else{
deleteFileAndDir(files[i],false);
}
}
//不删除存放文件的根目录
if(rootDir!=file.getPath()){
file.delete();
}
}
}
8)代码运行分析结果:
代码测试了两个省共15G的压缩包,解压后三十万个小文件,从解压、合并、移动、删除整个流程耗时共3个小时,合并后的文件大小共40G,效率上看还可以
9)注意事项
解压跟合并必须做成多线程启动的,每个省的解压跟合并都分配一个线程。代码里面用了Java线程池,在分配线程池大小之前先计算一共有几个省,按照省的数量分配线程,
这一点在上面第四点"多线程文件解压合并工具类"里面有体现。
10)项目的配置文件我放在代码里面一同放在了附件,代码有不当之处欢迎指点,大家共同学习进步。用到的jar包我也在项目里面直接上传了,下载下来就能看到。
项目的百度网盘路径:http://pan.baidu.com/s/1c0vYUJI,复制直接下载即可