转自:http://blog.csdn.net/eryk86/article/details/7472346
- import java.io.IOException;
- import java.text.SimpleDateFormat;
- import java.util.Calendar;
- import java.util.HashSet;
- import java.util.Set;
- import org.apache.hadoop.conf.Configuration;
- import org.apache.hadoop.fs.FileStatus;
- import org.apache.hadoop.fs.FileSystem;
- import org.apache.hadoop.fs.Path;
- /**
- *
- * @author eryk
- *
- */
- public class DeleteHDFSFile {
- private FileSystem fs;
- private Set<Path> _delete = new HashSet<Path>(); // 记录要删除的路径
- public DeleteHDFSFile(FileSystem fs) {
- this.fs = fs;
- }
- /**
- *
- * @param args
- * args[0] N天内的数据要保留
- * args[1] 要删除数据的路径 /run/output/hadoop
- * @throws Exception
- */
- public static void main(String[] args) throws Exception {
- Configuration _conf = new Configuration();
- FileSystem fs = FileSystem.get(_conf);
- DeleteHDFSFile df = new DeleteHDFSFile(fs);
- Calendar calendar = Calendar.getInstance();
- calendar.add(Calendar.DAY_OF_MONTH, Integer.parseInt("-"+args[0]));
- String date = new SimpleDateFormat("yyyyMMdd").format(calendar.getTime());
- df.fileList(args[1],Integer.parseInt(date));
- for(Path _path: df._delete){
- if(fs.exists(_path)){
- fs.delete(_path);
- System.out.println("delete:"+_path);
- }
- }
- fs.close();
- }
- public void fileList(String path,int date) throws IOException{
- FileStatus[] fileList = fs.listStatus(new Path(path));
- if(fileList == null)return;
- for(FileStatus file : fileList){
- String _path = file.getPath().toString();
- if(file.isDir()){
- if(_path.matches(".*?\\d{8}?.*")){
- String[] _pathDate = _path.split("/");
- for(String d : _pathDate){
- boolean t = (d.matches("\\d{8}"))? (Integer.parseInt(d)< date)? true : false : false;
- if(t){
- System.out.println("add delete list :"+ _path);
- _delete.add(new Path(_path));
- }
- }
- }
- fileList(file.getPath().toString(),date);
- }
- }
- }
- }