将所有文件夹内的 .txt 文件合并,并上传到 HDFS 中。
合并后如下:
package files;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.io.IOUtils;
import test.CopyManyFilesToHDFS.RegexAcceptPathFilter;
public class MergeSmallFilesToHDFS {
private static FileSystem fs = null;
private static FileSystem local = null;
public static class RegexExcludePathFilter implements PathFilter {
private final String regex;
/**
 * Creates a path filter backed by the given regular expression.
 *
 * <p>The expression is stored as-is and later applied by {@code accept}
 * via {@link String#matches(String)} against the full string form of each
 * path, so it must match the entire path string, not just a substring.
 *
 * @param regex the regular expression to test paths against; must not be null
 * @throws NullPointerException if {@code regex} is null
 */
public RegexExcludePathFilter(String regex) {
    // Fail fast here: a null pattern would otherwise only surface later as an
    // anonymous NullPointerException inside accept(), far from the faulty caller.
    this.regex = java.util.Objects.requireNonNull(regex, "regex");
}
@Override
public boolean accept(Path path) {
boolean flag = path.toString().matches(regex);
//过滤 regex 格式的文件,只需 r