使用URL的方式读取一个文件内容,需要设置一个handler工厂,这个工厂只能设置一次
使用hadoop的FileSystem读取文件
将一个本地文件拷贝到hadoop文件系统中
列出文件属性
通过路径过滤器查找文件
删除,支持递归删除
重命名
检查文件是否存在
查找某个文件在HDFS中的位置
获取HDFS集群上所有节点的名称
创建本地和远端的checksum
压缩和解压缩,压缩池
// Register Hadoop's URL stream handler so java.net.URL can open hdfs:// URIs.
// The JVM allows setURLStreamHandlerFactory to be called at most once per JVM,
// which is why this lives in a static initializer.
static {
    URL.setURLStreamHandlerFactory(new FsUrlStreamHandlerFactory());
}
- public void test1() throws IOException {
- URL u = new URL("hdfs://IP:8020/test");
- InputStream is = u.openStream();
- BufferedReader br = new BufferedReader(new InputStreamReader(is));
- String line = null;
- while( (line=br.readLine()) != null ) {
- System.out.println(line);
- }
- br.close();
- }
使用hadoop的FileSystem读取文件
- public void test2() throws IOException {
- String url = "hdfs://IP:8020/test-data/hello.txt";
- Configuration config = new Configuration();
- FileSystem fs = FileSystem.get(URI.create(url),config);
- InputStream is = null;
- is = fs.open(new Path(url));
- IOUtils.copyBytes(is, System.out, 4096, false);
- }
将一个本地文件拷贝到hadoop文件系统中
- public void test3() throws IOException {
- String src = "C:\\test.txt";
- String dest = "hdfs://IP:8020/test-data/hello.txt";
- InputStream is = new BufferedInputStream(new FileInputStream(src));
- Configuration config = new Configuration();
- FileSystem fs = FileSystem.get(URI.create(dest), config);
- OutputStream os = fs.create(new Path(dest), new Progressable() {
- @Override
- public void progress() {
- System.out.print(".");
- }
- });
- IOUtils.copyBytes(is, os, 4096, true);
- System.out.println("ok~");
- }
列出文件属性
- public void test4() throws IOException {
- String url = "hdfs:/IP:8020/test-data/hello.txt";
- Configuration config = new Configuration();
- FileSystem fs = FileSystem.get(URI.create(url),config);
- FileStatus status = fs.getFileStatus(new Path(url));
- System.out.println("AccessTime : "+status.getAccessTime());
- System.out.println("BlockSize : "+status.getBlockSize());
- System.out.println("group : "+status.getGroup());
- System.out.println("len : "+status.getLen());
- System.out.println("ModificationTime : "+status.getModificationTime());
- System.out.println("owner : "+status.getOwner());
- System.out.println("is dir ? : "+status.isDir());
- System.out.println("path : "+status.getPath());
- System.out.println("permission : "+status.getPermission());
- System.out.println("replication : "+status.getReplication());
- }
通过路径过滤器查找文件
- public void test5() throws IOException {
- String url = "hdfs://IP:8020/test-data/*";
- Configuration config = new Configuration();
- FileSystem fs = FileSystem.get(URI.create(url),config);
- FileStatus[] status = fs.globStatus( new Path(url), new RegexPathFilter("^.*hello.*$") );
- for(FileStatus s:status) {
- System.out.println(s.getPath().toString());
- }
- System.out.println("filter execute ok");
- }
- //路径正则过滤器类
- public class RegexPathFilter implements PathFilter {
- private final String regex;
- public RegexPathFilter(String regex) {
- this.regex = regex;
- }
- @Override
- public boolean accept(Path path) {
- return path.toString().matches(regex);
- }
- }
删除,支持递归删除
- public void delete() throws IOException {
- String url = "hdfs://IP:8020/test-data/xxx.txt";
- Configuration config = new Configuration();
- FileSystem fs = FileSystem.get(URI.create(url),config);
- fs.delete(new Path(url), false);
- System.out.println("delete ok");
- }
重命名
- public void test5_rename() throws IOException {
- String url = "hdfs://IP:8020/test-data/xx.txt";
- String url2 = "hdfs://IP:8020/test-data/modify-xx.txt";
- Configuration config = new Configuration();
- FileSystem fs = FileSystem.get(URI.create(url),config);
- boolean isok = fs.rename(new Path(url), new Path(url2));
- System.out.println("complete : "+isok);
- }
检查文件是否存在
- public void exist() throws IOException {
- String url = "hdfs:/IP:8020/test-data/modify-xx.txt";
- Configuration config = new Configuration();
- FileSystem fs = FileSystem.get(URI.create(url),config);
- boolean isExist = fs.exists( new Path(url));
- System.out.println("exist ? "+isExist);
- }
查找某个文件在HDFS中的位置
- public void test5_location() throws IOException {
- String url = "hdfs://IP:8020/test-data/hello.txt";
- Configuration config = new Configuration();
- FileSystem fs = FileSystem.get(URI.create(url),config);
- FileStatus status = fs.getFileStatus(new Path(url));
- BlockLocation[] bls = fs.getFileBlockLocations(status, 0, status.getLen());
- for(int i=0;i<bls.length;i++) {
- String[] hosts = bls[i].getHosts();
- System.out.println("block :"+i+"\tlocation : "+hosts[i]);
- }
- }
获取HDFS集群上所有节点的名称
- public void test5_allnode() throws IOException {
- String url2 = "hdfs://IP:8020/test-data/modify-xx.txt";
- Configuration config = new Configuration();
- FileSystem fs = FileSystem.get(URI.create(url2),config);
- DistributedFileSystem hdfs = (DistributedFileSystem)fs;
- DatanodeInfo[] status = hdfs.getDataNodeStats();
- for(DatanodeInfo d:status) {
- System.out.println(d.getHostName());
- }
创建本地和远端的checksum
- public void localCreateChecksum() throws IOException {
- String url = "file:///C:/zzzz/abc.txt";
- Configuration config = new Configuration();
- FileSystem fs = FileSystem.get(URI.create(url),config);
- ChecksumFileSystem cfs = new LocalFileSystem(fs);
- FSDataOutputStream fsdos = cfs.create(new Path(url));
- fsdos.write("hehe".getBytes());
- fsdos.flush();
- fsdos.close();
- }
- public void distributeCreateChecksum() throws IOException {
- String url = "hdfs://IP:8020/test/abc.txt";
- Configuration config = new Configuration();
- FileSystem fs = FileSystem.get(URI.create(url),config);
- ChecksumFileSystem cfs = new LocalFileSystem(fs);
- FSDataOutputStream fsdos = cfs.create(new Path(url));
- fsdos.write("hehe~".getBytes());
- fsdos.flush();
- fsdos.close();
- }
压缩和解压缩,压缩池
- public void compress() throws IOException {
- FileInputStream fis = new FileInputStream("C:\\zzzz\\xx.txt");
- GzipCodec gc = new GzipCodec();
- String url = "hdfs://IP:8020/test/compress.txt";
- Configuration config = new Configuration();
- FileSystem fs = FileSystem.get(URI.create(url), config);
- OutputStream fdos = fs.create(new Path(url));
- byte[] buf = new byte[10240];
- int len = fis.read(buf);
- System.out.println("content:");
- System.out.println( new String(buf,0,len) );
- CompressionOutputStream cos = gc.createOutputStream(fdos);
- cos.write(buf,0,len);
- cos.flush();
- cos.close();
- }
- public void decompress() throws IOException {
- String url = "hdfs://IP:8020/test/compress.gz";
- Configuration config = new Configuration();
- FileSystem fs = FileSystem.get(URI.create(url), config);
- GzipCodec gc = new GzipCodec();
- FSDataInputStream fdis = fs.open(new Path(url));
- CompressionInputStream cis = gc.createInputStream(fdis);
- IOUtils.copy(cis, System.out);
- }
- public void comprssPool() throws IOException {
- FileInputStream fis = new FileInputStream("C:\\zzzz\\abc.txt");
- GzipCodec gc = new GzipCodec();
- FileOutputStream fdos = new FileOutputStream("C:/zzzz/pool.txt");
- Compressor compressor = CodecPool.getCompressor(gc);
- CompressionOutputStream cos = gc.createOutputStream(fdos, compressor);
- IOUtils.copy(fis, cos);
- CodecPool.returnCompressor(compressor);
- }