1、使用FileSystem以标准输出格式显示hadoop文件系统中的文件
源代码:
import org.apache.hadoop.conf.Configuration;
import java.io.InputStream;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
//import org.apache.hadoop.fs.*;
import java.net.URI;
public class FileSystemCat {
    /**
     * Streams a file from a Hadoop filesystem to standard output.
     * args[0] is the file's URI (e.g. hdfs://host:9000/path/file.txt).
     */
    public static void main(String[] args) throws Exception {
        final String location = args[0];
        FileSystem hdfs = FileSystem.get(URI.create(location), new Configuration());
        InputStream stream = null;
        try {
            stream = hdfs.open(new Path(location));
            // 4096-byte buffer; false = leave the stream open for the finally block.
            IOUtils.copyBytes(stream, System.out, 4096, false);
        } finally {
            // closeStream is null-safe and swallows close errors.
            IOUtils.closeStream(stream);
        }
    }
}
编译:
javac -cp /home/grid/hadoop-1.2.1/hadoop-core-1.2.1.jar FileSystemCat.java
运行:
[grid@h1 myclass]$ hadoop FileSystemCat hdfs://h1:9000/user/grid/in/text2.txt
hello hadoop
2、使用seek 方法,将hadoop文件系统中的一个文件在标准输出上显示三次
源代码:
import org.apache.hadoop.conf.Configuration;
import java.io.InputStream;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
//import org.apache.hadoop.fs.*;
import java.net.URI;
import org.apache.hadoop.fs.FSDataInputStream;
public class FileSystemDoubleCat {
    // How many times the file is echoed to standard output.
    private static final int PASSES = 3;

    /**
     * Prints the same Hadoop file to standard output three times,
     * using FSDataInputStream.seek(0) to rewind between passes.
     * args[0] is the file's URI.
     */
    public static void main(String[] args) throws Exception {
        final String location = args[0];
        FileSystem hdfs = FileSystem.get(URI.create(location), new Configuration());
        FSDataInputStream stream = null;
        try {
            stream = hdfs.open(new Path(location));
            for (int pass = 0; pass < PASSES; pass++) {
                if (pass > 0) {
                    // Rewind to the beginning of the file before repeating.
                    stream.seek(0);
                }
                IOUtils.copyBytes(stream, System.out, 4096, false);
            }
        } finally {
            // Null-safe close of the (possibly never-opened) stream.
            IOUtils.closeStream(stream);
        }
    }
}
编译:
javac -cp /home/grid/hadoop-1.2.1/hadoop-core-1.2.1.jar FileSystemDoubleCat.java
运行:
[grid@h1 myclass]$ hadoop FileSystemDoubleCat hdfs://h1:9000/user/grid/in/text2.txt
hello hadoop
hello hadoop
hello hadoop
3、将本地文件复制到hadoop文件系统
源代码:
import java.io.InputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import java.net.URI;
import java.io.*;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.util.Progressable;
public class FileCopyWithProgress {
    /**
     * Copies a local file into a Hadoop filesystem, printing a "." each
     * time Hadoop reports write progress.
     *
     * args[0] = local source file path
     * args[1] = destination URI (e.g. hdfs://host:9000/user/x/file)
     *
     * Fix: the original leaked the local input stream if FileSystem.get
     * or fs.create threw before copyBytes could close it; the stream is
     * now released in a finally block on that early-failure path.
     */
    public static void main(String[] args) throws Exception {
        String localSrc = args[0];
        String dst = args[1];
        InputStream in = new BufferedInputStream(new FileInputStream(localSrc));
        try {
            Configuration conf = new Configuration();
            FileSystem fs = FileSystem.get(URI.create(dst), conf);
            // progress() is invoked periodically by Hadoop as bytes are written.
            OutputStream out = fs.create(new Path(dst), new Progressable() {
                public void progress() {
                    System.out.print(".");
                }
            });
            // close=true: copyBytes closes both streams whether or not it succeeds.
            IOUtils.copyBytes(in, out, 4096, true);
            in = null; // ownership transferred; copyBytes already closed it
        } finally {
            // Only non-null if we failed before copyBytes took ownership.
            IOUtils.closeStream(in);
        }
    }
}
编译:
javac -cp /home/grid/hadoop-1.2.1/hadoop-core-1.2.1.jar FileCopyWithProgress.java
运行:
hadoop FileCopyWithProgress URLCat.java in/URLCat.java
[grid@h1 myclass]$ hadoop fs -ls in/
Found 5 items
-rw-r--r-- 2 grid supergroup 487 2013-08-25 06:57 /user/grid/in/URLCat.java
4、显示hadoop文件系统中一组路径的文件信息
源代码:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileUtil;
import java.net.URI;
public class ListStatus {
    /**
     * Lists the contents of one or more Hadoop filesystem paths, printing
     * the fully-qualified path of every entry found.
     * Each command-line argument is a path URI; the FileSystem instance is
     * resolved from the first argument.
     */
    public static void main(String[] args) throws Exception {
        FileSystem hdfs = FileSystem.get(URI.create(args[0]), new Configuration());

        // Wrap every argument as a Path for the batch listStatus call.
        Path[] targets = new Path[args.length];
        for (int idx = 0; idx < targets.length; idx++) {
            targets[idx] = new Path(args[idx]);
        }

        FileStatus[] statuses = hdfs.listStatus(targets);
        // stat2Paths extracts just the path from each FileStatus entry.
        for (Path entry : FileUtil.stat2Paths(statuses)) {
            System.out.println(entry);
        }
    }
}
编译:
javac -cp /home/grid/hadoop-1.2.1/hadoop-core-1.2.1.jar ListStatus.java
运行:
[grid@h1 myclass]$ hadoop ListStatus hdfs://h1:9000/ hdfs://h1:9000/user/grid/in
hdfs://h1:9000/home
hdfs://h1:9000/user
hdfs://h1:9000/user/grid/in/URLCat.java
hdfs://h1:9000/user/grid/in/VERSION
hdfs://h1:9000/user/grid/in/test3.txt
hdfs://h1:9000/user/grid/in/text1.txt
hdfs://h1:9000/user/grid/in/text2.txt