hadoop提供的Java API主要用来对文件操作。最常见的也就是读、写、遍历。
总的来说,调用API的第一步是获取FileSystem,可以用FileSystem fs = FileSystem.get()来获取。接下来如果是写(上传)文件,则用fs.create(),如果是读(下载)文件,则用fs.open()。然后再用FSDataInputStream、FSDataOutputStream 具体代码如下:
package hadoop;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.net.URISyntaxException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
/**
 * Minimal demo of the HDFS Java API: listing, writing (upload) and
 * reading (download) files through {@link FileSystem}.
 */
public class HadoopFileSystem
{
    /** NameNode URI; every operation connects to this cluster. */
    public static final String HDFS_PATH = "hdfs://master:9000";

    public static void main(String[] args)
    {
        //readFile();
        //writeFile();
        listFile("/home/hadoop");
    }

    /**
     * Recursively prints the tree rooted at {@code listPath}, one entry per
     * line: "d &lt;path&gt;" for directories, "- &lt;path&gt;" for files.
     * Errors are reported to stderr and swallowed (demo code).
     *
     * @param listPath HDFS directory to start from
     */
    static void listFile(String listPath)
    {
        try
        {
            // Open ONE FileSystem and reuse it for the whole recursion;
            // the original reconnected to the NameNode on every level.
            FileSystem fs = FileSystem.get(new URI(HDFS_PATH), new Configuration());
            listTree(fs, listPath);
        }
        catch (URISyntaxException e)
        {
            e.printStackTrace();
        }
        catch (IOException e)
        {
            e.printStackTrace();
        }
    }

    /** Recursive worker for {@link #listFile(String)} sharing one FileSystem. */
    private static void listTree(FileSystem fs, String listPath) throws IOException
    {
        FileStatus[] files = fs.listStatus(new Path(listPath));
        for (FileStatus f : files)
        {
            // isDirectory() replaces the deprecated isDir().
            if (f.isDirectory())
            {
                System.out.println("d " + f.getPath());
                listTree(fs, f.getPath().toString());
            }
            else
            {
                System.out.println("- " + f.getPath());
            }
        }
    }

    /**
     * Uploads the local file C:/data.txt to a fixed HDFS path, deleting any
     * existing file first, then echoes the uploaded content to stdout.
     */
    static void writeFile()
    {
        try
        {
            String path = "/home/hadoop/data/writefile/write.txt";
            FileSystem fs = FileSystem.get(new URI(HDFS_PATH), new Configuration());
            Path dst = new Path(path);
            if (fs.exists(dst))
            {
                System.out.println("file already exist, delete it first...");
                // BUG FIX: deleteOnExit() only schedules deletion for when the
                // FileSystem is closed / JVM exits — it does NOT delete now.
                // delete(path, false) removes the file immediately
                // (false = non-recursive, it is a plain file).
                fs.delete(dst, false);
            }
            FSDataOutputStream out = fs.create(dst);
            InputStream in = new FileInputStream("C:/data.txt");
            // copyBytes with close=true closes both streams when done.
            IOUtils.copyBytes(in, out, 1024, true);
            System.out.println("**********write finished*******");
            // Read back and print what was just written, as a sanity check.
            FSDataInputStream i = fs.open(dst);
            IOUtils.copyBytes(i, System.out, 1024, true);
        }
        catch (URISyntaxException e)
        {
            e.printStackTrace();
        }
        catch (IOException e)
        {
            e.printStackTrace();
        }
    }

    /**
     * Downloads a fixed HDFS file and streams its content to stdout.
     */
    static void readFile()
    {
        try
        {
            String path = HDFS_PATH + "/home/hadoop/data/input/core-site.xml";
            FileSystem fs = FileSystem.get(URI.create(HDFS_PATH), new Configuration());
            FSDataInputStream in = fs.open(new Path(path));
            // close=true: copyBytes closes the stream for us.
            IOUtils.copyBytes(in, System.out, 1024, true);
        }
        catch (IOException e)
        {
            e.printStackTrace();
        }
    }
}
其中writeFile、readFile、listFile分别代表了写、读、列举。一般操作格式就是这个样子吧,先记下来再说。。。