类似于Google的GFS,Hadoop分布式文件系统(HDFS)被设计成适合运行在通用硬件(commodity hardware)上的分布式文件系统。它和现有的分布式文件系统有很多共同点。但同时,它和其他的分布式文件系统的区别也是很明显的。HDFS是一个具有高度容错性的系统,适合部署在廉价的机器上。HDFS能提供高吞吐量的数据访问,非常适合大规模数据集上的应用。HDFS放宽了一部分POSIX约束,来实现流式读取文件系统数据的目的。HDFS在最开始是作为Apache Nutch搜索引擎项目的基础架构而开发的。HDFS是Apache Hadoop Core项目的一部分。
1、App1.java
package hdfs;
import java.io.InputStream;
import java.net.URL;
import org.apache.hadoop.fs.FsUrlStreamHandlerFactory;
import org.apache.hadoop.io.IOUtils;
/**
 * Minimal example that reads a file from HDFS through {@link java.net.URL}
 * and copies its bytes to standard output.
 *
 * <p>Troubleshooting: if running this throws "unknown host: chaoren", the
 * local machine is not resolving the host name {@code chaoren} used in
 * {@link #PATH}.
 */
public class App1 {
    /** URL of the HDFS file that is printed to standard output. */
    static final String PATH = "hdfs://chaoren:9000/hello";

    /**
     * Opens {@link #PATH} as a URL stream and copies it to {@code System.out}.
     *
     * @param args unused
     * @throws Exception if the URL cannot be opened or the copy fails
     */
    public static void main(String[] args) throws Exception {
        // Register Hadoop's stream handler factory before constructing the URL
        // so that java.net.URL can open the location in PATH.
        URL.setURLStreamHandlerFactory(new FsUrlStreamHandlerFactory());
        final URL url = new URL(PATH);
        final InputStream in = url.openStream();
        try {
            // copyBytes arguments:
            //   in       - the input stream
            //   out      - the output stream
            //   buffSize - the buffer size in bytes
            //   close    - whether to close the streams when the transfer ends
            // close is false on purpose: passing true would also close
            // System.out, which this program does not own.
            IOUtils.copyBytes(in, System.out, 1024, false);
        } finally {
            // Release only the stream this method opened.
            IOUtils.closeStream(in);
        }
    }
}
2、App2.java
package hdfs;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
/**
 * Walk-through of basic HDFS operations using the {@link FileSystem} API:
 * create a directory, upload a local file, print a file, list the root
 * directory and delete a directory.
 */
public class App2 {
    /** URI of the HDFS instance the example connects to. */
    static final String PATH = "hdfs://chaoren:9000/";
    /** Directory created (and optionally removed) by the example. */
    static final String DIR = "/d1";
    /** HDFS file written by {@link #putData} and read by {@link #getData}. */
    static final String FILE = "/d1/hello";
    /** Local file that {@link #putData} uploads. */
    static final String LOCAL_FILE = "H:/kuaipan/hadoop/classes/yy131009/day2/readme.txt";

    /**
     * Runs the enabled example steps in order and closes the file system
     * handle afterwards.
     *
     * @param args unused
     * @throws Exception if connecting to HDFS or any step fails
     */
    public static void main(String[] args) throws Exception {
        FileSystem fileSystem = getFileSystem();
        try {
            // Create a directory (shell equivalent: hadoop fs -mkdir /d1)
            mkdir(fileSystem);
            // Upload a file (shell equivalent: hadoop fs -put src des)
            putData(fileSystem);
            // Download / print a file (shell equivalent: hadoop fs -get src des)
            //getData(fileSystem);
            // List the root directory
            list(fileSystem);
            // Delete the directory
            //remove(fileSystem);
        } finally {
            fileSystem.close();
        }
    }

    /**
     * Prints one tab-separated line per entry of the root directory:
     * entry kind, permission, replication, length and full path.
     *
     * @param fileSystem the file system to list
     * @throws IOException if the listing cannot be obtained
     */
    private static void list(FileSystem fileSystem) throws IOException {
        final FileStatus[] listStatus = fileSystem.listStatus(new Path("/"));
        for (FileStatus fileStatus : listStatus) {
            String isDir = fileStatus.isDir() ? "文件夹" : "文件";
            final String permission = fileStatus.getPermission().toString();
            final short replication = fileStatus.getReplication();
            final long len = fileStatus.getLen();
            final String path = fileStatus.getPath().toString();
            System.out.println(isDir + "\t" + permission + "\t" + replication + "\t" + len + "\t" + path);
        }
    }

    /**
     * Copies the content of {@link #FILE} to standard output.
     *
     * @param fileSystem the file system to read from
     * @throws IOException if the file cannot be opened or read
     */
    private static void getData(FileSystem fileSystem) throws IOException {
        final FSDataInputStream in = fileSystem.open(new Path(FILE));
        try {
            // close=false: letting copyBytes close the streams would also
            // close System.out and silence every later print (e.g. list()).
            IOUtils.copyBytes(in, System.out, 1024, false);
        } finally {
            IOUtils.closeStream(in);
        }
    }

    /**
     * Uploads {@link #LOCAL_FILE} to {@link #FILE}.
     *
     * @param fileSystem the file system to write to
     * @throws FileNotFoundException if the local source file cannot be opened
     * @throws IOException if creating or writing the HDFS file fails
     */
    private static void putData(FileSystem fileSystem) throws IOException,
            FileNotFoundException {
        // Open the local source first: if it is missing we fail before
        // creating the HDFS file, so no output stream is left open.
        final FileInputStream in = new FileInputStream(LOCAL_FILE);
        try {
            final FSDataOutputStream out = fileSystem.create(new Path(FILE));
            // close=true: both streams belong to this method, and closing
            // the output stream is what completes the HDFS write.
            IOUtils.copyBytes(in, out, 1024, true);
        } finally {
            // Covers the case where create() throws before copyBytes takes
            // over closing; closing an already-closed stream is harmless.
            IOUtils.closeStream(in);
        }
    }

    /**
     * Recursively deletes {@link #DIR}.
     *
     * @param fileSystem the file system to delete from
     * @throws IOException if the delete call fails
     */
    private static void remove(FileSystem fileSystem) throws IOException {
        fileSystem.delete(new Path(DIR), true);
    }

    /**
     * Creates {@link #DIR}.
     *
     * @param fileSystem the file system to create the directory in
     * @throws IOException if the mkdirs call fails
     */
    private static void mkdir(FileSystem fileSystem) throws IOException {
        fileSystem.mkdirs(new Path(DIR));
    }

    /**
     * Obtains a {@link FileSystem} for {@link #PATH} with a default
     * {@link Configuration}.
     *
     * @return the file system handle
     * @throws IOException if the file system cannot be obtained
     * @throws URISyntaxException if {@link #PATH} is not a valid URI
     */
    private static FileSystem getFileSystem() throws IOException, URISyntaxException {
        return FileSystem.get(new URI(PATH), new Configuration());
    }
}
3、FileUtil.java
package org.apache.hadoop.fs;
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.*;
import java.util.Enumeration;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.nativeio.NativeIO;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Shell;
import org.apache.hadoop.util.Shell.ShellCommandExecutor;
/**
* A collection of file-processing util methods
*/
public class FileUtil {
private static final Log LOG = LogFactory.getLog(FileUtil.class);
/**
 * Extracts the path of every entry in an array of FileStatus objects.
 *
 * @param stats
 *          the FileStatus objects to convert; may be null
 * @return a new array holding {@code stats[i].getPath()} at index i, or
 *         null when {@code stats} is null
 */
public static Path[] stat2Paths(FileStatus[] stats) {
  if (stats == null) {
    return null;
  }
  final Path[] paths = new Path[stats.length];
  int next = 0;
  for (FileStatus status : stats) {
    paths[next++] = status.getPath();
  }
  return paths;
}
/**
 * Converts an array of FileStatus to an array of Path, falling back to a
 * default when there is nothing to convert.
 *
 * @param stats
 *          the FileStatus objects to convert; may be null
 * @param path
 *          the default path to return if stats is null
 * @return a one-element array containing {@code path} when {@code stats}
 *         is null, otherwise the result of {@code stat2Paths(stats)}
 */
public static Path[] stat2Paths(FileStatus[] stats, Path path) {
  return (stats == null) ? new Path[] {path} : stat2Paths(stats);
}
/**
 * Deletes a directory together with its contents.
 *
 * The contents are removed first via {@code fullyDeleteContents(dir)};
 * {@code dir.delete()} is attempted only when that call reports success.
 * A false result means the directory may be partially deleted.
 *
 * @param dir the directory to delete
 * @return true only if both the contents and the directory itself were
 *         deleted
 * @throws IOException declared by this method's signature
 */
public static boolean fullyDelete(File dir) throws IOException {
  // Short-circuit: dir.delete() is skipped when clearing the contents failed.
  return fullyDeleteContents(dir) && dir.delete();
}
/**
* Delete the contents of a directory, not the directory itself. If
* we return false, the directory may be partially-deleted.
*/
public static boolean fullyDeleteC