hadoop 关于文件操作类基本上是在“org.apache.hadoop.fs”包中。如:打开文件,删除文件,读写文件等。
static FileSystem get (Configuration conf);
得到configuration对象
得到FileSystem对象
进行文件操作
初始化conf
conf=new Configuration();
System.setProperty("HADOOP_USER_NAME", "node01");
conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
resource 下配置文件:
4个hadoop配置文件
hivejdbc配置文件
hive_driver=org.apache.hive.jdbc.HiveDriver
hive_user=
hive_password=
hive_url= hive端口号10000,zk端口号2181
mapreduce.job.queuename=root.aaa_queue
1.上传本地文件
// 1. Upload a local file to HDFS.
Configuration conf = new Configuration();
Path src = new Path("D://test.txt"); // local source path (original var "scr" was a typo)
Path dfs = new Path("/");            // HDFS destination directory
FileSystem hdfs = null;
try {
    hdfs = FileSystem.get(conf);
    hdfs.copyFromLocalFile(src, dfs);
} catch (IOException e) {
    // keep the cause so the failure can actually be diagnosed from the log
    logger.error("出现io异常", e);
} finally {
    // bug fix: hdfs stays null when FileSystem.get() itself failed — guard before close
    if (hdfs != null) {
        try {
            hdfs.close();
        } catch (IOException e) {
            logger.error("close FileSystem failed", e);
        }
    }
}
2.创建hdfs文件
// 2. Create an HDFS file and write a byte array into it.
byte[] b = "aaa".getBytes();
FileSystem fs = HDFSUtils.getFS();
FSDataOutputStream outputStream = null;
try {
    outputStream = fs.create(path);
    outputStream.write(b, 0, b.length);
} catch (IOException e) {
    e.printStackTrace();
} finally {
    // bug fix: close in finally so the stream is released even when write() throws
    if (outputStream != null) {
        try {
            outputStream.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
3.创建hdfs文件目录
fs.mkdirs(dfs);
4.重命名hdfs文件
boolean isRename=fs.rename(frpath,topath);
5.删除hdfs文件
boolean isDel=fs.delete(path,recursive);
recursive:是否递归删除
6.查看hdfs文件是否存在
boolean isExists=fs.exists(path);
7.查看hdfs最后修改时间
FileStatus fileStatus=fs.getFileStatus(path);
long moditime=fileStatus.getModificationTime();
8.读取hdfs某个目录下所有的文件
FileStatus fileStatus[]=fs.listStatus(path);
for (int i = 0; i < fileStatus.length; i++) {
fileStatus[i].getPath().toString();
}
9.查找某个文件在hdfs集群的位置
fs.getFileBlockLocations(fs.getFileStatus(path), start, len);
10获取hdfs集群上所有的节点名称信息
DatanodeInfo[] dataNodeStats = ((DistributedFileSystem) fs).getDataNodeStats();
for (DatanodeInfo dn : dataNodeStats) { dn.getHostName(); }
11.追加内容到hdfs文件系统的文件中
FSDataOutputStream out = fs.append(path);
out.write(b, 0, b.length); // b 为要追加的字节数组
12.开启hdfs目录快照功能,创建删除快照
DFSAdmin dfsAdmin = new DFSAdmin(conf);
String[] strings = new String[2];
strings[1] = "/path";
dfsAdmin.allowSnapshot(strings);
//第二个元素为要创建快照的目录
//创建快照
fs.createSnapshot(path, "pathName");
//路径,快照名
//删除快照
fs.deleteSnapshot(path, "pathName");
13.文件的压缩和解压缩
public class CodecTest {
//压缩文件
//压缩文件
/**
 * Compresses /user/hadoop/aa.txt into /user/hadoop/text.gz on HDFS using the
 * codec named by {@code codecClassName} (e.g. "org.apache.hadoop.io.compress.GzipCodec").
 *
 * @param codecClassName fully-qualified class name of the CompressionCodec to use
 * @throws Exception if the codec class cannot be loaded or an I/O error occurs
 */
public static void compress(String codecClassName) throws Exception {
    Class<?> codecClass = Class.forName(codecClassName);
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
    FSDataInputStream in = null;
    CompressionOutputStream out = null;
    try {
        // destination of the compressed data
        FSDataOutputStream outputStream = fs.create(new Path("/user/hadoop/text.gz"));
        // source file to be compressed
        in = fs.open(new Path("/user/hadoop/aa.txt"));
        // wrap the raw output stream with the codec's compressing stream
        out = codec.createOutputStream(outputStream);
        IOUtils.copyBytes(in, out, conf);
    } finally {
        // bug fix: close in finally so both streams are released even when copyBytes throws
        IOUtils.closeStream(out);
        IOUtils.closeStream(in);
    }
}
//解压缩
//解压缩
/**
 * Decompresses /user/hadoop/&lt;fileName&gt;.gz on HDFS with GzipCodec and writes
 * the plain text to stdout.
 *
 * @param fileName base name (without ".gz") of the file under /user/hadoop to decompress
 * @throws Exception if the codec class cannot be loaded or an I/O error occurs
 */
public static void uncompress(String fileName) throws Exception {
    Class<?> codecClass = Class.forName("org.apache.hadoop.io.compress.GzipCodec");
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
    InputStream in = null;
    try {
        // bug fix: the original ignored fileName and always opened /user/hadoop/text.gz
        FSDataInputStream inputStream = fs.open(new Path("/user/hadoop/" + fileName + ".gz"));
        // decompress and dump the contents to the console
        in = codec.createInputStream(inputStream);
        IOUtils.copyBytes(in, System.out, conf);
    } finally {
        // bug fix: close in finally so the stream is released even when copyBytes throws
        IOUtils.closeStream(in);
    }
}
//使用文件扩展名来推断出相应的codec来对文件进行解压缩
/**
 * Infers the codec from the file's extension, decompresses the file on HDFS,
 * and writes the result next to it with the codec extension stripped.
 *
 * @param uri full URI of the compressed file, e.g. "hdfs://master:9000/user/hadoop/text.gz"
 * @throws IOException if an I/O error occurs while reading or writing
 */
public static void uncompress1(String uri) throws IOException {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(URI.create(uri), conf);
    Path compressed = new Path(uri);
    CompressionCodecFactory factory = new CompressionCodecFactory(conf);
    CompressionCodec codec = factory.getCodec(compressed);
    if (codec == null) {
        System.out.println("no codec found for " + uri);
        System.exit(1);
    }
    // strip the codec's default extension (e.g. ".gz") to build the output path
    String target = CompressionCodecFactory.removeSuffix(uri, codec.getDefaultExtension());
    InputStream source = null;
    OutputStream sink = null;
    try {
        source = codec.createInputStream(fs.open(compressed));
        sink = fs.create(new Path(target));
        IOUtils.copyBytes(source, sink, conf);
    } finally {
        // closeStream is null-safe; close output first, then input
        IOUtils.closeStream(sink);
        IOUtils.closeStream(source);
    }
}
/** Demo entry point: decompresses a gz file on HDFS via extension-based codec lookup. */
public static void main(String[] args) throws Exception {
    // compress("org.apache.hadoop.io.compress.GzipCodec");
    // uncompress("text");
    String uri = "hdfs://master:9000/user/hadoop/text.gz";
    uncompress1(uri);
}