一、实验目的
- 理解HDFS在Hadoop体系结构中的角色;
- 熟练使用HDFS操作常用的Shell命令;
- 熟悉HDFS操作常用的Java API。
二、实验平台
- 操作系统:Linux(建议CentOS);
- Hadoop版本:2.6.1;
- JDK版本:1.7或以上版本;
- Java IDE:IDEA。
三、实验步骤
编程实现以下功能,并利用Hadoop提供的Shell命令完成相同任务:
(1) 将HDFS中指定文件的内容输出到终端中;
#!/bin/bash
# Print the content of an HDFS file to the terminal.
# Rejects directories and missing paths with a message.
read -p "Please select the file path you want to output: " filename
# `hadoop fs -test -e` exits 0 when the path exists; using the command
# directly in `if` avoids the fragile $? pattern. Paths are quoted so
# names containing spaces survive word splitting.
if hadoop fs -test -e "$filename"; then
    # -test -d exits 0 when the path is a directory
    if hadoop fs -test -d "$filename"; then
        echo "$filename is a directory!"
    else
        hadoop fs -cat "$filename"
    fi
else
    echo "No file or directory!"
fi
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.util.Scanner;
/**
 * Reads an HDFS file (path entered on stdin) and prints its content
 * line by line to the terminal. Missing paths are reported instead.
 */
public class test {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Force the DistributedFileSystem implementation so the program
        // works even when core-site.xml is not on the classpath.
        conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
        // try-with-resources guarantees fs, sc and the reader are closed on
        // every path; the original leaked the BufferedReader always and
        // leaked fs/sc when the file did not exist or an exception was thrown.
        try (FileSystem fs = FileSystem.get(conf);
             Scanner sc = new Scanner(System.in)) {
            System.out.println("please select file you want to output: ");
            Path filepath = new Path(sc.nextLine());
            if (fs.exists(filepath)) {
                try (FSDataInputStream inputStream = fs.open(filepath);
                     BufferedReader bf = new BufferedReader(new InputStreamReader(inputStream))) {
                    String line;
                    while ((line = bf.readLine()) != null) {
                        System.out.println(line);
                    }
                }
            } else {
                System.out.println("file not exist!");
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
(2) 显示HDFS中指定的文件的读写权限、大小、创建时间、路径等信息;
本题为问题3的简化版本,稍作修改即可,在此不再赘述,详见问题3。
(3) 给定HDFS中某一个目录,输出该目录下的所有文件的读写权限、大小、创建时间、路径等信息,如果该文件是目录,则递归输出该目录下所有文件相关信息;
#!/bin/bash
# Show permission/size/time/path info for an HDFS path:
# recursively (-ls -R) for directories, directly (-ls) for files.
read -p "Please select the file path you want to look at: " filename
# Quote all path expansions so names with spaces are handled correctly.
if hadoop fs -test -e "$filename"; then
    if hadoop fs -test -d "$filename"; then
        # directory: recurse into every sub-entry
        hadoop fs -ls -R "$filename"
    else
        hadoop fs -ls "$filename"
    fi
else
    echo "No file or directory!"
fi
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Scanner;
/**
 * Recursively prints the FileStatus (permissions, size, modification time,
 * path, ...) of every entry under an HDFS path entered on stdin.
 */
public class test {
    /**
     * Depth-first walk: prints the status of each entry under {@code filepath}
     * and recurses into sub-directories.
     *
     * @throws IOException if the directory listing fails
     */
    public static void dfs(FileSystem fs, Path filepath) throws IOException {
        for (FileStatus status : fs.listStatus(filepath)) {
            // FileStatus.toString() includes permission, length, mtime and path.
            System.out.println(status.toString());
            if (status.isDirectory()) {
                dfs(fs, status.getPath());
            }
        }
    }

    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
        // try-with-resources: the original leaked fs and sc whenever
        // dfs() threw before the close() calls were reached.
        try (FileSystem fs = FileSystem.get(conf);
             Scanner sc = new Scanner(System.in)) {
            System.out.println("please select file you want to check: ");
            Path filepath = new Path(sc.nextLine());
            if (fs.exists(filepath)) {
                dfs(fs, filepath);
            } else {
                System.out.println("file is not exist");
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
(4) 提供一个HDFS内的文件的路径,对该文件进行创建和删除操作。如果文件所在目录不存在,则自动创建目录;
#!/bin/bash
# Create or delete a file under a given HDFS directory.
# A missing directory is created automatically before the operation.
read -p "Please select the file path you want to operate on: " filename
if ! hadoop fs -test -e "$filename"; then
    # Path missing: create it (with parents) before asking for the operation.
    echo "File path does not exist, automatically create a directory!"
    hadoop fs -mkdir -p "$filename"
fi
read -p "Please select operate delete or create (input 0 represent delete and 1 represent create): " op
read -p "enter filename: " name
target="$filename/$name"
if [ "$op" -eq 0 ]; then
    # ---- delete ----
    if hadoop fs -test -e "$target"; then
        if hadoop fs -test -d "$target"; then
            # BUG FIX: the original ran 'hadoop fs -ls -R' here, so it
            # claimed success while deleting nothing.
            hadoop fs -rm -r "$target"
            echo "Directory delete success!"
        else
            # BUG FIX: the original ran 'hadoop fs -ls' here as well.
            hadoop fs -rm "$target"
            echo "File delete success!"
        fi
    else
        echo "Operation failure: no file or directory!"
    fi
else
    # ---- create ----
    if hadoop fs -test -e "$target"; then
        echo "Operation failure: Unable to create a file or directory! $target is exist!"
    else
        hadoop fs -touchz "$target"
        echo "File create success!"
    fi
fi
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Scanner;
/**
 * Creates or deletes a file under an HDFS directory entered on stdin.
 * A missing directory is created automatically first.
 */
public class test {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
        // try-with-resources: fs and sc are closed on every path.
        try (FileSystem fs = FileSystem.get(conf);
             Scanner sc = new Scanner(System.in)) {
            System.out.println("enter a path ");
            String filename = sc.nextLine();
            Path filepath = new Path(filename);
            if (fs.exists(filepath)) {
                System.out.println("file is exist");
            } else {
                // The task asks for the missing DIRECTORY to be created.
                // BUG FIX: the original called fs.create(), which makes an
                // (unclosed) empty FILE at the path instead of a directory.
                System.out.println("file is not exist, has been created now");
                fs.mkdirs(filepath);
            }
            System.out.println("please choose your option: delete or create");
            String choice = sc.nextLine();
            System.out.println("enter a filename ");
            Path fp = new Path(filename + "/" + sc.nextLine());
            if (choice.equals("delete")) {
                if (fs.exists(fp)) {
                    // recursive=true also removes non-empty directories
                    fs.delete(fp, true);
                    System.out.println("file delete success");
                } else {
                    System.out.println(fp + " is not exist");
                }
            } else {
                if (fs.exists(fp)) {
                    System.out.println("file is exist");
                } else {
                    // close the returned stream so the empty file is committed
                    fs.create(fp).close();
                    System.out.println("file create success");
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
其余演示情况较多,仅展示一种。
(5) 提供一个HDFS的目录的路径,对该目录进行创建和删除操作。创建目录时,如果目录文件所在目录不存在,则自动创建相应目录;删除目录时,由用户指定当该目录不为空时是否还删除该目录;
#!/bin/bash
# Create or delete an entry under a given HDFS directory. Deleting a
# NON-EMPTY directory requires user confirmation (per the assignment).
read -p "Please select the file path you want to operate on: " filename
if ! hadoop fs -test -e "$filename"; then
    echo "File path does not exist, automatically create a directory!"
    hadoop fs -mkdir -p "$filename"
fi
read -p "Please select operate delete or create (input 0 represent delete and 1 represent create): " op
read -p "enter filename: " name
target="$filename/$name"
if [ "$op" -eq 0 ]; then
    # ---- delete ----
    if hadoop fs -test -e "$target"; then
        if hadoop fs -test -d "$target"; then
            # `hadoop fs -count` prints: DIR_COUNT FILE_COUNT SIZE PATH;
            # field 2 (FILE_COUNT) == 0 means the directory holds no files.
            # BUG FIX: the original used the undefined "$path/$filename"
            # here instead of the real target path.
            isEmpty=$(hadoop fs -count "$target" | awk '{print $2}')
            if [ "$isEmpty" -eq 0 ]; then
                hadoop fs -rm -r "$target"
                echo "Directory delete success!"
            else
                read -p "Not an empty directory, continue? (input 0 represent yes and 1 represent no): " ch
                # BUG FIX: the original compared $ch against the string
                # "yes" although the prompt asks for 0/1, so confirmation
                # could never succeed.
                if [ "$ch" -eq 0 ]; then
                    hadoop fs -rm -r "$target"
                    echo "Directory delete success!"
                fi
            fi
        else
            # BUG FIX: the original ran 'hadoop fs -ls' and deleted nothing.
            hadoop fs -rm "$target"
            echo "File delete success!"
        fi
    else
        echo "Operation failure: no file or directory!"
    fi
else
    # ---- create ----
    if hadoop fs -test -e "$target"; then
        echo "Operation failure: Unable to create a file or directory! $name is exist!"
    else
        hadoop fs -touchz "$target"
        echo "File create success!"
    fi
fi
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import java.util.Scanner;
/**
 * Creates or deletes an HDFS directory (path entered on stdin). When the
 * directory to delete is non-empty, the user must confirm first.
 */
public class hsfs2 {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
        // try-with-resources: the original leaked fs/sc on any exception.
        try (FileSystem fs = FileSystem.get(conf);
             Scanner sc = new Scanner(System.in)) {
            System.out.println("enter a path ");
            Path filepath = new Path(sc.nextLine());
            if (fs.exists(filepath)) {
                System.out.println("文件已存在");
            } else {
                // The task asks for a DIRECTORY (with missing parents) to be
                // created. BUG FIX: fs.create() made an unclosed empty FILE,
                // which also breaks the listStatus() call below.
                System.out.println("文件不存在,已创建");
                fs.mkdirs(filepath);
            }
            FileStatus[] status = fs.listStatus(filepath);
            String choice = "0";
            if (status.length != 0) {
                // Non-empty: deletion must be confirmed by the user.
                System.out.println("Not an empty directory,please choose your option: delete ,other");
                choice = sc.next();
            }
            if (choice.equals("delete")) {
                fs.delete(filepath, true);
                System.out.println("文件已删除");
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
(6) 向HDFS中指定的文件追加内容,由用户指定内容追加到原有文件的开头或结尾;
#!/bin/bash
# Append the content of a local file to the HEAD or the END of an HDFS file.
# A missing target file (and its parent directory) is created first.
read -p "Enter the file path where you want to add content: " path
if ! hadoop fs -test -f "$path"; then
    echo "file does not exist, created"
    # create the parent directory, then an empty file at the target path
    hadoop fs -mkdir -p "$(dirname "$path")"
    hadoop fs -touchz "$path"
fi
read -p "make choice (head or end): " choice
read -p "enter local file with the content: " filename
if [ "$choice" == "head" ]; then
    # HDFS has no insert-at-head operation: download the current content,
    # concatenate the new content in front of it, and overwrite the file.
    # BUG FIX: the original simply ran `copyFromLocal -f`, which replaced
    # the file and LOST its existing content instead of prepending.
    tmp=$(mktemp)
    hadoop fs -get "$path" "$tmp.orig"
    cat "$filename" "$tmp.orig" > "$tmp"
    hadoop fs -copyFromLocal -f "$tmp" "$path"
    rm -f "$tmp" "$tmp.orig"
else
    hadoop fs -appendToFile "$filename" "$path"
fi
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import java.util.Scanner;
/**
 * Appends user-typed content to the head or the end of an HDFS file
 * (a missing file is created first). Content input ends with ctrl + d.
 */
public class hsfs2 {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
        try (FileSystem fs = FileSystem.get(conf);
             Scanner sc = new Scanner(System.in)) {
            System.out.println("Enter the file path where you want to add content: ");
            Path filepath = new Path(sc.nextLine());
            if (!fs.exists(filepath)) {
                // close the stream so the empty file is actually committed
                fs.create(filepath).close();
                System.out.println("文件不存在,已创建");
            }
            System.out.println("make choice (head or end)");
            String choice = sc.nextLine();
            System.out.println("enter content(end with ctrl + d):");
            // Collect the new content BEFORE touching the file.
            StringBuilder content = new StringBuilder();
            while (sc.hasNextLine()) {
                String line = sc.nextLine();
                if (!StringUtils.isBlank(line)) {
                    content.append(line).append('\n');
                }
            }
            // BUG FIX: the original used writeChars(), which emits two-byte
            // UTF-16 units and corrupts the file for every other HDFS tool.
            byte[] fresh = content.toString().getBytes("UTF-8");
            if (choice.equals("head")) {
                // HDFS cannot insert at the head: buffer the old content,
                // rewrite the file with the new text, then re-append it.
                // BUG FIX: the original called fs.create() straight away,
                // truncating the file and losing its previous content.
                byte[] old = new byte[(int) fs.getFileStatus(filepath).getLen()];
                try (FSDataInputStream in = fs.open(filepath)) {
                    in.readFully(old);
                }
                try (FSDataOutputStream out = fs.create(filepath, true)) {
                    out.write(fresh);
                    out.write(old);
                }
            } else {
                // BUG FIX: the output stream is now closed, so the appended
                // bytes are flushed (the original never closed it).
                try (FSDataOutputStream out = fs.append(filepath)) {
                    out.write(fresh);
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
(7) 删除HDFS中指定的文件;
#!/bin/bash
# Delete an HDFS file or directory after verifying it exists.
read -p "enter a path you want to delete: " path
# Quoted so paths with spaces work; -rm -r also removes directories.
if hadoop fs -test -e "$path"; then
    hadoop fs -rm -r "$path"
else
    echo "No file or directory!"
fi
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import java.util.Scanner;
/** Deletes the HDFS file/directory whose path is entered on stdin. */
public class hsfs2 {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
        // try-with-resources: fs and sc are closed on every path.
        try (FileSystem fs = FileSystem.get(conf);
             Scanner sc = new Scanner(System.in)) {
            System.out.println("enter a path you want delete");
            Path filepath = new Path(sc.nextLine());
            if (fs.exists(filepath)) {
                // recursive=true also removes non-empty directories
                fs.delete(filepath, true);
                System.out.println("文件已删除");
            } else {
                // BUG FIX: the original printed "文件不存在,已创建"
                // ("...has been created") although nothing is created here.
                System.out.println("文件不存在");
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
(8) 在HDFS中,将文件从源路径移动到目的路径。
#!/bin/bash
# Move a file inside HDFS from a source path to a target path.
read -p "enter sourcepath: " sourcepath
read -p "enter target path: " targetpath
# Quoted so paths containing spaces survive word splitting;
# `hadoop fs -mv` prints its own error if the source does not exist.
hadoop fs -mv "$sourcepath" "$targetpath"
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import java.util.Scanner;
/** Moves a file inside HDFS from a source path to a target path. */
public class hsfs2 {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
        try (FileSystem fs = FileSystem.get(conf);
             Scanner sc = new Scanner(System.in)) {
            System.out.println("enter sourcepath ");
            Path sourcepath = new Path(sc.nextLine());
            System.out.println("enter targetpath ");
            Path targetpath = new Path(sc.nextLine());
            // BUG FIX: the original required the TARGET to exist, which is
            // backwards — the SOURCE must exist — and it ignored rename()'s
            // boolean result, so failures were reported as success.
            if (!fs.exists(sourcepath)) {
                System.out.println("源路径不存在");
            } else if (fs.rename(sourcepath, targetpath)) {
                System.out.println("移动成功");
            } else {
                System.out.println("移动失败");
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
编程实现一个类“MyFSDataInputStream”,该类继承“org.apache.hadoop.fs.FSDataInputStream”,要求如下:实现按行读取HDFS中指定文件的方法“readLine()”,如果读到文件末尾,则返回空,否则返回文件一行的文本。
// Java 代码实现略:继承 FSDataInputStream 并实现按行读取的 readLine() 方法即可。
查看Java帮助手册或其它资料,用“java.net.URL”和“org.apache.hadoop.fs.FsURLStreamHandlerFactory”编程完成输出HDFS中指定文件的文本到终端中。
import java.net.URL;
import org.apache.hadoop.io.IOUtils;
import java.io.InputStream;
import java.io.IOException;
import org.apache.hadoop.fs.*;
import java.util.Scanner;
/**
 * Prints the content of an HDFS file to the terminal using java.net.URL
 * with Hadoop's FsUrlStreamHandlerFactory.
 */
public class hsfs2 {
    static {
        // Register the HDFS stream handler so java.net.URL understands
        // hdfs:// URLs; may only be called once per JVM.
        URL.setURLStreamHandlerFactory(new FsUrlStreamHandlerFactory());
    }

    /** Copies the file at the given HDFS path to standard output. */
    public static void cat(String filePath) {
        // try-with-resources closes the stream; the explicit closeStream()
        // call in the original was redundant for the same reason.
        try (InputStream in = new URL("hdfs", "localhost", 9000, filePath).openStream()) {
            IOUtils.copyBytes(in, System.out, 4096, false);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    public static void main(String[] args) {
        try (Scanner sc = new Scanner(System.in)) {
            System.out.println("enter path ");
            String filepath = sc.nextLine();
            System.out.println("去读文件:" + filepath);
            // BUG FIX: the original called hdfsclass.cat(...), a class that
            // does not exist — it would not even compile.
            cat(filepath);
            System.out.println("\n 读取完成");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
四、 实验总结及问题
- 本次实验学会了使用什么工具完成什么任务?
掌握了 HDFS 常用的 Shell 命令,并能使用 Java API 开发操作 HDFS 的程序。
- 在实验过程中遇到了什么问题?是如何解决的?
多数类库方法不清楚,不明白,通过查阅网上资料,与同学交流以及查阅官方文档解决。
- 还有什么问题尚未解决?可能是什么原因导致的。
暂无