今天一开始建立 Eclipse 与虚拟机的连接时出现了诸多问题,解决之后复习了基本的 Hadoop 文件操作。
/**
 * Smoke tests for basic HDFS operations: create a directory, create/write a
 * file, upload a local file, download a file, list a directory, look up block
 * locations, stream a file's content, and delete a path.
 *
 * <p>A fresh {@link FileSystem} handle is opened in {@code @Before} and closed
 * in {@code @After}, so every test owns its own connection.
 */
public class TestHDFS {
    Configuration conf;
    // NOTE(review): was `static` but assigned from the per-instance @Before,
    // which risks cross-test interference; made it an instance field so each
    // test opens and closes its own handle.
    FileSystem fs;

    /**
     * Opens the HDFS connection. `new Configuration(true)` loads the default
     * resources (core-site.xml / hdfs-site.xml) from the classpath.
     */
    @Before
    public void conn() throws Exception {
        conf = new Configuration(true);
        fs = FileSystem.get(conf);
    }

    /** Closes the HDFS connection after each test. */
    @After
    public void close() throws Exception {
        fs.close();
    }

    /**
     * Creates the directory /0222, removing any pre-existing entry first.
     */
    @Test
    public void mkdir() throws Exception {
        Path path = new Path("/0222");
        if (fs.exists(path)) {
            // Recursive delete; the single-argument delete(Path) is deprecated.
            fs.delete(path, true);
        }
        fs.mkdirs(path);
    }

    /**
     * Creates /0222/a.txt and writes a short text payload into it.
     */
    @Test
    public void create() throws Exception {
        // try-with-resources guarantees the stream is flushed and closed even
        // if write() throws.
        try (FSDataOutputStream output = fs.create(new Path("/0222/a.txt"))) {
            output.write("this is a try to hdfs,wish my girlfriend will be fine".getBytes());
            /* Alternative: stream a local file into HDFS instead:
            InputStream input = new BufferedInputStream(new FileInputStream(new File("c.txt")));
            IOUtils.copy(input, output); */
        }
    }

    /**
     * Uploads a local file into HDFS via copyFromLocalFile.
     */
    @Test
    public void putTest() throws Exception {
        /* Manual-stream alternative kept for reference:
        Path src = new Path("F://test/coxreg.zip");
        Path des = new Path("/0222/hadoop.tar.gz");
        FSDataOutputStream output = fs.create(des);
        InputStream in = new BufferedInputStream(new FileInputStream(new File("E:\\developsoft\\【阶段18】Hadoop大数据\\hadoop-2.5.2.tar.gz")));
        fs.copyFromLocalFile(src, des);
        IOUtils.copy(in, output); */
        Path src = new Path("E:/developsoft/【阶段18】Hadoop大数据/hadoop-2.5.2.tar.gz");
        Path des = new Path("/0222/hadoop1.tar.gz");
        fs.copyFromLocalFile(src, des);
    }

    /**
     * Downloads /0222/coxreg.zip from HDFS to the local file system.
     */
    @Test
    public void getTest() throws Exception {
        Path path = new Path("/0222/coxreg.zip");
        // Close both streams even if the copy fails (the original leaked both).
        try (FSDataInputStream is = fs.open(path);
                FileOutputStream output = new FileOutputStream("F://test//a.zip")) {
            IOUtils.copy(is, output);
        }
    }

    /**
     * Stand-alone entry point: downloads /0222/coxreg.zip from the cluster at
     * hdfs://SparkMaster:9000/ to a local file.
     */
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // BUGFIX: the property that selects the default file system is
        // "fs.defaultFS"; the original key "SparkMaster" is not a Hadoop
        // setting and was silently ignored.
        conf.set("fs.defaultFS", "hdfs://SparkMaster:9000/");
        try (FileSystem fs = FileSystem.get(conf);
                FSDataInputStream is = fs.open(new Path("/0222/coxreg.zip"));
                FileOutputStream output = new FileOutputStream("F://test//a.zip")) {
            IOUtils.copy(is, output);
        }
    }

    /**
     * Lists every entry directly under /0222 and prints its metadata
     * (type, permission, replication factor, length, path).
     */
    @Test
    public void listFile() throws Exception {
        FileStatus[] fileList = fs.listStatus(new Path("/0222"));
        for (FileStatus fileStatus : fileList) {
            // Directory vs. regular file (output strings intentionally in Chinese).
            String dir = fileStatus.isDirectory() ? "文件夹" : "文件";
            String permission = fileStatus.getPermission().toString();
            short replication = fileStatus.getReplication();
            long len = fileStatus.getLen(); // primitive long avoids needless boxing
            String path = fileStatus.getPath().toString();
            System.out.println(dir + "\t" + permission + "\t" + replication + "\t" + len + "\t" + path);
        }
    }

    /**
     * Prints the host name(s) holding each block of /0222/coxreg.zip.
     */
    @Test
    public void getFileBlockLocation() throws Exception {
        FileStatus fileStatus = fs.getFileStatus(new Path("/0222/coxreg.zip"));
        BlockLocation[] block = fs.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());
        for (BlockLocation blockLocation : block) {
            for (String host : blockLocation.getHosts()) {
                System.out.println(host);
            }
        }
    }

    /**
     * Streams /0222/coxreg.zip from HDFS into a local file, echoing each
     * chunk to stdout along the way.
     * Step 1: read from HDFS through FSDataInputStream into a byte buffer.
     * Step 2: write each chunk to the local file through an OutputStream.
     */
    @Test
    public void getFileContent() throws Exception {
        Path path = new Path("/0222/coxreg.zip");
        File file = new File("F://test//0225.txt");
        System.out.println(file.getAbsolutePath());
        // try-with-resources closes both streams (the original leaked both).
        try (FSDataInputStream input = fs.open(path);
                OutputStream out = new FileOutputStream(file)) {
            byte[] buf = new byte[1024];
            int len;
            while ((len = input.read(buf)) != -1) {
                // Echo the chunk; binary data will render as garbage text.
                System.out.println(new String(buf, 0, len));
                out.write(buf, 0, len);
            }
            // One flush after the loop is enough; per-iteration flushing only
            // slowed the copy down.
            out.flush();
        }
    }

    /**
     * Recursively deletes /test.
     */
    @Test
    public void deleteFile() throws Exception {
        fs.delete(new Path("/test"), true);
    }
}
明天继续更新 Hadoop 的序列化和文件结构相关操作。