On the number of replicas
The replica count is determined by the client-side parameter dfs.replication (priority: conf.set in code > a custom configuration file on the client classpath > the hdfs-default.xml bundled in the Hadoop jar).
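A minimal sketch of that priority chain (the NameNode URI, the hadoop user, and the local file path are assumptions borrowed from the examples below): a value set in code wins over both configuration files.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import java.net.URI;

public class ReplicationDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Highest priority: set programmatically on the client.
        conf.set("dfs.replication", "2");
        // Without the line above, an hdfs-site.xml on the client classpath would apply;
        // failing that, the hdfs-default.xml inside the Hadoop jar supplies the default.
        FileSystem fs = FileSystem.get(new URI("hdfs://192.168.186.5:9000"), conf, "hadoop");
        // d:/local.txt is a hypothetical local file; it is stored with 2 replicas per block
        fs.copyFromLocalFile(new Path("d:/local.txt"), new Path("/local.txt"));
        fs.close();
    }
}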
1. Overview
- An HDFS cluster has two main roles: NameNode and DataNode (plus a SecondaryNameNode).
- The NameNode manages the metadata of the entire file system.
- DataNodes manage the users' file data blocks.
- Files are split into blocks of a fixed size (blocksize, 128 MB by default in Hadoop 2.x) and the blocks are distributed across multiple DataNodes; a 300 MB file, for example, is stored as three blocks (128 MB + 128 MB + 44 MB).
- Each block can have multiple replicas, kept on different DataNodes.
- DataNodes periodically report the blocks they hold to the NameNode, and the NameNode is responsible for maintaining each file's replica count.
- The inner workings of HDFS are transparent to the client: every client request goes through the NameNode, as the sketch below illustrates.
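A small illustration of those points (reusing the connection details and the /test1.txt file from the client examples below): the client only ever addresses the NameNode, whose metadata answers for block size, replication, and the DataNodes holding each block.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import java.net.URI;

public class BlockInfoSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // The client talks only to the NameNode; block placement on DataNodes is transparent.
        FileSystem fs = FileSystem.get(new URI("hdfs://192.168.186.5:9000"), conf, "hadoop");
        FileStatus st = fs.getFileStatus(new Path("/test1.txt"));
        System.out.println("blocksize: " + st.getBlockSize());     // size the file is cut into
        System.out.println("replication: " + st.getReplication()); // replicas kept per block
        // NameNode metadata: which DataNodes hold each block of the file.
        for (BlockLocation b : fs.getFileBlockLocations(st, 0, st.getLen())) {
            System.out.println("offset " + b.getOffset() + " -> " + String.join(",", b.getHosts()));
        }
        fs.close();
    }
}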
2. HDFS file-write diagram
3. HDFS file-read diagram
4. How the NameNode manages metadata
5. The HDFS Java client
hdfs dfsadmin -report prints the status of the cluster.
package hadoop.hdfs;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.junit.Before;
import org.junit.Test;
import java.io.IOException;
import java.net.URI;
import java.util.Iterator;
import java.util.Map;
/**
 * @program: bigdata
 * @package: hadoop.hdfs
 * @filename: SimpleHdfsDemo.java
 * @create: 2019.09.22.20.19
 * @author: Administrator
 * @description:
 */
public class SimpleHdfsDemo {
FileSystem fs=null;
Configuration conf=null;
@Before
public void init() throws Exception {
conf = new Configuration();
// conf.set("fs.defaultFS", "hdfs://master:9000");
// with this approach the user has to be specified at run time: -DHADOOP_USER_NAME=hadoop
fs = FileSystem.get(new URI("hdfs://192.168.186.5:9000"), conf, "hadoop");
}
/*
 * Upload a file
 */
@Test
public void testUpload() throws Exception {
fs.copyFromLocalFile(
new Path("D:\\大数据\\大数据全套 (已分享)\\文档资料\\day06\\day06\\hadoop2.4.1集群搭建.txt"),
new Path("/test1.txt"));
fs.close();
}
/*
 * Download a file
 */
@Test
public void testDownload() throws IOException {
fs.copyToLocalFile(new Path("/test1.txt"), new Path("d:\\"));
fs.close();
}
/*
 * Print the configuration parameters
 */
@Test
public void testConf(){
Iterator<Map.Entry<String, String>> it = conf.iterator();
while (it.hasNext()){
Map.Entry<String, String> next = it.next();
System.out.println(next.getKey()+":"+next.getValue());
}
}
@Test
public void testMkdir() throws IOException {
boolean mkdirs = fs.mkdirs(new Path("/testMkdir"));
System.out.println(mkdirs);
fs.close();
}
@Test
public void testDelete() throws IOException {
// the second argument controls whether the deletion is recursive
boolean delete = fs.delete(new Path("/testMkdir"), true);
System.out.println(delete);
}
/*
 * List files recursively.
 * For large listings an iterator is returned, since an iterator does not have to hold all results in memory.
 */
@Test
public void testLs() throws IOException {
RemoteIterator<LocatedFileStatus> ls = fs.listFiles(new Path("/"), true);
while (ls.hasNext()){
LocatedFileStatus next = ls.next();
System.out.println("blocksize: " + next.getBlockSize());
System.out.println("owner: " + next.getOwner());
System.out.println("replication: " + next.getReplication());
System.out.println("permission: " + next.getPermission());
System.out.println("name: " + next.getPath().getName());
System.out.println("------------");
/*
 * Block location information for the file
 */
BlockLocation[] bl = next.getBlockLocations();
for (BlockLocation b : bl) {
// which DataNodes hold this block, and where the block starts within the file
System.out.println(b.getOffset() + " -> " + String.join(",", b.getHosts()));
}
}
}
/*
 * List without recursion
 */
@Test
public void testLs2() throws IOException {
FileStatus[] f = fs.listStatus(new Path("/"));
for (FileStatus file:f){
System.out.println(file.getPath().getName());
}
}
}
2. Accessing HDFS through streams
package hadoop.hdfs;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.Before;
import org.junit.Test;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URI;
/**
 * @program: bigdata
 * @package: hadoop.hdfs
 * @filename: HdfsStreamAccess.java
 * @create: 2019.09.23.09.21
 * @author: Administrator
 * @description: Operate on HDFS files through streams, which makes it possible to read data starting from a specified offset.
 */
public class HdfsStreamAccess {
FileSystem fs=null;
Configuration conf=null;
@Before
public void init() throws Exception {
conf = new Configuration();
fs = FileSystem.get(new URI("hdfs://192.168.186.5:9000"), conf, "hadoop");
}
/*
 * Upload data through a stream
 */
@Test
public void testUpload() throws IOException {
FSDataOutputStream ot = fs.create(new Path("/testStream.txt"));
FileInputStream is = new FileInputStream("D:\\大数据\\大数据全套 (已分享)\\文档资料\\day07\\day07\\day06的问题总结.txt");
IOUtils.copy(is, ot);
ot.close(); // close the HDFS output stream so the data is actually flushed to the cluster
}
/*
 * Read data from HDFS through a stream
 */
@Test
public void testDownload() throws IOException {
FSDataInputStream in = fs.open(new Path("/testStream.txt"));
FileOutputStream out = new FileOutputStream("d:\\1.txt");
IOUtils.copy(in,out);
}
/*
 * Read data from HDFS starting at a specified offset
 */
@Test
public void testRandomAccess() throws IOException {
FSDataInputStream in = fs.open(new Path("/testStream.txt"));
in.seek(12); // start reading at byte offset 12
FileOutputStream out = new FileOutputStream("d:\\1.txt.rang");
IOUtils.copy(in,out);
}
}
6. Scheduled log-collection script
#!/bin/bash
#set java env
export JAVA_HOME=/home/hadoop/app/jdk1.7.0_51
export JRE_HOME=${JAVA_HOME}/jre
export CLASSPATH=.:${JAVA_HO