HDFS一致模型_文件的一致性模型-CSDN博客

本文链接：https://blog.csdn.net/sp_ur/article/details/82626782

文件系统的一致模型描述了文件读/写的数据可见性，新建一个文件之后，它能在文件系统的命名空间中立即可见。

package hadoopDemo;

import hadoopDemo.pathFilter.RegexExcludePathFilter;

import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.io.IOUtils;
import org.junit.Test;

/**
 * Demonstrates the HDFS coherency model: checks whether a newly created path
 * shows up in the file system right away.
 */
public class CoherencyModel {
	/**
	 * Creates an empty file at a fixed HDFS path so that its visibility can be
	 * inspected afterwards (e.g. in the web UI).
	 *
	 * @throws Exception if the file system cannot be obtained or the file
	 *                   cannot be created or closed
	 */
	@Test
	public void creatPath() throws Exception{
		Configuration conf = new Configuration() ;
		FileSystem fs = FileSystem.get(conf) ;
		// create() hands back an open output stream; close it explicitly so
		// the stream is not leaked when the test method returns.
		FSDataOutputStream out = fs.create(new Path("hdfs://ubuntucp:8020/test/lo.txt")) ;
		out.close() ;
	}

}

然后在 web ui 上可以看见

但是，写入文件的内容并不能保证立即可见，即使数据流已经刷新（flush）也是如此，所以文件长度可能显示为 0。当写入的数据超过一个块后，第一个数据块对新的 reader 就是可见的，之后的块也不例外。总之，当前正在写入的块对其他 reader 不可见。

如下程序，运行 readData() 结果为0：

package hadoopDemo;

import hadoopDemo.pathFilter.RegexExcludePathFilter;

import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.io.IOUtils;
import org.junit.Test;

/**
 * Demonstrates the HDFS coherency model: checks whether content written to a
 * file is visible to a reader.
 */
public class CoherencyModel {
	/**
	 * Creates the test file and writes three ints (100, 200, 300) to it.
	 *
	 * @throws Exception if the file system cannot be obtained or any
	 *                   create/write/close call fails
	 */
	@Test
	public void creatPath() throws Exception{
		Configuration conf = new Configuration() ;
		FileSystem fs = FileSystem.get(conf) ;
		FSDataOutputStream out = fs.create(new Path("hdfs://ubuntucp:8020/test/lo.txt")) ;
		try {
			out.writeInt(100) ;
			out.writeInt(200) ;
			out.writeInt(300) ;
		} finally {
			// Close even when a write fails so the stream is never leaked.
			out.close() ;
		}
	}

	/**
	 * Prints the length reported by the file status of the test file.
	 *
	 * @throws Exception if the file system cannot be obtained or the file
	 *                   status cannot be read
	 */
	@Test
	public void readData() throws Exception{
		Configuration conf = new Configuration() ;
		FileSystem fs = FileSystem.get(conf) ;
		FileStatus fst = fs.getFileStatus(new Path("hdfs://ubuntucp:8020/test/lo.txt")) ;
		System.out.println(fst.getLen()) ;
	}

}

HDFS 提供了一种强行将所有缓存刷新到 datanode 的手段，即对 FSDataOutputStream 调用 hflush() 方法。当 hflush() 方法返回成功后，HDFS 能保证文件中到目前为止写入的数据均已到达写入管道中的所有 datanode，并且对所有新的 reader 均可见。

package hadoopDemo;

import hadoopDemo.pathFilter.RegexExcludePathFilter;

import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.io.IOUtils;
import org.junit.Test;

/*
 * 测试文件的一致性
 */
public class CoherencyModel {
	/**
	 * Creates the test file and writes three ints (100, 200, 300), calling
	 * hflush() after each write.
	 *
	 * @throws Exception if the file system cannot be obtained or any
	 *                   create/write/hflush/close call fails
	 */
	@Test
	public void creatPath() throws Exception{
		Configuration conf = new Configuration() ;
		FileSystem fs = FileSystem.get(conf) ;
		FSDataOutputStream out = fs.create(new Path("hdfs://ubuntucp:8020/test/lo.txt")) ;
		try {
			out.writeInt(100) ;
			out.hflush() ;
			out.writeInt(200) ;
			out.hflush() ;
			out.writeInt(300) ;
			out.hflush() ;
		} finally {
			// Close even when a write or hflush fails so the stream is never leaked.
			out.close() ;
		}
	}