Understanding the file-read flow in Hadoop 2 (an HDFS source-code walkthrough)

Eclipse debug shortcuts used in this walkthrough:

F5: Step Into (enter the called method)

F6: Step Over (execute the current line)

F7: Step Return (run to the end of the current method and exit it)

F8: Resume (run until the next breakpoint)

 

(1)The client code that reads a file

package com.jn.hadoop.hdfs;

import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URI;
import java.net.URISyntaxException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class ReadHdfs {

    /**
     * Copies /good.txt from HDFS to d:/good.txt on the local disk.
     * @param args unused
     */
    public static void main(String[] args) {
        FileSystem fs;
        try {
            // Obtain a (cached) FileSystem instance for the hdfs:// scheme.
            fs = FileSystem.get(new URI("hdfs://192.168.41.13:9000"), new Configuration());
            // Open the HDFS file for reading.
            InputStream in = fs.open(new Path("/good.txt"));
            // Local destination file on the Windows D: drive.
            OutputStream out = new FileOutputStream("d:/good.txt");
            // Copy with a 4 KB buffer; 'true' closes both streams when done.
            IOUtils.copyBytes(in, out, 4096, true);
        } catch (IOException e) {
            e.printStackTrace();
        } catch (URISyntaxException e) {
            e.printStackTrace();
        }
    }
}
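A side note on resource handling: IOUtils.copyBytes with 'true' closes both streams for us, but nothing is closed if open() itself throws. A tidier variant of the same read, sketched below under the same assumptions (the cluster address and paths from the code above), uses try-with-resources so everything is closed even on failure:

package com.jn.hadoop.hdfs;

import java.io.FileOutputStream;
import java.io.OutputStream;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class ReadHdfsTryWithResources {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // FileSystem, FSDataInputStream and FileOutputStream are all Closeable,
        // so try-with-resources closes them in reverse order even on failure.
        try (FileSystem fs = FileSystem.get(new URI("hdfs://192.168.41.13:9000"), conf);
             FSDataInputStream in = fs.open(new Path("/good.txt"));
             OutputStream out = new FileOutputStream("d:/good.txt")) {
            IOUtils.copyBytes(in, out, 4096, false); // 'false': closing is handled by the try block
        }
    }
}

One caveat: because FileSystem.get hands out a shared, cached instance (traced in step (8) below), closing it here also closes it for any other code in the JVM that obtained it the same way; FileSystem.newInstance is the cache-bypassing alternative when isolation matters.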

 

 

(2)Execution of the first statement

fs = FileSystem.get(new URI("hdfs://192.168.41.13:9000"), new Configuration());

 

(1)  The class loader performs the loading

public abstract class ClassLoader

private synchronized Class loadClassInternal(String name)
    throws ClassNotFoundException
{
    return loadClass(name);
}

(2)  Stepping into the URI class

public final class URI
    implements Comparable<URI>, Serializable

(3)  Instantiating the URI

public URI(String str) throws URISyntaxException {
    new Parser(str).parse(false);
}
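It is worth seeing what this URI decomposes into, since FileSystem.get branches on exactly these components a few steps later. A minimal sketch using the address from the sample code:

import java.net.URI;

public class UriParts {
    public static void main(String[] args) throws Exception {
        URI uri = new URI("hdfs://192.168.41.13:9000");
        System.out.println(uri.getScheme());    // hdfs
        System.out.println(uri.getAuthority()); // 192.168.41.13:9000
        System.out.println(uri.getHost());      // 192.168.41.13
        System.out.println(uri.getPort());      // 9000
    }
}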

(4)  The class loader loads the Configuration class

private synchronized Class loadClassInternal(String name)
    throws ClassNotFoundException
{
    return loadClass(name);
}

(5)  Initializing the Configuration class

The constructor:

  public Configuration() {

    this(true);

  }

which delegates to the overloaded constructor:

  public Configuration(boolean loadDefaults) {

    this.loadDefaults = loadDefaults;

    updatingResource = new HashMap<String, String[]>();

    synchronized(Configuration.class) {

      REGISTRY.put(this, null);

    }

  }
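Note that this constructor does not parse any XML yet. With loadDefaults = true, Configuration registers itself in REGISTRY and loads core-default.xml and core-site.xml from the classpath lazily, on the first property access. A small sketch illustrating that behavior (fs.defaultFS is the standard Hadoop 2 key for the default filesystem):

import org.apache.hadoop.conf.Configuration;

public class ConfDemo {
    public static void main(String[] args) {
        Configuration conf = new Configuration(); // same as new Configuration(true)
        // The first get() triggers loading of core-default.xml / core-site.xml.
        System.out.println(conf.get("fs.defaultFS", "file:///"));
        // Programmatic settings override the XML resources:
        conf.set("fs.defaultFS", "hdfs://192.168.41.13:9000");
        System.out.println(conf.get("fs.defaultFS"));
    }
}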

 

(6)  Stepping into FileSystem.get(URI, Configuration)

 

public static FileSystem get(URI uri, Configuration conf) throws IOException {
    String scheme = uri.getScheme();
    String authority = uri.getAuthority();

    if (scheme == null && authority == null) {     // use default FS
      return get(conf);
    }

    if (scheme != null && authority == null) {     // no authority
      URI defaultUri = getDefaultUri(conf);
      if (scheme.equals(defaultUri.getScheme())    // if scheme matches default
          && defaultUri.getAuthority() != null) {  // & default has authority
        return get(defaultUri, conf);              // return default
      }
    }

    String disableCacheName = String.format("fs.%s.impl.disable.cache", scheme);
    if (conf.getBoolean(disableCacheName, false)) {
      return createFileSystem(uri, conf);
    }

    return CACHE.get(uri, conf);
  }
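The disableCacheName lookup above means the cache can be switched off per scheme from user code: with it set, every call takes the createFileSystem branch instead of consulting CACHE. A minimal sketch (assuming the sample cluster address and the hadoop-hdfs client jars on the classpath):

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class DisableCacheDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Matches String.format("fs.%s.impl.disable.cache", scheme) for scheme "hdfs".
        conf.setBoolean("fs.hdfs.impl.disable.cache", true);
        FileSystem fs1 = FileSystem.get(new URI("hdfs://192.168.41.13:9000"), conf);
        FileSystem fs2 = FileSystem.get(new URI("hdfs://192.168.41.13:9000"), conf);
        System.out.println(fs1 == fs2); // false: the cache was bypassed
    }
}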

(7)  Stepping into the Cache.get method

    FileSystem get(URI uri, Configuration conf) throws IOException {
      Key key = new Key(uri, conf); // the Key ties the cache entry to the user's identity and groups (UGI)
      return getInternal(uri, conf, key);
    }

(8)  Stepping into the getInternal method

This is the singleton design pattern in its lazy-initialization form: the FileSystem for a given Key is created only on first request, with a second check under the lock to guard against races.

private FileSystem getInternal(URI uri, Configuration conf, Key key) throws IOException {
      FileSystem fs;
      synchronized (this) {
        fs = map.get(key);
      }
      if (fs != null) {
        return fs;
      }

      fs = createFileSystem(uri, conf);
      synchronized (this) { // refetch the lock again
        FileSystem oldfs = map.get(key);
        if (oldfs != null) { // a filesystem is created while lock is releasing
          fs.close(); // close the new file system
          return oldfs;  // return the old file system
        }

        // now insert the new file system into the map
        if (map.isEmpty()
                && !ShutdownHookManager.get().isShutdownInProgress()) {
          ShutdownHookManager.get().addShutdownHook(clientFinalizer, SHUTDOWN_HOOK_PRIORITY);
        }
        fs.key = key;
        map.put(key, fs);
        if (conf.getBoolean("fs.automatic.close", true)) {
          toAutoClose.add(key);
        }
        return fs;
      }
}
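The practical effect of this per-Key cache, sketched below under the same assumptions as the sample code: repeated FileSystem.get calls with the same URI, configuration, and user return the identical object, while FileSystem.newInstance deliberately bypasses the cache:

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class CacheIdentityDemo {
    public static void main(String[] args) throws Exception {
        URI uri = new URI("hdfs://192.168.41.13:9000");
        Configuration conf = new Configuration();
        FileSystem a = FileSystem.get(uri, conf);
        FileSystem b = FileSystem.get(uri, conf);
        FileSystem c = FileSystem.newInstance(uri, conf); // always bypasses the cache
        System.out.println(a == b); // true: the same cached instance
        System.out.println(a == c); // false: a separate instance
        c.close(); // safe: does not touch the cached instance
    }
}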

 

 

The FileSystem implementation class is resolved from the configuration; for the hdfs scheme it is DistributedFileSystem.

class org.apache.hadoop.hdfs.DistributedFileSystem

 

 public static Class<? extends FileSystem> getFileSystemClass(String scheme,

      Configuration conf) throws IOException {

    if (!FILE_SYSTEMS_LOADED) {

      loadFileSystems();

    }

    Class<? extends FileSystem> clazz = null;

    if (conf != null) {

      clazz = (Class<? extends FileSystem>) conf.getClass("fs." + scheme + ".impl", null);

    }

    if (clazz == null) {

      clazz = SERVICE_FILE_SYSTEMS.get(scheme);

    }

    if (clazz == null) {

      throw new IOException("NoFileSystem for scheme: " + scheme);

    }

    return clazz;

  }
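So the resolution order is: an explicit fs.<scheme>.impl entry in the Configuration first, and otherwise the SERVICE_FILE_SYSTEMS map that loadFileSystems() fills via Java's ServiceLoader from META-INF/services entries. Since getFileSystemClass is public in Hadoop 2, the lookup can be observed directly; a minimal sketch:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class SchemeLookupDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Resolves to org.apache.hadoop.hdfs.DistributedFileSystem when the
        // hadoop-hdfs jar (with its META-INF/services entry) is on the classpath.
        Class<? extends FileSystem> clazz = FileSystem.getFileSystemClass("hdfs", conf);
        System.out.println(clazz.getName());
        // An explicit fs.hdfs.impl entry would take precedence over service discovery:
        // conf.setClass("fs.hdfs.impl", clazz, FileSystem.class);
    }
}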

 

Creating the fs instance:

  private static FileSystem createFileSystem(URI uri, Configuration conf
      ) throws IOException {
    Class<?> clazz = getFileSystemClass(uri.getScheme(), conf);
    if (clazz == null) {
      throw new IOException("No FileSystem for scheme: " + uri.getScheme());
    }
    // create fs via reflection; polymorphism: a parent-type reference holds the subclass instance
    FileSystem fs = (FileSystem) ReflectionUtils.newInstance(clazz, conf);
    fs.initialize(uri, conf);
    return fs;
  }
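ReflectionUtils.newInstance essentially invokes the class's no-arg constructor reflectively and then injects the Configuration. A stripped-down sketch of the same idea; this is an illustrative mirror, not the actual Hadoop implementation (the real version caches the Constructor object and also handles JobConfigurable):

import java.lang.reflect.Constructor;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;

public class MiniReflectionUtils {

    // Illustrative simplification of ReflectionUtils.newInstance(Class, Configuration).
    public static <T> T newInstance(Class<T> clazz, Configuration conf) {
        try {
            Constructor<T> ctor = clazz.getDeclaredConstructor(); // no-arg constructor
            ctor.setAccessible(true);
            T instance = ctor.newInstance();
            if (instance instanceof Configurable) {
                ((Configurable) instance).setConf(conf); // inject the configuration
            }
            return instance;
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }
}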

 

In the DistributedFileSystem class:

public void initialize(URI uri, Configuration conf) throws IOException {
    super.initialize(uri, conf);
    setConf(conf);

    String host = uri.getHost();
    if (host == null) {
      throw new IOException("Incomplete HDFS URI, no host: " + uri);
    }
    homeDirPrefix = conf.get(
        DFSConfigKeys.DFS_USER_HOME_DIR_PREFIX_KEY,
        DFSConfigKeys.DFS_USER_HOME_DIR_PREFIX_DEFAULT);

    this.dfs = new DFSClient(uri, conf, statistics);
    this.uri = URI.create(uri.getScheme() + "://" + uri.getAuthority());
    this.workingDir = getHomeDirectory();
  }

 

In the DFSClient class:

  public DFSClient(URI nameNodeUri, Configuration conf,
                   FileSystem.Statistics stats)
    throws IOException {
    this(nameNodeUri, null, conf, stats);
  }


  @VisibleForTesting
  public DFSClient(URI nameNodeUri, ClientProtocol rpcNamenode,
      Configuration conf, FileSystem.Statistics stats)
    throws IOException {
    // Copy only the required DFSClient configuration

    NameNodeProxies.ProxyAndInfo<ClientProtocol> proxyInfo = null;

    // create the proxy object; NameNodeProxies wraps the RPC machinery
    proxyInfo = NameNodeProxies.createProxy(conf, nameNodeUri,
        ClientProtocol.class, nnFallbackToSimpleAuth);
  }

 

 

 

In the NameNodeProxies class:

public static <T> ProxyAndInfo<T> createNonHAProxy(
      Configuration conf, InetSocketAddress nnAddr, Class<T> xface,
      UserGroupInformation ugi, boolean withRetries,
      AtomicBoolean fallbackToSimpleAuth) throws IOException {

    T proxy;
    if (xface == ClientProtocol.class) {
      // create the proxy
      proxy = (T) createNNProxyWithClientProtocol(nnAddr, conf, ugi,
          withRetries, fallbackToSimpleAuth);
    }
}

 

Still in the NameNodeProxies class:

private static ClientProtocol createNNProxyWithClientProtocol(
      InetSocketAddress address, Configuration conf, UserGroupInformation ugi,
      boolean withRetries, AtomicBoolean fallbackToSimpleAuth)
      throws IOException {
    RPC.setProtocolEngine(conf, ClientNamenodeProtocolPB.class, ProtobufRpcEngine.class);

    final RetryPolicy defaultPolicy =
        RetryUtils.getDefaultRetryPolicy(
            conf,
            DFSConfigKeys.DFS_CLIENT_RETRY_POLICY_ENABLED_KEY,
            DFSConfigKeys.DFS_CLIENT_RETRY_POLICY_ENABLED_DEFAULT,
            DFSConfigKeys.DFS_CLIENT_RETRY_POLICY_SPEC_KEY,
            DFSConfigKeys.DFS_CLIENT_RETRY_POLICY_SPEC_DEFAULT,
            SafeModeException.class);

    final long version = RPC.getProtocolVersion(ClientNamenodeProtocolPB.class);

    // RPC obtains the server-side proxy object
    ClientNamenodeProtocolPB proxy = RPC.getProtocolProxy(
        ClientNamenodeProtocolPB.class, version, address, ugi, conf,
        NetUtils.getDefaultSocketFactory(conf),
        org.apache.hadoop.ipc.Client.getTimeout(conf), defaultPolicy,
        fallbackToSimpleAuth).getProxy();

    // create the retry policies
    RetryPolicy createPolicy = RetryPolicies
        .retryUpToMaximumCountWithFixedSleep(5,
            HdfsConstants.LEASE_SOFTLIMIT_PERIOD, TimeUnit.MILLISECONDS);

    // wrap the proxy to add retry behavior
    ClientProtocol translatorProxy =
        new ClientNamenodeProtocolTranslatorPB(proxy);
    return (ClientProtocol) RetryProxy.create(
        ClientProtocol.class,
        new DefaultFailoverProxyProvider<ClientProtocol>(
            ClientProtocol.class, translatorProxy),
        methodNameToPolicyMap,
        defaultPolicy);

The proxy is created with a JDK dynamic proxy (RetryProxy.create):

  public static <T> Object create(Class<T> iface,
      FailoverProxyProvider<T> proxyProvider,
      Map<String, RetryPolicy> methodNameToPolicyMap,
      RetryPolicy defaultPolicy) {
    return Proxy.newProxyInstance(
        proxyProvider.getInterface().getClassLoader(),
        new Class<?>[] { iface },
        new RetryInvocationHandler<T>(proxyProvider, defaultPolicy,
            methodNameToPolicyMap)
        );
  }
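To see the mechanism in isolation, here is a self-contained toy version of the same pattern: Proxy.newProxyInstance plus an InvocationHandler that retries failed calls a fixed number of times. The interface and retry logic are invented for illustration; Hadoop's RetryInvocationHandler additionally consults a per-method RetryPolicy and the failover provider:

import java.lang.reflect.InvocationHandler;
import java.lang.reflect.Proxy;

public class RetryProxyDemo {

    // Toy protocol interface, invented for illustration.
    interface Protocol {
        String getBlockLocations(String src);
    }

    // Wrap 'target' in a dynamic proxy that retries each call up to maxAttempts times.
    static <T> T withRetries(Class<T> iface, T target, int maxAttempts) {
        InvocationHandler handler = (proxy, method, args) -> {
            Exception last = null;
            for (int attempt = 1; attempt <= maxAttempts; attempt++) {
                try {
                    return method.invoke(target, args); // delegate to the real object
                } catch (Exception e) {
                    last = e; // Hadoop would consult a per-method RetryPolicy here
                }
            }
            throw last; // maxAttempts is assumed to be >= 1
        };
        return iface.cast(Proxy.newProxyInstance(
                iface.getClassLoader(), new Class<?>[] { iface }, handler));
    }

    public static void main(String[] args) {
        Protocol real = src -> "locations of " + src;
        Protocol proxied = withRetries(Protocol.class, real, 3);
        System.out.println(proxied.getBlockLocations("/good.txt"));
    }
}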

 

// back in createNonHAProxy, the proxy is packaged into a ProxyAndInfo
public static <T> ProxyAndInfo<T> createNonHAProxy(
    return new ProxyAndInfo<T>(proxy, dtService, nnAddr);

// instantiating ProxyAndInfo
    public ProxyAndInfo(PROXYTYPE proxy, Text dtService,
        InetSocketAddress address) {
      this.proxy = proxy;
      this.dtService = dtService;
      this.address = address;
    }

 

 

@InterfaceAudience.Private
public class DFSClient implements java.io.Closeable, RemotePeerFactory,
    DataEncryptionKeyFactory {

  // ClientProtocol is an interface
  final ClientProtocol namenode;

  // back in the constructor, the server-side proxy object is stored:
      this.namenode = proxyInfo.getProxy();
    }

The implementation of the ClientProtocol interface is supplied by the server (NameNode) side.

(3)Execution of the second statement, fs.open(new Path("/good.txt")). In DistributedFileSystem:

public FSDataInputStream open(Path f, final int bufferSize)
      throws IOException {
    statistics.incrementReadOps(1);
    Path absF = fixRelativePart(f);
    return new FileSystemLinkResolver<FSDataInputStream>() {
      @Override
      public FSDataInputStream doCall(final Path p)
          throws IOException, UnresolvedLinkException {
        final DFSInputStream dfsis =
          dfs.open(getPathName(p), bufferSize, verifyChecksum);
        return dfs.createWrappedInputStream(dfsis);
      }
      @Override
      public FSDataInputStream next(final FileSystem fs, final Path p)
          throws IOException {
        return fs.open(p, bufferSize);
      }
    }.resolve(this, absF);
  }
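The FSDataInputStream returned by createWrappedInputStream wraps the DFSInputStream and exposes seeking and positioned reads on top of plain streaming, which is what makes block-aware reading possible. A minimal sketch (assuming /good.txt exists on the sample cluster):

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class SeekDemo {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new URI("hdfs://192.168.41.13:9000"), new Configuration());
        try (FSDataInputStream in = fs.open(new Path("/good.txt"))) {
            byte[] buf = new byte[16];
            int n = in.read(buf);                   // sequential read from offset 0
            in.seek(0);                             // rewind: the stream is Seekable
            int m = in.read(4, buf, 0, buf.length); // positioned read at offset 4; current position unchanged
            System.out.println(n + " / " + m);
        } // fs is left open: it is the shared cached instance
    }
}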

 

// DFSClient.open is called
  public DFSInputStream open(String src, int buffersize, boolean verifyChecksum)
      throws IOException, UnresolvedLinkException {
    checkOpen();
    // Get block info from namenode
    return new DFSInputStream(this, src, buffersize, verifyChecksum);
  }

// the DFSInputStream object is constructed
  DFSInputStream(DFSClient dfsClient, String src, int buffersize, boolean verifyChecksum
                 ) throws IOException, UnresolvedLinkException {
    this.dfsClient = dfsClient;
    this.verifyChecksum = verifyChecksum;
    this.buffersize = buffersize;
    this.src = src;
    this.cachingStrategy =
        dfsClient.getDefaultReadCachingStrategy();
    openInfo();
  }

 

 

// open the file; note that up to three attempts are made here
synchronized void openInfo() throws IOException, UnresolvedLinkException {
    lastBlockBeingWrittenLength = fetchLocatedBlocksAndGetLastBlockLength();
    int retriesForLastBlockLength = dfsClient.getConf().retryTimesForGetLastBlockLength;
    while (retriesForLastBlockLength > 0) {
      // Getting last block length as -1 is a special case. When cluster
      // restarts, DNs may not report immediately. At this time partial block
      // locations will not be available with NN for getting the length. Lets
      // retry for 3 times to get the length.
      if (lastBlockBeingWrittenLength == -1) {
        DFSClient.LOG.warn("Last block locations not available. "
            + "Datanodes might not have reported blocks completely."
            + " Will retry for " + retriesForLastBlockLength + " times");
        waitFor(dfsClient.getConf().retryIntervalForGetLastBlockLength);
        lastBlockBeingWrittenLength = fetchLocatedBlocksAndGetLastBlockLength();
      } else {
        break;
      }
      retriesForLastBlockLength--;
    }
    if (retriesForLastBlockLength == 0) {
      throw new IOException("Could not obtain the last block locations.");
    }
  }

 

// fetch the file's metadata
private long fetchLocatedBlocksAndGetLastBlockLength() throws IOException {
    // with the block information in hand, the file data can be located and read; the stream holds on to this metadata
    final LocatedBlocks newInfo = dfsClient.getLocatedBlocks(src, 0);

 

 

  static LocatedBlocks callGetBlockLocations(ClientProtocol namenode,
      String src, long start, long length)
      throws IOException {
    try {
      return namenode.getBlockLocations(src, start, length);
    } catch (RemoteException re) {
      throw re.unwrapRemoteException(AccessControlException.class,
                                     FileNotFoundException.class,
                                     UnresolvedPathException.class);
    }
  }

// the call crosses to the server side for execution (sent via the protobuf translator)
public LocatedBlocks getBlockLocations(String src, long offset, long length)
      throws AccessControlException, FileNotFoundException,
      UnresolvedLinkException, IOException {
    GetBlockLocationsRequestProto req = GetBlockLocationsRequestProto
        .newBuilder()
        .setSrc(src)
        .setOffset(offset)
        .setLength(length)
        .build();
    try {
      GetBlockLocationsResponseProto resp = rpcProxy.getBlockLocations(null,
          req);
      return resp.hasLocations() ?
        PBHelper.convert(resp.getLocations()) : null;
    } catch (ServiceException e) {
      throw ProtobufHelper.getRemoteException(e);
    }
  }

 

The key to reading source code is to work out what each method does, how a given class is created, what a given collection holds, and where that collection is created.

(4)Diagram of the file-read flow:

[Figure from the original post: the client asks the NameNode for the block locations, then reads block data from the DataNodes.]

Note: when reading a file, the client fetches the file's metadata up to 3 times by default; if all 3 attempts fail, the read fails.
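That retry count is the retryTimesForGetLastBlockLength field read in openInfo(). To the best of my knowledge it is backed in Hadoop 2 by the configuration keys sketched below (defaults 3 attempts and 4000 ms), so a slow-reporting cluster can be accommodated by raising them; treat the exact key names as an assumption to verify against your Hadoop version:

import org.apache.hadoop.conf.Configuration;

public class LastBlockRetryConf {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Assumed key names (from DFSConfigKeys in Hadoop 2); verify for your version.
        conf.setInt("dfs.client.retry.times.get-last-block-length", 5);          // default 3
        conf.setInt("dfs.client.retry.interval-ms.get-last-block-length", 4000); // default 4000 ms
    }
}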

 
