Error: org.apache.hadoop.ipc.RemoteException(java.io.IOException): File /user/xxx/part-r-00002 could only be replicated to 0 nodes instead of minReplication (=1). There are 11 datanode(s) running and no node(s) are excluded in this operation.
    at org.apache.hadoop.hdfs.server.blockmanagement.BlockManager.chooseTarget(BlockManager.java:1327)
    at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getAdditionalBlock(FSNamesystem.java:2278)
    at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.addBlock(NameNodeRpcServer.java:480)
    at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.addBlock(ClientNamenodeProtocolServerSideTranslatorPB.java:297)
    at org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java:44080)
    at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:453)
    at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1002)
    at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:1695)
    at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:1691)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:396)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1408)
    at org.apache.hadoop.ipc.Server$Handler.run(Server.java:1689)
To understand where this error comes from, let's read the code and see how HDFS chooses datanodes for a block. My understanding may be off, so please point out any mistakes; the code is from CDH 4.2.1. Inside FSNamesystem.getAdditionalBlock() there is a call to blockManager.chooseTarget(src, replication, clientNode, excludedNodes, blockSize);
This method decides which datanodes the block's replicas are written to. Source file: hadoop-2.0.0-cdh4.2.1\src\hadoop-hdfs-project\hadoop-hdfs\src\main\java\org\apache\hadoop\hdfs\server\blockmanagement\BlockPlacementPolicyDefault.java. The chooseTarget call passes through several delegating overloads before it reaches the method that does the real work, quoted further below.
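One parameter threaded through those overloads is maxNodesPerRack, a cap on how many replicas of a block may land on a single rack. Going from memory of the 2.0.x source, the public overload derives it roughly as in the sketch below before delegating; treat the exact formula as an assumption, not a verbatim CDH 4.2.1 quote:

    public class RackCap {
        // Assumed formula for the per-rack replica cap computed by the public
        // chooseTarget overload before it delegates (hedged, from memory).
        static int maxNodesPerRack(int totalReplicas, int numOfRacks) {
            return (totalReplicas - 1) / numOfRacks + 2;
        }

        public static void main(String[] args) {
            // replication=3 on a 2-rack cluster: (3-1)/2 + 2 = 3
            System.out.println(maxNodesPerRack(3, 2));
        }
    }

So with the usual replication factor of 3 on a two-rack cluster, at most three replicas of one block may share a rack. With that cap in hand, here is the method everything funnels into: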
/* choose <i>numOfReplicas</i> from all data nodes */
private DatanodeDescriptor chooseTarget(int numOfReplicas,
                                        DatanodeDescriptor writer,
                                        HashMap<Node, Node> excludedNodes,
                                        long blocksize,
                                        int maxNodesPerRack,
                                        List<DatanodeDescriptor> results) {
  if (numOfReplicas == 0 || clusterMap.getNumOfLeaves() == 0) {
    return writer;
  }
  int totalReplicasExpected = numOfReplicas; // total number of replicas needed
  // results holds the nodes that have already been chosen
  int numOfResults = results.size();
  boolean newBlock = (numOfResults == 0);
  if (writer == null && !newBlock) {
    writer = results.get(0);
  }
  try {
    // Nothing placed yet: choose the writer-local node first
    if (numOfResults == 0) {
      writer = chooseLocalNode(writer, excludedNodes, blocksize,
                               maxNodesPerRack, results);
      if (--numOfReplicas == 0) {
        return writer;
      }
    }
    // Zero or one replica placed so far: choose a node on a different rack
    if (numOfResults <= 1) {
      chooseRemoteRack(1, results.get(0), excludedNodes, blocksize,
                       maxNodesPerRack, results);
      if (--numOfReplicas == 0) {
        return writer;
      }
    }
    // Still not done, with at most two replicas placed so far
    if (numOfResults <= 2) {
      // If the first two are on the same rack, choose a node on another rack
      if (clusterMap.isOnSameRack(results.get(0), results.get(1))) {
        chooseRemoteRack(1, results.get(0), excludedNodes, blocksize,
                         maxNodesPerRack, results);
      } else if (newBlock) {
        // New block: put the third replica on the same rack as the second
        chooseLocalRack(results.get(1), excludedNodes, blocksize,
                        maxNodesPerRack, results);
      } else {
        // Otherwise stay on the writer's rack
        chooseLocalRack(writer, excludedNodes, blocksize,
                        maxNodesPerRack, results);
      }
      if (--numOfReplicas == 0) {
        return writer;
      }
    }
    // Any replicas still needed are chosen at random from the whole cluster
    chooseRandom(numOfReplicas, NodeBase.ROOT, excludedNodes, blocksize,
                 maxNodesPerRack, results);
  } catch (NotEnoughReplicasException e) {
    // Remainder of this warn reconstructed from the upstream source;
    // the quote broke off mid-line here
    LOG.warn("Not able to place enough replicas, still in need of "
        + numOfReplicas + " to reach " + totalReplicasExpected
        + "\n" + e.getMessage());
  }
  return writer;
}
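Putting those branches together, the default policy is: first replica on the writer's own datanode, second replica on a different rack, third replica on the same rack as the second, and any further replicas at random. To make the ordering concrete, here is a small self-contained toy that models it. Every name in it (PlacementSketch, Node, pick) is invented for illustration, it ignores the maxNodesPerRack cap for brevity, and it is a sketch of the decision order, not Hadoop's actual implementation:

    import java.util.ArrayList;
    import java.util.List;
    import java.util.Random;
    import java.util.function.Predicate;

    public class PlacementSketch {

        // A node is just a hostname plus the rack it sits on.
        record Node(String name, String rack) {}

        static final Random RAND = new Random();

        // Mirrors the decision order of the private chooseTarget quoted above,
        // for the common case of a brand-new block (results starts empty).
        static List<Node> chooseTargets(int numOfReplicas, Node writer,
                                        List<Node> cluster) {
            List<Node> results = new ArrayList<>();

            // 1st replica: the writer's local node (chooseLocalNode).
            results.add(writer);

            // 2nd replica: a node on a different rack (chooseRemoteRack).
            if (results.size() < numOfReplicas) {
                results.add(pick(cluster, results,
                                 n -> !n.rack().equals(writer.rack())));
            }

            // 3rd replica: same rack as the 2nd (chooseLocalRack). The real
            // code re-checks isOnSameRack(first, second) because results may
            // arrive pre-populated; here they differ by construction.
            if (results.size() < numOfReplicas) {
                Node second = results.get(1);
                results.add(pick(cluster, results,
                                 n -> n.rack().equals(second.rack())));
            }

            // Any remaining replicas: anywhere in the cluster (chooseRandom).
            while (results.size() < numOfReplicas) {
                results.add(pick(cluster, results, n -> true));
            }
            return results;
        }

        // Randomly pick a not-yet-chosen node matching the predicate. Running
        // out of candidates plays the role of NotEnoughReplicasException.
        static Node pick(List<Node> cluster, List<Node> chosen,
                         Predicate<Node> ok) {
            List<Node> candidates = new ArrayList<>();
            for (Node n : cluster) {
                if (!chosen.contains(n) && ok.test(n)) {
                    candidates.add(n);
                }
            }
            if (candidates.isEmpty()) {
                throw new IllegalStateException("not enough replicas");
            }
            return candidates.get(RAND.nextInt(candidates.size()));
        }

        public static void main(String[] args) {
            List<Node> cluster = List.of(
                new Node("dn1", "/rack1"), new Node("dn2", "/rack1"),
                new Node("dn3", "/rack2"), new Node("dn4", "/rack2"),
                new Node("dn5", "/rack3"), new Node("dn6", "/rack3"));
            System.out.println(chooseTargets(3, cluster.get(0), cluster));
        }
    }

With replication 3 this always prints dn1 plus two nodes sharing a rack other than /rack1, which is the pipeline shape HDFS aims for. When pick finds no candidate at all it throws, the toy analogue of the NotEnoughReplicasException path whose warning closes the real method; when the namenode cannot place even one replica that way, the client sees the "could only be replicated to 0 nodes" error quoted at the top.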