Giraph调试奇怪问题记录

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.giraph.examples;

import org.apache.giraph.graph.BasicComputation;
import org.apache.giraph.graph.DefaultVertex;
import org.apache.giraph.graph.GraphTaskManager;
import org.apache.giraph.edge.ArrayListEdges;
import org.apache.giraph.edge.Edge;
import org.apache.giraph.edge.EdgeFactory;
import org.apache.giraph.edge.EdgeNoValue;
import org.apache.giraph.graph.Vertex;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;

import java.io.IOException;
import java.util.ArrayList;

/**
 * Message-passing part of a vertex-id recoding job: every vertex learns who
 * its in-neighbors are and then rebuilds its out-edges from the ids that its
 * out-neighbors report back.
 *
 * The computation uses four supersteps:
 * <ol>
 * <li>Superstep 0: each vertex sends its id along all of its out-edges.</li>
 * <li>Superstep 1: each vertex clears its out-edges and stores a copy of
 * every received id (its in-neighbors) in the vertex's in-edge list.</li>
 * <li>Superstep 2: each vertex sends its id to every stored in-neighbor.</li>
 * <li>Superstep 3: each vertex replaces its out-edges with the received ids
 * and votes to halt.</li>
 * </ol>
 *
 * NOTE(review): the actual assignment of new vertex ids is not performed in
 * this class; presumably the customized framework does it between
 * supersteps 1 and 2 - confirm.
 *
 * NOTE(review): this class relies on {@code DefaultVertex.inEdges},
 * {@code ArrayListEdges.clear()}, {@code ArrayListEdges.printEdges()} and
 * {@code GraphTaskManager.LOG}, which look like additions of a customized
 * Giraph build - confirm they exist in the build being used.
 */
@Algorithm(
    name = "Recode",
    description = "Rebuilds out-edges from ids reported by out-neighbors"
)
public class RecodeComputation extends
    BasicComputation<IntWritable, IntWritable, NullWritable, IntWritable> {
  /**
   * Dispatches to the step that belongs to the current superstep.
   *
   * Original author's plan (translated): 1. superstep 0 writes the vertex
   * count of each partition to ZooKeeper; 2. superstep 1 lets every partition
   * update its ids based on ZooKeeper and send the updated ids; 3. the
   * received ids are applied. NOTE(review): no ZooKeeper access is visible in
   * this method - confirm where that part lives.
   *
   * @param vertex Vertex being processed
   * @param messages Ids sent to this vertex in the previous superstep
   * @throws IOException Declared by the overridden method
   */
  @Override
  public void compute(
      Vertex<IntWritable, IntWritable, NullWritable> vertex,
      Iterable<IntWritable> messages) throws IOException {
    final long superstep = getSuperstep();
    if (superstep == 0) {
      GraphTaskManager.LOG.info("Vertex " + vertex.toString());
      // Tell every out-neighbor who points at it.
      sendMessageToAllEdges(vertex, vertex.getId());
    } else if (superstep == 1) {
      collectInEdges(vertex, messages);
    } else if (superstep == 2) {
      sendIdToInNeighbors(vertex);
    } else if (superstep == 3) {
      rebuildOutEdges(vertex, messages);
    }
  }

  /**
   * Superstep 1: clears the out-edges and records every received source id
   * as an in-neighbor of the vertex.
   *
   * @param vertex Vertex being processed
   * @param messages Ids of the vertices that have an edge to this vertex
   */
  private void collectInEdges(
      Vertex<IntWritable, IntWritable, NullWritable> vertex,
      Iterable<IntWritable> messages) {
    ArrayListEdges<IntWritable, NullWritable> outEdges =
        (ArrayListEdges<IntWritable, NullWritable>) vertex.getEdges();
    outEdges.clear();

    ArrayList<IntWritable> inEdges = inEdgesOf(vertex);
    for (IntWritable srcId : messages) {
      GraphTaskManager.LOG.info("Received Messags : " + srcId.toString());
      // Store a copy: a message object is only guaranteed to stay valid
      // until the iterator advances, so keeping the reference is unsafe.
      inEdges.add(new IntWritable(srcId.get()));
    }

    StringBuilder collected = new StringBuilder();
    for (IntWritable inEdge : inEdges) {
      GraphTaskManager.LOG.info("inEdge " + inEdge.toString());
      collected.append(inEdge.toString());
    }
    GraphTaskManager.LOG.info("New Edges " + vertex.toString() +
        collected.toString() + " at step " + getSuperstep());
  }

  /**
   * Superstep 2: sends the id of the vertex to every in-neighbor recorded
   * in superstep 1.
   *
   * @param vertex Vertex being processed
   */
  private void sendIdToInNeighbors(
      Vertex<IntWritable, IntWritable, NullWritable> vertex) {
    for (IntWritable srcId : inEdgesOf(vertex)) {
      GraphTaskManager.LOG.info("Send New id : " + vertex.toString() +
          " to " + srcId + " at step " + getSuperstep());
      sendMessage(srcId, vertex.getId());
    }
  }

  /**
   * Superstep 3: replaces the out-edges of the vertex with one edge per
   * received id and votes to halt.
   *
   * @param vertex Vertex being processed
   * @param messages Ids reported back by the out-neighbors
   */
  private void rebuildOutEdges(
      Vertex<IntWritable, IntWritable, NullWritable> vertex,
      Iterable<IntWritable> messages) {
    ArrayListEdges<IntWritable, NullWritable> edges =
        (ArrayListEdges<IntWritable, NullWritable>) vertex.getEdges();
    edges.clear();
    for (IntWritable targetId : messages) {
      // The copy is essential: passing the message object itself would make
      // every edge share one instance that the iterator keeps overwriting.
      edges.add(EdgeFactory.createReusable(new IntWritable(targetId.get())));
    }
    GraphTaskManager.LOG.info("new Edges " + vertex.toString() +
        edges.printEdges() + " at step " + getSuperstep());
    vertex.voteToHalt();
  }

  /**
   * Returns the in-edge list kept on the vertex.
   *
   * The raw cast is kept in this single place because the declaration of
   * {@code DefaultVertex.inEdges} is not visible here.
   *
   * @param vertex Vertex whose in-edge list is wanted; must be a
   *               {@code DefaultVertex}
   * @return The live in-edge list of the vertex (not a copy)
   */
  @SuppressWarnings({ "unchecked", "rawtypes" })
  private static ArrayList<IntWritable> inEdgesOf(
      Vertex<IntWritable, IntWritable, NullWritable> vertex) {
    return ((DefaultVertex) vertex).inEdges;
  }
}


自定义Giraph1.1.0的重编码计算类,其中超步3中如果这样写:

for (IntWritable srcId : messages) {
 		   ((ArrayListEdges)vertex.getEdges()).add(EdgeFactory.createReusable(srcId)) ;

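在以上写法中,会造成迭代中值重复(记录时原因还没想明白)。推测原因是:Giraph 在遍历 messages 时会复用同一个 IntWritable 对象,每次 next() 只是把下一条消息的值写入该对象;而 EdgeFactory.createReusable(srcId) 和 ArrayListEdges.add 保存的是该对象的引用而不是副本,所以所有新边实际指向同一个对象,其值随迭代被覆盖。解决办法是为每条消息创建副本,即 new IntWritable(srcId.get())。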



修改后的重编码类:

/**
 * Revised recoding computation: every vertex learns who its in-neighbors
 * are and then rebuilds its out-edges from the ids that its out-neighbors
 * report back.
 *
 * Superstep 0 sends the vertex id along all out-edges, superstep 1 stores
 * the received ids as in-neighbors, superstep 2 sends the vertex id to those
 * in-neighbors, and superstep 3 replaces the out-edges with the received
 * ids and votes to halt.
 *
 * NOTE(review): {@code DefaultVertex.inEdges} and
 * {@code ArrayListEdges.clear()} look like additions of a customized Giraph
 * build - confirm they exist in the build being used.
 */
public class RecodeComputation extends
    BasicComputation<IntWritable, IntWritable, NullWritable, IntWritable> {
  /**
   * Runs the step that belongs to the current superstep.
   *
   * Original author's plan (translated): 1. superstep 0 writes the vertex
   * count of each partition to ZooKeeper; 2. superstep 1 lets every partition
   * update its ids based on ZooKeeper and send the updated ids; 3. the
   * received ids are applied. NOTE(review): no ZooKeeper access and no id
   * assignment is visible in this method - confirm where that part lives.
   *
   * @param vertex Vertex being processed
   * @param messages Ids sent to this vertex in the previous superstep
   * @throws IOException Declared by the overridden method
   */
  @Override
  public void compute(
      Vertex<IntWritable, IntWritable, NullWritable> vertex,
      Iterable<IntWritable> messages) throws IOException {
    final long superstep = getSuperstep();

    if (superstep == 0) {
      // Tell every out-neighbor who points at it.
      sendMessageToAllEdges(vertex, vertex.getId());
    } else if (superstep == 1) {
      // The raw cast is kept because the declaration of
      // DefaultVertex.inEdges is not visible here.
      @SuppressWarnings({ "unchecked", "rawtypes" })
      ArrayList<IntWritable> inEdges = ((DefaultVertex) vertex).inEdges;
      for (IntWritable srcId : messages) {
        // Store a copy: a message object is only guaranteed to stay valid
        // until the iterator advances, so keeping the reference is unsafe.
        inEdges.add(new IntWritable(srcId.get()));
      }
    } else if (superstep == 2) {
      // Report this vertex's id back to every in-neighbor collected in
      // superstep 1.
      @SuppressWarnings({ "unchecked", "rawtypes" })
      ArrayList<IntWritable> inEdges = ((DefaultVertex) vertex).inEdges;
      for (IntWritable srcId : inEdges) {
        sendMessage(srcId, vertex.getId());
      }
    } else if (superstep == 3) {
      // Drop whatever edges are stored and rebuild them from the ids that
      // the out-neighbors sent back.
      ArrayListEdges<IntWritable, NullWritable> edges =
          (ArrayListEdges<IntWritable, NullWritable>) vertex.getEdges();
      edges.clear();
      for (IntWritable targetId : messages) {
        // The copy is essential: passing the message object itself would
        // make every edge share one instance that the iterator overwrites.
        edges.add(
            EdgeFactory.createReusable(new IntWritable(targetId.get())));
      }
      vertex.voteToHalt();
    }
  }
}



  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
毕设新项目-基于Java开发的智慧养老院信息管理系统源码+数据库(含vue前端源码).zip 【备注】 1、该资源内项目代码都经过测试运行成功,功能ok的情况下才上传的,请放心下载使用!有问题请及时沟通交流。 2、适用人群:计算机相关专业(如计科、信息安全、数据科学与大数据技术、人工智能、通信、物联网、自动化、电子信息等)在校学生、专业老师或者企业员工下载使用。 3、用途:项目具有较高的学习借鉴价值,不仅适用于小白学习入门进阶。也可作为毕设项目、课程设计、大作业、初期项目立项演示等。 4、如果基础还行,或热爱钻研,亦可在此项目代码基础上进行修改添加,实现其他不同功能。 欢迎下载!欢迎交流学习!不清楚的可以私信问我! 毕设新项目-基于Java开发的智慧养老院信息管理系统源码+数据库(含vue前端源码).zip毕设新项目-基于Java开发的智慧养老院信息管理系统源码+数据库(含vue前端源码).zip毕设新项目-基于Java开发的智慧养老院信息管理系统源码+数据库(含vue前端源码).zip毕设新项目-基于Java开发的智慧养老院信息管理系统源码+数据库(含vue前端源码).zip毕设新项目-基于Java开发的智慧养老院信息管理系统源码+数据库(含vue前端源码).zip毕设新项目-基于Java开发的智慧养老院信息管理系统源码+数据库(含vue前端源码).zip毕设新项目-基于Java开发的智慧养老院信息管理系统源码+数据库(含vue前端源码).zip毕设新项目-基于Java开发的智慧养老院信息管理系统源码+数据库(含vue前端源码).zip毕设新项目-基于Java开发的智慧养老院信息管理系统源码+数据库(含vue前端源码).zip
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值