/**
 * The number of edges in the graph.
 *
 * Evaluated on first access by counting `graph.edges`; the result is then reused.
 */
@transient lazy val numEdges: Long = graph.edges.count()
numVertices
/**
 * The number of vertices in the graph.
 *
 * Evaluated on first access by counting `graph.vertices`; the result is then reused.
 */
@transient lazy val numVertices: Long = graph.vertices.count()
inDegrees
/**
 * The in-degree of each vertex in the graph.
 *
 * Built lazily from `degreesRDD(EdgeDirection.In)` and labelled "GraphOps.inDegrees".
 *
 * @note Vertices with no in-edges are not returned in the resulting RDD.
 */
@transient lazy val inDegrees: VertexRDD[Int] =
  degreesRDD(EdgeDirection.In).setName("GraphOps.inDegrees")
outDegrees
/**
 * The out-degree of each vertex in the graph.
 *
 * Built lazily from `degreesRDD(EdgeDirection.Out)` and labelled "GraphOps.outDegrees".
 *
 * @note Vertices with no out-edges are not returned in the resulting RDD.
 */
@transient lazy val outDegrees: VertexRDD[Int] =
  degreesRDD(EdgeDirection.Out).setName("GraphOps.outDegrees")
degrees
/**
 * The degree of each vertex in the graph.
 *
 * Built lazily from `degreesRDD(EdgeDirection.Either)` and labelled "GraphOps.degrees".
 *
 * @note Vertices with no edges are not returned in the resulting RDD.
 */
@transient lazy val degrees: VertexRDD[Int] =
  degreesRDD(EdgeDirection.Either).setName("GraphOps.degrees")
Views of the graph as collections
vertices & edges
/**
 * Builds a graph out of a vertex collection and a collection of attributed edges.
 *
 * When the same vertex appears more than once, one of its entries is picked arbitrarily.
 * Vertices that occur only in the edge collection (and not in `vertices`) receive
 * `defaultVertexAttr`.
 *
 * @tparam VD the vertex attribute type
 * @tparam ED the edge attribute type
 * @param vertices the "set" of vertices and their attributes
 * @param edges the collection of edges in the graph
 * @param defaultVertexAttr the attribute given to vertices that are mentioned in `edges`
 *                          but are absent from `vertices`
 * @param edgeStorageLevel the desired storage level at which to cache the edges if necessary
 * @param vertexStorageLevel the desired storage level at which to cache the vertices if
 *                           necessary
 * @return the graph produced by delegating to `GraphImpl` with the same arguments
 */
def apply[VD: ClassTag, ED: ClassTag](
    vertices: RDD[(VertexId, VD)],
    edges: RDD[Edge[ED]],
    defaultVertexAttr: VD = null.asInstanceOf[VD],
    edgeStorageLevel: StorageLevel = StorageLevel.MEMORY_ONLY,
    vertexStorageLevel: StorageLevel = StorageLevel.MEMORY_ONLY): Graph[VD, ED] =
  GraphImpl(vertices, edges, defaultVertexAttr, edgeStorageLevel, vertexStorageLevel)
triplets
/**
 * Return an RDD that brings edges together with their source and destination vertices.
 *
 * The replicated vertex view is upgraded first, then every edge partition contributes the
 * triplets produced by its `tripletIterator()`.
 */
@transient override lazy val triplets: RDD[EdgeTriplet[VD, ED]] = {
  // NOTE(review): the two boolean flags presumably request source and destination attributes
  // respectively — confirm against ReplicatedVertexView.upgrade.
  replicatedVertexView.upgrade(vertices, true, true)
  replicatedVertexView.edges.partitionsRDD.mapPartitions(_.flatMap {
    case (pid, part) => part.tripletIterator()
  })
}
/**
 * Transforms each vertex attribute in the graph using the map function.
 *
 * @note The new graph has the same structure. As a consequence the underlying index structures
 * can be reused.
 *
 * @param f the function from a vertex id and its current attribute to a new vertex value
 * @param eq evidence that `VD` and `VD2` are the same type; left `null` when they differ
 *
 * @tparam VD2 the new vertex data type
 *
 * @example We might use this operation to change the vertex values
 * from one type to another to initialize an algorithm.
 * {{{
 * val rawGraph: Graph[(), ()] = Graph.textFile("hdfs://file")
 * val root = 42
 * var bfsGraph = rawGraph.mapVertices[Int]((vid, data) => if (vid == root) 0 else Int.MaxValue)
 * }}}
 *
 */
override def mapVertices[VD2: ClassTag]
  (f: (VertexId, VD) => VD2)(implicit eq: VD =:= VD2 = null): Graph[VD2, ED] = {
  // The implicit parameter eq will be populated by the compiler if VD and VD2 are equal, and
  // left null if not
  if (eq != null) {
    vertices.cache()
    // The map preserves type, so we can use incremental replication
    val newVerts = vertices.mapVertexPartitions(_.map(f)).cache()
    val changedVerts = vertices.asInstanceOf[VertexRDD[VD2]].diff(newVerts)
    val newReplicatedVertexView = replicatedVertexView.asInstanceOf[ReplicatedVertexView[VD2, ED]]
      .updateVertices(changedVerts)
    new GraphImpl(newVerts, newReplicatedVertexView)
  } else {
    // The map does not preserve type, so we must re-replicate all vertices
    GraphImpl(vertices.mapVertexPartitions(_.map(f)), replicatedVertexView.edges)
  }
}
mapEdges
/**
 * Produces a graph whose edge attributes are the result of applying `map` to every edge.
 * The function only sees the edge itself, not the attributes of the adjacent vertices; use
 * `mapTriplets` when vertex values are needed.
 *
 * @note This graph is not changed and the new graph has the same structure. As a
 * consequence the underlying index structures can be reused.
 *
 * @param map the function from an edge object to a new edge value.
 *
 * @tparam ED2 the new edge data type
 *
 * @example This function might be used to initialize edge attributes.
 *
 */
def mapEdges[ED2: ClassTag](map: Edge[ED] => ED2): Graph[VD, ED2] =
  // Delegate to the per-partition overload; the partition id is not needed here.
  mapEdges((_, edgeIter) => edgeIter.map(map))
/**
 * Transforms edge attributes one edge partition at a time.
 *
 * For each partition, `f` is called with the partition id and an iterator over that
 * partition's edges; the iterator it returns is handed to the partition's `map` to build the
 * new edge partition. The vertices are reused unchanged.
 *
 * @param f the function from a partition id and that partition's edges to the new edge values
 *
 * @tparam ED2 the new edge data type
 */
override def mapEdges[ED2: ClassTag](
    f: (PartitionID, Iterator[Edge[ED]]) => Iterator[ED2]): Graph[VD, ED2] = {
  val newEdges = replicatedVertexView.edges
    .mapEdgePartitions((pid, part) => part.map(f(pid, part.iterator)))
  new GraphImpl(vertices, replicatedVertexView.withEdges(newEdges))
}
mapTriplets
/**
 * Produces a graph whose edge attributes are the result of applying `map` to every edge
 * triplet, so the function also sees the attributes of the adjacent vertices. When those
 * vertex values are not required, consider using `mapEdges` instead.
 *
 * @note This does not change the structure of the graph or modify the values of this
 * graph. As a consequence the underlying index structures can be reused.
 *
 * @param map the function from an edge object to a new edge value.
 *
 * @tparam ED2 the new edge data type
 *
 * @example This function might be used to initialize edge
 * attributes based on the attributes associated with each vertex.
 * {{{
 * val rawGraph: Graph[Int, Int] = someLoadFunction()
 * val graph = rawGraph.mapTriplets[Int]( edge =>
 *   edge.src.data - edge.dst.data)
 * }}}
 *
 */
def mapTriplets[ED2: ClassTag](map: EdgeTriplet[VD, ED] => ED2): Graph[VD, ED2] =
  // Delegate to the per-partition overload, requesting all triplet fields.
  mapTriplets((_, tripletIter) => tripletIter.map(map), TripletFields.All)
Modify the graph structure
#
/**
 * Returns a graph in which every edge points the other way: an edge from a to b in this
 * graph becomes an edge from b to a in the result.
 */
override def reverse: Graph[VD, ED] = {
  val reversedVertices = vertices.reverseRoutingTables()
  val reversedView = replicatedVertexView.reverse()
  new GraphImpl(reversedVertices, reversedView)
}
/**
 * Declares the `subgraph` operation; only the signature is visible here.
 *
 * NOTE(review): presumably the result keeps the vertices accepted by `vpred` and the edges
 * accepted by `epred` — confirm against the implementation.
 *
 * @param epred predicate over an edge triplet; defaults to a function that always returns true
 * @param vpred predicate over a vertex id and its attribute; defaults to a function that
 *              always returns true
 * @return a graph with the same vertex and edge attribute types as this graph
 */
def subgraph(
epred: EdgeTriplet[VD,ED] => Boolean = (x => true),
vpred: (VertexId, VD) => Boolean = ((v, d) => true))
: Graph[VD, ED]
/**
 * Restricts the graph to only the vertices and edges that are also in `other`, but keeps the
 * attributes from this graph.
 *
 * @tparam VD2 the vertex attribute type of `other`
 * @tparam ED2 the edge attribute type of `other`
 * @param other the graph to project this graph onto
 * @return a graph with vertices and edges that exist in both the current graph and `other`,
 * with vertex and edge data from the current graph
 */
def mask[VD2: ClassTag, ED2: ClassTag](other: Graph[VD2, ED2]): Graph[VD, ED]
/**
 * Merges multiple edges between two vertices into a single edge. For correct results, the graph
 * must have been partitioned using [[partitionBy]].
 *
 * @param merge the user-supplied commutative, associative function used to combine the
 * attributes of duplicate edges into a single attribute.
 *
 * @return the resulting graph, with the same vertex and edge attribute types, containing a
 * single edge for each (source, dest) vertex pair.
 */
def groupEdges(merge: (ED, ED) => ED): Graph[VD, ED]
Join RDDs with the graph
joinVertices
/**
 * Join the vertices with an RDD and then apply a function from the
 * vertex and RDD entry to a new vertex value. The input table
 * should contain at most one entry for each vertex. If no entry is
 * provided the map function is skipped and the old value is used.
 *
 * @tparam U the type of entry in the table of updates
 * @param table the table to join with the vertices in the graph.
 * The table should contain at most one entry for each vertex.
 * @param mapFunc the function used to compute the new vertex
 * values. The map function is invoked only for vertices with a
 * corresponding entry in the table; otherwise the old vertex value
 * is used.
 * @return the graph returned by `outerJoinVertices` for `table`, with the same vertex and
 * edge attribute types as this graph
 *
 * @example This function is used to update the vertices with new
 * values based on external data. For example we could add the out
 * degree to each vertex record
 *
 * {{{
 * val rawGraph: Graph[Int, Int] = GraphLoader.edgeListFile(sc, "webgraph")
 *   .mapVertices((_, _) => 0)
 * val outDeg = rawGraph.outDegrees
 * val graph = rawGraph.joinVertices[Int](outDeg)
 *   ((_, _, outDeg) => outDeg)
 * }}}
 *
 */
def joinVertices[U: ClassTag](table: RDD[(VertexId, U)])(mapFunc: (VertexId, VD, U) => VD)
  : Graph[VD, ED] = {
  // Adapt mapFunc to the Option-based callback of outerJoinVertices: vertices without a
  // matching table entry keep their current attribute.
  val uf = (id: VertexId, data: VD, o: Option[U]) =>
    o match {
      case Some(u) => mapFunc(id, data, u)
      case None => data
    }
  graph.outerJoinVertices(table)(uf)
}
outerJoinVertices
/**
 * Joins the vertices with entries in the `other` RDD and merges the results using `mapFunc`.
 * The input table should contain at most one entry for each vertex. If no entry in `other` is
 * provided for a particular vertex in the graph, the map function receives `None`.
 *
 * @tparam U the type of entry in the table of updates
 * @tparam VD2 the new vertex value type
 *
 * @param other the table to join with the vertices in the graph.
 * The table should contain at most one entry for each vertex.
 * @param mapFunc the function used to compute the new vertex values.
 * The map function is invoked for all vertices, even those
 * that do not have a corresponding entry in the table.
 * @param eq optional evidence that `VD` and `VD2` are the same type; defaults to `null`
 *
 * @example This function is used to update the vertices with new values based on external data.
 * For example we could add the out-degree to each vertex record:
 *
 * {{{
 * val rawGraph: Graph[_, _] = Graph.textFile("webgraph")
 * val outDeg: RDD[(VertexId, Int)] = rawGraph.outDegrees
 * val graph = rawGraph.outerJoinVertices(outDeg) {
 *   (vid, data, optDeg) => optDeg.getOrElse(0)
 * }
 * }}}
 */
def outerJoinVertices[U: ClassTag, VD2: ClassTag](other: RDD[(VertexId, U)])
(mapFunc: (VertexId, VD, Option[U]) => VD2)(implicit eq: VD =:= VD2 = null)
: Graph[VD2, ED]
Aggregate information about adjacent triplets
collectNeighborIds
/**
 * Collect the neighbor vertex ids for each vertex.
 *
 * Every vertex of the graph appears in the result; a vertex that received no neighbor ids
 * for the requested direction is paired with an empty array.
 *
 * @param edgeDirection the direction along which to collect
 * neighboring vertices
 *
 * @return the set of neighboring ids for each vertex
 *
 * @throws SparkException if `edgeDirection` is not one of `EdgeDirection.Either`,
 * `EdgeDirection.Out` or `EdgeDirection.In`
 */
def collectNeighborIds(edgeDirection: EdgeDirection): VertexRDD[Array[VertexId]] = {
  val nbrs =
    if (edgeDirection == EdgeDirection.Either) {
      // Each edge contributes the opposite endpoint to both of its vertices.
      graph.aggregateMessages[Array[VertexId]](
        ctx => { ctx.sendToSrc(Array(ctx.dstId)); ctx.sendToDst(Array(ctx.srcId)) },
        _ ++ _, TripletFields.None)
    } else if (edgeDirection == EdgeDirection.Out) {
      // The source vertex learns about the destination of each of its out-edges.
      graph.aggregateMessages[Array[VertexId]](
        ctx => ctx.sendToSrc(Array(ctx.dstId)),
        _ ++ _, TripletFields.None)
    } else if (edgeDirection == EdgeDirection.In) {
      // The destination vertex learns about the source of each of its in-edges.
      graph.aggregateMessages[Array[VertexId]](
        ctx => ctx.sendToDst(Array(ctx.srcId)),
        _ ++ _, TripletFields.None)
    } else {
      throw new SparkException("It doesn't make sense to collect neighbor ids without a " +
        "direction. (EdgeDirection.Both is not supported; use EdgeDirection.Either instead.)")
    }
  // Left-join so that vertices without any collected neighbor still get an (empty) entry.
  graph.vertices.leftZipJoin(nbrs) { (vid, vdata, nbrsOpt) =>
    nbrsOpt.getOrElse(Array.empty[VertexId])
  }
} // end of collectNeighborIds
/**
 * Declares the `pregel` operation; only the signature is visible here.
 *
 * NOTE(review): the parameter descriptions below are read off the names and types in the
 * signature — confirm the exact semantics against the implementation.
 *
 * @tparam A the message type
 * @param initialMsg a value of the message type (presumably delivered to vertices before the
 *                   first iteration)
 * @param maxIterations an iteration bound (presumably the maximum number of rounds to run)
 * @param activeDirection an edge direction (presumably selects which edges may send messages)
 * @param vprog function from a vertex id, its attribute and a message to a new attribute
 * @param sendMsg function from an edge triplet to the (vertex id, message) pairs to emit
 * @param mergeMsg function combining two messages into one
 * @return a graph with the same vertex and edge attribute types as this graph
 */
def pregel[A](initialMsg: A, maxIterations: Int, activeDirection: EdgeDirection)(
vprog: (VertexId, VD, A) => VD, sendMsg: EdgeTriplet[VD, ED] => Iterator[(VertexId,A)], mergeMsg: (A, A) => A) : Graph[VD, ED]
Basic graph algorithms
pageRank
/**
 * Run a dynamic version of PageRank returning a graph with vertex attributes containing the
 * PageRank and edge attributes containing the normalized edge weight.
 *
 * @param tol the tolerance forwarded to `PageRank.runUntilConvergence`
 * @param resetProb the reset probability forwarded to `PageRank.runUntilConvergence`;
 *                  defaults to 0.15
 * @return the graph produced by `PageRank.runUntilConvergence`
 *
 * @see [[org.apache.spark.graphx.lib.PageRank$#runUntilConvergence]]
 */
def pageRank(tol: Double, resetProb: Double = 0.15): Graph[Double, Double] = {
  PageRank.runUntilConvergence(graph, tol, resetProb)
}
Information about the Graph val numEdges: Long val numVertices: Long val inDegrees: VertexRDD[Int] val outDegrees: VertexRDD[Int] val degrees: VertexRDD[Int] // Views of the graph as collect