原生
python
运行速度很慢,只要数据量大于100
,求解就变得十分困难
五组测试数据
测试用例的第一行为图的节点数和边数,第二行为最大流算法的起始节点和中止节点,剩余所有行均为有向加权边,其中前两个数字代表边的两个端点,后一个数字代表边的权重。
·测试用例
原生代码实现
思路是不断找源点到宿点的路径,找到后计算最大可通过流量加到总的最大流计数中,再将路径正向全部减少该流量,反向全部增加该流量。重复操作直到找不到路径时即为最大流。
#!/usr/bin/env python3
#edmonds.py
#fumiama 20201224
import numpy as np
from copy import deepcopy
from time import time
from heapq import heappop, heappush, heapify
class Graph(object):
def __init__(self, nodeSize):
self.nodeSize = nodeSize
self.node2 = dict()
def addEdge(self, a, b, w):
if a not in self.node2.keys(): self.node2[a] = []
heappush(self.node2[a], (b, w))
def addWeight(self, a, b, w):
#print("addWeight:", a, b, w)
if a not in self.node2.keys(): self.addEdge(a, b, w)
else:
notChange = 1
for node in self.node2[a]:
if node[0] == b:
index = self.node2[a].index(node)
#print("index:", index, "node:", node)
previousWeight = self.node2[a][index][1]
newWeight = previousWeight + w
if newWeight: self.node2[a][index] = (b, newWeight)
else: self.delEdge(a, b)
notChange = 0
break
if notChange: self.addEdge(a, b, w)
def delEdge(self, a, b):
for node in self.node2[a]:
if node[0] == b: self.node2[a].remove(node)
heapify(self.node2[a])
def getWeight(self, a, b):
#print("get weight:", a, b, self.node2[a] if a in self.node2.keys() else "null")
if a in self.node2.keys():
for node in self.node2[a]:
if node[0] == b: return node[1]
return -1
def findMaxFlow(self, s, t):
maxFlow = 0
stack = [s]
while stack:
nd = deepcopy(self.node2)
touch = np.zeros(self.nodeSize + 1, dtype=int)
while stack and stack[-1] != t: #找一条路径
edges = nd[stack[-1]]
#print("edges:", edges)
if edges:
nextNode = heappop(edges)[0]
if nextNode != s and not touch[nextNode]:
stack.append(nextNode)
touch[nextNode] = 1
else: stack.pop()
if stack == []: break
#print("stack:", stack)
stack2 = stack.copy()
nextNode = stack.pop()
flowOfThisRoute = -1
while stack: #计算该路径流量
thisNode = stack.pop()
throttle = self.getWeight(thisNode, nextNode)
if flowOfThisRoute == -1 or flowOfThisRoute > throttle: flowOfThisRoute = throttle
nextNode = thisNode
maxFlow += flowOfThisRoute
#print("flowOfThisRoute:", flowOfThisRoute)
#print(flowOfThisRoute, end=' ')
nextNode = stack2.pop()
while stack2: #修改路径
thisNode = stack2.pop()
throttle = self.getWeight(thisNode, nextNode)
self.addWeight(thisNode, nextNode, -flowOfThisRoute)
self.addWeight(nextNode, thisNode, flowOfThisRoute) #反向加边
nextNode = thisNode
stack = [s] #重新开始找一条路
return maxFlow
if __name__ == '__main__':
nodeSize, edgeSize = map(int, input().split())
s, t = map(int, input().split())
g = Graph(nodeSize)
while edgeSize:
a, b, w = map(int, input().split())
g.addEdge(a, b, w)
edgeSize -= 1
time_start = time()
print("最大流:", g.findMaxFlow(s, t))
print("用时:", time() - time_start)
原生实现的样例测试
$ ./edmonds.py < 测试用例1.txt
最大流: 13
用时: 0.0012137889862060547
$ ./edmonds.py < 测试用例2.txt
最大流: 38
用时: 0.050067901611328125
$ ./edmonds.py < 测试用例3.txt
最大流: 869
用时: 4.9637181758880615
$ ./edmonds.py < 测试用例4.txt
(十分钟内未计算完成)
可以看到,随着点的数量增加,求解所需时间迅速呈指数形式增加。但是造成运行缓慢的一个主要原因是python
原生代码的缓慢,因此考虑使用cython
重写算法代码以加快求解速度。cython
会将我们的代码编译为以c语言
实现的python
库供我们调用。
为此,需要编写以下文件,并手动编译库文件:
pyx文件(代码主体)
#cython: language_level=3
#edmonds_c.pyx
#fumiama 20201224
from time import time
#from cpython.mem cimport PyMem_Malloc, PyMem_Free
from libc.string cimport memcpy, memset
from libc.stdlib cimport malloc, free
cdef int** arr
cdef int** arr2
cdef set(int x, int y, int item):
global arr
#print("set", x, y, item)
arr[x][y] = item
cdef get(int x, int y):
global arr
#print("get", x, y)
return arr[x][y]
cdef set2(int x, int y, int item):
global arr2
#print("set", x, y, item)
arr2[x][y] = item
cdef get2(int x, int y):
global arr2
#print("get", x, y)
return arr2[x][y]
cdef createArray(int x, int y):
global arr, arr2
arr = <int**>malloc(x * sizeof(int*))
for i in range(x):
arr[i] = <int*>malloc(y * sizeof(int))
memset(arr[i], 0, y * sizeof(int))
arr2 = <int**>malloc(x * sizeof(int*))
for i in range(x):
arr2[i] = <int*>malloc(y * sizeof(int))
memset(arr2[i], 0, y * sizeof(int))
cdef copyarr2arr2(int x, int y):
for i in range(x): memcpy(arr2[i], arr[i], y * sizeof(int))
cdef changeWeight(int a, int b, int w): set(a-1, b-1, w + get(a-1, b-1))
#cdef delEdge(int a, int b): set(a-1, b-1, 0)
cdef getWeight(int a, int b):
cdef int re = get(a-1, b-1)
return re if re else -1
cdef findMaxFlow(int s, int t, int nodeSize):
cdef int maxFlow = 0
cdef int thisNode, nextNode, flowOfThisRoute, throttle, p, p2, neighborEmpty, thisNodeM1
cdef int length = (nodeSize+1) * sizeof(int)
cdef int* stack = <int*>malloc(length)
cdef int* touch = <int*>malloc(length)
p = 1
p2 = 0
stack[0] = s
while 1:
copyarr2arr2(nodeSize, nodeSize) #nd = deepcopy(self.node2)
memset(touch, 0, length) #touch = np.zeros(self.nodeSize + 1, dtype=int)
while p and stack[p-1] != t: #找一条路径
neighborEmpty = 1
thisNodeM1 = stack[p-1]-1
for i in range(nodeSize):
if get2(thisNodeM1, i): #nextNode = heappop(edges)[0]
neighborEmpty = 0
set2(thisNodeM1, i, 0)
if i != s-1 and not touch[i]: #if nextNode != s and not touch[nextNode]:
stack[p] = i+1 #stack.append(nextNode)
p += 1
touch[i] = 1 #touch[nextNode] = 1
break
#thisNodeM1 = i #edges = nd[stack[-1]]
#if i+1 == t: break
if neighborEmpty: p -= 1
if p == 0: break #if stack == []: break
#print("stack:[", end=' ')
#for i in range(p): print(stack[i], end=" ")
#print(']')
p2 = p #stack2 = stack.copy()
p -= 1
nextNode = stack[p] #nextNode = stack.pop()
flowOfThisRoute = -1
while p: #计算该路径流量
p -= 1
thisNode = stack[p] #thisNode = stack.pop()
#print("thisNode:", thisNode)
#print("nextNode:", nextNode)
throttle = getWeight(thisNode, nextNode)
#print("throttle:", throttle)
#print('')
if flowOfThisRoute == -1 or flowOfThisRoute > throttle: flowOfThisRoute = throttle
nextNode = thisNode
#print("flowOfThisRoute:", flowOfThisRoute)
#print(flowOfThisRoute, end=' ')
maxFlow += flowOfThisRoute
p = p2
p -= 1
nextNode = stack[p] #nextNode = stack2.pop()
while p: #修改路径
p -= 1
thisNode = stack[p]
changeWeight(thisNode, nextNode, -flowOfThisRoute)
changeWeight(nextNode, thisNode, flowOfThisRoute) #反向加边
nextNode = thisNode
stack[0] = s #重新开始找一条路
p = 1
return maxFlow
def main():
cdef int nodeSize, edgeSize, s, t, a, b, w
cdef double time_start
cdef int length
nodeSize, edgeSize = map(int, input().split())
s, t = map(int, input().split())
createArray(nodeSize, nodeSize)
while edgeSize:
a, b, w = map(int, input().split())
changeWeight(a, b, w)
edgeSize -= 1
time_start = time()
print("最大流:", findMaxFlow(s, t, nodeSize))
print("用时:", time() - time_start)
setup文件(构建库)
# cython: language_level=3
from distutils.core import setup
from Cython.Build import cythonize
setup(
ext_modules = cythonize("edmonds_c.pyx")
)
编译
$ python3 ./edmonds_c_setup.py build_ext --inplace
调用库
#edmonds_c_run.py
import edmonds_c
edmonds_c.main()
进行测试
$ ./edmonds_c_run.py < 测试用例1.txt
最大流: 13
用时: 6.914138793945312e-05
$ ./edmonds_c_run.py < 测试用例2.txt
最大流: 38
用时: 0.0015988349914550781
$ ./edmonds_c_run.py < 测试用例3.txt
最大流: 869
用时: 0.1410071849822998
$ ./edmonds_c_run.py < 测试用例4.txt
最大流: 17639
用时: 105.51624917984009
$ ./edmonds_c_run.py < 测试用例5.txt
(20分钟内未计算完成)
可见在使用cython
优化之后,运行速度有了明显改善。但是在大数据下其性能仍不尽人意,因此可以考虑采用更加高效的Preflow Push
算法进行求解。关于该算法的实现以及cython
加速,详见我的下一篇文章Python实现 Preflow Push (Push-Relabel) 算法求最大流 并用Cython优化提速。