我有以下代码:
import sys
from pyspark import SparkContext
def mapper(array):
    """Turn one input line into a single-entry adjacency dict.

    The line is converted to ``str`` and split on ``' | '``; only the first
    two fields are used. For fields ``a`` and ``b`` the result is
    ``{(a[:-1], b[:-1]): [(a[1:], b[1:])]}``, i.e. the key is the pair of
    prefixes (last character dropped) and the value is a one-element list
    holding the pair of suffixes (first character dropped).

    Args:
        array: One record of the input; anything ``str()`` accepts.

    Returns:
        A dict with exactly one key mapping to a one-element list.

    Raises:
        ValueError: If the line does not contain the ``' | '`` separator.
    """
    line = str(array)
    fields = line.split(' | ')
    if len(fields) < 2:
        # Fail with the offending record instead of a bare IndexError, which
        # is hard to trace back to the input from inside a Spark task.
        raise ValueError("expected '<left> | <right>', got: %r" % (line,))
    left, right = fields[0], fields[1]
    return {(left[:-1], right[:-1]): [(left[1:], right[1:])]}
def reducer(d1, d2):
    """Merge ``d2`` into ``d1``, concatenating the values of shared keys.

    For a key present in both dicts the result holds ``d1[key] + d2[key]``;
    keys present only in ``d2`` are copied over as-is. ``d1`` is updated in
    place and returned; ``d2`` is left untouched.

    Args:
        d1: Accumulated dict; modified in place.
        d2: Dict to merge in; not modified.

    Returns:
        ``d1`` after merging.
    """
    for key, values in d2.items():
        # ``in`` replaces dict.has_key(), which no longer exists in Python 3.
        if key in d1:
            # Build a new list rather than extending in place so that lists
            # shared with other dicts are never mutated.
            d1[key] = d1[key] + values
        else:
            d1[key] = values
    return d1
if __name__ == "__main__":
    # Imported here so this entry-point block is self-contained.
    import os

    if len(sys.argv) != 2:
        print("Usage: bruijn ")
        # sys.exit is always available; the bare ``exit`` helper is only
        # injected by the ``site`` module.
        sys.exit(-1)

    sc = SparkContext(appName="Assembler")
    try:
        # The file name is expected to look like "k<K>d<D>mer.txt". Only the
        # base name is parsed so that a directory prefix in the path does not
        # defeat lstrip('k'). Note that lstrip/rstrip remove *sets of
        # characters*, not a prefix/suffix; that is harmless here because
        # neither digits nor 'd' belong to the stripped sets.
        kd = os.path.basename(sys.argv[1]).lstrip('k').rstrip('mer.txt').split('d')
        k, d = int(kd[0]), int(kd[1])

        # NOTE(review): per the PySpark API the second positional parameter of
        # textFile is minPartitions, so ``False`` may not do what was intended
        # (use_unicode?) -- confirm before changing.
        dic = sc.textFile(sys.argv[1], False).map(mapper).reduce(reducer)

        # ``with`` guarantees the output file is closed even if a write fails.
        with open('DeBruijn.txt', 'w') as out_file:
            for key in sorted(dic):
                out_file.write(str(key) + ' -> ' + str(dic[key]) + '\n')
        print('De Bruijn graph successfully generated!')
    finally:
        # Always release the SparkContext, including on errors above.
        sc.stop()
我想在 main 中创建一个名为 vertexes 的空列表,并让 mapper 向其中追加(append)元素。但是使用关键字 global 行不通。我也尝试过使用累加器,但在任务(task)中无法读取累加器的值。