# -*- coding: UTF-8 -*-
from pyspark import SparkContext
if __name__ == "__main__":
    # Requirement: compute word frequencies for the contents of the local
    # file system URI /root/wordcount.txt.
    # ********** Begin **********#
    sc = SparkContext("local", "app")
    try:
        rdd = sc.textFile("/root/wordcount.txt")
        li = (
            rdd.flatMap(lambda x: str(x).split(" "))  # one record per token
            .map(lambda x: (x, 1))
            .reduceByKey(lambda x, y: x + y)  # sum the 1s per token
            .sortBy(lambda x: x[1], False)  # descending by count
            .collect()
        )
        print(li)
    finally:
        # Release the local Spark context even if the job fails.
        sc.stop()
    # ********** End **********#
# Stage 2: Friend Recommendation (第2关: 好友推荐)
# -*- coding: UTF-8 -*-from pyspark import SparkContext
# ********** Begin **********#
def hashWord(a, b):
    """Return a key for the pair (a, b) that ignores argument order.

    The two strings are joined with "_", placing the one with the larger
    ``hash()`` first, so ``hashWord(a, b) == hashWord(b, a)`` whenever the
    two hashes differ.

    NOTE(review): ``hash()`` of a ``str`` is salted per interpreter process
    unless PYTHONHASHSEED is fixed, so which name comes first can vary
    between runs/processes. The ordering rule is kept as-is because the key
    text is visible in the job output; confirm the hash seed is pinned where
    this runs.
    """
    if hash(a) > hash(b):
        return a + "_" + b
    return b + "_" + a
def f(line):
    """Expand one space-separated line into tagged pair records.

    The line is split on single spaces. For every token after the first,
    the pair (first token, token) is emitted with tag 0, and every pair of
    two later tokens is emitted with tag 1. Pair keys are built with
    ``hashWord`` so both orderings of a pair map to the same key.

    Returns a list of ``(key, tag)`` tuples; a line with a single token
    yields an empty list.
    """
    names = str(line).split(" ")
    count = len(names)
    pairs = []
    for i in range(1, count):
        # Pair of the line's first token with one of the later tokens.
        pairs.append((hashWord(names[0], names[i]), 0))
        # Pairs among the later tokens themselves.
        for j in range(i + 1, count):
            pairs.append((hashWord(names[i], names[j]), 1))
    return pairs
# ********** End **********#
if __name__ == "__main__":
    # Requirement: count indirect friends from the data at the local file
    # system URI /root/friend.txt.
    # ********** Begin **********#
    sc = SparkContext('local', 'Simple App')
    try:
        rdd = sc.textFile("/root/friend.txt")
        li = (
            rdd.flatMap(f)
            # A 0 tag on either side forces the merged value to 0;
            # otherwise the tags are summed.
            .reduceByKey(lambda x, y: 0 if x == 0 or y == 0 else x + y)
            # Keep only the pairs whose merged value stayed positive.
            .filter(lambda x: x[1] > 0)
            .collect()
        )
        print(li)
    finally:
        # Release the local Spark context even if the job fails.
        sc.stop()
    # ********** End **********#