搬运代码(Spark 官方示例 pi.py,蒙特卡洛法估算 π):
from __future__ import print_function
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import sys
from random import random
from operator import add
from pyspark.sql import SparkSession
##
## 蒙特卡洛计算圆周率Pi
##
## 数学原理:在 X、Y 坐标均为 -1 到 1 的正方形内随机取点,利用点落入半径为 1 的圆内的概率来估算 π
## http://stackoverflow.com/questions/34892522/the-principle-of-spark-pi
##
## 在一个边长为2的正方形内画个圆,正方形的面积 S1=4,圆的半径 r=1,面积 S2=πr^2=π
## 现在只需要计算出S2就可以知道π,这里取圆心为坐标轴原点,在正方形内不断地随机选点,总共选n个点,
## 计算在圆内的点的数目为count,则 S2=S1*count/n,然后就出来了。
##
if __name__ == "__main__":
"""
Usage: pi [partitions]
"""
spark = SparkSession\
.builder\
.appName("PythonPi")\
.getOrCreate()
partitions = int(sys.argv[1]) if len(sys.argv) > 1 else 2
#print(partitions)
n = 100000 * partitions
print(n)
def f(_):
x = random() * 2 - 1
y = random() * 2 - 1
#print(x)
#print(y)
return 1 if x ** 2 + y ** 2 < 1 else 0 #1在圆内,0不在圆内
count = spark.sparkContext.parallelize(range(1, n + 1), partitions).map(f).reduce(add)
print(count)
print("Pi is roughly %f" % (4.0 * count / n))
spark.stop()
原理解释:
https://www.zhihu.com/question/39665957
http://blog.csdn.net/guotong1988/article/details/50549297
https://wenku.baidu.com/view/1952074bcf84b9d528ea7a4e.html
https://www.zhihu.com/question/34319749
http://blog.csdn.net/lhkaikai/article/details/25537569
http://blog.sina.com.cn/s/blog_494e45fe0102w6dq.html