本地访问 Spark UI: http://localhost:4040/stages/stage/?id=0&attempt=0
代码:
# -*- coding: utf-8 -*-
import time
import pandas as pd
import numpy as np
import os
import json
from pyspark import SparkConf, SparkContext
from pyspark.storagelevel import StorageLevel
appName = 'testSpark'

if __name__ == '__main__':
    # NOTE: 'local[2]' means "run locally with 2 worker threads" (execution
    # slots), NOT 2 partitions — the original comment conflated the two.
    conf = SparkConf().setAppName(appName).setMaster('local[2]')
    sc = SparkContext(conf=conf)
    try:
        kv_list = [1, 2, 3, 4, 5, 6]
        # Second argument pins the RDD to exactly 1 partition (the original
        # comment claimed 3, which contradicts the code).
        rdd0 = sc.parallelize(kv_list, 1)
        rdd1 = rdd0.map(lambda x: x + 1)
        # Cache the mapped RDD so the second reduce() below reuses the cached
        # data instead of recomputing the map from scratch.
        rdd1.persist(storageLevel=StorageLevel.MEMORY_AND_DISK)
        # Sum of [2..7] = 27.
        result = rdd1.reduce(lambda x, y: x + y)
        # Pause so the Spark web UI (http://localhost:4040) can be inspected
        # while the application is still alive.
        time.sleep(20)
        print(result)
        # Product of [2..7] = 5040; served from the persisted RDD.
        result = rdd1.reduce(lambda x, y: x * y)
        print(result)
    finally:
        # Always release the SparkContext, even if a job above fails.
        sc.stop()