# 查看任一日志總條數
import findspark
findspark.init()
from pyspark import SparkContext,SparkConf
conf=SparkConf()
conf.setMaster("spark://172.XX.XX.XX:7077")
conf.setAppName("test-count")
sc = SparkContext(conf=conf)
file3 = "hdfs://172.XX.XX.XX:9000/opt/hadoop-2.6.3/test/world20716.bi"
line3 = sc.textFile(file3)
print(line3.count())
# Reference example: count lines containing 'a' and lines containing 'b'
# in a text file on HDFS.
# NOTE(review): this snippet constructs its own SparkContext; if the context
# from the snippet above is still active, PySpark raises an error — only one
# SparkContext is allowed per process. Confirm these snippets are run
# separately.
from pyspark import SparkContext, SparkConf

conf = SparkConf()
conf.setMaster("spark://192.168.2.241:7077")
conf.setAppName("test application")
logFile = "hdfs://hadoop241:8020/user/root/testfile"

sc = SparkContext(conf=conf)
try:
    # cache() keeps the RDD in memory so the two count() actions below
    # don't re-read the file from HDFS.
    logData = sc.textFile(logFile).cache()
    numAs = logData.filter(lambda s: 'a' in s).count()
    numBs = logData.filter(lambda s: 'b' in s).count()
    # Fixed: the original used the Python 2 print statement, which is a
    # SyntaxError on Python 3 (and inconsistent with print() used above).
    print("Lines with a:%i,lines with b:%i" % (numAs, numBs))
finally:
    # Shut the context down cleanly so cluster resources are released.
    sc.stop()