1. 安装JDK7
1.1 解压 tar -xvf jdk-7u79-linux-x64.tar
1.2 配置环境变量 vi .bash_profile
export JAVA_HOME=/home/spark0/soft/jdk1.7.0_79
export PATH=$JAVA_HOME/bin:$PATH
2.安装HDFS
2.1 解压 tar -zxvf hadoop-2.7.0.tar.gz
2.2 编辑 hadoop-env.sh
export JAVA_HOME=/home/spark0/soft/jdk1.7.0_79
2.3 修改配置文件 core-site.xml
<configuration>
<property>
<name>hadoop.tmp.dir</name>
<value>file:/home/spark0/temp/hadoop</value>
<description>Abase for other temporary directories.</description>
</property>
<property>
<name>fs.defaultFS</name>
<value>hdfs://supportsit1.core.sfp.com:9000</value>
</property>
</configuration>
2.4 修改配置文件 hdfs-site.xml
<configuration>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>supportsit1.core.sfp.com:50090</value>
</property>
<property>
<name>dfs.replication</name>
<value>2</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:/home/spark0/temp/hadoop/dfs/name</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:/home/spark0/temp/hadoop/dfs/data</value>
</property>
</configuration>
2.5 配置slaves
hadoop70
supportsit1.core.sfp.com
2.6 复制文件到slaves
scp jdk-7u79-linux-x64.tar hadoop70:/home/spark0/soft
tar -zcf ./hadoop-2.7.0.slave.tar.gz ./hadoop-2.7.0
scp ./hadoop-2.7.0.slave.tar.gz spark0@hadoop70:/home/spark0/soft
2.7 Slaves解压安装jdk和hadoop
2.8 执行namenode的格式化
bin/hdfs namenode -format
2.9 启动hdfs
sbin/start-dfs.sh
2.10 在各节点执行 jps 命令，确认 NameNode / DataNode / SecondaryNameNode 进程已启动
2.11 创建目录
./hdfs dfs -mkdir -p /spark/temp
2.12 放一个文件
./hdfs dfs -put ~/test1 /spark/temp/
3.安装SPARK的Standalone
3.1 解压 tar -xvf spark-1.4.1-bin-hadoop2.6.tgz
3.2 配置 slaves
加入 supportsit1.core.sfp.com
hadoop70
3.3 配置 spark-env.sh
3.4 配置 spark-defaults.conf
3.5 复制文件到slaves
tar -zcf ./spark-1.4.1-bin-hadoop2.6.slave.tgz ./spark-1.4.1-bin-hadoop2.6
scp ./spark-1.4.1-bin-hadoop2.6.slave.tgz spark0@hadoop70:/home/spark0/soft
3.6 启动：先执行 sbin/start-master.sh，再执行 sbin/start-slaves.sh
// Load the raw access log from HDFS as one line per record.
// NOTE(review): step 2.12 above uploads ~/test1, but this reads analyData.text —
// confirm which file name is actually present in /spark/temp.
val fileRdd = sc.textFile("hdfs://supportsit1.core.sfp.com:9000/spark/temp/analyData.text")

// Exploratory per-channel / per-function subsets (only the SFBEST ones feed the
// computation below; the rest are kept for interactive inspection in the REPL).
val express_indexpage = fileRdd.filter(_.contains("EXPRESS= FUNID=INDEXPAGE"))
val express_CHECKPRICE = fileRdd.filter(_.contains("EXPRESS= FUNID=CHECKPRICE"))
val express_CONFIRM = fileRdd.filter(_.contains("EXPRESS= FUNID=CONFIRM"))
val charge_indexpage = fileRdd.filter(_.contains("CHARGE= FUNID=INDEXPAGE"))
val charge_CHECKPRICE = fileRdd.filter(_.contains("CHARGE= FUNID=CHECKPRICE"))
val charge_CONFIRM = fileRdd.filter(_.contains("CHARGE= FUNID=CONFIRM"))
val sfbest_indexpage = fileRdd.filter(_.contains("SFBEST= FUNID=INDEXPAGE"))
val sfbest_load = fileRdd.filter(_.contains("LOAD"))
val sfbest_confirm = fileRdd.filter(_.contains("SFBEST= FUNID=CONFIRM"))

// Keep the two event kinds we want to pair up per key: the SFBEST index-page
// hit and its corresponding LOAD record.
val sfbest = fileRdd.filter(line => line.contains("SFBEST= FUNID=INDEXPAGE") || line.contains("LOAD"))

// Map each record to (key, timestamp).
// NOTE(review): fields 9 and 3 of the '='-split line are assumed to be a
// session/request key and a numeric timestamp — confirm against the log format.
val sfbestmap = sfbest.map(line => (line.split("=")(9), line.split("=")(3).toLong))

// Per-key elapsed time between the two paired events.
// Original used reduceByKey(_ - _): subtraction is not commutative, so the sign
// of the result depended on which partition's value arrived first — the output
// was non-deterministic. |a - b| is symmetric, giving a stable duration when
// exactly two events share a key.
val sfbestreduce = sfbestmap.reduceByKey((a, b) => math.abs(a - b))

// Persist the (key, duration) pairs back to HDFS.
sfbestreduce.saveAsTextFile("hdfs://supportsit1.core.sfp.com:9000/spark/temp/test9")