Setting up a Spark run script

1. Create a shell script (sparkrun.sh) as follows:

#!/usr/bin/env bash
# how much memory Spark may use (legacy SPARK_MEM setting)
export SPARK_MEM=3g
# classpath: Spark conf dir, the Spark assembly jar, plus any jars in the current directory (the ./* wildcard)
CP=$CLASSPATH:/opt/spark-0.9.0-incubating-bin-hadoop2/conf:/opt/spark-0.9.0-incubating-bin-hadoop2/assembly/target/scala-2.10/spark-assembly_2.10-0.9.0-incubating-hadoop2.2.0.jar:./*
exec java -cp "$CP" "$@"

You can then run a class with: ./sparkrun.sh [MainClass]
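For example, once the K-means demo below is compiled into a JAR and dropped in the current directory (so the trailing ./* classpath entry finds it), it can be started with:

./sparkrun.sh MyKMeans2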


2. Build a Spark demo (in Java or Scala), package it as a JAR file, and place the JAR in the current directory so the ./* classpath entry in sparkrun.sh picks it up.

    The Spark application depends on the following JARs:

spark-mllib_2.10-0.9.0-incubating.jar
spark-streaming_2.10-0.9.0-incubating.jar
spark-graphx_2.10-0.9.0-incubating.jar
spark-assembly_2.10-0.9.0-incubating-hadoop2.2.0.jar
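Note that in the standard 0.9 binary build the assembly jar already bundles these modules, and the trailing ./* entry in sparkrun.sh also picks up any extra jars (including your packaged demo) placed in the working directory, so the script normally needs no change.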

  Example 1 (Java): a hand-written K-means clustering job on Spark.

 

import java.io.Serializable;
import java.util.List;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.broadcast.Broadcast;
import scala.Tuple2;

public class MyKMeans2 {

	/**
	 * @param args
	 */
	public static void main(String[] args) {

		int k = 3; // number of clusters

		// arr[0]: Spark master URL; arr[1]: input data on HDFS
		String[] arr = new String[2];
		arr[0] = "spark://masterIP:7077";
		arr[1] = "hdfs://namenodeIP:9000/test/kmeans_data.txt";
		JavaSparkContext ctx = new JavaSparkContext(arr[0], "MyKMeans",
				System.getenv("SPARK_HOME"),
				JavaSparkContext.jarOfClass(MyKMeans2.class));
		JavaRDD<String> data = ctx.textFile(arr[1]);

		// at most 20 iterations, stopping early once the centers move less than threshold
		int iter = 20;
		double threshold = 0.0001;

		// sample k points from the input as the initial cluster centers
		List<String> ls = data.takeSample(false, k, 1);
		System.out.println("LS:=" + ls.size());

		Point[] curCenterPoint = new Point[k];
		for (int i = 0; i < k; i++) {
			curCenterPoint[i] = new Point();
			String str[] = ls.get(i).split(" ");
			curCenterPoint[i].setX1(Double.parseDouble(str[0]));
			curCenterPoint[i].setX2(Double.parseDouble(str[1]));
			curCenterPoint[i].setX3(Double.parseDouble(str[2]));
		}

		JavaRDD<Point> data1 = null;
		JavaPairRDD<Integer, String> data2;
		// note: this broadcast is never read below; the current centers reach the
		// workers through the ComputerClass closure instead
		Broadcast<Point[]> bp = ctx.broadcast(curCenterPoint);

		for (int i = 0; i < iter; i++) {
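			// assign every point to the nearest of the current centers (sets p.type)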
			data1 = data.map(new ComputerClass(curCenterPoint, k));

			
			data2 = data1.map(new PairFunction<Point, Integer, String>() {
				@Override
				public Tuple2<Integer, String> call(Point p) throws Exception {
					int k = p.getType();
					String v = "" + p.getX1() + " " + p.getX2() + " "
							+ p.getX3();
					Tuple2<Integer, String> t = new Tuple2<Integer, String>(k,
							v);
					return t;
				}
			});
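			// data2: each point keyed by its assigned cluster id, with "x1 x2 x3" as the value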

			// average the points of each cluster by repeated pairwise averaging
			// (note: this only approximates the true mean for clusters of 3+ points)
			JavaPairRDD<Integer, String> data3 = data2
					.reduceByKey(new Function2<String, String, String>() {

						@Override
						public String call(String arg0, String arg1)
								throws Exception {
							String strs1[] = arg0.split(" ");
							String strs2[] = arg1.split(" ");

							double x1 = (Double.parseDouble(strs1[0]) + Double
									.parseDouble(strs2[0])) / 2;
							double x2 = (Double.parseDouble(strs1[1]) + Double
									.parseDouble(strs2[1])) / 2;
							double x3 = (Double.parseDouble(strs1[2]) + Double
									.parseDouble(strs2[2])) / 2;

							return x1 + " " + x2 + " " + x3;
						}
					});

			// start from the old centers so an empty cluster keeps its center, and
			// index by the cluster id (the key): collect() gives no order guarantee
			Point[] NewCenter = curCenterPoint.clone();

			List<Tuple2<Integer, String>> ls3 = data3.collect();

			for (Tuple2<Integer, String> t : ls3) {
				int idx = t._1();
				NewCenter[idx] = new Point();
				NewCenter[idx].setType(idx);
				String strs[] = t._2().split(" ");
				NewCenter[idx].setX1(Double.parseDouble(strs[0]));
				NewCenter[idx].setX2(Double.parseDouble(strs[1]));
				NewCenter[idx].setX3(Double.parseDouble(strs[2]));
			}

			// stop when the total squared shift of the centers drops below the threshold
			double ms = computerCenterDistance(curCenterPoint, NewCenter, k);
			if (ms < threshold) {
				break;
			}

			// turn the classified points back into "x1 x2 x3" lines for the next pass
			data = data1.map(new Function<Point, String>() {
				@Override
				public String call(Point p) throws Exception {
					return p.getX1() + " " + p.getX2() + " " + p.getX3();
				}
			});

			System.out.println("ite:=" + i);

			curCenterPoint = NewCenter;

		}
		
		System.out.println("classfy point====================");
		List<Point> ls1 = data1.collect();
		for (Point p : ls1) {
			System.out.println(p.getType() + " " + p.getX1() + " "
					+ p.getX2() + " " + p.getX3());
		}


		System.out.println("center size:="+curCenterPoint.length);
		for (int i = 0; i < k; i++) {
			Integer type = curCenterPoint[i].getType();
			String x1 = curCenterPoint[i].getX1() + "";
			String x2 = curCenterPoint[i].getX2() + "";
			String x3 = curCenterPoint[i].getX3() + "";
			System.out.println(type + " " + x1 + " " + x2 + " " + x3);
		}
	}

	/** Squared Euclidean distance between two points. */
	static double computerPointDistance(Point p1, Point p2) {
		double ret = 0;
		double x1 = p1.getX1();
		double x2 = p1.getX2();
		double x3 = p1.getX3();

		double y1 = p2.getX1();
		double y2 = p2.getX2();
		double y3 = p2.getX3();

		ret = (Math.pow((x1 - y1), 2) + Math.pow((x2 - y2), 2) + Math.pow(
				(x3 - y3), 2));

		return ret;
	}

	/** Sum of squared distances between corresponding old and new centers. */
	static double computerCenterDistance(Point[] oldCenter, Point[] newCenter,
			int k) {
		double ret = 0;

		for (int i = 0; i < k; i++) {
			double ox1 = oldCenter[i].getX1();
			double ox2 = oldCenter[i].getX2();
			double ox3 = oldCenter[i].getX3();

			double nx1 = newCenter[i].getX1();
			double nx2 = newCenter[i].getX2();
			double nx3 = newCenter[i].getX3();

			ret += (Math.pow((nx1 - ox1), 2) + Math.pow((nx2 - ox2), 2) + Math
					.pow((nx3 - ox3), 2));
		}

		return ret;
	}

	/** Maps an input text line to a Point labelled with the index of its nearest center. */
	static class ComputerClass extends Function<String, Point> {
		private Point[] centerPoints;
		private int k;

		public ComputerClass(Point[] points, int k) {
			this.centerPoints = points;
			this.k = k;
		}

		@Override
		public Point call(String line) throws Exception {
			Point p = new Point();
			String[] strs = line.split(" ");
			p.setX1(Double.parseDouble(strs[0]));
			p.setX2(Double.parseDouble(strs[1]));
			p.setX3(Double.parseDouble(strs[2]));

			// find the nearest center
			int type1 = 0;
			double D = Double.MAX_VALUE;

			for (int j = 0; j < k; j++) {
				double temp1 = computerPointDistance(p, centerPoints[j]);
				if (temp1 < D) {
					D = temp1;
					type1 = j;
				}
			}

			p.setType(type1);
			return p;
		}
	}

	/** A 3-dimensional point with a cluster id. */
	static class Point implements Serializable {
		private static final long serialVersionUID = 1L;
		private int type;
		private double x1;
		private double x2;
		private double x3;

		public int getType() {
			return type;
		}

		public void setType(int type) {
			this.type = type;
		}

		public double getX1() {
			return x1;
		}

		public void setX1(double x1) {
			this.x1 = x1;
		}

		public double getX2() {
			return x2;
		}

		public void setX2(double x2) {
			this.x2 = x2;
		}

		public double getX3() {
			return x3;
		}

		public void setX3(double x3) {
			this.x3 = x3;
		}
	}

}
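Since spark-mllib is already in the dependency list above, the same clustering can also be delegated to MLlib's built-in KMeans instead of the hand-rolled loop. The following is only a minimal sketch, assuming the Spark 0.9 Java API (where MLlib represents points as plain double[] arrays); the class name MLlibKMeansSketch is hypothetical and the master URL and input path mirror the example above.

import java.util.Arrays;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.mllib.clustering.KMeans;
import org.apache.spark.mllib.clustering.KMeansModel;

public class MLlibKMeansSketch {
	public static void main(String[] args) {
		JavaSparkContext ctx = new JavaSparkContext("spark://masterIP:7077", "MLlibKMeansSketch",
				System.getenv("SPARK_HOME"), JavaSparkContext.jarOfClass(MLlibKMeansSketch.class));

		// parse each "x1 x2 x3" line into a double[] point
		JavaRDD<double[]> points = ctx.textFile("hdfs://namenodeIP:9000/test/kmeans_data.txt")
				.map(new Function<String, double[]>() {
					@Override
					public double[] call(String line) throws Exception {
						String[] strs = line.split(" ");
						double[] p = new double[strs.length];
						for (int i = 0; i < strs.length; i++) {
							p[i] = Double.parseDouble(strs[i]);
						}
						return p;
					}
				});

		// k = 3 clusters, at most 20 iterations, mirroring the hand-written version
		KMeansModel model = KMeans.train(points.rdd(), 3, 20);
		for (double[] center : model.clusterCenters()) {
			System.out.println(Arrays.toString(center));
		}
	}
}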

The test data (kmeans_data.txt) is as follows:

0.0 0.0 0.0
0.1 0.1 0.1
0.2 0.2 0.2
9.0 9.0 9.0
9.1 9.1 9.1
9.2 9.2 9.2
2 2 2
2.1 2.1 2.1
2.3 2.4 2.6
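With k = 3, one would expect these nine points to settle into the three obvious groups: the points near 0.1, the points near 2.2, and the points near 9.1.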


Example 2 (Scala): sorting a text file by a numeric key.

package demo

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.serializer.KryoRegistrator
import com.esotericsoftware.kryo.Kryo
import org.apache.spark.rdd._
import org.apache.spark.SparkContext._
import org.apache.spark._
import org.apache.spark.api.java._

class MyRegistrator extends KryoRegistrator {
  override def registerClasses(kryo: Kryo) {
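    // register application classes here to let Kryo serialize them more compactly;
    // this demo registers nothing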
  }
}

object SortTest {
  def main(args: Array[String]): Unit = {
    // arr(0): Spark master URL; arr(1): input path on HDFS (file name taken from args(0))
    val arr = new Array[String](2)
    arr(0) = "spark://masterIP:7077"
    arr(1) = "hdfs://nameNodeIP:9000/test/" + args(0)
    val conf = new SparkConf().setMaster(arr(0)).setAppName("sorttest")
    // use Kryo serialization with the registrator defined above
    conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
    conf.set("spark.kryo.registrator", "demo.MyRegistrator")
    // fraction of executor memory reserved for caching RDDs
    conf.set("spark.storage.memoryFraction", "0.5")
    conf.setSparkHome(System.getenv("SPARK_HOME"))
    conf.setJars(SparkContext.jarOfClass(this.getClass()))
    val sc = new SparkContext(conf)
    // key each line by its first comma-separated field parsed as a Long
    val dataset1: RDD[(Long, String)] = sc.textFile(arr(1), 1000).map(line => (line.split(',')(0).toLong, line))
    // sort by key in descending order, then take the first 100 entries
    val datasets = dataset1.sortByKey(false, 1000)
    val arrdata2: Array[(Long, String)] = datasets.take(100)
    for (i <- arrdata2.indices) {
      println(i + "  " + arrdata2(i)._1 + " " + arrdata2(i)._2)
    }
    println(arrdata2.length)
  }
}
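To launch it through the script from step 1 (with the JAR in the current directory), pass the input file name as the first argument; it is appended to hdfs://nameNodeIP:9000/test/. For instance, with a hypothetical file sort_data.csv whose lines start with a numeric, comma-separated field:

./sparkrun.sh demo.SortTest sort_data.csv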


 

 
