Spark---Datasource(JDBC)---java

package com.spark.sparksql.datasource.java;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.api.java.function.VoidFunction;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;

import scala.Tuple2;

public class JDBCDataSource {

    /*
     * ./bin/spark-submit --master spark://spark001:7077 --class com.spark.sparksql.datasource.java.JDBCDataSource 
     * --driver-class-path ./lib/mysql-connector-java-5.1.32-bin.jar 
     * --jars ./lib/mysql-connector-java-5.1.32-bin.jar  
     * sparksqljdbcdatasource.jar 
     * 
     * When running in standalone cluster mode, configure the following in spark-env.sh:
     * export SPARK_CLASSPATH=/usr/hadoopsoft/spark-1.6.0-bin-hadoop2.4/lib/mysql-connector-java-5.1.8-bin.jar
     * ./bin/spark-submit --master spark://node21:7077 --deploy-mode cluster --class com.spark.sparksql.datasource.java.JDBCDataSource 
     * hdfs://node22:8020/sparksql20170807.jar 
     * 
     * ./bin/spark-submit --master yarn-client --class com.spark.sparksql.datasource.java.JDBCDataSource ./sparksql20170807.jar
     * 
     * Configure the following two lines in conf/spark-defaults.conf:
     * spark.driver.extraClassPath=/usr/hadoopsoft/spark-1.6.0-bin-hadoop2.4/lib/mysql-connector-java-5.1.8-bin.jar
     * spark.executor.extraClassPath=/usr/hadoopsoft/spark-1.6.0-bin-hadoop2.4/lib/mysql-connector-java-5.1.8-bin.jar
     * ./bin/spark-submit --master yarn-cluster --class com.spark.sparksql.datasource.java.JDBCDataSource ./sparksql20161101.jar 
     */

    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("JDBCDataSource");
        JavaSparkContext sc = new JavaSparkContext(conf);
        SQLContext sqlContext = new SQLContext(sc);

        Map<String,String> options = new HashMap<String,String>();
        options.put("url", "jdbc:mysql://node15:3306/test");
        options.put("dbtable", "student_infos");
        options.put("user","root");
        options.put("password", "123123");
        DataFrame studentInfosDF = sqlContext.read().format("jdbc").options(options).load();

        options.put("dbtable", "student_scores");
        DataFrame studentScoresDF = sqlContext.read().format("jdbc").options(options).load();
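
        // Note: for large tables the JDBC source can also read in parallel by setting the
        // partitionColumn, lowerBound, upperBound and numPartitions options; the small
        // tables used here are fine as single-partition reads.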

        // Convert the two DataFrames to JavaPairRDDs keyed by student name and join them
        JavaPairRDD<String, Tuple2<Integer, Integer>> studentsRDD = studentInfosDF.javaRDD().mapToPair(new PairFunction<Row, String, Integer>() {

            private static final long serialVersionUID = 1L;

            @Override
            public Tuple2<String, Integer> call(Row row) throws Exception {
                return new Tuple2<String,Integer>(row.getString(0)
                        , Integer.valueOf(String.valueOf(row.get(1))));
            }
        }).join(studentScoresDF.javaRDD().mapToPair(new PairFunction<Row, String, Integer>() {

            private static final long serialVersionUID = 1L;

            @Override
            public Tuple2<String, Integer> call(Row row) throws Exception {
                return new Tuple2<String,Integer>(row.getString(0)
                        , Integer.valueOf(String.valueOf(row.get(1))));
            }
        }));

        // Build (name, age, score) Rows and keep only the students scoring above 80
        JavaRDD<Row> studentsRowRDD = studentsRDD.map(new Function<Tuple2<String,Tuple2<Integer,Integer>>, Row>() {

            private static final long serialVersionUID = 1L;

            @Override
            public Row call(Tuple2<String, Tuple2<Integer, Integer>> tuple)
                    throws Exception {
                return RowFactory.create(tuple._1, tuple._2._1, tuple._2._2);
            }
        }).filter(new Function<Row, Boolean>() {

            private static final long serialVersionUID = 1L;

            @Override
            public Boolean call(Row row) throws Exception {
                return row.getInt(2) > 80;
            }
        });

        List<StructField> structFields = new ArrayList<StructField>();
        structFields.add(DataTypes.createStructField("name", DataTypes.StringType, true));
        structFields.add(DataTypes.createStructField("age", DataTypes.IntegerType, true));
        structFields.add(DataTypes.createStructField("score", DataTypes.IntegerType, true));
        StructType schema = DataTypes.createStructType(structFields);

        DataFrame studentsDF = sqlContext.createDataFrame(studentsRowRDD, schema);
        Row[] rows = studentsDF.collect();
        for(Row row : rows){
            System.out.println(row);
        }
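
        // Optional (not part of the original flow): because studentsDF now has a proper schema,
        // it can also be queried with SQL by registering it as a temporary table. The table name
        // "good_students_tmp" is just an illustrative choice.
        studentsDF.registerTempTable("good_students_tmp");
        sqlContext.sql("select name, score from good_students_tmp where score > 90").show();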

        // Save the DataFrame's rows to a MySQL table. The same pattern is very common in practice
        // for writing out to MySQL, Redis, or HBase. Note that this version uses foreach, which
        // opens a connection per row; a foreachPartition variant is sketched after the class.
        studentsDF.javaRDD().foreach(new VoidFunction<Row>() {

            private static final long serialVersionUID = 1L;

            @Override
            public void call(Row row) throws Exception {
                String sql = "insert into good_student_infos values("
                        + "'" + row.getString(0) + "',"
                        + Integer.valueOf(String.valueOf(row.get(1))) + ","
                        + Integer.valueOf(String.valueOf(row.get(2))) + ")" ;
                Class.forName("com.mysql.jdbc.Driver");
                Connection conn = null;
                Statement stat = null;
                try {
                    conn = DriverManager.getConnection("jdbc:mysql://node15:3306/test","root","123123");
                    stat = conn.createStatement();
                    stat.executeUpdate(sql);
                } catch (Exception e) {
                    e.printStackTrace();
                }finally{
                    if(stat != null){
                        stat.close();
                    }
                    if(conn != null){
                        conn.close();
                    }
                }
            }
        });
    }
}
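
As the comment before the write step notes, foreachPartition is usually preferred over foreach for writing to an external store, because it opens one connection per partition instead of one per row. Below is a minimal sketch of that variant for the same good_student_infos table and connection settings; it is an illustrative replacement for the foreach block above and additionally needs java.sql.PreparedStatement and java.util.Iterator imports.

        studentsDF.javaRDD().foreachPartition(new VoidFunction<Iterator<Row>>() {

            private static final long serialVersionUID = 1L;

            @Override
            public void call(Iterator<Row> rows) throws Exception {
                Class.forName("com.mysql.jdbc.Driver");
                Connection conn = null;
                PreparedStatement pstat = null;
                try {
                    // One connection and one prepared statement per partition
                    conn = DriverManager.getConnection("jdbc:mysql://node15:3306/test", "root", "123123");
                    pstat = conn.prepareStatement("insert into good_student_infos values(?, ?, ?)");
                    while (rows.hasNext()) {
                        Row row = rows.next();
                        pstat.setString(1, row.getString(0));
                        pstat.setInt(2, Integer.valueOf(String.valueOf(row.get(1))));
                        pstat.setInt(3, Integer.valueOf(String.valueOf(row.get(2))));
                        pstat.addBatch();
                    }
                    // Send the whole partition's inserts in one batch
                    pstat.executeBatch();
                } finally {
                    if (pstat != null) {
                        pstat.close();
                    }
                    if (conn != null) {
                        conn.close();
                    }
                }
            }
        });

If the target is plain JDBC and no custom per-row logic is needed, Spark 1.4+ can also write the DataFrame directly with studentsDF.write().mode(SaveMode.Append).jdbc(url, table, properties); the manual approach above is what generalizes to sinks such as Redis or HBase.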