Loading data from a MySQL table into Spark for analysis

1. Project Maven dependencies

<properties>
    <spark_version>2.3.1</spark_version>
    <!-- elasticsearch-->
    <elasticsearch.version>5.5.2</elasticsearch.version>
    <fastjson.version>1.2.28</fastjson.version>
    <elasticsearch-hadoop.version>6.3.2</elasticsearch-hadoop.version>
    <elasticsearch-spark.version>5.5.2</elasticsearch-spark.version>
</properties>
<dependencies>
    <!-- https://mvnrepository.com/artifact/org.apache.spark/spark-core -->
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-core_2.11</artifactId>
        <version>${spark_version}</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.spark/spark-sql -->
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-sql_2.11</artifactId>
        <version>${spark_version}</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.spark/spark-yarn -->
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-yarn_2.11</artifactId>
        <version>${spark_version}</version>
    </dependency>
    <dependency>
        <groupId>org.elasticsearch</groupId>
        <artifactId>elasticsearch-spark-20_2.11</artifactId>
        <version>${elasticsearch-spark.version}</version>
    </dependency>
    <dependency>
        <groupId>mysql</groupId>
        <artifactId>mysql-connector-java</artifactId>
        <version>5.1.46</version>
    </dependency>
</dependencies>
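
The Spark artifacts above carry the _2.11 suffix, so they must match the Scala version of the Spark build on the cluster. If a job later fails because the JDBC driver cannot be found, a quick sanity check is to load the driver class explicitly. The DriverCheck class below is a minimal illustrative sketch, not part of the project:

// Illustrative only: verify the MySQL connector is on the classpath.
public class DriverCheck {
    public static void main(String[] args) {
        try {
            Class.forName("com.mysql.jdbc.Driver");
            System.out.println("MySQL JDBC driver found.");
        } catch (ClassNotFoundException e) {
            System.out.println("MySQL JDBC driver missing; check the mysql-connector-java dependency.");
        }
    }
}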

2. Loading the database data with Spark

import org.apache.spark.SparkConf;
import org.apache.spark.sql.DataFrameReader;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;

public class GoodsFromMySQL {

    /**
     * Load the goods data from the database.
     *
     * @param sparkSession spark session
     */
    public static void loadGoodsInfo(SparkSession sparkSession) {
        String url = "jdbc:mysql://x.x.x.x:3306/db-test";

        // Note: the alias must be a valid identifier; a hyphenated name like "my-goods" would fail to parse.
        String sql = "(SELECT item_name AS itemName, goods_category AS goodsCategory FROM goods " +
                "WHERE dict_type = '100203' AND item_name IS NOT NULL) AS my_goods";

        // In Spark 2.x the SparkSession is the entry point; the older SQLContext is deprecated.
        DataFrameReader reader = sparkSession.read().format("jdbc")
                .option("url", url)
                .option("dbtable", sql)
                .option("driver", "com.mysql.jdbc.Driver")
                .option("user", "root")
                .option("password", "xxxxx");


        Dataset<Row> goodsDataSet = reader.load();

        // Print the schema of the loaded DataFrame.
        goodsDataSet.printSchema();

        // Persist the result as JSON for later analysis.
        goodsDataSet.write().mode(SaveMode.Overwrite).json("/data/app/source_new.json");
    }


    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("my-app");
        SparkSession sparkSession = SparkSession.builder().config(conf).getOrCreate();

        loadGoodsInfo(sparkSession);
    }
}
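
Once loaded, the Dataset can also be analyzed in place before (or instead of) being written out. Below is a minimal sketch, assuming the itemName/goodsCategory columns from the query above; the categoryCounts name is illustrative:

        // Illustrative analysis: count goods per category, largest categories first.
        Dataset<Row> categoryCounts = goodsDataSet.groupBy("goodsCategory").count();
        categoryCounts.orderBy(org.apache.spark.sql.functions.col("count").desc()).show(20);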

3. Spark can load data from many different databases; you only need to depend on the matching JDBC driver package and make small adjustments to the code.

  Starting from the Java code above, only the JDBC URL and the driver class passed to the "driver" option need to change, as sketched below.
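
For example, to run the same query against PostgreSQL instead of MySQL, swap the mysql-connector-java dependency for org.postgresql:postgresql and change only the URL and driver class. A hypothetical variant of the reader setup (host, port, and credentials are placeholders):

        String url = "jdbc:postgresql://x.x.x.x:5432/db-test";

        DataFrameReader reader = sparkSession.read().format("jdbc")
                .option("url", url)
                .option("dbtable", sql)
                .option("driver", "org.postgresql.Driver")
                .option("user", "postgres")
                .option("password", "xxxxx");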
