分析
数据库的选型
创建数据库
-- Reporting database for the Spark real-time GMV/order stats job.
-- IF NOT EXISTS makes the script re-runnable; utf8 matches the tables below.
CREATE DATABASE IF NOT EXISTS spark_gmall_report DEFAULT CHARACTER SET utf8;
-- Stores the last committed Kafka offset per (consumer group, topic, partition)
-- so a restarted streaming job can resume exactly where it left off.
-- NOTE: `offset` is a reserved word in MySQL 8+, so the backticks are required.
CREATE TABLE IF NOT EXISTS `offset` (
    `group_id`     VARCHAR(200) NOT NULL,
    `topic`        VARCHAR(200) NOT NULL,
    `partition_id` INT          NOT NULL,
    `topic_offset` BIGINT       DEFAULT NULL,  -- NULL until the first commit for this partition
    PRIMARY KEY (`group_id`, `topic`, `partition_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
-- Per-SPU order amount aggregated by stat time, written by the streaming job.
-- NOTE(review): `spu_name` in the PRIMARY KEY means a renamed SPU produces a
-- new row for the same spu_id — confirm this is intended; keying on
-- (stat_time, spu_id) alone may be safer.
CREATE TABLE IF NOT EXISTS `spu_order_final_detail_amount_stat` (
    `stat_time` DATETIME      NOT NULL,
    `spu_id`    VARCHAR(20)   NOT NULL,
    `spu_name`  VARCHAR(200)  NOT NULL,
    `amount`    DECIMAL(16,2) DEFAULT NULL,  -- DECIMAL keeps money arithmetic exact
    PRIMARY KEY (`stat_time`, `spu_id`, `spu_name`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
增加配置application.conf
在spark-gmall-dw-realtime\src\main\resources\application.conf
# ScalikeJDBC default connection settings (read from application.conf by default,
# per the note above this snippet in the original document).
db.default.driver="com.mysql.jdbc.Driver"
# characterEncoding matches the utf8 tables; useSSL=false suppresses the MySQL SSL warning.
db.default.url="jdbc:mysql://hadoop2/spark_gmall_report?characterEncoding=utf-8&useSSL=false"
db.default.user="root"
# NOTE(review): plaintext root password committed in config — presumably dev-only; confirm.
db.default.password="000000"
POM
此处引用了一个 Scala 的 MySQL 工具:ScalikeJDBC
配置文件: 默认使用 application.conf
<properties>
<spark.version>2.4.0</spark.version>
<scala.version>2.11.8</scala.version>
<kafka.version>1.0.0</kafka.version>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
<java.version>1.8</java.version>
</properties>
<dependencies>
<!-- JSON (de)serialization. Bumped from 1.2.56: fastjson releases below 1.2.83
     are affected by deserialization RCE vulnerabilities (e.g. CVE-2022-25845);
     1.2.83 is the patched drop-in replacement on the 1.2.x line. -->
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.83</version>
</dependency>
<!-- Spark core for Scala 2.11; version comes from the <spark.version> property.
     Fix: the original had a line break inside ${spark.version}, which Maven
     property interpolation does not resolve. -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.11</artifactId>
<version>${spark.version}</version>
</dependency>
<!-- Spark Streaming (DStream API) for Scala 2.11, same version property as core.
     Fix: rejoined the line break inside ${spark.version} that would have
     defeated Maven property interpolation. -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming_2.11</artifactId>
<version>${spark.version}</version>
</dependency>
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka-clients</artifactId>