西安工程大学本科毕业设计(论文)开题报告
学院:计算机科学学院 专业: 填表时间:2023年3月28日
姓 名 | 班级 | 学号 | |||
题 目 | 基于大数据的地铁交通客流量预测系统的设计与实现 | ||||
选题的意义: 随着城市化进程的加快和人口的不断增长,地铁作为城市的一种主要交通工具,凭着其准时性、抗灾性等优越性能,越来越受到广大乘客的青睐。但是由于地铁站有着特殊的运营环境,其内的空间有限,客容量也就随之受到限制,因此,对进入地铁站点的乘客来说,在节假日、上下班高峰、气候恶劣变化、周围出现突发事件等情况下,过多的乘客涌入地铁站内,不仅会对乘客的候车时间造成延误,而且还有可能对乘客在地铁站内的安全造成威胁。所以合理预测地铁客流量对于优化地铁运营、提高乘客出行体验具有重要意义。客流量预测系统可以为地铁公司提供决策支持,以便灵活调整运营策略和资源配置,从而实现更高效的地铁运营管理。本设计将以地铁为研究对象,设计和实现基于大数据的地铁客流量预测系统,旨在提升地铁运营效率,改善乘客出行体验。 | |||||
研究综述: 现如今,中长期客流预测技术在我国的宏观调控中起着举足轻重的作用。但随着研究的深入发展,中长期客流预测技术发展已经趋于平缓,很难在短时间内再取得技术上的重大突破,越来越多的学者由此开始把注意力放到了短时客流预测上。与中长期客流预测相比,短时客流预测具有更短的时间粒度、更庞大的数据量和更多的细节需求,且预测模型方法尚不完善,相关研究成果较少,大多数都是针对某一具体路段的车流量进行预测。如今随着数据时代的到来,出行数据的收集与处理越来越简单并专业化,大量的数据使针对城市轨道交通短时客流预测的研究逐渐增多。 | |||||
论文(设计)写作提纲: 本文主要对地铁数据进行分析,系统的主要功能有:
用户查看可视化效果,而管理员在后端负责管理。管理员功能包含:个人信息;地铁数据,其中地铁数据包含出行高峰期的10个时间段、地铁限流的10个时间段、地铁限流的前10个站点、人流量随时间的变化趋势预测分析;用户管理。用户功能包含:登录注册;查看地铁数据,包含地铁站点数据、地铁热词词云图展示、地铁限流量饼状图分析前10个站点、地铁限流柱状图分析10个时间段。 |
研究工作进度安排: (根据学校安排自行修改) 1、查阅资料,拟定写作大纲,完成研究内容、现状、方法的研究等,提交开题报告; 2、基本完成毕业设计及毕业论文草稿的撰写; 3、提交中期检查相关资料,参加中期检查; 4、修改完善毕业设计,完成毕业设计和论文定稿的撰写; 5、提交答辩申请,参加答辩; 6、提交论文最终稿,打印装订论文,整理并上交全套毕业论文(设计)资料。 |
参考文献目录:
|
指导教师意见: 签名: 2023年3月29日 |
教研室主任意见: 签名: 2023年3月29日 |
核心算法代码分享如下:
package com.metroData;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;
/**
 * Flink SQL streaming job that reads metro card-swipe records from Kafka and
 * writes aggregated results into MySQL report tables.
 *
 * <p>Pipeline shape: source (Kafka topic {@code MetroSwipingData}) -> transform
 * (grouped / global SQL aggregations) -> sink (JDBC tables in the {@code report}
 * database, each declared with a non-enforced primary key).
 *
 * <p>Background note kept from the original author on SQL statement categories:
 * <ul>
 *   <li>DQL - data query language: {@code SELECT}</li>
 *   <li>DML - data manipulation language: {@code INSERT}, {@code DELETE}, {@code UPDATE}</li>
 *   <li>DDL - data definition language: creating tables, views, indexes, etc.</li>
 *   <li>DCL - data control language: grants, rollback, commit, etc.</li>
 * </ul>
 */
public class RealtimeAnalysis {

    // NOTE(review): connection settings and credentials are hard-coded exactly as in the
    // original job; consider supplying them from external configuration before deploying.
    private static final String JDBC_URL =
            "jdbc:mysql://bigdata:3306/report?useSSL=false&useUnicode=true&characterEncoding=utf-8";
    private static final String JDBC_USERNAME = "root";
    private static final String JDBC_PASSWORD = "123456";

    /**
     * Entry point: registers the Kafka source table and the MySQL sink tables, then
     * submits all aggregation INSERT statements together.
     *
     * @param args command-line arguments (not used)
     * @throws InterruptedException declared for signature compatibility with the
     *         original entry point; nothing in the current body throws it
     */
    public static void main(String[] args) throws InterruptedException {
        // Execution environment: streaming mode.
        EnvironmentSettings settings = EnvironmentSettings.newInstance().inStreamingMode().build();
        TableEnvironment tableEnv = TableEnvironment.create(settings);

        // When sinking to MySQL, a null value destined for a NOT NULL column makes the
        // insert fail; with this option such records are dropped instead.
        Configuration configuration = tableEnv.getConfig().getConfiguration();
        configuration.setString("table.exec.sink.not-null-enforcer", "drop");

        System.out.println("========================================== Flink任务开始启动... ==========================================");

        // Register the source table.
        System.out.println("开始创建source表 [MetroSwipingData]...");
        tableEnv.executeSql(kafkaSourceDdl());
        System.out.println("source表 [MetroSwipingData] 创建成功...");

        // Register the sink tables.
        System.out.println("开始创建sink表 [station_amount]...");
        // Accumulated revenue per station.
        tableEnv.executeSql(jdbcSinkDdl("station_amount",
                "  station STRING,\n"
                        + "  amount DECIMAL(16,2),\n"
                        + "  PRIMARY KEY (station) NOT ENFORCED\n"));
        // Entry / exit counts per station.
        tableEnv.executeSql(jdbcSinkDdl("station_peopleNum",
                "  station STRING,\n"
                        + "  enterNum INT,\n"
                        + "  outNum INT,\n"
                        + "  PRIMARY KEY (station) NOT ENFORCED\n"));
        // Revenue per line.
        tableEnv.executeSql(jdbcSinkDdl("line_amount",
                "  line STRING,\n"
                        + "  amount DECIMAL(16,2),\n"
                        + "  PRIMARY KEY (line) NOT ENFORCED\n"));
        // A per-day ridership sink (data_peopleNum: count(DISTINCT card_no) grouped by
        // close_date) existed only as commented-out code in the original and stays disabled.
        // Accumulated total revenue (single row, id = 1).
        tableEnv.executeSql(jdbcSinkDdl("total_amount",
                "  id INT,\n"
                        + "  total_amount DECIMAL(16,2),\n"
                        + "  PRIMARY KEY (id) NOT ENFORCED\n"));
        // Accumulated amount actually received (single row, id = 1).
        tableEnv.executeSql(jdbcSinkDdl("real_amount",
                "  id INT,\n"
                        + "  real_amount DECIMAL(16,2),\n"
                        + "  PRIMARY KEY (id) NOT ENFORCED\n"));
        // Accumulated number of rides (single row, id = 1).
        tableEnv.executeSql(jdbcSinkDdl("people_cnt",
                "  id INT,\n"
                        + "  people_cnt BIGINT,\n"
                        + "  PRIMARY KEY (id) NOT ENFORCED\n"));
        System.out.println("sink表 [station_amount | station_peopleNum | line_amount | total_amount | real_amount | people_cnt] 创建成功...");

        // Aggregation statements.
        // NOTE(review): station_amount sums deal_money, while line_amount and total_amount
        // (also described as revenue) sum deal_value - confirm which column is intended.
        String stationAmountInsert =
                "insert into station_amount select station,sum(deal_money) from MetroSwipingData group by station";
        // '地铁入站' / '地铁出站' are the deal_type values for metro entry / metro exit.
        String stationPeopleNumInsert = "insert into station_peopleNum select \n"
                + "station,\n"
                + "sum(case when deal_type='地铁入站' then 1 else 0 end)as enterNum,\n"
                + "sum(case when deal_type='地铁出站' then 1 else 0 end)as outNum\n"
                + "from MetroSwipingData group by station";
        String lineAmountInsert =
                "insert into line_amount select company_name,sum(deal_value) as amount from MetroSwipingData group by company_name";
        String totalAmountInsert =
                "insert into total_amount select 1 as id, sum(deal_value) as total_amount from MetroSwipingData";
        String realAmountInsert =
                "insert into real_amount select 1 as id, sum(deal_money) as real_amount from MetroSwipingData";
        // Only entry swipes are counted, so each ride is counted once.
        String peopleCntInsert =
                "insert into people_cnt select 1 as id, count(1) as people_cnt from MetroSwipingData where deal_type='地铁入站'";

        System.out.println("开始执行逻辑操作");
        // Submit every INSERT through one StatementSet so they are planned and submitted
        // as a single job, instead of one independently submitted job per executeSql call.
        tableEnv.createStatementSet()
                .addInsertSql(stationAmountInsert)
                .addInsertSql(stationPeopleNumInsert)
                .addInsertSql(lineAmountInsert)
                .addInsertSql(totalAmountInsert)
                .addInsertSql(realAmountInsert)
                .addInsertSql(peopleCntInsert)
                .execute();
        System.out.println("========================================== Flink任务启动成功... ==========================================");
    }

    /** Returns the DDL for the Kafka-backed source table {@code MetroSwipingData}. */
    private static String kafkaSourceDdl() {
        return "CREATE TABLE MetroSwipingData (\n"
                + " card_no VARCHAR,\n"
                + " deal_date VARCHAR,\n"
                + " deal_type VARCHAR,\n"
                + " deal_money DECIMAL(16,2),\n"
                + " deal_value DECIMAL(16,2),\n"
                + " equ_no VARCHAR,\n"
                + " company_name VARCHAR,\n"
                + " station VARCHAR,\n"
                + " car_no VARCHAR,\n"
                + " conn_mark VARCHAR,\n"
                + " close_date VARCHAR\n"
                + ") WITH (\n"
                + " 'connector' = 'kafka',\n"
                + " 'topic' = 'MetroSwipingData',\n"
                + " 'properties.bootstrap.servers' = 'bigdata:9092',\n"
                + " 'properties.group.id' = 'testGroup123',\n"
                + " 'scan.startup.mode' = 'earliest-offset',\n"
                + " 'format' = 'json',\n"
                + " 'json.fail-on-missing-field' = 'false',\n"
                + " 'json.ignore-parse-errors' = 'true'\n"
                + ")";
    }

    /**
     * Builds the DDL for a JDBC sink table; the Flink table name and the MySQL
     * {@code table-name} option are both set to {@code tableName}.
     *
     * @param tableName  table name used on both the Flink and the MySQL side
     * @param columnsDdl column and primary-key definitions, one per line, each line
     *                   ending with a newline and the last one without a trailing comma
     * @return the complete {@code CREATE TABLE ... WITH (...)} statement
     */
    private static String jdbcSinkDdl(String tableName, String columnsDdl) {
        return "CREATE TABLE " + tableName + " (\n"
                + columnsDdl
                + ") WITH (\n"
                + "  'connector' = 'jdbc',\n"
                + "  'url' = '" + JDBC_URL + "',\n"
                + "  'table-name' = '" + tableName + "',\n"
                + "  'username' = '" + JDBC_USERNAME + "',\n"
                + "  'password' = '" + JDBC_PASSWORD + "',\n"
                + "  'connection.max-retry-timeout' = '60s',\n"
                + "  'sink.max-retries' = '3',\n"
                + "  'lookup.max-retries' = '3'\n"
                + ")";
    }
}