0 前言
🔥这两年开始毕业设计和毕业答辩的要求和难度不断提升,传统的毕设题目缺少创新和亮点,往往达不到毕业答辩的要求,这两年不断有学弟学妹告诉学长自己做的项目系统达不到老师的要求。
为了让大家能够顺利并以最少的精力通过毕设,学长分享优质毕业设计项目,今天要分享的是
🚩 大数据医学分析 猴痘疾病数据分析与可视化
🥇学长这里给一个题目综合评分(每项满分5分)
- 难度系数:2分
- 工作量:2分
- 创新点:4分
🧿 选题指导, 项目分享:见文末
1 课题背景
猴痘的全球流行数据可视化
2 数据处理
# 加载包
library(ggplot2)
library(dplyr)
library(maps)
library(viridis)
# Load the worldwide confirmed-case time series.
# Dataset source: https://github.com/globaldothealth/monkeypox
# `Country` is read as character (the alternative "factor" was tried and
# dropped) so country names can be rewritten in place later on.
case_series <- read.csv(
  "/home/mw/input/monkeypot5928/timeseries-country-confirmed.csv",
  colClasses = c("Country" = "character")
)
head(case_series)

# World map polygons: longitude/latitude boundary points for each country,
# keyed by the `region` column.
world_map <- map_data("world")
head(world_map)
让我们定义一个具有以下特征的函数:
输入:日期、纬度范围、经度范围
输出:所提供日期的累积事例计数地图,以输入的纬度和经度值为界
# 自定义函数
# Draw a choropleth of cumulative case counts for one reporting date.
#
# Reads the global data frames `case_series` (case time series) and
# `world_map` (polygons from map_data("world")).
#
# Arguments:
#   date  Date value compared with `==` against `case_series$Date`,
#         e.g. "2022-07-29".
#   xlim  Length-2 numeric longitude range passed to coord_map().
#   ylim  Length-2 numeric latitude range passed to coord_map().
#
# Returns the ggplot object; regions without a reported count are filled
# with NA.
plot_case_map <- function(date, xlim, ylim) {
  # Pre-process case and map data.
  # Columns are picked by position: the 4th becomes `region`, the 3rd is used
  # below as `Cumulative_cases` -- presumably Country and Cumulative_cases in
  # the CSV; confirm against the file header.
  case_map <- case_series[which(case_series$Date == date), c(4, 3)]
  colnames(case_map)[1] <- "region"

  # Align country spellings with the names used by the world map data.
  renames <- c(
    "United States" = "USA",
    "United Kingdom" = "UK",
    "Democratic Republic Of The Congo" = "Democratic Republic of the Congo",
    "Bosnia And Herzegovina" = "Bosnia and Herzegovina"
  )
  to_rename <- case_map$region %in% names(renames)
  case_map$region[to_rename] <- unname(renames[case_map$region[to_rename]])

  # Gibraltar is removed from the case table before joining.
  if ("Gibraltar" %in% case_map$region) {
    case_map <- case_map[!(case_map$region %in% "Gibraltar"), ]
  }

  # Add an NA row for every map region that has no case record, so that those
  # regions are still drawn after the join.
  unreported <- setdiff(world_map$region, case_map$region)
  if (length(unreported) > 0) {
    case_map_other <- as.data.frame(cbind(unreported, NA))
    colnames(case_map_other) <- colnames(case_map)
    case_map <- rbind(case_map, case_map_other)
  }
  case_map$Cumulative_cases <- as.numeric(case_map$Cumulative_cases)
  case_map <- left_join(case_map, world_map, by = "region")

  # Plot case map
  ggplot(case_map, aes(long, lat, group = group)) +
    geom_polygon(aes(fill = Cumulative_cases), color = "white", size = 0.2) +
    scale_fill_viridis_c() +
    theme_linedraw() +
    theme(legend.position = "right") +
    labs(fill = "Cumulative cases") +
    theme(legend.direction = "vertical") +
    coord_map(xlim = xlim, ylim = ylim)
}
根据函数绘制地图
3 数据可视化
# World map of cumulative monkeypox cases as of 2022-07-29
plot_case_map("2022-07-29", c(-180, 180), c(-55, 90))
# World map of cumulative monkeypox cases as of 2022-05-29
plot_case_map("2022-05-29", c(-180, 180), c(-55, 90))
# Cases as of 2022-07-29, zoomed to longitude -22..38 and latitude 35..64
plot_case_map("2022-07-29", c(-22, 38), c(35, 64))
# US state-level case counts.
# Data source: https://www.cdc.gov/poxvirus/monkeypox/response/2022/us-map.html
us_case_map <- read.csv("/home/mw/input/monkeypot5928/Monkeypox.csv")
head(us_case_map)
us_map <- map_data("state")

# Drop the listed rows and the third column. A logical mask is used instead of
# `-which(...)`, which would silently drop every row if nothing matched.
excluded_states <- c("Alaska", "Hawaii", "Puerto Rico", "Non-US Resident")
us_case_map <- us_case_map[!(us_case_map$State %in% excluded_states), -3]

# The first column becomes the join key; the code lowercases it before
# joining against `us_map$region`.
colnames(us_case_map)[1] <- "region"
us_case_map$region <- tolower(us_case_map$region)

# Add an NA row for every map region without a case record so it is still
# drawn after the join.
us_unreported <- setdiff(us_map$region, us_case_map$region)
if (length(us_unreported) > 0) {
  us_case_map_other <- as.data.frame(cbind(us_unreported, NA))
  colnames(us_case_map_other) <- colnames(us_case_map)
  us_case_map <- rbind(us_case_map, us_case_map_other)
}
us_case_map$Cases <- as.numeric(us_case_map$Cases)
us_case_map <- left_join(us_case_map, us_map, by = "region")

# Plot US case map
ggplot(us_case_map, aes(long, lat, group = group)) +
  geom_polygon(aes(fill = Cases), color = "white", size = 0.2) +
  scale_fill_viridis_c() +
  theme_linedraw() +
  theme(legend.position = "right") +
  labs(fill = "Total cases") +
  theme(legend.direction = "vertical")
让我们定义一个函数,该函数将国家/地区名称作为输入并绘制:
x 轴上的日期
左侧 Y 轴上的累积案例计数(红色)
右侧 Y 轴上的每日案例计数(蓝色)
# Plot the reported case time series of one country on a dual-axis chart.
#
# Reads the global data frame `case_series`.
#
# Arguments:
#   country  Country name compared with `==` against `case_series$Country`,
#            e.g. "Canada".
#
# Draws cumulative cases in red against the left axis and the `Cases` column
# in blue against the right axis. Called for its plotting side effect.
plot_case_series <- function(country) {
  country_series <- case_series[which(case_series$Country == country), ]

  # Reserve outer margin on the right for the second axis label, and restore
  # the previous setting when the function exits.
  old_par <- par(oma = c(1, 1, 1, 3))
  on.exit(par(old_par), add = TRUE)

  # Plot cumulative case counts in red
  plot(
    country_series$Cumulative_cases,
    type = "l", xaxt = "n", xlab = NA,
    main = paste(country, "reported case time series"),
    ylab = "Cumulative cases", col.lab = "red", col = "red"
  )
  # NOTE(review): the tick-label styling arguments (las, cex.axis) are a
  # reconstruction; the original arguments were lost in extraction.
  axis(
    1,
    at = seq_len(nrow(country_series)),
    labels = country_series$Date,
    las = 2, cex.axis = 0.8
  )

  # Plot daily case counts in blue, overlaid on the same plotting region
  par(new = TRUE)
  plot(
    country_series$Cases,
    type = "l", axes = FALSE, xlab = NA, ylab = NA, col = "blue"
  )
  axis(4, at = pretty(range(country_series$Cases)))
  mtext("Cases", side = 4, line = 3, col = "blue")
  grid()
}
现在,我们可以使用此函数来探索多个国家/地区的猴痘病例趋势:
加拿大报告病例曲线
plot_case_series("Canada")
美国病例曲线
plot_case_series("United States")
英国病例曲线
plot_case_series("United Kingdom")
最后
**毕设帮助, 选题指导, 项目分享:** https://gitee.com/yaa-dc/warehouse-1/blob/master/python/README.md