The latest DeepSeek is here! Hands-on test: statistical analysis straight to a three-line table

On the evening of March 24, DeepSeek released a minor update to its DeepSeek-V3 model. The new version is numbered DeepSeek-V3-0324, and an official announcement went out as well.

Judging from the major reviews, the biggest performance highlight is the improved coding ability: the new DeepSeek-V3 is much better at generating front-end code.

What Lao Zheng is curious about is how its code generation holds up in statistical analysis, and how it compares with R1. Let's put it to the test!

I asked DeepSeek-V3-0324 and DeepSeek-R1 to each write an R function implementing a multi-model strategy for logistic regression. I provided example data and a Word table of the multi-model strategy for reference, to see which version's R function is more accurate and more convenient.
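
The example data file itself is not attached to this post. If you want to reproduce the runs, below is a minimal sketch of simulated data with the same column layout; the names outcome, V_D and X1-X11 are inferred from the generated code further down, and the distributions are purely my assumptions.

# Hypothetical stand-in for the example data (structure only; values are simulated)
set.seed(123)
n <- 500
sim <- data.frame(
  outcome = rbinom(n, 1, 0.3),     # binary outcome
  V_D     = rnorm(n, 20, 5),       # exposure of interest
  X1 = rnorm(n), X2 = rnorm(n), X3 = rnorm(n), X4 = rnorm(n),
  X5 = sample(0:1, n, replace = TRUE), X6  = sample(0:1, n, replace = TRUE),
  X7 = sample(0:1, n, replace = TRUE), X8  = sample(0:1, n, replace = TRUE),
  X9 = sample(0:1, n, replace = TRUE), X10 = sample(0:1, n, replace = TRUE),
  X11 = sample(0:2, n, replace = TRUE)   # X5-X11 are treated as categorical later
)
write.csv(sim, "示例数据.csv", row.names = FALSE)   # same filename as used in the code below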

1

DeepSeek-V3-0324

On the DeepSeek website, turn off DeepThink and you are using DeepSeek-V3-0324. Enter the prompt and upload the example data and the reference table.

Here is the R function DeepSeek-V3-0324 gave me:

library(tidyverse)
library(broom)
library(flextable)
library(officer)
library(gtsummary)


perform_enhanced_logistic_analysis <- function(data, outcome_var, exposure_var, 
                                             adj_vars_model2, adj_vars_model3,
                                             output_file = "logistic_results.docx")
{
  
  # Data preprocessing
  data <- data %>%
    mutate(across(where(is.character), as.factor)) %>%
    mutate(across(where(is.factor), ~fct_explicit_na(.x, na_level = "Missing")))
  
  # Make sure the outcome variable is a factor with correct coding
  data[[outcome_var]] <- as.factor(data[[outcome_var]])
  if (!all(levels(data[[outcome_var]]) %in% c("0", "1"))) {
    data[[outcome_var]] <- factor(data[[outcome_var]], levels = c(0, 1))
  }
  
  # Model 1: crude model (exposure variable only)
  formula1 <- as.formula(paste(outcome_var, "~", exposure_var))
  model1 <- glm(formula1, data = data, family = binomial())
  
  # Model 2: adjusted model (X1, X2, X3)
  formula2 <- as.formula(paste(outcome_var, "~", exposure_var, "+", 
                              paste(adj_vars_model2, collapse = "+")))
  model2 <- glm(formula2, data = data, family = binomial())
  
  # Model 3: fully adjusted model (all covariates)
  formula3 <- as.formula(paste(outcome_var, "~", exposure_var, "+", 
                              paste(adj_vars_model3, collapse = "+")))
  model3 <- glm(formula3, data = data, family = binomial())
  
  # Helper to extract OR, 95% CI and P value for a given term
  extract_model_results <- function(model, var_name) {
    tidy_results <- broom::tidy(model, conf.int = TRUE, exponentiate = TRUE)
    var_results <- tidy_results %>% filter(term == var_name)
    
    if (nrow(var_results) == 1) {
      return(list(
        OR = sprintf("%.2f", var_results$estimate),
        CI = sprintf("%.2f (%.2f-%.2f)", 
                    var_results$estimate,
                    var_results$conf.low,
                    var_results$conf.high),
        P = ifelse(var_results$p.value < 0.001, "<0.001", 
                  sprintf("%.3f", var_results$p.value))
      ))
    } else {
      return(list(OR = NA, CI = NA, P = NA))
    }
  }
  
  # Get the results from each model
  res1 <- extract_model_results(model1, exposure_var)
  res2 <- extract_model_results(model2, exposure_var)
  res3 <- extract_model_results(model3, exposure_var)
  
  # Assemble the results data frame
  result_df <- tibble(
    Variables = exposure_var,
    Model1_OR = res1$OR,
    Model1_CI = res1$CI,
    Model1_P = res1$P,
    Model2_OR = res2$OR,
    Model2_CI = res2$CI,
    Model2_P = res2$P,
    Model3_OR = res3$OR,
    Model3_CI = res3$CI,
    Model3_P = res3$P
  )
  
  # Build the publication-style table
  ft <- flextable(result_df) %>%
    set_header_labels(
      Variables = "Variables",
      Model1_OR = "OR", Model1_CI = "95% CI", Model1_P = "P*",
      Model2_OR = "OR", Model2_CI = "95% CI", Model2_P = "P*",
      Model3_OR = "OR", Model3_CI = "95% CI", Model3_P = "P*"
    ) %>%
    add_header_row(
      values = c("", "Model 1", "Model 2", "Model 3"),
      colwidths = c(1, 3, 3, 3)
    ) %>%
    theme_booktabs() %>%
    align(align = "center", part = "all") %>%
    fontsize(size = 10, part = "all") %>%
    bold(part = "header") %>%
    autofit()
  
  # Create the Word document
  doc <- read_docx() %>%
    body_add_par("Logistic回归多模型策略", style = "heading 1") %>%
    body_add_flextable(ft) %>%
    body_add_par("OR: Odds Ratio; CI: Confidence Interval", style = "Normal") %>%
    body_add_par("Model 1: Crude model (unadjusted)", style = "Normal") %>%
    body_add_par(paste("Model 2: Adjusted for", paste(adj_vars_model2, collapse = ", ")), 
                style = "Normal") %>%
    body_add_par(paste("Model 3: Fully adjusted for", 
                      paste(adj_vars_model3, collapse = ", ")), 
                style = "Normal")
  
  # Save the document
  print(doc, target = output_file)
  
  # Return the models, results and table
  return(list(
    models = list(model1 = model1, model2 = model2, model3 = model3),
    results = result_df,
    table = ft
  ))
}


# Usage example
# Read the data
data <- read.csv("示例数据.csv")


# Define the variables
outcome_var <- "outcome"
exposure_var <- "V_D"
adj_vars_model2 <- c("X1", "X2", "X3")
adj_vars_model3 <- c("X1", "X2", "X3", "X4", "X5", "X6", "X7", "X8", "X9", "X10", "X11")


# Run the analysis and generate the Word document
results <- perform_enhanced_logistic_analysis(
  data = data,
  outcome_var = outcome_var,
  exposure_var = exposure_var,
  adj_vars_model2 = adj_vars_model2,
  adj_vars_model3 = adj_vars_model3,
  output_file = "logistic_regression_results.docx"
)
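
Besides writing the Word file, the function returns a list with the three fitted models, the result tibble and the flextable object, so everything can also be inspected directly in the R session:

# Inspect the returned objects (names follow the list returned by the function)
results$results                    # tibble of ORs, 95% CIs and P values
results$table                      # the flextable; renders in the RStudio Viewer
summary(results$models$model3)     # full summary of the fully adjusted model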

Copied into R, it ran straight through and produced the Word document. The result is shown below:

2

DeepSeek-R1

The same prompt and materials were given to DeepSeek-R1.

It also returned an R function:

library(tidyverse)
library(flextable)
library(officer)


create_logistic_models <- function(data, outcome_var) {
  # Data preprocessing
  df <- data %>%
    mutate(across(c(X5, X6, X7, X8, X9, X10, X11), as.factor))
  
  # Define the model formulas
  formula_m1 <- as.formula(paste(outcome_var, "~ V_D"))
  formula_m2 <- as.formula(paste(outcome_var, "~ V_D + X1 + X2 + X3"))
  formula_m3 <- as.formula(paste(outcome_var, "~ V_D + X5 + X6 + X7 + X8 + X9 + X10 + X11 + X1 + X2 + X3 + X4"))
  
  # Fit the models
  m1 <- glm(formula_m1, family = binomial(), data = df)
  m2 <- glm(formula_m2, family = binomial(), data = df)
  m3 <- glm(formula_m3, family = binomial(), data = df)
  
  # Helper to extract results
  extract_results <- function(model) {
    coefs <- coef(summary(model))[-1, ]  # drop the intercept
    or <- exp(coefs[, "Estimate"])
    ci <- exp(confint(model)[-1, ])
    p_value <- coefs[, "Pr(>|z|)"]
    
    data.frame(
      OR = sprintf("%.2f", or),
      CI = sprintf("(%.2f-%.2f)", ci[,1], ci[,2]),
      P = ifelse(p_value < 0.001, "<0.001", sprintf("%.3f", p_value))
  }
  
  # Extract the results for each model
  res_m1 <- extract_results(m1)["V_D", ]
  res_m2 <- extract_results(m2)["V_D", ]
  res_m3 <- extract_results(m3)["V_D", ]
  
  # Build the results table
  result_table <- data.frame(
    Variables = "V_D",
    Model1 = paste(res_m1$OR, res_m1$CI, res_m1$P, sep = "\n"),
    Model2 = paste(res_m2$OR, res_m2$CI, res_m2$P, sep = "\n"),
    Model3 = paste(res_m3$OR, res_m3$CI, res_m3$P, sep = "\n")
  )
  
  # Create the Word table
  ft <- flextable(result_table) %>%
    set_header_labels(Variables = "Variables",
                     Model1 = "Model1\nOR (95%CI)   P",
                     Model2 = "Model2\nOR (95%CI)   P",
                     Model3 = "Model3\nOR (95%CI)   P") %>%
    theme_booktabs() %>%
    autofit()
  
  # Add table footnotes
  ft <- add_footer_lines(ft, values = c(
    "OR: Odds Ratio, CI: Confidence Interval",
    "Model1: Crude model",
    "Model2: Adjusted for X1, X2, X3",
    "Model3: Adjusted for X5-X11 and X1-X4"))
  
  # Export the Word document
  save_as_docx(ft, path = "Logistic_Models_Table.docx")
  
  return(ft)
}


# Usage example
data <- read.csv("示例数据.csv")
create_logistic_models(data, "outcome")

But it threw an error. (Looking at the code, the extract_results helper never closes its data.frame() call, so the script fails to parse; and even with that fixed, removing the intercept row with [-1, ] collapses Model 1's single-predictor coefficient matrix to a vector.)

So I asked a follow-up.

DeepSeek-R1 provided a new piece of code, and this time I got the result I wanted, but perhaps because my follow-up was not phrased clearly enough, it was not exported to Word.
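
For reference, here is a minimal corrected sketch in the spirit of the R1 version; it is my own repair, not R1's follow-up output, and the helper name extract_results_fixed is mine. It closes the data.frame() call, keeps single-row coefficient matrices from collapsing, selects the exposure row by name, and exports the table to Word with save_as_docx().

library(flextable)

# Corrected sketch of the R1 approach (my repair, not R1's follow-up output)
extract_results_fixed <- function(model, term = "V_D") {
  coefs <- coef(summary(model))[-1, , drop = FALSE]   # drop intercept, keep matrix shape
  ci    <- exp(confint(model))[-1, , drop = FALSE]    # profile CIs on the OR scale
  data.frame(
    OR = sprintf("%.2f", exp(coefs[, "Estimate"])),
    CI = sprintf("(%.2f-%.2f)", ci[, 1], ci[, 2]),
    P  = ifelse(coefs[, "Pr(>|z|)"] < 0.001, "<0.001",
                sprintf("%.3f", coefs[, "Pr(>|z|)"])),
    row.names = rownames(coefs)
  )[term, ]                                           # pick the exposure row by name
}

data <- read.csv("示例数据.csv")
m1  <- glm(outcome ~ V_D, family = binomial(), data = data)
res <- extract_results_fixed(m1)

ft <- flextable(data.frame(Variables = "V_D",
                           Model1 = paste(res$OR, res$CI, res$P)))
save_as_docx(ft, path = "Logistic_Models_Table_fixed.docx")    # export to Word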

3

Zstats Storm Statistics

Are the results correct? Let's verify them on the Zstats Storm Statistics platform.

(1) Copy the link below into a browser to open the 2025 Zstats Storm Statistics platform, on the logistic regression for controlling confounding bias page.

Logistic regression for controlling confounding: http://zstats.medsta.cn/logisticcon/

(2) Import the example data.

(3) Select the corresponding variables and click Analyze.

The multi-model strategy three-line table comes right out!

First, in terms of the results, both DeepSeek-V3-0324 and DeepSeek-R1 got it right.

Second, in terms of user experience, V3 seems to have the edge, though that is based on a single run of mine, haha; feel free to try it yourselves.

Also, V3 rarely stalls on follow-up questions. R1 is no longer limited to a single question as it used to be, but by the time I wanted to follow up again the server was already busy!

It really is impressively smart: writing this function myself might have taken a week, and with DeepSeek's help it was done in half an hour!

Statement

Zstats-AI Platform

√ Run by Zheng Weijun, statistics teacher at Zhejiang Chinese Medical University

√ Built on R

√ Free to use, no registration required

√ One-click, publication-ready tables and figures

www.medsta.cn/software

(Open in a desktop browser)

Some people ask: Teacher Zheng, if it's free, how do you make money? Well, doesn't Teacher Zheng run training courses? I'll just plug the training here; surely you don't mind that?
