R Programming - Hospital Compare

最新推荐文章于 2024-07-14 18:37:02 发布

skyCeleste.x

最新推荐文章于 2024-07-14 18:37:02 发布

阅读量65

点赞数

分类专栏： R Programming 文章标签： r语言开发语言 python

本文链接：https://blog.csdn.net/jeonghin/article/details/124919793

版权

R Programming 专栏收录该内容

2 篇文章 0 订阅

订阅专栏

Unzip file

> library(utils)
> unzip("rprog-data-ProgAssignment3-data.zip")

Finding the best hospital in a state

best <- function(state, outcome) {
  ## Return the name of the hospital in `state` with the lowest 30-day
  ## mortality rate for `outcome`.
  ##
  ## state:   2-character abbreviated state name
  ## outcome: one of "heart attack", "heart failure", "pneumonia"
  ##
  ## Returns a character vector of length 1, or character(0) when no hospital
  ## in the state has a rate for the outcome. Hospitals tied on the lowest
  ## rate are resolved alphabetically by name.
  ## Stops with "invalid state" / "invalid outcome" on bad arguments.

  ## Read outcome data; "Not Available" cells become NA so that converting
  ## the rate column to numeric does not emit coercion warnings.
  care <- read.csv("outcome-of-care-measures.csv", colClasses = "character",
                   na.strings = "Not Available")

  outcomes <- c("heart attack", "heart failure", "pneumonia")

  ## Check that state and outcome are valid (state is checked first)
  if (!state %in% unique(care[, "State"]))
    stop("invalid state")
  if (!outcome %in% outcomes)
    stop("invalid outcome")

  ## Column positions of the 30-day mortality rates, in the same order as
  ## `outcomes`
  rate_col <- c(11, 17, 23)[match(outcome, outcomes)]

  in_state <- care[care$State == state, ]
  rate <- as.numeric(in_state[, rate_col])

  ## Order by rate, then by hospital name to break ties. na.last = NA drops
  ## hospitals without data while keeping indices relative to `in_state`.
  ranked <- order(rate, in_state[, "Hospital.Name"], na.last = NA)
  in_state[head(ranked, 1), "Hospital.Name"]
}

match(x, y)：返回 x 中每个元素在 y 中首次出现的位置（索引），未匹配到时返回 NA；如果只需要 TRUE/FALSE，请使用 x %in% y

> x <- colnames(diamonds)[1:5]
> x
[1] "carat"   "cut"     "color"   "clarity" "depth"  
> y <- colnames(diamonds)[1:10]
> y
 [1] "carat"   "cut"     "color"   "clarity" "depth"   "table"   "price"   "x"       "y"      
[10] "z"      
  
1.使用match函数找出x中每个元素在y中的位置
> match(x,y)
[1] 1 2 3 4 5

Ranking hospitals by outcome in a state

rankhospital <- function(state, outcome, num) {
  ## Return the name of the hospital holding rank `num` in `state` for the
  ## 30-day mortality rate of `outcome`.
  ##
  ## state:   2-character abbreviated name of a state
  ## outcome: "heart attack", "heart failure" or "pneumonia"
  ## num:     "best", "worst", or a numeric rank (1 = lowest rate)
  ##
  ## Hospitals without a rate are excluded before ranking; ties on the rate
  ## are ordered by hospital name. A rank beyond the number of ranked
  ## hospitals yields NA.

  ## Read outcome data, treating "Not Available" as missing
  care <- read.csv("outcome-of-care-measures.csv", colClasses = "character",
                   na.strings = "Not Available")
  valid_outcomes <- c("heart attack", "heart failure", "pneumonia")

  ## Check that state and outcome are valid
  if (!state %in% unique(c(care[, "State"])))
    stop("invalid state")
  if (!outcome %in% valid_outcomes)
    stop("invalid outcome")

  ## Look up the rate column by position, then by name
  rate_name <- names(care)[c(11, 17, 23)[match(outcome, valid_outcomes)]]

  ## Keep only the hospital name and the rate for the requested state
  in_state <- care[care$State == state, c("Hospital.Name", rate_name)]

  ## Rank by numeric rate, then hospital name; na.last = NA removes the rows
  ## with no rate, so "worst" is simply the last remaining row
  ranking <- order(as.numeric(in_state[[2]]), in_state[[1]],
                   na.last = NA, decreasing = FALSE)
  ranked <- in_state[ranking, ]

  ## Translate the keyword ranks into a row position
  position <- if (num == "worst") {
    nrow(ranked)
  } else if (num == "best") {
    1
  } else {
    num
  }

  ranked[position, 1]
}

Ranking hospitals in all states

Return a specific hospital name for each state

rankall <- function(outcome, num = 'best') {
  ## For every state, return the hospital holding rank `num` for the 30-day
  ## mortality rate of `outcome`.
  ##
  ## outcome: "heart attack", "heart failure" or "pneumonia"
  ## num:     "best", "worst", or a numeric rank (1 = lowest rate)
  ##
  ## Returns a data frame with one row per state, sorted by state, with
  ## columns `hospitalname` and `state`. `hospitalname` is NA when the state
  ## has no hospital at the requested rank.
  ## Stops with "invalid outcome" on an unknown outcome.

  ## Read outcome data, treating "Not Available" as missing
  care <- read.csv("outcome-of-care-measures.csv", colClasses = "character",
                   na.strings = "Not Available")
  outcomes <- c("heart attack", "heart failure", "pneumonia")

  ## Check that the outcome is valid
  if (!outcome %in% outcomes)
    stop("invalid outcome")

  ## Column positions of the 30-day mortality rates, in the same order as
  ## `outcomes`
  rate_col <- c(11, 17, 23)[match(outcome, outcomes)]

  ## Every state in the file appears in the result, even without usable data
  states <- sort(unique(care[, "State"]))

  ## Rates are converted to numeric so that ranking is by value: as text,
  ## "10.1" would sort before "9.0"
  ranking <- data.frame(Hospital.Name = care[, "Hospital.Name"],
                        State = care[, "State"],
                        Rate = as.numeric(care[, rate_col]),
                        stringsAsFactors = FALSE)

  ## Drop hospitals without a rate so "worst" is the last hospital with data
  ranking <- ranking[!is.na(ranking$Rate), ]

  ## Resolve the requested rank for a single state. `num` itself is never
  ## reassigned, so "best"/"worst" are re-evaluated for each state.
  pick_hospital <- function(state) {
    in_state <- ranking[ranking$State == state, ]
    ## Order by rate, then by hospital name to break ties
    in_state <- in_state[order(in_state$Rate, in_state$Hospital.Name), ]
    position <- if (num == "worst") {
      nrow(in_state)
    } else if (num == "best") {
      1
    } else {
      num
    }
    if (position < 1 || position > nrow(in_state))
      return(NA_character_)
    in_state[position, "Hospital.Name"]
  }

  hospitalname <- vapply(states, pick_hospital, character(1),
                         USE.NAMES = FALSE)
  data.frame(hospitalname = hospitalname, state = states,
             stringsAsFactors = FALSE)
}

subset

subset函数，从某一个数据框中选择出符合某条件的数据或是相关的列

another solution (unfinished)

using split and lapply, to be continued…,

rankall <- function(outcome, num = 'best') {
  ## split/lapply variant: for every state, return the hospital holding rank
  ## `num` for the 30-day mortality rate of `outcome`.
  ##
  ## outcome: "heart attack", "heart failure" or "pneumonia"
  ## num:     "best", "worst", or a numeric rank (1 = lowest rate)
  ##
  ## Returns a data frame with one row per state, sorted by state, with
  ## columns `hospitalname` and `state`. `hospitalname` is NA when the state
  ## has no hospital at the requested rank.
  ## Stops with "invalid outcome" on an unknown outcome.

  ## Read the data, treating "Not Available" as missing
  care <- read.csv("outcome-of-care-measures.csv", colClasses = "character",
                   na.strings = "Not Available")
  outcomes <- c("heart attack", "heart failure", "pneumonia")

  ## Check that the outcome is valid
  if (!outcome %in% outcomes)
    stop("invalid outcome")

  ## Column positions of the 30-day mortality rates, in the same order as
  ## `outcomes`
  rate_col <- c(11, 17, 23)[match(outcome, outcomes)]

  ## Remember every state before filtering so none disappears from the result
  states <- sort(unique(care[, "State"]))

  ## Subset to three columns. The rate gets the fixed name `Rate` and is
  ## made numeric so that later steps can refer to it and sort it by value.
  ranking <- data.frame(Hospital.Name = care[, "Hospital.Name"],
                        State = care[, "State"],
                        Rate = as.numeric(care[, rate_col]),
                        stringsAsFactors = FALSE)

  ## Remove NA values: drop whole rows, not just the rate vector
  ranking <- ranking[!is.na(ranking$Rate), ]

  ## Order by state, then outcome, then hospital name
  ranking <- ranking[order(ranking$State, ranking$Rate,
                           ranking$Hospital.Name), ]

  ## Split by state; the explicit levels keep states with no remaining rows
  ## as empty data frames
  by_state <- split(ranking, factor(ranking$State, levels = states))

  ## Takes the already-ordered data frame of one state and returns the
  ## hospital name at the requested rank, or NA when there is none
  pick_hospital <- function(state_rows) {
    position <- if (num == "worst") {
      nrow(state_rows)
    } else if (num == "best") {
      1
    } else {
      num
    }
    if (position < 1 || position > nrow(state_rows))
      return(NA_character_)
    state_rows[position, "Hospital.Name"]
  }

  ## Run lapply: a named list with one hospital name per state
  picked <- lapply(by_state, pick_hospital)

  data.frame(hospitalname = unlist(picked, use.names = FALSE),
             state = names(picked),
             stringsAsFactors = FALSE)
}

hints


## The function should be defined within your assignment function
assignment_function <- function() {
    lapply_function <- function() {
        ## do something     
    if(condition1) do something
    if(condition2) do something else
    return a value

    }
}
## If you order properly the worst hospital will always be the last one in the state subset. Just be sure you've done things in the correct order prior to running split and lapply -
## Read the data
## Subset to three columns
## Remove NA Values
## Order by state then outcome then hospital name
## Split by state
## Run lapply

## Your function for lapply should take one parameter as the data frame for a state and output a hospital name. The results of lapply will be a named list where the list names are state and the list values are hospital name (one for each state).
## The following function receives a state data frame from the split data
## function_for_lapply(data) { do something with data }

skyCeleste.x

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
R Programming - Hospital Compare

Unzip file> library(utils)> unzip("rprog-data-ProgAssignment3-data.zip")Finding the best hospital in a statebest <- function(state, outcome){ ## state is the 2-character abbreviated name ## outcome is the outcome name ## read outcome
复制链接

扫一扫