R Programming - Hospital Compare

Unzip file

> library(utils)
> unzip("rprog-data-ProgAssignment3-data.zip")

Finding the best hospital in a state

best <- function(state, outcome) {
  ## Find the hospital with the lowest 30-day mortality rate in a state.
  ##
  ## Args:
  ##   state:   2-character abbreviated state name.
  ##   outcome: one of "heart attack", "heart failure", "pneumonia".
  ##
  ## Returns:
  ##   The name of the hospital with the lowest rate for `outcome` in
  ##   `state`. Ties are broken alphabetically by hospital name. Hospitals
  ##   without data for the outcome are ignored; NA is returned when no
  ##   hospital in the state has data.
  ##
  ## Errors:
  ##   "invalid state" / "invalid outcome" when an argument is not recognised.

  ## Read outcome data. "Not Available" is mapped to NA so that the numeric
  ## conversion below does not emit coercion warnings.
  outcomes <- read.csv("outcome-of-care-measures.csv",
                       colClasses = "character",
                       na.strings = "Not Available")

  ## Column position of the 30-day mortality rate for each outcome
  rate_cols <- c("heart attack" = 11, "heart failure" = 17, "pneumonia" = 23)

  ## Check that state and outcome are valid
  if (!state %in% outcomes[["State"]]) {
    stop("invalid state")
  }
  if (!outcome %in% names(rate_cols)) {
    stop("invalid outcome")
  }

  ## Keep only the hospitals of the requested state
  in_state <- outcomes[which(outcomes[["State"]] == state), ]
  rate <- as.numeric(in_state[[rate_cols[[outcome]]]])

  ## Sort by rate, then by name so ties resolve alphabetically;
  ## na.last = NA drops hospitals with a missing rate.
  ranked <- in_state[order(rate, in_state[["Hospital.Name"]], na.last = NA), ]
  ranked[1, "Hospital.Name"]
}

match: 匹配两个向量,返回x中每个元素在y中首次出现的位置索引,找不到时返回NA;如需TRUE/FALSE结果,可使用 %in%

> x <- colnames(diamonds)[1:5]
> x
[1] "carat"   "cut"     "color"   "clarity" "depth"  
> y <- colnames(diamonds)[1:10]
> y
 [1] "carat"   "cut"     "color"   "clarity" "depth"   "table"   "price"   "x"       "y"      
[10] "z"      
  
1.使用match函数找出x中每个元素在y中的位置
> match(x,y)
[1] 1 2 3 4 5

Ranking hospitals by outcome in a state

rankhospital <- function(state, outcome, num) {
  ## Return the hospital holding a given rank for an outcome within a state.
  ##
  ## Args:
  ##   state:   2-character abbreviated name of a state.
  ##   outcome: one of "heart attack", "heart failure", "pneumonia".
  ##   num:     "best", "worst", or the numeric rank to look up.
  ##
  ## Returns:
  ##   The hospital name at that rank (rate ascending, then name ascending,
  ##   hospitals without data excluded); NA when the rank exceeds the number
  ##   of ranked hospitals.

  ## Read outcome data; "Not Available" entries become NA
  outcomes <- read.csv("outcome-of-care-measures.csv",
                       colClasses = "character",
                       na.strings = "Not Available")

  ## Column position of the 30-day mortality rate for each outcome
  rate_index <- c("heart attack" = 11, "heart failure" = 17, "pneumonia" = 23)

  ## Check that state and outcome are valid
  if (!(state %in% unique(outcomes[["State"]]))) {
    stop("invalid state")
  }
  if (!(outcome %in% names(rate_index))) {
    stop("invalid outcome")
  }

  ## Reduce to the hospital name and the rate column, for this state only
  rate_name <- names(outcomes)[rate_index[[outcome]]]
  in_state <- outcomes[outcomes[["State"]] == state,
                       c("Hospital.Name", rate_name)]

  ## Rank by rate, then by name; na.last = NA removes hospitals with no rate,
  ## so "worst" refers to the last hospital that actually has data
  ranking <- order(as.numeric(in_state[[2]]), in_state[[1]], na.last = NA)
  ranked <- in_state[ranking, ]

  ## Translate the "best"/"worst" keywords into a row position
  position <- if (num == "worst") {
    nrow(ranked)
  } else if (num == "best") {
    1
  } else {
    num
  }

  ranked[position, 1]
}

Ranking hospitals in all states

Return a specific hospital name for each state.

rankall <- function(outcome, num = "best") {
  ## Find, for every state, the hospital holding a given rank for an outcome.
  ##
  ## Args:
  ##   outcome: one of "heart attack", "heart failure", "pneumonia".
  ##   num:     "best", "worst", or the numeric rank to look up.
  ##
  ## Returns:
  ##   A data frame with one row per state, sorted by state, with columns
  ##   `hospitalname` and `state`. `hospitalname` is NA for a state that has
  ##   no hospital at the requested rank.
  ##
  ## Errors:
  ##   "invalid outcome" when `outcome` is not recognised.

  ## Read outcome data; "Not Available" entries become NA
  outcomes <- read.csv("outcome-of-care-measures.csv",
                       colClasses = "character",
                       na.strings = "Not Available")

  ## Column position of the 30-day mortality rate for each outcome
  rate_cols <- c("heart attack" = 11, "heart failure" = 17, "pneumonia" = 23)

  ## Check that the outcome is valid
  if (!outcome %in% names(rate_cols)) {
    stop("invalid outcome")
  }

  ## Keep only what is needed. The rate is converted to numeric so that the
  ## ordering is numeric rather than lexicographic ("10.1" vs "9.5").
  hospitals <- data.frame(
    hospitalname = outcomes[["Hospital.Name"]],
    state = outcomes[["State"]],
    rate = as.numeric(outcomes[[rate_cols[[outcome]]]]),
    stringsAsFactors = FALSE
  )
  states <- sort(unique(hospitals$state))

  ## Pick the hospital of the requested rank within a single state
  pick_hospital <- function(state) {
    in_state <- hospitals[which(hospitals$state == state), ]
    ## Order by rate, then name; na.last = NA drops hospitals without data
    ranking <- order(in_state$rate, in_state$hospitalname, na.last = NA)
    in_state <- in_state[ranking, ]

    ## Resolve the rank per state without overwriting `num`, because
    ## "worst" maps to a different position in every state
    position <- if (num == "worst") {
      nrow(in_state)
    } else if (num == "best") {
      1
    } else {
      num
    }

    hospital <- in_state[position, "hospitalname"]
    ## Position 0 (a state with no data) selects nothing; report it as NA
    if (length(hospital) == 1) hospital else NA_character_
  }

  picked <- vapply(states, pick_hospital, character(1), USE.NAMES = FALSE)
  data.frame(hospitalname = picked, state = states, stringsAsFactors = FALSE)
}

subset

subset函数,从某一个数据框中选择出符合某条件的数据或是相关的列

another solution (unfinished)

Using split and lapply; to be continued…

rankall <- function(outcome, num = "best") {
  ## Find, for every state, the hospital holding a given rank for an outcome
  ## (split/lapply variant).
  ##
  ## Args:
  ##   outcome: one of "heart attack", "heart failure", "pneumonia".
  ##   num:     "best", "worst", or the numeric rank to look up.
  ##
  ## Returns:
  ##   A data frame with one row per state, sorted by state, with columns
  ##   `hospitalname` and `state`. `hospitalname` is NA for a state that has
  ##   no hospital at the requested rank.
  ##
  ## Errors:
  ##   "invalid outcome" when `outcome` is not recognised.

  ## Read the data; "Not Available" entries become NA
  outcomes <- read.csv("outcome-of-care-measures.csv",
                       colClasses = "character",
                       na.strings = "Not Available")

  ## Column position of the 30-day mortality rate for each outcome
  rate_cols <- c("heart attack" = 11, "heart failure" = 17, "pneumonia" = 23)

  ## Check that the outcome is valid
  if (!outcome %in% names(rate_cols)) {
    stop("invalid outcome")
  }

  ## Subset to three columns and give them fixed names, so that the rate
  ## column can be addressed regardless of which outcome was requested
  hospitals <- outcomes[, c(2, 7, rate_cols[[outcome]])]
  names(hospitals) <- c("hospitalname", "state", "rate")
  hospitals$rate <- as.numeric(hospitals$rate)

  ## Remember every state before dropping rows, so that a state whose
  ## hospitals all lack data still appears in the result
  states <- sort(unique(hospitals$state))

  ## Remove NA values
  with_data <- hospitals[!is.na(hospitals$rate), ]

  ## Order by state, then outcome rate, then hospital name
  ranking <- order(with_data$state, with_data$rate, with_data$hospitalname)
  ranked <- with_data[ranking, ]

  ## Split by state; explicit levels keep states that lost all their rows
  by_state <- split(ranked, factor(ranked$state, levels = states))

  ## Receives the already ordered data frame of one state and returns the
  ## hospital name at the requested rank
  pick_hospital <- function(state_df) {
    position <- if (num == "worst") {
      nrow(state_df)
    } else if (num == "best") {
      1
    } else {
      num
    }
    hospital <- state_df[position, "hospitalname"]
    ## Position 0 (a state with no data) selects nothing; report it as NA
    if (length(hospital) == 1) hospital else NA_character_
  }

  ## Run lapply: the result is a list named by state
  picked <- lapply(by_state, pick_hospital)

  data.frame(hospitalname = unlist(picked, use.names = FALSE),
             state = names(picked),
             stringsAsFactors = FALSE)
}

hints


## The function should be defined within your assignment function
assignment_function <- function() {
    lapply_function <- function() {
        ## do something     
    if(condition1) do something
    if(condition2) do something else
    return a value

    }
}
## If you order properly the worst hospital will always be the last one in the state subset. Just be sure you've done things in the correct order prior to running split and lapply -
## Read the data
## Subset to three columns
## Remove NA Values
## Order by state then outcome then hospital name
## Split by state
## Run lapply

## Your function for lapply should take one parameter as the data frame for a state and output a hospital name. The results of lapply will be a named list where the list names are state and the list values are hospital name (one for each state).
## The following function receives a state data frame from the split data
## function_for_lapply(data) { do something with data }
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值