[置顶]R语言 分层抽样---分层随机抽样(SRS)(二 )

############################################################

自定义分层抽样函数:

############################################################

stratified <- function(df, group, size, select = NULL,
                       replace = FALSE, bothSets = FALSE) {
  if (is.null(select)) {
    df <- df
  } else {
    if (is.null(names(select))) stop("'select' must be a named list")
    if (!all(names(select) %in% names(df)))
      stop("Please verify your 'select' argument")
    temp <- sapply(names(select),
                   function(x) df[[x]] %in% select[[x]])
    df <- df[rowSums(temp) == length(select), ]
  }
  df.interaction <- interaction(df[group], drop = TRUE)
  df.table <- table(df.interaction)
  df.split <- split(df, df.interaction)
  if (length(size) > 1) {
    if (length(size) != length(df.split))
      stop("Number of groups is ", length(df.split),
           " but number of sizes supplied is ", length(size))
    if (is.null(names(size))) {
      n <- setNames(size, names(df.split))
      message(sQuote("size"), " vector entered as:\n\nsize = structure(c(",
              paste(n, collapse = ", "), "),\n.Names = c(",
              paste(shQuote(names(n)), collapse = ", "), ")) \n\n")
    } else {
      ifelse(all(names(size) %in% names(df.split)),
             n <- size[names(df.split)],
             stop("Named vector supplied with names ",
                  paste(names(size), collapse = ", "),
                  "\n but the names for the group levels are ",
                  paste(names(df.split), collapse = ", ")))
    }
  } else if (size < 1) {
    n <- round(df.table * size, digits = 0)
  } else if (size >= 1) {
    if (all(df.table >= size) || isTRUE(replace)) {
      n <- setNames(rep(size, length.out = length(df.split)),
                    names(df.split))
    } else {
      message(
        "Some groups\n---",
        paste(names(df.table[df.table < size]), collapse = ", "),
        "---\ncontain fewer observations",
        " than desired number of samples.\n",
        "All observations have been returned from those groups.")
      n <- c(sapply(df.table[df.table >= size], function(x) x = size),
             df.table[df.table < size])
    }
  }
  temp <- lapply(
    names(df.split),
    function(x) df.split[[x]][sample(df.table[x],
                                     n[x], replace = replace), ])
  set1 <- do.call("rbind", temp)
 
  if (isTRUE(bothSets)) {
    set2 <- df[!rownames(df) %in% rownames(set1), ]
    list(SET1 = set1, SET2 = set2)
  } else {
    set1
  }
}

================================================
参数说明:
#######################################################

The arguments to stratified are:

  • df: The input data.frame
  • group: A character vector of the column or columns that make up the "strata".
  • size: The desired sample size.
    • If size is a value less than 1, a proportionate sample is taken from each stratum.
    • If size is a single integer of 1 or more, that number of samples is taken from each stratum.
    • If size is a vector of integers, the specified number of samples is taken for each stratum. It is recommended that you use a named vector. For example, if you have two strata, "A" and "B", and you wanted 5 samples from "A" and 10 from "B", you would enter size = c(A = 5, B = 10).
  • select: This allows you to subset the groups in the sampling process. This is a list. For instance, if your group variable was "Group", and it contained three strata, "A", "B", and "C", but you only wanted to sample from "A" and "C", you can use select = list(Group = c("A", "C")).
  • replace: For sampling with replacement.
########################################################
例子说明:
########################################################
set.seed(1)
##创建数据框
dat1 <- data.frame(ID = 1:100, A = sample(c("AA", "BB", "CC", "DD", "EE"), 100,replace = TRUE), B = rnorm(100), C = abs(round(rnorm(100), digits = 1)), D = sample(c("CA", "NY", "TX"), 100, replace = TRUE), E = sample(c("M","F"), 100, replace = TRUE))
##

library(devtools)
source_gist("https://gist.github.com/mrdwab/6424112")
## [1] "https://raw.github.com/gist/6424112"
## SHA-1 hash of file is 0006d8548785ec8a5651c3dd599648cc88d153a4
# Generate a couple of sample data.frames to play with
set.seed(1)
dat1 <- data.frame(ID = 1:100, A = sample(c("AA", "BB", "CC", "DD", "EE"), 100, 
    replace = TRUE), B = rnorm(100), C = abs(round(rnorm(100), digits = 1)), 
    D = sample(c("CA", "NY", "TX"), 100, replace = TRUE), E = sample(c("M", 
        "F"), 100, replace = TRUE))

# What do the data look like in general?
summary(dat1)
##        ID         A            B                 C          D      E     
##  Min.   :  1.0   AA:13   Min.   :-1.9144   Min.   :0.000   CA:23   F:54  
##  1st Qu.: 25.8   BB:25   1st Qu.:-0.6141   1st Qu.:0.300   NY:42   M:46  
##  Median : 50.5   CC:19   Median :-0.1176   Median :0.650   TX:35         
##  Mean   : 50.5   DD:26   Mean   :-0.0176   Mean   :0.825                 
##  3rd Qu.: 75.2   EE:17   3rd Qu.: 0.5382   3rd Qu.:1.200                 
##  Max.   :100.0           Max.   : 2.4016   Max.   :2.900
# Let's take a 10% sample from all -A- groups in dat1
stratified(dat1, "A", 0.1)
##    ID  A       B   C  D E
## 92 92 AA  1.1766 1.0 CA F
## 86 86 BB -1.5364 0.1 NY F
## 74 74 BB -0.1796 0.3 CA F
## 60 60 CC  1.6822 0.5 TX F
## 58 58 CC  0.9102 0.5 CA F
## 9   9 DD  0.5697 1.4 TX F
## 42 42 DD  1.2079 0.4 TX M
## 46 46 DD  0.5585 1.0 CA M
## 99 99 EE -1.2863 0.2 NY F
## 72 72 EE  1.3430 0.0 NY F
# Let's take a 10% sample from only 'AA' and 'BB' groups from -A- in dat1
stratified(dat1, "A", 0.1, select = list(A = c("AA", "BB")))
##    ID  A      B   C  D E
## 69 69 AA 0.4942 0.6 CA M
## 54 54 BB 0.1580 0.3 TX M
## 5   5 BB 1.4330 1.5 NY F
# Let's take 5 samples from all -D- groups in dat1, specified by column
# number
stratified(dat1, group = 5, size = 5)
##    ID  A       B   C  D E
## 73 73 BB -0.2146 0.6 CA F
## 45 45 CC  1.5868 1.2 CA F
## 87 87 DD -0.3010 0.6 CA M
## 13 13 DD  0.6897 1.1 CA F
## 24 24 AA -0.9341 0.1 CA M
## 40 40 CC  0.2671 0.9 NY F
## 83 83 BB  0.5315 0.6 NY M
## 1   1 BB  0.3981 0.5 NY F
## 56 56 AA  1.7673 2.5 NY M
## 88 88 AA -0.5283 1.2 NY M
## 66 66 BB -0.3928 1.5 TX F
## 22 22 BB -0.7099 0.1 TX M
## 79 79 DD -0.6817 0.2 TX M
## 27 27 AA -0.4433 0.8 TX M
## 51 51 CC -0.6204 0.4 TX F
# Let's take a sample from all -A- groups in dat1, where we specify the
# number wanted from each group
stratified(dat1, "A", size = c(3, 5, 4, 5, 2))
## 'size' vector entered as:
## 
## size = structure(c(3, 5, 4, 5, 2), .Names = c("AA", "BB", "CC", "DD",
## "EE"))
##    ID  A       B   C  D E
## 38 38 AA -0.3042 0.8 NY M
## 10 10 AA -0.1351 1.9 NY M
## 56 56 AA  1.7673 2.5 NY M
## 62 62 BB -0.4616 0.4 CA F
## 74 74 BB -0.1796 0.3 CA F
## 2   2 BB -0.6120 0.0 TX F
## 66 66 BB -0.3928 1.5 TX F
## 22 22 BB -0.7099 0.1 TX M
## 40 40 CC  0.2671 0.9 NY F
## 67 67 CC -0.3200 0.3 CA F
## 16 16 CC  0.1888 2.2 CA M
## 3   3 CC  0.3411 0.3 NY M
## 39 39 DD  0.3700 0.4 CA F
## 49 49 DD -1.2246 0.4 NY F
## 50 50 DD -0.4734 0.4 TX F
## 23 23 DD  0.6107 0.5 NY F
## 17 17 DD -1.8050 0.3 NY M
## 35 35 EE  0.5939 0.5 CA M
## 41 41 EE -0.5425 0.2 NY F
# Use a two-column strata: -E- and -D- -E- varies more slowly, so it is
# better to put that first
stratified(dat1, c("E", "D"), size = 0.15)
##    ID  A         B   C  D E
## 84 84 BB -1.518394 0.6 CA F
## 34 34 AA -1.523567 1.5 CA F
## 53 53 CC -0.910922 1.6 CA M
## 36 36 DD  0.332950 0.2 CA M
## 97 97 CC  2.087167 0.5 NY F
## 49 49 DD -1.224613 0.4 NY F
## 5   5 BB  1.433024 1.5 NY F
## 37 37 DD  1.063100 1.5 NY M
## 17 17 DD -1.804959 0.3 NY M
## 33 33 CC  1.178087 0.2 NY M
## 14 14 BB  0.028002 0.9 TX F
## 90 90 AA -0.056897 0.0 TX F
## 28 28 BB  0.001105 2.1 TX F
## 80 80 EE -0.324270 0.3 TX M
## 22 22 BB -0.709946 0.1 TX M
# Use a two-column strata (-E- and -D-) but only interested in cases where
# -E- == 'M'
stratified(dat1, c("E", "D"), 0.15, select = list(E = "M"))
##      ID  A        B   C  D E
## 69   69 AA  0.49419 0.6 CA M
## 32   32 CC -0.13518 1.0 CA M
## 12   12 AA -0.03924 0.2 NY M
## 100 100 DD -1.64061 1.0 NY M
## 56   56 AA  1.76729 2.5 NY M
## 79   79 DD -0.68166 0.2 TX M
## 80   80 EE -0.32427 0.3 TX M
## As above, but where -E- == 'M' and -D- == 'CA' or 'TX'
stratified(dat1, c("E", "D"), 0.15, select = list(E = "M", D = c("CA", "TX")))
##    ID  A       B   C  D E
## 53 53 CC -0.9109 1.6 CA M
## 36 36 DD  0.3330 0.2 CA M
## 80 80 EE -0.3243 0.3 TX M
## 8   8 DD -1.0441 0.6 TX M
# Use a three-column strata: -E-, -D-, and -A-
s.out <- stratified(dat1, c("E", "D", "A"), size = 2)
## Some groups ---M.CA.BB, M.CA.EE--- contain fewer observations than desired
## number of samples. All observations have been returned from those groups.
list(head(s.out), tail(s.out))
## [[1]]
##    ID  A       B   C  D E
## 92 92 AA  1.1766 1.0 CA F
## 34 34 AA -1.5236 1.5 CA F
## 24 24 AA -0.9341 0.1 CA M
## 69 69 AA  0.4942 0.6 CA M
## 56 56 AA  1.7673 2.5 NY M
## 10 10 AA -0.1351 1.9 NY M
## 
## [[2]]
##    ID  A        B   C  D E
## 61 61 EE -0.63574 0.2 NY M
## 18 18 EE  1.46555 1.4 NY M
## 21 21 EE  0.47551 2.3 TX F
## 70 70 EE -0.17733 0.0 TX F
## 80 80 EE -0.32427 0.3 TX M
## 77 77 EE -0.07356 0.3 TX M
# How many samples were taken from each strata?
table(interaction(s.out[c("E", "D", "A")]))
## 
## F.CA.AA M.CA.AA F.NY.AA M.NY.AA F.TX.AA M.TX.AA F.CA.BB M.CA.BB F.NY.BB 
##       2       2       0       2       2       2       2       1       2 
## M.NY.BB F.TX.BB M.TX.BB F.CA.CC M.CA.CC F.NY.CC M.NY.CC F.TX.CC M.TX.CC 
##       2       2       2       2       2       2       2       2       0 
## F.CA.DD M.CA.DD F.NY.DD M.NY.DD F.TX.DD M.TX.DD F.CA.EE M.CA.EE F.NY.EE 
##       2       2       2       2       2       2       0       1       2 
## M.NY.EE F.TX.EE M.TX.EE 
##       2       2       2
# Can we verify the message about group sizes?
names(which(table(interaction(dat1[c("E", "D", "A")])) < 2))
## [1] "F.NY.AA" "M.CA.BB" "M.TX.CC" "F.CA.EE" "M.CA.EE"
names(which(table(interaction(s.out[c("E", "D", "A")])) < 2))
## [1] "F.NY.AA" "M.CA.BB" "M.TX.CC" "F.CA.EE" "M.CA.EE"
















vue-echarts是一个基于Vue.js的图表库,它提供了对ECharts图表库的封装和集成,使得在Vue.js项目使用ECharts更加方便。折线置顶是指在折线图,将某一条折线显示在其他折线的上方,以突出该折线的重要性或者优先级。 要实现折线置顶,可以通过设置ECharts的series配置项的z值来控制折线的层级。z值越大,表示该折线在图表的层级越高,即显示在其他折线的上方。 以下是实现折线置顶的步骤: 1. 在Vue.js项目引入vue-echarts库,并初始化一个ECharts实例。 2. 在ECharts的option配置项定义多条折线,并设置不同的z值。 3. 将option配置项传递给vue-echarts组件进行渲染。 下面是一个示例代码,演示了如何使用vue-echarts实现折线置顶: ```vue <template> <div> <vue-echarts :options="chartOptions"></vue-echarts> </div> </template> <script> import VueECharts from 'vue-echarts'; export default { components: { VueECharts, }, data() { return { chartOptions: { xAxis: { type: 'category', data: ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'], }, yAxis: { type: 'value', }, series: [ { name: 'Line 1', type: 'line', data: [120, 200, 150, 80, 70, 110, 130], z: 2, // 设置z值为2,表示该折线置顶 }, { name: 'Line 2', type: 'line', data: [90, 150, 200, 120, 100, 80, 70], z: 1, // 设置z值为1,表示该折线在第层级 }, // 其他折线... ], }, }; }, }; </script> ``` 通过设置series的z值,可以控制折线的层级,从而实现折线置顶效果。
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值