Split a vector separated by n zeros into different groups

后端 未结 5 1224
情歌与酒
情歌与酒 2020-12-17 22:05

I have a vector x

x = c(1, 1, 2.00005, 1, 1, 0, 0, 0, 0, 1, 2, 0, 3, 4, 0, 0, 0, 0, 1, 2, 3, 1, 3)

I need to split values sepa

相关标签:
5条回答
  • 2020-12-17 22:43

    This method is just slightly different from what you already proposed, and includes a first step of replacing all stretches of n or more zeroes by a value not found in x, for example max+1:

    # Run-length encode x so that each stretch of identical values is one run.
    r <- rle(x)
    # Sentinel value that cannot occur in x: one more than its largest value.
    val <- max(x, na.rm = TRUE) + 1
    # Overwrite every run of 3 or more zeros with the sentinel so it acts as a
    # separator; shorter runs of zeros are left in place as data.
    r$values[r$values == 0 & r$lengths > 2] <- val
    x2 <- inverse.rle(r)
    # The running count of sentinel elements is constant within each stretch
    # of data, so it can be used as the group id.
    temp <- cumsum(x2 == val)
    # Drop the sentinel elements and split what is left by group id.
    split(x2[x2 != val], temp[x2 != val])
    
    $`0`
    [1] 1.00000 1.00000 2.00005 1.00000 1.00000
    
    $`4`
    [1] 1 2 0 3 4
    
    $`8`
    [1] 1 2 3 1 3
    
    0 讨论(0)
  • 2020-12-17 22:48

    Here's my attempt at it. This method replaces runs of zeros that are longer than 3 with NA, while shorter runs of zeros are kept as data. Since split() drops elements whose grouping value is NA, we are left with the desired output.

    x <- c(1, 1, 2.00005, 1, 1, 0, 0, 0, 0, 1, 2, 0, 3, 4, 0, 0, 0, 0, 1, 2, 3, 1, 3)
    
    # Flag every element that belongs to a run of more than 3 zeros. The flag
    # is decided once per run and then expanded with rep(), so any number of
    # shorter zero runs (of any length up to 3) is kept as data.
    zero_runs <- rle(x == 0)
    is_separator <- rep(
      zero_runs$values & zero_runs$lengths > 3,
      zero_runs$lengths
    )
    ll <- ifelse(is_separator, NA, x)
    
    # Number the alternating data/separator stretches. Adding ll * 0 turns the
    # ids of the separator elements into NA, and split() drops NA groups.
    stretches <- rle(is.na(ll))
    split(x, rep(seq_along(stretches$lengths), stretches$lengths) + ll * 0)
    # $`1`
    # [1] 1.00000 1.00000 2.00005 1.00000 1.00000
    #
    # $`3`
    # [1] 1 2 0 3 4
    #
    # $`5`
    # [1] 1 2 3 1 3
    
    0 讨论(0)
  • 2020-12-17 22:56

    Here is an idea using rle and inverse.rle several times to create a subset of x (x_sub) and group number (group_sub). Finally, use split to get the final results.

    x <- c(1, 1, 2.00005, 1, 1, 0, 0, 0, 0, 1, 2, 0, 3, 4, 0, 0, 0, 0, 1, 2, 3, 1, 3)
    
    ### Step 1: Filter the index with values == 0 and length > 3
    ### (x2 is 0 where x is zero and 1 elsewhere, so runs of 0 are zero runs)
    x2 <- as.integer(x != 0)
    run <- rle(x2)
    index <- which(run$values == 0 & run$lengths > 3)
    
    ### Step 2: Replace the values in index to -1
    ### Create an intermediate index (x3)
    run2 <- run
    run2$values[index] <- -1
    ### The remaining (short) zero runs are relabelled 1 so they count as data
    run2$values[run2$values == 0] <- 1
    x3 <- inverse.rle(run2)
    
    ### Step 3: Create grouping variable (x4)
    ### x3 only holds 1 and -1, so its runs alternate between data and separator
    run3 <- rle(x3)
    run3$values <- seq_along(run3$values)
    x4 <- inverse.rle(run3)
    
    ### Step 4: Subset x by x3 and x4 (x_sub) and create group number (group_sub)
    x_sub <- x[x3 != -1]
    ### When x starts with data, the data runs carry the ids 1, 3, 5, ...;
    ### integer division by 2 plus 1 maps them to 1, 2, 3, ...
    group_sub <- x4[x3 != -1] %/% 2 + 1
    
    ### Step 5: Split x_sub to get the final output (final_list)
    final_list <- split(x_sub, f = group_sub)
    
    final_list
    $`1`
    [1] 1.00000 1.00000 2.00005 1.00000 1.00000
    
    $`2`
    [1] 1 2 0 3 4
    
    $`3`
    [1] 1 2 3 1 3
    
    0 讨论(0)
  • 2020-12-17 23:07

    Yet another solution using rle (twice) and inverse.rle.

    n <- 3
    # Encode x as runs of 1 (zero) and 0 (non-zero).
    r <- rle(as.integer(x == 0))
    # Zero runs shorter than n are not separators: relabel them as data.
    r$values[r$values == 1 & r$lengths < n] <- 0
    # Re-encode so that the relabelled runs merge with their neighbours.
    r <- rle(inverse.rle(r))
    
    # Flag the first element of every run with 1; all other elements stay 0.
    group <- integer(length(x))
    run_starts <- cumsum(r$lengths) - r$lengths + 1
    group[run_starts] <- 1L
    

    In the meantime, I realized that the code preparing the loop above, and the loop itself, could be greatly simplified. To make the example complete, I will repeat the initial lines of code.

    # Threshold repeated here so the snippet runs on its own: zero runs
    # shorter than n are treated as data, not as separators.
    n <- 3
    r <- rle(as.integer(x == 0))
    r$values[r$values == 1 & r$lengths < n] <- 0
    
    # This is the simplification
    # A new group starts at the first element and wherever the 0/1 flag changes.
    group <- c(1L, diff(inverse.rle(r)) != 0)
    
    res <- split(x, cumsum(group))
    # Drop the groups that consist only of zeros (the separators). A logical
    # mask is used instead of -which(...): when no group is all zeros, which()
    # returns integer(0) and negative indexing with it would empty the list.
    res <- res[!vapply(res, function(y) isTRUE(all(y == 0)), logical(1))]
    res
    #$`1`
    #[1] 1.00000 1.00000 2.00005 1.00000 1.00000
    #
    #$`3`
    #[1] 1 2 0 3 4
    #
    #$`5`
    #[1] 1 2 3 1 3
    
    0 讨论(0)
  • 2020-12-17 23:08

    Here is a method with rle, split, and lapply

    # get RLE
    temp <- rle(x)
    # replace values with grouping variables
    temp$values <- cumsum(temp$values == 0 & temp$lengths > 2)
    
    # split on group and lapply through, dropping 0s at beginning which are start of each group
    lapply(split(x, inverse.rle(temp)), function(y) y[cummax(y) > 0])
    $`0`
    [1] 1.00000 1.00000 2.00005 1.00000 1.00000
    
    $`1`
    [1] 1 2 0 3 4
    
    $`2`
    [1] 1 2 3 1 3
    

    A second method without lapply is as follows

    # get RLE
    temp <- rle(x)
    # get positions of 0s that force grouping
    changes <- which(temp$values == 0 & temp$lengths > 2)
    # get group indicators
    temp$values <- cumsum(temp$values == 0 & temp$lengths > 2)
    # make 0s a new group
    temp$values[changes] <- max(temp$values) + 1L
    
    # create list
    split(x, inverse.rle(temp))
    $`0`
    [1] 1.00000 1.00000 2.00005 1.00000 1.00000
    
    $`1`
    [1] 1 2 0 3 4
    
    $`2`
    [1] 1 2 3 1 3
    
    $`3`
    [1] 0 0 0 0 0 0 0 0
    

    Finally, you'd just drop the last list item, like head(split(x, inverse.rle(temp)), -1).

    0 讨论(0)
提交回复
热议问题