I have a vector x
# Example input: non-zero values separated by zero runs of different lengths.
x <- c(1, 1, 2.00005, 1, 1, 0, 0, 0, 0, 1, 2, 0, 3, 4, 0, 0, 0, 0, 1, 2, 3, 1, 3)
I need to split the values separated by runs of n or more zeros (here n = 3) into different groups.
This method is just slightly different from what you already proposed, and includes a first step of replacing all stretches of n or more zeroes by a value not found in x, for example max+1:
# Mark every run of more than two zeros with a sentinel value that cannot
# occur in x, then split the remaining values on those sentinel stretches.
r <- rle(x)
val <- max(x, na.rm = TRUE) + 1  # sentinel: larger than every value in x
r$values[r$values == 0 & r$lengths > 2] <- val
x2 <- inverse.rle(r)
# The running count of sentinel elements stays constant between two
# separator runs, so it can be used as the group id of the kept values.
temp <- cumsum(x2 == val)
keep <- x2 != val
split(x2[keep], temp[keep])
$`0`
[1] 1.00000 1.00000 2.00005 1.00000 1.00000
$`4`
[1] 1 2 0 3 4
$`8`
[1] 1 2 3 1 3
Here's my attempt at it. This method replaces the zeros that belong to runs longer than 3 with NA. Since split() drops the elements whose grouping value is NA, we are left with the desired output.
x <- c(1, 1, 2.00005, 1, 1, 0, 0, 0, 0, 1, 2, 0, 3, 4, 0, 0, 0, 0, 1, 2, 3, 1, 3)
# Blank out (NA) every zero that belongs to a run of more than 3 zeros.
# The run-level flag is expanded back to one flag per element with rep(),
# so shorter zero runs are kept whole, whatever their length or number.
ll <- with(rle(x == 0), {
  replace(x, rep(values & lengths > 3, lengths), NA)
})
# Number the alternating NA / non-NA stretches. Adding ll * 0 turns the
# group id into NA on the blanked positions, and split() drops those.
split(x, with(rle(is.na(ll)), rep(seq_along(lengths), lengths) + ll * 0))
# $`1`
# [1] 1.00000 1.00000 2.00005 1.00000 1.00000
#
# $`3`
# [1] 1 2 0 3 4
#
# $`5`
# [1] 1 2 3 1 3
Here is an idea using rle and inverse.rle several times to create a subset of x (x_sub) and a group number (group_sub). Finally, use split to get the final results.
x <- c(1, 1, 2.00005, 1, 1, 0, 0, 0, 0, 1, 2, 0, 3, 4, 0, 0, 0, 0, 1, 2, 3, 1, 3)
### Step 1: Filter the index with values == 0 and length > 3
x2 <- as.integer(x != 0)  # 1 for non-zero elements, 0 for zeros
run <- rle(x2)
index <- which(run$values == 0 & run$lengths > 3)
### Step 2: Replace the values in index to -1
### Create an intermediate index (x3)
run2 <- run
run2$values[index] <- -1
# The remaining (short) zero runs are treated like ordinary values
run2$values[run2$values == 0] <- 1
x3 <- inverse.rle(run2)  # -1 on separator elements, 1 everywhere else
### Step 3: Create grouping variable (x4)
run3 <- rle(x3)
# seq_along() instead of 1:length(): the latter yields c(1, 0) on empty input
run3$values <- seq_along(run3$values)
x4 <- inverse.rle(run3)
### Step 4: Subset x by x3 and x4 (x_sub) and create group number (group_sub)
keep <- x3 != -1
x_sub <- x[keep]
# Kept and separator runs alternate in x3, so for an x that starts with a
# kept run the kept ids are 1, 3, 5, ...; %/% 2 + 1 maps them to 1, 2, 3, ...
group_sub <- x4[keep] %/% 2 + 1
### Step 5: Split x_sub to get the final output (final_list)
final_list <- split(x_sub, f = group_sub)
final_list
$`1`
[1] 1.00000 1.00000 2.00005 1.00000 1.00000
$`2`
[1] 1 2 0 3 4
$`3`
[1] 1 2 3 1 3
Yet another solution using rle (twice) and inverse.rle.
n <- 3
# Flag the zeros, then treat zero runs shorter than n as ordinary values so
# that only the long zero runs stay flagged; re-encode to merge the runs.
r <- rle(as.integer(x == 0))
short_zero_run <- r$values == 1 & r$lengths < n
r$values[short_zero_run] <- 0
r <- rle(inverse.rle(r))
# Put a 1 on the first element of every run; all other elements stay 0.
group <- integer(length(x))
start <- 1
for (i in seq_along(r$values)) {
  group[start] <- 1L
  start <- start + r$lengths[i]
}
In the meantime I realized that the code that prepares the loop above, and the loop itself, could be greatly simplified. To make this complete, I will repeat the initial lines of code.
# Flag the zeros; zero runs shorter than n are treated as ordinary values.
r <- rle(as.integer(x == 0))
r$values[r$values == 1 & r$lengths < n] <- 0
# This is the simplification
# A new group starts wherever the expanded separator flag changes value.
group <- c(1L, diff(inverse.rle(r)) != 0)
res <- split(x, cumsum(group))
# Drop the all-zero groups (the separator runs). A logical mask is used
# instead of -which(...): when no group matches, which() returns integer(0)
# and res[-integer(0)] would throw away every group.
is_separator <- vapply(res, function(y) isTRUE(all(y == 0)), logical(1))
res <- res[!is_separator]
res
#$`1`
#[1] 1.00000 1.00000 2.00005 1.00000 1.00000
#
#$`3`
#[1] 1 2 0 3 4
#
#$`5`
#[1] 1 2 3 1 3
Here is a method with rle, split, and lapply
# get RLE
temp <- rle(x)
# replace values with grouping variables: the counter goes up by one at
# every run of more than two zeros
temp$values <- cumsum(temp$values == 0 & temp$lengths > 2)
# split on group and lapply through, dropping 0s at beginning which are start of each group
# (test y != 0 rather than cummax(y) > 0 so leading negative values are kept)
lapply(split(x, inverse.rle(temp)), function(y) y[cumsum(y != 0) > 0])
$`0`
[1] 1.00000 1.00000 2.00005 1.00000 1.00000
$`1`
[1] 1 2 0 3 4
$`2`
[1] 1 2 3 1 3
A second method, without lapply, is as follows
# Run-length encode x
temp <- rle(x)
# Flag the zero runs (more than two zeros) that separate the groups
is_break <- temp$values == 0 & temp$lengths > 2
# The running count of separator runs gives every run its group id
temp$values <- cumsum(is_break)
# Move the separator runs themselves into one extra group with the highest id
temp$values[is_break] <- max(temp$values) + 1L
# Build the list, one element per group id
split(x, inverse.rle(temp))
$`0`
[1] 1.00000 1.00000 2.00005 1.00000 1.00000
$`1`
[1] 1 2 0 3 4
$`2`
[1] 1 2 3 1 3
$`3`
[1] 0 0 0 0 0 0 0 0
Finally, you'd just drop the last list item, like head(split(x, inverse.rle(temp)), -1).