I have a data frame containing an ID, a start date and an end date. My data is ordered by ID, start, end (in this sequence).
Now I want all rows with the same ID that have overlapping or directly adjacent periods to be merged into a single row.
I did it slightly differently, to avoid having to delete empty rows at the end:
# Merge overlapping or directly adjacent episodes that share the same ID.
#
# Two consecutive rows are folded into one episode when they have the same
# ID and the later row starts no later than one day after the current
# episode's end (i.e. there is no gap of a full day between them).
#
# Args:
#   theData: data frame with the columns "ID", "START" and "END". START and
#            END are expected to be Date (or day-count) values. Rows are
#            assumed to be sorted by ID, then START, then END; the function
#            only ever compares a row with the episode built up so far.
#
# Returns:
#   A data frame with the columns ID, START and END (START/END as Date),
#   holding one row per merged episode. An input without rows yields an
#   empty data frame with the same columns.
smoothingEpisodes <- function (theData) {
  theLength <- nrow(theData)

  # Nothing to merge: return an empty, correctly typed result instead of
  # indexing a non-existent first row.
  if (theLength == 0L) {
    emptyDate <- as.Date(numeric(0), origin = "1970-01-01")
    return(data.frame(ID = integer(0), START = emptyDate, END = emptyDate))
  }

  # Pull the columns out once; row-wise data frame indexing inside the loop
  # is needlessly slow.
  ids <- theData[, "ID"]
  starts <- theData[, "START"]
  ends <- theData[, "END"]

  curId <- ids[1]
  curStart <- starts[1]
  curEnd <- ends[1]

  # Preallocate for the worst case (no rows get merged); only the first j
  # entries are returned.
  out.1 <- integer(length = theLength)
  out.2 <- out.3 <- numeric(length = theLength)
  j <- 1

  # seq_len(n)[-1L] is empty for a single-row input, whereas 2:n would
  # count backwards (2, 1) and read past the end of the data.
  for (i in seq_len(theLength)[-1L]) {
    nextId <- ids[i]
    nextStart <- starts[i]
    nextEnd <- ends[i]
    if (curId != nextId || (curEnd + 1) < nextStart) {
      # Different ID or a real gap: close the current episode and start a
      # new one from this row.
      out.1[j] <- curId
      out.2[j] <- curStart
      out.3[j] <- curEnd
      j <- j + 1
      curId <- nextId
      curStart <- nextStart
      curEnd <- nextEnd
    } else {
      # Overlapping or adjacent: extend the current episode. max() is needed
      # because a later row may end before the current episode does.
      curEnd <- max(curEnd, nextEnd, na.rm = TRUE)
    }
  }

  # Flush the episode that was still open when the loop finished.
  out.1[j] <- curId
  out.2[j] <- curStart
  out.3[j] <- curEnd

  # Dates were stored as plain day counts in the numeric buffers; convert
  # them back to Date for the result.
  keep <- seq_len(j)
  theOutput <- data.frame(
    ID = out.1[keep],
    START = as.Date(out.2[keep], origin = "1970-01-01"),
    END = as.Date(out.3[keep], origin = "1970-01-01")
  )
  theOutput
}
Quite a big improvement over my original version!