I have a dataset with 500k appointments lasting between 5 and 60 minutes.
tdata <- structure(list(Start = structure(c(1325493000, 1325493600, 1325494200,
I am not entirely sure I understand your goal. Still, this might be of use:
# Example data, changed so that it actually contains concurrent appointments
# (rows 3 and 4 overlap in RoomA).
# The header names four columns while every data row has five fields, so
# read.table() uses the leading 1..6 field as row names.
# strip.white = TRUE removes the blank that follows each comma, which would
# otherwise end up inside the Location and Room strings.
DF <- read.table(text=" Start, End, Location, Room
1, 2012-01-02 08:30:00, 2012-01-02 08:40:00, LocationA, RoomA
2, 2012-01-02 08:40:00, 2012-01-02 08:50:00, LocationA, RoomA
3, 2012-01-02 08:50:00, 2012-01-02 09:55:00, LocationA, RoomA
4, 2012-01-02 09:00:00, 2012-01-02 09:10:00, LocationA, RoomA
5, 2012-01-02 09:00:00, 2012-01-02 09:10:00, LocationA, RoomB
6, 2012-01-02 09:10:00, 2012-01-02 09:20:00, LocationA, RoomB",
  header = TRUE, sep = ",", stringsAsFactors = FALSE, strip.white = TRUE)

# The timestamps are written year-month-day; parsing them as "%Y-%d-%m" would
# silently swap day and month (2 January would become 1 February).
timestamp_format <- "%Y-%m-%d %H:%M:%S"
DF$Start <- as.POSIXct(DF$Start, format = timestamp_format, tz = "GMT")
DF$End <- as.POSIXct(DF$End, format = timestamp_format, tz = "GMT")
# Turn the data frame into a data.table and add, by reference, numeric
# versions of the two timestamp columns (as.numeric() on POSIXct yields
# seconds since the epoch), which is the form the interval code works on.
library(data.table)
DT <- data.table(DF)
DT[, `:=`(
  Start_num = as.numeric(Start),
  End_num = as.numeric(End)
)]
# Count groups of overlapping intervals among the open intervals (s, e).
#
# Arguments:
#   s  numeric vector of interval start points
#   e  numeric vector of interval end points, same length as s
#
# Returns a single number. Every interval that overlaps k - 1 other intervals
# contributes 1 / k, so a set of k mutually overlapping intervals adds up to
# one "overlap event"; intervals that overlap nothing contribute 0.
#
# Because both ends are open, intervals that merely touch (one ends exactly
# when the next starts) are not treated as overlapping.
#
# Requires the 'intervals' package; it is called through its namespace so the
# function neither attaches a package as a side effect nor carries on after a
# failed require().
fun <- function(s, e) {
  # Fewer than two intervals can never overlap each other.
  if (length(s) < 2L) {
    return(0)
  }

  inter <- intervals::Intervals(
    cbind(s, e),
    closed = c(FALSE, FALSE),
    type = "R"
  )

  # The set is compared with itself, so each interval normally matches itself:
  # a match count of 1 means "no overlap with any other interval".
  io <- intervals::interval_overlap(inter, inter)
  match_counts <- lengths(io)

  # counts[k] = number of intervals with exactly k matches. Bin 1 is dropped
  # by value rather than by table position: if no interval is overlap-free,
  # dropping "the first table entry" would discard a real overlap group.
  counts <- tabulate(match_counts)
  sum((counts / seq_along(counts))[-1L])
}
# Count overlapping events separately for every Location / Room combination
DT[, fun(Start_num, End_num), by = c("Location", "Room")]
# Location Room V1
#1: LocationA RoomA 1
#2: LocationA RoomB 0
I haven't tested this thoroughly, and in particular I haven't checked how fast it is.