I have two data frames, each with multiple rows per ID. For each row of the first data frame, I need to return the closest date and its related data from the second data frame, matched on the ID and the date of the first data frame.
Here is a solution that uses only base R:
# For every ID present in both data frames, pair each row of df1 with the row
# of df2 (same ID) whose dateTarget is closest to the df1 row's dateTarget.
z <- lapply(intersect(df1$ID, df2$ID), function(id) {
  # subset() drops rows where the comparison is NA, so an NA id (which
  # intersect() can return) yields zero-row frames here.
  d1 <- subset(df1, ID == id)
  d2 <- subset(df2, ID == id)
  # Position within d2 of the nearest date for each d1 row. vapply() always
  # returns an integer vector, even when d1 has no rows.
  d1$indices <- vapply(d1$dateTarget, function(d) {
    nearest <- which.min(abs(d2$dateTarget - d))
    # which.min() returns integer(0) when every difference is NA; use NA so
    # the row finds no partner in the merge below and is dropped.
    if (length(nearest) == 0L) NA_integer_ else nearest
  }, integer(1))
  # seq_len() gives integer(0) for a zero-row d2, whereas 1:nrow(d2) would
  # give c(1, 0) and make the assignment fail.
  d2$indices <- seq_len(nrow(d2))
  # Inner join on ID and the temporary row index; shared column names other
  # than the keys get .x (df1) and .y (df2) suffixes.
  merge(d1, d2, by = c("ID", "indices"))
})
# Stack the per-ID results and drop the temporary join key.
z2 <- do.call(rbind, z)
z2$indices <- NULL
print(z2)
# ID dateTarget.x Value dateTarget.y ValueMatch
# 1 3 2015-11-14 47 2015-07-06 48
# 2 3 2015-12-08 98 2015-07-06 48
# 3 3 2015-02-22 52 2015-03-09 94
# 4 3 2014-11-17 68 2014-12-15 95
# 5 3 2013-05-30 91 2013-04-01 85
# 6 1 2013-11-04 70 2014-02-21 35
# 7 1 2014-12-29 18 2014-12-06 88
# 8 2 2013-01-14 52 2013-04-08 77
# 9 2 2015-07-29 97 2015-08-01 68
# 10 2 2015-06-15 98 2015-08-01 68