I have two dataframes, each with multiple rows per ID. For every row of the first dataframe, I need to return the row of the second dataframe that has the same ID and the closest date, together with its related data.
Here's my take using dplyr, based on the accepted answer. I wanted to have a bit more freedom on the grouping column.
#' Match each row of `df1` to the closest-dated row of `df2` within a group
#'
#' For every group value present in both data frames, each row of `df1` is
#' joined to the single row of `df2` (same group) whose date is nearest in
#' absolute time. Groups that occur in only one of the data frames are dropped.
#'
#' @param df1 Data frame whose rows are kept (one output row per `df1` row in
#'   a shared group).
#' @param df2 Data frame that is searched for the nearest date.
#' @param grp Unquoted name of the grouping column; it must exist in both
#'   data frames.
#' @param datecol Unquoted name of the date column; it must exist in both
#'   data frames and be convertible with `as.POSIXct()`.
#' @return A data frame built with `dplyr::bind_rows()`. Besides the group
#'   column it carries a helper column `indices` (position of the matched row
#'   inside its `df2` group); every other column shared by both inputs gets
#'   the usual `.x` / `.y` join suffixes. Rows of `df1` with a missing date
#'   (or whose group has no usable date in `df2`) get `NA` in the `df2`
#'   columns.
match_by_group_date <- function(df1, df2, grp, datecol) {
  # Capture the column expressions once, in the frame that owns the arguments,
  # so the nested closure below does not have to re-capture them.
  grp_quo <- dplyr::enquo(grp)
  date_quo <- dplyr::enquo(datecol)
  grp_name <- dplyr::quo_name(grp_quo)

  keys1 <- unique(dplyr::pull(df1, !!grp_quo))
  keys2 <- unique(dplyr::pull(df2, !!grp_quo))
  shared_keys <- intersect(keys1, keys2)

  matched <- lapply(shared_keys, function(key) {
    d1 <- dplyr::filter(df1, (!!grp_quo) == key)
    d2 <- dplyr::filter(df2, (!!grp_quo) == key)
    # seq_len() stays valid for an empty group (e.g. an NA key), unlike 1:n().
    d2 <- dplyr::mutate(d2, indices = seq_len(dplyr::n()))

    # Convert whole columns at once so the date class is honoured, then work
    # on plain seconds-since-epoch to keep the distance computation simple.
    d1_secs <- as.numeric(as.POSIXct(dplyr::pull(d1, !!date_quo)))
    d2_secs <- as.numeric(as.POSIXct(dplyr::pull(d2, !!date_quo)))

    nearest_index <- function(target) {
      pos <- which.min(abs(d2_secs - target))
      # which.min() returns integer(0) when every distance is NA.
      if (length(pos) == 0L) NA_real_ else as.numeric(pos)
    }

    d1 <- dplyr::mutate(
      d1,
      indices = vapply(d1_secs, nearest_index, numeric(1))
    )
    dplyr::left_join(d1, d2, by = c(grp_name, "indices"))
  })

  dplyr::bind_rows(matched)
}