问题
Data:
set.seed(42)
df1 = data.frame(
Date = seq.Date(as.Date("2018-01-01"),as.Date("2018-01-30"),1),
value = sample(1:30),
Y = sample(c("yes", "no"), 30, replace = TRUE)
)
df2 = data.frame(
Date = seq.Date(as.Date("2018-01-01"),as.Date("2018-01-30"),7)
)
I want for each date in df2$Date
calculate the sum of df1$Value if date in df1$Date
falls within df2$Date
and df2$Date
+6
Inshort I need to calculate weekly sums
回答1:
Using data.table, create a range start/end, then merge on overlap, then get sum over group:
library(data.table)
df1$start <- df1$Date
df1$end <- df1$Date
df2$start <- df2$Date
df2$end <- df2$Date + 6
setDT(df1, key = c("start", "end"))
setDT(df2, key = c("start", "end"))
foverlaps(df1, df2)[, list(mySum = sum(value)), by = Date ]
# Date mySum
# 1: 2018-01-01 138
# 2: 2018-01-08 96
# 3: 2018-01-15 83
# 4: 2018-01-22 109
# 5: 2018-01-29 39
回答2:
Check out library lubridate and dplyr, those two are quiet common.
library(lubridate)
library(dplyr)
df1$last_week_day <- ceiling_date(df1$Date, "week") + 1
df1 %>% group_by(last_week_day) %>% summarize(week_value = sum(value))
回答3:
We can use fuzzyjoin
library(dplyr)
library(fuzzyjoin)
df2$EndDate <- df2$Date+6
fuzzy_left_join(
df1, df2,
by = c(
"Date" = "Date",
"Date" = "EndDate"
), match_fun = list(`>=`, `<=`)) %>%
group_by(Date.y) %>% summarise(Sum=sum(value))
# A tibble: 5 x 2
Date.y Sum
<date> <int>
1 2018-01-01 138
2 2018-01-08 96
3 2018-01-15 83
4 2018-01-22 109
5 2018-01-29 39
来源:https://stackoverflow.com/questions/55354071/how-to-do-sumifs-in-r