I have these three intervals defined:
YEAR_1 <- interval(ymd(\'2002-09-01\'), ymd(\'2003-08-31\'))
YEAR_2 <- interval(ymd(\'2003-09-01\'), ymd(\'20
Here is my take on it all. I Like to keep things tidy ;)
> ## load libraries
> library(tidyverse)
> library(lubridate)
>
> ## define times
> times <- c(1055289600, 1092182400, 1086220800, 1074556800, 1109289600,
+ 1041897600, 1069200000, 1047427200, 1072656000, 1048636800, 1092873600,
+ 1090195200, 1051574400, 1052179200, 1130371200, 1242777600, 1140652800,
+ 1137974400, 1045526400, 1111104000, 1073952000, 1052870400, 1087948800,
+ 1053993600, 1039564800, 1141603200, 1074038400, 1105315200, 1060560000,
+ 1072051200, 1046217600, 1107129600, 1088553600, 1071619200, 1115596800,
+ 1050364800, 1147046400, 1083628800, 1056412800, 1159747200, 1087257600,
+ 1201478400, 1120521600, 1066176000, 1034553600, 1057622400, 1078876800,
+ 1010880000, 1133913600, 1098230400, 1170806400, 1037318400, 1070409600,
+ 1091577600, 1057708800, 1182556800, 1091059200, 1058227200, 1061337600,
+ 1034121600, 1067644800, 1039478400, 1022198400, 1063065600, 1096329600,
+ 1049760000, 1081728000, 1016150400, 1029801600, 1059350400, 1087257600,
+ 1181692800, 1310947200, 1125446400, 1057104000, NA, 1085529600,
+ 1037664000, 1091577600, 1080518400, 1110758400, 1092787200, 1094601600,
+ 1169424000, 1232582400, 1058918400, 1021420800, 1133136000, 1030320000,
+ 1060732800, 1035244800, 1090800000, 1129161600, 1055808000, 1060646400,
+ 1028678400, 1075852800, 1144627200, 1111363200, 1070236800)
> times <- tibble(time = as.POSIXct(times, origin = "1970-01-01", tz = "UTC")) %>%
+ mutate(time = as_date(time),
+ duplicated = duplicated(time)) ## there are duplicated times!
>
>
> ## define years
> year <- c("YEAR_1", "YEAR_2", "YEAR_3")
> interval <- c(interval(ymd("2002-09-01", tz = "UTC"), ymd("2003-08-31", tz = "UTC")),
+ interval(ymd("2003-09-01", tz = "UTC"), ymd("2004-08-31", tz = "UTC")),
+ interval(ymd("2004-09-01", tz = "UTC"), ymd("2005-08-31", tz = "UTC")))
> years <- tibble(year, interval)
>
> ## check data
> times
# A tibble: 100 x 2
time duplicated
1 2003-06-11 FALSE
2 2004-08-11 FALSE
3 2004-06-03 FALSE
4 2004-01-20 FALSE
5 2005-02-25 FALSE
6 2003-01-07 FALSE
7 2003-11-19 FALSE
8 2003-03-12 FALSE
9 2003-12-29 FALSE
10 2003-03-26 FALSE
# ... with 90 more rows
> years
# A tibble: 3 x 2
year interval
1 YEAR_1 2002-09-01 UTC--2003-08-31 UTC
2 YEAR_2 2003-09-01 UTC--2004-08-31 UTC
3 YEAR_3 2004-09-01 UTC--2005-08-31 UTC
>
> ## create new indicator variavble
> ##
> ## join datasets (length = 3 x 100)
> ## indicator for year
> ## drop NAs
> ## keep "time" and "active"
> ## join with times to get back at full dataset
> ## as duplications, keep only one of them
> crossing(times, years) %>%
+ mutate(active = if_else(time %within% interval, year, NA_character_)) %>%
+ drop_na(active) %>%
+ select(time, active) %>%
+ right_join(times, by = "time") %>%
+ distinct() %>%
+ select(-duplicated)
# A tibble: 100 x 2
time active
1 2003-06-11 YEAR_1
2 2004-08-11 YEAR_2
3 2004-06-03 YEAR_2
4 2004-01-20 YEAR_2
5 2005-02-25 YEAR_3
6 2003-01-07 YEAR_1
7 2003-11-19 YEAR_2
8 2003-03-12 YEAR_1
9 2003-12-29 YEAR_2
10 2003-03-26 YEAR_1
# ... with 90 more rows