Check if a date is within an interval in R

前端 未结 6 922
暗喜
暗喜 2020-11-30 12:48

I have these three intervals defined:

YEAR_1  <- interval(ymd(\'2002-09-01\'), ymd(\'2003-08-31\'))
YEAR_2  <- interval(ymd(\'2003-09-01\'), ymd(\'20         


        
6条回答
  •  隐瞒了意图╮
    2020-11-30 13:35

    Here is my take on it all. I Like to keep things tidy ;)

    > ## load libraries
    > library(tidyverse)
    > library(lubridate)
    > 
    > ## define times
    > times <- c(1055289600, 1092182400, 1086220800, 1074556800, 1109289600, 
    +            1041897600, 1069200000, 1047427200, 1072656000, 1048636800, 1092873600, 
    +            1090195200, 1051574400, 1052179200, 1130371200, 1242777600, 1140652800, 
    +            1137974400, 1045526400, 1111104000, 1073952000, 1052870400, 1087948800, 
    +            1053993600, 1039564800, 1141603200, 1074038400, 1105315200, 1060560000, 
    +            1072051200, 1046217600, 1107129600, 1088553600, 1071619200, 1115596800, 
    +            1050364800, 1147046400, 1083628800, 1056412800, 1159747200, 1087257600, 
    +            1201478400, 1120521600, 1066176000, 1034553600, 1057622400, 1078876800, 
    +            1010880000, 1133913600, 1098230400, 1170806400, 1037318400, 1070409600, 
    +            1091577600, 1057708800, 1182556800, 1091059200, 1058227200, 1061337600, 
    +            1034121600, 1067644800, 1039478400, 1022198400, 1063065600, 1096329600, 
    +            1049760000, 1081728000, 1016150400, 1029801600, 1059350400, 1087257600, 
    +            1181692800, 1310947200, 1125446400, 1057104000, NA, 1085529600, 
    +            1037664000, 1091577600, 1080518400, 1110758400, 1092787200, 1094601600, 
    +            1169424000, 1232582400, 1058918400, 1021420800, 1133136000, 1030320000, 
    +            1060732800, 1035244800, 1090800000, 1129161600, 1055808000, 1060646400, 
    +            1028678400, 1075852800, 1144627200, 1111363200, 1070236800)
    > times <- tibble(time = as.POSIXct(times, origin = "1970-01-01", tz = "UTC")) %>% 
    +   mutate(time = as_date(time),
    +          duplicated = duplicated(time)) ## there are duplicated times!
    > 
    > 
    > ## define years
    > year <- c("YEAR_1", "YEAR_2", "YEAR_3")
    > interval <- c(interval(ymd("2002-09-01", tz = "UTC"), ymd("2003-08-31", tz = "UTC")),
    +               interval(ymd("2003-09-01", tz = "UTC"), ymd("2004-08-31", tz = "UTC")),
    +               interval(ymd("2004-09-01", tz = "UTC"), ymd("2005-08-31", tz = "UTC")))
    > years <- tibble(year, interval)
    > 
    > ## check data
    > times
    # A tibble: 100 x 2
       time       duplicated
                 
     1 2003-06-11 FALSE     
     2 2004-08-11 FALSE     
     3 2004-06-03 FALSE     
     4 2004-01-20 FALSE     
     5 2005-02-25 FALSE     
     6 2003-01-07 FALSE     
     7 2003-11-19 FALSE     
     8 2003-03-12 FALSE     
     9 2003-12-29 FALSE     
    10 2003-03-26 FALSE     
    # ... with 90 more rows
    > years
    # A tibble: 3 x 2
      year   interval                      
                        
    1 YEAR_1 2002-09-01 UTC--2003-08-31 UTC
    2 YEAR_2 2003-09-01 UTC--2004-08-31 UTC
    3 YEAR_3 2004-09-01 UTC--2005-08-31 UTC
    > 
    > ## create new indicator variavble
    > ##
    > ## join datasets (length = 3 x 100)
    > ## indicator for year
    > ## drop NAs
    > ## keep "time" and "active"
    > ## join with times to get back at full dataset
    > ## as duplications, keep only one of them
    > crossing(times, years) %>% 
    +   mutate(active = if_else(time %within% interval, year, NA_character_)) %>% 
    +   drop_na(active) %>% 
    +   select(time, active) %>% 
    +   right_join(times, by = "time") %>% 
    +   distinct() %>% 
    +   select(-duplicated)
    # A tibble: 100 x 2
       time       active
             
     1 2003-06-11 YEAR_1
     2 2004-08-11 YEAR_2
     3 2004-06-03 YEAR_2
     4 2004-01-20 YEAR_2
     5 2005-02-25 YEAR_3
     6 2003-01-07 YEAR_1
     7 2003-11-19 YEAR_2
     8 2003-03-12 YEAR_1
     9 2003-12-29 YEAR_2
    10 2003-03-26 YEAR_1
    # ... with 90 more rows
    

提交回复
热议问题