R: Calculate moving maximum slope by week accounting for factors

亡梦爱人 提交于 2020-01-03 10:54:52

问题


I have a data.frame that includes heating degree day (HDD) below.

structure(list(WinterID = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L), .Label = c("2002", "2002_2003", "2003", "2003_2004", 
"2004", "2004_2005", "2005", "2005_2006", "2006", "2006_2007", 
"2007", "2007_2008", "2008"), class = "factor"), Date = structure(c(11968, 
11969, 11970, 11971, 11972, 11973, 11974, 11975, 11976, 11977, 
11978, 11979, 11980, 11981, 11982, 11983, 11984, 11985, 11986, 
11987, 11988, 11989, 11990, 11991, 11992, 11993, 11994, 11995, 
11996, 11997, 11998, 11999, 12000, 12001, 12002, 12003, 12004, 
12005, 12006, 12007, 12008, 12009, 12010, 12011, 12012, 12013, 
12014, 12015, 12016, 12017, 12018, 12019, 12020, 12021, 12022, 
12023, 12024, 12025, 12026, 12027, 12028, 12029, 12030, 12031, 
12032, 12033, 12034, 12035, 12036, 12037, 12038, 12039, 12040, 
12041, 12042, 12043, 12044, 12045, 12046, 12047, 12048, 12049, 
12050, 12051, 12052, 12053, 12054, 12055, 12056, 12057, 12058, 
12059, 12060, 12061, 12062, 12063, 12064, 12065, 12066, 12067, 
12068, 12069, 12070, 12071, 12072, 12073, 12074, 12075, 12076, 
12077, 12078, 12079, 12080, 12081, 12082, 12083, 12084, 12085, 
12086, 12087, 12088, 12089, 12090, 12091, 12092, 12093, 12094, 
12095, 12096, 12097, 12098, 12099, 12100, 12101, 12102, 12103, 
12104, 12105, 12106, 12107, 12108, 12109, 12110, 12111, 12112, 
12113, 12114, 12115, 12116, 12117, 12118, 12119, 12120, 12121, 
12122, 12123, 12124, 12125, 12126, 12127, 12128, 12129, 12130, 
12131, 12132, 12133, 12134, 12135, 12136, 12137, 12138, 12139, 
12140, 12141, 12142, 12010, 12011, 12014, 12015, 12017, 12023, 
12024, 12025, 12026, 12027, 12028, 12029, 12030, 12042, 12070, 
12071, 12075, 12076, 12077, 12078, 12079, 12080, 12082, 12083, 
12084), class = "Date"), SiteID = structure(c(1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L), .Label = "NW_SB", class = "factor"), SubstrateConcat = structure(c(2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("B_A", "B_B", "B_E"), class = "factor"), 
    HDD = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0.246666666666667, 7.12666666666667, 10.6133333333333, 
    2.96666666666667, 0, 0.0933333333333337, 7.31333333333334, 
    10.7133333333333, 6.20000000000001, 2.70666666666667, 6.20000000000001, 
    3.88666666666667, 16.5866666666667, 28.3933333333333, 12.98, 
    21.6133333333333, 19.14, 12.6666666666667, 7.52, 3.33333333333334, 
    18.2933333333333, 4.14666666666667, 2.17333333333334, 26.08, 
    1.38, 7.48000000000001, 36.5733333333333, 53.4666666666667, 
    98.4533333333333, 109.093333333333, 104.14, 80.2466666666667, 
    47.0333333333333, 14.7133333333333, 15.7266666666667, 21.1066666666667, 
    5.07333333333334, 0.613333333333334, 6.18000000000001, 29.5666666666667, 
    45.5333333333333, 59.5666666666667, 91.44, 85.38, 51.1, 25.9666666666667, 
    14.8266666666667, 34.48, 79.16, 90.08, 66.3533333333333, 
    75.14, 97.1733333333333, 83.3066666666667, 50.0133333333333, 
    37.2733333333333, 88.9133333333334, 101.926666666667, 100.56, 
    99.2933333333334, 97.66, 89.6466666666667, 110.613333333333, 
    79.1466666666667, 92.6066666666667, 71.7133333333333, 31.32, 
    27.02, 39.02, 98.14, 62.5866666666667, 46.7933333333333, 
    47.5133333333333, 48.3666666666667, 25.5333333333333, 13.6, 
    17.9133333333333, 14.16, 7.98666666666667, 3.44, 1.86666666666667, 
    12.66, 0, 7.09333333333334, 21.3266666666667, 40.52, 18.8466666666667, 
    37.8466666666667, 33.42, 33.7133333333333, 15.6133333333333, 
    0.720000000000001, 2.31333333333334, 12.3066666666667, 8.48666666666667, 
    2.86, 0, 0, 0, 6.98666666666667, 6.67333333333334, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6.58000000000001, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13.42, 30.5266666666667, 
    1.12, 28.5066666666667, 6.82666666666667, 10.3933333333333, 
    3.18, 11.0466666666667, 0, 0, 0)), .Names = c("WinterID", 
"Date", "SiteID", "SubstrateConcat", "HDD"), row.names = c(NA, 
200L), class = "data.frame")

I'm trying to calculate the moving maximum slope over 7 days beginning on 4 November of each year without using a loop. This moving maximum slope needs to account for WinterID, SiteID, and SubstrateConcat.

For clarification, the calculation I'm trying to obtain is this:

Slope=(max-min)/7, where:
Max= (i-3)+(i-2)+(i-1)+i+(i+1)+(i+2)+(i+3) 
Min= (i-3)

(((i-3)+(i-2)+(i-1)+i+(i+1)+(i+2)+(i+3)) - (i-3))/7

So, using a real example starting with 2002-11-19 as i:

(0+0.24+7.13+10.61+2.97+0+0.97) - 0)/7 = 3.13

I tried using zoo package rollmean, however, I could not figure out how to account for WinterID, SiteID, and SubstrateConcat. This gave me an "order.by" error where my Date values were not unique, since I have dates with different SubstrateConcat and WinterID criteria. As I enter more data into the database, there will eventually be dates with multiple SiteID criteria as well.

I thought maybe xts, TTR and ROC would be what I could use as in this question: Maximum slope for a given interval each day. But again, I don't understand how to specify the multiple group factors, as well as going three days forward and three days back as in align=center with rollmean.

Will someone please point me in the right direction here? Will one of the above functions combined with ddply work?

Thank you!

EDITED to include the answer after the answer supplied by @eddi.

dt <- data.table(df)
dt[, MaxSlope := if(length(HDD)<7) {rep(NA_real_, length(HDD))} else {filter(HDD, c(1,1,1,1,1,1,0)/7)}, by=list(Winter, Site, Substrate)]

This code works perfectly for dates that are continuous. Can anyone recommend how to tweak this code for data that has missing dates? For instance, I have:

   Date  Temp 
 Nov 21  14 
 Nov 23  10 
 Nov 24  12 
 Nov 27  11 
 Nov 28  7 
 Nov 29  9 
 Nov 30  10 
 Dec 01  12 
 Dec 02  8  
 Dec 03  7

I don't want the Max Slope calculated for Nov 21, Nov 23 and Nov 24 because there isn't consecutive data for the calculation. Instead, I want "NA" inserted. Can the existing code above, be modified to accommodate this?


回答1:


Sounds like you need filter (or you could also use one of the rolling mean/sum functions). And the grouping part is easiest to do with data.table:

library(data.table)
dt = data.table(your_df)

dt[, filter(HDD, c(1,1,1,1,1,1,0))/7,
     by = list(WinterID, SiteID, SubstrateConcat)]



回答2:


I couldn't get working solution with ddply, though I didn't spend much time debugging. Here's a solution using base functions (assuming your object is named hdd).

# split your object into groups
shdd <- split(hdd, hdd[,c("WinterID","SiteID","SubstrateConcat")], drop=TRUE)
# create a function to apply to each group
f <- function(d) transform(d, MaxSlopeHDD=rollmax(c(NA,diff(d$HDD)),7,fill=NA))
# apply the function to each group and rbind the results together
shdd <- do.call(rbind, lapply(shdd, f))


来源:https://stackoverflow.com/questions/18624915/r-calculate-moving-maximum-slope-by-week-accounting-for-factors

标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!