问题
This is an updated (and hopefully simplified) problem to one I posted earlier.
I have a custom function, the purpose of which is to optimize spend over a number of weeks, where spend is allocated to periods of high sales activity.
I'm happy with the function, but need a way to be able to iterate through the data a number of times. I was hoping to use the 'reduce' function to accomplish this but haven't had much luck.
Here is the initial data to feed into the function:
sales <- data.frame(salesx = c(3000, 2250,850,1800,1700,560,58,200,965,1525)
,week = seq(from = 1, to = 10, by = 1)
,uplift = c(0.04)
,slope = c(100))
spend <- data.frame(spend = seq(from = 1, to = 500, by = 1))
datasetfinal <- merge(spend,sales,all=TRUE)
And here is a somewhat simplified version of the function (all the function does is identify the best place to put $500 worth of spend based on sales activity...for each iteration I want to exclude the 'reverse' values from the spend data:
library(dplyr)
library(zoo)
library(data.table)
library(plyr)
library(sqldf)
optimizationfunc <- function(data) {
datasetfinal2 <- data %>% mutate(optimized = salesx*(uplift*(1-exp(-spend/slope))))
datasetfinal2$spend <- with(datasetfinal2, if ("reverse" %in% colnames(datasetfinal2)) spend - reverse else spend)
datasetfinal2 <- with(datasetfinal2, if ("reverse" %in% colnames(datasetfinal2)) within(datasetfinal2, rm(reverse)) else datasetfinal2)
datasetfinal2$optimized2 <- datasetfinal2$optimized/datasetfinal2$spend
datasetfinal2$spend <- ave(datasetfinal2$spend, datasetfinal2$week, FUN = seq_along)
datasetfinal2 <- datasetfinal2 %>% arrange(desc(optimized2))
datasetfinal2$counter <- seq.int(nrow(datasetfinal2))
datasetfinal3 <- datasetfinal2 %>% dplyr::filter(counter <= 500) %>% dplyr::mutate(value = optimized2*spend)
datasetfinal4 <- datasetfinal3 %>% group_by(week) %>% top_n(1, value) %>% dplyr::select(-salesx)
datasetfinal4 <- merge(datasetfinal4[, c('week', 'spend', 'optimized', 'optimized2', 'value')],sales,by="week",all = TRUE)
datasetfinal4[is.na(datasetfinal4)] <- 0
datasetfinal4 <- colwise(na.locf)(datasetfinal4)
#This is a filter I want to exclude from spend in the next run.
#So if it is 20 for week 1 I want to exclude the first $20 of spend.
datasetfinal4$randomfilter <- sample(100, size = nrow(datasetfinal4))
datasetfinal4$difference <- with(datasetfinal4, randomfilter - optimized)
datasetfinal4$difference <- with(datasetfinal4, ifelse(difference < 0, 0, difference))
datasetfinal4$reverse <- with(datasetfinal4, round(-log(1-(difference/salesx/uplift))*slope),1)
datasetfinal4$reverse[is.na(datasetfinal4$reverse)] <- 0
return(datasetfinal4)
}
Let's run the function:
datasetfinal4 <- optimizationfunc(datasetfinal)
Now I want to use the output of the function, to join back to the original data, and filter out 'spend' that is already allocated:
reversefunc <- function(data1, data2) {sqldf("select a.*, b.reverse from data1 a left join data2 b on a.week = b.week") %>% filter(spend > reverse) %>% dplyr::select(-reverse)}
datasetfinal5 <- reversefunc(datasetfinal, datasetfinal4)
This works fine, but I need to repeat the process a number of times (lets say 5) eg.
datasetfinal6 <- optimizationfunc(datasetfinal5)
datasetfinal7 <- reversefunc(datasetfinal5, datasetfinal6)
I was hoping the reduce function would work here but haven't had much luck. If I don't get any bites I'll have a go at simplifying it further.
There is a solution for a simple version of this problem here: R: run function over same dataframe multiple times
UPDATE So based on the answers below and elsewhere, this is pretty much what I want. Seems a little inefficient as running optimizationfunc twice:
iterationFunc <- function(x,...){
optimizedData <- optimizationfunc(x)
finalData <- reversefunc(x, optimizedData)
return(finalData)}
out <- Reduce(iterationFunc, 1:10, init=datasetfinal, accumulate = TRUE)
out2 <- lapply(out, function(x) optimizationfunc(x))
out3 <- lapply(out2, function(x) sum(x$value))
out4 <- ldply(out3, data.frame)
回答1:
require(purrr)
#put data into a list
dfList <- list(datasetfinal,datasetfinal4)
#pass list to reversefunc
finalDF <- dfList %>% reduce(reversefunc)
identical(datasetfinal5,finalDF)
[1] TRUE
I don't think this is really what you're trying to do though. Here's one way of iterating the function, I used your object names, which makes it kind of confusing, but I'm pretty sure it works. Note that datasetfinal5
is being re-written with the new output each time, and for
loop assumes 10 iterations.
iterationFunc <- function(x){
datasetfinal6 <- optimizationfunc(x)
datasetfinal7 <- reversefunc(x, datasetfinal6)
datasetfinal5 <- datasetfinal7
return(datasetfinal5)
}
for (i in 1:10){
iterationFunc(datasetfinal5)
finalData <- datasetfinal5
}
Below with better variable names:
finalData <- datasetfinal4
iterationFunc <- function(x){
optimizedData <- optimizationfunc(x)
finalData <- reversefunc(x, optimizedData)
return(finalData)
}
for (i in 1:10){
iterationFunc(finalData)
}
Try to use variable names that actually give valuable information about the object. Calling everything datasetfinal[1-10] makes it really hard to keep track of what's happening each time.
回答2:
My recommendation is to use a recursion
rf <- function(data, n, threshold) {
if (n <= threshold) {
reverse <- optimizationfunc(data)
new <- reversefunc(data, reverse)
rf(new, n+1, threshold)
} else {
return(data)
}
}
datasetfinalX <- rf(datasetfinal,1,5)
Your individual functions opitimizationfunc
and reversefunc
would still be declared outside of and before rf
---RETURNING ALL REVERSE DFs----
Adding return(reverse)
at the end might work, but I'm not able to test it...let me know if it works?
rf <- function(data, n, threshold) {
if (n <= threshold) {
reverse <- optimizationfunc(data)
new <- reversefunc(data, reverse)
rf(new, n+1, threshold)
} else {
return(data)
}
return(reverse)
}
来源:https://stackoverflow.com/questions/45242969/using-reduce-with-custom-function