Assume I have data $(t, y)$, where I expect a linear dependency $y(t)$. Furthermore, there exist attributes to each observation: par1, par2, par3.
The LASSO can come pretty close (although it still identifies too many effects):
# Cross-validated LASSO on all par1/par2/par3 interactions with t, followed by
# a plot of the observations against the fitted lines.
# Expects a data frame `mydata` with columns y, t, par1, par2 and par3.
library(glmnet)
library(ggplot2)

# I assume these are supposed to be factors:
mydata$par1 <- factor(mydata$par1)
mydata$par2 <- factor(mydata$par2)
mydata$par3 <- factor(mydata$par3)

# Create the model matrix; remove the intercept column since glmnet adds its
# own. drop = FALSE keeps the result a matrix whatever the number of columns.
x <- model.matrix(
  y ~ (par1 * par2 * par3) * t,
  data = mydata
)[, -1, drop = FALSE]

# Cross-validated LASSO (alpha = 1); the seed fixes the random fold assignment
set.seed(42)
fit <- cv.glmnet(x, mydata$y, intercept = TRUE, nfolds = 10, alpha = 1)
plot(fit)

# Coefficients at lambda.1se as a plain named vector. Stored under a name that
# does not shadow the coef() generic being called here.
lasso_coefs <- drop(as.matrix(coef(fit, s = "lambda.1se")))
lasso_coefs[lasso_coefs != 0]
#(Intercept) par230 t par12:t par230:t par3300:t
# 0.47542479 -0.27612966 0.75497711 -0.42493030 -0.15044371 0.03033057

# The groups: the three indicator flags are packed into the digits of one
# number so that every combination of them becomes its own factor level.
mydata$g <- factor(
  (mydata$par2 == 30) + 10 * (mydata$par1 == 2) + 100 * (mydata$par3 == 300)
)

# predict() returns a one-column matrix; flatten it so the data frame holds an
# ordinary numeric column rather than an embedded matrix.
mydata$pred.1se <- as.numeric(predict(fit, newx = x, s = "lambda.1se"))

ggplot(mydata, aes(x = t, color = g)) +
  geom_point(aes(y = y)) +
  geom_line(aes(y = pred.1se))
You can then calculate the desired intercepts and slopes from the coefficients.