问题
I'm working with a huge data frame whose structure is similar to the following. I use output_reg to store the slope and intercept for each treatment, but I also need the r.squared for each lm(y ~ x), stored in another column alongside the other two. Any hints on how to do that?
library(plyr)
# Example data: three fields (treatments) with three observations each.
field <- c("t1", "t1", "t1", "t2", "t2", "t2", "t3", "t3", "t3")
predictor <- c(4.2, 5.3, 5.4, 6, 7, 8.5, 9, 10.1, 11)
response <- c(5.1, 5.1, 2.4, 6.1, 7.7, 5.5, 1.99, 5.42, 2.5)
# Spell out FALSE: the shorthand F is an ordinary variable and can be reassigned.
my_df <- data.frame(field, predictor, response, stringsAsFactors = FALSE)

B <- unique(my_df$field)
# Preallocate one slot per field instead of growing the list inside the loop.
output_reg <- vector("list", length(B))
# seq_along() gives an empty sequence when B is empty, whereas 1:length(B)
# would iterate over c(1, 0).
for (i in seq_along(B)) {
  index <- my_df[my_df$field == B[i], ]
  x <- index$predictor
  y <- index$response
  output_reg[[i]] <- lm(y ~ x) # gets estimates for each field
}
Thanks
回答1:
r.squared
can be accessed via the summary of the model, try this:
# Fit the simple linear regression of y on x.
m <- lm(formula = y ~ x)
# R-squared is an element of the model summary, so pull it from there.
rs <- summary(m)[["r.squared"]]
The summary object of the linear regression result contains almost everything you need:
B <- unique(my_df$field)
# Preallocate one slot per field instead of growing the list inside the loop.
output_reg <- vector("list", length(B))
# seq_along() gives an empty sequence when B is empty, whereas 1:length(B)
# would iterate over c(1, 0).
for (i in seq_along(B)) {
  index <- my_df[my_df$field == B[i], ]
  x <- index$predictor
  y <- index$response
  m <- lm(y ~ x)
  s <- summary(m) # get the summary of the model
  # Keep the coefficient estimates (intercept and slope) plus r.squared as one
  # named numeric vector per field.
  output_reg[[i]] <- c(s$coefficients[, "Estimate"], r.squared = s$r.squared)
}
output_reg
#[[1]]
#(Intercept) x r.squared
# 10.7537594 -1.3195489 0.3176692
#[[2]]
#(Intercept) x r.squared
# 8.8473684 -0.3368421 0.1389040
#[[3]]
#(Intercept) x r.squared
#-0.30500000 0.35963455 0.03788593
To bind the result together:
# Stack the per-field named vectors row-wise; rbind is called once with every
# element of output_reg as an argument.
do.call(rbind, output_reg)
# (Intercept) x r.squared
# [1,] 10.753759 -1.3195489 0.31766917
# [2,] 8.847368 -0.3368421 0.13890396
# [3,] -0.305000 0.3596346 0.03788593
回答2:
Check out the broom package and sprinkle in some dplyr
(see this vignette):
library(broom)
library(dplyr)
# Fit one model per field and return the glance() summary row for each.
# NOTE(review): the formula here is predictor ~ response, the reverse of the
# question's lm(y ~ x) with y = response. With a single predictor r.squared is
# the same in both directions, but residual-based columns such as sigma and
# deviance describe this reversed fit.
my_df %>%
group_by(field) %>%
do(glance(lm(predictor ~ response, data = .))) #also see do(tidy(...))
# field r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC deviance df.residual
# <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <int> <dbl> <dbl> <dbl> <dbl> <int>
# 1 t1 0.31766917 -0.3646617 0.7778175 0.46556474 0.6188153 2 -1.855107 9.710214 7.006051 0.605000 1
# 2 t2 0.13890396 -0.7221921 1.6513038 0.16131065 0.7568653 2 -4.113593 14.227185 11.523022 2.726804 1
# 3 t3 0.03788593 -0.9242281 1.3894755 0.03937779 0.8752903 2 -3.595676 13.191352 10.487189 1.930642 1
Alternatively, save the regressions first:
# Keep the fitted lm objects in a list-column named `fit`, one row per field.
regressions <- my_df %>%
  group_by(field) %>%
  do(fit = lm(predictor ~ response, data = .))
# Summarise the stored models: first with tidy(), then with glance().
tidy(regressions, fit)
glance(regressions, fit)
回答3:
You can do the following using purrr
# library() stops with an error if purrr is not installed; require() would only
# return FALSE and let the pipeline fail later with a less obvious message.
library(purrr)
# NOTE(review): slice_rows() and by_slice() appear to have been moved from purrr
# to the purrrlyr package in later releases -- confirm against the installed
# version before running.
my_df %>%
  slice_rows("field") %>%
  by_slice(partial(lm, predictor ~ response), .labels = FALSE) %>%
  flatten() %>%
  # For each fitted model, combine its coefficients with its r.squared.
  map(~ c(coef(.), r.squared = summary(.)$r.squared))
Which gives you:
[[1]]
(Intercept) response r.squared
5.9777778 -0.2407407 0.3176692
[[2]]
(Intercept) response r.squared
9.8195876 -0.4123711 0.1389040
[[3]]
(Intercept) response r.squared
9.68534163 0.10534562 0.03788593
If you want a data.frame back instead, use this as the last line:
# Pipeline fragment: t() turns each model's named vector into a one-row matrix,
# as.data.frame() converts it, and map_df() combines the rows into one data frame.
map_df(~as.data.frame(t(c(coef(.), r.squared=summary(.)$r.squared))))
回答4:
You can create a data frame with model stats like this:
# `model` is assumed to be an already-fitted lm object -- it is not defined here.
# One-column data frame of the coefficients; data.frame() derives the column
# name from the argument expression.
model_stats <- data.frame(model$coefficients)
# Append r.squared as an extra row labelled "r.sq".
model_stats <- rbind(model_stats, r.sq = summary(model)$r.squared)
来源:https://stackoverflow.com/questions/39354532/how-to-get-r-squared-for-each-regression