Add the new regression line but keep the regression lines from previous runs in R

て烟熏妆下的殇ゞ 提交于 2019-12-08 03:31:11

问题


Background

I have a function called TPN (R code is below the picture). When you run this function, it produces two plots (see picture below). The bottom-row plot samples from the top-row plot and then adds a red regression line. Each time you run the TPN function, the bottom-row plot produces a new red-colored regression line.

Question

In the bottom-row plot, I was wondering if there is a way I could KEEP the regression lines from previous runs each time I run the TPN function (see picture below)?

That is, each time that I run a new TPN function the regression line from a previous run is kept in its place (probably in a color other than "red" for distinction purposes), and the new regression line is just added to he bottom-row plot?

############## Input Values #################
 TPN = function(      each.sub.pop.n = 150, 
                      sub.pop.means = 20:10, 
                      predict.range = 10:0, 
                      sub.pop.sd = .75,
                      n.sample = 2   ) {
#############################################
 par( mar = c(2, 4.1, 2.1, 2.1) )

 m = matrix( c(1, 2), nrow = 2, ncol = 1 ); layout(m)
 set.seed(2460986)
 Vec.rnorm <- Vectorize(function(n, mean, sd) rnorm(n, mean, sd), 'mean')

 y <- c( Vec.rnorm(each.sub.pop.n, sub.pop.means, sub.pop.sd) )
 set.seed(NULL)
 x <- rep(predict.range, each = each.sub.pop.n)

 plot(x, y, ylim = range(y)) ## Top-Row Plot


  sample <- lapply(split(y, x), function(z) sample(z, n.sample, replace = TRUE))
  sample <- data.frame(y = unlist(sample), 
                 x = as.numeric(rep(names(sample), each = n.sample)))

     x = sample$x  ;  y = sample$y

     plot(x, y, ylim = range(y))  #### BOTTOM-ROW PLOT

     abline(lm(y ~ x), col = 'red') # Regression Line

      }
      ## TEST HERE:
      TPN()

回答1:


It ain't that easy. I made another function and edit the first one as well.

To summarize what I have done:

I made the first function to set par(new = TRUE) at the end of it. Also, set the color for points in the bottom row plot to be white only for formatting. You can get rid of col = 'white', bg = 'white' if you wish.

Then, in the second function top row plot does not get plotted and yaxis won't be added to the bottom row plot from each "test".

Look below:

############## Input Values #################
TPN = function(      each.sub.pop.n = 150, 
                     sub.pop.means = 20:10, 
                     predict.range = 10:0, 
                     sub.pop.sd = .75,
                     n.sample = 2   ) {
  #############################################
  par( mar = c(2, 4.1, 2.1, 2.1) )

  m = matrix( c(1, 2), nrow = 2, ncol = 1 ); layout(m)
  set.seed(2460986)
  Vec.rnorm <- Vectorize(function(n, mean, sd) rnorm(n, mean, sd), 'mean')

  y <- c( Vec.rnorm(each.sub.pop.n, sub.pop.means, sub.pop.sd) )
  set.seed(NULL)
  x <- rep(predict.range, each = each.sub.pop.n)

  par(new = FALSE)
  plot(x, y, ylim = range(y)) ## Top-Row Plot


  sample <- lapply(split(y, x), function(z) sample(z, n.sample, replace = TRUE))
  sample <- data.frame(y = unlist(sample), 
                       x = as.numeric(rep(names(sample), each = n.sample)))

  x = sample$x  ;  y = sample$y

  plot(x, y, ylim = range(y), col = 'white', bg = 'white')  #### BOTTOM-ROW PLOT
  abline(lm(y ~ x), col = 'red') # Regression Line
  par(new = TRUE)
}

The second one does not plot the top row one:

############## Input Values #################
TPN2 = function(      each.sub.pop.n = 150, 
                     sub.pop.means = 20:10, 
                     predict.range = 10:0, 
                     sub.pop.sd = .75,
                     n.sample = 2   ) {
  #############################################
  par( mar = c(2, 4.1, 2.1, 2.1) )

  m = matrix( c(1, 2), nrow = 2, ncol = 1 ); layout(m)
  set.seed(2460986)
  Vec.rnorm <- Vectorize(function(n, mean, sd) rnorm(n, mean, sd), 'mean')

  y <- c( Vec.rnorm(each.sub.pop.n, sub.pop.means, sub.pop.sd) )
  set.seed(NULL)
  x <- rep(predict.range, each = each.sub.pop.n)

  #par(new = FALSE)                           #comment-out
  #plot(x, y, ylim = range(y)) ##Top-Row Plot #comment-out


  sample <- lapply(split(y, x), function(z) sample(z, n.sample, replace = TRUE))
  sample <- data.frame(y = unlist(sample), 
                       x = as.numeric(rep(names(sample), each = n.sample)))

  x = sample$x  ;  y = sample$y

  plot(x, y, ylim = range(y), axes = FALSE,  col = 'white', bg = 'white') ##BOTTOM-ROW PLOT
  abline(lm(y ~ x), col = 'blue') # Regression Line
  par(new = TRUE)
}

Then your test would be like this:

## TEST HERE:
TPN()

TPN2()
TPN2()
TPN2()

This is the output:




回答2:


A simple way to do what you want is to change your main effect (currently none) to return an accumulation of previous regressions and your side effect (plotting) to loop through these previous regressions (in blue) in addition to the current one (in red).

Another tip: you can use the abline(reg=lm(y~x)) argument and just accumulate the lm objects in a list. It's not necessary to store coefficients and intercepts separately as suggested in the other answer. Keeping the lm objects is also a good idea in case you want to go back and look at average R-squared, etc. -- you couldn't do that using only the coefficients.

Your new function could look like:

TPN.accum <- function(  each.sub.pop.n = 150, 
                        sub.pop.means = 20:10, 
                        predict.range = 10:0, 
                        sub.pop.sd = .75,
                        n.sample = 2,
                        lm.history = list() # the accumulator
                       ){
  par( mar = c(2, 4.1, 2.1, 2.1) )
  m <- matrix( c(1, 2), nrow = 2, ncol = 1 ); layout(m)
  set.seed(2460986)
  Vec.rnorm <- Vectorize(function(n, mean, sd) rnorm(n, mean, sd), 'mean')

  y <- c( Vec.rnorm(each.sub.pop.n, sub.pop.means, sub.pop.sd) )
  set.seed(NULL)
  x <- rep(predict.range, each = each.sub.pop.n)

  plot(x, y, ylim = range(y))               ### Top-Row Plot

  sample <- lapply(split(y, x), function(z) sample(z, n.sample, replace = TRUE))
  sample <- data.frame(y = unlist(sample), 
                       x = as.numeric(rep(names(sample), each = n.sample)))

  x <- sample$x  ;  y <- sample$y

  lm.current <- lm(y~x)                     # the current regression

  plot(x, y, ylim = range(y))               ### Bottom-Row Plot

  abline(reg = lm.current, col = 'red')     # plot current regression (red)
  for( i in seq_along(lm.history) ){
    abline(reg=lm.history[[i]], col='blue') # plot any previous regressions (blue)
  }
  return(c(lm.history, list(lm.current)))   # append current regression to accumulator
}

To initialize it and then run it repeatedly, just do something like:

tpn.history <- TPN.accum()
for (i in 1:5) tpn.history <- TPN.accum(lm.history=tpn.history)

And your output will look like:




回答3:


I propose two possibilities:

  1. Use par(mfg) to define on which panel to draw, so that you can add new points or lines on any of the two. For the color, I propose to add options saying if this is the first plot or the last plot of the series.
  2. Store the coefficients of the abline to be used on other plots.

Use par(mfg)

I used some transparent color so that we do not see all superimposition of each iteration. Depending on what you want to achieve, you can modify this.

############## Input Values #################
TPN <- function(each.sub.pop.n = 150, 
                sub.pop.means = 20:10, 
                predict.range = 10:0, 
                sub.pop.sd = .75,
                n.sample = 2,   
                plot = TRUE,
                first = FALSE,
                last = FALSE) {
  #############################################

  if (plot & first) {
    plot.new()
    m <- matrix( c(1, 2), nrow = 2, ncol = 1 ); layout(m)
    par( mar = c(2, 4.1, 2.1, 2.1) )
  }

  set.seed(2460986)
  Vec.rnorm <- Vectorize(function(n, mean, sd) rnorm(n, mean, sd), 'mean')

  y <- c( Vec.rnorm(each.sub.pop.n, sub.pop.means, sub.pop.sd) )
  set.seed(NULL)
  x <- rep(predict.range, each = each.sub.pop.n)

  if (plot) {
    par(mfg = c(1,1)) ## Top-Row Plot  
    if (first) {
      plot(x, y, ylim = range(y), col = "transparent")
    } else if (last) {
      plot(x, y, ylim = range(y))     
    }
  }

  sample <- lapply(split(y, x), function(z) sample(z, n.sample, replace = TRUE))
  sample <- data.frame(y = unlist(sample), 
                       x = as.numeric(rep(names(sample), each = n.sample)))

  x = sample$x  ;  y = sample$y

  if (plot) {
    par(mfg = c(2,1))   #### BOTTOM-ROW PLOT
    if (first) {
      plot(x, y, ylim = range(y), col = "transparent")
    }
    if (last) {
      points(x, y)
    }
    abline(lm(y ~ x), col = c('blue', 'red')[(last) + 1]) # Regression Line
  }
}
## TEST HERE:
n <- 10
for (i in 1:n) {
  TPN(first = ifelse(i == 1, TRUE, FALSE), last = ifelse(i == n, TRUE, FALSE))
}

Store the abline coefficients

There is no need of transparent color here because, a new plot is created for each iteration.

############## Input Values #################
TPN <- function(each.sub.pop.n = 150, 
                sub.pop.means = 20:10, 
                predict.range = 10:0, 
                sub.pop.sd = .75,
                n.sample = 2,   
                plot = TRUE,
                coefs = FALSE,
                coefsup = NULL) {
  #############################################
  if (plot) {
    m <- matrix( c(1, 2), nrow = 2, ncol = 1 )
    layout(m)
    par( mar = c(2, 4.1, 2.1, 2.1) )
  }

  set.seed(2460986)
  Vec.rnorm <- Vectorize(function(n, mean, sd) rnorm(n, mean, sd), 'mean')

  y <- c( Vec.rnorm(each.sub.pop.n, sub.pop.means, sub.pop.sd) )
  set.seed(NULL)
  x <- rep(predict.range, each = each.sub.pop.n)

  if (plot) {
    plot(x, y, ylim = range(y))
  }

  sample <- lapply(split(y, x), function(z) sample(z, n.sample, replace = TRUE))
  sample <- data.frame(y = unlist(sample), 
                       x = as.numeric(rep(names(sample), each = n.sample)))

  x = sample$x  ;  y = sample$y

  if (plot) {
    plot(x, y, ylim = range(y))
    # Add the previous lines if exists
    if (!is.null(coefsup)) {
      apply(coefsup, 1, function(x) abline(a = x[1], b = x[2], col = "blue")) 
    }
    abline(lm(y ~ x), col = 'red') # Regression Line
  }
  if (coefs) {return(coef(lm(y ~ x)))}
}
# TEST with coefs
n <- 10
coefsup <- NULL
for (i in 1:n) {
  coefsup <- rbind(coefsup, TPN(coefs = TRUE, coefsup = coefsup))
}

In both cases, the output is what you expect:



来源:https://stackoverflow.com/questions/44033567/add-the-new-regression-line-but-keep-the-regression-lines-from-previous-runs-in

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!