Double for-loop operation in R (with an example)

后端 未结 5 840
無奈伤痛
無奈伤痛 2021-01-06 00:02

Please look at the following small working example:

#### Pseudo data
nobs1 <- 4000
nobs2 <- 5000
mylon1 <- runif(nobs1, min=0, max=1)-76
mylat1 <         


        
5条回答
  •  甜味超标
    2021-01-06 00:27

    Here's an option that decreases the runtime to ~2 seconds on my machine because part of it is vectorized.

    A direct comparison with the original solution follows.

    Test data:

    # Sample sizes for the two sets of locations.
    nobs1 <- 4000
    nobs2 <- 5000

    # Uniform draws on [0, 1] (the runif() defaults) shifted to the target
    # longitude/latitude bands. The draw order is kept so that a given seed
    # yields the same values.
    mylon1 <- runif(nobs1) - 76
    mylat1 <- runif(nobs1) + 37
    mylon2 <- runif(nobs2) - 76
    mylat2 <- runif(nobs2) + 37
    

    Original solution:

    #### define a distance function
    # Haversine great-circle distance between two points, in kilometres.
    #
    # Args:
    #   lon1, lat1: longitude and latitude of the first point (scalars).
    #   lon2, lat2: longitude and latitude of the second point (scalars).
    #   degrees:    if TRUE, the coordinates are decimal degrees and are
    #               converted to radians before use. The default (FALSE)
    #               keeps the previous behaviour: the values are passed
    #               straight to sin()/cos(), which interpret them as radians.
    #
    # Returns: the distance in km, using a mean Earth radius of 6371 km.
    #
    # Scalar use only: min() below collapses vector input to a single value.
    thedistance <- function(lon1, lat1, lon2, lat2, degrees = FALSE) {
      if (isTRUE(degrees)) {
        to_rad <- pi / 180
        lon1 <- lon1 * to_rad
        lat1 <- lat1 * to_rad
        lon2 <- lon2 * to_rad
        lat2 <- lat2 * to_rad
      }
      earth_radius <- 6371 # Earth mean radius [km]
      delta_lon <- lon2 - lon1
      delta_lat <- lat2 - lat1
      a <- sin(delta_lat / 2)^2 + cos(lat1) * cos(lat2) * sin(delta_lon / 2)^2
      # Cap sqrt(a) at 1 so rounding error cannot push asin() out of its domain.
      central_angle <- 2 * asin(min(1, sqrt(a)))
      earth_radius * central_angle
    }
    
    # Start the timer for the double-loop (original) solution.
    ptm <- proc.time()

    #### Calculate distances between locations
    # Initiate the resulting distance vector
    ndistance <- nobs1 * nobs2 # The number of distances
    mydistance <- vector(mode = "numeric", length = ndistance)

    # k is the running write position in mydistance: for each point i of the
    # first set, the distances to all points j of the second set are stored
    # consecutively.
    k <- 1
    # seq_len() rather than 1:n so that a zero count yields zero iterations
    # (1:0 would iterate over c(1, 0)).
    for (i in seq_len(nobs1)) {
      for (j in seq_len(nobs2)) {
        mydistance[k] <- thedistance(mylon1[i], mylat1[i], mylon2[j], mylat2[j])
        k <- k + 1
      }
    }

    # Elapsed time since ptm was taken.
    proc.time() - ptm
       User      System     elapsed 
    148.243       0.681     148.901 
    

    My approach:

    # modified (vectorized) distance function:
    # Haversine great-circle distance in kilometres, vectorized over its
    # arguments (the usual R recycling applies, e.g. one point against many).
    #
    # Args:
    #   lon1, lat1: longitude and latitude of the first point(s).
    #   lon2, lat2: longitude and latitude of the second point(s).
    #   degrees:    if TRUE, the coordinates are decimal degrees and are
    #               converted to radians before use. The default (FALSE)
    #               keeps the previous behaviour: the values are passed
    #               straight to sin()/cos(), which interpret them as radians.
    #
    # Returns: numeric vector of distances in km (mean Earth radius 6371 km).
    thedistance2 <- function(lon1, lat1, lon2, lat2, degrees = FALSE) {
      if (isTRUE(degrees)) {
        to_rad <- pi / 180
        lon1 <- lon1 * to_rad
        lat1 <- lat1 * to_rad
        lon2 <- lon2 * to_rad
        lat2 <- lat2 * to_rad
      }
      earth_radius <- 6371 # Earth mean radius [km]
      delta_lon <- lon2 - lon1
      delta_lat <- lat2 - lat1
      a <- sin(delta_lat / 2)^2 + cos(lat1) * cos(lat2) * sin(delta_lon / 2)^2
      # pmin instead of min: caps sqrt(a) at 1 element-wise, so every pair
      # keeps its own value instead of the vector collapsing to one number.
      central_angle <- 2 * asin(pmin(1, sqrt(a)))
      earth_radius * central_angle
    }
    
    # Start the timer for the partially vectorized solution.
    ptm2 <- proc.time()

    # One list element per point of the first set; each element holds the
    # distances from that point to every point of the second set.
    lst <- lapply(
      seq_len(nobs1),
      function(i) thedistance2(mylon1[i], mylat1[i], mylon2, mylat2)
    )

    # Flatten to a single vector, in the same order as the double loop.
    res <- unlist(lst)

    # Elapsed time since ptm2 was taken.
    proc.time() - ptm2
       User      System     elapsed
      1.988       0.331       2.319 
    

    Are the results all equal?

    # Check that the vectorized result matches the double-loop result;
    # all.equal() compares numeric vectors within a small tolerance.
    all.equal(mydistance, res)
    #[1] TRUE
    

提交回复
热议问题