Find nearest cities from the data frame to the specific location

谁说我不能喝 提交于 2019-12-02 02:25:31

Here is one idea. dataframe2 is the final output. The Near_City column shows the top three closest cities for each city in the city column.

library(dplyr)
library(sp)
library(rgdal)
library(sf)

# Example input: one row per city, coordinates stored as character strings
dataframe <- data.frame(
  long = c(
    "-106.61291", "-81.97224", "-84.42770",
    "-72.68604", "-97.60056", "-104.70261"
  ),
  lat = c(
    "35.04333", "33.37378", "33.64073",
    "41.93887", "35.39305", "38.80171"
  ),
  state = c("NM", "GA", "GA", "TX", "OK", "CO"),
  city = c(
    "Albuquerque", "Augusta", "Atlanta",
    "Windsor Locks", "Oklahoma City", "Colarado Springs"
  ),
  stringsAsFactors = FALSE
)

# Convert the character coordinates to numeric, then promote the data frame
# to an sp spatial points object using long/lat as the coordinates
dataframe_sp <- mutate(
  dataframe,
  long = as.numeric(long),
  lat = as.numeric(lat)
)
coordinates(dataframe_sp) <- ~long + lat

# Convert to an sf object and declare its coordinate reference system as
# EPSG:4326 (longitude/latitude)
dataframe_sf <- st_set_crs(st_as_sf(dataframe_sp), 4326)

# Pairwise distance matrix: one row and one column per city
dist_m <- st_distance(dataframe_sf, dataframe_sf)

# Select the closest three cities.
# Each column of `index` lists, for one city, the row numbers of all cities
# ordered by increasing distance. Row 1 is dropped as in the original recipe
# (it is expected to be the city itself, at distance 0), and the next
# `n_near` rows are kept.
# `n_near` is capped so the code also works with fewer than four cities.
n_near <- min(3, nrow(dist_m) - 1)
index <- matrix(apply(dist_m, 1, order), nrow = nrow(dist_m))
index <- index[seq_len(n_near) + 1, , drop = FALSE]

# Repeat each city once per neighbour
dataframe2 <- dataframe[rep(seq_len(nrow(dataframe)), each = n_near), ]

# Look up the neighbours by row number (column-major, i.e. city by city)
# and store their names in the Near_City column
dataframe2$Near_City <- dataframe[as.vector(index), ]$city

Update

We can further create the output the OP wants.

# One row per (target city, neighbour) pair: the neighbour's full record,
# tagged with the city it is a neighbour of
dataframe3 <- dataframe[as.vector(index), ]
dataframe3[["TargetCity"]] <- dataframe2[["city"]]

# Group the neighbour records into a list keyed by target city
nearest_city_list <- split(dataframe3, dataframe3[["TargetCity"]])

Now each "Target City" is an element of the list nearest_city_list. To access the data, index the list by the target city's name. Here is an example that pulls out the results for Albuquerque:

nearest_city_list[["Albuquerque"]]
        long      lat state             city  TargetCity
6 -104.70261 38.80171    CO Colarado Springs Albuquerque
5  -97.60056 35.39305    OK    Oklahoma City Albuquerque
3  -84.42770 33.64073    GA          Atlanta Albuquerque
user_123

This might be a little slow with all your data but it does the trick

# Example input: one row per city, with numeric longitude/latitude
dataframe <- data.frame(
  long = c(-106.61291, -81.97224, -84.42770, -72.68604, -97.60056, -104.70261),
  lat = c(35.04333, 33.37378, 33.64073, 41.93887, 35.39305, 38.80171),
  state = c("NM", "GA", "GA", "TX", "OK", "CO"),
  city = c(
    "Albuquerque", "Augusta", "Atlanta",
    "Windsor Locks", "Oklahoma City", "Colarado Springs"
  )
)

library(sp)
library(rgeos)


# Promote the data frame to an sp spatial points object
coordinates(dataframe) <- ~long + lat

# Pairwise distances between all cities (one row and one column per city).
# NOTE(review): the coordinates are unprojected longitude/latitude, so these
# distances are presumably in degrees rather than metres -- confirm.
dist_cities <- gDistance(dataframe, byid = TRUE)

# Rank the distances within each row in one vectorised step instead of
# growing a data frame with rbind() inside a loop. Row i holds the ranks of
# every city's distance to city i.
dist_cities_rank <- as.data.frame(t(apply(dist_cities, 1, rank)))

# For every city, pair its own record (`test_city`) with the records of, and
# distances to, its nearest neighbours. seq_len() keeps this safe when there
# are no rows, where seq(1, 0) would iterate over 1 and 0.
three_close_cities <- lapply(seq_len(dim(dataframe)[1]), function(i) {
  # Keep ranks up to 4 but not 1: rank 1 is expected to be the city itself,
  # leaving the three closest other cities
  near <- unname(which(dist_cities_rank[i, ] <= 4 & dist_cities_rank[i, ] != 1))
  list(
    test_city = dataframe[i, ],
    cbind(dataframe[near, ], dist_cities[i, near])
  )
})

The following should work for you

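I made a distance function that accepts xi (the longitude of the current row), yi (the latitude of the current row), and z (the data frame of all cities). It computes the straight-line (Euclidean) distance, in degrees, from that point to every row of z and returns the top 2 nearest cities (excluding the target city itself).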

 # Find the cities in `z` that are nearest to the point (xi, yi).
 #
 # Arguments:
 #   xi, yi  longitude and latitude of the target point (numeric, character
 #           or factor)
 #   z       data frame with `long` and `lat` columns (numeric, character or
 #           factor)
 #   n       number of nearest cities to return (default 2)
 #
 # Returns `z` with an added `dist` column (Euclidean distance in the units
 # of `long`/`lat`), sorted by increasing distance, keeping rows 2 to n + 1.
 # The first row is skipped because the target city itself is expected to
 # sort first with distance 0.
 dist <- function(xi, yi, z, n = 2) {
   # Go through character only when needed, so factor-encoded coordinates
   # give their labels rather than their integer codes, and numeric input is
   # used as is
   to_number <- function(v) {
     if (is.numeric(v)) as.double(v) else as.double(as.character(v))
   }

   z$dist <- sqrt(
     (to_number(z$long) - to_number(xi))^2 +
       (to_number(z$lat) - to_number(yi))^2
   )
   z <- z[order(z$dist), , drop = FALSE]

   # Skip the closest row (the target itself); never index past the last row
   keep <- seq_len(n) + 1L
   keep <- keep[keep <= nrow(z)]
   z <- z[keep, , drop = FALSE]
   rownames(z) <- NULL
   z
 }

tidyverse solution

 library(tidyverse)
 # For every city, nest a copy of its own coordinates into a `data`
 # list-column, then replace that nested copy with the rows returned by dist()
 mod <- dataframe %>%
   mutate(copylong = long, copylat = lat) %>%
   # Named form of nest(); the unnamed nest(copylong, copylat) is deprecated
   nest(data = c(copylong, copylat)) %>%
   mutate(data = map(data, ~ dist(.x$copylong, .x$copylat, dataframe)))

To save only the nearest cities as a separate data frame

 # Stack the per-city result tables into one data frame; binding the
 # list-column directly also works when `mod` has no rows
 desired <- bind_rows(mod$data)

Output

         long      lat  state             city      dist
 1 -104.70261 38.80171     CO Colarado Springs  4.216001
 2  -97.60056 35.39305     OK    Oklahoma City  9.019133
 3  -84.42770 33.64073     GA          Atlanta  2.469928
 4  -72.68604 41.93887     TX    Windsor Locks 12.633063
 5  -81.97224 33.37378     GA          Augusta  2.469928
 6  -97.60056 35.39305     OK    Oklahoma City 13.288900
 # etc

Extra

If you want to keep the original database and the nearest cities

 # Keep each original row next to its nearest cities: nest a copy of the
 # coordinates, swap it for the dist() result, then unnest side by side
 mod <- dataframe %>%
   mutate(copylong = long, copylat = lat) %>%
   # Named form of nest(); the unnamed nest(copylong, copylat) is deprecated
   nest(data = c(copylong, copylat)) %>%
   mutate(data = map(data, ~ dist(.x$copylong, .x$copylat, dataframe))) %>%
   # The nested rows repeat the outer column names (long, lat, state, city).
   # Suffix the repeats as long1, lat1, state1, city1 so the unnested names
   # are unique (names_repair requires tidyr >= 1.0.0).
   unnest(data, names_repair = ~ make.unique(.x, sep = ""))
Extra output
         long      lat  state             city      long1     lat1 state1            city1      dist
 1 -106.61291 35.04333     NM      Albuquerque -104.70261 38.80171     CO Colarado Springs  4.216001
 2 -106.61291 35.04333     NM      Albuquerque  -97.60056 35.39305     OK    Oklahoma City  9.019133
 3  -81.97224 33.37378     GA          Augusta  -84.42770 33.64073     GA          Atlanta  2.469928
 4  -81.97224 33.37378     GA          Augusta  -72.68604 41.93887     TX    Windsor Locks 12.633063

Split into named list

 # split() already names each element after its own city. Do not overwrite
 # names(L) positionally with dataframe$city: split() orders the groups by
 # factor level (sorted city name), not by original row order, so that would
 # attach the wrong names. Fixing the levels to order of first appearance
 # keeps the list in the original city order with correct names.
 L <- split(mod, factor(mod$city, levels = unique(as.character(mod$city))))
标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!