问题
The data frame below contains the longitude, latitude, state and city of several locations. I want to find the three nearest cities for every city in the data frame. For example, in the data frame below, Oklahoma City and Colorado Springs (spelled "Colarado Springs" in the data) are the nearest to Albuquerque, so the three nearest cities to Albuquerque should be saved in another data frame named nearest_Al. (I don't know how to get this result; that's why I tried to give an idea of it by creating the data frame by hand.)
# Example input: one row per city. Coordinates are stored as text here.
dataframe <- data.frame(
  long = c(
    "-106.61291", "-81.97224", "-84.42770",
    "-72.68604", "-97.60056", "-104.70261"
  ),
  lat = c(
    "35.04333", "33.37378", "33.64073",
    "41.93887", "35.39305", "38.80171"
  ),
  state = c("NM", "GA", "GA", "TX", "OK", "CO"),
  city = c(
    "Albuquerque", "Augusta", "Atlanta",
    "Windsor Locks", "Oklahoma City", "Colarado Springs"
  )
)
# Hand-made illustration of the wanted result for Albuquerque:
# the rows of the cities that lie closest to it.
nearest_Al <- data.frame(
  long = c("-97.60056", "-104.70261"),
  lat = c("35.39305", "38.80171"),
  state = c("OK", "CO"),
  city = c("Oklahoma City", "Colarado Springs")
)
I have to do the same thing on a data frame that contains about 500k rows and around 100 locations.
Thanks in advance!
回答1:
Here is one idea. dataframe2 is the final output. The Near_City column shows the top three closest cities for each city in the city column.
library(dplyr)
library(sp)
library(rgdal)
library(sf)
# Example input, one row per city. stringsAsFactors = FALSE keeps every
# column as character so the coordinates can be converted with as.numeric().
dataframe <- data.frame(
  long = c(
    "-106.61291", "-81.97224", "-84.42770",
    "-72.68604", "-97.60056", "-104.70261"
  ),
  lat = c(
    "35.04333", "33.37378", "33.64073",
    "41.93887", "35.39305", "38.80171"
  ),
  state = c("NM", "GA", "GA", "TX", "OK", "CO"),
  city = c(
    "Albuquerque", "Augusta", "Atlanta",
    "Windsor Locks", "Oklahoma City", "Colarado Springs"
  ),
  stringsAsFactors = FALSE
)
# Convert the text coordinates to numbers, then promote the data frame to an
# sp spatial points object using `long` as x and `lat` as y.
dataframe_sp <- mutate(
  dataframe,
  long = as.numeric(long),
  lat = as.numeric(lat)
)
coordinates(dataframe_sp) <- ~ long + lat
# Convert the sp object to sf and declare its coordinate reference system
# as EPSG:4326 in one step.
dataframe_sf <- st_set_crs(st_as_sf(dataframe_sp), 4326)
# Pairwise distance matrix between all cities: entry [i, j] is the distance
# from city i to city j (sf computes geodesic distances for long/lat data).
dist_m <- st_distance(dataframe_sf, dataframe_sf)
# Number of nearest cities to keep for every city
k <- 3L
n_city <- nrow(dataframe)
# Each city needs at least k other cities to choose from
stopifnot(n_city > k)
# Select the closest k cities.
# For city i, rank all cities by distance and remove city i itself by its
# index rather than by assuming it is ranked first: when several rows share
# the same coordinates their zero distances tie, and the city itself is then
# not guaranteed to come first.
# The result is a k x n_city matrix; column i holds the row numbers of the
# k cities closest to city i, nearest first.
index <- vapply(
  seq_len(n_city),
  function(i) {
    ranked <- order(as.numeric(dist_m[i, ]))
    ranked[ranked != i][seq_len(k)]
  },
  integer(k)
)
# Repeat each city k times, one row per neighbour
dataframe2 <- dataframe[rep(seq_len(n_city), each = k), ]
# as.vector() reads `index` column by column, which matches the row order of
# dataframe2; store the neighbour names in the Near_City column
dataframe2$Near_City <- dataframe[as.vector(index), ]$city
Update
We can further create the output the OP wants.
# One row per (target city, neighbour) pair: the neighbour's own record ...
neighbour_rows <- as.vector(index)
dataframe3 <- dataframe[neighbour_rows, ]
# ... labelled with the city it is a neighbour of
dataframe3[["TargetCity"]] <- dataframe2[["city"]]
# One data frame of neighbours per target city, named by the target city
nearest_city_list <- split(dataframe3, dataframe3[["TargetCity"]])
Now each target city is an element of the list nearest_city_list. To access the data for a target city, index the list by the city name. Here is an example that pulls out the results for Albuquerque:
# Look up the element of nearest_city_list stored under the name "Albuquerque"
nearest_city_list[["Albuquerque"]]
long lat state city TargetCity
6 -104.70261 38.80171 CO Colarado Springs Albuquerque
5 -97.60056 35.39305 OK Oklahoma City Albuquerque
3 -84.42770 33.64073 GA Atlanta Albuquerque
回答2:
This might be a little slow with all your data but it does the trick
# Example input, one row per city, with numeric coordinates
dataframe <- data.frame(
  long = c(-106.61291, -81.97224, -84.42770, -72.68604, -97.60056, -104.70261),
  lat = c(35.04333, 33.37378, 33.64073, 41.93887, 35.39305, 38.80171),
  state = c("NM", "GA", "GA", "TX", "OK", "CO"),
  city = c(
    "Albuquerque", "Augusta", "Atlanta",
    "Windsor Locks", "Oklahoma City", "Colarado Springs"
  )
)
library(sp)
library(rgeos)
# Promote the data frame to an sp spatial points object, using `long` as x
# and `lat` as y. Note that this replaces `dataframe` itself.
coordinates(dataframe) <- ~ long + lat
# Pairwise distance matrix between all points (byid = TRUE gives one entry
# per pair of points instead of a single overall minimum).
# NOTE(review): gDistance works on the raw coordinates, so for long/lat
# input the values are presumably planar distances in degrees, not metres.
dist_cities <- gDistance(dataframe, byid = TRUE)
# Rank the distances within every row of dist_cities: row i of the result
# holds, for each city, its rank by distance from city i (1 = smallest).
# Built in one pass instead of growing a data frame with rbind() inside a
# loop, which copies the whole table on every iteration and names the
# columns after the values of the first row.
dist_cities_rank <- as.data.frame(
  t(apply(dist_cities, 1, function(d) rank(as.numeric(d))))
)
# Collect, for every row of `dataframe`, that row together with the rows
# ranked closest to it.
three_close_cities<-list()
# One list element per row i of `dataframe`.
for(i in seq(1,dim(dataframe)[1])){
# Element i is a list of two items:
#   test_city - row i of `dataframe`
#   (unnamed) - the rows whose rank in row i of dist_cities_rank is <= 4 but
#               not 1, bound column-wise to their distances from row i taken
#               from dist_cities.
# Rank 1 is presumably the city itself (distance 0), which leaves the three
# nearest other cities - assumes no tied distances; TODO confirm.
three_close_cities[[i]]<-
list(test_city=dataframe[i,],cbind(dataframe[which(dist_cities_rank[i,]<=4&dist_cities_rank[i,]!=1),],
dist_cities[i,which(dist_cities_rank[i,]<=4&dist_cities_rank[i,]!=1)]))
}
回答3:
The following should work for you
I wrote a distance function, dist(), that accepts xi (the longitude of the current row of the data frame), yi (the latitude of the current row), and z (the data frame itself). It returns the 2 nearest cities, excluding the target city.
# Return the rows of `z` that lie closest to the point (xi, yi), skipping the
# single closest row.
#
# Arguments:
#   xi, yi  Longitude and latitude of the target point. May be numeric,
#           character or factor; values are converted via as.character().
#   z       Data frame with `long` and `lat` columns (same accepted types).
#   k       Number of neighbours to return. Defaults to 2.
#
# Returns:
#   `z` with an added `dist` column, sorted by increasing distance, reduced
#   to rows 2 .. k + 1 (fewer if `z` has fewer rows), with row names reset.
#
# Notes:
#   * The closest row is skipped on the assumption that it is the target
#     point itself, i.e. that (xi, yi) is one of the rows of `z`.
#   * Distance is plain Euclidean distance on the raw coordinates, so for
#     longitude/latitude input it is expressed in degrees.
#   * Written in base R so the function does not rely on dplyr being attached.
#   * This definition masks stats::dist().
dist <- function(xi, yi, z, k = 2L) {
  to_num <- function(v) as.double(as.character(v))
  z$dist <- sqrt(
    (to_num(z$long) - to_num(xi))^2 + (to_num(z$lat) - to_num(yi))^2
  )
  z <- z[order(z$dist), , drop = FALSE]
  # Every row except the nearest one is a candidate neighbour
  candidates <- seq_len(nrow(z))[-1L]
  keep <- candidates[seq_len(min(k, length(candidates)))]
  nearest <- z[keep, , drop = FALSE]
  rownames(nearest) <- NULL
  nearest
}
tidyverse solution
library(tidyverse)
# For every city, attach a list-column `data` holding its nearest cities.
mod <- dataframe %>%
  # Copy the coordinates so they can be nested without losing the originals
  mutate(copylong = long, copylat = lat) %>%
  # Nest the copies into a list-column named `data`; the explicit
  # `data = c(...)` form replaces the unnamed-column syntax deprecated in
  # tidyr 1.0.0
  nest(data = c(copylong, copylat)) %>%
  # Replace each nested coordinate pair with the result of dist() for it
  mutate(data = map(data, ~ dist(.x$copylong, .x$copylat, dataframe)))
To save only the nearest cities as a separate data frame
# Stack the per-city results stored in the `data` list-column into a single
# data frame. Binding the list directly avoids indexing by 1:nrow(mod), which
# would iterate over c(1, 0) if `mod` had no rows.
desired <- bind_rows(mod$data)
Output
long lat state city dist
1 -104.70261 38.80171 CO Colarado Springs 4.216001
2 -97.60056 35.39305 OK Oklahoma City 9.019133
3 -84.42770 33.64073 GA Atlanta 2.469928
4 -72.68604 41.93887 TX Windsor Locks 12.633063
5 -81.97224 33.37378 GA Augusta 2.469928
6 -97.60056 35.39305 OK Oklahoma City 13.288900
# etc
Extra
If you want to keep the original data frame alongside the nearest cities
# Keep every original city row and put its nearest cities next to it,
# one output row per (city, neighbour) pair.
mod <- dataframe %>%
  # Copy the coordinates so they can be nested without losing the originals
  mutate(copylong = long, copylat = lat) %>%
  # Explicit `data = c(...)` form; the unnamed-column syntax is deprecated
  # since tidyr 1.0.0
  nest(data = c(copylong, copylat)) %>%
  mutate(data = map(data, ~ dist(.x$copylong, .x$copylat, dataframe))) %>%
  # The neighbour columns repeat the names long/lat/state/city, which
  # current tidyr rejects by default; suffix the repeats as long1, lat1,
  # state1, city1
  unnest(
    cols = data,
    names_repair = function(nms) make.unique(nms, sep = "")
  )
Extra output
long lat state city long1 lat1 state1 city1 dist
1 -106.61291 35.04333 NM Albuquerque -104.70261 38.80171 CO Colarado Springs 4.216001
2 -106.61291 35.04333 NM Albuquerque -97.60056 35.39305 OK Oklahoma City 9.019133
3 -81.97224 33.37378 GA Augusta -84.42770 33.64073 GA Atlanta 2.469928
4 -81.97224 33.37378 GA Augusta -72.68604 41.93887 TX Windsor Locks 12.633063
Split into named list
# One data frame per city. split() already names each element after its
# value of mod$city, in sorted order. The names must not be overwritten with
# dataframe$city: that vector is in data order, so it would attach the wrong
# city name to most elements.
L <- split(mod, mod$city)
来源:https://stackoverflow.com/questions/45576214/find-nearest-cities-from-the-data-frame-to-the-specific-location