I have this dataframe
CC.Number Date Time Accident.Type Location.1
1 12T008826 07/01/2012 1630 PD (39.26699, -76.560642)
2
separate from tidyr also works
library(tidyr)
# Sub out the parentheses
df$Location.1 <- gsub("[()]", "", df$Location.1)
separate(df, col = Location.1, into = c("lat","long"), sep = ",")
# CC.Number Date Time Accident.Type lat long
#1 12T008826 07/01/2012 1630 PD 39.26699 -76.560642
#2 12L005385 07/02/2012 1229 PD 39.000549 -76.399312
#3 12L005388 07/02/2012 1229 PD 39.00058 -76.399267
#4 12T008851 07/02/2012 445 PI 39.26367 -76.56648
#5 12T008858 07/02/2012 802 PD 39.240862 -76.599017
#6 12T008860 07/02/2012 832 PD 39.27022 -76.63926
We can use extract from tidyr by capturing as two groups with only the numeric elements with dots and discard the rest in 'Location.1'
library(tidyr)
df1 %>%
extract(Location.1, into = c('alt', 'lng'), "\\(([0-9.]+),\\s+(-*[0-9.]+).")
# CC.Number Date Time Accident.Type alt lng
#1 12T008826 07/01/2012 1630 PD 39.26699 -76.560642
#2 12L005385 07/02/2012 1229 PD 39.000549 -76.399312
#3 12L005388 07/02/2012 1229 PD 39.00058 -76.399267
#4 12T008851 07/02/2012 445 PI 39.26367 -76.56648
#5 12T008858 07/02/2012 802 PD 39.240862 -76.599017
#6 12T008860 07/02/2012 832 PD 39.27022 -76.63926
In base you can use trimws to remove () and read.table to split at ,.
cbind(md[1:4], read.table(sep=",", text=trimws(md$Location.1, whitespace = "[ ()]"),
col.names=c("alt", "lng")))
# CC.Number Date Time Accident.Type alt lng
#1 12T008826 07/01/2012 1630 PD 39.26699 -76.56064
#2 12L005385 07/02/2012 1229 PD 39.00055 -76.39931
#3 12L005388 07/02/2012 1229 PD 39.00058 -76.39927
#4 12T008851 07/02/2012 445 PI 39.26367 -76.56648
#5 12T008858 07/02/2012 802 PD 39.24086 -76.59902
#6 12T008860 07/02/2012 832 PD 39.27022 -76.63926
Data:
md <- structure(list(CC.Number = c("12T008826", "12L005385", "12L005388",
"12T008851", "12T008858", "12T008860"), Date = c(" 07/01/2012",
" 07/02/2012", " 07/02/2012", " 07/02/2012", " 07/02/2012", " 07/02/2012"
), Time = c(1630L, 1229L, 1229L, 445L, 802L, 832L), Accident.Type = c(" PD",
" PD", " PD", " PI", " PD",
" PD"), Location.1 = c(" (39.26699, -76.560642)",
" (39.000549, -76.399312)", " (39.00058, -76.399267)", " (39.26367, -76.56648)",
" (39.240862, -76.599017)", " (39.27022, -76.63926)")), class = "data.frame", row.names = c(NA,
-6L))
You can do like this also, assuming dat1 is your original dataset name, we can use strsplit and gsub. First we replace commas and parenthesis with nothing using gsub and then using strsplit to split the values by spaces:
df1 <- setNames(data.frame(do.call("rbind",strsplit(gsub("\\(|\\)|,","",dat1$Location.1),split=" "))),c("Lat","Long"))
df2 <- data.frame(cbind(dat1[,1:(length(dat1)-1)],df1))
# CC.Number Date Time Accident.Type Lat Long
# 1 12T008826 07/01/12 1630 PD 39.26699 -76.560642
# 2 12L005385 07/02/12 1229 PD 39.000549 -76.399312
# 3 12L005388 07/02/12 1229 PD 39.00058 -76.399267
# 4 12T008851 07/02/12 445 PI 39.26367 -76.56648
# 5 12T008858 07/02/12 802 PD 39.240862 -76.599017
# 6 12T008860 07/02/12 832 PD 39.27022 -76.63926