问题
I've been working on a project where the goal is to take a two-column CSV of street addresses and zip codes, read it into R, then perform a Zillow query for each one (GetSearchResults, specifically), parse the output, and store the parsed output in a dataframe to be written to a CSV (and placed right next to the existing data).
caveat: I can only call one address/zip combo at a time through the zillow API, so anything that violates that is off the table immediately.
As of this point, I have about 85% of the work done. I have i) a bit of code that can, one-by-one, query those address/zip combos from a dataframe as well as ii) a tentative way of putting that input back into a dataframe,
library(ZillowR)
library(rvest)
library(dplyr)
library(DT)
# this commented section is what I would use instead of creating the dataframe manually below, just for clarity
# data1 = read.csv('Addresses.csv', header = F, colClasses = 'character')$V1
# data2 = read.csv('Addresses.csv', header = F, colClasses = 'character')$V2
# data = data.frame(street = data1, city.state = as.character(data2))
# per comments, should add a "stringsAsFactors = FALSE" in the dataframe part
data = data.frame(
street = c('77 Huntington Ave',
'85 Prospect St',
'219 Lincoln St'),
city.state = c(rep('01752', 3)))
get.zillowdata = function(df, address, city.state){
require(ZillowR)
set_zillow_web_service_id('API KEY')
results = do.call(rbind, lapply(1:nrow(df), function(i){
z = tryCatch({
zdata = GetDeepSearchResults(address = df$street[i],
citystatezip = df$city.state[i],
zws_id = getOption("ZillowR-zws_id"),
url = "http://www.zillow.com/webservice/GetDeepSearchResults.htm")
return(zdata)
},
error = function(cond) {
message(paste("No Data Available:", df$street[i], df$city.state[i]))
return(NA) # Choose a return value in case of error
},
warning = function(cond) {
message(paste("Zdata caused a warning:", df$street[i], df$city.state[i]))
return(NA) # Choose a return value in case of warning
},
# print processing message to screen
finally = {
message(paste("Processed Address:", df$street[i], df$city.state[i]))
message(paste(i, "of", nrow(df), 'processed'))
}
)
}))
if(nrow(results)==nrow(df)){
results = cbind(df, results)
print(paste('Original data had', nrow(df), 'rows. Returning a dataframe with', nrow(results),
'rows. Returned dataframe has', sum(is.na(results$amount)), 'missing zdata values.'))
return(results)
}
else(print("Error: nrows(df) do not match nrows(zdata)"))
}
get.zillowdata(data)
`
and also iii) a parser for the XMLnode response that you get when you perform a query through the Zillow API which picks out specific child values (zestimate, square footage, lot size, etc; whatever you specify)
library(ZillowR)
library(XML)
library(RCurl)
set_zillow_web_service_id('API KEY')
output123 = GetDeepSearchResults(address = 'STREET ADDRESS', citystatezip = '0ZIP CODE', zws_id = getOption("ZillowR-zws_id"), url = "http://www.zillow.com/webservice/GetSearchResults.htm")
results <- xmlToList(output123$response[["results"]])
getValRange <- function(x, hilo) {
ifelse(hilo %in% unlist(dimnames(x)), x["text",hilo][[1]], NA)
}
out <- apply(results, MAR=2, function(property) {
zpid <- property$zpid
links <- unlist(property$links)
address <- unlist(property$address)
z <- property$zestimate
zestdf <- list(
amount=ifelse("text" %in% names(z$amount), z$amount$text, NA),
lastupdated=z$"last-updated",
valueChange=ifelse(length(z$valueChange)==0, NA, z$valueChange),
valueLow=getValRange(z$valuationRange, "low"),
valueHigh=getValRange(z$valuationRange, "high"),
percentile=z$percentile)
list(id=zpid, links, address, zestdf)
})
data <- as.data.frame(do.call(rbind, lapply(out, unlist)),
row.names=seq_len(length(out)))
But I'm a little stuck at this point. How should I put these together so that I can include the parsing at the end of the api call part and make sure that both of them get iterated over the full list of addresses/zips? My code right now isn't in any particular order, so feel free to move things around if you decide to tackle this, and if anyone needs additional information, I'm happy to clarify!
Thanks very much in advance.
回答1:
This is a semi simplified answer to your question(I mean I removed some of the information that api provides, such as high low estimates but you can add them back it following the logic for the other provided info in the code below). First loading in the required packages:
ipak <- function(pkg){
new.pkg <- pkg[!(pkg %in% installed.packages()[, "Package"])]
if (length(new.pkg))
install.packages(new.pkg, dependencies = TRUE)
sapply(pkg, require, character.only = TRUE)
}
#install and load the following packages
packages <- c("ZillowR", "rjson", "httr", "XML")
ipak(packages)
Next set your Zillow API Key:
set_zillow_web_service_id('API KEY')
Next defining a dataset with only addresses, and then defining and adding columns of street address, and zip code to it, so we can use them later with Zillow api:
df <-data.frame("Address" = c("5435 Andrea Boulevard, Sacramento, CA 95842","8434 Walerga Road Apt 421, Antelope, CA 95843"
,"7152 Flanders Way, Sacramento, CA 95842","1076 Edmonton Drive, Sacramento, CA 95833"
,"9906 Burline Street, Sacramento, CA 95827","3634 Sapphire Drive Apt 1, Auburn, CA 95602"),stringsAsFactors = FALSE)
df$StreetCity<-gsub("(.*),.*", "\\1", df$Address)
df$Street<-gsub("(.*),.*", "\\1", df$StreetCity)
df$zip<-substr(df$Address,nchar(df$Address)-4,nchar(df$Address))
df
# Address StreetCity Street zip
# 1 5435 Andrea Boulevard, Sacramento, CA 95842 5435 Andrea Boulevard, Sacramento 5435 Andrea Boulevard 95842
# 2 8434 Walerga Road Apt 421, Antelope, CA 95843 8434 Walerga Road Apt 421, Antelope 8434 Walerga Road Apt 421 95843
# 3 7152 Flanders Way, Sacramento, CA 95842 7152 Flanders Way, Sacramento 7152 Flanders Way 95842
# 4 1076 Edmonton Drive, Sacramento, CA 95833 1076 Edmonton Drive, Sacramento 1076 Edmonton Drive 95833
# 5 9906 Burline Street, Sacramento, CA 95827 9906 Burline Street, Sacramento 9906 Burline Street 95827
# 6 3634 Sapphire Drive Apt 1, Auburn, CA 95602 3634 Sapphire Drive Apt 1, Auburn 3634 Sapphire Drive Apt 1 95602
Now lets hit the API:
getValRange <- function(x, hilo) {
ifelse(hilo %in% unlist(dimnames(x)), x["text",hilo][[1]], NA)
}
Out_df_All <-NULL
Out_df <-NULL
for (i in 1:nrow(df)){
print(i)
xml =GetSearchResults(address = as.character(df[i,]$Street), citystatezip = as.character(df[i,]$zip),rentzestimate = TRUE)
if (!is.null(xml$response[["results"]])){
results <- xmlToList(xml$response[["results"]])
if (is.matrix(results)){
out <- apply(results, MAR=2, function(property) {
zpid <- property$zpid
links <- unlist(property$links)["mapthishome"]
address <- unlist(property$address)
z <-property$zestimate
zestdf <- list(
zestimate=ifelse("text" %in% names(z$amount), z$amount$text, NA)
)
rz <- property$rentzestimate
rentalzestdf <- list(
rentzestimate=ifelse("text" %in% names(rz$amount), rz$amount$text, NA)
)
list(zpid=zpid, links, address, zestdf, rentalzestdf)
}
)
data <- as.data.frame(do.call(rbind, lapply(out, unlist)[1]),row.names=seq_len(length(out)))
data[is.na(data)] <- 0
Out_df<-data.frame("Address"=df[i,"Address"],data)
Out_df_All<-rbind(Out_df,Out_df_All)
}
else {
data<-as.data.frame(do.call(rbind, lapply(xml$response[["results"]][1], unlist)[1]),row.names=seq_len(length(xml$response[["results"]][1])))
data<-data[which(grepl("text.value",colnames(data)))]
colnames(data)<-gsub("children.","",colnames(data))
colnames(data)<-gsub(".text.value","",colnames(data))
colnames(data)<-gsub("address.","",colnames(data))
colnames(data)<-gsub("links.","",colnames(data))
colnames(data)<-gsub(".amount","",colnames(data))
data <- data[,c("zpid","mapthishome","street","zipcode","city","state","latitude","longitude")]
data$zestimate <- ifelse("zestimate" %in% colnames(data), data$zestimate, NA)
data$rentzestimate <- ifelse("rentzestimate" %in% colnames(data), data$rentzestimate, NA)
data[is.na(data)] <- 0
Out_df<-data.frame("Address"=df[i,"Address"],data)
Out_df_All<-rbind(Out_df,Out_df_All)
}
}
}
View(Out_df_All)
来源:https://stackoverflow.com/questions/48456449/how-do-i-iterate-a-zillow-api-call-output-parse-function-in-r-over-a-list-of-a