How do I iterate a Zillow API call / output parse function in R over a list of addresses and zip codes?

a 夏天 提交于 2020-01-05 05:52:09

问题


I've been working on a project where the goal is to take a two-column CSV of street addresses and zip codes, read it into R, then perform a Zillow query for each one (GetSearchResults, specifically), parse the output, and store the parsed output in a dataframe to be written to a CSV (and placed right next to the existing data).

caveat: I can only call one address/zip combo at a time through the zillow API, so anything that violates that is off the table immediately.

As of this point, I have about 85% of the work done. I have i) a bit of code that can, one-by-one, query those address/zip combos from a dataframe, as well as ii) a tentative way of putting that output back into a dataframe:

library(ZillowR)
library(rvest)
library(dplyr)
library(DT)

# this commented section is what I would use instead of creating the dataframe manually below, just for clarity
# data1 = read.csv('Addresses.csv', header = F, colClasses = 'character')$V1
# data2 = read.csv('Addresses.csv', header = F, colClasses = 'character')$V2
# data = data.frame(street = data1, city.state = as.character(data2))
# per comments, should add a "stringsAsFactors = FALSE" in the dataframe part

# Sample input: one row per property, with the street address in `street` and
# the zip code in `city.state`. Both columns are kept as character vectors
# (not factors) so single elements can be handed to the API as plain strings
# on any R version.
data <- data.frame(
  street = c(
    "77 Huntington Ave",
    "85 Prospect St",
    "219 Lincoln St"
  ),
  city.state = rep("01752", 3),
  stringsAsFactors = FALSE
)

# Query Zillow's GetDeepSearchResults once per row of `df` and bind the raw
# responses next to the input columns.
#
# Arguments:
#   df          data frame with a `street` column and a `city.state` column
#               (the latter is passed to the API as `citystatezip`).
#   address     unused; kept so existing calls keep working.
#   city.state  unused; kept so existing calls keep working.
#
# Returns `cbind(df, results)` where `results` has one row per input row; rows
# whose request raised an error or a warning hold NA. If the row counts do not
# match, an error string is printed and returned instead. An empty `df` is
# returned unchanged without contacting the API.
get.zillowdata <- function(df, address, city.state) {
  # Fail loudly when the dependency is missing (require() would only return
  # FALSE and let the first API call fail later with a confusing error).
  if (!requireNamespace("ZillowR", quietly = TRUE)) {
    stop("Package 'ZillowR' is required but is not installed.", call. = FALSE)
  }
  ZillowR::set_zillow_web_service_id('API KEY')

  n_rows <- nrow(df)
  if (n_rows == 0L) {
    message("No addresses to process.")
    return(df)
  }

  # Fetch a single row; any error or warning is reported and mapped to NA.
  fetch_one <- function(i) {
    tryCatch(
      ZillowR::GetDeepSearchResults(
        # as.character() guards against factor columns
        address = as.character(df$street[i]),
        citystatezip = as.character(df$city.state[i]),
        zws_id = getOption("ZillowR-zws_id"),
        url = "http://www.zillow.com/webservice/GetDeepSearchResults.htm"
      ),
      error = function(cond) {
        message(paste("No Data Available:", df$street[i], df$city.state[i]))
        NA
      },
      warning = function(cond) {
        message(paste("Zdata caused a warning:", df$street[i], df$city.state[i]))
        NA
      },
      # print processing message to screen
      finally = {
        message(paste("Processed Address:", df$street[i], df$city.state[i]))
        message(paste(i, "of", nrow(df), 'processed'))
      }
    )
  }

  fetched <- lapply(seq_len(n_rows), fetch_one)
  # A request counts as missing exactly when its handler returned NA.
  n_missing <- sum(vapply(fetched, function(z) identical(z, NA), logical(1)))
  results <- do.call(rbind, fetched)

  if (nrow(results) == nrow(df)) {
    results <- cbind(df, results)

    print(paste('Original data had', nrow(df), 'rows. Returning a dataframe with', nrow(results),
                'rows. Returned dataframe has', n_missing, 'missing zdata values.'))

    return(results)
  } else {
    print("Error: nrows(df) do not match nrows(zdata)")
  }
}

# Run the lookup for every row of `data`; at top level the returned value is
# auto-printed.
get.zillowdata(data)
` 

and also iii) a parser for the XMLnode response that you get when you perform a query through the Zillow API which picks out specific child values (zestimate, square footage, lot size, etc; whatever you specify)

library(ZillowR)
library(XML)
library(RCurl)

# 'API KEY' is presumably a placeholder -- replace with a real Zillow Web
# Service ID before running.
set_zillow_web_service_id('API KEY')
# NOTE(review): this calls GetDeepSearchResults but `url` points at the
# GetSearchResults endpoint -- confirm which of the two APIs is intended.
# 'STREET ADDRESS' and '0ZIP CODE' look like placeholders as well.
output123 = GetDeepSearchResults(address = 'STREET ADDRESS', citystatezip = '0ZIP CODE', zws_id = getOption("ZillowR-zws_id"), url = "http://www.zillow.com/webservice/GetSearchResults.htm")

# Convert the "results" child of the response into a nested R list; the apply()
# call further down walks its columns, so it is expected to be a matrix here.
results <- xmlToList(output123$response[["results"]])

# Read the "text" entry of one column of a valuation-range matrix.
#
# Arguments:
#   x     matrix (possibly NULL) with a "text" row and one named column per
#         bound; the callers pass `z$valuationRange`.
#   hilo  single column name to read, e.g. "low" or "high".
#
# Returns the stored value, or NA when `x` has no column named `hilo` or no
# "text" row (including when `x` is NULL).
getValRange <- function(x, hilo) {
  stopifnot(length(hilo) == 1L)
  # Check rows and columns separately: testing against all dimnames at once
  # lets a row name pass the check and then fail on the lookup.
  has_cell <- hilo %in% colnames(x) && "text" %in% rownames(x)
  if (has_cell) x["text", hilo][[1]] else NA
}

# Walk the columns of `results` (one property each) and keep the id, links,
# address and a handful of zestimate fields for every property.
out <- apply(results, MARGIN = 2, function(prop) {
  zest <- prop$zestimate
  # ifelse() is kept on purpose: it returns a single element, so list-valued
  # entries are cut down to their first item before flattening.
  estimate <- list(
    amount = ifelse("text" %in% names(zest$amount), zest$amount$text, NA),
    lastupdated = zest$"last-updated",
    valueChange = ifelse(length(zest$valueChange) == 0, NA, zest$valueChange),
    valueLow = getValRange(zest$valuationRange, "low"),
    valueHigh = getValRange(zest$valuationRange, "high"),
    percentile = zest$percentile
  )
  list(id = prop$zpid, unlist(prop$links), unlist(prop$address), estimate)
})

# Flatten each property to a named vector and stack them, one row per property.
flattened <- lapply(out, unlist)
data <- as.data.frame(do.call(rbind, flattened), row.names = seq_along(out))

But I'm a little stuck at this point. How should I put these together so that I can include the parsing at the end of the api call part and make sure that both of them get iterated over the full list of addresses/zips? My code right now isn't in any particular order, so feel free to move things around if you decide to tackle this, and if anyone needs additional information, I'm happy to clarify!

Thanks very much in advance.


回答1:


This is a semi-simplified answer to your question (I removed some of the information that the API provides, such as the high/low estimates, but you can add them back in by following the logic used for the other fields in the code below). First, load the required packages:

# Install any packages in `pkg` that are not installed yet, then load them all.
#
# Arguments:
#   pkg  character vector of package names.
#
# Returns a logical vector named by `pkg`: TRUE where require() managed to
# load the package, FALSE otherwise. An empty `pkg` yields an empty logical
# vector.
ipak <- function(pkg) {
  new.pkg <- pkg[!(pkg %in% installed.packages()[, "Package"])]
  if (length(new.pkg) > 0) {
    install.packages(new.pkg, dependencies = TRUE)
  }
  # require() is used deliberately: its TRUE/FALSE result is the return value.
  # vapply() keeps the result type stable even for empty input.
  vapply(pkg, require, logical(1), character.only = TRUE)
}
# Install (if missing) and load the packages listed below via ipak().
packages <- c("ZillowR", "rjson", "httr", "XML")
ipak(packages)

Next set your Zillow API Key:

# 'API KEY' is presumably a placeholder -- replace it with your own Zillow Web
# Service ID before running.
set_zillow_web_service_id('API KEY')

Next, define a dataset containing only the full addresses, then derive street-address and zip-code columns from it so we can pass them to the Zillow API later:

# Sample input: one full mailing address ("street, city, ST zip") per row.
df <- data.frame(
  "Address" = c(
    "5435 Andrea Boulevard, Sacramento, CA 95842",
    "8434 Walerga Road Apt 421, Antelope, CA 95843",
    "7152 Flanders Way, Sacramento, CA 95842",
    "1076 Edmonton Drive, Sacramento, CA 95833",
    "9906 Burline Street, Sacramento, CA 95827",
    "3634 Sapphire Drive Apt 1, Auburn, CA 95602"
  ),
  stringsAsFactors = FALSE
)

# Each pass removes the text after the last comma: first the "ST zip" part,
# then the city, leaving only the street.
df[["StreetCity"]] <- sub("(.*),.*", "\\1", df[["Address"]])
df[["Street"]] <- sub("(.*),.*", "\\1", df[["StreetCity"]])

# The zip code is taken as the final five characters of the full address.
df[["zip"]] <- substring(df[["Address"]], nchar(df[["Address"]]) - 4)
df
#                                         Address                          StreetCity                    Street   zip
# 1   5435 Andrea Boulevard, Sacramento, CA 95842   5435 Andrea Boulevard, Sacramento     5435 Andrea Boulevard 95842
# 2 8434 Walerga Road Apt 421, Antelope, CA 95843 8434 Walerga Road Apt 421, Antelope 8434 Walerga Road Apt 421 95843
# 3       7152 Flanders Way, Sacramento, CA 95842       7152 Flanders Way, Sacramento         7152 Flanders Way 95842
# 4     1076 Edmonton Drive, Sacramento, CA 95833     1076 Edmonton Drive, Sacramento       1076 Edmonton Drive 95833
# 5     9906 Burline Street, Sacramento, CA 95827     9906 Burline Street, Sacramento       9906 Burline Street 95827
# 6   3634 Sapphire Drive Apt 1, Auburn, CA 95602   3634 Sapphire Drive Apt 1, Auburn 3634 Sapphire Drive Apt 1 95602

Now let's hit the API:

# Read the "text" entry of one column of a valuation-range matrix.
#
# Arguments:
#   x     matrix (possibly NULL) with a "text" row and one named column per
#         bound.
#   hilo  single column name to read, e.g. "low" or "high".
#
# Returns the stored value, or NA when `x` has no column named `hilo` or no
# "text" row (including when `x` is NULL).
getValRange <- function(x, hilo) {
  stopifnot(length(hilo) == 1L)
  # Check rows and columns separately: testing against all dimnames at once
  # lets a row name pass the check and then fail on the lookup.
  has_cell <- hilo %in% colnames(x) && "text" %in% rownames(x)
  if (has_cell) x["text", hilo][[1]] else NA
}

# Query Zillow once per row of `df` and build `Out_df_All`, with one output
# row per address that returned results (first result only), in input order.
# Addresses whose request fails or returns no results are skipped.
Out_df_All <- NULL
Out_df <- NULL

# Columns of every output row, after the leading "Address" column.
base_cols <- c("zpid", "mapthishome", "street", "zipcode", "city", "state",
               "latitude", "longitude")
estimate_cols <- c("zestimate", "rentzestimate")

# Flatten one property (a column of the xmlToList() matrix) to a named vector
# holding the zpid, the "mapthishome" link, the address fields and the two
# estimates (NA when an estimate has no "text" entry).
parse_property <- function(property) {
  z <- property$zestimate
  rz <- property$rentzestimate
  unlist(list(
    zpid = property$zpid,
    unlist(property$links)["mapthishome"],
    unlist(property$address),
    list(zestimate = if ("text" %in% names(z$amount)) z$amount$text else NA),
    list(rentzestimate = if ("text" %in% names(rz$amount)) rz$amount$text else NA)
  ))
}

# Preallocate one slot per address; rows are bound once after the loop instead
# of growing a data frame with rbind() on every iteration.
rows <- vector("list", nrow(df))

for (i in seq_len(nrow(df))) {
  print(i)
  # A failed request is reported and skipped rather than aborting the batch.
  xml <- tryCatch(
    GetSearchResults(
      address = as.character(df$Street[i]),
      citystatezip = as.character(df$zip[i]),
      rentzestimate = TRUE
    ),
    error = function(e) {
      message("Zillow request failed for row ", i, ": ", conditionMessage(e))
      NULL
    }
  )
  results_node <- xml$response[["results"]]
  if (is.null(results_node)) {
    next
  }

  results <- xmlToList(results_node)
  if (is.matrix(results)) {
    # Several properties came back: keep the first one only.
    first_property <- parse_property(results[, 1])
    data <- as.data.frame(
      rbind(first_property, deparse.level = 0),
      stringsAsFactors = FALSE
    )
  } else {
    # Not a matrix: flatten the first child of the raw XML node instead.
    first_child <- results_node[1]
    data <- as.data.frame(
      do.call(rbind, lapply(first_child, unlist)[1]),
      row.names = seq_along(first_child),
      stringsAsFactors = FALSE
    )
    data <- data[which(grepl("text.value", colnames(data)))]
    # Strip the XML path fragments so only the field name remains; fixed = TRUE
    # because the dots are literal separators, not regex wildcards.
    for (fragment in c("children.", ".text.value", "address.", "links.", ".amount")) {
      colnames(data) <- gsub(fragment, "", colnames(data), fixed = TRUE)
    }
    # Add missing estimate columns BEFORE subsetting, so existing estimates are
    # kept instead of being dropped and replaced by NA.
    for (col in estimate_cols) {
      if (!col %in% colnames(data)) {
        data[[col]] <- NA
      }
    }
    data <- data[, c(base_cols, estimate_cols)]
  }

  data[is.na(data)] <- 0
  Out_df <- data.frame("Address" = df[i, "Address"], data,
                       stringsAsFactors = FALSE)
  rows[[i]] <- Out_df
}

rows <- Filter(Negate(is.null), rows)
if (length(rows) > 0) {
  Out_df_All <- do.call(rbind, rows)
}
View(Out_df_All)



来源:https://stackoverflow.com/questions/48456449/how-do-i-iterate-a-zillow-api-call-output-parse-function-in-r-over-a-list-of-a

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!