When retrieving the h1 title using rvest, I sometimes run into 404 pages. This stops the process and returns this error:
Error in open.con
You can see this question for an explanation here.
# Pages whose <h1> headline should be retrieved.
# NOTE(review): the last address looks intentionally misspelled
# ("facffffdebook"), presumably to exercise the failure path -- confirm.
urls <- c(
  "http://boingboing.net/2016/06/16/spam-king-sanford-wallace.html",
  "http://boingboing.net/2016/06/16/omg-the-japanese-trump-commer.html",
  "http://boingboing.net/2016/06/16/omar-mateen-posted-to-facebook.html",
  "http://boingboing.net/2016/06/16/omar-mateen-posted-to-facffffdebook.html"
)
#' Extract the text of every `<h1>` element from a web page
#'
#' Reads the page at `url`, selects its `h1` nodes and returns their text.
#' Any error raised along the way (for example a page that cannot be
#' opened) is caught, reported via `message()`, and turned into `NA` so
#' that a caller iterating over many URLs is not interrupted.
#'
#' Expects `%>%`, `read_html()`, `html_nodes()` and `html_text()` to be
#' available in the calling environment (e.g. via `library(rvest)`).
#'
#' @param url Address of the page to read; coerced with `as.character()`.
#' @return The value of `html_text()` for the page's `h1` nodes, or `NA`
#'   if any step signalled an error.
readUrl <- function(url) {
  tryCatch(
    {
      message("This is the 'try' part")
      url %>%
        as.character() %>%
        read_html() %>%
        html_nodes("h1") %>%
        html_text()
    },
    error = function(cond) {
      message(paste("URL does not seem to exist:", url))
      message("Here's the original error message:")
      # Print only the error text. Passing the condition object itself to
      # message() would re-signal it with its original (error) classes.
      message(conditionMessage(cond))
      NA
    }
  )
}
# Apply readUrl to each address in turn; lapply() keeps the results in a
# list with one element per entry of `urls`.
y <- lapply(X = urls, FUN = readUrl)