I have an XML file (a TEI-encoded play) that I want to process into a data.frame in R, where every row of the data.frame contains one line of the play, the line number, the speaker of that line, and the scene number and scene type.
Update: I added an additional xpathApply call that iterates over the sp elements inside each scene:
#' Convert one `div1` node into a data.frame of its lines
#'
#' Visits every `sp` element that is a direct child of `x` and, inside each
#' one, every `l` element that is a direct child of that `sp` (the XPath
#' expressions are relative and contain no `//`).
#'
#' @param x An XML node, as passed in by `xpathApply(doc, "//div1", bodyToDF)`.
#'   Its `n` and `type` attributes are read when present.
#' @return A data.frame with the columns `scenenum`, `scenetype`, `who`,
#'   `line_num` and `linecontent`, one row per `l` element. Missing attributes
#'   become `NA`. An `sp` without any `l` child contributes a single row whose
#'   `line_num` and `linecontent` are `NA`. `NULL` is returned when `x` has no
#'   `sp` children (that is what `do.call(rbind, list())` yields).
bodyToDF <- function(x) {
  # xmlGetAttr() returns NULL for an absent attribute unless a default is
  # supplied, and NULL cannot serve as a data.frame column.
  scenenum <- xmlGetAttr(x, "n", NA_character_)
  scenetype <- xmlGetAttr(x, "type", NA_character_)

  speeches <- xpathApply(x, "sp", function(sp) {
    who <- xmlGetAttr(sp, "who", NA_character_)

    # One pass over the <l> children collects both the number and the text,
    # so the two columns are guaranteed to have the same length.
    lines <- xpathApply(sp, "l", function(l) {
      text <- xmlValue(l)
      c(
        n = xmlGetAttr(l, "n", NA_character_),
        text = if (length(text) == 0L) NA_character_ else text
      )
    })

    # vapply() yields character(0) for an empty list, unlike sapply(), whose
    # result type depends on the input.
    line_num <- vapply(lines, function(entry) entry[["n"]], character(1))
    linecontent <- vapply(lines, function(entry) entry[["text"]], character(1))

    # Keep a placeholder row for a speech without <l> children so that every
    # speech produces the same five columns and rbind() can stack them.
    if (length(lines) == 0L) {
      line_num <- NA_character_
      linecontent <- NA_character_
    }

    data.frame(
      scenenum = scenenum,
      scenetype = scenetype,
      who = who,
      line_num = line_num,
      linecontent = linecontent
    )
  })

  do.call(rbind, speeches)
}
# Run bodyToDF on every <div1> element found anywhere in the parsed document;
# the result is a list with one entry per matched element.
res <- xpathApply(doc, "//div1", fun = bodyToDF)

# Stack the per-<div1> results row-wise into a single data.frame.
temp.df <- do.call(what = rbind, args = res)
The first four columns of the result:
# > temp.df[,1:4]
# scenenum scenetype who line_num
# 1 1 scene fau 30
# 2 1 scene fau 31
# 3 1 scene fau 32
# 4 1 scene eang 105
# 5 2 scene sch1 NA
# 6 2 scene sch2 NA
# 7 2 scene sch1 NA
# 8 2 scene wag NA