I have xml like this:
Consider parsing the meeting data by node, expanding each meeting's attributes to one row per child race element, and then column-binding the result with the race data:
# Load the XML package, which provides xmlParse(), getNodeSet(), xmlGetAttr()
# and the other XML helpers used in this script.
library(XML)

# Parse the source file into an internal XML document.
doc <- xmlParse("/path/to/Source.xml")

# ALL MEETING NODES, IN DOCUMENT ORDER
# Fetching the nodes once (rather than addressing them as "//meeting[i]")
# avoids two problems: "//meeting[i]" means "every meeting that is the i-th
# meeting child of its parent", which is not the i-th meeting of the document
# when meetings live under more than one parent; and each such lookup
# re-scans the whole document.
meeting_nodes <- getNodeSet(doc, "//meeting")

# NUMBER OF MEETING NODES
mtg_num <- length(meeting_nodes)

# DATAFRAME LIST OF EXPANDED MEETING ATTRS
# One data frame per meeting: the meeting's id and name repeated once per
# direct <race> child, so the stacked rows line up with "//meeting/race".
# Iterating over the node set (instead of seq(mtg_num)) yields an empty list,
# not the indices c(1, 0), when the document has no meetings.
meeting_list <- lapply(meeting_nodes, function(meeting) {
  # XPath is evaluated relative to this meeting node.
  races_num <- length(getNodeSet(meeting, "./race"))
  # A missing attribute becomes NA so both columns keep the same length.
  data.frame(
    meeting_id = rep(
      xmlGetAttr(meeting, "id", default = NA_character_), races_num
    ),
    meeting_name = rep(
      xmlGetAttr(meeting, "name", default = NA_character_), races_num
    )
  )
})
# COLUMN BIND MEETING NODES, RACE NODES, AND RACE ATTRS
# Every <race> that is a direct child of a <meeting>; reused for both the
# child-element values and the attributes below.
race_nodes <- getNodeSet(doc, "//meeting/race")

# Stack the per-meeting data frames into one (one row per race).
meeting_cols <- do.call(rbind, meeting_list)

# Child elements of each race, one column per element name.
race_values <- xmlToDataFrame(nodes = race_nodes)

# Attributes of each race. NOTE(review): xmlAttrsToDataFrame is reached via
# `:::`, i.e. it is not exported by XML -- confirm it is still present when
# upgrading the package.
race_attrs <- XML:::xmlAttrsToDataFrame(race_nodes)

# Meeting columns first, then race child values, then race attributes.
final_df <- cbind(meeting_cols, race_values, race_attrs)
Output
# Preview the first rows of the combined meeting/race data frame.
head(final_df)
# meeting_id meeting_name time date ampm title type distance group tipsAllowed predictorAllowed
# 1 195 Punchestown (IRE) 12:25 2018-01-13 pm Adare Manor Opportunity Handicap Chase C 2m4f Handicap 1 1
# 2 195 Punchestown (IRE) 1:00 2018-01-13 pm Total Event Rental (Kildare) Novice Chase (Grade 3) C 2m4f Grade 3 1 1
# 3 195 Punchestown (IRE) 1:35 2018-01-13 pm Connolly's RED MILLS Amateur National (Q.R.) Handicap Chase C 3m1f Handicap 1 1
# 4 195 Punchestown (IRE) 2:10 2018-01-13 pm Sky Bet Moscow Flyer Novice Hurdle (Grade 2) H 2m Grade 2 1 1
# 5 195 Punchestown (IRE) 2:45 2018-01-13 pm Sportinglife.com Maiden Hurdle H 2m 1 1
# 6 195 Punchestown (IRE) 3:20 2018-01-13 pm Leinster Leader Mares Handicap Hurdle H 2m4f40y Handicap 1 1
# bettingLink declaredRunners liveCommentary liveTab raceDescription tvText betOffers id perform_race_id perform_race_id_atr details_available race_status_code
# 1 1 10 1 1 Handicap Chase ATR 692415 1 R
# 2 1 7 1 1 Novice Chase Grade 3 ATR 692416 1 R
# 3 1 12 1 1 Handicap Chase ATR 692417 1 R
# 4 1 7 1 1 Novice Hurdle Grade 2 ATR 692418 1 R
# 5 1 17 1 1 Maiden Hurdle ATR 692419 1 R
# 6 1 8 1 1 Handicap Hurdle ATR 692420 1 R