I have a 15 GB XML file that I want to split. It has approximately 300 million lines. None of its top-level nodes depend on one another. Is there any…
I used the following script to split the Yahoo Q&A dataset:
def split_xml(source_path, output_prefix, records_per_file=50000,
              record_marker="", chunk_suffix="", next_chunk_prefix="\n",
              final_suffix=" ", encoding=None):
    """Split a large line-oriented XML/text file into numbered ``.xml`` parts.

    The source is read one line at a time and every line is copied straight
    to the current part file, so memory use does not grow with the size of
    the input or of a part.  A line containing ``record_marker`` counts as
    one record; once ``records_per_file`` records have been copied, the
    current part is finished with ``chunk_suffix`` and a new part is started
    with ``next_chunk_prefix``.  After the last input line the part that is
    still open is finished with ``final_suffix``.

    A final part is always written, even when the input ends exactly on a
    part boundary (that part then holds only ``next_chunk_prefix`` followed
    by ``final_suffix``) or when the input is empty.

    NOTE(review): the defaults for ``record_marker``, ``chunk_suffix``,
    ``next_chunk_prefix`` and ``final_suffix`` reproduce the literals of the
    original snippet verbatim.  They are empty or whitespace-only, presumably
    because the XML tags they once contained were lost when the snippet was
    pasted -- confirm and pass the real record-closing tag and root
    open/close tags.  With the default empty marker every line counts as a
    record, because ``"" in line`` is always true.

    Args:
        source_path: Path of the file to split.
        output_prefix: Text placed before the part number in each output
            path; part ``n`` is written to ``f"{output_prefix}{n}.xml"``.
        records_per_file: Number of records after which a part is closed.
        record_marker: Substring whose presence in a line marks one record.
        chunk_suffix: Text appended to a part that reached the record limit.
        next_chunk_prefix: Text written at the start of every part after the
            first one.
        final_suffix: Text appended to the last part.
        encoding: Encoding used for reading and writing; ``None`` keeps the
            platform default used by ``open``.

    Returns:
        The number of the last part written (parts are numbered from 1).

    Raises:
        ValueError: If ``records_per_file`` is smaller than 1.
    """
    if records_per_file < 1:
        raise ValueError("records_per_file must be at least 1")

    part_index = 1
    records_in_part = 0
    # Open the source first so that a missing input creates no output file.
    with open(source_path, encoding=encoding) as source:
        part = open(f"{output_prefix}{part_index}.xml", "w", encoding=encoding)
        try:
            for line in source:
                # Stream each line out instead of accumulating one big string.
                part.write(line)
                if record_marker not in line:
                    continue
                records_in_part += 1
                if records_in_part == records_per_file:
                    # Current part is full: finish it and roll over.
                    part.write(chunk_suffix)
                    part.close()
                    part_index += 1
                    records_in_part = 0
                    part = open(f"{output_prefix}{part_index}.xml", "w",
                                encoding=encoding)
                    part.write(next_chunk_prefix)
            part.write(final_suffix)
        finally:
            # Closing an already-closed file is a no-op, so this is safe even
            # if opening the next part failed after the previous one closed.
            part.close()
    return part_index


if __name__ == "__main__":
    # Same placeholder paths as the original snippet; replace them with the
    # real input file and output location before running.
    split_xml('filepath', 'filepath/Split/file_')