evaluateXPath runs slow for repeating 1 XML Element in java

喜夏-厌秋 提交于 2020-06-13 09:16:51

问题


I have about 1,000,000 XML files and I am using XpathExpression with Java language to walk through the XML tags and get my considered data.

Imagine I have about 5000 tags for name, 5000 tags for family name, 5000 tags for age, and only 1 tag for date in each file. Now I want to repeat date tag to 5000 times too.

Blow code is runnable for XML files with Java programming with less than 20MB size, but I have files with more than 20MB size and it takes so many times to run and in some cases, I got Out of memory error in eclipse( I tried adding vmargs in the run configuration of Eclipse but it takes so much time and still so low.) I am pretty sure there is a problem with my array for repeting date tag and it is not optimized, I really appreciate if you mind and have a look at my code, in addition I should say that i am newbie to java:

package TEST;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Stream;
import java.io.BufferedWriter;
import java.io.FileWriter; 
import java.io.IOException; 
import java.io.PrintWriter;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import java.util.Arrays;

public class Data {

    //function started
     public static void main(String[] args) throws Exception 
        {
        //Get Files
            String doc ="MyFileNew";
            String dump="";
            int number =200;
            for (int i=1; i<=201; i++) {
               number ++;
               dump = doc+number;
               String fileName= "/root/MyFiles/" + dump + ".xml";
               Document document = getDocument(fileName);
;
               FileWriter fw = null; 
               BufferedWriter bw = null; 
               PrintWriter pw = null;


                //Using Document Builder
                DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance();
                documentBuilderFactory.setNamespaceAware(true);
                DocumentBuilder documentBuilder = documentBuilderFactory.newDocumentBuilder();
                Document doc1 = documentBuilder.parse(fileName);


             /*******Get attribute values using xpath******/
            XPathFactory xpathFactory = XPathFactory.newInstance();
            XPath xpath = xpathFactory.newXPath();


            try {
                fw = new FileWriter("/root/Results/" + dump + ".txt");
                bw = new BufferedWriter(fw);
                pw = new PrintWriter(bw);


                //Printing Name tags
                pw.println( "Name"+ evaluateXPath(document, "/xml/item/item[@key='Name']/text()") );


                 //Counting Name tags
                XPathExpression expr1 = xpath.compile("count(/xml/item/item[@key='Name']/)");
                Number result1 = (Number) expr1.evaluate(doc1, XPathConstants.NUMBER);
                int n = result1.intValue(); 

                //Printing FamilyName tags
                pw.println( "FamilyName: " + evaluateXPath(document, "/xml/item/item[@key='FamilyName']/text() \n") );

                //Printing Age tags
                pw.println( "Age: " + evaluateXPath(document, "/xml/item/item[@key='Age']/text() \n") );

                //Repeating Date based on counting name tags

                String[] strArray = new String[0];
                for (int q=0; q<n;q++){
                List<String> strArraytmp = evaluateXPath(document,"/xml/item/item[@key='date']/text()");
                String[] strings = strArraytmp.stream().toArray(String[]::new);
                strArray= Stream.of(strArray, strings ).flatMap(Stream::of).toArray(String[]::new); 
                  }
                pw.println("date: " + Arrays.toString(strArray));

                System.out.println("this file goes to path:" + "/root/Results/Data/" + dump + ".txt");
                pw.flush();
        } 
        catch (IOException e) 
        { e.printStackTrace(); } } 

}
        private static List<String> evaluateXPath(Document document, String xpathExpression) throws Exception 
        {
            // Create XPathFactory object
            XPathFactory xpathFactory = XPathFactory.newInstance();

            // Create XPath object
            XPath xpath = xpathFactory.newXPath();

            List<String> values = new ArrayList<>();
            try
            {
                // Create XPathExpression object
                XPathExpression expr = xpath.compile(xpathExpression);

                // Evaluate expression result on XML document
                NodeList nodes = (NodeList) expr.evaluate(document, XPathConstants.NODESET);

                for (int i = 0; i < nodes.getLength(); i++) {
                    values.add(nodes.item(i).getNodeValue());
                }

            } catch (XPathExpressionException e) {
                e.printStackTrace();
            }

            return values;
        }


        private static Document getDocument(String fileName) throws Exception 
        {
            DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
            factory.setNamespaceAware(true);
            DocumentBuilder builder = factory.newDocumentBuilder();
            Document doc = builder.parse(fileName);
            return doc;
        }

        }

来源:https://stackoverflow.com/questions/62245567/evaluatexpath-runs-slow-for-repeating-1-xml-element-in-java

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!