How do I split a large xml file?

后端 未结 7 2058
小鲜肉
小鲜肉 2021-01-02 14:05

We export “records” to an XML file; one of our customers has complained that the file is too big for their other system to process. Therefore I need to split up the file.

7条回答
  •  星月不相逢
    2021-01-02 14:28

    Using Ultraedit based on https://www.ultraedit.com/forums/viewtopic.php?f=52&t=6704

    All I added was some XML header and footer bits. The first and last files need to be manually fixed (or remove the root element from your source).

        // from https://www.ultraedit.com/forums/viewtopic.php?f=52&t=6704 
    
    // Script configuration, read by the main script body below.
    // NOTE(review): SplitString, xmlHead, xmlRootStart and xmlRootEnd are empty strings here.
    // Presumably they are placeholders to be filled in before running (e.g. the closing tag of
    // one record, the XML declaration, and the opening/closing root tags) - confirm.
    var FoundsPerFile = 200;      // Global setting for number of found split strings per file.
    var SplitString = "";  // String where to split. The split occurs after next character.
    var xmlHead = '';       // Written first at the top of every generated file.
    var xmlRootStart = '';  // Written at the top of every generated file, right after xmlHead.
    var xmlRootEnd = '';    // Written at the bottom of every generated file.
    
    /* Find the tab index of the active document */
    // Copied from http://www.ultraedit.com/forums/viewtopic.php?t=4571
    function getActiveDocumentIndex () {
       // Walk the open documents in tab order and report the first one whose
       // path equals the path of the active document; -1 means no match.
       var docPos = 0;
       while (docPos < UltraEdit.document.length) {
          if (UltraEdit.activeDocument.path==UltraEdit.document[docPos].path) {
             return docPos;
          }
          docPos++;
       }
       return -1;
    }
    
    // Main script body: repeatedly cuts everything from the top of the active document up to
    // (and one character past) the FoundsPerFile-th occurrence of SplitString, pastes it into a
    // new file, wraps it with xmlHead/xmlRootStart/xmlRootEnd and saves it next to the source
    // file as LETTER<nn>.raw.xml. The source is emptied in the editor but closed without saving.
    if (UltraEdit.document.length) { // Is any file open?
       // Set working environment required for this job.
       UltraEdit.insertMode();
       UltraEdit.columnModeOff();
       UltraEdit.activeDocument.hexOff();
       UltraEdit.ueReOn();
    
       // Move cursor to top of active file and run the initial search.
       UltraEdit.activeDocument.top();
       UltraEdit.activeDocument.findReplace.searchDown=true;
       UltraEdit.activeDocument.findReplace.matchCase=true;
       UltraEdit.activeDocument.findReplace.matchWord=false;
       UltraEdit.activeDocument.findReplace.regExp=false;
       // If the string to split is not found in this file, do nothing.
       if (UltraEdit.activeDocument.findReplace.find(SplitString)) {
          // This file is probably the correct file for this script.
          var FileNumber = 1;    // Counts the number of saved files.
          // Starts at 1 because the initial find above already located the first occurrence.
          var StringsFound = 1;  // Counts the number of found split strings.
          // The script relies on each newly created file getting this index
          // (see the setActive() call after newFile() below).
          var NewFileIndex = UltraEdit.document.length;
          /* Get the path of the current file to save the new
             files in the same directory as the current file. */
          var SavePath = "";
          var LastBackSlash = UltraEdit.activeDocument.path.lastIndexOf("\\");
          if (LastBackSlash >= 0) {
             // Keep the trailing backslash in SavePath so a file name can be appended directly.
             LastBackSlash++;
             SavePath = UltraEdit.activeDocument.path.substring(0,LastBackSlash);
          }
          /* Get active file index in case of more than 1 file is open and the
             current file does not get back the focus after closing the new files. */
          var FileToSplit = getActiveDocumentIndex();
          // Always use clipboard 9 for this script and not the Windows clipboard.
          UltraEdit.selectClipboard(9);
          // Split the file after every x found split strings until source file is empty.
          while (1) {
             // Advance through further occurrences until this chunk holds FoundsPerFile of them;
             // if no more are found, the chunk extends to the end of the file.
             while (StringsFound < FoundsPerFile) {
                if (UltraEdit.document[FileToSplit].findReplace.find(SplitString)) StringsFound++;
                else {
                   UltraEdit.document[FileToSplit].bottom();
                   break;
                }
             }
             // End the selection of the find command.
             UltraEdit.document[FileToSplit].endSelect();
             // Move the cursor right to include the next character and unselect the found string.
             UltraEdit.document[FileToSplit].key("RIGHT ARROW");
             // Select from this cursor position everything to top of the file.
             UltraEdit.document[FileToSplit].selectToTop();
             // Is the file not already empty?
             if (UltraEdit.document[FileToSplit].isSel()) {
                // Cut the selection and paste it into a new file.
                UltraEdit.document[FileToSplit].cut();
                UltraEdit.newFile();
                UltraEdit.document[NewFileIndex].setActive();
                UltraEdit.activeDocument.paste();
    
    
                /* Add line termination on the last line and remove automatically added indent
                   spaces/tabs if auto-indent is enabled if the last line is not already terminated. */
                if (UltraEdit.activeDocument.isColNumGt(1)) {
                   UltraEdit.activeDocument.insertLine();
                   if (UltraEdit.activeDocument.isColNumGt(1)) {
                      UltraEdit.activeDocument.deleteToStartOfLine();
                   }
                }
    
                // add headers and footers 
                // xmlHead and xmlRootStart go before the pasted chunk, xmlRootEnd after it.
    
                UltraEdit.activeDocument.top();
                UltraEdit.activeDocument.write(xmlHead);
                            UltraEdit.activeDocument.write(xmlRootStart);
                UltraEdit.activeDocument.bottom();
                UltraEdit.activeDocument.write(xmlRootEnd);
                // Build the file name for this new file.
                // Result is SavePath + "LETTER" + file number + ".raw.xml"; numbers below 10
                // get one leading zero.
                var SaveFileName = SavePath + "LETTER";
                if (FileNumber < 10) SaveFileName += "0";
                SaveFileName += String(FileNumber) + ".raw.xml";
                // Save the new file and close it.
                UltraEdit.saveAs(SaveFileName);
                UltraEdit.closeFile(SaveFileName,2);
                FileNumber++;
                // Reset for the next chunk; unlike the first chunk, no occurrence is pre-found.
                StringsFound = 0;
                /* Delete the line termination in the source file
                   if last found split string was at end of a line. */
                UltraEdit.document[FileToSplit].endSelect();
                UltraEdit.document[FileToSplit].key("END");
                // If the cursor is past column 1 after END, the first line still has content, so
                // only move back to the top; otherwise the line is empty and gets deleted.
                if (UltraEdit.document[FileToSplit].isColNumGt(1)) {
                   UltraEdit.document[FileToSplit].top();
                } else {
                   UltraEdit.document[FileToSplit].deleteLine();
                }
             } else break;
                // Despite its indentation, this statement belongs to the while loop body; it is
                // reached only after a file was written, because the else branch above breaks.
                UltraEdit.outputWindow.write("Progress " + SaveFileName);
          }  // Loop executed until source file is empty!
    
          // Close source file without saving and re-open it.
          var NameOfFileToSplit = UltraEdit.document[FileToSplit].path;
          UltraEdit.closeFile(NameOfFileToSplit,2);
          /* The following code line could be commented if the source
             file is not needed anymore for further actions. */
          UltraEdit.open(NameOfFileToSplit);
    
          // Free memory and switch back to Windows clipboard.
          UltraEdit.clearClipboard();
          UltraEdit.selectClipboard(0);
       }
    }
    

提交回复
热议问题