This is sample XML:
<?xml version="1.0" encoding="UTF-8" standalone="no"?> <check> <val> <Samsung> <name value="galaxy" /> <name value="galaxy" /> <name value="galaxys" /> <id value="123" /> <id value="123" /> <cal>23</cal> <cal>23</cal> <name2 value="galaxy" /> </Samsung> <htc> <name value="galaxy" /> <name value="galaxy" /> <name value="galaxys" /> <id value="123" /> <id value="123" /> <name2 value="galaxy" /> </htc> </val> </check>
This is java code I wrote which finds the duplicate elements(including their attributes) and deletes them:
import java.io.File; import java.io.IOException; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.OutputKeys; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerException; import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; import javax.xml.xpath.XPathConstants; import javax.xml.xpath.XPathExpression; import javax.xml.xpath.XPathExpressionException; import javax.xml.xpath.XPathFactory; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.xml.sax.SAXException; public class RecursiveNodeCheck { /** * @param args */ public static String parent; public static void main(String[] args) { // TODO Auto-generated method stub String path="D:/vodafone/parse.xml"; try { File file = new File(path); DocumentBuilder dBuilder = DocumentBuilderFactory.newInstance() .newDocumentBuilder(); Document doc = dBuilder.parse(file); System.out.println("Root element :" + doc.getDocumentElement().getNodeName()); parent=doc.getDocumentElement().getNodeName(); String name="//"+parent+"/*"; XPathExpression expr = XPathFactory.newInstance().newXPath() .compile(name); NodeList list = (NodeList) expr.evaluate(doc, XPathConstants.NODESET); recursive(doc,list,path); } catch (Exception e) { System.out.println(e.getMessage()); } } private static void recursive(Document doc,NodeList list,String path) throws XPathExpressionException, TransformerException, SAXException, ParserConfigurationException, IOException { // TODO Auto-generated method stub for (int count = 0; count < list.getLength(); count++) { Node tempNode = list.item(count); if(!(tempNode.getNodeName().equalsIgnoreCase(parent) )){ if(!tempNode.hasChildNodes()&& tempNode.getNodeType() == Node.ELEMENT_NODE){ Node head= tempNode.getParentNode(); Node current=head.getFirstChild(); String exp=null; while(current!=null ){ if (current.getNodeType() != Node.ELEMENT_NODE ){ current=current.getNextSibling(); }else{ if( current.hasAttributes() ){ String key = current.getAttributes().getNamedItem("value").getNodeValue(); String value=""; if(current.getNodeValue()!=null){ value = current.getNodeValue(); } exp= "//"+head.getNodeName()+"/"+current.getNodeName()+"[@value='"+key+"']"+value; DelElements(doc,exp,path); current = current.getNextSibling(); } else{ exp= "//"+head.getNodeName()+"/"+current.getNodeName()+"/text()"; DelElements(doc,exp,path); current = current.getNextSibling(); } } } }else{ recursive(doc,tempNode.getChildNodes(),path); }} if (tempNode.hasChildNodes()) { // loop again if has child nodes recursive(doc,tempNode.getChildNodes(),path); } System.out.println("Node Name =" + tempNode.getNodeName() + " [CLOSE]"); } } private static void DelElements(Document doc, String exp,String path) throws TransformerException, SAXException, ParserConfigurationException, IOException { // TODO Auto-generated method stub DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); factory.setNamespaceAware(true); NodeList nodes = null; try { doc = factory.newDocumentBuilder().parse(new File(path)); XPathExpression expr = XPathFactory.newInstance().newXPath() .compile(exp); nodes = (NodeList) expr.evaluate(doc, XPathConstants.NODESET); } catch (XPathExpressionException e) { e.printStackTrace(); } for ( int i= nodes.getLength()-1;i>0; i--) { System.out.println("."); //progress indicator if (nodes.item(i).getNodeType() == Node.TEXT_NODE ){ nodes.item(i).getParentNode().removeChild( nodes.item(i)); } else{ Element el = (Element) ( nodes.item(i)); el.getParentNode().removeChild(el); } } TransformerFactory transformerFactory = TransformerFactory.newInstance(); Transformer transformer = transformerFactory.newTransformer(); transformer.setOutputProperty(OutputKeys.INDENT, "yes"); DOMSource source = new DOMSource(doc); StreamResult result = new StreamResult(new File(path)); transformer.transform(source, result); // TODO Auto-generated method stub } } }
This is the Output I got:
<?xml version="1.0" encoding="UTF-8" standalone="no"?> <check> <val> <Samsung> <name value="galaxy"/> <name value="galaxys"/> <id value="123"/> <cal>23</cal> **<cal/>** <name2 value="galaxy"/> </Samsung> <htc> <name value="galaxy"/> <name value="galaxys"/> <id value="123"/> <name2 value="galaxy"/> </htc> </val> </check>
In the above output xml, <cal/>
tag remains. How should I get this deleted?