Validate XML using XSD, a Catalog Resolver, and JAXP DOM for XSLT

前端 未结 1 591
遥遥无期
遥遥无期 2020-12-28 21:48

Background

Using JDK 6 to load XML files into DOM. The XML files must be validated against an XSD. The XSD file location differs depending on the running environment.

相关标签:
1条回答
  • 2020-12-28 22:38

    Source Files

    The source files include a catalog manager properties file, Java source code, catalog file, XML data, XSL files, and XSD files. All files are relative to the current working directory (./).

    Catalog Manager Properties File

    This properties file is read by the CatalogResolver class; save as ./CatalogManager.properties:

    # Settings read by the CatalogResolver class from ./CatalogManager.properties.
    # Catalog file(s) to consult; here the primary catalog saved as ./catalog.xml.
    catalogs=catalog.xml
    # NOTE(review): presumably leaves relative catalog paths relative to the
    # current working directory - confirm against the resolver documentation.
    relative-catalogs=yes
    # NOTE(review): a very high level, presumably the most detailed diagnostic
    # output - lower it for quieter runs.
    verbosity=99
    prefer=system
    static-catalog=yes
    allow-oasis-xml-catalog-pi=yes
    

    TestXSD.java

    This is the main application; save it as ./src/TestXSD.java:

    package src;
    
    import java.io.*;
    import java.net.URI;
    import java.util.*;
    import java.util.regex.Pattern;
    import java.util.regex.Matcher;
    
    import javax.xml.parsers.*;
    import javax.xml.xpath.*;
    import javax.xml.XMLConstants;
    
    import org.w3c.dom.*;
    import org.xml.sax.*;
    
    import org.apache.xml.resolver.tools.CatalogResolver;
    import org.apache.xerces.util.XMLCatalogResolver;
    import static org.apache.xerces.jaxp.JAXPConstants.JAXP_SCHEMA_LANGUAGE;
    import static org.apache.xerces.jaxp.JAXPConstants.W3C_XML_SCHEMA;
    
    import javax.xml.validation.SchemaFactory;
    import javax.xml.validation.Schema;
    import javax.xml.validation.Validator;
    
    import javax.xml.transform.Result;
    import javax.xml.transform.Source;
    import javax.xml.transform.Transformer;
    import javax.xml.transform.TransformerFactory;
    
    import javax.xml.transform.dom.DOMSource;
    import javax.xml.transform.sax.SAXSource;
    
    import javax.xml.transform.stream.StreamResult;
    import javax.xml.transform.stream.StreamSource;
    
    /**
     * Command-line demonstration that loads an XML document into a DOM,
     * validates it against the XSD the document itself points to, and then
     * transforms it with an XSL stylesheet. Schema and stylesheet locations
     * are resolved through OASIS XML catalogs.
     *
     * Download http://xerces.apache.org/xml-commons/components/resolver/CatalogManager.properties
     */
    public class TestXSD {
      // Xerces property name for an entity resolver; not referenced in this class.
      private final static String ENTITY_RESOLVER =
        "http://apache.org/xml/properties/internal/entity-resolver";

      /** Namespace of XSLT elements; needed to read xsl:version attributes. */
      private final static String XSLT_NS = "http://www.w3.org/1999/XSL/Transform";

      /**
       * Matches one pseudo-attribute of a processing instruction, written as
       * name="value" or name='value' with optional white space around the
       * equals sign. Compiled once rather than on every lookup.
       */
      private final static Pattern PSEUDO_ATTRIBUTE = Pattern.compile(
        "([\\w.:-]+)\\s*=\\s*(?:\"([^\"]*)\"|'([^']*)')" );

      /**
       * This program reads an XML file, performs validation, reads an XSL
       * file, transforms the input XML, and then writes the transformed document
       * to standard output.
       *
       * args[0] - The XSL file used to transform the XML file
       * args[1] - The XML file to transform using the XSL file
       *
       * @throws IllegalArgumentException if fewer than two arguments are given.
       * @throws Exception if parsing, validation, or transformation fails.
       */
      public static void main( String args[] ) throws Exception {
        if( args.length < 2 ) {
          throw new IllegalArgumentException(
            "Usage: TestXSD <stylesheet.xsl> <document.xml>" );
        }

        // For validation error messages.
        ErrorHandler errorHandler = new DocumentErrorHandler();

        // Read the CatalogManager.properties file.
        CatalogResolver resolver = new CatalogResolver();
        XMLCatalogResolver xmlResolver = createXMLCatalogResolver( resolver );

        logDebug( "READ XML INPUT SOURCE" );
        DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
        dbFactory.setAttribute( JAXP_SCHEMA_LANGUAGE, W3C_XML_SCHEMA );
        dbFactory.setNamespaceAware( true );

        DocumentBuilder builder = dbFactory.newDocumentBuilder();
        builder.setEntityResolver( xmlResolver );
        builder.setErrorHandler( errorHandler );

        logDebug( "PARSE XML INTO DOCUMENT MODEL" );
        // Parse from the File rather than from a Reader: the parser then honours
        // the encoding declared in the document (a Reader would force the
        // platform charset), records the document URI, and closes the stream.
        Document xmlDocument = builder.parse( new File( args[1] ) );

        logDebug( "GET XML SCHEMA DEFINITION" );
        String schemaURI = getSchemaURI( xmlDocument );

        logDebug( "SCHEMA URI: " + schemaURI );

        if( schemaURI != null ) {
          validate( xmlDocument, schemaURI, xmlResolver );
        }

        logDebug( "READ XSL INPUT SOURCE" );
        logDebug( "PARSE XSL INTO DOCUMENT MODEL" );
        // Load an XSL template for transforming XML documents.
        Document xslDocument = builder.parse( new File( args[0] ) );

        transform( xmlDocument, xslDocument, resolver );
        System.out.println();
      }

      /**
       * Validates the document against the schema found at the given URI
       * after mapping that URI through the catalog.
       *
       * @param xml The document to validate.
       * @param schemaURI The schema URI as written in the document.
       * @param xmlResolver Maps the schema URI (and anything the schema imports
       * or includes) to a local resource.
       * @throws SAXException If the schema cannot be compiled or the document
       * is invalid; no error handler is installed on the validator, so the
       * first validation error is thrown.
       * @throws IOException If the catalog or the schema cannot be read.
       */
      private static void validate(
        Document xml, String schemaURI, XMLCatalogResolver xmlResolver )
        throws SAXException, IOException
      {
        logDebug( "CREATE SCHEMA FACTORY" );
        // Create a Schema factory to obtain a Schema for XML validation...
        SchemaFactory sFactory = SchemaFactory.newInstance( W3C_XML_SCHEMA );
        sFactory.setResourceResolver( xmlResolver );

        logDebug( "CREATE XSD INPUT SOURCE" );
        String xsdFileURI = xmlResolver.resolveURI( schemaURI );

        if( xsdFileURI == null ) {
          // No catalog entry matched; fall back to the URI as written instead
          // of failing with a NullPointerException.
          xsdFileURI = schemaURI;
        }

        logDebug( "CREATE INPUT SOURCE XSD FROM: " + xsdFileURI );
        // Passing a system ID (not an open stream) lets the schema loader open
        // and close the file itself and gives relative xs:include/xs:import
        // references a base URI.
        Source xsd = new StreamSource( xsdFileURI );

        logDebug( "CREATE SCHEMA OBJECT FOR XSD" );
        Schema schema = sFactory.newSchema( xsd );

        logDebug( "CREATE VALIDATOR FOR SCHEMA" );
        Validator validator = schema.newValidator();

        logDebug( "VALIDATE XML AGAINST XSD" );
        validator.validate( new DOMSource( xml ) );
      }

      /**
       * Transforms the XML document with the stylesheet and writes the result
       * to standard output.
       *
       * @param xml The document to transform.
       * @param xsl The parsed stylesheet.
       * @param resolver Resolves URIs referenced by the stylesheet.
       * @throws Exception If the stylesheet cannot be compiled or applied.
       */
      private static void transform(
        Document xml, Document xsl, CatalogResolver resolver ) throws Exception
      {
        if( versionAtLeast( xsl, 2 ) ) {
          useXSLT2Transformer();
        }

        logDebug( "CREATE TRANSFORMER FACTORY" );
        // Create the transformer used for the document.
        TransformerFactory tFactory = TransformerFactory.newInstance();
        tFactory.setURIResolver( resolver );

        logDebug( "CREATE TRANSFORMER FROM XSL" );
        // Supplying the document URI (null when unknown) gives relative
        // xsl:import/xsl:include references in the top-level stylesheet a base.
        Transformer transformer = tFactory.newTransformer(
          new DOMSource( xsl, xsl.getDocumentURI() ) );

        logDebug( "CREATE RESULT OUTPUT STREAM" );
        // Hand the transformer the byte stream itself, not a Writer, so that the
        // stylesheet's output settings control the encoding and no unflushed
        // Writer buffer sits between the transformer and standard output.
        Result out = new StreamResult( System.out );

        logDebug( "TRANSFORM THE XML AND WRITE TO STDOUT" );
        // Transform the document using a given stylesheet.
        transformer.transform( new DOMSource( xml ), out );
        System.out.flush();
      }

      /**
       * Answers whether the given XSL document version is greater than or
       * equal to the given required version number.
       *
       * @param xsl The XSL document to check for version compatibility.
       * @param version The version number to compare against.
       *
       * @return true iff the XSL document declares a parsable version that is
       * greater than or equal to the version parameter; false when the
       * version is missing or not a number.
       */
      private static boolean versionAtLeast( Document xsl, float version ) {
        Element root = xsl.getDocumentElement();

        if( root == null ) {
          return false;
        }

        String declared = root.getAttribute( "version" );

        if( declared.length() == 0 ) {
          // A literal result element used as a stylesheet carries xsl:version.
          declared = root.getAttributeNS( XSLT_NS, "version" );
        }

        try {
          return Float.parseFloat( declared ) >= version;
        }
        catch( NumberFormatException ignored ) {
          // Missing or malformed version: treat as "not at least".
          return false;
        }
      }

      /**
       * Enables Saxon9's XSLT2 transformer for XSLT2 files.
       */
      private static void useXSLT2Transformer() {
        System.setProperty("javax.xml.transform.TransformerFactory",
          "net.sf.saxon.TransformerFactoryImpl");
      }

      /**
       * Creates an XMLCatalogResolver based on the file names found in
       * the given CatalogResolver. The resulting XMLCatalogResolver will
       * contain the absolute path to all the files known to the given
       * CatalogResolver.
       *
       * @param resolver The CatalogResolver to examine for catalog file names.
       * @return An XMLCatalogResolver instance with the same number of catalog
       * files as found in the given CatalogResolver.
       */
      private static XMLCatalogResolver createXMLCatalogResolver(
        CatalogResolver resolver ) {
        List<?> files = resolver.getCatalog().getCatalogManager().getCatalogFiles();
        String catalogs[] = new String[ files.size() ];

        for( int i = 0; i < catalogs.length; i++ ) {
          catalogs[ i ] = (new File( files.get( i ).toString() )).getAbsolutePath();
        }

        XMLCatalogResolver xmlResolver = new XMLCatalogResolver();
        xmlResolver.setCatalogList( catalogs );

        return xmlResolver;
      }

      /**
       * Parses the data of a processing instruction into its pseudo-attributes.
       *
       * @param data The processing instruction data; may be null.
       * @return A map from pseudo-attribute name to value, in no particular
       * order; empty when data is null or contains no pseudo-attributes.
       */
      private static Map<String, String> parsePseudoAttributes( String data ) {
        Map<String, String> result = new HashMap<String, String>();

        if( data == null ) {
          return result;
        }

        Matcher m = PSEUDO_ATTRIBUTE.matcher( data );

        while( m.find() ) {
          // Group 2 holds a double-quoted value, group 3 a single-quoted one.
          String value = m.group( 2 ) != null ? m.group( 2 ) : m.group( 3 );
          result.put( m.group( 1 ), value );
        }

        return result;
      }

      /**
       * Retrieves the URI of the XSD that the document associates with itself,
       * either through an xml-model processing instruction or through an
       * xsi:schemaLocation / xsi:noNamespaceSchemaLocation attribute.
       *
       * @param node The document (or child node) to traverse, depth first.
       * @return null if no XSD is referenced in the XML document.
       */
      private static String getSchemaURI( Node node ) {
        if( node.getNodeType() == Node.PROCESSING_INSTRUCTION_NODE ) {
          return getXmlModelSchemaURI( (ProcessingInstruction)node );
        }

        String result = getSchemaLocationURI( node.getAttributes() );

        // Look deeper for the schema URI.
        if( result == null ) {
          NodeList list = node.getChildNodes();

          for( int i = 0; i < list.getLength() && result == null; i++ ) {
            result = getSchemaURI( list.item( i ) );
          }
        }

        return result;
      }

      /**
       * Extracts the schema URI from an xml-model processing instruction.
       * Pseudo-attributes are looked up by name, so their order is irrelevant.
       *
       * @param pi The processing instruction to inspect.
       * @return The href value when the instruction refers to an XML Schema;
       * null for other targets, other schema languages, or a missing href.
       */
      private static String getXmlModelSchemaURI( ProcessingInstruction pi ) {
        logDebug( "NODE IS PROCESSING INSTRUCTION" );

        if( !"xml-model".equals( pi.getTarget() ) ) {
          return null;
        }

        logDebug( "PI IS XML MODEL" );

        Map<String, String> attributes = parsePseudoAttributes( pi.getData() );
        String language = attributes.get( "schematypens" );
        boolean isXsd;

        if( language != null ) {
          // An explicit schema language decides the matter.
          isXsd = XMLConstants.W3C_XML_SCHEMA_NS_URI.equals( language );
        }
        else {
          isXsd = "application/xml".equalsIgnoreCase( attributes.get( "type" ) );
        }

        return isXsd ? attributes.get( "href" ) : null;
      }

      /**
       * Looks for a schema location hint among an element's attributes using
       * the standard DOM API.
       *
       * @param attrs The attributes to inspect; null for non-element nodes.
       * @return The location from the first namespace/location pair of
       * xsi:schemaLocation, or the value of xsi:noNamespaceSchemaLocation;
       * null when neither is present or usable.
       */
      private static String getSchemaLocationURI( NamedNodeMap attrs ) {
        if( attrs == null ) {
          return null;
        }

        for( int i = 0; i < attrs.getLength(); i++ ) {
          Attr attribute = (Attr)attrs.item( i );

          if( !XMLConstants.W3C_XML_SCHEMA_INSTANCE_NS_URI.equals(
                attribute.getNamespaceURI() ) ) {
            continue;
          }

          String name = attribute.getLocalName();
          String value = attribute.getValue().trim();

          if( "schemaLocation".equals( name ) ) {
            // The value is a list of "namespace location" pairs.
            String pairs[] = value.split( "\\s+" );

            if( pairs.length >= 2 ) {
              return pairs[1];
            }
          }
          else if( "noNamespaceSchemaLocation".equals( name )
                   && value.length() > 0 ) {
            return value;
          }
        }

        return null;
      }

      /**
       * Writes a message to standard output.
       */
      private static void logDebug( String s ) {
        System.out.println( s );
      }
    }
    

    Error Handler

    This is the code for human-friendly error messages; save as ./src/DocumentErrorHandler.java:

    package src;
    
    import java.io.PrintStream;
    
    import org.xml.sax.ErrorHandler;
    import org.xml.sax.SAXParseException;
    import org.xml.sax.SAXException;
    
    /**
     * SAX error handler that prints each reported problem in a fixed,
     * human-readable layout. Every callback only reports; none of them
     * rethrows the exception it receives.
     */
    public class DocumentErrorHandler implements ErrorHandler {
      private final static PrintStream OUTSTREAM = System.err;

      @Override
      public void warning( SAXParseException e ) throws SAXException {
        report( "WARNING", e );
      }

      @Override
      public void error( SAXParseException e ) throws SAXException {
        report( "ERROR", e );
      }

      @Override
      public void fatalError( SAXParseException e ) throws SAXException {
        report( "FATAL ERROR", e );
      }

      /**
       * Prints a heading followed by the location and message of the problem,
       * one item per line.
       *
       * @param severity Label appended to the heading line.
       * @param problem The exception describing what the parser found.
       */
      private void report( String severity, SAXParseException problem ) {
        final String lines[] = {
          "SAX PARSE EXCEPTION " + severity,
          "  Public ID: " + problem.getPublicId(),
          "  System ID: " + problem.getSystemId(),
          "  Line     : " + problem.getLineNumber(),
          "  Column   : " + problem.getColumnNumber(),
          "  Message  : " + problem.getMessage()
        };

        for( String line : lines ) {
          OUTSTREAM.println( line );
        }
      }
    }
    

    Catalog File

    Save as ./catalog.xml:

    <?xml version="1.0" encoding="UTF-8"?>
    <!DOCTYPE catalog PUBLIC "-//OASIS//DTD XML Catalogs V1.1//EN" "http://www.oasis-open.org/committees/entity/release/1.1/catalog.dtd">
    <!-- Primary catalog: the file named by the catalogs entry of
         CatalogManager.properties. It maps the public schema URIs used in the
         test documents onto files below ./ArbitraryFolder. -->
    <catalog xmlns="urn:oasis:names:tc:entity:xmlns:xml:catalog">
        <!-- XSDs linked through primary catalog -->
        <!-- catalog entry for good-note1.xml -->
        <!-- Rewrites system identifiers that start with the given string. -->
        <rewriteSystem 
            systemIdStartString="http://stackoverflow.com/schema" 
            rewritePrefix="./ArbitraryFolder/schemas"
        />
    
        <!-- catalog entry for good-note2.xml, good-note3.xml, bad-note1.xml, bad-note2.xml -->
        <!-- Example: http://stackoverflow.com/2014/09/xsd/notes/notes.xsd
             is rewritten to ./ArbitraryFolder/schemas/notes/notes.xsd -->
        <rewriteURI 
            uriStartString="http://stackoverflow.com/2014/09/xsd" 
            rewritePrefix="./ArbitraryFolder/schemas"
        />
    
        <!-- add a second catalog as a further test:
             XSL will be resolved through it -->
        <nextCatalog 
            catalog="./ArbitraryFolder/catalog.xml"
        />
    </catalog>
    

    XML Data

    The different test cases include XSDs referenced in either processing instructions or root nodes.

    Schema: Processing Instruction

    The schema can be provided using an xml-model processing instruction (PI). Save as ./Tests/good-note2.xml:

    <?xml version="1.0" encoding="UTF-8"?>
    <!-- Associating Schemas with XML documents: http://www.w3.org/TR/xml-model/ -->
    <!-- Valid sample: the schema is named by the xml-model instruction below
         and the date uses the hyphenated form. -->
    <?xml-model type="application/xml" href="http://stackoverflow.com/2014/09/xsd/notes/notes.xsd"?>
    <note>
        <title>Shopping List</title>
        <date>2014-08-30</date>
        <body>headlight fluid, flamgrabblit, exhaust coil</body>
    </note>
    

    Schema: Root Node

    The schema can be provided in attributes of the document's root node. Save as ./Tests/good-note3.xml:

    <?xml version="1.0" encoding="UTF-8"?>
    <!-- XML Schema Part 1: Structures: 
         Schema-Related Markup in Documents Being Validated: 
         http://www.w3.org/TR/xmlschema-1/#Instance_Document_Constructions -->
    <!-- xsi:schemaLocation holds namespace/location pairs; the second token
         below is the schema URI that gets looked up in the catalog.
         NOTE(review): notes.xsd declares no targetNamespace, so
         xsi:noNamespaceSchemaLocation may be the more conventional hint;
         confirm the application recognises it before switching. -->
    <note 
        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
        xsi:schemaLocation="http://stackoverflow.com http://stackoverflow.com/2014/09/xsd/notes/notes.xsd">
        <title>Shopping List</title>
        <date>2014-08-30</date>
        <body>Eggs, Milk, Carrots</body>
    </note>
    

    Fail Validation

    The following should fail validation (date needs hyphens); save as ./Tests/bad-note1.xml:

    <?xml version="1.0" encoding="UTF-8"?>
    <!-- Associating Schemas with XML documents: http://www.w3.org/TR/xml-model/ -->
    <?xml-model type="application/xml" href="http://stackoverflow.com/2014/09/xsd/notes/notes.xsd"?>
    <!-- FAILS SCHEMA: the date element is declared as xs:date in notes.xsd, so
         20140830 is rejected; the valid form is 2014-08-30 -->
    <note>
        <title>Shopping List</title>
        <date>20140830</date>
        <body>headlight fluid, flamgrabblit, exhaust coil</body>
    </note>
    

    Transformation

    Save this as ./Tests/note-to-html.xsl:

    <?xml version="1.0" encoding="UTF-8"?>
    <!-- Entry-point stylesheet: it only imports notes.xsl by its public URI. -->
    <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
        xmlns:xs="http://www.w3.org/2001/XMLSchema"
        exclude-result-prefixes="xs"
        version="2.0">
        <!-- This URI is mapped by the second catalog (./ArbitraryFolder/catalog.xml),
             which the primary catalog chains to with nextCatalog -->
        <xsl:import href="http://stackoverflow.com/2014/09/xsl/notes/notes.xsl"/>
    </xsl:stylesheet>
    

    Arbitrary Folder

    The arbitrary folder represents the path to files on a computer that can be located anywhere on the file system. The location of these files could differ, for example, between production, development, and the repository.

    Catalog

    Save this file as ./ArbitraryFolder/catalog.xml:

    <?xml version="1.0" encoding="UTF-8"?>
    <!DOCTYPE catalog PUBLIC "-//OASIS//DTD XML Catalogs V1.1//EN" "http://www.oasis-open.org/committees/entity/release/1.1/catalog.dtd">
    <!-- Secondary catalog, reached through nextCatalog in ./catalog.xml;
         it maps the public stylesheet URIs onto local files. -->
    <catalog xmlns="urn:oasis:names:tc:entity:xmlns:xml:catalog">
    
        <!-- catalog entry for all notes -->
        <!-- NOTE(review): the relative rewritePrefix is presumably resolved
             against this catalog's own folder, giving ./ArbitraryFolder/XSL/
             (where notes.xsl is saved) - confirm. -->
        <rewriteURI 
            uriStartString="http://stackoverflow.com/2014/09/xsl/" 
            rewritePrefix="./XSL/"/>
    
    </catalog>
    

    Notes

    There are two files in this example for transforming the notes: notes.xsl and note-body.xsl. The first imports the second.

    Notes Stylesheet

    Save this as ./ArbitraryFolder/XSL/notes/notes.xsl:

    <?xml version="1.0" encoding="UTF-8"?>
    <!-- Renders a note document as an HTML page; the template for the body
         element is imported from note-body.xsl. -->
    <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
        xmlns:xs="http://www.w3.org/2001/XMLSchema"
        exclude-result-prefixes="xs"
        version="2.0">
    
        <!-- will not be in catalog (though it could be):
             by convention, a relative path is assumed to be part of the static
             file structure, i.e. it sits next to this stylesheet -->
        <xsl:import href="note-body.xsl"/>
    
        <!-- Document root: emit the HTML skeleton and process the content inside body. -->
        <xsl:template match="/">
            <html>
                <head>
                    <title>A Note</title>
                </head>
                <body>
                    <xsl:apply-templates/>
                </body>
            </html>
        </xsl:template>
        <!-- The comma-separated select is an XPath 2.0 sequence expression, hence version="2.0". -->
        <xsl:template match="note">
            <div>
                <xsl:apply-templates select="title, date, body"/>
            </div>
        </xsl:template>
        <xsl:template match="title">
            <h1><xsl:value-of select="."/></h1>
        </xsl:template>
        <xsl:template match="date">
            <p class="date"><xsl:value-of select="."/></p>
        </xsl:template>
    </xsl:stylesheet>
    

    Note Body Stylesheet

    Save this as ./ArbitraryFolder/XSL/notes/note-body.xsl:

    <?xml version="1.0" encoding="UTF-8"?>
    <!-- Imported by notes.xsl; supplies only the template for the body element. -->
    <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
        xmlns:xs="http://www.w3.org/2001/XMLSchema"
        exclude-result-prefixes="xs"
        version="2.0">
    
        <!-- Writes the text of the note's body as a paragraph. -->
        <xsl:template match="body">
            <p class="notebody"><xsl:value-of select="."/></p>
        </xsl:template>
    
    </xsl:stylesheet>
    

    Schema

    The last file required is the schema. The catalog rewrites the schema URI to the ./ArbitraryFolder/schemas prefix, so save this as ./ArbitraryFolder/schemas/notes/notes.xsd:

    <?xml version="1.0" encoding="UTF-8"?>
    <!-- Schema for the note test documents: a note is a title, a date and a
         body, in that order. No targetNamespace is declared. -->
    <xs:schema elementFormDefault="qualified" xmlns:xs="http://www.w3.org/2001/XMLSchema">
        <xs:element name="note">
            <xs:complexType>
                <xs:sequence>
                    <xs:element name="title" type="xs:token"/>
                    <!-- xs:date is why bad-note1.xml (20140830) fails while 2014-08-30 passes. -->
                    <xs:element name="date" type="xs:date"/>
                    <xs:element name="body" type="xs:string"/>
                </xs:sequence>
            </xs:complexType>
        </xs:element>
    </xs:schema>
    

    Building

    This section details how to build the test application.

    Libraries

    You will need Saxon 9 (for XSLT2.0 documents), Xerces, Xalan, and the Resolver API:

    jaxen-1.1.6.jar
    resolver.jar
    saxon9he.jar
    serializer.jar
    xalan.jar
    xercesImpl.jar
    xml-apis.jar
    xsltc.jar
    

    Scripts

    Save as ./build.sh:

    #!/bin/bash
    # Compiles the test application into ./bin using every jar in ./lib.
    # Stop at the first failing command.
    set -e
    # Older javac releases do not create the -d output directory themselves.
    mkdir -p bin
    # Quote the class path so the shell never expands the lib/* wildcard;
    # javac expands it itself.
    javac -d bin -cp ".:lib/*" src/TestXSD.java
    

    Save as ./run.sh:

    #!/bin/bash
    # Runs the test application: transforms the XML file given as the first
    # argument with Tests/note-to-html.xsl.
    # Quoting keeps a file name containing spaces as one argument and stops
    # the shell from expanding the lib/* class path wildcard.
    java -cp ".:bin:lib/*" src.TestXSD Tests/note-to-html.xsl "$1"
    

    Compile

    Run ./build.sh to compile the code.

    Run Output

    Run using:

    ./run.sh filename.xml
    

    Good Test

    Test that the good note passes validation:

    ./run.sh Tests/good-note2.xml
    

    No errors.

    Bad Test

    Test that the bad note's date does not pass validation:

    ./run.sh Tests/bad-note1.xml
    

    As expected, this produces the desired error:

    Exception in thread "main" org.xml.sax.SAXParseException; cvc-datatype-valid.1.2.1: '20140830' is not a valid value for 'date'.
        at org.apache.xerces.util.ErrorHandlerWrapper.createSAXParseException(Unknown Source)
        at org.apache.xerces.util.ErrorHandlerWrapper.error(Unknown Source)
        at org.apache.xerces.impl.XMLErrorReporter.reportError(Unknown Source)
        at org.apache.xerces.impl.XMLErrorReporter.reportError(Unknown Source)
        at org.apache.xerces.impl.XMLErrorReporter.reportError(Unknown Source)
        at org.apache.xerces.impl.xs.XMLSchemaValidator$XSIErrorReporter.reportError(Unknown Source)
        at org.apache.xerces.impl.xs.XMLSchemaValidator.reportSchemaError(Unknown Source)
        at org.apache.xerces.impl.xs.XMLSchemaValidator.elementLocallyValidType(Unknown Source)
        at org.apache.xerces.impl.xs.XMLSchemaValidator.processElementContent(Unknown Source)
        at org.apache.xerces.impl.xs.XMLSchemaValidator.handleEndElement(Unknown Source)
        at org.apache.xerces.impl.xs.XMLSchemaValidator.endElement(Unknown Source)
        at org.apache.xerces.jaxp.validation.DOMValidatorHelper.finishNode(Unknown Source)
        at org.apache.xerces.jaxp.validation.DOMValidatorHelper.validate(Unknown Source)
        at org.apache.xerces.jaxp.validation.DOMValidatorHelper.validate(Unknown Source)
        at org.apache.xerces.jaxp.validation.ValidatorImpl.validate(Unknown Source)
        at javax.xml.validation.Validator.validate(Validator.java:124)
        at src.TestXSD.main(TestXSD.java:103)
    
    0 讨论(0)
提交回复
热议问题