According to the lxml documentation: "The DTD is retrieved automatically based on the DOCTYPE of the parsed document. All you have to do is use a parser that has DTD validation enabled."
You could extract the schemas yourself and import them into a root schema:
from lxml import etree
# Namespace URI of the XML Schema *instance* vocabulary; bound to the "xsi"
# prefix when searching for xsi:schemaLocation attributes.
XSI = "http://www.w3.org/2001/XMLSchema-instance"

# XML Schema namespace in Clark notation ("{uri}"), ready to be prefixed to a
# local name such as "import" when creating elements.
XS = "{http://www.w3.org/2001/XMLSchema}"

# Empty wrapper schema that serves as the root to which <xs:import> elements
# are appended. It deliberately carries no XML declaration so that it can be
# parsed from a text string, and it uses a dummy target namespace so that it
# does not clash with the namespaces being imported.
# NOTE(review): the template was empty in the original snippet; confirm this
# minimal wrapper matches what was intended.
SCHEMA_TEMPLATE = """\
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
           xmlns="http://dummy.libxml2.validator"
           targetNamespace="http://dummy.libxml2.validator"
           version="1.0"
           elementFormDefault="qualified"
           attributeFormDefault="unqualified">
</xs:schema>
"""
def validate_XML(xml):
    """Validate an XML document against every schema it references.

    All ``xsi:schemaLocation`` attributes in the document are collected,
    each namespace/location pair is turned into an ``<xs:import>`` inside a
    wrapper schema built from ``SCHEMA_TEMPLATE``, and the document is then
    validated against that wrapper schema.

    :param xml: XML represented as string.
    :type xml: str
    :raises etree.DocumentInvalid: if the document does not validate
        (raised by ``XMLSchema.assertValid``).
    """
    tree = etree.XML(xml)
    schema_tree = etree.XML(SCHEMA_TEMPLATE)

    # Every xsi:schemaLocation attribute value anywhere in the document.
    location_hints = tree.xpath(
        "//*/@xsi:schemaLocation", namespaces={'xsi': XSI}
    )

    # Track pairs that have already been imported, so the same namespace/
    # location pair is added only once even when it occurs in several
    # attributes; iterating the XPath result directly (rather than a set)
    # keeps the import order stable between runs.
    imported = set()
    for hint in location_hints:
        # The attribute value is a whitespace-separated list of alternating
        # namespace URIs and schema locations; split() also discards
        # leading and trailing whitespace.
        tokens = hint.split()

        # Pair up consecutive tokens. A trailing token without a partner is
        # ignored, exactly as zip() truncates to the shorter sequence.
        for pair in zip(tokens[::2], tokens[1::2]):
            if pair in imported:
                continue
            imported.add(pair)

            namespace, location = pair
            xs_import = etree.SubElement(schema_tree, XS + "import")
            xs_import.set('namespace', namespace)
            xs_import.set('schemaLocation', location)

    # Construct the schema from the wrapper plus all collected imports.
    schema = etree.XMLSchema(schema_tree)

    # Validate; raises instead of returning a boolean.
    schema.assertValid(tree)
BTW, your simpletest.xsd is missing the targetNamespace.
With the code above, your example document validates against this schema.