问题
I am parsing the SOAP response below using the xmltodict library.
<SOAP-ENV:Envelope xmlns:SOAP-ENV="http://schemas.xmlsoap.org/soap/envelope/">
<SOAP-ENV:Header/>
<SOAP-ENV:Body>
<ns2:MultiAvailabilityResponse xmlns:ns2="http://www.derbysoft.com/doorway" Status="Successful" Token="187be58c62c2f2515b5d78ee">
<ns2:Availabilities>
<ns2:Availability CurrencyCode="GBP" HotelCode="HY-LONGE">
<ns2:GuestCount AdultCount="1" ChildCount="0"/>
<ns2:RoomTypes>
<ns2:RoomType RoomTypeCode="KING" RoomTypeName="Andaz King">
<ns2:RoomTypeDescription>A 29-square-metre room ,Modern furnishings include oversized work desk, plus bathroom with fast-fill tub and heated towel rail.</ns2:RoomTypeDescription>
</ns2:RoomType>
<ns2:RoomType RoomTypeCode="TWIN" RoomTypeName="Andaz Twin">
<ns2:RoomTypeDescription>A 29-square-metre room ,Modern furnishings include oversized work desk, plus bathroom with fast-fill tub and heated towel rail.</ns2:RoomTypeDescription>
</ns2:RoomType>
<ns2:RoomType RoomTypeCode="QUEN" RoomTypeName="Andaz Queen">
<ns2:RoomTypeDescription>A 26-square-metre room ,Modern furnishings include oversized work desk, plus bathroom with fast-fill tub and heated towel rail.</ns2:RoomTypeDescription>
</ns2:RoomType>
</ns2:RoomTypes>
<ns2:RatePlans>
<ns2:RatePlan RatePlanCode="49584WADPF2" RatePlanName="Advance Purchase">
<ns2:RatePlanDescription>Advance Purchase</ns2:RatePlanDescription>
</ns2:RatePlan>
<ns2:RatePlan RatePlanCode="49584WADPF" RatePlanName="Advance Purchase">
<ns2:RatePlanDescription>Advance Purchase</ns2:RatePlanDescription>
</ns2:RatePlan>
<ns2:RatePlan RatePlanCode="49584IPRTF" RatePlanName="Partner Rate">
<ns2:RatePlanDescription>Partner Rate</ns2:RatePlanDescription>
</ns2:RatePlan>
</ns2:RatePlans>
<ns2:RoomRates>
<ns2:RoomRate RatePlanCode="49584WADPF2" RoomTypeCode="KING">
<ns2:Rates>
<ns2:Rate AmountAfterTax="249.900" AmountBeforeTax="249.900" EffectiveDate="2016-05-05" ExpireDate="2016-05-06"/>
</ns2:Rates>
<ns2:Fees>
<ns2:Fee Amount="0.000" ChargeType="Tax" Type="Exclusive" Unit="PER_ROOM_PER_NIGHT"/>
</ns2:Fees>
</ns2:RoomRate>
<ns2:RoomRate RatePlanCode="49584WADPF2" RoomTypeCode="TWIN">
<ns2:Rates>
<ns2:Rate AmountAfterTax="249.900" AmountBeforeTax="249.900" EffectiveDate="2016-05-05" ExpireDate="2016-05-06"/>
</ns2:Rates>
<ns2:Fees>
<ns2:Fee Amount="0.000" ChargeType="Tax" Type="Exclusive" Unit="PER_ROOM_PER_NIGHT"/>
</ns2:Fees>
</ns2:RoomRate>
<ns2:RoomRate RatePlanCode="49584WADPF" RoomTypeCode="QUEN">
<ns2:Rates>
<ns2:Rate AmountAfterTax="249.900" AmountBeforeTax="249.900" EffectiveDate="2016-05-05" ExpireDate="2016-05-06"/>
</ns2:Rates>
<ns2:Fees>
<ns2:Fee Amount="0.000" ChargeType="Tax" Type="Exclusive" Unit="PER_ROOM_PER_NIGHT"/>
</ns2:Fees>
</ns2:RoomRate>
</ns2:RoomRates>
</ns2:Availability>
<ns2:Availability CurrencyCode="USD" HotelCode="HY-CHIRC">
<ns2:GuestCount AdultCount="1" ChildCount="0"/>
<ns2:RoomTypes>
<ns2:RoomType RoomTypeCode="JRSQ" RoomTypeName="JR SUITE 2 QUEEN BEDS">
<ns2:RoomTypeDescription>Rest in sublime comfort on one of two queen signature Hyatt Grand Beds®, fitted with fine linens, down blanket and plump pillows.</ns2:RoomTypeDescription>
</ns2:RoomType>
<ns2:RoomType RoomTypeCode="CLBD" RoomTypeName="REG CLUB 2 DOUBLE BEDS">
<ns2:RoomTypeDescription>one King or two double-sized Hyatt Grand Beds, fitted with luxurious linens, a down blanket and plush pillows</ns2:RoomTypeDescription>
</ns2:RoomType>
</ns2:RoomTypes>
<ns2:RatePlans>
<ns2:RatePlan RatePlanCode="49584IPRTF" RatePlanName="Partner Rate">
<ns2:RatePlanDescription>Partner Rate</ns2:RatePlanDescription>
<ns2:CancelPolicy NonRefundable="true">
<ns2:CancelPenalties/>
</ns2:CancelPolicy>
</ns2:RatePlan>
<ns2:RatePlan RatePlanCode="49584WPAWAF" RatePlanName="Bed and Breakfast">
<ns2:RatePlanDescription>Bed and Breakfast</ns2:RatePlanDescription>
<ns2:CancelPolicy NonRefundable="true">
<ns2:CancelPenalties/>
</ns2:CancelPolicy>
</ns2:RatePlan>
</ns2:RatePlans>
<ns2:RoomRates>
<ns2:RoomRate RatePlanCode="49584IPRTF" RoomTypeCode="JRSQ">
<ns2:Rates>
<ns2:Rate AmountAfterTax="543.134" AmountBeforeTax="466.650" EffectiveDate="2016-05-05" ExpireDate="2016-05-06"/>
</ns2:Rates>
<ns2:Fees>
<ns2:Fee ChargeType="Tax" Percent="16.390" Type="Exclusive"/>
</ns2:Fees>
</ns2:RoomRate>
<ns2:RoomRate RatePlanCode="49584IPRTF" RoomTypeCode="CLBD">
<ns2:Rates>
<ns2:Rate AmountAfterTax="370.004" AmountBeforeTax="317.900" EffectiveDate="2016-05-05" ExpireDate="2016-05-06"/>
</ns2:Rates>
<ns2:Fees>
<ns2:Fee ChargeType="Tax" Percent="16.390" Type="Exclusive"/>
</ns2:Fees>
</ns2:RoomRate>
<ns2:RoomRate RatePlanCode="49584IPRTF" RoomTypeCode="VW2Q">
<ns2:Rates>
<ns2:Rate AmountAfterTax="325.485" AmountBeforeTax="279.650" EffectiveDate="2016-05-05" ExpireDate="2016-05-06"/>
</ns2:Rates>
<ns2:Fees>
<ns2:Fee ChargeType="Tax" Percent="16.390" Type="Exclusive"/>
</ns2:Fees>
</ns2:RoomRate>
</ns2:RoomRates>
</ns2:Availability>
</ns2:Availabilities>
</ns2:MultiAvailabilityResponse>
</SOAP-ENV:Body>
</SOAP-ENV:Envelope>
Here is my code to get the required details from the SOAP response:
def listify(obj):
    """Return *obj* as a list so callers can always iterate over it.

    xmltodict represents a repeated element as a list, a single element as
    one value, and an empty element as ``None``. This helper normalizes all
    three shapes.

    Args:
        obj: A value taken from a parsed xmltodict structure.

    Returns:
        ``obj`` itself when it already is a list, an empty list when it is
        ``None``, otherwise a one-element list containing ``obj``.
    """
    if obj is None:
        # An empty element has nothing to iterate over; [None] would only
        # make the caller crash on its first subscript.
        return []
    if isinstance(obj, list):
        return obj
    return [obj]
def search_hotels_formatted_response(soap):
    """Parse a multi-availability SOAP response into one summary dict.

    Args:
        soap: The SOAP XML document, in any form ``xmltodict.parse`` accepts.

    Returns:
        dict: ``{'ibp': 'dbs', 'rL': [...], 'hc': ..., 'hn': ''}`` where
        ``rL`` holds one entry per ``Rate`` element, sorted by its
        before-tax amount.

    Note:
        The rates of every hotel in the response are merged into the single
        ``rL`` list and sorted together, and ``hc`` is the code of the last
        hotel only.
    """
    soap = xmltodict.parse(soap, process_namespaces=True)

    # With process_namespaces=True every key is "<namespace URI>:<local name>".
    env = 'http://schemas.xmlsoap.org/soap/envelope/:'
    doorway = 'http://www.derbysoft.com/doorway:'

    response = soap[env + 'Envelope'][env + 'Body'][doorway + 'MultiAvailabilityResponse']
    availability = listify(
        response[doorway + 'Availabilities'][doorway + 'Availability'])

    room_list = []
    for hotel in availability:
        # Room names are resolved per hotel so that two hotels reusing the
        # same room type code cannot overwrite each other's names.
        names = {
            roomtype['@RoomTypeCode']: roomtype['@RoomTypeName']
            for roomtype in listify(
                hotel[doorway + 'RoomTypes'][doorway + 'RoomType'])
        }
        for roomrate in listify(
                hotel[doorway + 'RoomRates'][doorway + 'RoomRate']):
            for rate in listify(roomrate[doorway + 'Rates'][doorway + 'Rate']):
                before_tax = float(rate['@AmountBeforeTax'])
                room_list.append({
                    'rtc': roomrate['@RoomTypeCode'],
                    'rpc': roomrate['@RatePlanCode'],
                    # A rate may reference a room type that is not listed
                    # under RoomTypes; fall back to an empty name.
                    'rtn': names.get(roomrate['@RoomTypeCode'], ''),
                    'rmt': rate['@AmountBeforeTax'],
                    'cur': hotel['@CurrencyCode'],
                    'ttc': float(rate['@AmountAfterTax']) - before_tax,
                    'egc': 0,
                    'long': '',
                })
    room_list.sort(key=lambda entry: float(entry['rmt']))

    return {
        'ibp': 'dbs',
        'rL': room_list,
        # Read the last hotel explicitly instead of relying on a loop
        # variable leaking out of a comprehension (a NameError on Python 3).
        'hc': availability[-1]['@HotelCode'] if availability else '',
        'hn': '',
    }
I am able to extract the required details from the SOAP message into a dictionary of the required format. However, I have a problem with the particular SOAP message shared above. In all other cases, the response contained a single hotel with a varying number of room/rate combinations, but this SOAP response contains two hotels, and my parsing code fetches the room prices from both hotels at the same time and sorts them together. Instead, I want to sort the options of the first hotel, then the options of the second hotel. The number of hotels can vary, so please provide a generic solution that works irrespective of the number of hotels.
Any help would be appreciated. Thanks!
回答1:
Consider simplifying the XML using XSLT, the declarative programming language used specifically to transform XML documents. Python can run XSLT 1.0 using the lxml module. From there, parse the transformed tree into a dictionary with xmltodict, which can then be serialized to JSON with json. All three outputs are included below:
XSLT Script (save as .xsl file, referenced below in .py script)
<!-- Flattens a MultiAvailabilityResponse into Responses/Hotel/RoomRate elements, with hotels and room rates sorted. -->
<xsl:transform xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"
xmlns:SOAP-ENV="http://schemas.xmlsoap.org/soap/envelope/"
xmlns:ns2="http://www.derbysoft.com/doorway">
<xsl:output version="1.0" encoding="UTF-8" indent="yes" />
<xsl:strip-space elements="*"/>
<!-- Response Level -->
<!-- Wraps everything produced for the response in a single Responses root element. -->
<xsl:template match="ns2:MultiAvailabilityResponse">
<Responses>
<xsl:apply-templates select="ns2:Availabilities"/>
</Responses>
</xsl:template>
<!-- Hotels are ordered by currency code (descending), not by their order in the source document. -->
<xsl:template match="ns2:Availabilities">
<xsl:apply-templates select="ns2:Availability">
<xsl:sort select="@CurrencyCode" order="descending"/>
</xsl:apply-templates>
</xsl:template>
<!-- Hotel Level -->
<!-- One Hotel element per Availability; only its RoomRates are processed. -->
<xsl:template match="ns2:Availability">
<Hotel>
<xsl:apply-templates select="ns2:RoomRates"/>
</Hotel>
</xsl:template>
<!-- Room Rates Level -->
<!-- Orders the room rates of one hotel by before-tax amount, compared as numbers, cheapest first. -->
<!-- NOTE(review): when a RoomRate holds several Rate elements, XSLT 1.0 should take the sort key from the first one only; confirm if that matters. -->
<xsl:template match="ns2:RoomRates">
<xsl:apply-templates select="ns2:RoomRate">
<xsl:sort select="descendant::ns2:Rate/@AmountBeforeTax"
order="ascending" data-type="number"/>
</xsl:apply-templates>
</xsl:template>
<!-- Room Rate Level -->
<!-- Emits one RoomRate record: fixed ibp value, the rate details in rL, the owning hotel's code in hc and an empty hn. -->
<xsl:template match="ns2:RoomRate">
<RoomRate>
<ibp>dbs</ibp>
<rL><xsl:apply-templates select="ns2:Rates"/></rL>
<hc><xsl:value-of select="ancestor::ns2:Availability/@HotelCode"/></hc>
<hn></hn>
</RoomRate>
</xsl:template>
<!-- Rates Level -->
<!-- Matches every element child of Rates and emits the flattened fields for it. -->
<xsl:template match="ns2:Rates/*">
<!-- Keep the room type code in a variable so it can be compared inside the RoomType predicate below. -->
<xsl:variable name="rtc" select="ancestor::ns2:RoomRate/@RoomTypeCode"/>
<rtc><xsl:value-of select="$rtc"/></rtc>
<rpc><xsl:value-of select="ancestor::ns2:RoomRate/@RatePlanCode"/></rpc>
<!-- Room type name looked up among the RoomType elements of the same hotel; left empty when none has this code. -->
<rtn><xsl:value-of select="ancestor::ns2:Availability/ns2:RoomTypes/
ns2:RoomType[@RoomTypeCode=$rtc]/@RoomTypeName"/></rtn>
<rmt><xsl:value-of select="@AmountBeforeTax"/></rmt>
<cur><xsl:value-of select="ancestor::ns2:Availability/@CurrencyCode"/></cur>
<!-- Tax amount: after-tax amount minus before-tax amount. -->
<ttc><xsl:value-of select="@AmountAfterTax - @AmountBeforeTax"/></ttc>
<egc>0</egc>
<long></long>
</xsl:template>
</xsl:transform>
Python Script (outputs to xml, dict, and json types)
import lxml.etree as ET
import xmltodict
import json
# Parse the SOAP payload and the stylesheet from disk
soap_tree = ET.parse('soap.xml')
stylesheet_tree = ET.parse('XSLTScript.xsl')

# Compile the stylesheet and run it against the SOAP document
result_tree = ET.XSLT(stylesheet_tree)(soap_tree)

# Output 1: the transformed tree serialized as XML (bytes, hence the decode)
return_xml = ET.tostring(
    result_tree,
    xml_declaration=True,
    pretty_print=True,
    encoding='UTF-8',
)
print(return_xml.decode("utf-8"))

# Output 2: the same document parsed into a dictionary
return_dict = xmltodict.parse(return_xml)
print(return_dict)

# Output 3: the dictionary rendered as indented JSON
return_json = json.dumps(return_dict, indent=4)
print(return_json)
Transformed XML Output
<?xml version='1.0' encoding='UTF-8'?>
<Responses xmlns:SOAP-ENV="http://schemas.xmlsoap.org/soap/envelope/" xmlns:ns2="http://www.derbysoft.com/doorway">
<Hotel>
<RoomRate>
<ibp>dbs</ibp>
<rL>
<rtc>VW2Q</rtc>
<rpc>49584IPRTF</rpc>
<rtn/>
<rmt>279.650</rmt>
<cur>USD</cur>
<ttc>45.835</ttc>
<egc>0</egc>
<long/>
</rL>
<hc>HY-CHIRC</hc>
<hn/>
</RoomRate>
<RoomRate>
<ibp>dbs</ibp>
<rL>
<rtc>CLBD</rtc>
<rpc>49584IPRTF</rpc>
<rtn>REG CLUB 2 DOUBLE BEDS</rtn>
<rmt>317.900</rmt>
<cur>USD</cur>
<ttc>52.104</ttc>
<egc>0</egc>
<long/>
</rL>
<hc>HY-CHIRC</hc>
<hn/>
</RoomRate>
<RoomRate>
<ibp>dbs</ibp>
<rL>
<rtc>JRSQ</rtc>
<rpc>49584IPRTF</rpc>
<rtn>JR SUITE 2 QUEEN BEDS</rtn>
<rmt>466.650</rmt>
<cur>USD</cur>
<ttc>76.484</ttc>
<egc>0</egc>
<long/>
</rL>
<hc>HY-CHIRC</hc>
<hn/>
</RoomRate>
</Hotel>
<Hotel>
<RoomRate>
<ibp>dbs</ibp>
<rL>
<rtc>KING</rtc>
<rpc>49584WADPF2</rpc>
<rtn>Andaz King</rtn>
<rmt>249.900</rmt>
<cur>GBP</cur>
<ttc>0</ttc>
<egc>0</egc>
<long/>
</rL>
<hc>HY-LONGE</hc>
<hn/>
</RoomRate>
<RoomRate>
<ibp>dbs</ibp>
<rL>
<rtc>TWIN</rtc>
<rpc>49584WADPF2</rpc>
<rtn>Andaz Twin</rtn>
<rmt>249.900</rmt>
<cur>GBP</cur>
<ttc>0</ttc>
<egc>0</egc>
<long/>
</rL>
<hc>HY-LONGE</hc>
<hn/>
</RoomRate>
<RoomRate>
<ibp>dbs</ibp>
<rL>
<rtc>QUEN</rtc>
<rpc>49584WADPF</rpc>
<rtn>Andaz Queen</rtn>
<rmt>249.900</rmt>
<cur>GBP</cur>
<ttc>0</ttc>
<egc>0</egc>
<long/>
</rL>
<hc>HY-LONGE</hc>
<hn/>
</RoomRate>
</Hotel>
</Responses>
Python Dictionary
OrderedDict([('Responses', OrderedDict([('@xmlns:SOAP-ENV', 'http://schemas.xmlsoap.org/soap/envelope/'),
('@xmlns:ns2', 'http://www.derbysoft.com/doorway'), ('Hotel', [OrderedDict([('RoomRate', [OrderedDict([('ibp', 'dbs'),
('rL', OrderedDict([('rtc', 'VW2Q'), ('rpc', '49584IPRTF'), ('rtn', None), ('rmt', '279.650'), ('cur', 'USD'),
('ttc', '45.835'), ('egc', '0'), ('long', None)])), ('hc', 'HY-CHIRC'), ('hn', None)]), OrderedDict([('ibp', 'dbs'),
('rL', OrderedDict([('rtc', 'CLBD'), ('rpc', '49584IPRTF'), ('rtn', 'REG CLUB 2 DOUBLE BEDS'), ('rmt', '317.900'),
('cur', 'USD'), ('ttc', '52.104'), ('egc', '0'), ('long', None)])), ('hc', 'HY-CHIRC'), ('hn', None)]),
OrderedDict([('ibp', 'dbs'), ('rL', OrderedDict([('rtc', 'JRSQ'), ('rpc', '49584IPRTF'), ('rtn', 'JR SUITE 2 QUEEN BEDS'),
('rmt', '466.650'), ('cur', 'USD'), ('ttc', '76.484'), ('egc', '0'), ('long', None)])), ('hc', 'HY-CHIRC'),
('hn', None)])])]), OrderedDict([('RoomRate', [OrderedDict([('ibp', 'dbs'), ('rL', OrderedDict([('rtc', 'KING'),
('rpc', '49584WADPF2'), ('rtn', 'Andaz King'), ('rmt', '249.900'), ('cur', 'GBP'), ('ttc', '0'), ('egc', '0'),
('long', None)])), ('hc', 'HY-LONGE'), ('hn', None)]), OrderedDict([('ibp', 'dbs'), ('rL', OrderedDict([('rtc', 'TWIN'),
('rpc', '49584WADPF2'), ('rtn', 'Andaz Twin'), ('rmt', '249.900'), ('cur', 'GBP'), ('ttc', '0'), ('egc', '0'),
('long', None)])), ('hc', 'HY-LONGE'), ('hn', None)]), OrderedDict([('ibp', 'dbs'), ('rL', OrderedDict([('rtc', 'QUEN'),
('rpc', '49584WADPF'), ('rtn', 'Andaz Queen'), ('rmt', '249.900'), ('cur', 'GBP'), ('ttc', '0'), ('egc', '0'),
('long', None)])), ('hc', 'HY-LONGE'), ('hn', None)])])])])]))])
JSON Output
{
"Responses": {
"@xmlns:SOAP-ENV": "http://schemas.xmlsoap.org/soap/envelope/",
"@xmlns:ns2": "http://www.derbysoft.com/doorway",
"Hotel": [
{
"RoomRate": [
{
"ibp": "dbs",
"rL": {
"rtc": "VW2Q",
"rpc": "49584IPRTF",
"rtn": null,
"rmt": "279.650",
"cur": "USD",
"ttc": "45.835",
"egc": "0",
"long": null
},
"hc": "HY-CHIRC",
"hn": null
},
{
"ibp": "dbs",
"rL": {
"rtc": "CLBD",
"rpc": "49584IPRTF",
"rtn": "REG CLUB 2 DOUBLE BEDS",
"rmt": "317.900",
"cur": "USD",
"ttc": "52.104",
"egc": "0",
"long": null
},
"hc": "HY-CHIRC",
"hn": null
},
{
"ibp": "dbs",
"rL": {
"rtc": "JRSQ",
"rpc": "49584IPRTF",
"rtn": "JR SUITE 2 QUEEN BEDS",
"rmt": "466.650",
"cur": "USD",
"ttc": "76.484",
"egc": "0",
"long": null
},
"hc": "HY-CHIRC",
"hn": null
}
]
},
{
"RoomRate": [
{
"ibp": "dbs",
"rL": {
"rtc": "KING",
"rpc": "49584WADPF2",
"rtn": "Andaz King",
"rmt": "249.900",
"cur": "GBP",
"ttc": "0",
"egc": "0",
"long": null
},
"hc": "HY-LONGE",
"hn": null
},
{
"ibp": "dbs",
"rL": {
"rtc": "TWIN",
"rpc": "49584WADPF2",
"rtn": "Andaz Twin",
"rmt": "249.900",
"cur": "GBP",
"ttc": "0",
"egc": "0",
"long": null
},
"hc": "HY-LONGE",
"hn": null
},
{
"ibp": "dbs",
"rL": {
"rtc": "QUEN",
"rpc": "49584WADPF",
"rtn": "Andaz Queen",
"rmt": "249.900",
"cur": "GBP",
"ttc": "0",
"egc": "0",
"long": null
},
"hc": "HY-LONGE",
"hn": null
}
]
}
]
}
}
回答2:
You are building the list comprehension from all room rate results (through all the hotels), but the hotel code is outside of any loop, on the same level as the list. So you will always get only one (the last) of the hotels.
In code:
'hc': _availability['@HotelCode'],
The variable _availability holds the last item from the preceding loop "for _availability in availability". This only works because Python 2 list comprehensions leak their loop variable; in Python 3 the name would not even be defined there.
In my opinion, such complex data structures are very difficult to read and use; maybe you should consider an OOP approach with the data stored in object attributes.
If you insist on using such data structures, you can do it like this:
def search_hotels_formatted_response(soap):
    """Parse a multi-availability SOAP response into one summary per hotel.

    Args:
        soap: The SOAP XML document, in any form ``xmltodict.parse`` accepts.

    Returns:
        list: One dict per ``Availability`` (hotel), in document order. Each
        dict has the keys ``ibp``, ``rL``, ``hc`` and ``hn``; ``rL`` holds
        one entry per ``Rate`` element of that hotel, sorted by before-tax
        amount.
    """
    soap = xmltodict.parse(soap, process_namespaces=True)

    # With process_namespaces=True every key is "<namespace URI>:<local name>".
    env = 'http://schemas.xmlsoap.org/soap/envelope/:'
    doorway = 'http://www.derbysoft.com/doorway:'

    response = soap[env + 'Envelope'][env + 'Body'][doorway + 'MultiAvailabilityResponse']
    availability = listify(
        response[doorway + 'Availabilities'][doorway + 'Availability'])

    return_list = []
    for _availability in availability:
        # Room names of this hotel only: a lookup shared by all hotels would
        # let equal room type codes overwrite each other.
        names = {
            roomtype['@RoomTypeCode']: roomtype['@RoomTypeName']
            for roomtype in listify(
                _availability[doorway + 'RoomTypes'][doorway + 'RoomType'])
        }

        # Walk the room rates themselves rather than the room types, so that
        # several rate plans for one room type are all kept and a room type
        # without any rate cannot raise a KeyError.
        room_list = []
        for roomrate in listify(
                _availability[doorway + 'RoomRates'][doorway + 'RoomRate']):
            # listify: a RoomRate may carry one Rate or several.
            for rate in listify(roomrate[doorway + 'Rates'][doorway + 'Rate']):
                before_tax = float(rate['@AmountBeforeTax'])
                room_list.append({
                    'rtc': roomrate['@RoomTypeCode'],
                    'rpc': roomrate['@RatePlanCode'],
                    # A rate may reference a room type that is not listed
                    # under RoomTypes; fall back to an empty name.
                    'rtn': names.get(roomrate['@RoomTypeCode'], ''),
                    'rmt': rate['@AmountBeforeTax'],
                    'cur': _availability['@CurrencyCode'],
                    'ttc': float(rate['@AmountAfterTax']) - before_tax,
                    'egc': 0,
                    'long': '',
                })
        room_list.sort(key=lambda entry: float(entry['rmt']))

        return_list.append({
            'ibp': 'dbs',
            'rL': room_list,
            'hc': _availability['@HotelCode'],
            'hn': '',
        })
    return return_list
来源:https://stackoverflow.com/questions/37019605/fetch-the-details-from-soap-xml-response