Edit XML file text based on path

后端未结

关注

 3  452

I have an XML file (e.g. jerry.xml) which contains some data as given below.

2<


                      
              相关标签:


      
      
        
          3条回答        

        
                         				            
            
           
            
                              
                
              
              
                
                  灰色年华        
                
              
                            
                2020-12-19 02:44
              
            
            
                                                                       
I've altered your extractNumbers function and other code to generate a relative xpath based on the read in file.

import xml.etree.ElementTree as ET

def extractNumbers(path, node):
    """Collect ``(xpath, number)`` pairs for the numeric elements under *node*.

    Call it with an empty *path* on the root element. The root itself is never
    recorded; every descendant gets a path relative to the root that starts
    with ``.//`` and can be handed to ``findall``.

    Args:
        path: XPath prefix built so far. ``''`` marks the root call; any other
            value must already end in ``/`` (or ``//``) because the node's tag
            is appended to it directly.
        node: The element to inspect; its children are visited recursively.

    Returns:
        A list of ``(path, value)`` tuples in document order, where ``value``
        is the element text converted to ``float``. Elements whose ``month``
        attribute is ``'05'`` or ``'06'`` are skipped together with their
        subtree.
    """
    nums = []
    if not path:
        # Root node: store the // predicate so children are addressed
        # relative to the root instead of by an absolute path.
        child_prefix = ".//"
    else:
        if node.attrib.get('month') in ('05', '06'):
            return nums

        # Build this node's own step WITHOUT a trailing slash, so the recorded
        # path is a valid XPath for the node itself.
        step = path + node.tag
        if 'name' in node.attrib:
            step += '[@name="{:s}"]'.format(node.attrib['name'])
        elif 'year' in node.attrib and 'month' in node.attrib:
            # NOTE(review): the month predicate is only emitted when a 'year'
            # attribute is also present, as in the original code; the extra
            # 'month' check merely avoids a KeyError. Confirm whether 'month'
            # alone should qualify.
            step += '[@month="{:s}"]'.format(node.attrib['month'])

        try:
            nums.append((step, float(node.text)))
        except (ValueError, TypeError):
            # Non-numeric text (ValueError) or no text at all (TypeError).
            pass

        # The separator is added only when descending, so children of a node
        # without predicates no longer get glued onto the parent's tag.
        child_prefix = step + '/'

    # Descend into the node's child nodes
    for child in node:
        nums.extend(extractNumbers(child_prefix, child))
    return nums

# Parse the XML document from disk, then gather every numeric element below
# its root. The empty string tells extractNumbers that it starts at the root.
tree = ET.parse('jerry.xml')
nums = extractNumbers(path='', node=tree.getroot())


At this point you have a nums list populated with tuples of "path, num".  You'll want to write the path into your csv.  In the following, I've assumed that you know the Text1, Text2, and Text3 values beforehand, and so I've written 'foo', 'bar', 'baz' into each row.

import csv
# Write the CSV file with the data found from extractNumbers.
# newline='' hands line-ending control to the csv module; without it, every
# row is followed by an extra blank line on platforms that translate '\n'.
with open('records.csv', 'w', newline='') as records:
    writer = csv.writer(records, delimiter=';')
    writer.writerow(['Path', 'Text1', 'Text2', 'Text3'])
    for rel_path, _number in nums:
        # rel_path is the relative XPath produced by extractNumbers.
        # 'foo' (the Text1 column) is a placeholder: replace it with the real
        # value, since that column is what later gets written into the XML.
        writer.writerow([rel_path, 'foo', 'bar', 'baz'])


You will now have the following CSV file

Path;Text1;Text2;Text3
".//country[@name=""Peru""]/rank";foo;bar;baz
".//country[@name=""Peru""]/gdpnp";foo;bar;baz
".//country[@name=""Singapore""]/rank";foo;bar;baz
".//country[@name=""Singapore""]/gdpnp";foo;bar;baz


In the following code, you will read the CSV file and use the Path column to alter the appropriate values.

import csv
import xml.etree.ElementTree as ET
# Read the CSV back and use each row's Path column (an XPath relative to the
# document root) to overwrite the text of every matching element.
# newline='' is what the csv module expects for files it reads.
with open('records.csv', 'r', newline='') as records:
    reader = csv.reader(records, delimiter=';')
    next(reader, None)  # skip the row of headers (no-op on an empty file)
    for row in reader:
        if len(row) < 2:
            # Blank or truncated line: there is no path/value pair to apply.
            continue
        xpath, new_text = row[0], row[1]
        for data in tree.findall(xpath):
            data.text = new_text
tree.write('jerry_new.xml')


You'll have the following results in jerry_new.xml

<data>
    <country name="Peru">
        <rank updated="yes">foo</rank>
        <language>english</language>
        <currency>1.21$/kg</currency>
        <gdppc month="06">141100</gdppc>
        <gdpnp month="10">foo</gdpnp>
        <neighbor direction="E" name="Austria" />
        <neighbor direction="W" name="Switzerland" />
    </country>
    <country name="Singapore">
        <rank updated="yes">foo</rank>
        <language>english</language>
        <currency>4.1$/kg</currency>
        <gdppc month="05">59900</gdppc>
        <gdpnp month="08">foo</gdpnp>
        <neighbor direction="N" name="Malaysia" />
    </country>
</data>

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  盖世英雄少女心        
                
              
                            
                2020-12-19 02:46
              
            
            
                                                                       
First of all, see the documentation on how to modify an XML document. Now, here is my own example:

import xml.etree.ElementTree as ET

s = """
<root>
    <parent attribute="value">
        <child_1 other_attr="other_value">child text</child_1>
        <child_2 yet_another_attr="another_value">more child text</child_2>
    </parent>
</root>
"""

root = ET.fromstring(s)

# Iterate the elements directly: Element.getchildren() was deprecated and then
# removed in Python 3.9, and iterating an element yields its direct children.
for parent in root:
    parent.attrib['attribute'] = 'new value'
    for child in parent:
        child.attrib['new_attrib'] = 'new attribute for {}'.format(child.tag)
        # .text is None for an element without text, so fall back to ''
        # before appending.
        child.text = (child.text or '') + ', appended text!'

>>> ET.dump(root)
<root>
    <parent attribute="new value">
        <child_1 new_attrib="new attribute for child_1" other_attr="other_value">child text, appended text!</child_1>
        <child_2 new_attrib="new attribute for child_2" yet_another_attr="another_value">more child text, appended text!</child_2>
    </parent>
</root>


And you can do this with XPath as well.

>>> root.find('parent/child_1[@other_attr]').attrib['other_attr'] = 'found it!'
>>> ET.dump(root)
<root>
    <parent attribute="new value">
        <child_1 new_attrib="new attribute for child_1" other_attr="found it!">child text, appended text!</child_1>
        <child_2 new_attrib="new attribute for child_2" yet_another_attr="another_value">more child text, appended text!</child_2>
    </parent>
</root>

                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
            
           
            
                              
                
              
              
                
                  忘掉有多难        
                
              
                            
                2020-12-19 02:47
              
            
            
                                                                       
You should be able to use the XPath capabilities of the module to do this:

import xml.etree.ElementTree as ET
tree = ET.parse('jerry.xml')
root = tree.getroot()
# Attribute values in ElementTree XPath predicates are compared exactly
# (case-sensitively), so the name must be spelled as in the document:
# "Singapore", not "singapore".
for data in root.findall(".//country[@name='Singapore']/gdpnp[@month='08']"):
    # csv_value is a placeholder: define it beforehand with the replacement
    # text taken from the corresponding CSV row.
    data.text = csv_value

tree.write("filename.xml")


So you need to rewrite the path in the csv to match the XPath rules defined for the module (see Supported XPath rules).
                                                                        
                                                        
            
            
              
                
                0
              
                 
                
               讨论(0)
              
              
                                                   
              
                                                            
            
                      
                    


               
            
    发布评论:
    
         
                        
    
    提交评论 
  
  

                    
                    
                    
                        
                        
                         加载中...
                        
                    
                
          
          	          
                             
        
        
          
            
            
              
              
            
    


                                 
              
            
                          
    

        
         
                验证码
                
                  
                
                
                   看不清?
                
              
                                  
                    
   
                 
             
              提交回复