关于如何将XML文件读入Python字典,我看过各种各样的问题和答案,并且已经有了一个基本可用的示例。它使用cElementTree模块和几个自定义类:XmlDictConfig遍历XML文件的各个层级并将其转换为Python字典,而另一个类ReadTree则包含用于从该字典中返回特定值的方法。
但有一个例外:当我尝试读取带有计量单位(UOM)属性的字段时,字典中只包含UOM键/值对,而不包含我真正关心的值。目前我能提取该值的唯一方法,是使用cElementTree的iter()方法创建一个生成器,然后用next()方法从生成器中取值。如果不使用生成器,该怎么做?
下面给出了这些类以及我的可运行示例的main部分。main中以注释形式附上了XML文件以供参考。
import numpy as np
import xml.etree.cElementTree as ElementTree
class XmlDictConfig(dict):
    """Dictionary built recursively from an ElementTree element.

    Attributes of ``parent_element`` become entries of this dictionary, and
    every child element is stored under its tag name:

    * a child with sub-elements becomes a nested ``XmlDictConfig`` (or a
      one-entry dict wrapping an ``XmlListConfig`` when its first two
      sub-elements share a tag), merged with the child's own attributes;
    * a childless element with attributes becomes a plain dict of those
      attributes;
    * a childless element without attributes becomes its text.

    By default the text of a childless element that has attributes is
    dropped (``<leaf uom="m">999.99</leaf>`` yields ``{'uom': 'm'}``).  Pass
    ``text_key`` to keep that text inside the attribute dict under the given
    key instead.

    Args:
        parent_element: the ElementTree element to convert.
        text_key: optional key under which the non-blank text of a childless
            element with attributes is stored.  ``None`` (the default)
            keeps the historical behaviour of discarding that text.

    Example usage:

    >>> tree = ElementTree.parse('your_file.xml')
    >>> root = tree.getroot()
    >>> xmldict = XmlDictConfig(root)
    """

    def __init__(self, parent_element, text_key=None):
        if parent_element.items():
            self.update(dict(parent_element.items()))
        for element in parent_element:
            # len() counts sub-elements; truth-testing an Element directly
            # is deprecated in ElementTree, so be explicit.
            if len(element):
                # treat like dict - we assume that if the first two tags
                # in a series are different, then they are all different.
                if len(element) == 1 or element[0].tag != element[1].tag:
                    aDict = XmlDictConfig(element, text_key)
                # treat like list - we assume that if the first two tags
                # in a series are the same, then the rest are the same.
                else:
                    # here, we put the list in dictionary; the key is the
                    # tag name the list elements all share in common, and
                    # the value is the list itself.
                    # NOTE(review): XmlListConfig is not defined in the
                    # visible part of this file, so text_key is not
                    # forwarded to it.
                    aDict = {element[0].tag: XmlListConfig(element)}
                # if the tag has attributes, add those to the dict
                if element.items():
                    aDict.update(dict(element.items()))
                self.update({element.tag: aDict})
            # childless element with attributes: historically the text was
            # assumed to be absent and was thrown away; keep it on request.
            elif element.items():
                attributes = dict(element.items())
                text = element.text
                if text_key is not None and text is not None and text.strip():
                    attributes[text_key] = text
                self.update({element.tag: attributes})
            # finally, if there are no child tags and no attributes, extract
            # the text
            else:
                self.update({element.tag: element.text})
## \brief read an XML file and return selected values from it
#
# Parses the file with ElementTree on construction and keeps the tree, its
# root element and the root's namespace prefix. Can be used as a context
# manager; leaving the context performs no cleanup.
class ReadTree:

    ## \brief parse the file and remember the root namespace
    #
    # \param fileName  file name (or file object) accepted by
    #                  ElementTree.parse
    def __init__(self, fileName):
        self.tree = ElementTree.parse(fileName)
        self.root = self.tree.getroot()
        # ElementTree spells a namespaced tag as '{uri}name'. Keep the
        # '{uri}' part ('' when the root has no namespace). str.strip('tree')
        # removed *characters* from both ends and was only correct for a
        # root element literally named 'tree'.
        prefix, brace, _ = self.root.tag.rpartition('}')
        self.rootNamespace = prefix + brace

    ## \brief enter the context; returns this instance
    def __enter__(self):
        return self

    ## \brief leave the context; nothing to release, exceptions propagate
    def __exit__(self, exc_type, exc_value, traceback):
        pass

    ## \brief create dictionary from keys/values
    #
    # private method for zipping up a dictionary using lists of key/value
    # pairs
    def _create_dictionary(self, keys, values):
        return dict(zip(keys, values))

    ## \brief get dictionary object
    #
    # use XmlDictConfig class to iterate through xml structure; the
    # dictionary is rebuilt from the root on every call
    def _get_xmlDict(self):
        return XmlDictConfig(self.root)

    ## \brief return branch level as a dictionary
    #
    # use private function _create_dictionary to convert the entries at
    # branch level into a plain dictionary
    def _get_branch(self):
        xmlDict = self._get_xmlDict()
        branch = xmlDict[self.rootNamespace + 'branch']
        return self._create_dictionary(branch.keys(), branch.values())

    ## \brief return the value of the twig field
    #
    # looked up in the branch dictionary under the namespaced 'twig' key
    def get_twig(self):
        return self._get_branch()[self.rootNamespace + 'twig']

    ## \brief get_leaf method A
    #
    # use the iterator returned by root.iter() to find the first leaf
    # element and return its text converted to float; raises StopIteration
    # when the document has no leaf element
    def get_leaf_A(self):
        leafGenerator = self.root.iter(self.rootNamespace + 'leaf')
        return float(next(leafGenerator).text)

    ## \brief get_leaf method B
    #
    # dictionary-based lookup of the leaf field; returns whatever entry
    # XmlDictConfig stored for it, which for the sample file shown in this
    # module is the attribute dictionary {'uom': 'm'} and not the numeric
    # text
    def get_leaf_B(self):
        return self._get_branch()[self.rootNamespace + 'leaf']
if __name__ == '__main__':
    # Contents of ./inFile.xml, reproduced here for reference:
    #
    #   <?xml version="1.0" encoding="utf-8"?>
    #   <tree xmlns:xsi="http://www.interweb.net">
    #   <branch uid="something">
    #   <twig>111.11</twig>
    #   <leaf uom="m">999.99</leaf>
    #   </branch>
    #   </tree>
    reader = ReadTree('./inFile.xml')

    # Call each accessor in turn and print its result. For the file above
    # the three printed lines are expected to be:
    #   111.11          (get_twig)
    #   999.99          (get_leaf_A)
    #   {'uom': 'm'}    (get_leaf_B)
    for accessor in (reader.get_twig, reader.get_leaf_A, reader.get_leaf_B):
        print(accessor())