Τμήμα Πληροφορικής και Τηλεπικοινωνιών - Άρτα
Πανεπιστήμιο Ιωαννίνων
Γκόγκος Χρήστος
http://chgogos.github.io/
Εαρινό εξάμηνο 2020-2021
import xml.dom.minidom
# List the names defined in the xml.dom.minidom module namespace
minidom_names = dir(xml.dom.minidom)
print(minidom_names)
['Attr', 'AttributeList', 'CDATASection', 'CharacterData', 'Childless', 'Comment', 'DOMImplementation', 'DOMImplementationLS', 'Document', 'DocumentFragment', 'DocumentLS', 'DocumentType', 'EMPTY_NAMESPACE', 'EMPTY_PREFIX', 'Element', 'ElementInfo', 'EmptyNodeList', 'Entity', 'Identified', 'NamedNodeMap', 'Node', 'NodeList', 'Notation', 'ProcessingInstruction', 'ReadOnlySequentialNamedNodeMap', 'StringTypes', 'Text', 'TypeInfo', 'XMLNS_NAMESPACE', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__spec__', '_append_child', '_clear_id_cache', '_clone_node', '_do_pulldom_parse', '_get_containing_element', '_get_containing_entref', '_get_elements_by_tagName_helper', '_get_elements_by_tagName_ns_helper', '_in_document', '_no_type', '_nodeTypes_with_children', '_nssplit', '_set_attribute_node', '_write_data', 'defproperty', 'domreg', 'getDOMImplementation', 'io', 'parse', 'parseString', 'xml']
# Parse the XML file with minidom and print a pretty-printed copy of it
path = "../../../datasets/person.xml"
# The context manager closes the file as soon as its contents have been read
# (the original left the handle open)
with open(path, "r", encoding="utf-8") as xml_f:
    xmlparse = xml.dom.minidom.parseString(xml_f.read())
prettyxml = xmlparse.toprettyxml()
print(prettyxml)
<?xml version="1.0" ?> <root> <address> <city>Άρτα</city> <postalCode>45221</postalCode> <state>Ήπειρος</state> <streetAddress>Ανεξαρτησίας 33</streetAddress> </address> <age>30</age> <firstName>Γιάννης</firstName> <lastName>Παπαδόπουλος</lastName> <married>true</married> <phoneNumbers> <element> <number>00302681123456</number> <type>home</type> </element> <element> <number>00302681654321</number> <type>work</type> </element> <element> <number>00306971234567</number> <type>mobile</type> </element> </phoneNumbers> <email null="true"/> </root>
# Show the lastName information as XML markup
last_name_element = xmlparse.getElementsByTagName('lastName')[0]
last_name_element.toxml()
'<lastName>Παπαδόπουλος</lastName>'
# Show the lastName information (the text held by the element's first child)
last_name_text = xmlparse.getElementsByTagName('lastName')[0].firstChild
print(last_name_text.data)
print(last_name_text.nodeValue)  # nodeValue is an alias for data
Παπαδόπουλος Παπαδόπουλος
# Show every piece of information nested inside the address tag
address = xmlparse.getElementsByTagName('address')[0]
for child in address.childNodes:
    # Only element nodes carry a tag name; every other node type is skipped
    if child.nodeType != child.ELEMENT_NODE:
        continue
    print(f'{child.tagName}: {child.firstChild.data}')
city: Άρτα postalCode: 45221 state: Ήπειρος streetAddress: Ανεξαρτησίας 33
# Print every phone entry nested under phoneNumbers, numbering them from 1
for phones in xmlparse.getElementsByTagName('phoneNumbers'):
    # Keep only the element children; numbering restarts for each phoneNumbers tag
    entries = [n for n in phones.childNodes if n.nodeType == n.ELEMENT_NODE]
    for index, entry in enumerate(entries, start=1):
        print(f'Phone{index}')
        for field in entry.childNodes:
            if field.nodeType != field.ELEMENT_NODE:
                continue
            print(f'{field.tagName}: {field.firstChild.data}')
        # Separator line after each phone entry
        print("#"*30)
Phone1 number: 00302681123456 type: home ############################## Phone2 number: 00302681654321 type: work ############################## Phone3 number: 00306971234567 type: mobile ##############################
# Traverse the XML using list(elem) to obtain each element's children
import xml.etree.ElementTree as ET

path = "../../../datasets/person.xml"
# The context manager closes the file once the tree has been built
# (the original left the handle open)
with open(path, "r", encoding="utf-8") as xml_f:
    tree = ET.ElementTree(file=xml_f)
root = tree.getroot()
print(root)
print("#" * 30)
for elem in list(root):
    if elem.tag == 'address':
        for elem2 in list(elem):
            print(elem2.tag, elem2.text)
    elif elem.tag == 'phoneNumbers':
        for elem2 in list(elem):
            # Print every field of the phone entry rather than indexing
            # positions 0 and 1, so entries with fewer or more fields work too
            for elem3 in list(elem2):
                print(elem3.tag, elem3.text)
    else:
        print(elem.tag, elem.text)
<Element 'root' at 0x00000251EDCACCC0> ############################## city Άρτα postalCode 45221 state Ήπειρος streetAddress Ανεξαρτησίας 33 age 30 firstName Γιάννης lastName Παπαδόπουλος married true number 00302681123456 type home number 00302681654321 type work number 00306971234567 type mobile email None
# Traverse the XML with the tree iterator
skipped_tags = ('root', 'element')  # tags that produce no output
section_titles = {'address': 'ADDRESS', 'phoneNumbers': 'PHONENUMBERS'}
for elem in tree.iter():
    tag = elem.tag
    if tag in skipped_tags:
        continue
    if tag in section_titles:
        # These tags are announced with a fixed heading only
        print(section_titles[tag])
    else:
        print(f'{tag} {elem.text}')
ADDRESS city Άρτα postalCode 45221 state Ήπειρος streetAddress Ανεξαρτησίας 33 age 30 firstName Γιάννης lastName Παπαδόπουλος married true PHONENUMBERS number 00302681123456 type home number 00302681654321 type work number 00306971234567 type mobile email None
# Build an XML document that contains both attributes and elements
import xml.etree.ElementTree as ET

xml_doc = ET.Element("messages")

# One row per <note>: (id attribute, to, from, heading, body)
note_rows = (
    ('501', 'Tove', 'Jani', 'Reminder', "Don't forget me the weekend!"),
    ('502', 'Jani', 'Tove', 'Re: Reminder', "I will not"),
)
for note_id, *field_texts in note_rows:
    note = ET.SubElement(xml_doc, "note")
    note.set('id', note_id)
    # Child elements are appended in the same order as the texts in the row
    for tag, text in zip(("to", "from", "heading", "body"), field_texts):
        ET.SubElement(note, tag).text = text
# https://stackoverflow.com/questions/749796/pretty-printing-xml-in-python
def prettify(element, indent=" "):
    """Indent an ElementTree element and all its descendants in place.

    The ``text`` of every element that has children and the ``tail`` of
    every element are overwritten with a newline followed by indentation,
    so that serializing the tree puts one tag per line.

    Args:
        element: Root of the subtree to indent; treated as level 0.
        indent: String repeated once per nesting level.

    Returns:
        None. The tree is modified in place.
    """
    # Stack of (level, element); the top is the next element in document
    # order, because children are pushed in reverse.
    pending = [(0, element)]
    while pending:
        level, node = pending.pop()
        children = list(node)
        if children:
            node.text = "\n" + indent * (level + 1)  # for child open
        if pending and pending[-1][0] == level:
            node.tail = "\n" + indent * level  # for sibling open
        else:
            # Last child of its parent: the next tag is the parent's close,
            # always one level up, however far up the next pending element is.
            node.tail = "\n" + indent * (level - 1)  # for parent close
        pending.extend((level + 1, child) for child in reversed(children))
# Indent the document in place, then write it out as an XML file
prettify(xml_doc)
notes_path = '../../../datasets/notes.xml'
tree = ET.ElementTree(xml_doc)
tree.write(notes_path)
import xmltodict
# Parse the XML file into nested mappings with xmltodict
path = "../../../datasets/person.xml"
# The context manager closes the file as soon as its contents have been read
# (the original left the handle open)
with open(path, "r", encoding="utf-8") as xml_f:
    xmldict = xmltodict.parse(xml_f.read())
# Bare expression: shown as the result when run interactively
xmldict
OrderedDict([('root', OrderedDict([('address', OrderedDict([('city', 'Άρτα'), ('postalCode', '45221'), ('state', 'Ήπειρος'), ('streetAddress', 'Ανεξαρτησίας 33')])), ('age', '30'), ('firstName', 'Γιάννης'), ('lastName', 'Παπαδόπουλος'), ('married', 'true'), ('phoneNumbers', OrderedDict([('element', [OrderedDict([('number', '00302681123456'), ('type', 'home')]), OrderedDict([('number', '00302681654321'), ('type', 'work')]), OrderedDict([('number', '00306971234567'), ('type', 'mobile')])])])), ('email', OrderedDict([('@null', 'true')]))]))])
# Look up individual values by walking the nested mappings
person = xmldict['root']
print(person['lastName'])
print(person['address']['city'])
print(person['phoneNumbers']['element'][0]['number'])
Παπαδόπουλος Άρτα 00302681123456