Notebook

In [1]:

import ipywidgets as widgets
import requests
from IPython.display import display, clear_output
from bs4 import BeautifulSoup
from lxml import etree
import pandas
import unicodedata
import voila

Testabfrage DNB-Daten¶

Hier können Sie unsere SRU-Schnittstelle über einfache Formulareingaben abfragen. Wählen Sie dazu den Katalog, den Sie abfragen möchten und das Metadatenformat für die Ausgabe aus. Im nächsten Schritt geben Sie Ihren Suchbegriff ein.

Für die Ausführung des dahinterliegenden Codes muss die Reihenfolge bei Eingaben und Buttonklicks eingehalten werden.

Im Anschluss können Sie sich eine gekürzte tabellarische Darstellung Ihrer Anfrage ansehen und diese als XML- oder CSV-Datei speichern.

Bitte beachten Sie:

Dieses Tutorial dient als Einstieg. Aus Performance-Gründen werden jeweils immer nur die ersten 100 Treffer Ihrer Anfrage ausgegeben.
Die Metadatenformate enthalten unterschiedliche Informationen. Die Ausgabetabellen und -dateien variieren daher entsprechend in der Anzahl enthaltener Elemente und Informationen.

Bitte wählen Sie zunächst den gewünschten Katalog:¶

DNB = Titeldaten der Deutschen Nationalbibliothek
DMA = Deutsches Musikarchiv
GND = Gemeinsame Normdatei

In [2]:

auswahl = widgets.Dropdown(
            options=['DNB', 'DMA', 'GND'],  
            value='DNB',
            description='Katalog:',
            style={'description_width': 'initial'},
            disabled=False,
        )

display(auswahl)

default = "https://services.dnb.de/sru/dnb"

Dropdown(description='Katalog:', options=('DNB', 'DMA', 'GND'), style=DescriptionStyle(description_width='init…

Bitte wählen Sie das Metadatenformat für die Ausgabe:¶

In [3]:

meta = widgets.Dropdown(
            options=[('MARC21-xml', 'MARC21-xml'), ('DNB Casual (oai_dc)', 'oai_dc'), ('RDF (RDFxml)', 'RDFxml')], 
            value='MARC21-xml',
            description='Metadatenformat:', 
            layout={'width': 'max-content'},
            style={'description_width': 'initial'},
            disabled=False,
        )

display(meta)

Dropdown(description='Metadatenformat:', layout=Layout(width='max-content'), options=(('MARC21-xml', 'MARC21-x…

In [4]:

button = widgets.Button(description="Bestätigen")
output1 = widgets.Output()

display(button, output1)

def on_button_clicked(b):
    
    with output1:
        global A
        value = "https://services.dnb.de/sru/dnb"
        clear_output()
        result = auswahl.value
        if auswahl.value == "DNB":
            selected_url = "https://services.dnb.de/sru/dnb"
        elif auswahl.value == "DMA":
            selected_url = "https://services.dnb.de/sru/dnb.dma"
        elif auswahl.value == "GND":
            selected_url = "https://services.dnb.de/sru/authorities"
        else:
            selected_url = "ERROR: Keine URL gewählt"
        print("Auswahl Katalog-URL für", result, ":", selected_url)
        print("Auswahl Metadatenformat:", meta.value)
    
        A = selected_url
        return A
    
button.on_click(on_button_clicked)

Button(description='Bestätigen', style=ButtonStyle())

Output()

Bitte geben Sie nun Ihren Suchbegriff ein:¶

In [6]:

searchterm = widgets.Text(
            value='',
            placeholder='Suchbegriff eintippen',
            description='Suchbegriff:',
            disabled=False
        )

display(searchterm)

Text(value='', description='Suchbegriff:', placeholder='Suchbegriff eintippen')

In [7]:

button_search = widgets.Button(description="Suche starten")
output2 = widgets.Output()

display(button_search, output2)

def on_button_clicked(b):
    
    with output2:
        global records 
        global records_marc
        global gndm
        global r1
        clear_output()
        searchtext = searchterm.value
        print("Suche nach:", searchtext)
        
        if 'A' in globals():
            test = 'yes'
        else:
            test = 'no'        
        
        parameter = {'version' : '1.1' , 'operation' : 'searchRetrieve' , 'query' : searchtext, 'recordSchema' : meta.value, 
                     'maximumRecords': '100'} 
        
        if test == 'yes':
            r1 = requests.get(A, params = parameter)
        if test == 'no': 
            r1 = requests.get(default, params = parameter)

        response = BeautifulSoup(r1.content)
        records = response.find_all('record')
        records_marc = response.find_all('record', {'type':'Bibliographic'})
        gndm = response.find_all('record', {'type':'Authority'})
        numberofrecords = response.find_all('numberofrecords')[0].text
        vorschau = records[0]
        print("Gefundene Treffer:", numberofrecords)
        print(" ")
        print("Vorschau des ersten Treffers der SRU-Antwort:")
        print("")
        print(vorschau.prettify())
        print("")
        print(" - Ende der Vorschau - ")
        
button_search.on_click(on_button_clicked)

Button(description='Suche starten', style=ButtonStyle())

Output()

In [23]:

#Funktion für Titeldaten in OAI-DC
def parse_record_dc(record):
    
    ns = {"dc": "http://purl.org/dc/elements/1.1/", 
          "xsi": "http://www.w3.org/2001/XMLSchema-instance"}
    xml = etree.fromstring(unicodedata.normalize("NFC", str(record)))
    
    #idn
    idn = xml.xpath(".//dc:identifier[@xsi:type='dnb:IDN']", namespaces=ns) #--> Adressiert das Element direkt   
    try:
        idn = idn[0].text
    except:
        idn = 'fail'
    
    #creator:
    creator = xml.xpath('.//dc:creator', namespaces=ns)
    try:
        creator = creator[0].text
    except:
        creator = "N/A"
    
    #titel
    titel = xml.xpath('.//dc:title', namespaces=ns)
    try:
        titel = titel[0].text
    except:
        titel = "N/A"
        
    #date
    date = xml.xpath('.//dc:date', namespaces=ns)
    try:
        date = date[0].text
    except:
        date = "N/A"
    
    
    #publisher
    publ = xml.xpath('.//dc:publisher', namespaces=ns)
    try:
        publ = publ[0].text
    except:
        publ = "N/A"
    
        
    #identifier
    ids = xml.xpath('.//dc:identifier[@xsi:type="tel:ISBN"]', namespaces=ns)
    try:
        ids = ids[0].text
    except:
        ids = "N/A"
        
    #urn
    urn = xml.xpath('.//dc:identifier[@xsi:type="tel:URN"]', namespaces=ns)
    try:
        urn = urn[0].text
    except:
        urn = "N/A"
        
        
    meta_dict = {"IDN":idn, "CREATOR":creator, "TITLE":titel, "DATE":date, "PUBLISHER":publ, "URN":urn, "ISBN":ids}
    
    return meta_dict

In [24]:

#Function für Titeldaten in MARC21
def parse_record_marc(item):

    ns = {"marc":"http://www.loc.gov/MARC21/slim"}
    xml = etree.fromstring(unicodedata.normalize("NFC", str(item)))
    
    
    #idn
    idn = xml.findall("marc:controlfield[@tag = '001']", namespaces=ns)
    try:
        idn = idn[0].text
    except:
        idn = 'N/A' 
        
    
    #creator
    creator1 = xml.findall("marc:datafield[@tag = '100']/marc:subfield[@code = 'a']", namespaces=ns)
    creator2 = xml.findall("marc:datafield[@tag = '110']/marc:subfield[@code = 'a']", namespaces=ns)
    subfield = xml.findall("marc:datafield[@tag = '110']/marc:subfield[@code = 'e']", namespaces=ns)
    
    if creator1:
        creator = creator1[0].text
    elif creator2:
        creator = creator2[0].text
        if subfield:
            creator = creator + " [" + subfield[0].text + "]"
    else:
        creator = "N/A"
    
    #Titel $a
    title = xml.findall("marc:datafield[@tag = '245']/marc:subfield[@code = 'a']", namespaces=ns)
    title2 = xml.findall("marc:datafield[@tag = '245']/marc:subfield[@code = 'b']", namespaces=ns)
    
    if title and not title2:
        titletext = title[0].text
    elif title and title2:     
        titletext = title[0].text + ": " + title2[0].text
    else:
        titletext = "N/A"
    
    
    #date
    date = xml.findall("marc:datafield[@tag = '264']/marc:subfield[@code = 'c']", namespaces=ns)
    try:
        date = date[0].text
    except:    
        date = 'N/A'
    
    
    #publisher
    publ = xml.findall("marc:datafield[@tag = '264']/marc:subfield[@code = 'b']", namespaces=ns)
    try:
        publ = publ[0].text
    except:    
        publ = 'N/A'
        
        
    #URN
    testurn = xml.findall("marc:datafield[@tag = '856']/marc:subfield[@code = 'x']", namespaces=ns)
    urn = xml.findall("marc:datafield[@tag = '856']/marc:subfield[@code = 'u']", namespaces=ns)
    
    if testurn:
        urn = urn[0].text
    else:    
        urn = 'N/A'
        
        
    #ISBN
    isbn_new = xml.findall("marc:datafield[@tag = '020']/marc:subfield[@code = 'a']", namespaces=ns)
    isbn_old = xml.findall("marc:datafield[@tag = '024']/marc:subfield[@code = 'a']", namespaces=ns)
    if isbn_new:
        isbn = isbn_new[0].text
    elif isbn_old: 
        isbn = isbn_old[0].text
    else:    
        isbn = 'N/A'
    

    
    meta_dict = {"IDN":idn, "CREATOR":creator, "TITLE": titletext, "DATE":date, 
                 "PUBLISHER":publ, "URN":urn, "ISBN":isbn}
    
    return meta_dict
    

In [25]:

#Funktion für Titeldaten in RDF:

def parse_record_rdf(record):
    
    ns = {"xlmns":"http://www.loc.gov/zing/srw/", 
          "agrelon":"https://d-nb.info/standards/elementset/agrelon#",
          "bflc":"http://id.loc.gov/ontologies/bflc/",
          "rdau":"http://rdaregistry.info/Elements/u/",
          "dc":"http://purl.org/dc/elements/1.1/",
          "rdau":"http://rdaregistry.info/Elements/u",
          "bibo":"http://purl.org/ontology/bibo/",
          "dbp":"http://dbpedia.org/property/",  
          "dcmitype":"http://purl.org/dc/dcmitype/", 
          "dcterms":"http://purl.org/dc/terms/", 
          "dnb_intern":"http://dnb.de/", 
          "dnbt":"https://d-nb.info/standards/elementset/dnb#", 
          "ebu":"http://www.ebu.ch/metadata/ontologies/ebucore/ebucore#", 
          "editeur":"https://ns.editeur.org/thema/", 
          "foaf":"http://xmlns.com/foaf/0.1/", 
          "gbv":"http://purl.org/ontology/gbv/", 
          "geo":"http://www.opengis.net/ont/geosparql#", 
          "gndo":"https://d-nb.info/standards/elementset/gnd#", 
          "isbd":"http://iflastandards.info/ns/isbd/elements/", 
          "lib":"http://purl.org/library/", 
          "madsrdf":"http://www.loc.gov/mads/rdf/v1#", 
          "marcrole":"http://id.loc.gov/vocabulary/relators/",
          "mo":"http://purl.org/ontology/mo/", 
          "owl":"http://www.w3.org/2002/07/owl#", 
          "rdf":"http://www.w3.org/1999/02/22-rdf-syntax-ns#", 
          "rdfs":"http://www.w3.org/2000/01/rdf-schema#", 
          "schema":"http://schema.org/", 
          "sf":"http://www.opengis.net/ont/sf#", 
          "skos":"http://www.w3.org/2004/02/skos/core#", 
          "umbel":"http://umbel.org/umbel#", 
          "v":"http://www.w3.org/2006/vcard/ns#", 
          "vivo":"http://vivoweb.org/ontology/core#", 
          "wdrs":"http://www.w3.org/2007/05/powder-s#", 
          "xsd":"http://www.w3.org/2001/XMLSchema#"}
    
    xml = etree.fromstring(unicodedata.normalize("NFC", str(record)))
     
    #idn
    idn = xml.findall(".//dc:identifier", namespaces=ns)
    try:
        idn = idn[0].text
    except:
        idn = 'N/A' 
        
    
    #creator
    creator = record.find_all('rdau:p60327')
    
    try:
        creator = creator[0].text
    except:
        creator = "N/A"
        
    
    #title
    test = record.find_all('dc:title')
    
    try:
        test = test[0].text
    except:
        test = "N/A"
        
        
    #date
    date = record.find_all('dcterms:issued')
    
    try:
        date = date[0].text
    except:
        date = "N/A"    
    
        
    #publisher
    publ = record.find_all('dc:publisher')
    
    try:
        publ = publ[0].text
    except:
        publ = "N/A"    
    
    #urn
    urn = record.find_all('umbel:islike')
    
    try:
        urn = urn[0]
        urn = urn.get('rdf:resource')
    except:
        urn = "N/A"
    
    #isbn
    isbn = xml.findall(".//bibo:isbn13", namespaces=ns)
    isbn10 = xml.findall(".//bibo:isbn10", namespaces=ns)
    
    if isbn:
        isbn = isbn[0].text
    elif isbn10: 
        isbn = isbn10[0].text
    else:
        isbn = "N/A"
    
    
        
        
    meta_dict = {"IDN":idn, "CREATOR":creator, "TITLE":test, "DATE":date, "PUBLISHER":publ, "URN":urn, "ISBN":isbn}
    
    return meta_dict
    

In [26]:

#Funktion für GND in MARC21:

def parse_record_gndm(record):
    
    ns = {"xmlns":"http://www.loc.gov/MARC21/slim"}
    xml = etree.fromstring(unicodedata.normalize("NFC", str(record)))
    
    
    #Art
    gndtype = xml.findall("xmlns:datafield[@tag = '075']/xmlns:subfield[@code = 'b']", namespaces=ns)
    gndtype = gndtype[0].text
    
    if gndtype == "p": 
        gndtype = "Person"
    elif gndtype == "b":
        gndtype = "Organisation"
    elif gndtype == "u": 
        gndtype = "Werk"
    elif gndtype == "f": 
        gndtype = "Veranstaltung"
    elif gndtype == "g": 
        gndtype = "Geografikum" 
    elif gndtype == "n": 
        gndtype = "Person"
    elif gndtype == "s": 
        gndtype = "Sachbegriff"
        
        
    #Name
    main1 = xml.findall("xmlns:datafield[@tag = '100']/xmlns:subfield[@code = 'a']", namespaces=ns)
    main2 = xml.findall("xmlns:datafield[@tag = '110']/xmlns:subfield[@code = 'a']", namespaces=ns)
    main3 = xml.findall("xmlns:datafield[@tag = '111']/xmlns:subfield[@code = 'a']", namespaces=ns)
    main4 = xml.findall("xmlns:datafield[@tag = '130']/xmlns:subfield[@code = 'a']", namespaces=ns)
    main5 = xml.findall("xmlns:datafield[@tag = '150']/xmlns:subfield[@code = 'a']", namespaces=ns)
    main6 = xml.findall("xmlns:datafield[@tag = '151']/xmlns:subfield[@code = 'a']", namespaces=ns)
    
    
    if main1: 
        main = main1[0].text
    elif main2:     
        main = main2[0].text
    elif main3: 
        main = main3[0].text
    elif main4:
        main = main4[0].text
    elif main5:
        main = main5[0].text
    elif main6:
        main = main6[0].text
    else:
        main = "N/A"
        
    
    #title (bei Werken)
    title1 = xml.findall("xmlns:datafield[@tag = '100']/xmlns:subfield[@code = 't']", namespaces=ns)
    
    if title1: 
        title = title1[0].text
    else:
        title = 'N/A'
    
    
    
    #idn
    idn = xml.findall("xmlns:controlfield[@tag = '001']", namespaces=ns)
    try:
        idn = idn[0].text
    except:
        idn = 'N/A' 
    
    
    #Link
    link1 = xml.findall("xmlns:datafield[@tag = '024']/xmlns:subfield[@code = '0']", namespaces=ns)
    try:
        link1 = link1[0].text
    except:
        link1 = 'N/A' 
        
        
    
    dicty = {"IDN":idn, "TYPE":gndtype, "NAME":main, "TITLE":title, "LINK":link1}
    return dicty

In [27]:

#Funktion für GND in OAI-DC:

def parse_record_gndoai(record):
    
    ns = {"dc": "http://purl.org/dc/elements/1.1/", 
          "xsi": "http://www.w3.org/2001/XMLSchema-instance"}
    xml = etree.fromstring(unicodedata.normalize("NFC", str(record)))
    
    
    #idn
    idn = xml.xpath(".//dc:identifier[@xsi:type='dnb:IDN']", namespaces=ns) #--> Adressiert das Element direkt   
    try:
        idn = idn[0].text
    except:
        idn = 'fail'
    
    
    #title
    title = xml.xpath(".//dc:title", namespaces=ns)  
    try:
        title = title[0].text
    except:
        title = 'N/A'   
    
    
    #creator
    creator = xml.xpath(".//dc:creator", namespaces=ns)  
    try:
        creator = creator[0].text
    except:
        creator = 'N/A'        
        
    
    dicty = {"IDN":idn, "NAME":creator, "TITLE":title}
    return dicty


        

In [28]:

#GND in RDF

def parse_record_gndrdf(record):
    
    ns = {"xlmns":"http://www.loc.gov/zing/srw/", 
          "agrelon":"https://d-nb.info/standards/elementset/agrelon#",
          "bflc":"http://id.loc.gov/ontologies/bflc/",
          "rdau":"http://rdaregistry.info/Elements/u/",
          "dc":"http://purl.org/dc/elements/1.1/",
          "rdau":"http://rdaregistry.info/Elements/u",
          "bibo":"http://purl.org/ontology/bibo/",
          "dbp":"http://dbpedia.org/property/",  
          "dcmitype":"http://purl.org/dc/dcmitype/", 
          "dcterms":"http://purl.org/dc/terms/", 
          "dnb_intern":"http://dnb.de/", 
          "dnbt":"https://d-nb.info/standards/elementset/dnb#", 
          "ebu":"http://www.ebu.ch/metadata/ontologies/ebucore/ebucore#", 
          "editeur":"https://ns.editeur.org/thema/", 
          "foaf":"http://xmlns.com/foaf/0.1/", 
          "gbv":"http://purl.org/ontology/gbv/", 
          "geo":"http://www.opengis.net/ont/geosparql#", 
          "gndo":"https://d-nb.info/standards/elementset/gnd#", 
          "isbd":"http://iflastandards.info/ns/isbd/elements/", 
          "lib":"http://purl.org/library/", 
          "madsrdf":"http://www.loc.gov/mads/rdf/v1#", 
          "marcrole":"http://id.loc.gov/vocabulary/relators/",
          "mo":"http://purl.org/ontology/mo/", 
          "owl":"http://www.w3.org/2002/07/owl#", 
          "rdf":"http://www.w3.org/1999/02/22-rdf-syntax-ns#", 
          "rdfs":"http://www.w3.org/2000/01/rdf-schema#", 
          "schema":"http://schema.org/", 
          "sf":"http://www.opengis.net/ont/sf#", 
          "skos":"http://www.w3.org/2004/02/skos/core#", 
          "umbel":"http://umbel.org/umbel#", 
          "v":"http://www.w3.org/2006/vcard/ns#", 
          "vivo":"http://vivoweb.org/ontology/core#", 
          "wdrs":"http://www.w3.org/2007/05/powder-s#", 
          "xsd":"http://www.w3.org/2001/XMLSchema#"}
    
    xml = etree.fromstring(unicodedata.normalize("NFC", str(record)))
     
    #idn
    idn = xml.findall(".//gndo:gndidentifier", namespaces=ns)
    try:
        idn = idn[0].text
    except:
        idn = 'N/A' 
        
        
    #link
    link = record.find_all('rdf:description')
    
    try: 
        link = link[0]
        link = link.get('rdf:about')
    except:
        link = 'N/A' 
        
    
    #name
    name = record.find_all('gndo:preferrednamefortheperson')
    name2 = record.find_all('gndo:preferrednameforthecorporatebody') 
    name3 = record.find_all('gndo:preferrednameforthework') 
    name4 = record.find_all('gndo:preferrednamefortheconferenceorevent')
    name5 = record.find_all('gndo:preferrednameforthesubjectheading')
    
    if name:
        name = name[0].text
    elif name2:
        name = name2[0].text
    elif name3:
        name = name3[0].text
    elif name4: 
        name = name4[0].text
    elif name5: 
        name = name5[0].text
    else:
        name = "N/A"
        
    
    #time
    time = record.find_all('gndo:periodofactivity')
    time2 = record.find_all('gndo:dateofpublication')
    time3 = record.find_all('gndo:dateofconferenceorevent')
    time4 = record.find_all('gndo:dateofbirth')
    
    if time:
        time = time[0].text
    elif time2:
        time = time2[0].text
    elif time3:
        time = time3[0].text
    elif time4:
        time = time4[0].text + "-"
    else:
        time = "N/A"
    
            
    
    #type
    gndtype = record.find_all('rdf:type')
    
    try: 
        gndtype = gndtype[0]
        gndtype = gndtype.get('rdf:resource')
    except:
        link = 'N/A' 
        
    
    
    meta_dict = {"IDN":idn, "NAME":name, "TIME":time, "LINK":link}
    
    return meta_dict

In [29]:

#Function für DMA in MARC21
def parse_record_dmamarc(item):

    ns = {"marc":"http://www.loc.gov/MARC21/slim"}
    xml = etree.fromstring(unicodedata.normalize("NFC", str(item)))
    
    
    #idn
    idn = xml.findall("marc:controlfield[@tag = '001']", namespaces=ns)
    try:
        idn = idn[0].text
    except:
        idn = 'N/A' 
        
    
    #creator
    creator1 = xml.findall("marc:datafield[@tag = '100']/marc:subfield[@code = 'a']", namespaces=ns)
    #creator2 = xml.findall("marc:datafield[@tag = '110']/marc:subfield[@code = 'a']", namespaces=ns)
    subfield = xml.findall("marc:datafield[@tag = '245']/marc:subfield[@code = 'c']", namespaces=ns)
    
    
    if creator1:
        creator = creator1[0].text
    elif subfield:
        creator = subfield[0].text
    else:
        creator = "N/A"
    
    #Titel $a
    title = xml.findall("marc:datafield[@tag = '245']/marc:subfield[@code = 'a']", namespaces=ns)
    title2 = xml.findall("marc:datafield[@tag = '245']/marc:subfield[@code = 'b']", namespaces=ns)
    
    if title and not title2:
        titletext = title[0].text
    elif title and title2:     
        titletext = title[0].text + ": " + title2[0].text
    else:
        titletext = "N/A"
    
    
    #Umfang/Format
    art = xml.findall("marc:datafield[@tag = '300']/marc:subfield[@code = 'a']", namespaces=ns)
    try:
        art = art[0].text
    except:    
        art = 'N/A'
    
    
    
    #date
    date = xml.findall("marc:datafield[@tag = '264']/marc:subfield[@code = 'c']", namespaces=ns)
    try:
        date = date[0].text
    except:    
        date = 'N/A'
    
    
    #publisher
    publ = xml.findall("marc:datafield[@tag = '264']/marc:subfield[@code = 'b']", namespaces=ns)
    try:
        publ = publ[0].text
    except:    
        publ = 'N/A'
          
        
    #ISBN
    isbn_new = xml.findall("marc:datafield[@tag = '020']/marc:subfield[@code = 'a']", namespaces=ns)
    isbn_old = xml.findall("marc:datafield[@tag = '024']/marc:subfield[@code = 'a']", namespaces=ns)
    if isbn_new:
        isbn = isbn_new[0].text
    elif isbn_old: 
        isbn = isbn_old[0].text
    else:    
        isbn = 'N/A'
    
    meta_dict = {"IDN":idn, "CREATOR":creator, "TITLE":titletext, "DATE":date, "PUBLISHER":publ, "ISBN":isbn}  
    
    return meta_dict
    

In [30]:

#Funktion für DMA in OAI-DC
def parse_record_dmadc(record):
    
    ns = {"dc": "http://purl.org/dc/elements/1.1/", 
          "xsi": "http://www.w3.org/2001/XMLSchema-instance"}
    xml = etree.fromstring(unicodedata.normalize("NFC", str(record)))
    
    #idn
    idn = xml.xpath(".//dc:identifier[@xsi:type='dnb:IDN']", namespaces=ns) #--> Adressiert das Element direkt   
    try:
        idn = idn[0].text
    except:
        idn = 'fail'
    
    #creator:
    creator = xml.xpath('.//dc:creator', namespaces=ns)
    try:
        creator = creator[0].text
    except:
        creator = "N/A"
    
    #titel
    titel = xml.xpath('.//dc:title', namespaces=ns)
    try:
        titel = titel[0].text
    except:
        titel = "N/A"
        
    #date
    date = xml.xpath('.//dc:date', namespaces=ns)
    try:
        date = date[0].text
    except:
        date = "N/A"
    
    
    #publisher
    publ = xml.xpath('.//dc:publisher', namespaces=ns)
    try:
        publ = publ[0].text
    except:
        publ = "N/A"
    
        
    #format
    form = xml.xpath('.//dc:format', namespaces=ns)
    try:
        form = form[0].text
    except:
        form = "N/A"
                
        
    meta_dict = {"IDN":idn, "CREATOR":creator, "TITLE":titel, "DATE":date, "PUBLISHER":publ, "FORMAT":form}
    
    return meta_dict

In [35]:

#DMA in RDF: 

def parse_record_dmardf(record):
    
    ns = {"xlmns":"http://www.loc.gov/zing/srw/", 
          "agrelon":"https://d-nb.info/standards/elementset/agrelon#",
          "bflc":"http://id.loc.gov/ontologies/bflc/",
          "rdau":"http://rdaregistry.info/Elements/u/",
          "dc":"http://purl.org/dc/elements/1.1/",
          "rdau":"http://rdaregistry.info/Elements/u",
          "bibo":"http://purl.org/ontology/bibo/",
          "dbp":"http://dbpedia.org/property/",  
          "dcmitype":"http://purl.org/dc/dcmitype/", 
          "dcterms":"http://purl.org/dc/terms/", 
          "dnb_intern":"http://dnb.de/", 
          "dnbt":"https://d-nb.info/standards/elementset/dnb#", 
          "ebu":"http://www.ebu.ch/metadata/ontologies/ebucore/ebucore#", 
          "editeur":"https://ns.editeur.org/thema/", 
          "foaf":"http://xmlns.com/foaf/0.1/", 
          "gbv":"http://purl.org/ontology/gbv/", 
          "geo":"http://www.opengis.net/ont/geosparql#", 
          "gndo":"https://d-nb.info/standards/elementset/gnd#", 
          "isbd":"http://iflastandards.info/ns/isbd/elements/", 
          "lib":"http://purl.org/library/", 
          "madsrdf":"http://www.loc.gov/mads/rdf/v1#", 
          "marcrole":"http://id.loc.gov/vocabulary/relators/",
          "mo":"http://purl.org/ontology/mo/", 
          "owl":"http://www.w3.org/2002/07/owl#", 
          "rdf":"http://www.w3.org/1999/02/22-rdf-syntax-ns#", 
          "rdfs":"http://www.w3.org/2000/01/rdf-schema#", 
          "schema":"http://schema.org/", 
          "sf":"http://www.opengis.net/ont/sf#", 
          "skos":"http://www.w3.org/2004/02/skos/core#", 
          "umbel":"http://umbel.org/umbel#", 
          "v":"http://www.w3.org/2006/vcard/ns#", 
          "vivo":"http://vivoweb.org/ontology/core#", 
          "wdrs":"http://www.w3.org/2007/05/powder-s#", 
          "xsd":"http://www.w3.org/2001/XMLSchema#"}
    
    xml = etree.fromstring(unicodedata.normalize("NFC", str(record)))
     
    #idn
    idn = xml.findall(".//dc:identifier", namespaces=ns)
    try:
        idn = idn[0].text
    except:
        idn = 'N/A' 
              
    
    #creator
    name = record.find_all('rdau:p60327')
    
    if name:
        name = name[0].text
    else:
        name = "N/A"
        
    
    #title:
    title = record.find_all('dc:title')
    
    if title:
        title = title[0].text
    else: 
        title = "N/A"
        
    
    #publisher
    publ = record.find_all('dc:publisher')
        
    if publ:
        publ = publ[0].text
    else:
        publ = "N/A"
    
    
    #date
    time = record.find_all('dcterms:issued')
        
    if time:
        time = time[0].text
    else:
        time = "N/A"
    
    
            
        
    meta_dict = {"IDN":idn, "NAME":name, "TITLE":title, "PUBLISHER":publ, "DATE":time}
    
    return meta_dict

Ausgeben und Speichern der Daten:¶

In [17]:

#Funktion für Links: 
def make_clickable(val):
    if val == "N/A": 
        link = "N/A"
    else: 
        link = '<a target="_blank" href="{}">{}</a>'.format(val,val)
    
    return link 

In [18]:

#Extrahieren der Browser-URL:
import os
import re
url = os.environ["JUPYTERHUB_ACTIVITY_URL"]
url_new = url.replace('/activity', '')
url_newer = url_new.replace('http://hub:8081/binder/jupyter/hub/api/users', 'https://notebooks.gesis.org/binder/jupyter/user')
    

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-18-7701e2ee221c> in <module>
      2 import os
      3 import re
----> 4 url = os.environ["JUPYTERHUB_ACTIVITY_URL"]
      5 url_new = url.replace('/activity', '')
      6 url_newer = url_new.replace('http://hub:8081/binder/jupyter/hub/api/users', 'https://notebooks.gesis.org/binder/jupyter/user')

C:\ProgramData\Anaconda3\lib\os.py in __getitem__(self, key)
    673         except KeyError:
    674             # raise KeyError with the original key value
--> 675             raise KeyError(key) from None
    676         return self.decodevalue(value)
    677 

KeyError: 'JUPYTERHUB_ACTIVITY_URL'

Speichern der Schnittstellenantwort als XML-Datei:¶

Eine Ergebnisdatei "data.xml" wird mit einem Klick auf dem Button in folgendes Verzeichnis gespeichert:

In [19]:

from IPython.core.display import HTML

clicky = make_clickable(url_newer)
display(HTML(clicky))
    
#print(url_newer) 

---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-19-8fc4f82fea90> in <module>
      1 from IPython.core.display import HTML
      2 
----> 3 clicky = make_clickable(url_newer)
      4 display(HTML(clicky))
      5 

NameError: name 'url_newer' is not defined

In [20]:

button_xml = widgets.Button(description="XML speichern")
output_xml = widgets.Output()

display(button_xml, output_xml)

def on_button_clicked(b):
    with output_xml:
        
        with open('data.xml', 'w', encoding='utf-8') as f:
            print(r1.text, file=f)
    
            
button_xml.on_click(on_button_clicked)

Button(description='XML speichern', style=ButtonStyle())

Output()

Kopieren Sie am besten den Link in ein neuens Browser-Tab. Sie können die Datei daraufhin bei sich speichern, indem Sie die Datei "data.xml" in der linken Navigationsleiste suchen und diese markieren (Häkchen setzen), woraufhin in der oberen Leiste ein Button mit der Beschriftung "Download" erscheint. Klicken Sie diesen an und wählen Sie einen lokalen Speicherort, um die Datei dauerhaft zu sichern. Wenn Sie die Datei nicht herunterladen, steht Sie Ihnen nur temporär für die Dauer Ihrer aktuellen Sitzung zur Verfügung.

Darstellung der Daten in tabellarischer Form:¶

In [34]:

button_df = widgets.Button(description="Ausgabe als Tabelle")
output3 = widgets.Output()

display(button_df, output3)

def on_button_clicked(b):
    global df
    with output3:
        clear_output()
        #für Titeldaten:
        if auswahl.value == "DNB" and meta.value == "oai_dc":
            result = [parse_record_dc(record) for record in records]
            df = pandas.DataFrame(result)
            df1 = (df.style
                         .format({'URN': make_clickable})
                         .set_properties(**{'text-align': 'left'})
                         .set_table_styles([dict(selector = 'th', props=[('text-align', 'left')])]) )    
            display(df1)
        elif auswahl.value == "DNB" and meta.value == "MARC21-xml":
            result2 = [parse_record_marc(item) for item in records_marc]
            df = pandas.DataFrame(result2)
            df1 = (df.style
                         .format({'URN': make_clickable})
                         .set_properties(**{'text-align': 'left'})
                         .set_table_styles([dict(selector = 'th', props=[('text-align', 'left')])]) )       
            display(df1)
        elif auswahl.value == "DNB" and meta.value == "RDFxml":
            result3 = [parse_record_rdf(item) for item in records]
            df = pandas.DataFrame(result3)
            df1 = (df.style
                         .format({'URN': make_clickable})
                         .set_properties(**{'text-align': 'left'})
                         .set_table_styles([dict(selector = 'th', props=[('text-align', 'left')])]) )       
            display(df1)
        
        #für GND:
        elif auswahl.value == "GND" and meta.value == "MARC21-xml":
            result4 = [parse_record_gndm(item) for item in gndm]
            df = pandas.DataFrame(result4)            
            df1 = (df.style
                         .format({'Link': make_clickable})
                         .set_properties(**{'text-align': 'left'})
                         .set_table_styles([dict(selector = 'th', props=[('text-align', 'left')])]) )                   
            display(df1)
        elif auswahl.value == "GND" and meta.value == "oai_dc":
            result5 = [parse_record_gndoai(item) for item in records]
            df = pandas.DataFrame(result5)
            print('Bitte beachten Sie, dass sich das Format "DNB Casual (oai_dc)" nur bedingt für GND-Datensätze eignet.')
            print('Für eine Darstellung mit mehr Informationen wählen Sie bitte das Format "MARC21-xml".')
            display(df)
        elif auswahl.value == "GND" and meta.value == "RDFxml":
            result6 = [parse_record_gndrdf(item) for item in records]
            df = pandas.DataFrame(result6)
            df1 = (df.style
                         .format({'LINK': make_clickable})
                         .set_properties(**{'text-align': 'left'})
                         .set_table_styles([dict(selector = 'th', props=[('text-align', 'left')])]) )       
            display(df1)
        
        #für DMA:
        elif auswahl.value == "DMA" and meta.value == "MARC21-xml":
            result7 = [parse_record_dmamarc(item) for item in records_marc]
            df = pandas.DataFrame(result7)            
            df1 = (df.style
                         .format({'URN': make_clickable})
                         .set_properties(**{'text-align': 'left'})
                         .set_table_styles([dict(selector = 'th', props=[('text-align', 'left')])]) )                   
            display(df1)
        elif auswahl.value == "DMA" and meta.value == "oai_dc":
            result8 = [parse_record_dmadc(record) for record in records]
            df = pandas.DataFrame(result8)
            df1 = (df.style
                         .format({'URN': make_clickable})
                         .set_properties(**{'text-align': 'left'})
                         .set_table_styles([dict(selector = 'th', props=[('text-align', 'left')])]) ) 
            display(df1)
        elif auswahl.value == "DMA" and meta.value == "RDFxml":
            result9 = [parse_record_dmardf(record) for record in records]
            df = pandas.DataFrame(result9)
            df1 = (df.style
                         .format({'URN': make_clickable})
                         .set_properties(**{'text-align': 'left'})
                         .set_table_styles([dict(selector = 'th', props=[('text-align', 'left')])]) ) 
            display(df1)
        else:
            print("ERROR")
            

    
button_df.on_click(on_button_clicked)

Button(description='Ausgabe als Tabelle', style=ButtonStyle())

Output()

Tabelle in .csv-Datei überführen:¶

Sie finden die hier erstellte Datei im selben Verzeichnis wie oben bereits angegeben - die hier erzeugte Datei erscheint als "Tabelle.csv".

In [17]:

button_csv = widgets.Button(description="Als CSV speichern")
output4 = widgets.Output()

display(button_csv, output4)

def on_button_clicked(b):
    with output4:
        df.to_csv("Tabelle.csv")
            
button_csv.on_click(on_button_clicked)

Button(description='Als CSV speichern', style=ButtonStyle())

Output()

In [ ]: