#!/usr/bin/env python
# coding: utf-8

# # Using LX-Syllabifier to syllabify all words in a text
# This is an example notebook that illustrates how you can use the LX-Syllabifier web service to
# analyse a text.
#
# **Before you run this example**, replace `access_key_goes_here` by your webservice access key, below:

# In[1]:

LXSLLABIFIER_WS_API_KEY = 'access_key_goes_here'
LXSLLABIFIER_WS_API_URL = 'https://portulanclarin.net/workbench/lx-syllabifier/api/'


# ## Importing required Python modules
# The next cell will take care of installing the `requests` package,
# if not already installed, and make it available to use in this notebook.

# In[2]:

try:
    import requests
except ImportError:
    # Only a missing package should trigger the install; a bare `except`
    # would also swallow KeyboardInterrupt and unrelated failures.
    get_ipython().system('pip3 install requests')
    import requests
from IPython.display import HTML, display_html


# ## Wrapping the complexities of the JSON-RPC API in a simple, easy to use function
#
# The `WSException` class defined below will be used later to identify errors
# from the webservice.

# In[3]:

class WSException(Exception):
    """Error reported by the webservice in a JSON-RPC error object.

    The human-readable text is available as ``self.message`` and is also what
    ``str(exc)`` returns.
    """

    def __init__(self, errordata):
        """Build the exception from a JSON-RPC error object.

        Arguments
            errordata: a dict returned by the webservice with details about
                the error; its "message" and "code" members are read, and the
                "data" member is used when present.

        Raises
            TypeError if errordata is not a dict.
        """
        if not isinstance(errordata, dict):
            raise TypeError(
                f"errordata must be a dict, not {type(errordata).__name__}"
            )
        message = errordata["message"]
        # see https://json-rpc.readthedocs.io/en/latest/exceptions.html for more info
        # about JSON-RPC error codes
        if -32099 <= errordata["code"] <= -32000:  # Server Error
            # The "data" member is optional in JSON-RPC 2.0, so only append
            # details when the server actually sent some.
            if "data" in errordata:
                data = errordata["data"]
                if (
                    isinstance(data, dict)
                    and data.get("type") == "WebServiceException"
                    and "message" in data
                ):
                    message += f": {data['message']}"
                else:
                    message += f": {data!r}"
        # Pass the text (not the exception object itself) to the base class so
        # that `args` holds the message.
        super().__init__(message)
        self.message = message

    def __str__(self):
        return self.message


# The next function invokes the LX-Syllabifier webservice through its public JSON-RPC API.
# In[4]:

def syllabify(text):
    """Syllabify a Portuguese text with the LX-Syllabifier webservice.

    Arguments
        text: a string with a maximum of 10000 characters, Portuguese text, with
             the input to be processed

    Returns a string or JSON object with the output according to specification in
       https://portulanclarin.net/workbench/lx-syllabifier/

    Raises a WSException if an error occurs.
    """
    return _call_webservice('syllabify', {'text': text})


def _call_webservice(method, params):
    """POST one JSON-RPC 2.0 request to the webservice and return its result.

    Arguments
        method: name of the remote method to invoke
        params: dict with the method parameters; the access key is added to a
            copy of it, the caller's dict is not modified

    Returns the "result" member of the decoded JSON response.

    Raises a WSException if the response contains an "error" member.
    """
    request_data = {
        'method': method,
        'jsonrpc': '2.0',
        'id': 0,
        'params': dict(params, key=LXSLLABIFIER_WS_API_KEY),
    }
    response = requests.post(LXSLLABIFIER_WS_API_URL, json=request_data)
    response_data = response.json()
    if "error" in response_data:
        raise WSException(response_data["error"])
    return response_data["result"]


# The next function will count the number of syllables in a given string (already processed by LX-Syllabifier):

# In[5]:

def count_syllables(s):
    """Return the number of syllables in a string processed by LX-Syllabifier.

    Syllables inside a token are separated by "|", so every token contributes
    its number of "|" separators plus one.
    """
    # this is a naive tokenization based on whitespace, but in principle it poses no problem
    # because punctuation will be attached to the previous token and that will not change the
    # number of syllables
    #
    # split() without an argument is used so that an empty string, or leading,
    # trailing and repeated whitespace, do not produce empty tokens that would
    # each be counted as one syllable.
    return sum(token.count("|") + 1 for token in s.split())


# Here are a few stanzas from Luís de Camões' work "Os Lusíadas" that we will use in our experiment:

# In[6]:

# One verse per line: the loop further below splits the syllabified output
# into lines and counts the syllables of each one.
stanzas = ["""
As armas e os barões assinalados,
Que da ocidental praia Lusitana,
Por mares nunca de antes navegados,
Passaram ainda além da Taprobana,
Em perigos e guerras esforçados,
Mais do que prometia a força humana,
E entre gente remota edificaram
Novo Reino, que tanto sublimaram;
""","""
E também as memórias gloriosas
Daqueles Reis, que foram dilatando
A Fé, o Império, e as terras viciosas
De África e de Ásia andaram devastando;
E aqueles, que por obras valerosas
Se vão da lei da morte libertando;
Cantando espalharei por toda parte,
Se a tanto me ajudar o engenho e arte.
""","""
Cessem do sábio Grego e do Troiano
As navegações grandes que fizeram;
Cale-se de Alexandro e de Trajano
A fama das vitórias que tiveram;
Que eu canto o peito ilustre Lusitano,
A quem Neptuno e Marte obedeceram:
Cesse tudo o que a Musa antígua canta,
Que outro valor mais alto se alevanta.
""","""
E vós, Tágides minhas, pois criado
Tendes em mim um novo engenho ardente,
Se sempre em verso humilde celebrado
Foi de mim vosso rio alegremente,
Dai-me agora um som alto e sublimado,
Um estilo grandíloquo e corrente,
Porque de vossas águas, Febo ordene
Que não tenham inveja às de Hipoerene.
"""]


# Next, we will use the functions we defined above for syllabifying an excerpt from Luís de Camões' work "Os Lusíadas", and to count the number of syllables in each line:

# In[7]:

# NOTE(review): in the reviewed source the string literals passed to HTML()
# below contain nothing but newlines; any markup they originally carried looks
# like it was lost before review -- confirm against the original notebook.
for stanza in stanzas:
    syllabified = syllabify(stanza)
    html = ['\n']
    html.extend(
        f'\n{count_syllables(verse)}\n{verse.replace("|", "·")}\n'
        for verse in syllabified.strip().splitlines()
    )
    html.append('\n')
    display_html(HTML("".join(html)))
# NOTE(review): assumed to run once, after the loop -- confirm.
display_html(HTML(""""""))


# ## Getting the status of a webservice access key

# In[8]:

def get_key_status():
    """Returns a string with the detailed status of the webservice access key"""
    return _call_webservice('key_status', {})


# In[9]:

get_key_status()