#!/usr/bin/env python
# coding: utf-8

# # Getting started
#
# It is assumed that you have read
# [start](start.ipynb)
# and followed the installation instructions there.

# # Corpus
#
# This is
#
# * `oldbabylonian` Old Babylonian Letters

# # First acquaintance
#
# We just want to grasp what the corpus is about and how we can find our way in the data.
#
# Open a terminal or command prompt and say one of the following
#
# ```text-fabric oldbabylonian```
#
# Wait and see a lot happening before your browser starts up and shows you an interface on the corpus:
# Text-Fabric needs an app to deal with the corpus-specific things.
# It downloads/finds/caches the latest version of the **app**:
#
# ```
# Using TF-app in /Users/dirk/text-fabric-data/annotation/app-oldbabylonian/code:
# rv0.2=#4bb2530bfb94dc93601f8b3df7722cb0e5df7a43 (latest release)
# ```
#
# It downloads/finds/caches the latest version of the **data**:
#
# ```
# Using data in /Users/dirk/text-fabric-data/Nino-cunei/oldbabylonian/tf/1.0.4:
# rv1.4=#43c36d148794e3feeb3dd39e105ce6a4df79c467 (latest release)
# ```
#
# The data is preprocessed in order to speed up typical Text-Fabric operations.
# The result is cached on your computer.
# Preprocessing costs time. Next time you use this corpus on this machine, the startup time is much quicker.
#
# ```
# TF setup done.
# ```
#
# Then the app goes on to act as a local webserver serving the corpus that has just been downloaded
# and it will open your browser for you and load the corpus page
#
# ```
# * Running on http://localhost:8106/ (Press CTRL+C to quit)
# Opening oldbabylonian in browser
# Listening at port 18986
# ```
#
# # Help!
#
# Indeed, that is what you need. Click the vertical `Help` tab.
#
# From there, click around a little bit. Don't read closely, just note the kinds of information that are presented to you.
#
# Later on, it will make more sense!

# # Browsing
#
# First we browse our data. Click the browse button.
#
#
#
# and then, in the table of *documents* (tablets), click on `obverse`
#
#
#
# Now you're looking at one side of a tablet: the marks in an ASCII transcription.
#
#
# Now click the *Options* tab and select the `layout-orig-unicode` format to see the same tablet in cuneiform signs.
#
#
# You can click a triangle to see how a line is broken down:
#
#

# # Searching
#
# See that line, starting with the word `um-ma`, and whose last word ends in the sign `ma`?
#
# That is a pattern. Let's search for it.
#
# Enter this query in the search pad and press the search icon above it.
#
# ```
# line
#   =: word
#     =: sign reading=um
#     <: sign reading=ma
#     :=
#   < sign reading=ma
#   :=
# ```
#
#
# In English:
#
# search all `line`s that contain a `word` and a `sign` where:
#
# * `=:` the `word` starts where the `line` starts
# * the `word` contains a `sign` and a `sign` where:
#   * `=:` the first `sign` starts where the `word` starts
#   * `<:` the second sign follows the first sign immediately
#   * `:=` the second sign ends where the word ends
# * `<` the `sign` comes after the word
# * `:=` the `sign` ends where the line ends
#
#
# You can expand results by clicking the triangle.
#
# You can see the result in context by clicking the browse icon.
#
# You can go back to the result list by clicking the results icon.
#
#

# # Computing
#
# We see that this line comes at the start of a tablet.
#
# In fact, this pattern corresponds to a heading of a letter.
#
# Question: of all 1274 results, how many are the first line, the second line, the third line, etc?
#
# *This is a typical question where you want to leave the search mode and enter computing mode*.
#
# Let's do that!
#
# If you have followed the installation instructions, you are nearly set.
#
# Open your terminal and say
#
# ``` sh
# jupyter notebook
# ```
#
# Your browser starts up and presents you with a local computing environment where you can run Python programs.
#
# You see cells like the one below, where you can type programming statements and execute them by pressing `Shift Enter`.

# In[ ]:




# First we load the Text-Fabric module, as follows:

# In[1]:


from tf.app import use


# Now we load the TF-app for the corpus `oldbabylonian` and that app loads the corpus data.
#
# We give a name to the result of all that loading: `A`.

# In[3]:


# `hoist=globals()` lets `use` add the Text-Fabric API names to the global
# namespace; the cells below rely on `F` being available that way.
A = use('Nino-cunei/oldbabylonian', hoist=globals())


# Some bits are familiar from above, when you ran the `text-fabric` command in the terminal.
#
# Other bits are links to the documentation; they point to the same places as the links on the Text-Fabric browser.
#
# You see a list of all the data features that have been loaded.
#
# And a list of references to the API documentation, which tells you how you can use this data in your program statements.

# # Searching (revisited)
#
# We do the same search again, but now inside our program.
#
# That means that we can capture the results in a list for further processing.

# In[4]:


# NOTE: indentation inside a search template is significant: an indented line
# describes a node embedded in the node of the less-indented line above it.
# Here: a line whose first word consists of the signs `um` + `ma`, and whose
# last sign is `ma`.
results = A.search('''
line
  =: word
    =: sign reading=um
    <: sign reading=ma
    :=
  < sign reading=ma
  :=
''')


# In less than a second, we have all the results!
#
# Let's look at the first one:

# In[5]:


results[0]


# Each result is a list of numbers: for a
#
# 1. line
# 1. word
# 1. sign
# 1. sign
# 1. sign
#
#
# Here is the second one:

# In[6]:


results[1]


# And here the last one:

# In[7]:


results[-1]


# Now we want to find out something for each result line: which line number does it have among the lines on the same tablet face?
#
# Click the link `Feature docs` above, and read a bit under **Node type line**.
#
# There you see that the feature `ln` is of particular interest to us.
#
# First we get the line number of result 1000:

# In[8]:


# Index 999 is the 1000th result; element 0 of a result is its line node.
node = results[999][0]
print(node)
lineNumber = F.ln.v(node)
print(lineNumber)


# Now we collect the set of all line numbers that our result lines have:

# In[9]:


{F.ln.v(result[0]) for result in results}


# What we really want to know is how the result lines are distributed over the line numbers.

# In[10]:


import collections


# In[11]:


# Count how many result lines carry each line number.
distribution = collections.Counter(F.ln.v(result[0]) for result in results)
print(distribution)


# An overwhelming majority has it on line 3
#
# Let's make the output a bit more friendly:

# In[12]:


for lineNumber, amount in sorted(distribution.items()):
    print(f'line {lineNumber:>2} is home to {amount:>3} results')


# We can now inspect more closely what is going on, for example where results appear late in the tablet, after line 16:

# In[13]:


# Same template as above, restricted to lines whose `ln` feature exceeds 16.
results16 = A.search('''
line ln>16
  =: word
    =: sign reading=um
    <: sign reading=ma
    :=
  < sign reading=ma
  :=
''')


# And we can show them here too:

# In[14]:


A.table(results16)


# But at this point it might be easier to take the new query back to the Text-Fabric browser and query it there:
#
#

# In[ ]: