#!/usr/bin/env python
# coding: utf-8

# ---
#
# To get started: consult [start](start.ipynb)
#
# ---
#
# # Search Introduction
#
# *Search* in Text-Fabric is a template based way of looking for structural patterns in your dataset.
#
# Within Text-Fabric we have the unique possibility to combine the ease of formulating search templates for
# complicated syntactical patterns with the power of programmatically processing the results.
#
# This notebook will show you how to get up and running.
#
# ## Easy command
#
# Search is as simple as saying (just an example)
#
# ```python
# results = A.search(template)
# A.show(results)
# ```
#
# See all ins and outs in the
# [search template docs](https://annotation.github.io/text-fabric/tf/about/searchusage.html).

# # Incantation
#
# The ins and outs of installing Text-Fabric, getting the corpus, and initializing a notebook are
# explained in the [start tutorial](start.ipynb).

# In[1]:


# NOTE: `get_ipython()` only exists when this script runs under IPython/Jupyter.
get_ipython().run_line_magic('load_ext', 'autoreload')
get_ipython().run_line_magic('autoreload', '2')


# In[2]:


from tf.app import use


# In[3]:


A = use("clariah/wp6-missieven", hoist=globals())


# # Basic search command
#
# We start with the most simple form of issuing a query.
# Let's look for the words in volume 4, page 239, in the lines with a number below 9.
#
# In a search template every line describes one node, and indentation
# expresses embedding: the words must lie in the line, the line in the page,
# the page in the volume.
#
# All work involved in searching takes place under the hood.

# In[5]:


query = """
volume n=4
  page n=239
    line n<9
      word
"""
results = A.search(query)
# Each result is a (volume, page, line, word) tuple; hide the first three columns.
A.table(results, skipCols="1 2 3")


# The hyperlinks take us to the online image of this page at the Huygens institute.
#
# Note that we can choose start and/or end points in the results list.

# In[6]:


A.table(results, start=44, end=53, skipCols="1 2")


# We can show the results more fully with `show()`.

# In[7]:


A.show(results, skipCols="1 2 3", condensed=True, condenseType="line")


# Now we pick all numerical words, or rather, words that contain a digit

# In[8]:


query = """
volume n=4
  page n=239
    line n<9
      word trans~[0-9]
"""
results = A.search(query)
A.show(results, skipCols="1 2 3", condensed=True)


# Let's look for all places where there is a remark by the editor:

# In[9]:


query = """
word isremark
"""
results = A.search(query)


# We can narrow down to the page we just inspected:

# In[10]:


query = """
volume n=4
  page n=239
    word isremark
"""
results = A.search(query)


# and show the results:

# In[11]:


A.show(results, condensed=True)


# # Special characters
#
# How can we look for special characters?
#
# Let's first see what special characters we have in the corpus.

# In[6]:


# NOTE(review): this reloads the corpus with the `:clone` specifier, which
# presumably requires a local clone of the data repository -- confirm that
# this is intended for readers of the tutorial and not a development leftover.
A = use("clariah/wp6-missieven:clone", hoist=globals())


# In[12]:


A.specialCharacters()


# If you click on a character it is copied to the clipboard.
#
# We can search for all words with a black square:

# In[7]:


results = A.search("""
word trans~■
""")


# In[8]:


A.table(results, condensed=True)


# ---
#
# # Contents
#
# * **[start](start.ipynb)** start computing with this corpus
# * **search** turbo charge your hand-coding with search templates
# * **[compute](compute.ipynb)** sink down a level and compute it yourself
# * **[exportExcel](exportExcel.ipynb)** make tailor-made spreadsheets out of your results
# * **[annotate](annotate.ipynb)** export text, annotate with BRAT, import annotations
# * **[share](share.ipynb)** draw in other people's data and let them use yours
# * **[entities](entities.ipynb)** use results of third-party NER (named entity recognition)
# * **[porting](porting.ipynb)** port features made against an older version to a newer version
# * **[volumes](volumes.ipynb)** work with selected volumes only
#
# CC-BY Dirk Roorda