#!/usr/bin/env python
# coding: utf-8

# # [Doc4TF/tools/versionMapping](https://github.com/tonyjurg/Doc4TF/tools/versionMapping.ipynb)
# #### *Mapping node changes between two Text-Fabric datasets*
#
# Version: 0.1 (May. 13, 2024).

# ## Table of contents
# * 1 - Introduction
# * 2 - Setting up the environment
# * 3 - Load Text-Fabric data
# * 4 - Creation of the dataset
#     * 4.1 - Setting up some global variables
#     * 4.2 - Store all relevant data into a dictionary
# * 5 - Create the documentation pages
#     * 5.1 - Create the set of feature pages
#     * 5.2 - Create the index pages
# * 6 - Licence

# # 1 - Introduction
# ##### [Back to TOC](#TOC)
#
# This notebook uses the module [tf.dataset.nodemaps](https://annotation.github.io/text-fabric/tf/dataset/nodemaps.html). See also the description provided with the module.

# # 2 - Setting up the environment
# ##### [Back to TOC](#TOC)
# Your environment should (for obvious reasons) include the Python package `Text-Fabric`. If it is not installed yet, it can be installed using `pip`. Furthermore, it must be possible to load the Text-Fabric datasets (either from an online resource or from a locally stored copy).

# # 3 - Load Text-Fabric data
# ##### [Back to TOC](#TOC)
# See also notebook [map.ipynb](https://nbviewer.org/github/clariah/wp6-missieven/blob/master/programs/map.ipynb).
#
# See [dataset.Versions](https://annotation.github.io/text-fabric/tf/dataset/nodemaps.html#tf.dataset.nodemaps.Versions) in the Text-Fabric documentation.
# In[7]:

get_ipython().run_line_magic('load_ext', 'autoreload')
get_ipython().run_line_magic('autoreload', '2')


# In[25]:

# Loading the Text-Fabric code and module versions
from tf.fabric import Fabric
from tf.dataset import Versions
from tf.app import use

va = "0.5.6"
# vb = "0.5.7"
# NOTE(review): the loop below needs TF, api, TF_DIR and features, none of
# which is defined in this file, so it is kept as a reference snippet only.
# Both dataset versions are loaded through `use()` further down.
# for v in (va, vb):
#     TF[v] = Fabric(locations=TF_DIR, modules=v)
#     api[v] = TF[v].load(features[v])


# In[11]:

# also required: module marimo
get_ipython().system('pip install marimo')


# In[68]:

# Load the app and data from the first version in the set for comparison
A1 = use("saulocantanhede/tfgreek2", version="0.5.7")


# In[70]:

# Load the app and data from the second version in the set for comparison
A2 = use("saulocantanhede/tfgreek2", version="0.5.6")


# In[48]:

A2.zipAll()


# In[9]:

from tf.advanced.helpers import dm
from tf.advanced.repo import checkoutRepo


# In[56]:

def do(task):
    """
    Display the result of a `checkoutRepo` call as a one-row Markdown table.

    Parameters:
        task (sequence): The value returned by `checkoutRepo`; its first five
            items are shown under the headings commit, release, local, base
            and subdir.

    Returns:
        None: The table is rendered through `dm`.
    """
    # The line breaks inside the literal are significant: they delimit the
    # header, separator and data rows of the Markdown table.
    md = f"""
commit | release | local | base | subdir
--- | --- | --- | --- | ---
`{task[0]}` | `{task[1]}` | `{task[2]}` | `{task[3]}` | `{task[4]}`
"""
    dm(md)


# In[58]:

do(checkoutRepo(backend='github', org="saulocantanhede", repo="tfgreek2", folder="tf", version="0.5.6", checkout=""))


# In[62]:

do(checkoutRepo(backend='github', org="saulocantanhede", repo="tfgreek2", folder="tf", version="0.5.7", checkout=""))


# # 4 - Creation of the dataset

# ## 4.1 - Setting up some global variables
# ##### [Back to TOC](#TOC)

# In[4]:

# The version number of the script
scriptVersion = "0.1"
scriptDate = "May. 12, 2024"

# Settings read by the analysis in section 4.2. A value that already exists in
# the session (e.g. set in an earlier notebook cell) takes precedence.
# NOTE(review): the fallback values below are defaults chosen here - adjust.
verbose = globals().get('verbose', False)       # True: print details per feature
tableLimit = globals().get('tableLimit', 10)    # max. frequency entries kept per node type


# ## 4.2 - Store all relevant data into a dictionary
# ##### [Back to TOC](#TOC)
# The following will create a dictionary containing all relevant information for the loaded node and edge features.

# In[5]:

# Initialize an empty dictionary to store feature data
featureDict = {}

import time
overallTime = time.time()


def getFeatureDescription(metaData):
    """
    Return the description stored in the metadata of a feature.

    Parameters:
        metaData (dict): A dictionary containing metadata about a feature.

    Returns:
        str: The value under the 'description' key, or the text
            "No feature description" when that key is absent.
    """
    return metaData.get('description', "No feature description")


def setDataType(metaData):
    """
    Translate the 'valueType' entry of the metadata into a display label.

    Parameters:
        metaData (dict): A dictionary containing metadata about a feature.

    Returns:
        str: 'String' when 'valueType' equals 'str', 'Integer' for any other
            'valueType', and 'Unknown' when the key is absent.
    """
    if 'valueType' not in metaData:
        return "Unknown"
    return "String" if metaData["valueType"] == 'str' else "Integer"


def processFeature(feature, featureType, featureMethod):
    """
    Collect description, data type and per-node-type frequencies of one feature.

    The result is stored in the global `featureDict` under the feature name.
    No frequencies are gathered for the node feature 'otype' and the edge
    feature 'oslots'; they still get an entry with an empty 'freqlist'.

    Parameters:
        feature (str): The name of the feature to be processed.
        featureType (str): The type of the feature ('Node' or 'Edge').
        featureMethod (function): Called with the feature name; must return an
            object offering `.meta` and `.freqList(...)`.

    Returns:
        None: The function only updates the global `featureDict`.
    """
    # Look the feature object up once instead of once per node type.
    featureObject = featureMethod(feature)
    featureMetaData = featureObject.meta

    featureFrequencyDict = {}
    skipFrequencies = (featureType, feature) in {('Node', 'otype'), ('Edge', 'oslots')}
    if not skipFrequencies:
        for nodeType in F.otype.all:
            # freqList is documented to take a *set* of node types. Passing the
            # bare string would (presumably) be matched by substring, so that
            # e.g. 'phrase' nodes get counted under 'subphrase'.
            frequencyLists = featureObject.freqList({nodeType})
            if isinstance(frequencyLists, int):
                # A plain number is returned instead of a list of
                # (value, frequency) pairs; store it as a single "Link" entry.
                if frequencyLists != 0:
                    featureFrequencyDict[nodeType] = {'nodetype': nodeType, 'freq': [("Link", frequencyLists)]}
            elif len(frequencyLists) != 0:
                featureFrequencyDict[nodeType] = {'nodetype': nodeType, 'freq': frequencyLists[:tableLimit]}

    # Add processed feature data to the main dictionary
    featureDict[feature] = {
        'name': feature,
        'descr': getFeatureDescription(featureMetaData),
        'type': featureType,
        'datatype': setDataType(featureMetaData),
        'freqlist': featureFrequencyDict,
    }


########################################################
#                   MAIN FUNCTION                      #
########################################################

# The analysis below addresses a single dataset through the names A, F, Fs,
# Fall, Es and Eall. They are not assigned anywhere else in this file, so they
# are bound explicitly here; an `A` already present in the session wins.
# NOTE(review): A1 is used as the default dataset - confirm that this is the
# version that should be analysed.
A = globals().get('A', A1)
F, Fs, Fall = A.api.F, A.api.Fs, A.api.Fall
Es, Eall = A.api.Es, A.api.Eall

########################################################
#          Gather general information                  #
########################################################

print('Gathering generic details')

# Initialize default values
corpusName = A.appName
liveName = ''
versionName = A.version

# Trying to locate corpus information
if A.provenance:
    for parts in A.provenance[0]:
        if not isinstance(parts, tuple):
            continue
        key, value = parts[0], parts[1]
        if verbose:
            print(f'General info: {key}={value}')
        if key == 'corpus':
            corpusName = value
        elif key == 'version':
            versionName = value
        elif key == 'live':
            # value for live is a tuple; its second item is used as the URL
            liveName = value[1]

if liveName is not None and len(liveName) > 1:
    # a URL was found: link the corpus name in both title variants
    pageTitleMD = f'Doc4TF pages for [{corpusName}]({liveName}) (version {versionName})'
    pageTitleHTML = f'<h1>Doc4TF pages for <a href="{liveName}">{corpusName}</a> (version {versionName})</h1>'
else:
    # No URL found
    pageTitleMD = f'Doc4TF pages for {corpusName} (version {versionName})'
    pageTitleHTML = f'<h1>Doc4TF pages for {corpusName} (version {versionName})</h1>'

# Overwrite in case user provided a title
if 'customPageTitleMD' in globals():
    pageTitleMD = customPageTitleMD
if 'customPageTitleHTML' in globals():
    pageTitleHTML = customPageTitleHTML

########################################################
#          Processing node features                    #
########################################################

print('Analyzing Node Features: ', end='')
for nodeFeature in Fall():
    if not verbose:
        print('.', end='')  # Progress indicator
    processFeature(nodeFeature, 'Node', Fs)
    if verbose:
        print(f'\nFeature {nodeFeature} = {featureDict[nodeFeature]}\n')  # Print feature data if verbose

########################################################
#           Processing edge features                   #
########################################################

print('\nAnalyzing Edge Features: ', end='')
for edgeFeature in Eall():
    if not verbose:
        print('.', end='')  # Progress indicator
    processFeature(edgeFeature, 'Edge', Es)
    if verbose:
        print(f'\nFeature {edgeFeature} = {featureDict[edgeFeature]}\n')  # Print feature data if verbose

print(f'\nFinished in {time.time() - overallTime:.2f} seconds.')


# # 6 - License
# ##### [Back to TOC](#TOC)
# Licensed under [Creative Commons Attribution 4.0 International (CC BY 4.0)](https://github.com/tonyjurg/Doc4TF/blob/main/LICENCE.md)