import sys, os

# Disable print
def blockPrint():
    sys.stdout = open(os.devnull, 'w')

# Restore print
def enablePrint():
    sys.stdout = sys.__stdout__
blockPrint()
!python -m spacy download es_core_news_sm
enablePrint()
import pickle
import re
import numpy as np
import warnings
warnings.filterwarnings("ignore")
from spacy.lang.es.stop_words import STOP_WORDS
import es_core_news_sm
import spacy
nlp = spacy.load('es_core_news_sm')
import pandas as pd
from ipywidgets import widgets, interactive, Layout
from IPython.display import clear_output, display
ejemplos = pd.read_csv('data/ejemplos.csv')  # example reports used by the 'Ejemplo' button
# Load the Word2Vec embedding model and the six SVM classifiers
# (stenosis, insufficiency and prosthesis, for the aortic and mitral valves)
with open('src/Final_Models/W2Vmodel.pickle', 'rb') as f:
    W2V = pickle.load(f)
with open('src/Final_Models/ML_model_est_Ao_SVM.pickle', 'rb') as f:
    model_est_Ao = pickle.load(f)
with open('src/Final_Models/ML_model_est_Mv_SVM.pickle', 'rb') as f:
    model_est_Mv = pickle.load(f)
with open('src/Final_Models/ML_model_insf_Ao_SVM.pickle', 'rb') as f:
    model_insf_Ao = pickle.load(f)
with open('src/Final_Models/ML_model_insf_Mv_SVM.pickle', 'rb') as f:
    model_insf_Mv = pickle.load(f)
with open('src/Final_Models/ML_model_prot_Ao_SVM.pickle', 'rb') as f:
    model_prot_Ao = pickle.load(f)
with open('src/Final_Models/ML_model_prot_Mv_SVM.pickle', 'rb') as f:
    model_prot_Mv = pickle.load(f)
def multiple_replace(text):
    # Glue lesion + valve terms into single tokens ("...cia a..." -> "...ciaa...",
    # so e.g. "insuficiencia aortica" becomes one token) and strip accents
    dic = {r"cia a": "ciaa", "cia m": "ciam", "sis a": "sisa", "sis m": "sism",
           'á': 'a', 'é': 'e', 'í': 'i', 'ó': 'o', 'ú': 'u'}
    # Create a regular expression from the dictionary keys
    regex = re.compile("(%s)" % "|".join(map(re.escape, dic.keys())))
    # For each match, look up the corresponding value in the dictionary
    return regex.sub(lambda mo: dic[mo.group(0)], text.lower())
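# Illustrative usage on a made-up report fragment (not from the dataset):
# multiple_replace("Estenosis aórtica severa. Insuficiencia mitral leve.")
# -> 'estenosisaortica severa. insuficienciamitral leve.'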
def clean_text(text):
    # Drop non-word characters (periods are kept as sentence separators) and digits,
    # then split into sentences and keep only those that contain letters
    if isinstance(text, str):
        dic = {r'[^\w.]': ' ', '[ 0-9 ]': ''}
        # Create a regular expression from the dictionary keys
        regex = re.compile(r'(%s)' % "|".join(dic.keys()))
        # For each match, substitute the value of the first pattern that matches it
        lst = regex.sub(lambda mo: dic[[k for k in dic if re.search(k, mo.group(0))][0]], text).lower()
    else:
        lst = ' '
    return [el for el in multiple_replace(lst).split('.') if re.search("[a-z]", el)]
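# Illustrative usage on a made-up fragment (whitespace in the output is approximate):
# clean_text("Insuficiencia mitral moderada. FEVI 60%")
# -> ['insuficienciamitral moderada', ' fevi  ']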
def tokenize_and_lemm_spacy(text):
    doc = nlp(text)
    lemmas = []
    for token in doc:
        # Keep only alphabetic, non-stopword tokens that are not
        # conjunctions, adpositions or determiners
        if not token.is_stop and token.is_alpha and token.pos_ not in ['CONJ', 'ADP', 'DET']:
            lemmas.append(token.lemma_.lower())
    return lemmas
def tokenize_and_lemm_spacy_lst(lst):
    return [tokenize_and_lemm_spacy(el) for el in lst]
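# Illustrative pipeline on a made-up fragment; the exact lemmas depend on the
# es_core_news_sm model, so the output may vary slightly:
# tokenize_and_lemm_spacy_lst(clean_text("Insuficiencia mitral moderada."))
# -> roughly [['insuficienciamitral', 'moderado']]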
def get_w2v_features(w2v_model, sentence_group):
    """Transform a sentence_group (containing multiple lists
    of words) into a feature vector by averaging all the
    word vectors of the sentence_group.
    """
    words = np.concatenate(sentence_group)           # words in the text
    index2word_set = set(w2v_model.wv.vocab.keys())  # words known to the model
    featureVec = np.zeros(w2v_model.vector_size, dtype="float32")
    # Counter for the number of in-vocabulary words
    nwords = 0
    # Loop over each word and, if it is in the model's vocabulary, add its vector to the total
    for word in words:
        if word in index2word_set:
            featureVec = np.add(featureVec, w2v_model.wv[word])
            nwords += 1.
    # Divide the result by the number of words to get the average
    if nwords > 0:
        featureVec = np.divide(featureVec, nwords)
    return featureVec
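# Illustrative usage (assumes W2V was trained with a gensim version that exposes
# .wv.vocab, i.e. gensim < 4.0; in gensim >= 4.0 use .wv.key_to_index instead):
# sentences = tokenize_and_lemm_spacy_lst(clean_text("Insuficiencia mitral moderada."))
# vec = get_w2v_features(W2V, sentences)   # 1-D array of length W2V.vector_size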
def get_resp(X, model, cut_off, valv):
    # Single-sample prediction: compare the class-1 probability with the valve-specific cut-off
    prob = model.predict_proba(X)[0, 1]
    if 'Nativa' in valv:
        # The 'native valve' questions use the prosthesis models, so the answer is inverted
        respuesta = 'No' if prob > cut_off[valv] else 'Sí'
    else:
        respuesta = 'Sí' if prob > cut_off[valv] else 'No'
    return respuesta
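# Illustrative call (hypothetical; X_w2v is built inside the callback below):
# get_resp(X_w2v, models['Estenosis Aórtica'], cut_off, 'Estenosis Aórtica')
# -> 'Sí' if the predicted class-1 probability exceeds 0.9, otherwise 'No'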
def get_dropdown(resp, valv):
    dropdown = widgets.Dropdown(
        options=['Sí', 'No'],
        value=resp,
        layout=Layout(width='90%')
    )
    return dropdown
# Decision threshold per question
cut_off = {'Válvula Aórtica Nativa': 0.7, 'Válvula Mitral Nativa': 0.7,
           'Estenosis Aórtica': 0.9, 'Estenosis Mitral': 0.7,
           'Insuficiencia Aórtica': 0.7, 'Insuficiencia Mitral': 0.7}
# Classifier per question (the 'native valve' questions reuse the prosthesis models)
models = {'Válvula Aórtica Nativa': model_prot_Ao, 'Válvula Mitral Nativa': model_prot_Mv,
          'Estenosis Aórtica': model_est_Ao, 'Estenosis Mitral': model_est_Mv,
          'Insuficiencia Aórtica': model_insf_Ao, 'Insuficiencia Mitral': model_insf_Mv}
dropdowns = {}
def callback(w):
    with output:
        clear_output()
        try:
            # Clean, tokenize and lemmatize the report, then build its averaged Word2Vec vector
            txt = tokenize_and_lemm_spacy_lst(clean_text(conclusion.value))
            w2v_features = list(map(lambda sen_group: get_w2v_features(W2V, sen_group), [txt]))
            X_w2v = np.array(list(map(np.array, w2v_features)))
            # One dropdown per question, pre-filled with the model's answer
            for valv in cut_off.keys():
                dropdowns[valv] = get_dropdown(get_resp(X_w2v, models[valv], cut_off, valv), valv)
            cols = [(valv, [dropdowns[valv]]) for valv in dropdowns.keys()]
            vboxes = []
            for valv, data in cols:
                vboxes.append(widgets.VBox([widgets.HTML('<b>%s</b>' % valv)] + data, layout=Layout(width='200px')))
            display(widgets.HBox(vboxes, layout=widgets.Layout(width='70%')), widgets.Button(description='Commit'))
        except Exception:
            pass
def callback_ej(w):
    # Fill the text area with a randomly sampled example report
    conclusion.value = ejemplos.sample(n=1).values[0][0]

conclusion = widgets.Textarea(description='', placeholder='Type something',
                              continuous_update=True, disabled=False,
                              layout=Layout(width='80%', height='140px'))
tab2a = widgets.VBox(children=[conclusion])
ejemplo = widgets.Button(description='Ejemplo')
ejemplo.on_click(callback_ej)
submit = widgets.Button(description='Submit')
submit.on_click(callback)
output = widgets.Output()
gui = widgets.VBox(children=[ejemplo,tab2a, submit, output])
gui