import sys, os

# Disable print
def blockPrint():
    sys.stdout = open(os.devnull, 'w')

# Restore print
def enablePrint():
    sys.stdout = sys.__stdout__
blockPrint()
!python -m spacy download es_core_news_sm
enablePrint()
import pickle
import re
import numpy as np
import warnings
warnings.filterwarnings("ignore")
from spacy.lang.es.stop_words import STOP_WORDS
import es_core_news_sm
import spacy
nlp = spacy.load('es_core_news_sm')
import pandas as pd
from ipywidgets import widgets, interactive, Layout
from IPython.display import clear_output, display
ejemplos = pd.read_csv('data/ejemplos.csv')  # example reports used by the 'Ejemplo' button
# Load the Word2Vec embedding model and the six SVM classifiers
# (stenosis, insufficiency and prosthesis, for the aortic and mitral valves)
with open('src/Final_Models/W2Vmodel.pickle', 'rb') as f:
    W2V = pickle.load(f)
with open('src/Final_Models/ML_model_est_Ao_SVM.pickle', 'rb') as f:
    model_est_Ao = pickle.load(f)
with open('src/Final_Models/ML_model_est_Mv_SVM.pickle', 'rb') as f:
    model_est_Mv = pickle.load(f)
with open('src/Final_Models/ML_model_insf_Ao_SVM.pickle', 'rb') as f:
    model_insf_Ao = pickle.load(f)
with open('src/Final_Models/ML_model_insf_Mv_SVM.pickle', 'rb') as f:
    model_insf_Mv = pickle.load(f)
with open('src/Final_Models/ML_model_prot_Ao_SVM.pickle', 'rb') as f:
    model_prot_Ao = pickle.load(f)
with open('src/Final_Models/ML_model_prot_Mv_SVM.pickle', 'rb') as f:
    model_prot_Mv = pickle.load(f)
def multiple_replace(text):
    # Glue lesion + valve terms into single tokens ("...cia a..." -> "...ciaa...",
    # so e.g. "insuficiencia aortica" becomes one token) and strip accents
    dic = {r"cia a": "ciaa", "cia m": "ciam", "sis a": "sisa", "sis m": "sism",
           'á': 'a', 'é': 'e', 'í': 'i', 'ó': 'o', 'ú': 'u'}
    # Create a regular expression from the dictionary keys
    regex = re.compile("(%s)" % "|".join(map(re.escape, dic.keys())))
    # For each match, look up the corresponding value in the dictionary
    return regex.sub(lambda mo: dic[mo.group(0)], text.lower())
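# Illustrative usage on a made-up report fragment (not from the dataset):
# multiple_replace("Estenosis aórtica severa. Insuficiencia mitral leve.")
# -> 'estenosisaortica severa. insuficienciamitral leve.'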
def clean_text(text):
    # Drop non-word characters (periods are kept as sentence separators) and digits,
    # then split into sentences and keep only those that contain letters
    if isinstance(text, str):
        dic = {r'[^\w.]': ' ', '[ 0-9 ]': ''}
        # Create a regular expression from the dictionary keys
        regex = re.compile(r'(%s)' % "|".join(dic.keys()))
        # For each match, substitute the value of the first pattern that matches it
        lst = regex.sub(lambda mo: dic[[k for k in dic if re.search(k, mo.group(0))][0]], text).lower()
    else:
        lst = ' '
    return [el for el in multiple_replace(lst).split('.') if re.search("[a-z]", el)]
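# Illustrative usage on a made-up fragment (whitespace in the output is approximate):
# clean_text("Insuficiencia mitral moderada. FEVI 60%")
# -> ['insuficienciamitral moderada', ' fevi  ']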
def tokenize_and_lemm_spacy(text):
    doc = nlp(text)
    lemmas = []
    for token in doc:
        # Keep only alphabetic, non-stopword tokens that are not
        # conjunctions, adpositions or determiners
        if not token.is_stop and token.is_alpha and token.pos_ not in ['CONJ', 'ADP', 'DET']:
            lemmas.append(token.lemma_.lower())
    return lemmas
def tokenize_and_lemm_spacy_lst(lst):
    return [tokenize_and_lemm_spacy(el) for el in lst]
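# Illustrative pipeline on a made-up fragment; the exact lemmas depend on the
# es_core_news_sm model, so the output may vary slightly:
# tokenize_and_lemm_spacy_lst(clean_text("Insuficiencia mitral moderada."))
# -> roughly [['insuficienciamitral', 'moderado']]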
def get_w2v_features(w2v_model, sentence_group):
    """Transform a sentence_group (containing multiple lists
    of words) into a feature vector by averaging all the
    word vectors of the sentence_group.
    """
    words = np.concatenate(sentence_group)           # words in the text
    index2word_set = set(w2v_model.wv.vocab.keys())  # words known to the model
    featureVec = np.zeros(w2v_model.vector_size, dtype="float32")
    # Counter for the number of in-vocabulary words
    nwords = 0
    # Loop over each word and, if it is in the model's vocabulary, add its vector to the total
    for word in words:
        if word in index2word_set:
            featureVec = np.add(featureVec, w2v_model.wv[word])
            nwords += 1.
    # Divide the result by the number of words to get the average
    if nwords > 0:
        featureVec = np.divide(featureVec, nwords)
    return featureVec
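# Illustrative usage (assumes W2V was trained with a gensim version that exposes
# .wv.vocab, i.e. gensim < 4.0; in gensim >= 4.0 use .wv.key_to_index instead):
# sentences = tokenize_and_lemm_spacy_lst(clean_text("Insuficiencia mitral moderada."))
# vec = get_w2v_features(W2V, sentences)   # 1-D array of length W2V.vector_size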
def get_resp(X, model, cut_off, valv):
    # Single-sample prediction: compare the class-1 probability with the valve-specific cut-off
    prob = model.predict_proba(X)[0, 1]
    if 'Nativa' in valv:
        # The 'native valve' questions use the prosthesis models, so the answer is inverted
        respuesta = 'No' if prob > cut_off[valv] else 'Sí'
    else:
        respuesta = 'Sí' if prob > cut_off[valv] else 'No'
    return respuesta
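# Illustrative call (hypothetical; X_w2v is built inside the callback below):
# get_resp(X_w2v, models['Estenosis Aórtica'], cut_off, 'Estenosis Aórtica')
# -> 'Sí' if the predicted class-1 probability exceeds 0.9, otherwise 'No'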
def get_dropdown(resp, valv):
    dropdown = widgets.Dropdown(
        options=['Sí', 'No'],
        value=resp,
        layout=Layout(width='90%')
    )
    return dropdown
# Decision threshold per question
cut_off = {'Válvula Aórtica Nativa': 0.7, 'Válvula Mitral Nativa': 0.7,
           'Estenosis Aórtica': 0.9, 'Estenosis Mitral': 0.7,
           'Insuficiencia Aórtica': 0.7, 'Insuficiencia Mitral': 0.7}
# Classifier per question (the 'native valve' questions reuse the prosthesis models)
models = {'Válvula Aórtica Nativa': model_prot_Ao, 'Válvula Mitral Nativa': model_prot_Mv,
          'Estenosis Aórtica': model_est_Ao, 'Estenosis Mitral': model_est_Mv,
          'Insuficiencia Aórtica': model_insf_Ao, 'Insuficiencia Mitral': model_insf_Mv}
dropdowns = {}
def callback(w):
    with output:
        clear_output()
        try:
            # Clean, tokenize and lemmatize the report, then build its averaged Word2Vec vector
            txt = tokenize_and_lemm_spacy_lst(clean_text(conclusion.value))
            w2v_features = list(map(lambda sen_group: get_w2v_features(W2V, sen_group), [txt]))
            X_w2v = np.array(list(map(np.array, w2v_features)))
            # One dropdown per question, pre-filled with the model's answer
            for valv in cut_off.keys():
                dropdowns[valv] = get_dropdown(get_resp(X_w2v, models[valv], cut_off, valv), valv)
            cols = [(valv, [dropdowns[valv]]) for valv in dropdowns.keys()]
            vboxes = []
            for valv, data in cols:
                vboxes.append(widgets.VBox([widgets.HTML('<b>%s</b>' % valv)] + data, layout=Layout(width='200px')))
            display(widgets.HBox(vboxes, layout=widgets.Layout(width='70%')), widgets.Button(description='Commit'))
        except Exception:
            pass
def callback_ej(w):
    # Fill the text area with a randomly sampled example report
    conclusion.value = ejemplos.sample(n=1).values[0][0]

conclusion = widgets.Textarea(description='', placeholder='Type something',
                              continuous_update=True, disabled=False,
                              layout=Layout(width='80%', height='140px'))
tab2a = widgets.VBox(children=[conclusion])
ejemplo = widgets.Button(description='Ejemplo')
ejemplo.on_click(callback_ej)
submit = widgets.Button(description='Submit')
submit.on_click(callback)
output = widgets.Output()
gui = widgets.VBox(children=[ejemplo,tab2a, submit, output])
gui