#!/usr/bin/env python
# coding: utf-8

# # Free Text Marking Helper
#
# One of the issues faced by script markers is marking free text answers against a canned answer.
#
# The following recipe provides a quick and dirty similarity check between a provided answer and a specimen answer using a small `spacy` natural language model, `en_core_web_sm`.
#
# Performance of the similarity matcher may be improved by using a larger model (eg the medium size `en_core_web_md` model or the full `en_core_web_lg` model), but these are rather slower to download, install and run.
#
# In an *nbgrader* context, we can do something like the following in a question cell defined as part of a manually graded test:
#
# ```python
# # Add your commentary here
#
# #For now, the specimen answer needs to be assigned to a specific variable
# #It would make more sense to define an %%assignment_text block magic that lets you write some
# #text in a code cell and then let the magic assign this to a variable that can then be automatically tested.
#
# answer_txt = '''
# YOUR ANSWER TEXT
# '''
# ```
#
# and then in the associated test cell assign the specimen answer to `___specimen_txt`:
#
# ```python
# ### BEGIN HIDDEN TESTS
# ___specimen_txt = '''
# As much English wine's consumed as each of Bordeaux red and Burgundy white.
# The biggest spike is on the Reception Wines, and we don't really know anything
# about the provenance of those wines.
#
# So given the global pre-eminence of French wines, maybe HoC
# aren't doing too bad a job of making the case for English
# and Welsh.
# '''
#
# lang_test(___specimen_txt, answer_txt)
# ### END HIDDEN TESTS
# ```
#
# Once again, it may make more sense to define some magic to handle this, such as a `%%mark_against_specimen` cell magic that could take a block of answer text, use it as the basis of comparison, and display or return the result directly. (A speculative sketch of such magics appears at the end of this notebook.)
#
# The marker can then review the similarity grid produced by the automated marking support and assign an appropriate manual mark.
#
# It might also be worth considering whether similarity scores should be fed back to the student, or used in support of generating (or at least drafting) canned feedback.

# ## Essential Imports
#
# Import the necessary packages (note: `spacy` is required for the similarity checker and `pandas` is used, overkill style, to display the results).

# In[1]:


#!pip install spacy
#!pip install pandas

import spacy

try:
    import en_core_web_sm
except ImportError:
    #Download the model if it isn't already installed
    import spacy.cli
    spacy.cli.download("en_core_web_sm")
    import en_core_web_sm

import warnings
from spacy.errors import ModelsWarning
warnings.filterwarnings("ignore", category=ModelsWarning)

import pandas as pd


# In[2]:


#Loading the model may take some time
nlp = en_core_web_sm.load()


# ## Test and Report
#
# Generate a couple of reports:
#
# - an overall similarity score between the answer text and the provided specimen answer;
# - a similarity score between each sentence in the answer text and each sentence in the specimen.
#
# We can add a limit value to the marker so that only sentence pairs whose similarity score exceeds a minimum threshold are displayed. (This can help where a supplied answer is spread over several sentences compared to the specimen answer.)
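# Before defining the full marker, the following quick sanity check (not part of the original recipe) shows what `spacy` similarity scores look like. Note that the small model ships without word vectors, so its similarity scores are approximated from other model features and should be treated as indicative only.

# In[ ]:


#Illustrative sketch only: compare two short texts directly
#Scores will vary with the model and spacy version used
print(nlp("The wine was French.").similarity(nlp("The wine came from France.")))
print(nlp("The wine was French.").similarity(nlp("The weather was sunny.")))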
# In[29]:


from IPython.display import display, HTML

def lang_test(___specimen_txt, answer_txt, threshold=0, show_side_by_side=True, retval=False):
    ___specimen = nlp(___specimen_txt.replace('\n', ' '))
    ___answer = nlp(answer_txt.replace('\n', ' '))

    #It doesn't matter which order we present the parsed sentences for similarity scoring
    display(HTML('<h3>Similarity score overall</h3><p>Overall similarity score: {}</p>'.format(___specimen.similarity(___answer))))

    #Another approach may be to split the answer into sentences,
    #and then compare each sentence against each sentence in the specimen,
    #perhaps retaining the best match score for each answer sentence?
    ___sentence_scores = pd.DataFrame(columns=['Answer', 'Similarity', 'Specimen',
                                               'Answer (no stopwords)', 'Specimen (no stopwords)'])
    ___sentence_full_scores = pd.DataFrame(columns=['Answer', 'Similarity Overall'])

    for ___answer_sent in ___answer.sents:
        #Strip stopwords and punctuation before scoring
        ___answer_sent_nostop = nlp(' '.join([token.text for token in ___answer_sent
                                              if not token.is_stop and not token.is_punct]))
        ___sentence_full_scores = pd.concat([___sentence_full_scores,
                                             pd.DataFrame({'Answer': [___answer_sent.text],
                                                           'Similarity Overall': [___specimen.similarity(___answer_sent_nostop)]})])
        for ___specimen_sent in ___specimen.sents:
            ___specimen_sent_nostop = nlp(' '.join([token.text for token in ___specimen_sent
                                                    if not token.is_stop and not token.is_punct]))
            #print('\nComparing:', ___answer_sent, '\nvs.\n', ___specimen_sent,
            #      '\nMark:\t', ___specimen_sent.similarity(___answer_sent))
            ___sentence_scores = pd.concat([___sentence_scores,
                                            pd.DataFrame({'Answer': [___answer_sent.text],
                                                          'Similarity': [___specimen_sent_nostop.similarity(___answer_sent_nostop)],
                                                          'Specimen': [___specimen_sent.text],
                                                          'Answer (no stopwords)': [___answer_sent_nostop.text],
                                                          'Specimen (no stopwords)': [___specimen_sent_nostop.text]})])

    ___pd_default_colwidth = pd.get_option('display.max_colwidth')
    pd.set_option('display.max_colwidth', None)

    if show_side_by_side:
        display(HTML('''<table>
<tr><th>Provided</th><th>Specimen</th></tr>
<tr><td>{}</td><td>{}</td></tr>
</table>'''.format(answer_txt, ___specimen_txt)))

    display(HTML('<h3>Sentence level similarity with full specimen answer</h3>'))
    display(___sentence_full_scores[___sentence_full_scores['Similarity Overall'] > threshold])

    display(HTML('<h3>Sentence level matching</h3>'))
    ___sentence_scores.set_index(['Answer', 'Specimen'], inplace=True)
    display(___sentence_scores[___sentence_scores['Similarity'] > threshold])

    pd.set_option('display.max_colwidth', ___pd_default_colwidth)

    if retval:
        #Return the score dataframes for programmatic use
        return ___sentence_full_scores, ___sentence_scores


# In[30]:


answer_txt = '''
Reception wine was the most consumed, but we don't know where that comes from.
English and Welsh wines are the next most consumed, followed by white Burgundy and red Bordeaux.
'''


# In[31]:


___specimen_txt = '''
As much English wine's consumed as each of Bordeaux red and Burgundy white.
The biggest spike is on the Reception Wines, and we don't really know anything
about the provenance of those wines.

So given the global pre-eminence of French wines, maybe HoC
aren't doing too bad a job of making the case for English
and Welsh.
'''

lang_test(___specimen_txt, answer_txt)


# In[33]:


#Only report sentence pairs with a similarity score above 0.7
lang_test(___specimen_txt, answer_txt, 0.7)
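# Since the helper can also return its score dataframes via the `retval` parameter (as patched above), a marker can post-process the scores programmatically. The following is an illustrative sketch only, keeping just the best matching specimen sentence for each answer sentence:

# In[ ]:


#Illustrative sketch: retrieve the score dataframes as well as displaying the reports
full_scores, pair_scores = lang_test(___specimen_txt, answer_txt, retval=True)

#Keep the highest scoring specimen match for each answer sentence
best_matches = (pair_scores.reset_index()
                .sort_values('Similarity', ascending=False)
                .groupby('Answer', sort=False).head(1))
display(best_matches[['Answer', 'Specimen', 'Similarity']])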
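# Finally, as mooted at the start of this notebook, the assignment and marking steps could be wrapped up as IPython magics. The following is a speculative sketch only: the magic names `%%assignment_text` and `%%mark_against_specimen` follow the suggestions above rather than any existing API, and the code assumes it is run in an IPython/Jupyter context where `lang_test` and `answer_txt` are already defined.

# In[ ]:


from IPython.core.magic import register_cell_magic

@register_cell_magic
def assignment_text(line, cell):
    """Assign the body of the cell to the variable named on the magic line
    (defaulting to answer_txt), so free text can be written directly in a code cell."""
    name = line.strip() or 'answer_txt'
    get_ipython().user_ns[name] = cell

@register_cell_magic
def mark_against_specimen(line, cell):
    """Treat the body of the cell as the specimen text and mark answer_txt against it."""
    lang_test(cell, answer_txt)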