#!/usr/bin/env python
# coding: utf-8
"""Monte-Carlo sketch of which (precision, recall) pairs are reachable.

Random confusion matrices are drawn for a fixed number of samples and the
resulting precision/recall pairs are shown as a scatter plot.
"""

# In[1]:

import numpy as np


# In[2]:

def returnPrecisionAndRecall(TP, FP, TN, FN):
    """Return precision and recall for one confusion matrix.

    Args:
        TP: Number of true positives.
        FP: Number of false positives.
        TN: Number of true negatives. Accepted for a complete confusion
            matrix signature; neither metric uses it.
        FN: Number of false negatives.

    Returns:
        A dict with keys ``'precision'`` (TP / (TP + FP)) and ``'recall'``
        (TP / (TP + FN)). A metric whose denominator is zero is reported
        as 0 instead of raising ZeroDivisionError.
    """
    predictedPositives = TP + FP
    actualPositives = TP + FN
    precision = TP / predictedPositives if predictedPositives else 0
    recall = TP / actualPositives if actualPositives else 0
    return {'precision': precision, 'recall': recall}


def simulatePrecisionAndRecall(numSimulations, sumValues, rng=None):
    """Draw random confusion matrices and collect their precision/recall.

    Each simulation splits ``sumValues`` samples into predicted-true and
    predicted-false groups, then splits those groups into (TP, FP) and
    (TN, FN) respectively. Every split point is drawn uniformly from the
    *inclusive* range, so the extreme cases (FP == 0, FN == 0, nothing
    predicted true, nothing predicted false) can all occur.

    Args:
        numSimulations: How many confusion matrices to draw.
        sumValues: Total number of samples in each confusion matrix.
        rng: Object exposing ``randint(low, high)`` with an exclusive upper
            bound, e.g. ``np.random.RandomState(seed)`` for reproducible
            runs. Defaults to the global ``np.random`` state.

    Returns:
        A list of ``[precision, recall]`` pairs, one per simulation.

    Raises:
        ValueError: If ``sumValues`` is negative.
    """
    if sumValues < 0:
        raise ValueError('sumValues must be non-negative')
    if rng is None:
        rng = np.random

    collector = []
    for _ in range(numSimulations):
        # randint excludes its upper bound, hence the "+ 1" on every draw.
        # A zero-sized group then simply yields 0, so no guards are needed.
        totalTrue = rng.randint(0, sumValues + 1)
        totalFalse = sumValues - totalTrue

        # Partition totalTrue into TP/FP and totalFalse into TN/FN
        TP = rng.randint(0, totalTrue + 1)
        FP = totalTrue - TP
        TN = rng.randint(0, totalFalse + 1)
        FN = totalFalse - TN

        thisPandR = returnPrecisionAndRecall(TP, FP, TN, FN)
        collector.append([thisPandR['precision'], thisPandR['recall']])
    return collector


# Okay, set how many true and false values are in the original dataset
# NOTE: the simulation only uses their sum; it does not force
# TP + FN == actualTrueValues or FP + TN == actualFalseValues.
actualTrueValues = 50
actualFalseValues = 50
sumValues = actualTrueValues + actualFalseValues


if __name__ == "__main__":
    # Imported here rather than at the top so the helpers above can be
    # imported (e.g. for testing) without a plotting stack installed. When
    # the file is run as a script or notebook, plt is still a module-level
    # name.
    import matplotlib.pyplot as plt

    # Now, run a set of simulations
    precisionRecallCollector = simulatePrecisionAndRecall(5000, sumValues)

    # In[3]:

    pAndRArray = np.asarray(precisionRecallCollector)
    plt.scatter(pAndRArray[:, 0], pAndRArray[:, 1], color='k', alpha=0.25)
    plt.xlabel('Precision')
    plt.ylabel('Recall')