#!/usr/bin/env python
# coding: utf-8

# In[1]:


import numpy as np
import matplotlib.pyplot as plt


# In[2]:


def returnPrecisionAndRecall(TP, FP, TN, FN):
    """Compute precision and recall from confusion-matrix counts.

    Args:
        TP: Number of true positives.
        FP: Number of false positives.
        TN: Number of true negatives (accepted but not used by either ratio).
        FN: Number of false negatives.

    Returns:
        A dict with keys 'precision' (TP / (TP + FP)) and 'recall'
        (TP / (TP + FN)). A ratio whose denominator is zero is reported as 0.
    """
    predicted_positive = TP + FP
    actual_positive = TP + FN

    # Start from the zero-denominator fallback and overwrite where defined.
    scores = {'precision': 0, 'recall': 0}
    if predicted_positive:
        scores['precision'] = TP / predicted_positive
    if actual_positive:
        scores['recall'] = TP / actual_positive
    return scores

# Okay, set how many true and false values are in the original dataset.
# NOTE(review): these two counts are not read by the simulation loop below,
# which is driven by sumValues only. Confirm whether the draws were meant to
# be constrained by them.

actualTrueValues = 50
actualFalseValues = 50

sumValues = 100

# One [precision, recall] pair is appended per simulated confusion matrix.
precisionRecallCollector = []

# Now, run a set of simulations

for n in range(5000):
    # Begin by randomly setting the total number of true values returned.
    # np.random.randint excludes `high`, so use sumValues + 1 to make every
    # count from 0 through sumValues (inclusive) reachable.
    totalTrue = np.random.randint(0, high=sumValues + 1)
    # The total number of false values is sumValues - totalTrue
    totalFalse = sumValues - totalTrue
    # Partition totalTrue and totalFalse into TP, FP and TN, FN, respectively.
    # The inclusive upper bound allows the all-TP (FP == 0) and all-TN
    # (FN == 0) splits, and also handles a total of 0 without a special case
    # (randint(0, 1) can only return 0).
    TP = np.random.randint(0, high=totalTrue + 1)
    FP = totalTrue - TP
    TN = np.random.randint(0, high=totalFalse + 1)
    FN = totalFalse - TN
    thisPandR = returnPrecisionAndRecall(TP, FP, TN, FN)
    precisionRecallCollector.append([thisPandR['precision'], thisPandR['recall']])


# In[3]:


# Scatter every simulated pair: column 0 of the collected rows is precision
# (x-axis) and column 1 is recall (y-axis). Semi-transparent black markers
# make dense regions appear darker.
pAndRArray = np.asarray(precisionRecallCollector)
plt.scatter(
    x=pAndRArray[:, 0],
    y=pAndRArray[:, 1],
    color='k',
    alpha=0.25,
)
plt.xlabel('Precision')
plt.ylabel('Recall')