#!/usr/bin/env python
# coding: utf-8

# # Hiding failures with travis_retry
#
# `travis_retry` runs tests up to 3 times, potentially hiding intermittent failures.
# Let's check how likely we are to hide real failures, given their frequency:

# In[1]:

get_ipython().run_line_magic('matplotlib', 'inline')

import matplotlib.pyplot as plt
import seaborn as sns  # imported only for its nicer default plot styling

# We start with the probability of a single test failure, $b$:
#
# $$
# P(1) = b
# $$
#
# Since each retry is an independent attempt, the probability of a single run
# failing all $r$ retries is:
#
# $$
# P(r) = b^r
# $$
#
# And if we are doing $n$ simultaneous test runs with $r$ retries each,
# the probability that we will register *any* failure is:
#
# $$
# \begin{aligned}
# P(\text{any fail}) &= 1 - P(\text{all succeed}) \\
# &= 1 - (1 - P(r))^{n} \\
# &= 1 - (1 - b^r)^{n}
# \end{aligned}
# $$
#
# which we can plot against $b$ for various $n$:

# In[2]:

import numpy as np

badness = np.linspace(0, 1)  # probability of a single failure
retries = 3  # number of times a single test matrix will be run before accepting defeat

p_retry_ok = 1 - (badness ** retries)  # P(one run passes within `retries` attempts)

for runs in range(1, 8):
    p_any_fail = 1 - (p_retry_ok ** runs)
    plt.plot(badness, p_any_fail, label='n=%i' % runs)

plt.legend(loc=0)
plt.title("%i retries" % retries)
plt.xlabel("$P($single failure$)$")
plt.ylabel("$P($registered failure$)$");

# In the notebook right now, $n$ is either 2 or 20, depending on where the bug lies.

# In[3]:

def any_fail(b, n, r=3):
    """Probability of registering at least one failure across n runs,
    each retried up to r times, when each attempt fails with probability b."""
    return 1 - (1 - b**r) ** n

# In[4]:

print("n=2,b=0.5: %.2f" % any_fail(b=0.5, n=2))
print("n=20,b=0.25: %.2f" % any_fail(b=0.25, n=20))

# So if there's a failure happening 50% of the time in a single test group,
# we'll still only see it on about 25% of test runs.
# If it occurs 25% of the time across *all* js test groups, we'll see
# at least a single failure on only about 25% of test runs.
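# We can sanity-check the closed form above with a quick Monte Carlo
# simulation (a sketch added here, not part of the original analysis):
# simulate many CI builds of $n$ runs, each retried up to $r$ times, and
# count how often at least one run exhausts its retries. `any_fail_mc` is
# a helper defined just for this check.

# In[5]:

rng = np.random.default_rng(0)

def any_fail_mc(b, n, r=3, trials=100_000):
    # each attempt fails independently with probability b
    attempts = rng.random((trials, n, r)) < b
    # a run fails only if all r of its attempts fail
    run_failed = attempts.all(axis=2)
    # fraction of simulated builds registering at least one failure
    return run_failed.any(axis=1).mean()

print("closed form: %.3f" % any_fail(b=0.5, n=2))
print("simulated:   %.3f" % any_fail_mc(b=0.5, n=2))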
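# The flip side is how often `travis_retry` *hides* the failure entirely:
# the chance that CI goes green despite the bug is $P(\text{all succeed}) =
# (1 - b^r)^n$, straight from the derivation above. A small sketch
# (`p_masked` is a name introduced here for illustration):

# In[6]:

def p_masked(b, n, r=3):
    # probability that every one of the n runs passes within r attempts,
    # i.e. the intermittent failure never registers
    return (1 - b**r) ** n

for b in (0.1, 0.25, 0.5):
    print("b=%.2f: CI green on %.1f%% of builds (n=2)" % (b, 100 * p_masked(b, 2)))

# Even a failure occurring half the time slips through unnoticed on roughly
# three quarters of builds when $n = 2$.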