#!/usr/bin/env python
# coding: utf-8

# In[11]:


import numpy as np
import pandas as pd
import matplotlib.pyplot as plt1
from datetime import datetime

get_ipython().run_line_magic('matplotlib', 'inline')

# returns the elapsed milliseconds since the start of the program
def timesec(start_time):
   dt = datetime.now() - start_time
   ms = (dt.days * 24 * 60 * 60 + dt.seconds)  + dt.microseconds /1000000
   return ms

start_time = datetime.now()

fig= plt1.figure(figsize=(15, 5), constrained_layout=True)
widths = [5,5,5]
heights = [5]
gs=fig.add_gridspec(1,3,width_ratios=widths, height_ratios=heights, wspace=0.05)

ax1a=fig.add_subplot(gs[0,:1])
ax1b=fig.add_subplot(gs[0,1:2])
ax1c=fig.add_subplot(gs[0,2:])

ax1a.clear()
ax1b.clear()
ax1c.clear()

#---------------------------------------Main Code---------------------------------------
#Samples, trials (pick trials >1000 to avoid zero counts) and repeats
samples=500
trials=2000
repeats=1000

#Set arrays to zero
piarray=np.zeros(repeats)
narray=np.zeros(repeats)
stdarray=np.zeros(repeats)
countarray=np.zeros(repeats)

#Sample array and expected value
sa=np.array([0,1,2,3,4,5,6,7,8,9])
#sa=np.array([0,1])
ev=(np.max(sa)-np.min(sa))/2

#Detemermine: Stdev, And dx
var=((np.max(sa)-np.min(sa)+1)**2-1)/12
stdev=np.sqrt(var)
stdevt=stdev/np.sqrt(samples)
dx=1/samples

for p in range(repeats):

    #Create Array random numbers
    random=np.random.choice(sa,[trials,samples])

    #Determine mean from random no of numbers (of samples)
    m=np.mean(random,axis=1)
    mn=np.mean(m)

    #Create dataframe and count number of observation per interval
    df=pd.DataFrame({'m' : m})
    dfg=df.groupby(['m'])['m'].agg(['count']).reset_index()

    #Histogram x=bins and y is density
    x=dfg['m'].to_numpy()
    yt=dfg['count'].to_numpy()
    y=yt/(dx*trials)

    #Count number of observation on mean: 4.5
    out=dfg[(dfg['m']<(ev+dx/2)) & (dfg['m']>(ev-dx/2))]
    #out=dfg[((dfg['m'] < (4.5+stdevt+dx/2)) & (dfg['m'] > (4.5+stdevt-dx/2))) | ((dfg['m'] < (4.5-stdevt+dx/2)) & (dfg['m'] > (4.5-stdevt-dx/2)))]
       
    nc=out['count'].to_numpy()
    n=np.sum(nc)
    narray[p]=n

    #Stdev Array
    stdev=np.std(m)
    stdarray[p]=stdev
    
    #Countarray
    count=np.sum(yt)
    countarray[p]=count
    
    #Plot 200 distributions
    if (1000*p/repeats)%2==0:    
        ax1a.plot(x,y, marker='o', color='grey', linestyle='', markersize=0.02, linewidth=0, zorder=10)

#Calculate pi and fill array
piarray=0.5*(trials*dx/(stdevt*narray))**2
#piarray=0.5*(trials*dx/(stdevt*narray)*np.exp(-0.5*(1-ev/stdevt)**2))**2

#Output    
nmean=np.mean(narray)
nsum=np.sum(narray)
nstdev=np.std(narray)
p=nsum/np.sum(countarray)
nstdevm=nstdev/np.sqrt(repeats)

#probability mean value theoretical, sample size based upon samples from [0,1,2..9]
pt=dx/(stdevt*np.sqrt(2*np.pi))
n=(np.max(sa)-np.min(sa))*1/dx

#Normality test https://en.wikipedia.org/wiki/Binomial_distribution#Normal_approximation
ntest=True

if (n*p<9):
    ntp='np=' + str(np.round(n*p,0)) + ', <9 fail'
    ntest=False
else:
    ntp='np=' + str(np.round(n*p,0)) + ', ≥9 pass'
    
if ((n*(1-p))<9):
    nomp='n(1-p)=' + str(np.round(n*(1-p),0)) + ', <9 fail'
    ntest=False
else:
    nomp='n(1-p)=' + str(np.round(n*(1-p),0)) + ', ≥9 pass'

if (ntest==False):
    nttext='normality test failed'
else:
    nttext='normality test passed'    

#95% score
#nerr=trials*1.96*np.sqrt(p*(1-p)/(trials*repeats))
#nerr=n*1.96*np.sqrt(p*(1-p)/(n))

pim=0.5*(dx*trials/(stdevt*nmean))**2
#pim=0.5*(trials*dx/(stdevt*narray)*np.exp(-0.5*(1-ev/stdevt)**2))**2

piminc=0.5*(dx*trials/(stdevt*(nmean+2*nstdevm)))**2
pimaxc=0.5*(dx*trials/(stdevt*(nmean-2*nstdevm)))**2
#---------------------------------------Main Code---------------------------------------

xt=np.linspace(np.min(sa),np.max(sa),1000)
yt=1/(stdevt*np.sqrt(2*np.pi))*np.exp(-0.5*((ev-xt)/stdevt)**2)  

ax1a.plot(xt,yt, marker='', color='blue', linestyle='-', markersize=0, linewidth=1.5,zorder=10,label='$\pi$ (theory)')
ax1a.plot(x,y, marker='o', color='black', linestyle='', markersize=0.1, linewidth=0, zorder=10,label='statistical')
ax1a.set_xlabel('$Mean$',fontsize=10)
ax1a.set_ylabel('$Density$',fontsize=10)
ax1a.set_title('Mean of '+ str(samples) + ' samples, '+ str(sa) + '\n' +str(trials) + ' trials, ' + str(repeats)  + ' repeats',fontsize=12)
ax1a.text(0.02,0.98,'Mean: ' + str(np.round(np.mean(m),4)) + ', Std: ' +  str(np.round(np.mean(stdarray),4)) + '\nn: ' + str(np.round(n,4)) +   ' (bins)\np: ' + str(np.round(p,4)) + ' (mean)\n' + nomp +  '\n' + ntp + '\n' + nttext  , backgroundcolor='white', transform=ax1a.transAxes, fontsize=10,verticalalignment='top')
ax1a.grid(b=True, which='major', color='#666666', linestyle='-', zorder=0)    
ax1a.legend(loc='upper right',fontsize=10,markerscale=15)
ax1a.ticklabel_format(axis='y', style='sci', scilimits=None, useOffset=None, useLocale=None, useMathText=None)
ax1a.axes.set_ylim([0,1.2*1/(np.std(m)*np.sqrt(2*np.pi))])
ax1a.axes.set_xlim([ev-3*stdevt,ev+3*stdevt])

#Determine Bins and Max n array
step=(np.max(narray)-np.min(narray))/20
nround=step*np.round(narray/step,0)
df=pd.DataFrame({'nr' : nround})
dfg=df.groupby(['nr'])['nr'].agg(['count']).reset_index()
x=dfg['nr'].to_numpy()
bins=dfg['nr'].to_numpy()
y=dfg['count'].to_numpy()
y=y/(step*np.sum(y))
mx=np.max(y)

ax1b.hist(narray, density=True, bins=(bins-0.5*step), rwidth=1, color='black', edgecolor='white', linewidth=1, zorder=10)
ax1b.plot(x,y, marker='v', color='gray', linestyle='-', markersize=2, zorder=10,linewidth=0)
ax1b.plot([nmean,nmean],[0,1.5*mx], marker='', color='darkorange', linestyle='-', markersize=0, linewidth=1.5,zorder=12, label='$\pi$ (statistical)')
#ax1b.plot([nmean-3*nstdevm,nmean-3*nstdevm],[0,1.5*mx], marker='', color='darkorange', linestyle='-', markersize=0, linewidth=0.5,zorder=12)
#ax1b.plot([nmean+3*nstdevm,nmean+3*nstdevm],[0,1.5*mx], marker='', color='darkorange', linestyle='-', markersize=0, linewidth=0.5,zorder=12)
ax1b.set_xlabel('$Count$',fontsize=10)
ax1b.set_ylabel('$Density$',fontsize=10)
ax1b.set_title('Count $\overline{n}$ (mean='+ str(ev) +')\n' + str(trials) + ' trials, ' + str(repeats)  + ' repeats',fontsize=12)
ax1b.text(0.02,0.98,'Mean: ' +  str(np.round(nmean,4)) + ', Std: '+  str(np.round(nstdev,4))  + '\n' +  str(np.round(nmean-2*nstdevm,4)) +  '$<\overline{n}$<'+  str(np.round(nmean+2*nstdevm,4)) + ' (95%)'    , backgroundcolor='white', transform=ax1b.transAxes, fontsize=10,verticalalignment='top', zorder=15)
ax1b.grid(b=True, which='major', color='#666666', linestyle='-', zorder=0)
ax1b.ticklabel_format(axis='y', style='sci', scilimits=None, useOffset=None, useLocale=None, useMathText=None)
ax1b.axes.set_ylim([0,1.2*mx])
ax1b.axes.set_xlim([np.mean(narray)-4*np.std(narray),np.mean(narray)+4*np.std(narray)])
ax1b.ticklabel_format(axis="x", style="sci", scilimits=(0,0))
ax1b.legend(loc='upper right',fontsize=10)

#Determine Bins and Max Piarray
step=(np.max(piarray)-np.min(piarray))/20
piround=step*np.round(piarray/step,0)
df=pd.DataFrame({'pir' : piround})
dfg=df.groupby(['pir'])['pir'].agg(['count']).reset_index()
x=dfg['pir'].to_numpy()
bins=dfg['pir'].to_numpy()
y=dfg['count'].to_numpy()
y=y/(step*np.sum(y))
mx=np.max(y)

ax1c.hist(piarray, density=True, rwidth=1, bins=(bins-0.5*step), color='black', edgecolor='white', linewidth=1, zorder=1)
ax1c.plot(x,y, marker='v', color='gray', linestyle='-', markersize=2, zorder=10,linewidth=0)
ax1c.plot([pim,pim],[0,1.5*mx], marker='', color='darkorange', linestyle='-', markersize=0, linewidth=1.5, label='$\pi$ (statistical)')
#ax1c.plot([piminc,piminc],[0,1.5*mx], marker='', color='darkorange', linestyle='-', markersize=0, linewidth=0.5)
#ax1c.plot([pimaxc,pimaxc],[0,1.5*mx], marker='', color='darkorange', linestyle='-', markersize=0, linewidth=0.5)
ax1c.set_xlabel('$\pi$',fontsize=10)
ax1c.set_ylabel('$Density$',fontsize=10)
ax1c.set_title('$\pi$ Histogram\n'+ str(repeats) + ' repeats',fontsize=12)
ax1c.text(0.02,0.98,'$\pi$: ' + str(np.round(pim,4)) + ' (mean $\overline{n}$)'+ '\n' +  str(np.round(piminc,4)) +  '$<\pi$<'+  str(np.round(pimaxc,4)) + ' (95%)' , backgroundcolor='white', transform=ax1c.transAxes, fontsize=10,verticalalignment='top')
ax1c.grid(b=True, which='major', color='#666666', linestyle='-', zorder=0)
ax1c.legend(loc='upper right',fontsize=10)
ax1c.axes.set_ylim([0,1.2*mx])
#ax1c.axes.set_xlim([np.min(piarray),np.mx(piarray)])
ax1c.ticklabel_format(axis='y', style='sci', scilimits=None, useOffset=None, useLocale=None, useMathText=None)

plt1.savefig('Pi Random ' +  str(sa) + ', Samples=' + str(int(samples)) + ', Trials=' + str(int(trials)) + ', Repeats=' + str(int(repeats)), dpi=300, bbox_inches='tight')

print("Calculation Time: " + str(round(timesec(start_time),1))+" seconds")
print('pi: ' + str(np.round(pim,4)))
print(str(np.round(piminc,4)) + '<pi<' + str(np.round(pimaxc,4)) + ' (95%)')


# In[ ]:


# In[ ]: