#!/usr/bin/env python
# coding: utf-8
# In[1]:
import numpy as np
from pycircstat2 import Circular, load_data
# # Hypothesis Testing
#
# ### Table of Contents
#
# - [rayleigh_test](#the-rayleigh-test)
# - [V_test](#the-v-test)
# - [omnibus_test](#the-hodges-ajne-test)
# - [batschelet_test](#the-batschelet-test)
# - [chisquare_test](#chi-square-test)
# - [kuiper_test](#kuipers-test)
# - [watson_test](#watsons-one-sample-u2-test)
# - [rao_spacing_test](#raos-spacing-test)
# - [symmetry_test](#symmetry-test-around-the-median)
# - [one_sample_test](#one-sample-test)
# - [watson_williams_test](#watson-williams-test-for-two-multisample)
# - [watson_u2_test](#watsons-u2-test-for-two-multisample-with-or-without-ties)
# - [wheeler_watson_test](#wheeler-and-watson-two-sample-test)
# - [wallraff_test](#wallraffs-two-sample-test-for-angular-dispersion)
#
# ### See also
#
# Chapter 27 of Zar (2010) contains many worked examples and step-by-step guides for computing most circular hypothesis tests. We replicated all of those examples and figures in the notebook [`B2-Zar-2010`](https://nbviewer.org/github/circstat/pycircstat2/blob/main/examples/B2-Zar-2010.ipynb) with `pycircstat2`.
# ## Testing for Uniformity
# ### The Rayleigh Test
#
# `rayleigh_test(alpha)` tests $H_{0}: \rho=0$ vs. $H_{A}: \rho \neq 0$, where $\rho$ is the population mean vector length. If the Rayleigh Test rejects $H_0$ ($p<0.05$), then the population is not a uniform circular distribution, or there is a mean population direction.
#
# **NOTE**: The Rayleigh Test assumes the data is unimodal.
# In[2]:
from pycircstat2.hypothesis import rayleigh_test

# Zar dataset D1: pull the angle column ("θ") out as a plain array and wrap
# it in a Circular object.
d1 = load_data("D1", source="zar")["θ"].values
c1 = Circular(data=d1)

# The test is fed the `alpha` attribute of the Circular object; its two
# return values are unpacked as `z` and `pval`.
z, pval = rayleigh_test(
    c1.alpha,
    verbose=True,
)
# ### The V Test
#
# `V_test(angle, alpha)` is a modified Rayleigh test that tests $H_{0}: \rho=0$ vs. $H_{A}: \rho \neq 0$ and the population has a specified mean angle ($\mu$), given by `angle`.
# In[3]:
from pycircstat2.hypothesis import V_test

# Zar dataset D7, angle column only.
d7 = load_data("D7", source="zar")["θ"].values
c7 = Circular(data=d7)

# The specified direction is 90 degrees, converted to radians before being
# handed to the test.
V, u, pval = V_test(
    angle=np.deg2rad(90),
    alpha=c7.alpha,
    verbose=True,
)
# ### The Hodges-Ajne Test
#
#
# `omnibus_test(alpha)` tests $H_0$: uniform vs. $H_A$: not uniform. Also called Ajne's A Test, or the "omnibus test" because it works well for unimodal, bimodal, and multimodal distributions.
# In[4]:
from pycircstat2.hypothesis import omnibus_test

# Zar dataset D8, angle column only. `c8` is reused by the Batschelet test
# further down.
d8 = load_data("D8", source="zar")["θ"].values
c8 = Circular(data=d8)

A, pval = omnibus_test(
    c8.alpha,
    verbose=True,
)
# ### The Batschelet Test
#
# `batschelet_test(alpha)` is a modified Hodges-Ajne Test that tests $H_0$: uniform vs. $H_A$: not uniform but concentrated around an angle θ.
# In[5]:
from pycircstat2.hypothesis import batschelet_test

# Reuses `c8` (the D8 sample built for the Hodges-Ajne test). The specified
# angle is 45 degrees, expressed in radians.
C, pval = batschelet_test(
    angle=np.deg2rad(45),
    alpha=c8.alpha,
    verbose=True,
)
# ### Goodness-of-Fit Tests for Uniformity
#
# #### Chi-Square Test
#
# `chisquare_test(alpha)` tests the goodness of fit of a theoretical circular frequency distribution to an observed one. Here it is used to test whether the data in the population are distributed uniformly around the circle. This method is for grouped data.
# In[6]:
from pycircstat2.hypothesis import chisquare_test

# Zar dataset D2 is kept as a table because two columns are needed:
# "θ" for the data and "w" for the weights passed to Circular.
d2 = load_data("D2", source="zar")
c2 = Circular(
    data=d2["θ"].values,
    w=d2["w"].values,
)

# The test receives the `w` attribute of the Circular object.
chi2, pval = chisquare_test(
    c2.w,
    verbose=True,
)
# `kuiper_test(alpha)`, `watson_test(alpha)`, and `rao_spacing_test(alpha)` are goodness-of-fit tests for ungrouped data. P-values for these tests are computed through simulation.
# In[7]:
from pycircstat2.hypothesis import (
    kuiper_test,
    watson_test,
    rao_spacing_test,
)

# Thirteen hand-entered observations, shared by the three goodness-of-fit
# tests that follow.
pigeon = np.array(
    [
        20, 135, 145, 165, 170, 200, 300,
        325, 335, 350, 350, 350, 355,
    ]
)
c_pigeon = Circular(data=pigeon)
# #### Kuiper's Test
# In[8]:
# Kuiper's test on the pigeon sample, requesting 9999 simulations.
V, pval = kuiper_test(
    c_pigeon.alpha,
    n_simulation=9999,
    verbose=True,
)
# #### Watson's one-sample U2 Test
# In[9]:
# Watson's one-sample test on the pigeon sample, requesting 9999 simulations.
U2, pval = watson_test(
    c_pigeon.alpha,
    n_simulation=9999,
    verbose=True,
)
# #### Rao's Spacing Test
# In[10]:
# Rao's spacing test on the pigeon sample, requesting 9999 simulations.
U, pval = rao_spacing_test(
    c_pigeon.alpha,
    n_simulation=9999,
    verbose=True,
)
# ## Testing for Symmetry
# ### Symmetry Test (around the median)
#
# `symmetry_test(alpha)` tests $H_0$: symmetrical around $\theta$ vs. $H_A$: not symmetrical around $\theta$, where $\theta$ is the median of the population.
# In[11]:
from pycircstat2.hypothesis import symmetry_test

# Zar dataset D9, angle column only.
d9 = load_data("D9", source="zar")["θ"].values
c9 = Circular(data=d9)

statistics, pval = symmetry_test(
    alpha=c9.alpha,
    verbose=True,
)
# ## Testing for the Mean Angle
# ### One-Sample Test
#
# `one_sample_test(alpha)` tests $H_{0}: \mu_a=\mu_0$ vs. $H_{A}: \mu_a \neq \mu_0$, where $\mu_{a}$ is the population mean angle and $\mu_{0}$ is a specified angle. This test is simply observing whether $\mu_{0}$ lies within the confidence interval for $\mu_{a}$.
# In[12]:
from pycircstat2.hypothesis import one_sample_test

# Reuses `c7` (the D7 sample built for the V test) and checks it against a
# specified angle of 90 degrees, expressed in radians.
reject_or_not = one_sample_test(
    angle=np.deg2rad(90),
    alpha=c7.alpha,
    verbose=True,
)
# ## Two-Sample or Multisample Test
# ### Watson-Williams Test for Two-/Multisample
#
# `watson_williams_test(circs)` tests $H_0$: $\mu_1 = \mu_2 = ... = \mu_n$ vs. $H_A$: $\mu_1 \neq \mu_2 \neq ... \neq \mu_n$
# In[13]:
from pycircstat2.hypothesis import watson_williams_test

# Zar dataset D11 holds several samples in one table, labelled by the
# "sample" column; build one Circular object per label 1, 2, 3.
data = load_data("D11", source="zar")
s1, s2, s3 = (
    Circular(data=data[data["sample"] == label]["θ"].values)
    for label in (1, 2, 3)
)

F, pval = watson_williams_test(
    circs=[s1, s2, s3],
    verbose=True,
)
# ### Watson's U2 Test for Two-/multisample with or without Ties
#
# `watson_u2_test(circs)` tests $H_0$: $\mu_1 = \mu_2 = ... = \mu_n$ vs. $H_A$: $\mu_1 \neq \mu_2 \neq ... \neq \mu_n$ for data with or without ties
# In[14]:
from pycircstat2.hypothesis import watson_u2_test
# In[15]:
# without ties
# Zar dataset D12: split the table into its two samples via the "sample"
# column and wrap each one's angles in a Circular object.
d = load_data("D12", source="zar")
c0, c1 = (
    Circular(data=d[d["sample"] == label]["θ"].values)
    for label in (1, 2)
)

U2, pval = watson_u2_test(
    circs=[c0, c1],
    verbose=True,
)
# In[16]:
# with ties
# Zar dataset D13: each sample contributes both its angles ("θ") and the
# matching weights ("w") to its Circular object.
d = load_data("D13", source="zar")
c0, c1 = (
    Circular(data=rows["θ"].values, w=rows["w"].values)
    for rows in (d[d["sample"] == label] for label in (1, 2))
)

U2, pval = watson_u2_test(
    circs=[c0, c1],
    verbose=True,
)
# ### Wheeler and Watson Two-sample Test
#
# `wheeler_watson_test(circs)` tests $H_0$: $\mu_1 = \mu_2 = ... = \mu_n$ vs. $H_A$: $\mu_1 \neq \mu_2 \neq ... \neq \mu_n$.
# In[17]:
from pycircstat2.hypothesis import wheeler_watson_test

# Same two-sample data (Zar D12) as the no-ties Watson U2 example.
d = load_data("D12", source="zar")
c0, c1 = (
    Circular(data=d[d["sample"] == label]["θ"].values)
    for label in (1, 2)
)

W, pval = wheeler_watson_test(
    circs=[c0, c1],
    verbose=True,
)
# ### Wallraff's Two-sample Test for Angular Dispersion
# In[18]:
from pycircstat2.hypothesis import wallraff_test

# Zar dataset D14: the two groups are selected by the "sex" column,
# males first and females second.
d = load_data("D14", source="zar")
c0, c1 = (
    Circular(data=d[d["sex"] == group]["θ"].values)
    for group in ("male", "female")
)

# A single reference angle of 135 degrees (in radians) is passed for both
# samples.
U, pval = wallraff_test(
    angle=np.deg2rad(135),
    circs=[c0, c1],
    verbose=True,
)
# In[19]:
from pycircstat2.utils import time2float

# Zar dataset D15 stores clock times in a "time" column; they are passed
# through time2float before being wrapped in Circular objects (males first,
# females second).
d = load_data("D15", source="zar")
c0, c1 = (
    Circular(data=time2float(d[d["sex"] == group]["time"].values))
    for group in ("male", "female")
)

# One reference time per sample, converted with time2float and then
# np.deg2rad.
# NOTE(review): np.deg2rad treats time2float's output as degrees, and
# Circular is built without an explicit unit -- confirm that matches what
# time2float actually returns.
U, pval = wallraff_test(
    angle=np.deg2rad(time2float(['7:55', '8:15'])),
    circs=[c0, c1],
    verbose=True,
)
# In[20]:
# Print environment/version information via the `watermark` IPython
# extension.
#
# `get_ipython` is only defined inside an IPython/Jupyter session. This file
# is a notebook exported as a script, so look the shell up defensively: under
# plain `python` the name is missing and the magics are skipped instead of
# ending the run with a NameError.
try:
    ipython = get_ipython()  # noqa: F821 - injected by IPython at runtime
except NameError:
    ipython = None

if ipython is not None:
    ipython.run_line_magic('load_ext', 'watermark')
    ipython.run_line_magic(
        'watermark',
        '--time --date --timezone --updated --python --iversions --watermark -p pycircstat2',
    )