Randomized Experiments Playbook¶
%load_ext autoreload
import sys, os
import logging
import datetime as dt
from datetime import datetime
import numpy as np
import scipy.stats as st
import plotly.graph_objects as go
import plotly.express as px
import plotly.figure_factory as ff
from plotly.subplots import make_subplots
from scipy import stats
from numpy.typing import ArrayLike
from scipy.integrate import quad
from scipy.stats import norm, binom
s_logger = logging.getLogger('py4j.java_gateway')
s_logger.setLevel(logging.ERROR)
c_logger = logging.getLogger('py4j.clientserver')
c_logger.setLevel(logging.ERROR)
logging.basicConfig(
level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)
Introduction¶
This notebook is a practical, code-first primer on hypothesis testing and experiment design for product and engineering experiments. It covers classical fixed-horizon inference and sample-size planning: confidence intervals, one-sample vs. two-sample tests, and paired (matched) designs for both continuous outcomes (e.g., latency, time-on-page) and dichotomous outcomes (e.g., conversion). It then introduces sequential testing concepts (e.g., always-valid ideas / mSPRT-style thinking) and briefly discusses multi-armed bandits as an alternative decision-making framework. The goal throughout is to translate statistical inputs—significance level $\alpha$, power $1-\beta$, variability ($\sigma$ or $p$), and either a margin-of-error target (for estimation) or a minimum detectable effect (for hypothesis testing)—into a concrete experiment plan: required sample size per arm (and total), with optional adjustment for expected attrition/unusable data. The Python helpers compute critical values using exact Normal quantiles (via SciPy), so results can differ slightly from hand calculations that use rounded Z values.
Keywords: A/B testing, experiment design, hypothesis testing, sample size, sequential testing (mSPRT), multi-armed bandits
Fixed Horizon Testing¶
Test Statistics¶
Consider a random variable from an arbitrary distribution with standard deviation $\sigma$. If the population mean is $\mu$, the means $\tilde {X}^{(n)}$ of samples of size $n$ are random variables that are (approximately, by the central limit theorem) normally distributed as $\tilde {X}^{(n)} \sim \mathcal N(\mu, \frac{\sigma^2}{n})$. This is called the sampling distribution. When $\sigma$ is replaced by the sample standard deviation $s$, the scaled variable $T = \frac{\tilde {X}^{(n)} - \mu}{\frac {s}{\sqrt n}}$ has a Student t distribution; for large $n$ (or when $\sigma$ is known) we use $Z = \frac{\tilde {X}^{(n)} - \mu}{\frac {\sigma}{\sqrt n}}$, which has a standard normal distribution $\mathcal N(0, 1)$. This is called the Z statistic. From this, we can see that the random variable $\tilde X$ can be written as $\tilde {X}^{(n)} = \mu + Z \frac{\sigma}{\sqrt n}$. Now for a sample $X^{(n)} = \lbrace x_1, x_2, ..., x_n \rbrace$ with average $\bar X$, we can estimate $\mu$ by $\bar X$ and rewrite the equation as $\tilde {X}^{(n)} = \bar X + Z \frac{\sigma}{\sqrt n}$. In this case a confidence interval is defined as $\bar X \pm Z\frac{\sigma}{\sqrt n}$. Note that due to symmetry, instead of the actual algebraic value of Z at the left end of the interval (negative), we use the positive value from the right end and write a $\pm$ sign.
Now when we have two independent samples of sizes $n_1$ and $n_2$ from two populations, the difference between the sample means $\tilde {X} _1^{(n_1)} - \tilde {X} _{2}^{(n_2)}$ is normally distributed $\tilde {X} _{1}^{(n_1)} - \tilde {X} _{2}^{(n_2)} \sim \mathcal N (\mu_1 - \mu_2, \frac{\sigma_1^2}{n_1} + \frac{\sigma_2^2}{n_2})$. In this case the statistic $Z = \frac{(\tilde {X} _{1}^{(n_1)} - \tilde {X} _{2}^{(n_2)}) - (\mu_1 - \mu_2)}{\sqrt{\frac{\sigma_1^2}{n_1} + \frac{\sigma_2^2}{n_2}}}$ has a standard normal distribution $\mathcal N(0, 1)$.
In a similar manner as above, for two samples $X_1^ {(n_1)} = \lbrace x_{11}, x_{12}, ..., x_{1n_1} \rbrace$ with average $\bar X_1$ and $X_2^ {(n_2)} = \lbrace x_{21}, x_{22}, ..., x_{2n_2} \rbrace$ with average $\bar X_2$, we can estimate $\mu_1 - \mu_2$ by $\bar X_1 - \bar X_2$ and rewrite the equation as $\tilde {X} _{1}^{(n_1)} - \tilde {X} _{2}^{(n_2)} = \bar X_1 - \bar X_2 + Z\sqrt{\frac{\sigma_1^2}{n_1} + \frac{\sigma_2^2}{n_2}}$. The confidence interval in this case is $\bar X_1 - \bar X_2 \pm Z\sqrt{\frac{\sigma_1^2}{n_1} + \frac{\sigma_2^2}{n_2}}$.
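The following cell is a minimal sketch of these two confidence intervals using exact Normal quantiles from SciPy; the sample means, standard deviations, and sample sizes are made-up values used purely for illustration.
# Illustrative sketch (made-up numbers): one-sample and two-sample confidence intervals.
import numpy as np
import scipy.stats as st
z = st.norm.ppf(1 - 0.05 / 2)  # two-sided 95% critical value, ~1.96
# One sample: x_bar +/- z * sigma / sqrt(n)
x_bar, sigma, n = 120.0, 30.0, 64
half_width = z * sigma / np.sqrt(n)
print(f"one-sample 95% CI: [{x_bar - half_width:.2f}, {x_bar + half_width:.2f}]")
# Two samples: (x1_bar - x2_bar) +/- z * sqrt(s1^2/n1 + s2^2/n2)
x1_bar, s1, n1 = 120.0, 30.0, 64
x2_bar, s2, n2 = 112.0, 28.0, 80
diff = x1_bar - x2_bar
half_width = z * np.sqrt(s1**2 / n1 + s2**2 / n2)
print(f"two-sample 95% CI for the difference: [{diff - half_width:.2f}, {diff + half_width:.2f}]")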
Classical Fixed-Horizon Tests¶
For reference and further reading, see [Sullivan, 2017].
# Run this cell to start collecting the examples that are given in the text and run them later.
# --- Examples (incremental build) ---
fixed_horizon_examples = {}
Estimation of Mean¶
In what follows in this section, we are interested in estimating the mean of a population or the differences between the means of two populations.
If $X_1,\dots,X_n$ is a random sample from a population with mean $\mu$ and standard deviation $\sigma$, then (using a Normal approximation) a confidence interval for $\mu$ has the form $\bar X \pm Z\,\frac{\sigma}{\sqrt{n}}$, where $Z$ is the standard-normal critical value for the chosen confidence level (e.g., $Z \approx 1.96$ for 95%). In many practical settings $\sigma$ is unknown and is replaced by a sample-based estimate, but the dependence on $n$ is the same: larger samples produce tighter intervals.
The half-width of the interval, $E = Z\,\frac{\sigma}{\sqrt{n}}$, is called the margin of error. When designing a study, we often choose a target margin of error $E$ and solve for the required sample size. For example, suppose we want to estimate the mean page load time and would like the 95% confidence interval to be within $\pm 50$ ms. If prior monitoring suggests the standard deviation is about $\sigma = 200$ ms, then we choose $n$ so that $E \le 50$ ms.
Solving the margin-of-error expression for $n$ gives: $E = Z\frac{\sigma}{\sqrt{n}} \quad\Longrightarrow\quad n = \Big(\frac{Z\sigma}{E}\Big)^2$.
The formula above applies to one-sample studies in which we choose the sample size to control the margin of error of the estimate.
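As a quick check of the page-load planning example above ($\sigma = 200$ ms, target $E = 50$ ms at 95% confidence), the next cell applies the formula directly with the exact Normal quantile.
# Quick check of the margin-of-error formula for the page load planning example above.
import numpy as np
import scipy.stats as st
sigma, E, alpha = 200.0, 50.0, 0.05     # ms, ms, two-sided significance level
z = st.norm.ppf(1 - alpha / 2)          # ~1.96
n = int(np.ceil((z * sigma / E) ** 2))  # round up to the next whole observation
print(f"required n = {n}")              # 62 with the exact quantile (~61.5 before rounding)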
One Sample¶
In these studies we want to estimate the mean of an outcome variable in a single population. The required sample size is $n = \Big(\frac{Z\sigma}{E}\Big)^2$. We round up because $n$ must be an integer.
Continuous outcome: The required sample size is $n = \Big(\frac{Z\sigma}{E}\Big)^2$. For example 1, suppose we’re estimating the population mean time-on-page (seconds) and we want a 95% confidence interval ($Z=1.96$) whose margin of error is at most 5 seconds. From prior cohorts we believe the standard deviation of time-on-page is in the range 12–18 seconds. To be conservative, we plan using the larger value $\sigma=18$. The required sample size is $n = \Big(\frac{1.96(18)}{5}\Big)^2 \approx 50$.
Dichotomous outcome: The required sample size is $n = p(1 - p)\Big(\frac{Z}{E}\Big)^2$. For example 2, suppose we want to estimate the proportion of users who enable a new privacy setting (a binary outcome). How many users should we sample to get a 95% confidence interval whose margin of error is at most 4 percentage points ($E=0.04$) of the true proportion? For a known proportion $p$ in the population, the standard deviation is $\sqrt {p(1-p)}$. If the true rate $p$ is unknown, a conservative choice is $p = 0.5$, which maximizes the standard deviation. The required sample size is $n = p(1-p) \Big(\frac{Z}{E} \Big)^2 = 0.5(1-0.5) \Big(\frac{1.96}{0.04} \Big)^2 \approx 601$.
# Example 1: one-sample estimation, continuous
fixed_horizon_examples["example 1"] = {
"type": "one sample estimation of mean (continuous)",
"mode": "ME",
"outcome": "continuous",
"one_sample": True,
"two_sample": False,
"sides": 2,
"alpha": 0.05,
"me": 5,
"sigma": 18,
"expected_n": 50,
}
# Example 2: one-sample estimation, dichotomous
fixed_horizon_examples["example 2"] = {
"type": "one sample estimation of proportion (dichotomous)",
"mode": "ME",
"outcome": "dichotomous",
"one_sample": True,
"two_sample": False,
"sides": 2,
"alpha": 0.05,
"me": 0.04,
"p": 0.5, # conservative
"expected_n": 601,
}
Two Independent Samples¶
In these studies we want to estimate the difference between the means of two independent populations. Assuming equal variances, the required sample size per group is $n = 2\Big(\frac{Z\sigma}{E}\Big)^2$, where $\sigma$ is the standard deviation of the outcome of interest, which can be estimated by the pooled standard deviation of the two samples:
$n = 2\Big(\frac{ZS_p}{E}\Big)^2$ where the unbiased $S_p = \sqrt{\frac{(n_1 - 1)S_1^2 + (n_2 - 1)S_2^2}{n_1 + n_2-2}}$ or, for large $n_1, n_2$, $S_p = \sqrt{\frac{n_1S_1^2 + n_2S_2^2}{n_1 + n_2}}$. In other words, instead of treating the two standard deviations separately, we assume the two populations share a common standard deviation given by the pooled estimate.
For large samples ($n_1, n_2 > 30$), the confidence interval for the difference has the form $\bar X_1 - \bar X_2 \pm Z\sqrt{\frac{\sigma_1^2}{n_1} + \frac{\sigma_2^2}{n_2}}$, which for equal sample sizes and a common variance reduces to $\bar X_1 - \bar X_2 \pm Z \sigma \sqrt{\frac{2}{n}}$.
Refresher: For two independent random variables, if $Var(X_1) = \frac{\sigma_1^2}{n_1}$ and $Var(X_2) = \frac{\sigma_2^2}{n_2}$ then $Var(X_1 - X_2) = \frac{\sigma_1^2}{n_1} + \frac{\sigma_2^2}{n_2}$
Continuous outcome with known common variance: For example 3, suppose we want to evaluate the impact of a new onboarding flow on time-on-page (seconds) using a two-arm randomized experiment (control vs treatment). We want a 95% confidence interval ($Z=1.96$) for the difference in means whose half-width (margin of error) is at most 4 seconds. From past reports, we estimate a common standard deviation of 24 seconds for time-on-page for any onboarding flow. We also expect about 10% of sampled users to be unusable for analysis (e.g., missing telemetry / early exits). With equal allocation, the per-arm sample size is $2\Big(\frac{1.96(24)}{4}\Big)^2 \approx 277$ which after adjusting for 10% dropout, the per arm size is $277/0.9 \approx 308$. So the total sample size is $2 \times 308 = 616$.
Continuous outcome with pooled variance: For example 4, suppose we want to compare two onboarding experiences (Flow A vs Flow B) for new users and estimate the difference in mean for 7-day activation time (hours) between the two groups with 95% confidence. Assume we expect about a 15% unusable-data/dropout rate (users who churn before day 7, missing telemetry, etc.), and we want the margin of error for the difference in means to be at most $E=3$ hours. Suppose we find two prior studies on a related population of legacy users that we can use to approximate the variability. The first study reports a standard deviation of 12 hours for 160 users for a process that is very similar to Flow A, and the second study reports a standard deviation of 13.5 hours for 120 users for a process that is very similar to flow B. Using these values, we can estimate the pooled standard deviation $S_p = \sqrt{\frac{(160 - 1)12^2 + (120 - 1)13.5^2}{160 + 120-2}} \approx 12.7$. The sample size per arm is $2\Big(\frac{1.96(12.7)}{3}\Big)^2 \approx 138$. Correcting for 15% attrition, we get $\frac{138}{0.85} \approx 163$, hence the total sample size is $2 \times 163 = 326$.
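The pooled standard deviation used in example 4 can be checked directly; the next cell is a small sketch of that computation.
# Pooled standard deviation for example 4 (prior studies with 160 and 120 users).
import numpy as np
n1, s1 = 160, 12.0   # study similar to Flow A
n2, s2 = 120, 13.5   # study similar to Flow B
s_pooled = np.sqrt(((n1 - 1) * s1**2 + (n2 - 1) * s2**2) / (n1 + n2 - 2))
print(f"S_p = {s_pooled:.2f}")  # ~12.66, rounded to 12.7 in the text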
Continuous outcome with matched samples: The required sample size is $n = \Big(\frac{Z\sigma_d}{E}\Big)^2$. Notice the difference: $\sigma_d$ is the standard deviation of the differences of the scores. In the previous cases, we had independent samples $X = \lbrace x_1, x_2, ..., x_n\rbrace$ and $Y = \lbrace y_1, y_2, ..., y_m\rbrace$. The indices in each set represent different members. The $\sigma_x$ and $\sigma_y$ are the standard deviations of the populations from which X and Y were drawn, which can be called $\sigma$ if the variances are equal. In the matched case, we are referring to the sample $Z = \lbrace x_1 - y_1, x_2 - y_2, ..., x_n - y_n \rbrace$ where indices represent the same member (x and y are pre and post treatment) and $\sigma_d$ is the standard deviation of the paired differences. For example 5, suppose we want to measure the effect of a performance optimization on the same users by comparing their page load time (ms) before vs after the change. We sample $n$ users, record each user's average load time during a baseline week $x_i$, then record the same user's average load time during the week after rollout $y_i$. This is a matched (paired) design because $x_i$ and $y_i$ come from the same user. Define the paired differences $d_i = x_i - y_i$, where a positive $d_i$ means the page got faster after the change. We want a 95% confidence interval for the mean improvement $\mu_d$ with margin of error at most $E=10$ ms. From a pilot study, we estimate the standard deviation of the paired differences to be $\sigma_d = 45$ ms. The required sample size is $n = \Big(\frac{Z\sigma_d}{E}\Big)^2 = \Big(\frac{1.96\cdot 45}{10}\Big)^2 \approx 78$. So we should measure about 78 users (paired before/after) to achieve the desired precision.
Dichotomous outcome: You can use the same equation $n = 2\Big(\frac{ZS_p}{E}\Big)^2$ for the required sample size by using $S_1^2 = p_1(1 - p_1)$ and $S_2^2 = p_2(1 - p_2)$ in the pooled standard deviation. For equal sample sizes $n_1 = n_2$, it simplifies to $\Big[p_1(1 - p_1) + p_2(1 - p_2) \Big] \Big(\frac{Z}{E} \Big)^2$ . For common prevalences, it further simplifies to $\Big[2p(1 - p) \Big] \Big(\frac{Z}{E} \Big)^2$, where $p$ is the common or average prevalence. If the prevalences are unknown, we can use p = 0.5 for the most conservative estimate of the sample size. For example 6, suppose you want to estimate the effect of a new calibration procedure on the defect rate (defective vs. not defective) for two production lines A (current process) and B (new calibration process). You plan on doing an experiment with equal sample sizes from each line. Based on engineering judgment, you expect the defect prevalence to be around $p \approx 0.08$ in this product family. You want 95% confidence and a margin of error $E=0.03$ for the estimated difference in defect rates. Using the common-prevalence formula, the required sample size is $2(0.08)(0.92)\left(\frac{1.96}{0.03}\right)^2 \approx 629$ per line, or 1258 total. If you truly don’t know the defect prevalence, using $p=0.5$ gives the most conservative (largest) sample size $2(0.5)(0.5)\left(\frac{1.96}{0.03}\right)^2 \approx 2135$ per line or 4270 total.
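The next cell is a quick check of example 6, both with the planned prevalence and with the conservative $p = 0.5$ choice.
# Check of example 6: per-line sample size for estimating the difference in defect rates.
import numpy as np
import scipy.stats as st
z, E = st.norm.ppf(1 - 0.05 / 2), 0.03
for p in (0.08, 0.5):  # planned prevalence vs. most conservative choice
    n_per_line = int(np.ceil(2 * p * (1 - p) * (z / E) ** 2))
    print(f"p = {p}: n per line = {n_per_line}, total = {2 * n_per_line}")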
# Example 3: two-sample estimation, continuous (common sigma), with dropout adjustment
fixed_horizon_examples["example 3"] = {
"type": "two sample estimation of mean difference (continuous, common sigma)",
"mode": "ME",
"outcome": "continuous",
"one_sample": False,
"two_sample": True,
"sides": 2,
"alpha": 0.05,
"me": 4,
"sigma": 24,
"dropout": 0.10, # unusable fraction
"expected_n_raw_per_group": 277,
"expected_n_adj_per_group": 308,
"expected_total_adj": 616,
}
# Example 4: two-sample estimation, continuous (pooled sigma), with dropout adjustment
fixed_horizon_examples["example 4"] = {
"type": "two sample estimation of mean difference (continuous, pooled sigma)",
"mode": "ME",
"outcome": "continuous",
"one_sample": False,
"two_sample": True,
"sides": 2,
"alpha": 0.05,
"me": 3,
"sigma": 12.7, # pooled S_p (computed outside)
"dropout": 0.15,
"expected_n_raw_per_group": 138,
"expected_n_adj_per_group": 163,
"expected_total_adj": 326,
}
# Example 5: matched/paired estimation, continuous (sigma_d)
fixed_horizon_examples["example 5"] = {
"type": "matched samples estimation of mean difference (continuous)",
"mode": "ME",
"outcome": "continuous",
"one_sample": True, # effectively one set of paired diffs
"two_sample": False,
"sides": 2,
"alpha": 0.05,
"me": 10,
"sigma": 45, # sigma_d
"expected_n": 78,
}
# Example 6: two-sample estimation, dichotomous (common prevalence p)
fixed_horizon_examples["example 6"] = {
"type": "two sample estimation of proportion difference (dichotomous, common p)",
"mode": "ME",
"outcome": "dichotomous",
"one_sample": False,
"two_sample": True,
"sides": 2,
"alpha": 0.05,
"me": 0.03,
"p": 0.08, # common/avg prevalence
"expected_n_per_group": 629,
"expected_total": 1258,
}
Hypothesis Testing¶
So far our calculated sample sizes control type I error by setting the significance level $\alpha = P(\text{Type I error}) = P(\text{reject } H_0 \mid H_0 \text{ is true})$. But we should also be concerned about type II error, where $\beta = P(\text{Type II error}) = P(\text{fail to reject } H_0 \mid H_0 \text{ is false})$. The quantity $1 - \beta = P(\text{reject } H_0 \mid H_0 \text{ is false})$ is called power. In this section we first present the formulas and examples and then derive the equations.
One sample¶
The goal is to compare the mean of an outcome variable in a single population to a known mean, so, the hypotheses of interest are:
$H_0: \mu = \mu_0$ and $H_1: \mu \neq \mu_0$, where $\mu_0$ is the known mean (e.g., a historical control). In practice we often consider $H_1: \mu = \mu_1$ as the alternative, based on a minimum detectable effect $\delta = \big \vert \mu_1 - \mu_0 \big \vert$. The required sample size is $n = \Big(\frac{Z_{1 - \alpha / 2} + Z_{1 - \beta}}{ES} \Big)^2$ where $ES = \frac{\big \vert \mu_1 - \mu_0 \big \vert}{\sigma}$ for continuous variables and $ES = \frac{\big \vert p_1 - p_0 \big \vert}{\sqrt{p_0(1 - p_0)}}$ for dichotomous variables. This setup is commonly known as a two-sided test. There is another setup called a one-sided test, for which $H_0: \mu \ge \mu_0 \;\text{vs}\; H_1: \mu < \mu_0$ for a left-tailed test (testing for a decrease), and $H_0: \mu \le \mu_0 \;\text{vs}\; H_1: \mu > \mu_0$ for a right-tailed test (testing for an increase). For a one-sided test, the effect sizes are the same but the required sample size is $n = \Big(\frac{Z_{1 - \alpha} + Z_{1 - \beta}}{ES} \Big)^2$.
- Continuous outcome: For example 7(a), we want to test whether installation of a new workstation in the plant changed (for better or worse) the mean assembly time with 95% confidence ($Z_{1-\alpha/2}=1.96$) and 80% power ($Z_{1-\beta}=0.84$). The baseline mean assembly time is 120 seconds and we consider a 12 second change of assembly time to be significant. If the standard deviation of the assembly time for this plant is known to be 30 seconds, the effect size is $ES = \frac{\big \vert 120 - 108 \big \vert}{30} = 0.4$ and the required sample size is $n = \Big(\frac{1.96 + 0.84}{0.4} \Big)^2 \approx 49$. Now in example 7(b), if instead of detecting a change, we were interested to see if the new installation has meaningfully reduced the assembly time by 12 seconds, we would use the one sided formula with ($Z_{1-\alpha} = 1.645$): $n = \Big(\frac{1.645 + 0.84}{0.4} \Big)^2 \approx 39$.
- Dichotomous outcome: For example 8, an online retailer launches a new checkout page and wants to test whether its conversion rate exceeds a known historical benchmark. Assume the historical benchmark conversion rate is $p_0 = 0.03$ and the conversion rate worth detecting is $p_1 = 0.033$, hence a difference bigger than $\Delta = p_1 - p_0 = 0.003$ is significant. For 95% confidence and 80% power, the one-sided $ Z_{1-\alpha} = 1.645$ and $Z_{1-\beta} = 0.84$. We calculate the effect size as $ES = \frac{\big \vert 0.003 \big \vert}{\sqrt{0.03(1 - 0.03)}} \approx 0.0176$. The required sample size is $\Big(\frac{1.645+0.84}{0.0176}\Big)^2 \approx 19936$.
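The next cell checks these three examples directly. It uses exact Normal quantiles, so the results differ slightly from the hand calculations above, which round $Z$ to 1.96, 1.645, and 0.84 (hence 49 and 19936); the small helper function is just a local convenience for this check.
# Direct check of examples 7a, 7b, and 8 with exact Normal quantiles.
import numpy as np
import scipy.stats as st
def n_one_sample(es, alpha, beta, sides=2):
    z_a = st.norm.ppf(1 - alpha / sides)  # Z_{1-alpha/2} for two-sided, Z_{1-alpha} for one-sided
    z_b = st.norm.ppf(1 - beta)           # Z_{1-beta}
    return int(np.ceil(((z_a + z_b) / es) ** 2))
print("example 7a:", n_one_sample(12 / 30, alpha=0.05, beta=0.20, sides=2))            # ~50
print("example 7b:", n_one_sample(12 / 30, alpha=0.05, beta=0.20, sides=1))            # ~39
print("example 8 :", n_one_sample(0.003 / np.sqrt(0.03 * 0.97), 0.05, 0.20, sides=1))  # ~19991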
# Example 7a: one-sample hypothesis test (two-sided), continuous
fixed_horizon_examples["example 7a"] = {
"type": "one sample hypothesis test (continuous, two-sided)",
"mode": "MDE",
"outcome": "continuous",
"one_sample": True,
"two_sample": False,
"sides": 2,
"alpha": 0.05,
"beta": 0.20,
"mde": 12,
"sigma": 30,
"expected_n": 49,
}
# Example 7b: one-sample hypothesis test (one-sided), continuous
fixed_horizon_examples["example 7b"] = {
"type": "one sample hypothesis test (continuous, one-sided)",
"mode": "MDE",
"outcome": "continuous",
"one_sample": True,
"two_sample": False,
"sides": 1,
"alpha": 0.05,
"beta": 0.20,
"mde": 12,
"sigma": 30,
"expected_n": 39,
}
# Example 8: one-sample hypothesis test (one-sided), dichotomous vs benchmark p0
fixed_horizon_examples["example 8"] = {
"type": "one sample hypothesis test (dichotomous, one-sided vs benchmark)",
"mode": "MDE",
"outcome": "dichotomous",
"one_sample": True,
"two_sample": False,
"sides": 1,
"alpha": 0.05,
"beta": 0.20,
"p0": 0.03, # benchmark under H0 (used for sigma)
"mde": 0.003,
"expected_n": 19936, # note: tiny rounding differences are possible
}
Two Independent Samples¶
The goal is to perform a test of hypothesis comparing the means of an outcome variable in two independent populations, where the hypotheses of interest are $H_0: \mu_1 = \mu_2$ and $H_1: \mu_1 \neq \mu_2$, and $\mu_1$ and $\mu_2$ are the means in the two comparison populations. The minimum detectable effect in this case is $\delta = \big \vert \mu_1 - \mu_2 \big \vert$. This is the experimental setup commonly known as A/B testing in the industry. As before, we can also perform one-sided tests: $H_0: \mu_2 \ge \mu_1 \;\text{vs}\; H_1: \mu_2 < \mu_1$ for a left-tailed test (testing for a decrease), and $H_0: \mu_2 \le \mu_1 \;\text{vs}\; H_1: \mu_2 > \mu_1$ for a right-tailed test (testing for an increase). One-sided A/B tests are less common because teams usually want to detect any meaningful change, including the possibility that the variant makes things worse, and report results in a direction-agnostic way. They are also avoided because opting for a one-sided test can look like “fishing” for significance, so two-sided tests are the more transparent default. The following examples therefore consider only two-sided tests.
Continuous outcome: The required sample size is $n = 2\Big(\frac{Z_{1 - \alpha / 2} + Z_{1 - \beta}}{ES} \Big)^2$ where $ES = \frac{\big \vert \mu_1 - \mu_2 \big \vert}{\sigma}$ is the effect size, and $\sigma$ is the standard deviation of the outcome of interest which can be estimated by the pooled standard deviation of the samples $S_p = \sqrt{\frac{(n_1 - 1)S_1^2 + (n_2 - 1)S_2^2}{n_1 + n_2-2}}$. For example 9, a software team wants to evaluate whether a new caching strategy reduces page load time (in milliseconds). Incoming sessions are randomly assigned to group A (control, current caching) and group B (treatment, new caching). A difference of 40 ms in mean load time is considered practically significant. Based on prior monitoring data, the standard deviation of page load time is known to be approximately 300 ms. Assume they use a two-sided 5% significance level and 80% power for this test. We can now calculate the effect size as $ES=\frac{40}{300} \approx 0.133$ and the required sample size (per group, with equal group sizes) as $n=2\Big(\frac{1.96+0.84}{0.133}\Big)^2 \approx 887$. The total sample size for both groups is therefore 1774.
Continuous outcome with matched samples: Here the hypotheses of interest are $H_0: \mu_d = 0$ and $H_1: \mu_d \neq 0$, where $\mu_d$ is the mean difference. The sample size and the effect size are $n = \Big(\frac{Z_{1 - \alpha / 2} + Z_{1 - \beta}}{ES} \Big)^2$ and $ES = \frac{\mu_d}{\sigma_d}$, where $\sigma_d$ is the standard deviation of the difference outcomes (e.g., the difference based on measurements over time or the difference between matched pairs). Refer to the matched-samples case in the estimation of the mean above. For example 10, a platform team wants to test whether a new kernel/network tuning reduces mean request latency under a standardized load test. They select $n$ servers from the fleet (or a test cluster). For each server $i$, they run the same load test first with the baseline configuration and then with the new tuning applied. Define the difference $d_i = (\text{latency before new tuning})_i - (\text{latency after new tuning})_i$. A mean reduction of 5 ms is considered practically meaningful, so $\mu_d = 5$ ms. From a small pilot study, the standard deviation of the paired differences is known to be $\sigma_d = 12$ ms. For a two-sided test with 5% significance level and 80% power, the effect size is $ES=\frac{5}{12} \approx 0.417$ and the required sample size (number of paired servers) is $n=\Big(\frac{1.96+0.84}{0.417}\Big)^2 \approx 46$.
Dichotomous outcome: The hypotheses of interest are $H_0: p_1 = p_2$ and $H_1: p_1 \neq p_2$, where $p_1$ and $p_2$ are the proportions in the two comparison populations. The required sample size and effect size are $n = 2\Big(\frac{Z_{1 - \alpha / 2} + Z_{1 - \beta}}{ES} \Big)^2$ and $ES = \frac{\big \vert p_1 - p_2 \big \vert}{\sqrt{p(1 - p)}}$. For example 11, an online retailer wants to test whether a new checkout design (B) changes the purchase conversion rate compared to the current design (A). Incoming visitors are randomly assigned 50/50 to A or B, and the outcome is whether the visitor completes a purchase (yes/no). Assume the baseline conversion rate for (A) to be $p_1 = 0.06$. A 20% relative increase is considered practically significant, hence $p_2 = 1.2 \times 0.06 = 0.072$. We take the pooled proportion $p = \frac{0.06 + 0.072}{2} = 0.066$. For a two-sided test with 5% significance and 80% power, we get the effect size $ES=\frac{|0.072-0.06|}{\sqrt{0.066(1-0.066)}} \approx 0.048$ and the required sample size (per group, equal group sizes) is $n = 2\Big(\frac{1.96+0.84}{0.048}\Big)^2 \approx 6806$. So the total required sample size is $2 \times 6806 = 13612$.
# Example 9: two-sample hypothesis test (A/B), continuous
fixed_horizon_examples["example 9"] = {
"type": "two sample hypothesis test (continuous A/B, two-sided)",
"mode": "MDE",
"outcome": "continuous",
"one_sample": False,
"two_sample": True,
"sides": 2,
"alpha": 0.05,
"beta": 0.20,
"mde": 40,
"sigma": 300, # SD of outcome (per group)
# "expected_n_per_group": 887, # text used ES≈0.133; exact with sigma=300 gives 882
}
# Example 10: matched/paired hypothesis test, continuous (server before/after)
fixed_horizon_examples["example 10"] = {
"type": "matched samples hypothesis test (continuous, two-sided)",
"mode": "MDE",
"outcome": "continuous",
"one_sample": True,
"two_sample": False,
"sides": 2,
"alpha": 0.05,
"beta": 0.20,
"mde": 5, # mu_d
"sigma": 12, # sigma_d
"expected_n": 46,
}
# Example 11: two-sample hypothesis test (A/B), dichotomous
fixed_horizon_examples["example 11"] = {
"type": "two sample hypothesis test (dichotomous A/B, two-sided)",
"mode": "MDE",
"outcome": "dichotomous",
"one_sample": False,
"two_sample": True,
"sides": 2,
"alpha": 0.05,
"beta": 0.20,
"p1": 0.06,
"p2": 0.072,
# mde omitted intentionally; runner will set mde = |p2-p1|
# "expected_n_per_group": 6806, # this depends on rounding; exact inputs will differ slightly
}
Python Functions for Fixed-Horizon Sample Sizes¶
Run the following cell to get the Python functions for calculating fixed-horizon sample sizes.
import numpy as np
import scipy.stats as st
def _z_critical(alpha: float, sides: int) -> float:
"""
Compute the Normal critical value for a total type-I error rate alpha.
Two-sided: z = Z_{1 - alpha/2}
One-sided: z = Z_{1 - alpha}
Args:
alpha: Total significance level (e.g., 0.05).
sides: 2 for two-sided, 1 for one-sided.
Returns:
Critical value z.
Raises:
ValueError: if alpha not in (0,1) or sides not in {1,2}.
"""
if not (0 < alpha < 1):
raise ValueError(f"alpha must be in (0, 1), got {alpha}")
if sides not in (1, 2):
raise ValueError(f"sides must be 1 or 2, got {sides}")
return st.norm.ppf(1 - alpha / 2) if sides == 2 else st.norm.ppf(1 - alpha)
def get_sample_size_ME(
sigma: float,
alpha: float,
me: float,
two_sample: bool = False,
sides: int = 2,
attrition: float = 0.0,
) -> int:
"""
Sample size for estimation with margin of error (ME) using Normal approximation.
One sample (or paired differences treated as one sample):
n = (z * sigma / ME)^2
Two independent samples with equal allocation and common variance:
n_per_group = 2 * (z * sigma / ME)^2
Implemented by using sigma_eff = sqrt(2)*sigma when two_sample=True.
Dichotomous outcomes:
Provide sigma = sqrt(p*(1-p)) (or pooled/benchmark p as appropriate).
Matched/paired outcomes:
Set two_sample=False and provide sigma = sigma_d (SD of paired differences).
Attrition:
If attrition > 0, returned n is inflated by 1/(1-attrition).
Args:
sigma: Standard deviation of outcome (or sqrt(p*(1-p)) for dichotomous).
alpha: Total type-I error rate (e.g., 0.05).
me: Desired margin of error (half-width).
two_sample: If True, returns per-group n for two independent groups.
sides: 2 (two-sided) or 1 (one-sided).
attrition: Fraction unusable (e.g., 0.10). Default 0.0.
Returns:
Required sample size (ceil).
- If two_sample=False: n
- If two_sample=True: n_per_group
"""
if sigma <= 0:
raise ValueError(f"sigma must be > 0, got {sigma}")
if me <= 0:
raise ValueError(f"me must be > 0, got {me}")
if not (0 <= attrition < 1):
raise ValueError(f"attrition must be in [0, 1), got {attrition}")
z = _z_critical(alpha=alpha, sides=sides)
sigma_eff = (np.sqrt(2) * sigma) if two_sample else sigma
n = (z * sigma_eff / me) ** 2
n = np.ceil(n)
if attrition > 0:
n = np.ceil(n / (1 - attrition))
return int(n)
def get_sample_size_MDE(
sigma: float,
alpha: float,
beta: float,
mde: float,
two_sample: bool = False,
sides: int = 2,
attrition: float = 0.0,
) -> int:
"""
Sample size for hypothesis testing with power (MDE) using Normal approximation.
One sample (or paired differences treated as one sample):
n = ((z_alpha + z_beta) * sigma / MDE)^2
Two independent samples with equal allocation and common variance:
n_per_group = 2 * ((z_alpha + z_beta) * sigma / MDE)^2
Implemented by using sigma_eff = sqrt(2)*sigma when two_sample=True.
Dichotomous outcomes:
Provide sigma = sqrt(p*(1-p)) using:
- p0 for one-sample benchmark tests, or
- pooled p for two-sample proportion comparisons.
Matched/paired outcomes:
Set two_sample=False, provide sigma = sigma_d and mde = mu_d.
Attrition:
If attrition > 0, returned n is inflated by 1/(1-attrition).
Args:
sigma: Standard deviation of outcome (or sqrt(p*(1-p)) for dichotomous).
alpha: Total type-I error rate (e.g., 0.05).
beta: Type-II error rate (e.g., 0.20 for 80% power).
mde: Minimum detectable effect (absolute difference).
two_sample: If True, returns per-group n for two independent groups.
sides: 2 (two-sided) or 1 (one-sided).
attrition: Fraction unusable (e.g., 0.10). Default 0.0.
Returns:
Required sample size (ceil).
- If two_sample=False: n
- If two_sample=True: n_per_group
"""
if sigma <= 0:
raise ValueError(f"sigma must be > 0, got {sigma}")
if mde <= 0:
raise ValueError(f"mde must be > 0, got {mde}")
if not (0 < beta < 1):
raise ValueError(f"beta must be in (0, 1), got {beta}")
if not (0 <= attrition < 1):
raise ValueError(f"attrition must be in [0, 1), got {attrition}")
z_a = _z_critical(alpha=alpha, sides=sides)
z_b = st.norm.ppf(1 - beta)
sigma_eff = (np.sqrt(2) * sigma) if two_sample else sigma
n = (((z_a + z_b) * sigma_eff) / mde) ** 2
n = np.ceil(n)
if attrition > 0:
n = np.ceil(n / (1 - attrition))
return int(n)
# ---------------- Runner helpers ----------------
def _resolve_sigma_from_example(ex: dict) -> float:
"""
Resolve 'sigma' for an example dict.
- continuous: expects ex["sigma"]
- dichotomous:
* if ex has "p": sigma = sqrt(p*(1-p))
* elif ex has "p0": sigma = sqrt(p0*(1-p0))
* elif ex has "p1" and "p2": sigma = sqrt(p_pool*(1-p_pool)),
where p_pool defaults to (p1+p2)/2 unless ex["p_pooled"] is provided
* elif ex has "sigma": uses it directly
"""
outcome = ex.get("outcome")
if outcome == "continuous":
return float(ex["sigma"])
if outcome == "dichotomous":
if "sigma" in ex:
return float(ex["sigma"])
if "p" in ex:
p = float(ex["p"])
return float(np.sqrt(p * (1 - p)))
if "p0" in ex:
p0 = float(ex["p0"])
return float(np.sqrt(p0 * (1 - p0)))
if "p1" in ex and "p2" in ex:
p1, p2 = float(ex["p1"]), float(ex["p2"])
p_pool = float(ex.get("p_pooled", (p1 + p2) / 2))
return float(np.sqrt(p_pool * (1 - p_pool)))
raise ValueError(f"Cannot resolve sigma for example: {ex}")
def _resolve_mde_from_example(ex: dict) -> float:
"""
Resolve 'mde' for an example dict.
- If ex has "mde": use it.
- If dichotomous and has p1,p2: mde = |p2 - p1|
"""
if "mde" in ex:
return float(ex["mde"])
if ex.get("outcome") == "dichotomous" and "p1" in ex and "p2" in ex:
return float(abs(float(ex["p2"]) - float(ex["p1"])))
raise ValueError(f"Cannot resolve mde for example: {ex}")
def _format_expected_fields(ex: dict) -> str:
"""
Return a suffix like:
" | expected_n=50, expected_total_adj=616"
including any keys that start with 'expected'.
"""
expected_items = [(k, ex[k]) for k in ex.keys() if k.startswith("expected")]
if not expected_items:
return ""
expected_items.sort(key=lambda kv: kv[0]) # stable order
parts = [f"{k}={v}" for k, v in expected_items]
return " | " + ", ".join(parts)
def run_fixed_horizon_examples(examples: dict) -> None:
"""
Iterate over the examples dict and print sample sizes.
Conventions:
- mode == "ME": prints n (or n_per_group for two_sample). If dropout present, prints raw + adjusted.
- mode == "MDE": prints n (or n_per_group for two_sample).
- accepts either "dropout" or "attrition" in the example dict.
"""
for name, ex in examples.items():
mode = ex["mode"]
two_sample = bool(ex.get("two_sample", False))
sides = int(ex.get("sides", 2))
alpha = float(ex["alpha"])
attrition = float(ex.get("attrition", ex.get("dropout", 0.0)))
suffix = _format_expected_fields(ex)
sigma = _resolve_sigma_from_example(ex)
if mode == "ME":
me = float(ex["me"])
# raw
n_raw = get_sample_size_ME(
sigma=sigma,
alpha=alpha,
me=me,
two_sample=two_sample,
sides=sides,
attrition=0.0,
)
if attrition > 0:
n_adj = get_sample_size_ME(
sigma=sigma,
alpha=alpha,
me=me,
two_sample=two_sample,
sides=sides,
attrition=attrition,
)
if two_sample:
print(f"{name}: n_raw_per_group={n_raw}, n_adj_per_group={n_adj}, total_adj={2*n_adj}{suffix}")
else:
print(f"{name}: n_raw={n_raw}, n_adj={n_adj}{suffix}")
else:
if two_sample:
print(f"{name}: n_per_group={n_raw}, total={2*n_raw}{suffix}")
else:
print(f"{name}: n={n_raw}{suffix}")
elif mode == "MDE":
beta = float(ex["beta"])
mde = _resolve_mde_from_example(ex)
n_raw = get_sample_size_MDE(
sigma=sigma,
alpha=alpha,
beta=beta,
mde=mde,
two_sample=two_sample,
sides=sides,
attrition=0.0,
)
if attrition > 0:
n_adj = get_sample_size_MDE(
sigma=sigma,
alpha=alpha,
beta=beta,
mde=mde,
two_sample=two_sample,
sides=sides,
attrition=attrition,
)
if two_sample:
print(f"{name}: n_raw_per_group={n_raw}, n_adj_per_group={n_adj}, total_adj={2*n_adj}{suffix}")
else:
print(f"{name}: n_raw={n_raw}, n_adj={n_adj}{suffix}")
else:
if two_sample:
print(f"{name}: n_per_group={n_raw}, total={2*n_raw}{suffix}")
else:
print(f"{name}: n={n_raw}{suffix}")
# Run:
# run_fixed_horizon_examples(fixed_horizon_examples)
Run the following cell to see the results. Small differences from the hand-calculated values are expected due to rounding in manual calculations.
run_fixed_horizon_examples(fixed_horizon_examples)
example 1: n=50 | expected_n=50
example 2: n=601 | expected_n=601
example 3: n_raw_per_group=277, n_adj_per_group=308, total_adj=616 | expected_n_adj_per_group=308, expected_n_raw_per_group=277, expected_total_adj=616
example 4: n_raw_per_group=138, n_adj_per_group=163, total_adj=326 | expected_n_adj_per_group=163, expected_n_raw_per_group=138, expected_total_adj=326
example 5: n=78 | expected_n=78
example 6: n_per_group=629, total=1258 | expected_n_per_group=629, expected_total=1258
example 7a: n=50 | expected_n=49
example 7b: n=39 | expected_n=39
example 8: n=19991 | expected_n=19936
example 9: n_per_group=883, total=1766
example 10: n=46 | expected_n=46
example 11: n_per_group=6720, total=13440
Derivations of Sample Size Equations for Classical Fixed-Horizon Testing¶
One Sample¶
As stated above, consider $H_0: \mu = \mu_0$ and $H_1: \mu = \mu_1$. Consider the sampling distribution $\tilde {X}^{(n)} \sim \mathcal N(\mu, \frac{\sigma^2}{n})$, which we can write as $\tilde {X}^{(n)} = \mu + Z \frac{\sigma}{\sqrt n}$. Let's replace the superscript $(n)$, which we don't need here, by $(\mu)$ to indicate that the samples of size $n$ are taken from the distribution with mean $\mu$, that is, $\tilde {X}^{(\mu)} = \mu + Z \frac{\sigma}{\sqrt n}$. We can reorder and write this as $\tilde {X}^{(\mu)} - \mu = Z \frac{\sigma}{\sqrt n}$. When testing the hypothesis for the two values of the mean, the standard deviation and the sample size are the same because we are hypothesizing about the mean of a single population using a single sample. Assume $\mu_1 > \mu_0$ and let the minimum detectable effect be $\delta = \mu_1 - \mu_0$. From Figure 1, which shows the distributions of $\tilde {X}^{(\mu_0)}$ and $\tilde {X}^{(\mu_1)}$, we can see that the minimum detectable effect can be calculated from the location of point C, the rejection boundary: under the null curve, C sits at the critical value, $Z(C) = Z_{1 - \alpha/2}$ (the upper-tail area beyond C is $\alpha/2$ for a two-sided test), and under the alternative curve the lower-tail area up to C is the type II error, $P(\tilde {X}^{(\mu_1)} \le C) = \beta$, so $Z(C) = Z_{\beta}$. These tail regions are shaded in the figure.
$\delta = \overline{\mu_0C} + \overline{C\mu_1} = \big(\tilde {X}^{(\mu_0)}(C) - \mu_0 \big) + \big(\mu_1 - \tilde {X}^{(\mu_1)}(C) \big) = \big(\tilde {X}^{(\mu_0)}(C) - \mu_0 \big) - \big(\tilde {X}^{(\mu_1)}(C) -\mu_1 \big)$ which we can express using the corresponding Z values
$= Z_{1 - \alpha/2}\frac{\sigma}{\sqrt n} - Z_{\beta}\frac{\sigma}{\sqrt n} = \big( Z_{1 - \alpha/2} + Z_{1 - \beta}\big)\frac{\sigma}{\sqrt n}$
Note that $Z \sim \mathcal N(0, 1)$, and for the small values of $\beta$ we typically use (such as 0.2), $Z_{\beta} < 0$ with $P(Z \le Z_{\beta}) = \beta$, while $Z_{1 - \beta} > 0$ and $Z_{1 - \beta} = - Z_{\beta}$ by symmetry.
which corresponds to the formula for the sample size for the one-sample comparison: solving $\delta = \big( Z_{1 - \alpha/2} + Z_{1 - \beta}\big)\frac{\sigma}{\sqrt n}$ for $n$ gives $n = \Big(\frac{Z_{1 - \alpha / 2} + Z_{1 - \beta}}{ES} \Big)^2$ with $ES = \frac{\delta}{\sigma}$.
# Figure 1
import numpy as np
from scipy.stats import norm
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.io as pio
pio.renderers.default = "jupyterlab"
X = np.arange(-60, 140.1, 0.1)
mu0, mu1 = 20, 70
sigma = 15 # note this is actually sigma / sqrt(n), we don't care about n here for illustration, so we just use this value.
x_c = 47
Y0 = norm.pdf(X, mu0, sigma)
Y1 = norm.pdf(X, mu1, sigma)
fig = make_subplots(rows=1, cols=1,)
fig.update_layout({"title": f"Figure 1: Sampling distribution of X_bar for some imaginary sample size under the null and alternative hypothesis",
# 'yaxis_range': [-0.5, 1.5],
"height": 500}, )
fig.add_scatter(x=X, y=Y0, name="N(mu_0, sigma^2/n)", mode="lines", line={"color": "blue", "width": 0.75,}, row=1, col=1)
fig.add_scatter(x=X, y=Y1, name="N(mu_1, sigma^2/n)", mode="lines", line={"color": "red", "width": 0.75}, row=1, col=1)
fig.add_scatter(x=[x_c,], y=[0.0,], name="C", mode="markers", line={"color": "black", "width": 5}, row=1, col=1)
fig.add_vline(x=x_c, name="C", line_width=1, line_dash="dot", line_color="black", row=1, col=1)
fig.add_vline(x=mu0, name="mu0", line_width=1, line_dash="dot", line_color="blue", row=1, col=1)
fig.add_vline(x=mu1, name="mu1", line_width=1, line_dash="dot", line_color="red", row=1, col=1)
fig.add_annotation(
x=x_c, y=0, # position
text="C", # text
#showarrow=True,
arrowhead=3, arrowsize=1, arrowwidth=2, arrowcolor="black"
)
fig.add_annotation(
x=mu0, y=0, # position
text="mu0", # text
#showarrow=True,
arrowhead=3, arrowsize=1, arrowwidth=2, arrowcolor="black"
)
fig.add_annotation(
x=mu1, y=0, # position
text="mu1", # text
#showarrow=True,
arrowhead=3, arrowsize=1, arrowwidth=2, arrowcolor="black"
)
X_fill0 = np.arange(x_c, 140.1, 0.1)
Y_fill0 = norm.pdf(X_fill0, mu0, sigma)
X_fill1 = np.arange(-60, x_c+0.1, 0.1)
Y_fill1 = norm.pdf(X_fill1, mu1, sigma)
fig.add_trace(go.Scatter(x=X_fill0, y=Y_fill0, fill="tozeroy", name="alpha"))
fig.add_trace(go.Scatter(x=X_fill1, y=Y_fill1, fill="tozeroy", name="beta"))
fig.show()
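As a numerical sanity check of this derivation, the next cell plugs the example 7a inputs into the formula and then simulates the sampling distribution under $H_1$ to confirm that the resulting test has roughly the planned power; the seed and simulation count are arbitrary choices for illustration.
# Check of the derivation: with n from the formula, simulated power should be close to 1 - beta.
import numpy as np
import scipy.stats as st
rng = np.random.default_rng(0)
alpha, beta = 0.05, 0.20
mu0, mu1, sigma = 120.0, 108.0, 30.0  # example 7a values
delta = abs(mu1 - mu0)
z_a, z_b = st.norm.ppf(1 - alpha / 2), st.norm.ppf(1 - beta)
n = int(np.ceil(((z_a + z_b) * sigma / delta) ** 2))
n_sim = 20_000
x_bar = rng.normal(mu1, sigma / np.sqrt(n), size=n_sim)  # sampling distribution of the mean under H1
z_stat = (x_bar - mu0) / (sigma / np.sqrt(n))
power = np.mean(np.abs(z_stat) > z_a)                    # two-sided rejection rule
print(f"n = {n}, simulated power = {power:.3f}")         # roughly 0.8 (slightly above, since n is rounded up)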
Two Independent Samples¶
For this case $H_0: \mu_1 = \mu_2$ and $H_1: \mu_1 \neq \mu_2$, and we use the sampling distribution $\tilde {X} _{2}^{(n_2)} - \tilde {X} _{1}^{(n_1)} \sim \mathcal N (\mu_2 - \mu_1, \frac{\sigma_1^2}{n_1} + \frac{\sigma_2^2}{n_2})$. Again let's drop the sample size from the superscript and write the equivalent form $\tilde {X} _{2}^{(\mu_2)} - \tilde {X} _{1}^{(\mu_1)} = \mu_2 - \mu_1 + Z\sqrt{\frac{\sigma_1^2}{n_1} + \frac{\sigma_2^2}{n_2}}$. Note that here we are concerned with the distribution of the difference of the means, so let's introduce $\tilde {D}^{(d)} = \tilde {X} _{2}^{(\mu_2)} - \tilde {X} _{1}^{(\mu_1)}$ and $d = \mu_2 - \mu_1$. By assuming equal sample size $n$ and a common standard deviation $\sigma$, we can write the previous equation in the simplified form $\tilde {D}^{(d)} = d + Z\sigma\sqrt{\frac{2}{n}}$ with the equivalent hypotheses $H_0: d = 0$ and $H_1: d = \delta$, where $\delta = \mu_2 - \mu_1, \mu_2 > \mu_1$ is the minimum detectable effect. We can now proceed in a similar manner as the one-sample case, except that the distribution of interest here is $\tilde {D}^{(d)}$ instead of $\tilde {X}^{(\mu)}$. Figure 2 represents the distribution of $\tilde {D}^{(d)}$, with $d_0 = 0$ and $d_1 = \delta$. Again, the minimum detectable effect can be calculated based on the location of the point C as
$\delta = \overline{d_0C} + \overline{Cd_1} = \big(\tilde D^{(d_0)}(C) - d_0 \big) + \big(d_1 - \tilde D^{(d_1)}(C) \big) = \big(\tilde D^{(d_0)}(C) - d_0 \big) - \big(\tilde D^{(d_1)}(C) - d_1 \big)$
$= Z_{1 - \alpha/2}\sigma\sqrt{\frac{2}{n}} - Z_{\beta}\sigma\sqrt{\frac{2}{n}} = \big( Z_{1 - \alpha/2} + Z_{1 - \beta}\big)\sigma\sqrt{\frac{2}{n}}$
which corresponds to the formula for the sample size for the two-sample comparison: solving $\delta = \big( Z_{1 - \alpha/2} + Z_{1 - \beta}\big)\sigma\sqrt{\frac{2}{n}}$ for $n$ gives $n = 2\Big(\frac{Z_{1 - \alpha / 2} + Z_{1 - \beta}}{ES} \Big)^2$ per group, with $ES = \frac{\delta}{\sigma}$.
# Figure 2
X = np.arange(-80, 120.1, 0.1)
d0, d1 = 0, 50
sigma = 15
x_c = 27
Y0 = norm.pdf(X, d0, sigma)
Y1 = norm.pdf(X, d1, sigma)
fig = make_subplots(rows=1, cols=1,)
fig.update_layout({"title": f"Figure 2: Sampling distribution of D_bar for some imaginary sample size under the null and alternative hypothesis",
# 'yaxis_range': [-0.5, 1.5],
"height": 500}, )
fig.add_scatter(x=X, y=Y0, name="N(d_0=0, 2*sigma^2/n)", mode="lines", line={"color": "blue", "width": 0.75,}, row=1, col=1)
fig.add_scatter(x=X, y=Y1, name="N(d1=delta, 2*sigma^2/n)", mode="lines", line={"color": "red", "width": 0.75}, row=1, col=1)
fig.add_scatter(x=[x_c,], y=[0.0,], name="C", mode="markers", line={"color": "black", "width": 5}, row=1, col=1)
fig.add_vline(x=x_c, name="C", line_width=1, line_dash="dot", line_color="black", row=1, col=1)
fig.add_vline(x=d0, name="d0", line_width=1, line_dash="dot", line_color="blue", row=1, col=1)
fig.add_vline(x=d1, name="d1", line_width=1, line_dash="dot", line_color="red", row=1, col=1)
fig.add_annotation(
x=x_c, y=0, # position
text="C", # text
#showarrow=True,
arrowhead=3, arrowsize=1, arrowwidth=2, arrowcolor="black"
)
fig.add_annotation(
x=d0, y=0, # position
text="d0=0", # text
#showarrow=True,
arrowhead=3, arrowsize=1, arrowwidth=2, arrowcolor="black"
)
fig.add_annotation(
x=d1, y=0, # position
text="d1=delta", # text
#showarrow=True,
arrowhead=3, arrowsize=1, arrowwidth=2, arrowcolor="black"
)
X_fill0 = np.arange(x_c, 120.1, 0.1)
Y_fill0 = norm.pdf(X_fill0, d0, sigma)
X_fill1 = np.arange(-80, x_c+0.1, 0.1)
Y_fill1 = norm.pdf(X_fill1, d1, sigma)
fig.add_trace(go.Scatter(x=X_fill0, y=Y_fill0, fill="tozeroy", name="alpha"))
fig.add_trace(go.Scatter(x=X_fill1, y=Y_fill1, fill="tozeroy", name="beta"))
fig.show()
Sequential Testing¶
- A standard randomized fixed horizon test may take too long
- We cannot just naïvely monitor p-values as we observe data and stop when $p_n < \alpha$. This modifies the test, and the type I and type II errors will not be $\alpha$ and $\beta$ anymore [Johari et al 2019].
Methods for Sequential Testing¶
There are two main methods for sequential testing [Johari et al. 2019].
- Sequential probability ratio test (SPRT), which is based on the original SPRT method of [Wald, 1947].
- Multi-armed bandits [Scott, 2014].
Sequential Probability Ratio Test (SPRT)¶
A sequential test is performed as follows. At each step of the sequential test, we have three choices: accept $H_0$, reject $H_0$ (hereafter we call it accept $H_1$ for simplicity), or continue. We denote the expected value of $n$ if $H_0$ is true by $E_0(n)$, and the expected value of $n$ if $H_1$ is true by $E_1(n)$.
As in fixed procedures, type I and type II errors may be committed in sequential tests. We say two sequential tests $S$ and $S'$ have equal strength if $\alpha$ and $\beta$ are equal. We say test $S$ is stronger than $S'$ if $\alpha \lt \alpha'$ and $\beta \le \beta'$, or if $\alpha \le \alpha'$ and $\beta \lt \beta'$.
If $S$ and $S'$ have equal strength, we shall say $S'$ is better than $S$ if either $E_0(n \mid S') \lt E_0(n \mid S)$ and $E_1(n \mid S') \le E_1(n \mid S)$, or $E_0(n \mid S') \le E_0(n \mid S)$ and $E_1(n \mid S') \lt E_1(n \mid S)$.
The sequential test $S^{\ast}$ is called the best if for any sequential tests $S$ of equal strength, $E_0(n \mid S^{\ast}) \le E_0(n \mid S)$ and $E_1(n \mid S^{\ast}) \le E_1(n \mid S)$.
It has not been proved that a sequential test exists in which both $E_0(n)$ and $E_1(n)$ are minimized, but for most practical purposes, the SPRT outlined in the following sections can be considered the best.
We therefore need a definition of an optimum test. For most practical purposes, the optimum test can be defined as the test $S^{\ast}$ for which $\max[E_0(n \mid S^{\ast}), E_1(n \mid S^{\ast})] \le \max[E_0(n \mid S), E_1(n \mid S)]$ for any $S$ of equal strength.
The efficiency of a sequential test $S$ is then defined by the ratio $\frac{\max[E_0(n \mid S^{\ast}), E_1(n \mid S^{\ast})]}{\max[E_0(n \mid S), E_1(n \mid S)]}$.
Neyman-Pearson Lemma¶
It is the foundation of most, if not all, sequential probability ratio tests.
Definition:
- Let $X$ be a random variable with probability distribution $f(x)$
- Let the hypothesis $H_0$ to be tested be the statement that the distribution of $X$ is $f_0(x)$
- Suppose $H_0$ is to be tested against $H_1$ which states that the distribution of $X$ is $f_1(x)$
According to the Neyman-Pearson theory for testing hypotheses, the most powerful critical (rejection) region $W_N$ for testing $H_0$ against $H_1$ on the basis of $N$ independent observations $x_1, x_2, ..., x_N$ on $X$ is given by the inequality
$\frac{f_1(x_1)f_1(x_2)...f_1(x_N)}{f_0(x_1)f_0(x_2)...f_0(x_N)} \geq k$
The quantity $k$ is a constant and is chosen so that the size of the critical region, i.e., the probability of a type I error, has the required value $\alpha$.
The proof is outside the scope of this notebook, but notice that the left side is just the ratio of the likelihoods of the observations. This intuitively makes sense because, to be able to reject $H_0$, the observations should be more likely under the alternative hypothesis. Recall that the likelihood function $L(\theta)$ is the joint probability density (or mass) function of the sample $x_1, x_2, ..., x_N$, that is
$L(\theta; x_1, x_2, ..., x_N) = L(\theta \mid x_1, x_2, ..., x_N) = f(x_1 \mid \theta)f(x_2 \mid \theta)...f(x_N \mid \theta)$. Note that $L(\theta)$ is written as $L(\theta ; x_1, x_2, ..., x_N)$ instead of $L(x_1, x_2, ..., x_N \mid \theta)$ to emphasize that it is a function of $\theta$, the parameter(s) of the distribution.
SPRT Decision Rules¶
How do we get the decision rule for accepting $H_0$, accepting $H_1$ or to continue drawing new samples?
Suppose that before the sample is drawn, there exists a priori a probability that $H_0$ is true. Denote this prior probability by $g_0$. Then the prior probability that $H_1$ is true is given by $g_1 = 1 - g_0$, since it is assumed that $H_0$ and $H_1$ exhaust all possibilities. After $m$ observations $x_1, x_2, ..., x_m$ are made, the posterior probability that $H_i \ (i = 0, 1)$ is true changes. Let $g_{0m}$ be the posterior probability that $H_0$ is true and $g_{1m}$ be the posterior probability that $H_1$ is true. Now from Bayes' theorem:
$g_{0m} = \frac{g_0p_{0m}(x_1, x_2, ..., x_m)}{g_0p_{0m}(x_1, x_2, ..., x_m) + g_1p_{1m}(x_1, x_2, ..., x_m)}$ (1)
$g_{1m} = \frac{g_1p_{1m}(x_1, x_2, ..., x_m)}{g_0p_{0m}(x_1, x_2, ..., x_m) + g_1p_{1m}(x_1, x_2, ..., x_m)}$ (2)
where $p_{im}(x_1, x_2, ..., x_m) (i = 0, 1)$ is the probability density of the $m$ dimensional sample space calculated under $H_i (i = 0, 1)$, which we will abbreviate by $p_{im}$.
Let $\frac{1}{2} \lt d_0, d_1 \lt 1$ be two positive numbers. We want to construct the sequential test such that the conditional probability of correctly accepting $H_i \ (i = 0, 1)$ when $H_i$ is true is greater than or equal to $d_i \ (i = 0, 1)$. Then the following sequential process seems reasonable:
At each step, if $g_{1m} \geq d_1$ accept $H_1$. If $g_{0m} \geq d_0$ accept $H_0$. If $g_{1m} \lt d_1$ and $g_{0m} \lt d_0$, draw an additional observation. We cannot have both $g_{1m} \gt d_1$ and $g_{0m} \gt d_0$ simultaneously because $g_{0m} + g_{1m} = 1$.
(1) and (2) together with inequalities $g_{1m} \geq d_1$ and $g_{0m} \geq d_0$ can be re-arranged as:
$\frac{p_{1m}}{p_{0m}} \geq \frac{g_0}{g_1}\frac{d_1}{1-d_1}$ (3)
$\frac{p_{1m}}{p_{0m}} \le \frac{g_0}{g_1}\frac{1 - d_0}{d_0}$ (4)
If the prior probabilities do not exist or are unknown, these inequalities suggest the following sequential test. At each step, calculate $\frac{p_{1m}}{p_{0m}}$:
Accept $H_1$ if $\frac{p_{1m}}{p_{0m}} \geq A$ (5)
Accept $H_0$ if $\frac{p_{1m}}{p_{0m}} \le B$ (6)
Continue to observe if $B \lt \frac{p_{1m}}{p_{0m}} \lt A$ (7)
The constants $A$ and $B$ are chosen so that $0 \lt B \lt A$ and the sequential test has the desired values of $\alpha$ and $\beta$ for type I and type II errors respectively.
A fundamental relationship exists between $A$, $B$, $\alpha$ and $\beta$. The proof is outside the scope of this notebook; we simply state the result here.
$A(\alpha, \beta) \le \frac{1 - \beta}{\alpha}$ (8)
$B(\alpha, \beta) \geq \frac{\beta}{1 - \alpha}$ (9)
SPRT Process¶
When $\alpha$ and $\beta$ are the reasonably small desired type I and type II error rates, for most practical purposes choose the constants $A$ and $B$ as:
$A = \frac{1 - \beta}{\alpha}$
$B = \frac{\beta}{1 - \alpha}$
And then follow this procedure:
Accept $H_1$ if $\frac{p_{1m}}{p_{0m}} \geq \frac{1 - \beta}{\alpha}$ (10)
Accept $H_0$ if $\frac{p_{1m}}{p_{0m}} \le \frac{\beta}{1 - \alpha}$ (11)
Continue to observe if $\frac{\beta}{1 - \alpha} \lt \frac{p_{1m}}{p_{0m}} \lt \frac{1 - \beta}{\alpha}$ (12)
where $p_{im} (i = 0, 1)$ is the probability density of the $m$ observations under $H_i (i = 0, 1)$.
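Before specializing to the binomial case below, the next cell is a small generic sketch of this decision rule in terms of the cumulative log likelihood ratio, with thresholds $\log B$ and $\log A$ taken from equations (10)-(12); the Bernoulli usage at the end uses made-up data with a hypothetical true rate of 0.65.
# Generic SPRT sketch: accumulate the log likelihood ratio and compare to log(B) and log(A).
import numpy as np
def sprt_decision(log_lr_increments, alpha, beta):
    """Walk through per-observation log likelihood ratio terms and apply rules (10)-(12)."""
    log_A = np.log((1 - beta) / alpha)  # accept H1 at or above this cumulative log LR
    log_B = np.log(beta / (1 - alpha))  # accept H0 at or below this cumulative log LR
    log_lr = 0.0
    for i, increment in enumerate(log_lr_increments, start=1):
        log_lr += increment
        if log_lr >= log_A:
            return "accept H1", i
        if log_lr <= log_B:
            return "accept H0", i
    return "continue", len(log_lr_increments)
# Illustrative usage with Bernoulli observations: H0: p = 0.4 vs H1: p = 0.6, true p = 0.65.
rng = np.random.default_rng(1)
p0, p1 = 0.4, 0.6
xs = rng.binomial(1, 0.65, size=200)
increments = np.where(xs == 1, np.log(p1 / p0), np.log((1 - p1) / (1 - p0)))
print(sprt_decision(increments, alpha=0.05, beta=0.2))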
One-Sided Test of a Simple Hypothesis - Case of a Binomial Distribution¶
Example from manufacturing [Wald, 1947].
Let $p$ denote the probability of having a defective unit in a lot. If $m$ units are drawn at random, the probability of observing $d$ defective units is $\frac{m!}{d!(m-d)!}p^d(1-p)^{m-d}$
Risk tolerance for making the wrong decision: It is possible to specify a value of $p'$ so that if $p \le p'$, we prefer to accept the lot, and if $p \gt p'$, we prefer to reject the lot. Also, when $p \le p'$, our preference for acceptance is stronger the smaller the $p$ is, and when $p \gt p'$ our preference for rejection is stronger the larger the $p$ is. Thus, it is possible to select a value $p_0 \lt p'$ and a value $p_1 \gt p'$ such that the error is considered serious if we reject the lot when $p \le p_0$ (type I) or if we accept the lot when $p \ge p_1$ (type II). Let the probabilities associated with these risks be $\alpha$ and $\beta$ respectively. Thus the tolerated risk is characterized by four quantities $p_0, p_1, \alpha$ and $\beta$.
We can translate the above requirement into the following hypothesis testing over quantities $p_0, p_1, \alpha$ and $\beta $. Let $H_0: p = p_0$ and $H_1: p = p_1$. Consider the sequential probability ratio test T for testing $H_0$ against $H_1$ in which $\alpha$ is the probability of accepting $H_1$ when $H_0$ is true and $\beta$ is the probability of accepting $H_0$ when $H_1$ is true. This test will satisfy our risk requirements because for this test the probability of accepting the lot (accepting $H_0$) is $\le \beta$ whenever $p \ge p_1$ and the probability of rejecting the lot (accepting $H_1$) is $\le \alpha$ whenever $p \le p_0$
The sequential probability ratio can be written as:
$\frac{p_{1m}}{p_{0m}} = \frac{p_1^{d_m}(1-p_1)^{m-d_m}}{p_0^{d_m}(1-p_0)^{m-d_m}}$ (13)
where the subscript in $d_m$ is introduced to emphasize that this is the number of defective units in the $m$ units that have been inspected. If we plug equation (13) into inequalities 10 to 12, take the logarithms, and solve for $d_m$ we get the following inequalities:
$d_m \ge \frac{\log \frac{1 - \beta}{\alpha}}{\log \frac{p_1}{p_0} - \log \frac{1 - p_1}{1 - p_0}} + m \frac{\log \frac{1 - p_0}{1 - p_1}}{\log \frac{p_1}{p_0} - \log \frac{1 - p_1}{1 - p_0}}$ (14)
$d_m \le \frac{\log \frac{\beta}{1 - \alpha}}{\log \frac{p_1}{p_0} - \log \frac{1 - p_1}{1 - p_0}} + m \frac{\log \frac{1 - p_0}{1 - p_1}}{\log \frac{p_1}{p_0} - \log \frac{1 - p_1}{1 - p_0}}$ (15)
$\frac{\log \frac{\beta}{1 - \alpha}}{\log \frac{p_1}{p_0} - \log \frac{1 - p_1}{1 - p_0}} + m \frac{\log \frac{1 - p_0}{1 - p_1}}{\log \frac{p_1}{p_0} - \log \frac{1 - p_1}{1 - p_0}} \lt d_m \lt \frac{\log \frac{1 - \beta}{\alpha}}{\log \frac{p_1}{p_0} - \log \frac{1 - p_1}{1 - p_0}} + m \frac{\log \frac{1 - p_0}{1 - p_1}}{\log \frac{p_1}{p_0} - \log \frac{1 - p_1}{1 - p_0}}$ (16)
Equations 14 to 16 represent three decision regions that correspond to the three decision rules of the SPRT process defined by Eq. 10-12. These regions can be graphically represented by two lines with equal slopes that define the boundaries for accepting or rejecting the null hypothesis after $m$ observations:
$A_m = \frac{\log \frac{\beta}{1 - \alpha}}{\log \frac{p_1}{p_0} - \log \frac{1 - p_1}{1 - p_0}} + m \frac{\log \frac{1 - p_0}{1 - p_1}}{\log \frac{p_1}{p_0} - \log \frac{1 - p_1}{1 - p_0}}$ (17), i.e., the acceptance line $L_0$, and
$R_m = \frac{\log \frac{1 - \beta}{\alpha}}{\log \frac{p_1}{p_0} - \log \frac{1 - p_1}{1 - p_0}} + m \frac{\log \frac{1 - p_0}{1 - p_1}}{\log \frac{p_1}{p_0} - \log \frac{1 - p_1}{1 - p_0}}$ (18), i.e., the rejection line $L_1$.
The terms acceptance line and rejection line are with respect to the null hypothesis $H_0$.
We can now graphically carry out the sequential test as follows. The number $m$ is measured along the abscissa axis, and $d$ is measured along the ordinate axis. We draw lines $L_0$ and $L_1$ and at each observation $m$, we plot the point $(m, d_m)$. The test continues as long as the point $(m, d_m)$ lies in the region between $L_0$ and $L_1$ and is terminated as soon as the point crosses either of these two lines. $H_0$ is rejected (the lot is rejected) if the point lies on $L_1$ or above, and it is accepted if the point lies on $L_0$ or below.
Simulated Example¶
class Lot():
def __init__(self, p, seed=42):
self.rng = np.random.default_rng(seed)
self.p = p
def observe_unit(self):
return self.rng.binomial(1, self.p)
class SPRT():
def __init__(self, p0, p1, alpha, beta):
assert p0 < p1, "p0 and p1 cannot be equal (div 0) and p1 must be stricktly bigger than p0"
assert 0 < p0 < 1 and 0 < p1 < 1, "p0 and p1 cannot be 0 or 1"
a = np.log(beta / (1 - alpha))
b = np.log((1 - beta) / alpha)
c = np.log(p1 / p0)
d = np.log((1 - p1) / (1 - p0))
e = np.log((1 - p0) / (1 - p1))
denom = c - d
self.bias0 = a / denom
self.bias1 = b / denom
self.slope = e / denom
assert self.slope > 0, "slope cannot be negative"
assert self.bias0 <= 0, "L0's bias must be negaive"
assert self.bias1 >= 0, "L1's bias must be positive"
def get_l0(self, m):
return self.bias0 + self.slope*m
def get_l1(self, m):
return self.bias1 + self.slope*m
p0, p1 = 0.4, 0.6
alpha, beta = 0.05, 0.2
sprt = SPRT(p0, p1, alpha, beta)
# consider three different lots
p_lot1, p_lot2, p_lot3 = 0.3, 0.5, 0.7
lot1, lot2, lot3 = Lot(p_lot1), Lot(p_lot2), Lot(p_lot3)
n_observe = 50
L0 = sprt.get_l0(0), sprt.get_l0(n_observe)
L1 = sprt.get_l1(0), sprt.get_l1(n_observe)
sample1 = np.cumsum([lot1.observe_unit() for m in range(n_observe)])
sample2 = np.cumsum([lot2.observe_unit() for m in range(n_observe)])
sample3 = np.cumsum([lot3.observe_unit() for m in range(n_observe)])
X = [m for m in range(1, n_observe+1)]
fig = make_subplots(rows=3, cols=1,)
fig.update_layout({"title": f"Simulated SPRT for hypothesis H0: p = {p0}, H1: p ={p1}, for three different lots with p = {p_lot1}, {p_lot2}, {p_lot3}",
# 'yaxis_range': [-0.5, 1.5],
"height": 1500}, )
fig.add_scatter(x=(0, n_observe), y=L0, name="Accept H0", mode="lines", line={"color": "blue", "width": 0.75, "dash": "dot"}, row=1, col=1)
fig.add_scatter(x=(0, n_observe), y=L1, name="Accept H1", mode="lines", line={"color": "red", "width": 0.75, "dash": "dot"}, row=1, col=1)
fig.add_scatter(x=X, y=sample1, name="Sample from Lot1", mode="lines+markers", row=1, col=1)
fig.add_scatter(x=(0, n_observe), y=L0, showlegend = False, mode="lines", line={"color": "blue", "width": 0.75, "dash": "dot"}, row=2, col=1)
fig.add_scatter(x=(0, n_observe), y=L1, showlegend = False, mode="lines", line={"color": "red", "width": 0.75, "dash": "dot"}, row=2, col=1)
fig.add_scatter(x=X, y=sample2, name="Sample from Lot2", mode="lines+markers", row=2, col=1)
fig.add_scatter(x=(0, n_observe), y=L0, showlegend = False, mode="lines", line={"color": "blue", "width": 0.75, "dash": "dot"}, row=3, col=1)
fig.add_scatter(x=(0, n_observe), y=L1, showlegend = False, mode="lines", line={"color": "red", "width": 0.75, "dash": "dot"}, row=3, col=1)
fig.add_scatter(x=X, y=sample3, name="Sample from Lot3", mode="lines+markers", row=3, col=1)
Comparison with fixed horizon sample size calculation
p0, p1 = 0.4, 0.6
alpha, beta = 0.05, 0.2
# Here we assume p0 is the baseline
sigma = np.sqrt(p0 * (1 - p0))
mde = abs(p1 - p0)
n_per_group = get_sample_size_MDE(
sigma=sigma,
alpha=alpha,
beta=beta,
mde=mde,
two_sample=True,
sides=2,
attrition=0.0
)
print("n_per_group =", n_per_group, "total =", 2 * n_per_group)
n_per_group = 95 total = 190
Compare the above number with the graphs. Although any single run may produce a false positive or a false negative (crossing a boundary when it should not) because of the chosen $\alpha$ and $\beta$ levels, the sequential test reaches a decision much sooner here (about 30 observations) than the 95 observations per group required by the fixed horizon design. Note that false positives and false negatives also occur in fixed horizon experiments. Either claim is easy to check by simulation; the sketch below does so for the sequential test.
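As a rough check of that last claim, the sketch below (my own illustration; it assumes the Lot and SPRT classes defined above are in scope, and the helper name run_sprt_once, the cap max_obs, and the number of runs are arbitrary choices) repeatedly runs the sequential test on lots whose true p equals p0 or p1 and tallies how often the wrong boundary is crossed. The empirical rates should be roughly bounded by the chosen alpha and beta.
def run_sprt_once(lot, sprt, max_obs=500):
    # Run one sequential test: accumulate successes d and stop at the first boundary crossing.
    d = 0
    for m in range(1, max_obs + 1):
        d += lot.observe_unit()
        if d >= sprt.get_l1(m):
            return "H1"          # rejection line crossed: reject H0
        if d <= sprt.get_l0(m):
            return "H0"          # acceptance line crossed: accept H0
    return "inconclusive"        # no decision within max_obs observations

n_runs = 2000
sprt_check = SPRT(p0=0.4, p1=0.6, alpha=0.05, beta=0.2)
# Empirical false positive rate: lots truly at p0 for which H1 ends up accepted
fp = sum(run_sprt_once(Lot(0.4, seed=s), sprt_check) == "H1" for s in range(n_runs)) / n_runs
# Empirical false negative rate: lots truly at p1 for which H0 ends up accepted
fn = sum(run_sprt_once(Lot(0.6, seed=s), sprt_check) == "H0" for s in range(n_runs)) / n_runs
print(f"empirical alpha ~ {fp:.3f} (target 0.05), empirical beta ~ {fn:.3f} (target 0.2)")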
Mixture Sequential Probability Ratio Test (mSPRT)¶
In SPRT we need to specify both the null hypothesis and the alternative hypothesis. Specifying an alternative hypothesis is not required in fixed horizon tests when only the Type I error probability constraint $\alpha$ is imposed, as in the margin-of-error approach to estimating population means. One method to overcome the need to specify the alternative hypothesis is the mixture sequential probability ratio test (mSPRT).
The mSPRT was first introduced by [Robbins 1970] as an extension of Wald's SPRT. Its first application to A/B testing was by [Johari et al 2015], and it can be outlined as follows, as noted in [Stenberg, 2019]. Let $\theta$ represent the parameter that measures the difference between the two groups in the experiment. Further let $\lbrace X_i\rbrace_{i=1}^n \sim f_\theta(x_n)$ be a sequence of iid random variables, $f_{\theta_0}(x_n)$ and $f_{\theta_1}(x_n)$ be two probability distributions, e.g. representing the null and alternative hypotheses, and $\Lambda_n = \frac {f_{\theta_1}(x_n)}{f_{\theta_0}(x_n)}$ be their likelihood ratio, which is a martingale under the null hypothesis $f_{\theta_0}(x_n)$. In mSPRT, we may not know the exact value of the parameter $\theta_1$. Instead, we use a prior distribution $\pi(\theta) > 0$, called a mixture, to represent the true difference between the two groups. This is sometimes easier than trying to specify a minimum detectable effect as required by SPRT or fixed horizon tests.
The likelihood ratio in mSPRT is defined as [Stenberg 2019]
$\displaystyle \tilde \Lambda_n = \int_{\theta \in \Theta} \Lambda_n \pi(\theta)d\theta = \int_{\theta \in \Theta} \Big(\prod_{i=1}^n \frac {f_{\theta}(x_i)}{f_{\theta_0}(x_i)}\Big)\pi(\theta)d\theta$ (19)
which simply states that in mSPRT, we integrate the SPRT test statistic over a prior distribution for the parameter $\theta$ under $H_1$. It is further shown that since $\tilde {\Lambda}_n$ is a martingale under the null hypothesis, it satisfies Doob's inequality:
$\displaystyle P_{\theta_0}\big[\tilde {\Lambda}_n \ge b \text{ for some } n \ge 1\big] \le \frac{1}{b}$ for any $b \gt 0$; hence a natural stopping rule for mSPRT is $\inf \lbrace n: \tilde {\Lambda}_n \ge \alpha^{-1}\rbrace$.
It further follows from Johari et al. [2015] that if after $n$ observations we define the p-value as
$p_n = \min\big[1, \min_{t \le n} \tilde {\Lambda}_t^{-1}\big]$ (20)
then the probability of the p-value falling below $\alpha$ at any time during the experiment is at most $\alpha$ itself:
$P_{\theta_0}[p_n \le \alpha] \le \alpha $ for any $n$.
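In code, this always-valid p-value is just a running minimum of the truncated inverse likelihood ratios. A minimal sketch (the lambdas sequence below is made up purely for illustration):
lambdas = np.array([0.8, 1.2, 3.0, 2.5, 7.0, 30.0])   # an illustrative sequence of Lambda_t values
p_raw = np.minimum(1.0, 1.0 / lambdas)                 # truncate 1 / Lambda_t at 1
p_always_valid = np.minimum.accumulate(p_raw)          # Eq. (20): running minimum over t <= n
print(p_always_valid)                                  # monotonically non-increasing by construction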
A closed form solution for Gaussian mSPRT is provided by [Stenberg 2019]. Denote $X_n = \lbrace X_1, X_2, ..., X_n\rbrace$ and $Y_n = \lbrace Y_1, Y_2, ..., Y_n \rbrace$ as observations drawn independently from the two normal distributions for each group, $Z_n = Y_n - X_n$, $Z_n \sim N(\theta, 2\sigma^2)$ and $\pi(\theta) \sim N(\theta_0, \tau^2)$, then the solution to
$\displaystyle \tilde \Lambda_n = \int_{\theta \in \Theta} \Big(\prod_{i=1}^n \frac {f_{\theta}(z_i)}{f_{\theta_0}(z_i)}\Big)\pi(\theta)d\theta$ (21)
is
$\displaystyle \tilde \Lambda_n = \sqrt \frac {2 \sigma^2}{2 \sigma^2 + n \tau^2} exp\Big\lbrace \frac{\tau^2n^2(\bar {Y}_n - \bar {X}_n - \theta_0)^2}{4 \sigma^2 (2 \sigma^2 + n \tau^2)} \Big\rbrace$ (22)
in which $X$ and $Y$ are observations from the control and the treatment groups. Also according to [Stenberg 2019], for large $n$, the following formula provides a good approximation for binary data in A/B tests:
$\displaystyle \tilde \Lambda_n = \sqrt \frac {V_n}{V_n + n \tau^2} exp\Big\lbrace \frac{\tau^2n^2(\bar {Y}_n - \bar {X}_n - \theta_0)^2}{2V_n (V_n + n \tau^2)} \Big\rbrace$ (23)
in which X and Y are binary and $V_n = \bar {X}_n(1 - \bar{X}_n) + \bar {Y}_n(1 - \bar{Y}_n)$. If we wanted to use the exact form of (19) with the Binomial likelihood (13), we would need a Beta prior, and the resulting integral may need to be solved numerically.
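As a rough illustration of that remark, the sketch below evaluates the exact mixture likelihood ratio for a single Bernoulli stream tested against a point null p0, with a Beta(a, b) mixing prior, by numerical integration. Note that here the mixture is over the Bernoulli success probability p itself (a one-sample test of p against p0), not over the difference between two groups as in (21)-(23); the function name, the Beta(1, 1) default, and the sample size are my own assumptions for illustration.
from scipy.integrate import quad
from scipy.stats import beta as beta_dist

def bernoulli_msprt_lambda(x, p0, a=1.0, b=1.0):
    # Exact mixture likelihood ratio: integrate the Bernoulli likelihood ratio
    # of the whole sample against a Beta(a, b) prior on p (illustrative sketch).
    x = np.asarray(x)
    n, s = len(x), x.sum()
    def integrand(p):
        log_ratio = s * (np.log(p) - np.log(p0)) + (n - s) * (np.log(1 - p) - np.log(1 - p0))
        return np.exp(log_ratio) * beta_dist.pdf(p, a, b)
    value, _ = quad(integrand, 0.0, 1.0)
    return value

rng = np.random.default_rng(0)
obs = rng.binomial(1, 0.6, size=40)            # data generated with true p = 0.6
print(bernoulli_msprt_lambda(obs, p0=0.4))     # compare with 1/alpha for the stopping rule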
One challenge in applying (23) to A/B testing is the need to set the prior variance $\tau^2$ for the mixture. It is very common to assume a normal mixing distribution $\pi(\theta) \sim N(\theta_0, \tau^2)$ because this is often observed in practice and, for large sample sizes, the distribution of the effect is normal. The benefit of assuming a mixture model instead of specifying a single alternative hypothesis is that these experiments usually target very small effects, and a misspecified minimum detectable effect can reduce the power of both t-tests and the SPRT, whereas mSPRT is robust to a misspecified mixture variance [Stenberg 2019].
Note again that the parameter $\theta$ in the above closed form solutions represents the difference between the groups; that is, if we denote $X_n = \lbrace X_1, X_2, ..., X_n\rbrace$ and $Y_n = \lbrace Y_1, Y_2, ..., Y_n \rbrace$ as observations drawn independently from the two distributions for each group, then $Z_n = Y_n - X_n$ with $Z_n \sim N(\theta, 2\sigma^2)$ is the random variable for which the likelihood ratio is calculated. This includes the convenient case of $\theta_0 = 0$ under the null hypothesis. See the appendix of [Stenberg 2019] for details.
Can we solve mSPRT numerically?¶
It is tempting to compare the closed form mSPRT solution above with a numerical solution. This might be needed in the case of the exact Binomial likelihood, which requires a Beta prior. For this purpose, we rewrite the mSPRT equation (21) in the following numerically more stable form and solve it.
$\displaystyle \tilde \Lambda_n = \int_{\theta \in \Theta} e^{\ln\Big(\prod_{i=1}^n \frac {f_{\theta}(z_i)}{f_{\theta_0}(z_i)}\Big)}\pi(\theta)d\theta = \int_{\theta \in \Theta} e^{\sum_{i=1}^n \ln \frac {f_{\theta}(z_i)}{f_{\theta_0}(z_i)}}\pi(\theta)d\theta$ (24)
One question that may arise when computing (24) is how the elements $z_i$ are calculated. The closed form (22) only involves $\bar X$ and $\bar Y$, which can be calculated separately for each sample. It can be shown with a little algebra that when the two variables are independent and the sample sizes are equal, the $n$ pairwise differences $z_i = x_i - y_i$ yield the same average and variance as the $n^2$ cross differences $z_{ij} = x_i - y_j$. This means that the following two ways of calculating $\bar Z$ and $Var(Z)$ are identical. If the samples were not independent, we would naturally have to consider matched pairs of X and Y; if they are independent, we can pair them any way we want and calculate the statistics.
$\displaystyle \bar Z = \frac{\sum_{i,j = 1}^n (x_i - y_j)}{n^2} = \frac{\sum_{i = 1}^n (nx_i - n \bar Y)}{n^2} = \frac{n^2 \bar X - n^2 \bar Y}{n^2} = \bar X - \bar Y$, which can equally be obtained from $\displaystyle \frac{\sum_{i = 1}^n (x_i - y_i)}{n}$.
$\displaystyle Var(Z) = \frac{\sum_{i,j = 1}^n \Big [(x_i - y_j) - (\bar X - \bar Y) \Big]^2}{n^2}$
$\displaystyle = \frac{\sum_{i,j = 1}^n \Big [(x_i - \bar X)^2 + (y_j - \bar Y)^2 -2(x_i - \bar X)(y_j - \bar Y) \Big]}{n^2}$
$\displaystyle = \frac{n \sum_{i = 1}^n (x_i - \bar X)^2 + n \sum_{j = 1}^n (y_j - \bar Y)^2 - 2 \sum_{i = 1}^n(x_i - \bar X) \sum_{j = 1}^n(y_j - \bar Y)}{n^2}$
Note that in deriving this formula, either factor of the cross-product term is zero (the deviations from each sample mean sum to zero), so using all $n^2$ pairs eliminates any dependency between X and Y. Hence, this sum is equivalent to the pairwise case only if X and Y are independent.
$\displaystyle = \frac{n \sum_{i = 1}^n (x_i - \bar X)^2 + n \sum_{j = 1}^n (y_j - \bar Y)^2}{n^2}$
$\displaystyle = \frac{n^2Var(X) + n^2Var(Y)}{n^2} = Var(X) + Var(Y)$. This always holds when we sum over all $n^2$ pairs, but only if X and Y are independent does $Var(X) + Var(Y) = Var(X-Y)$.
We can equally obtain the same result from $\displaystyle \frac{\sum_{i = 1}^n \Big [(x_i - y_i) - (\bar X - \bar Y) \Big]^2}{n}$ when X and Y are independent.
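A quick numerical check of these identities (the sample size and distributions below are arbitrary choices of mine):
rng = np.random.default_rng(0)
n = 200
x = rng.normal(1.0, 2.0, n)
y = rng.normal(0.5, 3.0, n)
zij = x[:, None] - y[None, :]     # all n^2 cross differences
zi = x - y                        # n pairwise differences (arbitrary pairing)
print(zij.mean(), zi.mean(), x.mean() - y.mean())   # all three means agree exactly
print(zij.var(), x.var() + y.var(), zi.var())       # first two agree exactly; the pairwise
# variance matches only up to sampling noise in the (near-zero) sample covariance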
from numpy.typing import ArrayLike
from scipy.integrate import quad
# from scipy.stats import norm, binom
class GaussianSample():
def __init__(self, loc, scale, seed=42):
self.rng = np.random.default_rng(seed)
self.loc = loc
self.scale = scale
def draw(self, size=None):
return self.rng.normal(self.loc, self.scale, size)
class BinomialSample():
def __init__(self, p, seed=42):
self.rng = np.random.default_rng(seed)
self.p = p
def draw(self, n=1, size=None):
return self.rng.binomial(n, self.p, size)
class mSPRT():
"""
mSPRT parent class containing closed form solution
and helper function for numerical solutions
"""
def __init__(self, theta0, tau):
self.theta0 = theta0
self.tau = tau
# def get_likelihood(self, f_theta: ArrayLike, f_theta0: ArrayLike):
# log_likelihood = np.sum(np.log(f_theta) - np.log(f_theta0))
# return np.exp(log_likelihood)
def get_lambda_normal_cf(self, X, Y, sigma):
"""
Calculate lambda hat from closed form.
"""
if sigma == 0:
print("invalid data")
return None
n = len(X)
X, Y = np.array(X), np.array(Y)
Z_bar = np.mean(Y) - np.mean(X)
n2 = n**2
sigma2 = sigma**2
tau2 = self.tau**2
a = np.sqrt(2*sigma2 / (2*sigma2 + n*tau2))
b = tau2*n2*(Z_bar - self.theta0)**2
c = 4*sigma2*(2*sigma2 + n*tau2)
return a*np.exp(b/c)
def get_lambda_bernoulli_cf(self, X, Y):
"""
Calculate lambda hat from closed form.
"""
n = len(X)
X, Y = np.array(X), np.array(Y)
X_bar, Y_bar = np.mean(X), np.mean(Y)
Z_bar = Y_bar - X_bar
# V_n can be 0, especially in the beginning when there is not enough data.
# The obvious case is a single sample, but it can also happen
# whenever the expression below evaluates to zero.
Vn = X_bar*(1 - X_bar) + Y_bar*(1 - Y_bar)
if Vn == 0:
return None
n2 = n**2
tau2 = self.tau**2
a = np.sqrt(Vn / (Vn + n*tau2))
b = tau2*n2*(Z_bar - self.theta0)**2
c = 2*Vn*(Vn + n*tau2)
return a*np.exp(b/c)
def get_likelihood(self, theta: float, Z: ArrayLike, sigma):
"""
Function of theta for integration
Z is the array of Gaussian observations
"""
f_theta = norm.pdf(Z, theta, np.sqrt(2)*sigma)
f_theta0 = norm.pdf(Z, self.theta0, np.sqrt(2)*sigma)
pi_theta = norm.pdf(theta, self.theta0, self.tau)
log_likelihood = np.sum(np.log(f_theta) - np.log(f_theta0))
return np.exp(log_likelihood)*pi_theta
def get_lambda_normal_nm(self, X, Y, sigma):
"""
Calculate lambda hat numerically.
"""
if sigma == 0:
print("invalid data")
return None
X, Y = np.array(X), np.array(Y)
Z = Y - X
# We integrate over theta0 +/- 3*tau (a 6-tau-wide interval), which covers more
# than 99.7 % of the area under the normal prior density.
a, b = self.theta0 - 3*self.tau, self.theta0 + 3*self.tau
y, err = quad(self.get_likelihood, a, b, args=(Z, sigma))
return y
Let's first verify that the values of lambda are similar in both the closed form and the numerical calculation for Gaussian samples. We can see that, due to exponentiation, when the true difference between the means is large and sigma is relatively small, the values of lambda get extremely large and we run into numerical issues when comparing the two calculations; but for cases where the values are small, the results are similar. This may need further investigation to make sure it does not affect the tests, but for now we skip it.
# compare numeric and closed form solutions for different samples sizes
N = 50
p1, p2, sigma = 0.3, 0.6, 1
theta0 = 0.0 # Diff of means under H0
tau = 2 # variance of mixture
msprt = mSPRT(theta0, tau)
sampler0 = GaussianSample(p1, sigma)
sampler1 = GaussianSample(p2, sigma)
lambda_cf, lambda_nm = [], []
for n in range(1, N+1): # n represents sample size
X = np.array(sampler0.draw(n))
Y = np.array(sampler1.draw(n))
lambda_cf.append(msprt.get_lambda_normal_cf(X, Y, sigma))
lambda_nm.append(msprt.get_lambda_normal_nm(X, Y, sigma))
# the closed form and numerical solutions are very close (the printed value is the mean difference)
print("RMS", np.mean(np.array(lambda_cf) - np.array(lambda_nm)))
print("------------------------------------")
N = 50
p1, p2, sigma = 100.0, 130.0, 10
theta0 = 0.0 # Diff of means under H0
tau = 2 # variance of mixture
msprt = mSPRT(theta0, tau)
sampler0 = GaussianSample(p1, sigma)
sampler1 = GaussianSample(p2, sigma)
lambda_cf, lambda_nm = [], []
for n in range(1, N+1): # n represents sample size
X = np.array(sampler0.draw(n))
Y = np.array(sampler1.draw(n))
lambda_cf.append(msprt.get_lambda_normal_cf(X, Y, sigma))
lambda_nm.append(msprt.get_lambda_normal_nm(X, Y, sigma))
# here the closed form and numerical solutions diverge wildly due to overflow (the printed value is the mean difference)
print("RMS", np.mean(np.array(lambda_cf) - np.array(lambda_nm)))
RMS 3.4458893599653705e-09 ------------------------------------ RMS 4.674640557812788e+22
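One simple mitigation for the overflow noted above (a standard numerical trick, not something done in the references) is to work with the logarithm of the closed form (22); the stopping rule $\tilde\Lambda_n \ge \alpha^{-1}$ then becomes $\log\tilde\Lambda_n \ge -\log\alpha$. A minimal sketch mirroring get_lambda_normal_cf:
def log_lambda_normal_cf(X, Y, sigma, theta0, tau):
    # log of Eq. (22); finite even when Lambda itself would overflow
    n = len(X)
    z_bar = np.mean(Y) - np.mean(X)
    s2, t2 = sigma**2, tau**2
    return 0.5 * np.log(2 * s2 / (2 * s2 + n * t2)) + (t2 * n**2 * (z_bar - theta0)**2) / (4 * s2 * (2 * s2 + n * t2))

# X and Y here are the last (n = 50) Gaussian samples from the loop above
print(log_lambda_normal_cf(X, Y, sigma=10, theta0=0.0, tau=2))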
Let's now simulate two different tests. One is an A/B test and the other is a Gaussian test. We use similar parameter values only to show how the tests converge differently; otherwise these two tests are not the same: in the first, the observations are drawn from a Bernoulli distribution, whereas in the second they are drawn from a Gaussian distribution.
# A/B test
N = 100
p1, p2 = 0.3, 0.6
theta0 = 0.0 # Diff of means under H0
tau = 0.05 # variance of mixture
msprt = mSPRT(theta0, tau)
# do not use the same random seed for the two samplers, otherwise they will be in sync
samplerb0 = BinomialSample(p1, seed=11)
samplerb1 = BinomialSample(p2, seed=37)
lambda_cf1 = []
X, Y = [], []
# We draw one at a time to simulate sequential testing
for i in range(1, N+1):
X.append(samplerb0.draw(n=1))
Y.append(samplerb1.draw(n=1))
lambda_cf1.append(msprt.get_lambda_bernoulli_cf(X, Y,))
#----------------------------------
# Gaussian test
N = 100
mu1, mu2, sigma = 0.3, 0.6, np.sqrt(0.45*(1-0.45)) # for average p (pooled)
theta0 = 0.05 # Diff of means under H0
tau = 0.1 # variance of mixture
msprt = mSPRT(theta0, tau)
# do not use the same random seed for the two samplers, otherwise they will be in sync
samplerg0 = GaussianSample(mu1, sigma, seed=11)
samplerg1 = GaussianSample(mu2, sigma, seed=37)
lambda_cf2, lambda_nm2 = [], []
X, Y = [], []
# We draw one at a time to simulate sequential testing
for i in range(1, N+1):
X.append(samplerg0.draw())
Y.append(samplerg1.draw())
lambda_cf2.append(msprt.get_lambda_normal_cf(X, Y, sigma))
lambda_nm2.append(msprt.get_lambda_normal_nm(X, Y, sigma))
Let's calculate p-values
lambda_cf1 = [x for x in lambda_cf1 if x is not None]
lambda_cf2 = [x for x in lambda_cf2 if x is not None]
lambda_nm2 = [x for x in lambda_nm2 if x is not None]
p_cf1 = [min(1, 1/x) for x in lambda_cf1]
p_cf2 = [min(1, 1/x) for x in lambda_cf2]
p_nm2 = [min(1, 1/x) for x in lambda_nm2]
# mean and RMS of the difference between the closed form and numerical lambdas
print(np.mean(np.array(lambda_cf2) - np.array(lambda_nm2)))
print(np.sqrt(np.mean((np.array(lambda_cf2) - np.array(lambda_nm2))**2)))
0.01769053394626333 0.028664705511410888
print(np.mean(np.array(p_cf2) - np.array(p_nm2)))
print(np.sqrt(np.mean((np.array(p_cf2) - np.array(p_nm2))**2)))
-0.0011365351459868937 0.001591464082717296
from copy import deepcopy
def fmin(a):
"""
Forward copy the minimum element in a list, e.g.
[4, 3, 5, 6, 2, 4, 1, 3, 6] --> [4, 3, 3, 3, 2, 2, 1, 1, 1]
"""
L = deepcopy(a)
if not L:
return None
mn = L[0]
for i in range(len(L)):
if L[i] < mn:
mn = L[i]
else:
L[i] = mn
return L
pv_cf1 = fmin(p_cf1)
pv_cf2 = fmin(p_cf2)
pv_nm2 = fmin(p_nm2)
print(np.mean(np.array(pv_cf2) - np.array(pv_nm2)))
print(np.sqrt(np.mean((np.array(pv_cf2) - np.array(pv_nm2))**2)))
-0.0016721060447630182 0.00212154120582936
X1, X2 = [m for m in range(len(p_cf1))], [m for m in range(len(p_cf2))]
fig = make_subplots(rows=1, cols=1,)
fig.update_layout({"title": f"P Raw truncated at 1 for mSPRT for Gaussian and Bernoulli tests for verification only (these are not p values)",
# 'yaxis_range': [-0.5, 1.5],
"height": 500}, )
fig.add_scatter(x=X1, y=p_cf1, name="Bernoulli", mode="lines", line={"color": "blue", "width": 0.75,}, row=1, col=1)
fig.add_scatter(x=X2, y=p_cf2, name="Gaussian", mode="lines", line={"color": "red", "width": 0.75}, row=1, col=1)
X1, X2 = [m for m in range(len(pv_cf1))], [m for m in range(len(pv_cf2))]
fig = make_subplots(rows=1, cols=1,)
fig.update_layout({"title": f"p-values for mSPRT for Gaussian and Bernoulli tests",
# 'yaxis_range': [-0.5, 1.5],
"height": 500}, )
fig.add_scatter(x=X1, y=pv_cf1, name="Bernoulli", mode="lines", line={"color": "blue", "width": 0.75,}, row=1, col=1)
fig.add_scatter(x=X2, y=pv_cf2, name="Gaussian", mode="lines", line={"color": "red", "width": 0.75}, row=1, col=1)
fig.add_hline(y=0.05, line_width=0.75, line_dash="dot", line_color="black")
fig.add_annotation(
x=100.0, y=0.05, # position
text="alpha=0.05", # text
#showarrow=True,
arrowhead=3, arrowsize=1, arrowwidth=2, arrowcolor="black"
)
fig.show()
We can see that after about 50 samples, the Bernoulli (A/B) test converged, with its p-value dropping below the $\alpha = 0.05$ threshold. The fixed horizon sample size for this case would be:
# TODO is mSPRT equivalent to a two-sample test? From the closed form solution and the use of 2*sigma^2 it looks like it is.
p0 = 0.45
# Here we assume p0 is the baseline
sigma = np.sqrt(p0 * (1 - p0)) # baseline SD, no extra sqrt(2)
mde = 0.3
n_per_group = get_sample_size_MDE(
sigma=sigma,
alpha=0.05,
beta=0.2,
mde=mde,
two_sample=True, # applies sqrt(2) internally for equal-size two-sample
sides=2, # set to 1 if you want one-sided
attrition=0.0
)
print(n_per_group)
44
At first glance, the mSPRT does not seem to reduce the test size. But one should keep in mind that here the comparison is made against a mixture of alternatives, not a single alternative. Also, the power of mSPRT is 1, that is, the mSPRT eventually detects the effect if there is one. The mixture variance $\tau^2$ plays an important role in the length of the experiment. We will not simulate it here, but as $\tau^2$ increases, the time for the p-values to converge increases. Careful selection of the mixture variance is therefore important.
X1, X2 = [m for m in range(len(pv_cf2))], [m for m in range(len(pv_nm2))]
fig = make_subplots(rows=1, cols=1,)
fig.update_layout({"title": f"comparing p values between closed form and numberic for for Normal case",
# 'yaxis_range': [-0.5, 1.5],
"height": 500}, )
fig.add_scatter(x=X1, y=pv_cf2, name="Closed Form", mode="lines", line={"color": "blue", "width": 0.75,}, row=1, col=1)
fig.add_scatter(x=X2, y=pv_nm2, name="Numerical", mode="lines", line={"color": "red", "width": 0.75}, row=1, col=1)
We can see that the closed form and the numerical form are almost equivalent, as the curves fully overlap.
Multi-Arm Bandit Tests¶
Problem Statement¶
In a typical setting, there are $K$ actions or arms. Arm $a$ is associated with an unknown quantity $v_a$ that gives the value of that arm. Assume the rewards come from the probability distribution $f_a(y \mid \bf\Theta)$, where $a$ indexes the action that is taken, $y$ is the observed reward, and $\bf\Theta$ is the vector of unknown parameters that characterize $f_a$ and is to be learned from the experiment.
Examples:
- In the slot machine problem (binomial bandit), we have ${\bf\Theta} = [\theta_1, \theta_2, ..., \theta_K]^T$ and $v_a({\bf\Theta}) = \theta_a (a = 1, 2, ..., K)$.
- In a two-factor experiment for maximizing the conversion rate of a website, where we have two dummy variables $X_c$ (color red=0 or blue=1) and $X_p$ (position left=0 or right=1), $\bf\Theta$ could be the set of logistic regression coefficients in $logit(Pr(conversion)) = \theta_0 + \theta_1X_c + \theta_2X_p + \theta_3X_cX_p$. The action $a$ is any realization of the vector of design variables ${\bf X} = [1, X_c, X_p, X_cX_p]^T$ with the value $v_a({\bf\Theta}) = logit^{-1}({\bf\Theta}^T {\bf X}_a)$. In general, ${\bf\Theta} = [\theta_0, \theta_1, ...., \theta_p]^T$ and ${\bf X} = [1, X_1, X_2, ..., X_p]$ is a vector representation of the actions.
Thompson Sampling¶
For the binomial bandit, the reward of an arm has a Bernoulli distribution $Bernoulli(\theta)$, where $\theta$ is the probability of success (reward = 1) of that arm. If we play this arm $n$ times, the total reward (the number of successes) is $y_{\theta} \sim Binomial(n, \theta)$. We seek the posterior distribution $P(\theta \mid y)$ as we observe rewards, which by Bayes' rule is $P(\theta \mid y) \varpropto P(y \mid \theta)P(\theta)$. Assume a prior $\theta \sim Beta(\alpha, \beta)$ for the arm ($\alpha$ and $\beta$ count successes and failures), so we can write the posterior distribution as:
$P(\theta \mid y) \varpropto P(y \mid \theta)P(\theta) = {n \choose y}\theta^y(1 - \theta)^{n - y} \frac 1 {B(\alpha, \beta)}\theta^{\alpha - 1}(1 - \theta)^{\beta - 1} \varpropto \theta^{y+\alpha-1}(1 - \theta)^{n-y+\beta-1} \varpropto Beta(y + \alpha, n - y + \beta)$ (25)
The Beta family is conjugate to the Binomial family, meaning that if we start from a Beta prior, we end up with a Beta posterior. Note that $y$ is the number of successes and $n-y$ is the number of failures, so to model $P(\theta)$ we start by setting $\alpha$ and $\beta$ to 1 for all arms and then increment them as we observe a success or a failure for that arm. In the long run, the optimal arm will have a higher rate of successes and hence a higher probability of being selected via Thompson sampling, as sketched below.
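A minimal sketch of this conjugate update combined with Thompson sampling (the arm probabilities and the number of plays are illustrative choices; a fuller simulation follows later in the notebook):
rng = np.random.default_rng(0)
true_p = np.array([0.58, 0.60, 0.62])     # unknown to the algorithm
alpha_ = np.ones(3)                       # Beta prior: 1 pseudo-success per arm
beta_ = np.ones(3)                        # Beta prior: 1 pseudo-failure per arm

for t in range(1000):
    theta = rng.beta(alpha_, beta_)       # one draw per arm from the current Beta posteriors
    a = int(np.argmax(theta))             # Thompson sampling: play the arm with the largest draw
    reward = rng.binomial(1, true_p[a])   # observe a Bernoulli reward
    alpha_[a] += reward                   # conjugate Beta-Binomial update, Eq. (25)
    beta_[a] += 1 - reward

print(np.round(alpha_ / (alpha_ + beta_), 3))   # posterior means after 1000 plays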
Stopping Criteria for Thompson Sampling¶
We can apply Thompson sampling to determine the winning arm in the multi-armed bandit problem as follows. Let ${\bf y}_t$ be the set of rewards observed by time $t$. The probability that arm $a$ is optimal can be defined by:
$\omega_{at} = Pr(a \space is \space optimal \mid {\bf y}_t) = \int I(a = argmax \space v_a({\bf\Theta}))P({\bf \Theta} \mid {\bf y}_t)d{\bf\Theta}$ (26)
which simply states that the action $a$ has the highest probability of being optimal if for all possible values of the parameter vector it has the maximum value most of the time. $I$ is the indicator function. Note that $\omega_{at}$ is not the same as the belief in $P({\bf\Theta})$ in the experiment.
The Thompson heuristic assigns observation at time $t+1$ to arm $a$ with the probability $\omega_{at}$. We can compute $\omega_{at}$ from the Monte-Carlo sample ${\bf \Theta}^{(1)}, {\bf \Theta}^{(2)}, ..., {\bf\Theta}^{(G)}$ simulated from $P({\bf\Theta} \mid {\bf y}_t)$ using
$\omega_{a_t} \approx \frac 1 G \sum_{g=1}^GI(a = argmax \space v_a({\bf\Theta}^{(g)}))$ (27)
which is the proportion of times that action $a$ has the maximum value in the sample.
Note 1: The algorithm where one first computes $\omega_{at}$ and then generates an action from the discrete set $\omega_{1t}, \omega_{2t}, ...,\omega_{Kt},$ is equivalent to generating a single ${\bf\Theta}_{t} \sim P({\bf \Theta} \mid {\bf y}_t)$ and selecting the action $a$ with maximum value $v_a({\bf\Theta}_t)$.
Thus Thompson sampling can be implemented using a single draw from $P({\bf \Theta} \mid \bf y)$, but calculating $\omega_{at}$ produces useful statistics, including a decision rule to stop the experiment. In practice, in single-draw Thompson sampling we can still compute $\omega_{at}$, but we need to keep track of the history of the number of draws and the number of times each action had the maximum value across all draws. Using G draws to calculate this probability is easier.
As an example, consider the three-arm case $\bf\Theta = (\theta_a, \theta_b, \theta_c)^T$ below, and assume that at time $t$ we have (13, 9) successes and failures for arm $a$, (21, 11) for arm $b$, and (31, 11) for arm $c$. Suppose we get the following Monte-Carlo samples ${\bf\Theta}^{(g)}$:
| draw(g) | $\theta_a \sim Beta(13, 9)$ | $\theta_b \sim Beta(21, 11)$ | $\theta_c \sim Beta(31, 11)$ | Winner |
|---|---|---|---|---|
| 1 | 0.54 | 0.73 | 0.74 | c |
| 2 | 0.55 | 0.66 | 0.73 | c |
| 3 | 0.53 | 0.85 | 0.80 | b |
| 4 | 0.57 | 0.50 | 0.65 | c |
| 5 | 0.52 | 0.67 | 0.83 | c |
| ... | ... | ... | ... | ... |
| G | 0.65 | 0.84 | 0.63 | b |
From this sample we can calculate the probabilities that each action is optimal, ${\bf\Omega}_t = (\omega_{at}, \omega_{bt}, \omega_{ct})$. Assume that in 2/3 of the rows above arm $c$ is the winner, and in 1/3 of the rows arm $b$ is the winner. So at this time step $t$, the probability of being optimal for each action is ${\bf\Omega}_t = (0.0, 0.333, 0.667)$.
The probability of being the optimal arm, $\omega_{at}$, can be used as a stopping criterion to end the experiment. For example, we can decide to end the experiment once one of the arms has a probability of being optimal of at least 95%.
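For the three-arm example above, the optimality probabilities in Eq. (27) can be estimated in a few lines (a sketch; the Beta parameters come from the table, while the number of draws G and the variable names are arbitrary):
rng = np.random.default_rng(0)
G = 10000
# G posterior draws per arm at time t: Beta(13, 9), Beta(21, 11), Beta(31, 11)
draws = np.column_stack([rng.beta(13, 9, G), rng.beta(21, 11, G), rng.beta(31, 11, G)])
winners = np.argmax(draws, axis=1)               # which arm has the maximum value in each draw
omega = np.bincount(winners, minlength=3) / G    # Eq. (27): proportion of draws each arm wins
print(np.round(omega, 3))                        # arm c should carry most of the probability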
From the Monte-Carlo method that is used to calculate $\omega_{at}$ we can also calculate the estimated regret and use it as a second condition to end the experiment, in conjunction with the 95% confidence of finding the optimal arm. Let ${\bf \Theta}_0$ be the true value of ${\bf \Theta}$ and $a^* = argmax_a v_a({\bf \Theta}_0)$ be the true optimal arm. Then the regret at time $t$ is $r_t = v_{a^*}({\bf \Theta}_0) - v_{a_t^*}({\bf \Theta}_0)$, which is the value difference between the truly optimal arm and the apparently optimal arm at time $t$.
In practice, the true regret is unknown but it can be estimated as $r_t^{(g)} = v_*({\bf \Theta}^{(g)}) - v_{a_t^*}({\bf \Theta}^{(g)})$ (28)
where $v_*({\bf \Theta}^{(g)}) = max_a v_a({\bf \Theta}^{(g)})$ is the current maximum value in the Monte-Carlo draw of ${\bf \Theta}^{(g)}$ from $P({\bf \Theta} \mid {\bf y}_t)$ in draw $g$ and
$v_{a_t^*}({\bf \Theta}^{(g)})$ the value of the arm that is deemed best across all Monte-Carlo draws, in draw $g$.
Note the emphasis on draw $g$ here. This means that we first find the best action $a_t^*$ across all draws, and then use its value in the current draw. Hence, the difference in Eq. (28) is often zero and sometimes positive.
In practice, it may be useful to use the relative (percentage) regret instead of its absolute value, defined by
$\rho_t^{(g)} = \frac {v_*({\bf \Theta}^{(g)}) - v_{a_t^*}({\bf \Theta}^{(g)})}{v_{a_t^*}({\bf \Theta}^{(g)}) }$ (29)
The second criterion to end the experiment can now be defined as stopping the experiment when the potential value remaining (PVR) defined by (29) is below a practical significance level or, if no practical significance level is given, when the remaining regret is less than 1%.
Let's use the previous example to demonstrate how regret is calculated. As evident from its expected value, the best arm so far at time $t$ is arm c. To calculate the regret in each draw, i.e., each row, we subtract the value of this arm in that row from the maximum value in the row. For example, the maximum value of row 1 is 0.74, which is actually the value of arm c in this row, hence the regret is 0. In row 3, arm c does not have the maximum value; the maximum value is 0.85 and the value of arm c is 0.80, resulting in a regret of 0.05.
| draw(g) | $\theta_a \sim Beta(13, 9)$ | $\theta_b \sim Beta(21, 11)$ | $\theta_c \sim Beta(31, 11)$ | Regret |
|---|---|---|---|---|
| 1 | 0.54 | 0.73 | 0.74 | 0.0 |
| 2 | 0.55 | 0.66 | 0.73 | 0.0 |
| 3 | 0.53 | 0.85 | 0.80 | 0.05 |
| 4 | 0.57 | 0.50 | 0.65 | 0.0 |
| 5 | 0.52 | 0.67 | 0.83 | 0.0 |
| ... | ... | ... | ... | ... |
| G | 0.65 | 0.84 | 0.63 | 0.21 |
The numbers in the above table show that there is still some potential value remaining in the experiment, as the regret exceeds 1% in some draws, so there is a chance that some action is better than c. But if instead we had the following results:
| draw(g) | $\theta_a \sim Beta(13, 9)$ | $\theta_b \sim Beta(21, 11)$ | $\theta_c \sim Beta(31, 11)$ | Regret |
|---|---|---|---|---|
| 1 | 0.54 | 0.73 | 0.74 | 0.0 |
| 2 | 0.55 | 0.66 | 0.73 | 0.0 |
| 3 | 0.53 | 0.805 | 0.80 | 0.005 |
| 4 | 0.57 | 0.50 | 0.65 | 0.0 |
| 5 | 0.52 | 0.67 | 0.83 | 0.0 |
| ... | ... | ... | ... | ... |
| G | 0.62 | 0.639 | 0.63 | 0.009 |
The maximum value of the regret across all draws is 0.009, which indicates that there is less than 1% potential value remaining in the experiment. If we also determine that arm c has more than a 95% probability of being the optimal arm, we can end this experiment.
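Continuing with the draws and omega arrays from the sketch above, the per-draw regret of Eq. (28) and the potential value remaining of Eq. (29) can be computed as follows (the 95% / 1% thresholds are the ones discussed in the text):
best_arm = int(np.argmax(omega))                   # apparently optimal arm a_t^*
regret = draws.max(axis=1) - draws[:, best_arm]    # Eq. (28): zero whenever a_t^* wins the draw
pvr = regret / draws[:, best_arm]                  # Eq. (29): potential value remaining per draw
stop = (omega[best_arm] >= 0.95) and (pvr.max() < 0.01)
print(np.round(omega[best_arm], 3), np.round(pvr.max(), 3), stop)
# With the Beta(13, 9) / Beta(21, 11) / Beta(31, 11) posteriors, some value typically remains,
# so stop is expected to be False here, consistent with the first regret table above.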
An alternative way to track the regret is to estimate its expected value. Let the expected value of an action be defined by $\tilde{v}_a({\bf \Theta}) = E[v_a({\bf \Theta})]$.
The estimated expected regret is then defined by:
$\tilde{r_t} = E[v_{a_t^*}(\bf \Theta_t)] - E[v_{a_t}(\bf \Theta_t)]$ (30)
where $a_t^*$ is the best known action at time $t$ and $a_t$ is the selected action at time $t$. Note that we have defined the above equation for single-draw Thompson sampling at each time step $t$, but it is equally applicable to each draw $g$ of $G$ Monte-Carlo draws with some changes in notation.
The expected regrets for the above example are shown below. The expected values for the three actions are 13/22 = 0.59, 21/32 = 0.656 and 31/42 = 0.738.
| draw(g) | $\theta_a \sim Beta(13, 9)$, $E[\theta_a]=0.59$ | $\theta_b \sim Beta(21, 11)$, $E[\theta_b]=0.656$ | $\theta_c \sim Beta(31, 11)$, $E[\theta_c]=0.738$ | Expected Regret |
|---|---|---|---|---|
| 1 | 0.54 | 0.73 | 0.74 | 0.0 |
| 2 | 0.55 | 0.66 | 0.73 | 0.0 |
| 3 | 0.53 | 0.805 | 0.80 | 0.082 |
| 4 | 0.57 | 0.50 | 0.65 | 0.0 |
| 5 | 0.52 | 0.67 | 0.83 | 0.0 |
| ... | ... | ... | ... | ... |
| G | 0.62 | 0.639 | 0.63 | 0.082 |
To understand the calculations: the drawn values are only used to find the arm with the maximum value (the selected arm), and the regret is calculated using the expected values. For example, in row 1, arm c is both the best known action (highest expected value) and the action with the maximum drawn value in this row, so the regret is 0.738 - 0.738 = 0.0. In row 3, arm b has the maximum drawn value, so the expected regret is the difference between the expected values of the best known arm c and the selected arm b: 0.738 - 0.656 = 0.082. We get the same result in row G because, even though the drawn values differ from row 3, the selected arm and the best known arm are the same, hence the difference between the expected values is the same.
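The 0.082 figure can be reproduced directly from the Beta posterior means (a quick check; indices 0, 1, 2 stand for arms a, b, c):
expected = np.array([13 / 22, 21 / 32, 31 / 42])     # posterior means of arms a, b, c at time t
best_known = int(np.argmax(expected))                # arm c (index 2)
selected = 1                                         # in rows 3 and G the maximum drawn value belongs to arm b
print(round(expected[best_known] - expected[selected], 3))   # 0.738 - 0.656 = 0.082, as in Eq. (30)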
Note on the difference between time step $t$ and draws $G$: at each time step we must have at least one draw, but having more draws (G > 1) is optional. The regret definitions are per single draw, so it does not matter how many draws we have; when G > 1 we can aggregate the regrets in some convenient way if desirable. In single-draw sampling we just calculate one value for the regret, from either the sampled values or the expected values. The probability of being the optimal arm (26) can also be estimated using single draws per time step; in that case the summation in (27) runs over the $t$ time steps instead of the $G$ draws. In practice, however, this becomes more challenging because we now have to keep track of the history of the number of time steps and the number of times each arm was selected, and this tracking gets even more complicated in batch computation. Hence it is easier to draw G > 1 samples and calculate the probabilities of being the optimal arm on the spot, without keeping track of any history.
Also, the draws $G$ should not be confused with batch processing. The time step in batch processing, $\tau$, counts the number of batches, whereas the time step $t$ counts each time the arm is played (shown to the user). In batch processing we need to average the regrets. Based on all of this, the most efficient Monte-Carlo sampling scheme for batch mode is as follows: during service, do a single draw at each time step $t$; during the posterior update, calculate the average expected regret for the batch, then draw G samples and calculate the probabilities of being optimal. A rough sketch of this recipe follows.
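The sketch below reuses the Beta-Bernoulli setup from above; the batch size, number of batches, and variable names are my own choices, and the per-batch regret here is an approximation that uses the posterior means available at the end of each batch.
rng = np.random.default_rng(0)
true_p = np.array([0.58, 0.60, 0.62])            # unknown to the algorithm
alpha_, beta_ = np.ones(3), np.ones(3)           # Beta(1, 1) priors
G, batch_size, n_batches = 1000, 200, 20

for tau_step in range(n_batches):                # tau counts batches
    successes, plays = np.zeros(3), np.zeros(3)
    for _ in range(batch_size):                  # t counts individual plays within the batch
        theta = rng.beta(alpha_, beta_)          # single Thompson draw per play during service
        a = int(np.argmax(theta))
        r = rng.binomial(1, true_p[a])
        successes[a] += r
        plays[a] += 1
    alpha_ += successes                          # posterior update once per batch
    beta_ += plays - successes
    means = alpha_ / (alpha_ + beta_)
    avg_regret = np.sum(plays * (means.max() - means)) / plays.sum()   # average expected regret for the batch
    draws = rng.beta(alpha_, beta_, size=(G, 3))                       # G Monte-Carlo draws for omega
    omega_b = np.bincount(np.argmax(draws, axis=1), minlength=3) / G
    # a stopping check would look at omega_b.max() and avg_regret here

print(np.round(omega_b, 3), round(avg_regret, 4))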
Simulated Example¶
class Arm():
"""
The simulated reward environment for a Bernoulli arm with an unknown true
probability p.
"""
def __init__(self, p: float, seed: int=42):
self.rng = np.random.default_rng(seed)
self.p = p
def get_reward(self):
return self.rng.binomial(1, self.p)
class ArmModel():
"""
Model is Beta distribution
for Bernoulli bandit.
Params: alpha, beta
"""
def __init__(self, seed: int=42):
"""
"""
self.rng = np.random.default_rng(seed)
self.alpha = 1
self.beta = 1
self.value = 0.5
def reset(self):
self.alpha = 1
self.beta = 1
self.value = 0.5
def posterior_update(self, reward: int):
self.alpha += reward
self.beta += 1 - reward
@property
def expected_value(self):
return self.alpha / (self.alpha + self.beta)
# we don't need the setter method for the expected_value property
def draw(self):
self.value = self.rng.beta(self.alpha, self.beta)
return self.value
class MAB():
"""
For the purpose of arm selection, we draw one sample from
the posterior instead of drawing G samples and selecting the most probable arm
based on omega probabilities. Hence this MAB forces more exploration. However, we do support,
as an option, drawing G samples for calculating the omega probabilities used for
stopping.
"""
def __init__(self, arms: list, models: list, G: int =100, seed: int=42):
"""
Multi-Arm Bandit class
Arguments:
arms (list): List of Bernoulli arms
models (list): List of models for the arms
G (int): Number of Monte-Carlo draws
"""
self.rng = np.random.default_rng(seed)
self.K = len(arms)
self.arms = arms
self.models = models
self.G = G
def draw(self):
return [model.draw() for model in self.models]
def observe(self, selected_arm):
return self.arms[selected_arm].get_reward()
def sample_omega(self):
"""
Estimate the probability of being optimal for each arm model
by taking G Monte-Carlo draws and calculating the frequency
of being the winner for each arm.
"""
assert self.G > 1, "Monte-Carlo sample needs G > 1"
omega = np.array([0]*self.K)
for g in range(self.G):
sample = self.draw()
idx = np.argmax(sample)
omega[idx] += 1
omega = omega / self.G
return omega
def select(self, sample):
return np.argmax(sample)
def regret_from_expected_value(self, sample):
expected_values = [model.expected_value for model in self.models]
selected_arm = self.select(sample)
best_arm = self.select(expected_values)
regret = expected_values[best_arm] - expected_values[selected_arm]
regret_percent = regret / expected_values[best_arm]
return regret, regret_percent
def regret_from_value(self, sample, omegas=None):
omegas = omegas or self.sample_omega()
selected_arm = self.select(sample)
best_arm = self.select(omegas)
regret = sample[selected_arm] - sample[best_arm]
regret_percent = regret / sample[best_arm]
return regret, regret_percent, omegas
def posterior_update(self, selected_arm, reward):
self.models[selected_arm].posterior_update(reward)
class ExperimentResult():
"""Placeholder for results
'norm' prefix stands for normalized regret, i.e., regret percentage.
'avg' prefix stands for running average of the regret at the time step.
We use the range [1, T], which corresponds to the number of trials.
"""
def __init__(self, K: int, T: int, regret_method: str):
self.K = K
self.X = [i for i in range(1, T + 1)]
self.regret_method = regret_method
self.drawn = [0]*self.K
self.rewards = []
self.regret = []
self.norm_regret = []
self.avg_regret = []
self.avg_norm_regret = []
self.omegas = []
def format(self):
self.omegas = np.round(self.omegas, 3)
self.regret = np.round(self.regret, 3)
self.avg_regret = np.round(self.avg_regret, 3)
self.norm_regret = np.round(self.norm_regret, 3)
self.avg_norm_regret = np.round(self.avg_norm_regret, 3)
class RegretMethod():
def __init__(self, name):
self.name = name
def calc_step_average(self, x_list, x_t):
"""
Running average of a sequence of values
"""
if not x_list:
return x_t
t = 1 + len(x_list)
x_t_minus_1 = x_list[-1]
return x_t_minus_1 + (1 / t) * (x_t - x_t_minus_1)
def append_results(
self,
mab: MAB,
reward: int,
sample: list,
selected_arm: int,
result: ExperimentResult):
raise NotImplementedError
class ERegret(RegretMethod):
"""
Expected regret
"""
def __init__(self, name):
super(ERegret, self).__init__(name)
def append_results(
self,
mab: MAB,
reward: int,
sample: list,
selected_arm: int,
result: ExperimentResult
):
result.rewards.append(self.calc_step_average(result.rewards, reward))
result.drawn[selected_arm] += 1
step_omegas = list(np.array(result.drawn) / sum(result.drawn))
a, b = mab.regret_from_expected_value(sample)
result.regret.append(a)
result.norm_regret.append(b)
result.avg_regret.append(self.calc_step_average(result.avg_regret, a))
result.avg_norm_regret.append(self.calc_step_average(result.avg_norm_regret, b))
result.omegas.append(step_omegas)
class TRegret(RegretMethod):
"""
Time step based regret
"""
def __init__(self, name):
super(TRegret, self).__init__(name)
def append_results(
self,
mab: MAB,
reward: int,
sample: list,
selected_arm: int,
result: ExperimentResult
):
"""
In the first run the selected_arm has the highest omega in step_omegas, which is the way it should be.
This also shows why we need two conditions for optimality convergence: in the beginning
we may have an arm with a 100% probability of being optimal while the regret is still large. If, after
the regret gets sufficiently small, an arm reaches p_optimal > 0.95, then we have found
the optimal arm.
"""
result.rewards.append(self.calc_step_average(result.rewards, reward))
result.drawn[selected_arm] += 1
step_omegas = list(np.array(result.drawn) / sum(result.drawn))
a, b, _ = mab.regret_from_value(sample, omegas=step_omegas)
result.regret.append(a)
result.norm_regret.append(b)
result.avg_regret.append(self.calc_step_average(result.avg_regret, a))
result.avg_norm_regret.append(self.calc_step_average(result.avg_norm_regret, b))
result.omegas.append(step_omegas)
class GRegret(RegretMethod):
"""
G sample based regret.
"""
def __init__(self, name):
super(GRegret, self).__init__(name)
def append_results(
self,
mab: MAB,
reward: int,
sample: list,
selected_arm: int,
result: ExperimentResult
):
result.rewards.append(self.calc_step_average(result.rewards, reward))
result.drawn[selected_arm] += 1
a, b, step_omegas = mab.regret_from_value(sample)
result.regret.append(a)
result.norm_regret.append(b)
result.avg_regret.append(self.calc_step_average(result.avg_regret, a))
result.avg_norm_regret.append(self.calc_step_average(result.avg_norm_regret, b))
result.omegas.append(step_omegas)
class Experiment():
def __init__(
self,
p_list: list,
regret_method: RegretMethod,
T: int,
G:int,
seed: int=42
):
arms = [Arm(p, seed=seed) for p in p_list]
models = [ArmModel(seed=seed+1) for i in range(len(p_list))]
self.mab = MAB(arms, models, G, seed=seed+2)
self.regret_method = regret_method
self.T = T
self.result = ExperimentResult(self.mab.K, self.T, self.regret_method.name)
def run(self):
# time steps
# We use the range [1, T], which corresponds to the number of trials.
for t in range(1, self.T + 1):
sample = self.mab.draw()
selected_arm = self.mab.select(sample)
reward = self.mab.observe(selected_arm)
self.regret_method.append_results(
self.mab,
reward,
sample,
selected_arm,
self.result)
self.mab.posterior_update(selected_arm, reward)
self.result.format()
return self.result
p_list=[0.58, 0.60, 0.62]
T = 7000
G = 100
seed = 42
experiment1 = Experiment(p_list=p_list, regret_method=ERegret("E Regret"), T=T, G=G, seed=seed)
result1 = experiment1.run()
experiment2 = Experiment(p_list=p_list, regret_method=TRegret("T Regret"), T=T, G=G, seed=seed)
result2 = experiment2.run()
experiment3 = Experiment(p_list=p_list, regret_method=GRegret("G Regret"), T=T, G=G, seed=seed)
result3 = experiment3.run()
X = result1.X
fig = make_subplots(rows=1, cols=1,)
fig.update_layout({"title": f"Regret Values for Different Methods", "height": 500, "yaxis_range": [0.0, 0.5],}, )
fig.add_scatter(x=X, y=result1.regret, name=result1.regret_method, mode="lines", line={"width": 0.75,}, row=1, col=1)
fig.add_scatter(x=X, y=result2.regret, name=result2.regret_method, mode="lines", line={"width": 0.75,}, row=1, col=1)
fig.add_scatter(x=X, y=result3.regret, name=result3.regret_method, mode="lines", line={"width": 0.75,}, row=1, col=1)
fig = make_subplots(rows=1, cols=1,)
fig.update_layout({"title": f"Avg Regret Values for Different Methods", "height": 500, "yaxis_range": [0.0, 0.2],}, )
fig.add_scatter(x=X, y=result1.avg_regret, name=result1.regret_method, mode="lines", line={"width": 0.75,}, row=1, col=1)
fig.add_scatter(x=X, y=result2.avg_regret, name=result2.regret_method, mode="lines", line={"width": 0.75,}, row=1, col=1)
fig.add_scatter(x=X, y=result3.avg_regret, name=result3.regret_method, mode="lines", line={"width": 0.75,}, row=1, col=1)
fig = make_subplots(rows=1, cols=1,)
fig.update_layout({"title": f"Norm Regret Values for Different Methods", "height": 500, "yaxis_range": [0.0, 0.5],}, )
fig.add_scatter(x=X, y=result1.norm_regret, name=result1.regret_method, mode="lines", line={"width": 0.75,}, row=1, col=1)
fig.add_scatter(x=X, y=result2.norm_regret, name=result2.regret_method, mode="lines", line={"width": 0.75,}, row=1, col=1)
fig.add_scatter(x=X, y=result3.norm_regret, name=result3.regret_method, mode="lines", line={"width": 0.75,}, row=1, col=1)
fig = make_subplots(rows=1, cols=1,)
fig.update_layout({"title": f"Avg Norm Regret Values for Different Methods", "height": 500, "yaxis_range": [0.0, 0.5],}, )
fig.add_scatter(x=X, y=result1.avg_norm_regret, name=result1.regret_method, mode="lines", line={"width": 0.75,}, row=1, col=1)
fig.add_scatter(x=X, y=result2.avg_norm_regret, name=result2.regret_method, mode="lines", line={"width": 0.75,}, row=1, col=1)
fig.add_scatter(x=X, y=result3.avg_norm_regret, name=result3.regret_method, mode="lines", line={"width": 0.75,}, row=1, col=1)
result1.norm_regret[-1], result2.norm_regret[-1], result3.norm_regret[-1]
(np.float64(0.0), np.float64(0.0), np.float64(0.0))
result1.drawn, result2.drawn, result3.drawn
([522, 1024, 5454], [522, 1024, 5454], [468, 1012, 5520])
result1.omegas[-1], result2.omegas[-1], result3.omegas[-1]
(array([0.075, 0.146, 0.779]), array([0.075, 0.146, 0.779]), array([0. , 0.03, 0.97]))
fig = make_subplots(rows=1, cols=1,)
fig.update_layout({"title": f"Probability of being optimal for the true best arm", "height": 500, "yaxis_range": [0.0, 1.0],}, )
fig.add_scatter(x=X, y=result1.omegas[:, -1], name=result1.regret_method, mode="lines", line={"width": 0.75,}, row=1, col=1)
fig.add_scatter(x=X, y=result2.omegas[:, -1], name=result2.regret_method, mode="lines", line={"width": 0.75,}, row=1, col=1)
fig.add_scatter(x=X, y=result3.omegas[:, -1], name=result3.regret_method, mode="lines", line={"width": 0.75,}, row=1, col=1)
fig = make_subplots(rows=1, cols=1,)
fig.update_layout({"title": f"Average Rewards", "height": 500, "yaxis_range": [0.0, 1.0],}, )
fig.add_scatter(x=X, y=result1.rewards, name=result1.regret_method, mode="lines", line={"width": 0.75,}, row=1, col=1)
fig.add_scatter(x=X, y=result2.rewards, name=result2.regret_method, mode="lines", line={"width": 0.75,}, row=1, col=1)
fig.add_scatter(x=X, y=result3.rewards, name=result3.regret_method, mode="lines", line={"width": 0.75,}, row=1, col=1)
Discussion¶
From the above charts, we can see that there is not much difference between the regret calculation methods (expected, time step, G sample), although the expected regret seems to converge a bit faster. The effect of averaging on the smoothness of the curves is obvious. In the non-averaged methods, the expected regret fluctuates more in the later steps of the experiment, but that is expected because the Beta distributions for the optimal arms are narrower in late steps and the differences between expected values are crisper than those between the sampled values. Also note that after decreasing in the early steps, the regret stays almost constant, fluctuating between zero and non-zero values. The regret will never remain zero forever, because there is always some remaining variance in the model which gets close to zero but never becomes exactly zero. So seeing vertical spikes even after the experiment has converged is normal, although as time continues these spikes get more and more distant.
In terms of the probability of being optimal, we can see that the G sampling method converges faster than the time-step sampling method (note: both ERegret and TRegret use the same method, time-step sampling, for omega, so they have exactly the same curves in the plot). The time-step sampling method for computing omega is slower than the G sampling method because it takes the entire history of arm selections into account, which contains many non-optimal choices from the beginning. The G sampling method, on the other hand, relies on the current posterior distributions and hence shows updated results much more quickly, without taking into account the uninformative data from the early steps. So this should be the method of choice for calculating the omegas. Also, a nice property of the G sampling method is that it can be implemented independently of the arm selection logic and used merely to compute optimality probabilities.
# TESTS
rm = RegretMethod("rm")
vals = [1, 2, 3, 4, 5]
avgs = []
for val in vals:
avgs.append(rm.calc_step_average(avgs, val))
assert avgs == [1, 1.5, 2, 2.5, 3.0]
avgs
[1, 1.5, 2.0, 2.5, 3.0]
References¶
Sullivan, Lisa M. (n.d.). Power and Sample Size Determination. Boston University School of Public Health (teaching module).
PDF copy accessed Jan 26, 2026: https://paragstatistics.wordpress.com/wp-content/uploads/2017/01/sample-size.pdf
Original BU OTLT URL (now redirects): https://sphweb.bumc.bu.edu/otlt/mph-modules/bs/bs704_power/bs704_power_print.html
Johari, Ramesh, Leo Pekelis, and David Walsh (2015). “Always valid inference: Bringing sequential analysis to A/B testing”.
Johari, Ramesh et al. (2017). “Peeking at A/B Tests: Why It Matters, and What to Do About It”. KDD ’17. Halifax, NS, Canada: ACM, pp. 1517–1525.
Robbins, Herbert (1970). “Statistical Methods Related to the Law of the Iterated Logarithm”. The Annals of Mathematical Statistics 41.5, pp. 1397–1409.
Wald, A. (1945). "Sequential Tests of Statistical Hypotheses". The Annals of Mathematical Statistics 16.2, pp. 117–186.
Stenberg, Erik (2019). "Sequential A/B Testing Using Pre-Experiment Data". Master's Thesis, Uppsala Universitet.
Scott, S. (2015). "Multi-armed bandit experiments in the online service economy". Applied Stochastic Models in Business and Industry 31(1), Special Issue: Actual Impact and Future Perspectives on Stochastic Modelling in Business and Industry.