#

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as st
import pandas as pd

Assuming you have data $X_1, \ldots, X_N \sim D$ from which you want to
calculate a confidence interval of the expectation of $X$,
$\mathbb{E}[X]$ where $X \sim D$, thereâ€™s two options: t-distribution or
bootstrap.

In [3]:
N = 101
rng = np.random.default_rng()
x = rng.binomial(10, 0.5, size = N)

# t-Distribution CI

In [4]:
xbar = np.mean(x)
s = np.std(x)
se = s / np.sqrt(N)
t = st.t(df = N - 1).ppf([0.025, 0.975]) # 95%
xbar + t * se

array([4.91784634, 5.51779723])

# Bootstrap CI

In [6]:
R = 1001
bms = np.zeros(R)
for r in range(R):
    idx = rng.integers(N, size = N)
    bms[r] = np.mean(x[idx])
np.quantile(bms, [0.025, 0.975])

array([4.91089109, 5.5049505 ])

In [2]:
df = pd.read_csv("https://raw.githubusercontent.com/roualdes/data/refs/heads/master/penguins.csv")

In [3]:
df.head()

In [12]:
x = df["bill_length_mm"]

In [27]:
xbar = np.mean(x)
N = np.sum(~x.isna()) # np.size(x) # NaNs
se = np.sqrt(np.var(x) / N)
t = st.t(df = N - 1).ppf([0.025, 0.975])
xbar + t * se

array([43.34209692, 44.50176273])

In [30]:
R = 1_000
rng = np.random.default_rng()
cms = np.zeros(R)
for r in range(R):
    idx = rng.integers(N, size = N)
    cms[r] = np.mean(x[idx])
np.quantile(cms, [0.025, 0.975])

array([43.30323207, 44.41865553])

In [29]:
rng = np.random.default_rng()
idx = rng.integers(N, size = N)
x[idx]

15     36.6
69     41.8
165    48.4
240    47.5
193    49.6
       ... 
241    52.1
166    45.8
265    51.5
94     36.2
118    35.7
Name: bill_length_mm, Length: 342, dtype: float64