import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import minimize
import scipy.stats as st
import pandas as pddf = pd.read_csv("https://raw.githubusercontent.com/roualdes/data/refs/heads/master/donkeys.csv")
x = df["Weight"]
N = np.size(x)def bootstrap(T, data, R = 1_000, conflevel = 0.95, **kwargs):
shp = np.shape(T(data, **kwargs))
rng = np.random.default_rng()
Ts = np.zeros((R,) + shp)
N = np.shape(data)[0]
a = (1 - conflevel) / 2
for r in range(R):
idx = rng.integers(N, size = N)
Ts[r] = T(data[idx], **kwargs)
return np.quantile(Ts, [a, 1 - a], axis = 0) bootstrap(np.mean, x)array([149.94448529, 154.31341912])
bootstrap(np.median, x)array([152., 158.])
bootstrap(np.std, x)array([24.43334109, 28.40400556])
bootstrap(lambda z: np.sqrt(np.var(z)), x, conflevel = 0.98)array([24.09612569, 28.8466865 ])
N = np.shape(df)[0]
data = np.c_[df["Weight"], np.ones(N), df["Height"]]def ll_regression(theta, data):
X = data[:, 1:]
y = data[:, 0]
lm = np.sum(X * theta, axis = 1)
return np.sum( (y - lm) ** 2 )
def ll_grad(theta, data):
X = data[:, 1:]
height = X[:, 1]
y = data[:, 0]
lm = np.sum(X * theta, axis = 1)
db0 = np.sum(y - lm)
db1 = np.sum( height * (y - lm) )
return -2 * np.array([db0, db1])def ll_regression_prediction(data, **kwargs):
rng = np.random.default_rng()
init = rng.normal(size = np.shape(data[:, 1:])[1])
o = minimize(ll_regression, init, jac = ll_grad, args = (data,))
return np.sum(o.x * np.array(kwargs["xnew"]))bootstrap(ll_regression_prediction, data, xnew = [1, 80])array([46.56569883, 62.58062269])