import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import minimize
import scipy.stats as st
import pandas as pd
df = pd.read_csv("https://raw.githubusercontent.com/roualdes/data/refs/heads/master/donkeys.csv")
x = df["Weight"]
N = np.size(x)
def bootstrap(T, data, R = 1_000, conflevel = 0.95, **kwargs):
    shp = np.shape(T(data, **kwargs))
    rng = np.random.default_rng()
    Ts = np.zeros((R,) + shp)
    N = np.shape(data)[0]
    a = (1 - conflevel) / 2
    for r in range(R):
        idx = rng.integers(N, size = N)   
        Ts[r] = T(data[idx], **kwargs)
    return np.quantile(Ts, [a, 1 - a], axis = 0)    
bootstrap(np.mean, x)
array([149.94448529, 154.31341912])
bootstrap(np.median, x)
array([152., 158.])
bootstrap(np.std, x)
array([24.43334109, 28.40400556])
bootstrap(lambda z: np.sqrt(np.var(z)), x, conflevel = 0.98)
array([24.09612569, 28.8466865 ])
N = np.shape(df)[0]
data = np.c_[df["Weight"], np.ones(N), df["Height"]]
def ll_regression(theta, data):
    X = data[:, 1:]
    y = data[:, 0]
    lm = np.sum(X * theta, axis = 1)
    return np.sum( (y - lm) ** 2 )
    
def ll_grad(theta, data):
    X = data[:, 1:]
    height = X[:, 1]
    y = data[:, 0]
    lm = np.sum(X * theta, axis = 1)
    db0 = np.sum(y - lm)
    db1 = np.sum( height * (y - lm) )
    return -2 * np.array([db0, db1])
def ll_regression_prediction(data, **kwargs):
    rng = np.random.default_rng()
    init = rng.normal(size = np.shape(data[:, 1:])[1])
    o = minimize(ll_regression, init, jac = ll_grad, args = (data,))
    return np.sum(o.x * np.array(kwargs["xnew"]))
bootstrap(ll_regression_prediction, data, xnew = [1, 80])
array([46.56569883, 62.58062269])