import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as st
import pandas as pd
from scipy.optimize import minimizedf = pd.read_csv("https://raw.githubusercontent.com/roualdes/data/refs/heads/master/donkeys.csv")df.head()| BCS | Age | Sex | Length | Girth | Height | Weight | WeightAlt | |
|---|---|---|---|---|---|---|---|---|
| 0 | 3.0 | <2 | stallion | 78 | 90 | 90 | 77 | NaN |
| 1 | 2.5 | <2 | stallion | 91 | 97 | 94 | 100 | NaN |
| 2 | 1.5 | <2 | stallion | 74 | 93 | 95 | 74 | NaN |
| 3 | 3.0 | <2 | female | 87 | 109 | 96 | 116 | NaN |
| 4 | 2.5 | <2 | female | 79 | 98 | 91 | 91 | NaN |
plt.scatter(df["Height"], df["Weight"])
plt.xlabel("Height (cm)")
plt.ylabel("Weight (kg)")Text(0, 0.5, 'Weight (kg)')

def ll_regression(theta, data):
x = data["x"]
y = data["y"]
b0 = theta[0]
b1 = theta[1]
return np.sum( (y - (b0 + b1 * x)) ** 2)def ll_grad(theta, data):
x = data["x"]
y = data["y"]
b0 = theta[0]
b1 = theta[1]
lm = b0 + b1 * x
db0 = np.sum(y - lm)
db1 = np.sum( x * (y - lm) )
return -2 * np.array([db0, db1])rng = np.random.default_rng()
init = rng.normal(size = 2)
d = {
"x": df["Height"],
"y": df["Weight"]
}
o = minimize(ll_regression, init, jac = ll_grad, args = (d,))np.sum(o.x * np.array([1, 120]))237.0145972963091
plt.scatter(df["Height"], df["Weight"])
plt.xlabel("Height (cm)")
plt.ylabel("Weight (kg)")
x1 = np.sum(o.x * np.array([1, 120]))
x0 = np.sum(o.x * np.array([1, 80]))
yhat = np.array([x0, x1])
x = np.array([80, 120])
plt.plot(x, yhat, color = "black");
o.x[0]-309.30049352770055