simple_linear_regression

import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as st
import pandas as pd
from scipy.optimize import minimize

df = pd.read_csv("https://raw.githubusercontent.com/roualdes/data/refs/heads/master/donkeys.csv")

df.head()

	BCS	Age	Sex	Length	Girth	Height	Weight	WeightAlt
0	3.0	<2	stallion	78	90	90	77	NaN
1	2.5	<2	stallion	91	97	94	100	NaN
2	1.5	<2	stallion	74	93	95	74	NaN
3	3.0	<2	female	87	109	96	116	NaN
4	2.5	<2	female	79	98	91	91	NaN

plt.scatter(df["Height"], df["Weight"])
plt.xlabel("Height (cm)")
plt.ylabel("Weight (kg)")

Text(0, 0.5, 'Weight (kg)')

def ll_regression(theta, data):
    x = data["x"]
    y = data["y"]
    b0 = theta[0]
    b1 = theta[1]
    return np.sum( (y - (b0 + b1 * x)) ** 2)

def ll_grad(theta, data):
    x = data["x"]
    y = data["y"]
    b0 = theta[0]
    b1 = theta[1]
    lm = b0 + b1 * x
    db0 = np.sum(y - lm)
    db1 = np.sum( x * (y - lm) )
    return -2 * np.array([db0, db1])

rng = np.random.default_rng()
init = rng.normal(size = 2)
d = {
    "x": df["Height"],
    "y": df["Weight"]
}
o = minimize(ll_regression, init, jac = ll_grad, args = (d,))

np.sum(o.x * np.array([1, 120]))

237.0145972963091

plt.scatter(df["Height"], df["Weight"])
plt.xlabel("Height (cm)")
plt.ylabel("Weight (kg)")
x1 = np.sum(o.x * np.array([1, 120]))
x0 = np.sum(o.x * np.array([1, 80]))
yhat = np.array([x0, x1])
x = np.array([80, 120])
plt.plot(x, yhat, color = "black");

o.x[0]

-309.30049352770055

Other Formats